expops 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- expops-0.1.3.dist-info/METADATA +826 -0
- expops-0.1.3.dist-info/RECORD +86 -0
- expops-0.1.3.dist-info/WHEEL +5 -0
- expops-0.1.3.dist-info/entry_points.txt +3 -0
- expops-0.1.3.dist-info/licenses/LICENSE +674 -0
- expops-0.1.3.dist-info/top_level.txt +1 -0
- mlops/__init__.py +0 -0
- mlops/__main__.py +11 -0
- mlops/_version.py +34 -0
- mlops/adapters/__init__.py +12 -0
- mlops/adapters/base.py +86 -0
- mlops/adapters/config_schema.py +89 -0
- mlops/adapters/custom/__init__.py +3 -0
- mlops/adapters/custom/custom_adapter.py +447 -0
- mlops/adapters/plugin_manager.py +113 -0
- mlops/adapters/sklearn/__init__.py +3 -0
- mlops/adapters/sklearn/adapter.py +94 -0
- mlops/cluster/__init__.py +3 -0
- mlops/cluster/controller.py +496 -0
- mlops/cluster/process_runner.py +91 -0
- mlops/cluster/providers.py +258 -0
- mlops/core/__init__.py +95 -0
- mlops/core/custom_model_base.py +38 -0
- mlops/core/dask_networkx_executor.py +1265 -0
- mlops/core/executor_worker.py +1239 -0
- mlops/core/experiment_tracker.py +81 -0
- mlops/core/graph_types.py +64 -0
- mlops/core/networkx_parser.py +135 -0
- mlops/core/payload_spill.py +278 -0
- mlops/core/pipeline_utils.py +162 -0
- mlops/core/process_hashing.py +216 -0
- mlops/core/step_state_manager.py +1298 -0
- mlops/core/step_system.py +956 -0
- mlops/core/workspace.py +99 -0
- mlops/environment/__init__.py +10 -0
- mlops/environment/base.py +43 -0
- mlops/environment/conda_manager.py +307 -0
- mlops/environment/factory.py +70 -0
- mlops/environment/pyenv_manager.py +146 -0
- mlops/environment/setup_env.py +31 -0
- mlops/environment/system_manager.py +66 -0
- mlops/environment/utils.py +105 -0
- mlops/environment/venv_manager.py +134 -0
- mlops/main.py +527 -0
- mlops/managers/project_manager.py +400 -0
- mlops/managers/reproducibility_manager.py +575 -0
- mlops/platform.py +996 -0
- mlops/reporting/__init__.py +16 -0
- mlops/reporting/context.py +187 -0
- mlops/reporting/entrypoint.py +292 -0
- mlops/reporting/kv_utils.py +77 -0
- mlops/reporting/registry.py +50 -0
- mlops/runtime/__init__.py +9 -0
- mlops/runtime/context.py +34 -0
- mlops/runtime/env_export.py +113 -0
- mlops/storage/__init__.py +12 -0
- mlops/storage/adapters/__init__.py +9 -0
- mlops/storage/adapters/gcp_kv_store.py +778 -0
- mlops/storage/adapters/gcs_object_store.py +96 -0
- mlops/storage/adapters/memory_store.py +240 -0
- mlops/storage/adapters/redis_store.py +438 -0
- mlops/storage/factory.py +199 -0
- mlops/storage/interfaces/__init__.py +6 -0
- mlops/storage/interfaces/kv_store.py +118 -0
- mlops/storage/path_utils.py +38 -0
- mlops/templates/premier-league/charts/plot_metrics.js +70 -0
- mlops/templates/premier-league/charts/plot_metrics.py +145 -0
- mlops/templates/premier-league/charts/requirements.txt +6 -0
- mlops/templates/premier-league/configs/cluster_config.yaml +13 -0
- mlops/templates/premier-league/configs/project_config.yaml +207 -0
- mlops/templates/premier-league/data/England CSV.csv +12154 -0
- mlops/templates/premier-league/models/premier_league_model.py +638 -0
- mlops/templates/premier-league/requirements.txt +8 -0
- mlops/templates/sklearn-basic/README.md +22 -0
- mlops/templates/sklearn-basic/charts/plot_metrics.py +85 -0
- mlops/templates/sklearn-basic/charts/requirements.txt +3 -0
- mlops/templates/sklearn-basic/configs/project_config.yaml +64 -0
- mlops/templates/sklearn-basic/data/train.csv +14 -0
- mlops/templates/sklearn-basic/models/model.py +62 -0
- mlops/templates/sklearn-basic/requirements.txt +10 -0
- mlops/web/__init__.py +3 -0
- mlops/web/server.py +585 -0
- mlops/web/ui/index.html +52 -0
- mlops/web/ui/mlops-charts.js +357 -0
- mlops/web/ui/script.js +1244 -0
- mlops/web/ui/styles.css +248 -0
|
@@ -0,0 +1,575 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Dict, Any, Optional, List
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
import yaml
|
|
6
|
+
import subprocess
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
import numpy as np
|
|
9
|
+
import os
|
|
10
|
+
import random
|
|
11
|
+
|
|
12
|
+
from ..core.experiment_tracker import ExperimentTracker, NoOpExperimentTracker
|
|
13
|
+
from ..environment.factory import create_environment_manager
|
|
14
|
+
from ..environment.base import EnvironmentManager
|
|
15
|
+
from mlops.environment.venv_manager import VenvEnvironmentManager
|
|
16
|
+
|
|
17
|
+
class ReproducibilityManager:
|
|
18
|
+
"""Lightweight reproducibility manager for custom models."""
|
|
19
|
+
|
|
20
|
+
def __init__(self, config_path: str, tracker_instance: Optional[ExperimentTracker] = None,
             project_path: Optional[Path] = None):
    """Create a manager bound to one configuration file.

    Args:
        config_path: Location of the YAML configuration; it is loaded immediately.
        tracker_instance: Tracker to reuse. When falsy, one is built from the
            loaded configuration via ``_setup_tracker``.
        project_path: Optional project directory, stored as given.
    """
    # Environment managers are only populated later by ``setup_environment``.
    self.environment_manager: Optional[EnvironmentManager] = None
    self.reporting_environment_manager: Optional[EnvironmentManager] = None

    self.project_path = project_path
    self.config_path = Path(config_path)
    # The configuration must be loaded before a tracker can be derived from it.
    self.config = self._load_config()

    self.tracker = tracker_instance if tracker_instance else self._setup_tracker()

    tracker_kind = type(self.tracker).__name__
    print(f"[ReproducibilityManager] Initialized with tracker: {tracker_kind}")
    if self.project_path:
        print(f"[ReproducibilityManager] Using project path: {self.project_path}")
|
|
36
|
+
|
|
37
|
+
@property
|
|
38
|
+
def environment_name(self) -> Optional[str]:
|
|
39
|
+
"""Get the name of the current environment."""
|
|
40
|
+
return self.environment_manager.get_environment_name() if self.environment_manager else None
|
|
41
|
+
|
|
42
|
+
@property
|
|
43
|
+
def python_interpreter(self) -> Optional[str]:
|
|
44
|
+
"""Get the Python interpreter path for the current environment."""
|
|
45
|
+
return self.environment_manager.get_python_interpreter() if self.environment_manager else None
|
|
46
|
+
|
|
47
|
+
def _setup_tracker(self) -> ExperimentTracker:
    """Build the experiment tracker described by ``reproducibility.experiment_tracking``.

    Only the ``noop`` backend exists here: any other configured backend name
    produces a warning and still yields a ``NoOpExperimentTracker``.
    """
    tracking_cfg = self.config.get("reproducibility", {}).get("experiment_tracking", {})

    # A missing or falsy backend entry is treated as "noop".
    backend = tracking_cfg.get("backend", "noop") or "noop"
    backend = str(backend).strip().lower()

    params = tracking_cfg.get("parameters", {})
    if not isinstance(params, dict):
        # Anything that is not a mapping is discarded rather than passed on.
        params = {}

    # External experiment tracking backends are optional; default to NoOp.
    if backend != "noop":
        print(f"Warning: Tracker '{backend}' not available. Falling back to NoOpExperimentTracker.")

    return NoOpExperimentTracker(config=params)
|
|
60
|
+
|
|
61
|
+
def ensure_reproducibility_setup(self) -> None:
|
|
62
|
+
"""Set up reproducibility across common ML libraries (minimal).
|
|
63
|
+
|
|
64
|
+
- Seed Python's random and NumPy RNGs
|
|
65
|
+
- Best-effort seed for PyTorch and TensorFlow if installed
|
|
66
|
+
- Apply environment flags that encourage deterministic behavior
|
|
67
|
+
"""
|
|
68
|
+
random_seed_config = self.config.get("reproducibility", {}).get("random_seed")
|
|
69
|
+
|
|
70
|
+
if not isinstance(random_seed_config, int):
|
|
71
|
+
random_seed_config = 42
|
|
72
|
+
if self.config.get("reproducibility", {}).get("random_seed") is None:
|
|
73
|
+
print(f"[ReproducibilityManager] random_seed not specified, using default {random_seed_config}.")
|
|
74
|
+
else:
|
|
75
|
+
print(
|
|
76
|
+
f"[ReproducibilityManager] Invalid random_seed "
|
|
77
|
+
f"'{self.config.get('reproducibility', {}).get('random_seed')}', "
|
|
78
|
+
f"using default {random_seed_config}."
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
seed = int(random_seed_config)
|
|
82
|
+
|
|
83
|
+
# Core Python and NumPy
|
|
84
|
+
try:
|
|
85
|
+
random.seed(seed)
|
|
86
|
+
except Exception as e:
|
|
87
|
+
print(f"[ReproducibilityManager] Failed to seed Python random: {e}")
|
|
88
|
+
try:
|
|
89
|
+
np.random.seed(seed)
|
|
90
|
+
print(f"[ReproducibilityManager] Global seeds set (python, numpy): {seed}")
|
|
91
|
+
except Exception as e:
|
|
92
|
+
print(f"[ReproducibilityManager] Failed to seed NumPy: {e}")
|
|
93
|
+
|
|
94
|
+
# Best-effort environment flags for deterministic operations in DL frameworks
|
|
95
|
+
try:
|
|
96
|
+
os.environ.setdefault("TF_DETERMINISTIC_OPS", "1")
|
|
97
|
+
os.environ.setdefault("TF_CUDNN_DETERMINISTIC", "1")
|
|
98
|
+
os.environ.setdefault("CUBLAS_WORKSPACE_CONFIG", ":16:8")
|
|
99
|
+
except Exception:
|
|
100
|
+
pass
|
|
101
|
+
|
|
102
|
+
# Framework-specific seeding
|
|
103
|
+
self._seed_pytorch_if_available(seed)
|
|
104
|
+
self._seed_tensorflow_if_available(seed)
|
|
105
|
+
|
|
106
|
+
# Export base seed and task-level seeding toggle for workers/subprocesses
|
|
107
|
+
try:
|
|
108
|
+
os.environ.setdefault("MLOPS_RANDOM_SEED", str(seed))
|
|
109
|
+
except Exception:
|
|
110
|
+
pass
|
|
111
|
+
try:
|
|
112
|
+
tl_seed_cfg = self.config.get("reproducibility", {}).get("task_level_seeding")
|
|
113
|
+
enabled = True if tl_seed_cfg is None else bool(tl_seed_cfg)
|
|
114
|
+
os.environ.setdefault("MLOPS_TASK_LEVEL_SEEDING", "1" if enabled else "0")
|
|
115
|
+
except Exception:
|
|
116
|
+
pass
|
|
117
|
+
|
|
118
|
+
print(f"[ReproducibilityManager] Reproducibility setup completed with seed {seed}.")
|
|
119
|
+
|
|
120
|
+
def _seed_pytorch_if_available(self, seed: int) -> None:
|
|
121
|
+
try:
|
|
122
|
+
import torch
|
|
123
|
+
try:
|
|
124
|
+
torch.manual_seed(seed)
|
|
125
|
+
except Exception:
|
|
126
|
+
pass
|
|
127
|
+
try:
|
|
128
|
+
if torch.cuda.is_available():
|
|
129
|
+
torch.cuda.manual_seed_all(seed)
|
|
130
|
+
# Ensure deterministic behavior when possible
|
|
131
|
+
try:
|
|
132
|
+
torch.use_deterministic_algorithms(True) # type: ignore[attr-defined]
|
|
133
|
+
except Exception:
|
|
134
|
+
pass
|
|
135
|
+
try:
|
|
136
|
+
import torch.backends.cudnn as cudnn # type: ignore
|
|
137
|
+
cudnn.deterministic = True
|
|
138
|
+
cudnn.benchmark = False
|
|
139
|
+
except Exception:
|
|
140
|
+
pass
|
|
141
|
+
except Exception:
|
|
142
|
+
pass
|
|
143
|
+
print("[ReproducibilityManager] PyTorch seed applied.")
|
|
144
|
+
except Exception:
|
|
145
|
+
# PyTorch not installed or failed to import; ignore silently
|
|
146
|
+
pass
|
|
147
|
+
|
|
148
|
+
def _seed_tensorflow_if_available(self, seed: int) -> None:
|
|
149
|
+
try:
|
|
150
|
+
import tensorflow as tf
|
|
151
|
+
try:
|
|
152
|
+
tf.random.set_seed(seed)
|
|
153
|
+
print("[ReproducibilityManager] TensorFlow seed applied.")
|
|
154
|
+
except Exception:
|
|
155
|
+
pass
|
|
156
|
+
except Exception:
|
|
157
|
+
pass
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _load_config(self) -> Dict[str, Any]:
    """Load and parse the YAML configuration file at ``self.config_path``.

    Returns:
        The parsed document. An empty file (which ``yaml.safe_load`` parses to
        ``None``) yields an empty dict, so the ``self.config.get(...)`` lookups
        used throughout this class do not fail on ``None``.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    # Read bytes so PyYAML performs its own encoding detection instead of
    # depending on the platform's locale encoding.
    with open(self.config_path, "rb") as f:
        loaded = yaml.safe_load(f)
    return {} if loaded is None else loaded
|
|
165
|
+
|
|
166
|
+
def setup_environment(self) -> None:
    """Set up the environment based on configuration.

    Creates the main environment manager from ``self.config`` and runs its
    setup; a ``RuntimeError`` from that step is reported and re-raised.

    Afterwards, if ``environment.venv.reporting`` is a mapping, a separate
    reporting virtual environment is created and it is checked that ``mlops``
    can be imported there (installing it when the import fails). Every error
    in this reporting phase is caught and printed, never raised.

    Raises:
        RuntimeError: When the main environment setup fails.
    """
    print("[ReproducibilityManager] Starting environment setup...")

    self.environment_manager = create_environment_manager(self.config)

    try:
        self.environment_manager.setup_environment()
    except RuntimeError as e:
        print(f"[ReproducibilityManager] Environment setup failed: {e}")
        raise

    print(f"[ReproducibilityManager] Environment '{self.environment_manager.get_environment_name()}' setup completed.")
    print(f"[ReproducibilityManager] Environment type: {self.environment_manager.get_environment_type()}")
    print(f"[ReproducibilityManager] Python interpreter: {self.environment_manager.get_python_interpreter()}")

    # Optional reporting environment: best-effort, failures are only printed.
    try:
        env_cfg = self.config.get("environment", {}) or {}
        reporting_cfg = None
        venv_cfg = env_cfg.get("venv") if isinstance(env_cfg, dict) else None
        # NOTE(review): the statements below are assumed to run only when a
        # reporting mapping is configured - confirm the nesting against upstream.
        if isinstance(venv_cfg, dict) and isinstance(venv_cfg.get("reporting"), dict):
            # Work on a copy so the loaded configuration is not mutated.
            reporting_cfg = dict(venv_cfg.get("reporting") or {})
            # Default name if not provided -> derive from training venv name
            if not reporting_cfg.get("name"):
                train_name = venv_cfg.get("name")
                if train_name:
                    reporting_cfg["name"] = f"{train_name}-reporting"
                else:
                    # No training venv name either: use the project directory name.
                    proj_name = self.project_path.name if self.project_path else "reporting"
                    reporting_cfg["name"] = f"{proj_name}-reporting"
            self.reporting_environment_manager = VenvEnvironmentManager(reporting_cfg)
            self.reporting_environment_manager.setup_environment()
            print(f"[ReproducibilityManager] Reporting environment '{self.reporting_environment_manager.get_environment_name()}' setup completed.")
            print(f"[ReproducibilityManager] Reporting Python interpreter: {self.reporting_environment_manager.get_python_interpreter()}")
            try:
                rep_py = self.reporting_environment_manager.get_python_interpreter()
                # Probe whether the platform package is importable from the reporting interpreter.
                import_check = subprocess.run(
                    [rep_py, "-c", "import mlops"],
                    capture_output=True,
                    text=True,
                    check=False,
                )
                if import_check.returncode != 0:
                    # Prefer installing from local source checkout when available (dev workflow),
                    # otherwise install from index. Fall back to unpinned install when the
                    # current version isn't published (e.g., local .dev versions).
                    try:
                        from mlops.core.workspace import infer_source_root
                        src_root = infer_source_root()
                    except Exception:
                        src_root = None
                    if src_root and ((src_root / "pyproject.toml").exists() or (src_root / "setup.py").exists()):
                        # Editable install; the exit status is not inspected (check=False).
                        subprocess.run(
                            [rep_py, "-m", "pip", "install", "--no-deps", "-e", str(src_root)],
                            check=False,
                        )
                    else:
                        try:
                            from importlib.metadata import version  # type: ignore

                            # Pin to the version installed in the current interpreter first.
                            v = version("expops")
                            res = subprocess.run(
                                [rep_py, "-m", "pip", "install", "--no-deps", f"expops=={v}"],
                                check=False,
                            )
                            if getattr(res, "returncode", 1) != 0:
                                # Pinned install failed: retry without a version constraint.
                                subprocess.run(
                                    [rep_py, "-m", "pip", "install", "--no-deps", "expops"],
                                    check=False,
                                )
                        except Exception:
                            # Version lookup or pinned install raised: try the unpinned package.
                            subprocess.run(
                                [rep_py, "-m", "pip", "install", "--no-deps", "expops"],
                                check=False,
                            )
            except Exception as _e:
                print(f"[ReproducibilityManager] Warning: failed to ensure platform is importable in reporting env: {_e}")
    except Exception as e:
        print(f"[ReproducibilityManager] Reporting environment setup skipped or failed: {e}")
|
|
245
|
+
|
|
246
|
+
def apply_cloud_env_from_config(self, model_section: Dict[str, Any]) -> None:
|
|
247
|
+
"""Apply cloud-related environment variables from the YAML config.
|
|
248
|
+
|
|
249
|
+
Sets GOOGLE_APPLICATION_CREDENTIALS, GOOGLE_CLOUD_PROJECT, and FIRESTORE_EMULATOR_HOST
|
|
250
|
+
if present under model.parameters.cache.backend. Only applies when configured.
|
|
251
|
+
"""
|
|
252
|
+
try:
|
|
253
|
+
base_dir = self.project_path or Path.cwd()
|
|
254
|
+
params = (model_section or {}).get("parameters", {}) or {}
|
|
255
|
+
cache_cfg = params.get("cache", {}) or {}
|
|
256
|
+
backend_cfg = cache_cfg.get("backend", {}) or {}
|
|
257
|
+
|
|
258
|
+
creds_rel = backend_cfg.get("credentials_json")
|
|
259
|
+
if creds_rel:
|
|
260
|
+
candidates = [
|
|
261
|
+
(base_dir / creds_rel).resolve(),
|
|
262
|
+
(Path.cwd() / creds_rel).resolve(),
|
|
263
|
+
]
|
|
264
|
+
chosen = next((p for p in candidates if p.exists()), None)
|
|
265
|
+
if chosen is not None:
|
|
266
|
+
current = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
|
|
267
|
+
if not current or not Path(current).expanduser().exists():
|
|
268
|
+
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = str(chosen)
|
|
269
|
+
else:
|
|
270
|
+
print(
|
|
271
|
+
f"[ReproducibilityManager] GCP credentials not found at: "
|
|
272
|
+
+ ", ".join(str(p) for p in candidates)
|
|
273
|
+
)
|
|
274
|
+
|
|
275
|
+
gcp_project = backend_cfg.get("gcp_project")
|
|
276
|
+
if gcp_project:
|
|
277
|
+
os.environ.setdefault("GOOGLE_CLOUD_PROJECT", str(gcp_project))
|
|
278
|
+
|
|
279
|
+
emulator_host = backend_cfg.get("emulator_host")
|
|
280
|
+
if emulator_host:
|
|
281
|
+
os.environ.setdefault("FIRESTORE_EMULATOR_HOST", str(emulator_host))
|
|
282
|
+
except Exception as e:
|
|
283
|
+
print(f"[ReproducibilityManager] Failed to apply cloud env: {e}")
|
|
284
|
+
|
|
285
|
+
def _pip_install(self, python_exec: str, packages: List[str]) -> None:
|
|
286
|
+
if not packages or not python_exec:
|
|
287
|
+
return
|
|
288
|
+
try:
|
|
289
|
+
# Pre-upgrade pip tooling
|
|
290
|
+
try:
|
|
291
|
+
subprocess.run([python_exec, "-m", "pip", "install", "--upgrade", "pip", "setuptools", "wheel"], check=False)
|
|
292
|
+
except Exception:
|
|
293
|
+
pass
|
|
294
|
+
timeout_env = os.getenv("MLOPS_PIP_TIMEOUT", "")
|
|
295
|
+
try:
|
|
296
|
+
timeout_s = int(timeout_env) if timeout_env.strip().isdigit() else 1200
|
|
297
|
+
except Exception:
|
|
298
|
+
timeout_s = 1200
|
|
299
|
+
try:
|
|
300
|
+
subprocess.run([python_exec, "-m", "pip", "install", *packages], check=True, timeout=timeout_s)
|
|
301
|
+
except subprocess.TimeoutExpired:
|
|
302
|
+
print(f"[ReproducibilityManager] pip install timed out for: {', '.join(packages)}. Retrying without timeout...")
|
|
303
|
+
subprocess.run([python_exec, "-m", "pip", "install", *packages], check=True)
|
|
304
|
+
except Exception as e:
|
|
305
|
+
print(f"[ReproducibilityManager] pip install failed for {packages} into {python_exec}: {e}")
|
|
306
|
+
|
|
307
|
+
def ensure_cloud_dependencies(self, model_section: Dict[str, Any]) -> None:
|
|
308
|
+
"""Ensure cloud libs are present based on configured backend/object store.
|
|
309
|
+
|
|
310
|
+
- google-cloud-storage when object_store.type == 'gcs'
|
|
311
|
+
- google-cloud-firestore and google-cloud-pubsub when backend.type == 'gcp'
|
|
312
|
+
- redis when backend.type == 'redis'
|
|
313
|
+
Installs into training/runtime interpreter and reporting interpreter if available.
|
|
314
|
+
"""
|
|
315
|
+
params = (model_section or {}).get("parameters", {}) or {}
|
|
316
|
+
cache_cfg = params.get("cache", {}) or {}
|
|
317
|
+
backend_cfg = cache_cfg.get("backend", {}) or {}
|
|
318
|
+
store_cfg = cache_cfg.get("object_store", {}) or {}
|
|
319
|
+
|
|
320
|
+
backend_type = backend_cfg.get("type")
|
|
321
|
+
need_storage = (store_cfg.get("type") == "gcs")
|
|
322
|
+
need_firestore = (backend_type == "gcp")
|
|
323
|
+
need_redis = (backend_type == "redis")
|
|
324
|
+
|
|
325
|
+
to_install: List[str] = []
|
|
326
|
+
if need_storage:
|
|
327
|
+
try:
|
|
328
|
+
__import__("google.cloud.storage")
|
|
329
|
+
except Exception:
|
|
330
|
+
to_install.append("google-cloud-storage>=2.10.0")
|
|
331
|
+
if need_firestore:
|
|
332
|
+
try:
|
|
333
|
+
__import__("google.cloud.firestore")
|
|
334
|
+
except Exception:
|
|
335
|
+
to_install.append("google-cloud-firestore>=2.11.0")
|
|
336
|
+
try:
|
|
337
|
+
__import__("google.cloud.pubsub")
|
|
338
|
+
except Exception:
|
|
339
|
+
to_install.append("google-cloud-pubsub>=2.13.0")
|
|
340
|
+
if need_redis:
|
|
341
|
+
try:
|
|
342
|
+
__import__("redis")
|
|
343
|
+
except Exception:
|
|
344
|
+
to_install.append("redis>=5.0.0")
|
|
345
|
+
|
|
346
|
+
# Deduplicate
|
|
347
|
+
seen = set()
|
|
348
|
+
final = [p for p in to_install if not (p in seen or seen.add(p))]
|
|
349
|
+
if final and self.python_interpreter:
|
|
350
|
+
self._pip_install(self.python_interpreter, final)
|
|
351
|
+
|
|
352
|
+
# Reporting env
|
|
353
|
+
reporting_packages: List[str] = []
|
|
354
|
+
if need_firestore:
|
|
355
|
+
reporting_packages += ["google-cloud-firestore>=2.11.0", "google-cloud-pubsub>=2.13.0"]
|
|
356
|
+
if need_storage:
|
|
357
|
+
reporting_packages += ["google-cloud-storage>=2.10.0"]
|
|
358
|
+
seen_r = set()
|
|
359
|
+
reporting_final = [p for p in reporting_packages if not (p in seen_r or seen_r.add(p))]
|
|
360
|
+
if reporting_final and self.reporting_python_interpreter:
|
|
361
|
+
self._pip_install(self.reporting_python_interpreter, reporting_final)
|
|
362
|
+
|
|
363
|
+
def verify_environment(self) -> bool:
|
|
364
|
+
"""Verify that the environment is properly configured."""
|
|
365
|
+
if not self.environment_manager:
|
|
366
|
+
print("[ReproducibilityManager] Environment manager not initialized.")
|
|
367
|
+
return False
|
|
368
|
+
|
|
369
|
+
return self.environment_manager.verify_environment()
|
|
370
|
+
|
|
371
|
+
@property
|
|
372
|
+
def reporting_python_interpreter(self) -> Optional[str]:
|
|
373
|
+
"""Get Python interpreter for the reporting environment, if configured."""
|
|
374
|
+
return self.reporting_environment_manager.get_python_interpreter() if self.reporting_environment_manager else None
|
|
375
|
+
|
|
376
|
+
def compute_data_hash(self, data_path: str) -> str:
    """Compute a SHA-256 hex digest of the input data (file or directory).

    For a file, the digest covers its bytes. For a directory, entries are
    visited in sorted name order; each entry's name is hashed, followed by its
    content when it is a regular file. Subdirectories therefore contribute
    only their name (the walk is not recursive).

    File content is streamed in fixed-size chunks so that large datasets are
    never loaded into memory at once; the resulting digest is identical to
    hashing the whole content in one call.

    Args:
        data_path: Path to a file or directory.

    Returns:
        The hexadecimal SHA-256 digest.

    Raises:
        FileNotFoundError: If ``data_path`` does not exist.
    """
    path = Path(data_path)
    if not path.exists():
        raise FileNotFoundError(f"Data path {data_path} not found for hashing.")

    hasher = hashlib.sha256()
    chunk_size = 1024 * 1024

    def _feed_file(file_path) -> None:
        # Stream the file into the running digest, one chunk at a time.
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                hasher.update(chunk)

    if path.is_dir():
        for item in sorted(os.listdir(path)):
            hasher.update(item.encode())
            item_path = path / item
            if item_path.is_file():
                _feed_file(item_path)
    else:
        _feed_file(data_path)

    return hasher.hexdigest()
|
|
394
|
+
|
|
395
|
+
def log_run_info(self, run_id: str, custom_params: Optional[Dict[str, Any]] = None) -> None:
|
|
396
|
+
"""Log run parameters and metadata."""
|
|
397
|
+
if not self.tracker or not self.config.get("reproducibility", {}).get("experiment_tracking", {}).get("enabled", False):
|
|
398
|
+
return
|
|
399
|
+
|
|
400
|
+
if custom_params:
|
|
401
|
+
fn = getattr(self.tracker, "log_params", None)
|
|
402
|
+
if callable(fn):
|
|
403
|
+
try:
|
|
404
|
+
fn(custom_params)
|
|
405
|
+
except Exception:
|
|
406
|
+
pass
|
|
407
|
+
|
|
408
|
+
model_params = self.config.get("model", {}).get("parameters", {})
|
|
409
|
+
if model_params:
|
|
410
|
+
fn = getattr(self.tracker, "log_params", None)
|
|
411
|
+
if callable(fn):
|
|
412
|
+
try:
|
|
413
|
+
fn(model_params)
|
|
414
|
+
except Exception:
|
|
415
|
+
pass
|
|
416
|
+
|
|
417
|
+
if self.config.get("reproducibility", {}).get("version_control", {}).get("enabled", False):
|
|
418
|
+
try:
|
|
419
|
+
git_commit = subprocess.check_output(["git", "rev-parse", "HEAD"], text=True).strip()
|
|
420
|
+
fn = getattr(self.tracker, "log_param", None)
|
|
421
|
+
if callable(fn):
|
|
422
|
+
fn("git_commit", git_commit)
|
|
423
|
+
|
|
424
|
+
git_branch = subprocess.check_output(["git", "rev-parse", "--abbrev-ref", "HEAD"], text=True).strip()
|
|
425
|
+
fn = getattr(self.tracker, "log_param", None)
|
|
426
|
+
if callable(fn):
|
|
427
|
+
fn("git_branch", git_branch)
|
|
428
|
+
except Exception as e:
|
|
429
|
+
print(f"Could not log Git information: {e}")
|
|
430
|
+
|
|
431
|
+
try:
|
|
432
|
+
python_version = subprocess.check_output(["python", "--version"], text=True).strip()
|
|
433
|
+
fn = getattr(self.tracker, "log_param", None)
|
|
434
|
+
if callable(fn):
|
|
435
|
+
fn("python_version", python_version)
|
|
436
|
+
except Exception:
|
|
437
|
+
pass
|
|
438
|
+
|
|
439
|
+
set_tag = getattr(self.tracker, "set_tag", None)
|
|
440
|
+
if callable(set_tag):
|
|
441
|
+
try:
|
|
442
|
+
set_tag("run_id", run_id)
|
|
443
|
+
set_tag("pipeline_name", self.config.get("metadata", {}).get("name", "unknown_pipeline"))
|
|
444
|
+
set_tag("creation_timestamp", datetime.now().isoformat())
|
|
445
|
+
except Exception:
|
|
446
|
+
pass
|
|
447
|
+
|
|
448
|
+
def log_results(self, results: Dict[str, Any]) -> None:
|
|
449
|
+
"""Log experiment results/metrics."""
|
|
450
|
+
if not self.tracker or not self.config.get("reproducibility", {}).get("experiment_tracking", {}).get("enabled", False):
|
|
451
|
+
return
|
|
452
|
+
log_metric = getattr(self.tracker, "log_metric", None)
|
|
453
|
+
if not callable(log_metric):
|
|
454
|
+
return
|
|
455
|
+
|
|
456
|
+
for key, value in results.items():
|
|
457
|
+
if isinstance(value, (int, float)):
|
|
458
|
+
try:
|
|
459
|
+
log_metric(key, value)
|
|
460
|
+
except Exception:
|
|
461
|
+
pass
|
|
462
|
+
elif isinstance(value, dict):
|
|
463
|
+
for sub_key, sub_value in value.items():
|
|
464
|
+
if isinstance(sub_value, (int, float)):
|
|
465
|
+
try:
|
|
466
|
+
log_metric(f"{key}_{sub_key}", sub_value)
|
|
467
|
+
except Exception:
|
|
468
|
+
pass
|
|
469
|
+
|
|
470
|
+
def log_artifacts(self, artifacts: Dict[str, str]) -> None:
|
|
471
|
+
"""Log artifacts (files/directories) to the tracker."""
|
|
472
|
+
if not self.tracker or not self.config.get("reproducibility", {}).get("experiment_tracking", {}).get("enabled", False):
|
|
473
|
+
return
|
|
474
|
+
log_artifact = getattr(self.tracker, "log_artifact", None)
|
|
475
|
+
log_artifacts = getattr(self.tracker, "log_artifacts", None)
|
|
476
|
+
|
|
477
|
+
for artifact_name, artifact_path in artifacts.items():
|
|
478
|
+
path = Path(artifact_path)
|
|
479
|
+
if path.is_file():
|
|
480
|
+
if callable(log_artifact):
|
|
481
|
+
try:
|
|
482
|
+
log_artifact(str(path), artifact_name)
|
|
483
|
+
except Exception:
|
|
484
|
+
pass
|
|
485
|
+
elif path.is_dir():
|
|
486
|
+
if callable(log_artifacts):
|
|
487
|
+
try:
|
|
488
|
+
log_artifacts(str(path), artifact_name)
|
|
489
|
+
except Exception:
|
|
490
|
+
pass
|
|
491
|
+
else:
|
|
492
|
+
print(f"Warning: Artifact path {artifact_path} for '{artifact_name}' not found.")
|
|
493
|
+
|
|
494
|
+
def save_artifacts(self, run_id: str, artifacts: Dict[str, Any]) -> Dict[str, str]:
    """Write each artifact as JSON under ``artifacts/<run_id>/`` and return the paths.

    Dicts and lists are dumped as-is (non-serialisable members via ``str``);
    any other value is stored as ``{"data": str(value)}``. An artifact that
    fails to save is reported and omitted from the result.
    """
    written: Dict[str, str] = {}
    if not artifacts:
        return written

    # Relative to the current working directory.
    out_dir = Path(f"artifacts/{run_id}")
    out_dir.mkdir(parents=True, exist_ok=True)

    for name, payload in artifacts.items():
        try:
            target = out_dir / f"{name}.json"
            is_container = isinstance(payload, (dict, list))
            with open(target, "w") as fh:
                if not is_container:
                    json.dump({"data": str(payload)}, fh, indent=2)
                else:
                    json.dump(payload, fh, indent=2, default=str)
        except Exception as e:
            print(f"Error saving artifact '{name}': {e}")
        else:
            written[name] = str(target)
            print(f"Artifact '{name}' saved to {target}")

    return written
|
|
519
|
+
|
|
520
|
+
def get_tracker(self) -> ExperimentTracker:
|
|
521
|
+
"""Return the current tracker instance."""
|
|
522
|
+
return self.tracker
|
|
523
|
+
|
|
524
|
+
def capture_environment_info(self) -> Dict[str, Any]:
|
|
525
|
+
"""Capture environment information for logging."""
|
|
526
|
+
import platform
|
|
527
|
+
import sys
|
|
528
|
+
|
|
529
|
+
env_info = {
|
|
530
|
+
"python_version": sys.version,
|
|
531
|
+
"platform": platform.platform(),
|
|
532
|
+
"processor": platform.processor(),
|
|
533
|
+
"random_seed": self.config.get("reproducibility", {}).get("random_seed", "not_set")
|
|
534
|
+
}
|
|
535
|
+
|
|
536
|
+
if self.environment_manager:
|
|
537
|
+
env_info["environment_name"] = self.environment_name
|
|
538
|
+
env_info["python_interpreter"] = self.python_interpreter
|
|
539
|
+
|
|
540
|
+
return env_info
|
|
541
|
+
|
|
542
|
+
def save_run_artifacts_locally(self, run_id: str, adapter) -> Dict[str, str]:
|
|
543
|
+
"""Save run artifacts locally and return their paths."""
|
|
544
|
+
if not self.project_path:
|
|
545
|
+
# No-op for non-project runs (avoid creating empty artifacts folders).
|
|
546
|
+
return {}
|
|
547
|
+
|
|
548
|
+
saved_paths: Dict[str, str] = {}
|
|
549
|
+
|
|
550
|
+
# Save model if adapter has one.
|
|
551
|
+
try:
|
|
552
|
+
model_obj = getattr(adapter, "model", None)
|
|
553
|
+
except Exception:
|
|
554
|
+
model_obj = None
|
|
555
|
+
|
|
556
|
+
if model_obj is not None:
|
|
557
|
+
model_dir = self.project_path / "artifacts" / "models"
|
|
558
|
+
model_dir.mkdir(parents=True, exist_ok=True)
|
|
559
|
+
model_path = model_dir / f"{run_id}_model.joblib"
|
|
560
|
+
try:
|
|
561
|
+
import joblib
|
|
562
|
+
joblib.dump(model_obj, model_path)
|
|
563
|
+
saved_paths["model"] = str(model_path)
|
|
564
|
+
print(f"[ReproducibilityManager] Model saved to: {model_path}")
|
|
565
|
+
except Exception as e:
|
|
566
|
+
print(f"[ReproducibilityManager] Failed to save model: {e}")
|
|
567
|
+
|
|
568
|
+
# Log artifacts to tracker
|
|
569
|
+
if saved_paths:
|
|
570
|
+
try:
|
|
571
|
+
self.log_artifacts(saved_paths)
|
|
572
|
+
except Exception:
|
|
573
|
+
pass
|
|
574
|
+
|
|
575
|
+
return saved_paths
|