expops-0.1.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. expops-0.1.3.dist-info/METADATA +826 -0
  2. expops-0.1.3.dist-info/RECORD +86 -0
  3. expops-0.1.3.dist-info/WHEEL +5 -0
  4. expops-0.1.3.dist-info/entry_points.txt +3 -0
  5. expops-0.1.3.dist-info/licenses/LICENSE +674 -0
  6. expops-0.1.3.dist-info/top_level.txt +1 -0
  7. mlops/__init__.py +0 -0
  8. mlops/__main__.py +11 -0
  9. mlops/_version.py +34 -0
  10. mlops/adapters/__init__.py +12 -0
  11. mlops/adapters/base.py +86 -0
  12. mlops/adapters/config_schema.py +89 -0
  13. mlops/adapters/custom/__init__.py +3 -0
  14. mlops/adapters/custom/custom_adapter.py +447 -0
  15. mlops/adapters/plugin_manager.py +113 -0
  16. mlops/adapters/sklearn/__init__.py +3 -0
  17. mlops/adapters/sklearn/adapter.py +94 -0
  18. mlops/cluster/__init__.py +3 -0
  19. mlops/cluster/controller.py +496 -0
  20. mlops/cluster/process_runner.py +91 -0
  21. mlops/cluster/providers.py +258 -0
  22. mlops/core/__init__.py +95 -0
  23. mlops/core/custom_model_base.py +38 -0
  24. mlops/core/dask_networkx_executor.py +1265 -0
  25. mlops/core/executor_worker.py +1239 -0
  26. mlops/core/experiment_tracker.py +81 -0
  27. mlops/core/graph_types.py +64 -0
  28. mlops/core/networkx_parser.py +135 -0
  29. mlops/core/payload_spill.py +278 -0
  30. mlops/core/pipeline_utils.py +162 -0
  31. mlops/core/process_hashing.py +216 -0
  32. mlops/core/step_state_manager.py +1298 -0
  33. mlops/core/step_system.py +956 -0
  34. mlops/core/workspace.py +99 -0
  35. mlops/environment/__init__.py +10 -0
  36. mlops/environment/base.py +43 -0
  37. mlops/environment/conda_manager.py +307 -0
  38. mlops/environment/factory.py +70 -0
  39. mlops/environment/pyenv_manager.py +146 -0
  40. mlops/environment/setup_env.py +31 -0
  41. mlops/environment/system_manager.py +66 -0
  42. mlops/environment/utils.py +105 -0
  43. mlops/environment/venv_manager.py +134 -0
  44. mlops/main.py +527 -0
  45. mlops/managers/project_manager.py +400 -0
  46. mlops/managers/reproducibility_manager.py +575 -0
  47. mlops/platform.py +996 -0
  48. mlops/reporting/__init__.py +16 -0
  49. mlops/reporting/context.py +187 -0
  50. mlops/reporting/entrypoint.py +292 -0
  51. mlops/reporting/kv_utils.py +77 -0
  52. mlops/reporting/registry.py +50 -0
  53. mlops/runtime/__init__.py +9 -0
  54. mlops/runtime/context.py +34 -0
  55. mlops/runtime/env_export.py +113 -0
  56. mlops/storage/__init__.py +12 -0
  57. mlops/storage/adapters/__init__.py +9 -0
  58. mlops/storage/adapters/gcp_kv_store.py +778 -0
  59. mlops/storage/adapters/gcs_object_store.py +96 -0
  60. mlops/storage/adapters/memory_store.py +240 -0
  61. mlops/storage/adapters/redis_store.py +438 -0
  62. mlops/storage/factory.py +199 -0
  63. mlops/storage/interfaces/__init__.py +6 -0
  64. mlops/storage/interfaces/kv_store.py +118 -0
  65. mlops/storage/path_utils.py +38 -0
  66. mlops/templates/premier-league/charts/plot_metrics.js +70 -0
  67. mlops/templates/premier-league/charts/plot_metrics.py +145 -0
  68. mlops/templates/premier-league/charts/requirements.txt +6 -0
  69. mlops/templates/premier-league/configs/cluster_config.yaml +13 -0
  70. mlops/templates/premier-league/configs/project_config.yaml +207 -0
  71. mlops/templates/premier-league/data/England CSV.csv +12154 -0
  72. mlops/templates/premier-league/models/premier_league_model.py +638 -0
  73. mlops/templates/premier-league/requirements.txt +8 -0
  74. mlops/templates/sklearn-basic/README.md +22 -0
  75. mlops/templates/sklearn-basic/charts/plot_metrics.py +85 -0
  76. mlops/templates/sklearn-basic/charts/requirements.txt +3 -0
  77. mlops/templates/sklearn-basic/configs/project_config.yaml +64 -0
  78. mlops/templates/sklearn-basic/data/train.csv +14 -0
  79. mlops/templates/sklearn-basic/models/model.py +62 -0
  80. mlops/templates/sklearn-basic/requirements.txt +10 -0
  81. mlops/web/__init__.py +3 -0
  82. mlops/web/server.py +585 -0
  83. mlops/web/ui/index.html +52 -0
  84. mlops/web/ui/mlops-charts.js +357 -0
  85. mlops/web/ui/script.js +1244 -0
  86. mlops/web/ui/styles.css +248 -0
mlops/adapters/custom/custom_adapter.py
@@ -0,0 +1,447 @@
+ from __future__ import annotations
+
+ import importlib.util
+ import json
+ import logging
+ import os
+ import sys
+ import time
+ from pathlib import Path
+ from typing import Any, Dict, Optional
+
+ from mlops.core import StepStateManager, get_context_factory, get_step_registry, set_current_context
+ from mlops.core.custom_model_base import MLOpsCustomModelBase
+ from mlops.core.dask_networkx_executor import DaskNetworkXExecutor
+ from mlops.core.networkx_parser import parse_networkx_pipeline_from_config
+
+ from ..base import ModelAdapter
+ from ..config_schema import AdapterConfig
+
+
+ def _as_dict(value: Any) -> Dict[str, Any]:
+     if isinstance(value, dict):
+         return value
+     try:
+         if hasattr(value, "model_dump"):
+             return value.model_dump()  # type: ignore[attr-defined]
+     except Exception:
+         pass
+     try:
+         return dict(value)  # type: ignore[arg-type]
+     except Exception:
+         return {}
+
+
+ def _to_int(value: Any) -> Optional[int]:
+     try:
+         if value is None:
+             return None
+         return int(value)
+     except Exception:
+         return None
+
+
+ def _to_float(value: Any) -> Optional[float]:
+     try:
+         if value is None:
+             return None
+         return float(value)
+     except Exception:
+         return None
+
+
+ class CustomModelAdapter(ModelAdapter):
+     """Adapter for user-provided Python model scripts.
+
+     Responsibilities:
+     - Load the custom script and register `@step` functions
+     - Build a StepStateManager (KV + optional object store)
+     - Execute the configured NetworkX pipeline via DaskNetworkXExecutor
+     """
+
+     def __init__(
+         self,
+         config: AdapterConfig,
+         python_interpreter: Optional[str] = None,
+         environment_name: Optional[str] = None,
+         conda_env_name: Optional[str] = None,
+         project_path: Optional[Path] = None,
+         run_context: Optional[object] = None,
+     ) -> None:
+         super().__init__(config, python_interpreter=python_interpreter, environment_name=environment_name, conda_env_name=conda_env_name)
+         self.project_path = project_path
+         self.run_context = run_context
+         self.step_registry = get_step_registry()
+         self.step_state_manager: StepStateManager | None = None
+         self.networkx_executor: DaskNetworkXExecutor | None = None
+         self.tracker: Any = None
+         self.logger = logging.getLogger(__name__)
+
+     def set_tracker(self, tracker: Any) -> None:
+         self.tracker = tracker
+
+     def _repo_root(self) -> Path:
+         """Best-effort workspace root resolution (where projects/ lives)."""
+         try:
+             from mlops.core.workspace import get_workspace_root
+             return get_workspace_root()
+         except Exception:
+             return Path.cwd()
+
+     def _resolve_custom_script_path(self, custom_script_path: str) -> Path:
+         p = Path(custom_script_path)
+         if p.is_absolute() and p.exists():
+             return p
+         if p.exists():
+             return p
+         # Try relative to project path (for configs that use "models/foo.py")
+         try:
+             if self.project_path and (self.project_path / p).exists():
+                 return self.project_path / p
+         except Exception:
+             pass
+         # Try relative to repo root (for configs that use "projects/<id>/models/foo.py")
+         try:
+             repo_root = self._repo_root()
+             cand = repo_root / p
+             if cand.exists():
+                 return cand
+         except Exception:
+             pass
+         return p
+
+     def initialize(self) -> None:
+         """Initialize the adapter: import model module, configure caching, and set up the executor."""
+         self.logger = logging.getLogger(__name__)
+
+         custom_script_path = getattr(self.config.parameters, "custom_script_path", None)
+         if not custom_script_path:
+             raise ValueError("custom_script_path must be specified in configuration")
+         custom_target = getattr(self.config.parameters, "custom_target", None)
+
+         script_path = self._resolve_custom_script_path(str(custom_script_path))
+         if not script_path.exists():
+             raise FileNotFoundError(f"Custom script not found: {custom_script_path}")
+
+         spec = importlib.util.spec_from_file_location("custom_model", str(script_path.resolve()))
+         if spec is None or spec.loader is None:
+             raise ImportError(f"Failed to create import spec for: {script_path}")
+         custom_module = importlib.util.module_from_spec(spec)
+         sys.modules["custom_model"] = custom_module
+         spec.loader.exec_module(custom_module)  # type: ignore[attr-defined]
+
+         if custom_target:
+             if not hasattr(custom_module, custom_target):
+                 raise AttributeError(f"Target '{custom_target}' not found in {custom_script_path}")
+             self.model_class = getattr(custom_module, custom_target)
+             try:
+                 if not issubclass(self.model_class, MLOpsCustomModelBase):
+                     self.logger.info(f"Target '{custom_target}' does not inherit MLOpsCustomModelBase; proceeding anyway.")
+             except TypeError:
+                 pass
+
+         self.logger.info(f"Imported model module from: {custom_script_path}")
+
+         # Steps are registered via decorators during import.
+         self.step_registry = get_step_registry()
+         try:
+             step_names = list(getattr(self.step_registry, "_steps", {}).keys())
+             if not step_names:
+                 self.logger.warning("No steps found in registry. Ensure your model uses @step decorators.")
+             else:
+                 self.logger.info(f"Found {len(step_names)} registered step(s)")
+         except Exception:
+             pass
+
+         cache_config = getattr(self.config.parameters, "cache", {}) or {}
+         cache_config = _as_dict(cache_config)
+         cache_ttl_hours = cache_config.get("ttl_hours", 24) if isinstance(cache_config, dict) else 24
+
+         step_cache_dir = (self.project_path / "cache" / "steps") if self.project_path else Path("step_cache")
+
+         project_id_for_ns = self.project_path.name if self.project_path else "default"
+         backend_cfg = _as_dict(cache_config.get("backend", {}))
+
+         # Centralized KV/object-store creation (config -> env override -> safe fallback).
+         try:
+             from mlops.storage.factory import create_kv_store, create_object_store
+         except Exception:
+             create_kv_store = None  # type: ignore[assignment]
+             create_object_store = None  # type: ignore[assignment]
+
+         try:
+             ws_root = self._repo_root()
+         except Exception:
+             ws_root = None
+
+         if create_kv_store:
+             kv_store = create_kv_store(
+                 project_id_for_ns,
+                 backend_cfg if isinstance(backend_cfg, dict) else {},
+                 env=os.environ,
+                 workspace_root=ws_root,
+                 project_root=self.project_path,
+             )
+         else:
+             from mlops.storage.adapters.memory_store import InMemoryStore
+             kv_store = InMemoryStore(project_id_for_ns)
+
+         obj_store = None
+         if create_object_store:
+             try:
+                 obj_store = create_object_store(cache_config if isinstance(cache_config, dict) else {}, env=os.environ)
+             except Exception:
+                 obj_store = None
+         obj_prefix = None
+
+         self.step_state_manager = StepStateManager(
+             cache_dir=step_cache_dir,
+             kv_store=kv_store,
+             logger=self.logger,
+             cache_ttl_hours=cache_ttl_hours,
+             object_store=obj_store,
+             object_prefix=obj_prefix,
+         )
+
+         executor_config = _as_dict(getattr(self.config.parameters, "executor", {}) or {})
+         env_workers = os.environ.get("MLOPS_N_WORKERS")
+         try:
+             n_workers = int(env_workers) if env_workers else int(executor_config.get("n_workers", 2))
+         except Exception:
+             n_workers = 2
+
+         dask_tuning_cfg = _as_dict(executor_config.get("dask") or {})
+         if isinstance(dask_tuning_cfg, dict) and not dask_tuning_cfg and executor_config.get("dask_config"):
+             dask_tuning_cfg = _as_dict(executor_config.get("dask_config"))
+
+         min_workers_override = _to_int(executor_config.get("min_workers")) if isinstance(executor_config, dict) else None
+         wait_for_workers_override = _to_float(executor_config.get("wait_for_workers_sec")) if isinstance(executor_config, dict) else None
+         if isinstance(dask_tuning_cfg, dict):
+             if min_workers_override is None:
+                 min_workers_override = _to_int(dask_tuning_cfg.get("min_workers"))
+             if wait_for_workers_override is None:
+                 wait_for_workers_override = _to_float(dask_tuning_cfg.get("wait_for_workers_sec"))
+
+         dask_overrides: Dict[str, Any] = {}
+         compression_setting = None
+         if isinstance(dask_tuning_cfg, dict):
+             comm_cfg = _as_dict(dask_tuning_cfg.get("comm") or {})
+             if isinstance(comm_cfg, dict):
+                 compression_setting = comm_cfg.get("compression") or comm_cfg.get("codec")
+                 if compression_setting:
+                     dask_overrides["distributed.comm.compression"] = str(compression_setting)
+             memory_cfg = _as_dict(dask_tuning_cfg.get("memory") or {})
+             if isinstance(memory_cfg, dict):
+                 mem_map = {
+                     "worker_target_fraction": "distributed.worker.memory.target",
+                     "worker_spill_fraction": "distributed.worker.memory.spill",
+                     "worker_pause_fraction": "distributed.worker.memory.pause",
+                 }
+                 for src_key, dst_key in mem_map.items():
+                     val = memory_cfg.get(src_key)
+                     if val is not None:
+                         try:
+                             dask_overrides[dst_key] = float(val)
+                         except Exception:
+                             dask_overrides[dst_key] = val
+             overrides_block = dask_tuning_cfg.get("overrides")
+             if isinstance(overrides_block, dict):
+                 for key, value in overrides_block.items():
+                     if isinstance(key, str):
+                         dask_overrides[key] = value
+
+         if compression_setting:
+             try:
+                 os.environ.setdefault("DASK_DISTRIBUTED__COMM__COMPRESSION", str(compression_setting))
+             except Exception:
+                 pass
+
+         scheduler_address = executor_config.get("scheduler_address") or os.environ.get("DASK_SCHEDULER_ADDRESS")
+         scheduler_mode = "distributed" if scheduler_address else "threads"
+
+         extra_files_to_upload: list[str] = []
+         try:
+             extra_files_to_upload = [str(script_path)]
+         except Exception:
+             extra_files_to_upload = []
+         # Upload reporting entrypoint for worker-side chart imports (best-effort).
+         try:
+             rep_cfg_text = os.environ.get("MLOPS_REPORTING_CONFIG") or ""
+             if rep_cfg_text:
+                 rep_cfg = json.loads(rep_cfg_text) or {}
+                 ep = rep_cfg.get("static_entrypoint") or rep_cfg.get("entrypoint")
+                 if isinstance(ep, str) and ep.strip():
+                     p = Path(ep)
+                     if not p.is_absolute():
+                         p = self._repo_root() / p
+                     if p.exists():
+                         extra_files_to_upload.append(str(p))
+         except Exception:
+             pass
+
+         self.networkx_executor = DaskNetworkXExecutor(
+             step_registry=self.step_registry,
+             state_manager=self.step_state_manager,
+             logger=self.logger,
+             n_workers=n_workers,
+             scheduler_mode=scheduler_mode,
+             scheduler_address=scheduler_address,
+             client=None,
+             extra_files_to_upload=extra_files_to_upload,
+             min_workers=min_workers_override,
+             wait_for_workers_sec=wait_for_workers_override,
+             dask_config_overrides=dask_overrides,
+         )
+         mode_label = "distributed" if scheduler_mode == "distributed" else "threads"
+         self.logger.info(f"Initialized Dask NetworkX executor with {n_workers} workers ({mode_label} scheduler)")
+
+         self.logger.info("Enhanced caching with function hashing: ENABLED")
+
+         # Make state manager available for manual step-level caching in decorators
+         try:
+             from mlops.core.step_system import set_state_manager as _set_sm
+             _set_sm(self.step_state_manager)
+         except Exception:
+             pass
+
+
+
+     def _execute_step_graph(self, run_id: str, data_paths: Dict[str, Path], tracker: Any = None, **kwargs) -> Dict[str, Any]:
+         """Execute the configured pipeline once."""
+         if not self.networkx_executor or not self.step_state_manager:
+             raise RuntimeError("NetworkX execution not properly initialized")
+
+         pipeline_config = self.config.parameters.pipeline
+         if not pipeline_config:
+             raise ValueError("NetworkX executor requires pipeline configuration with 'processes' and 'steps' sections")
+
+         pipeline_dict = pipeline_config.model_dump() if hasattr(pipeline_config, 'model_dump') else pipeline_config
+
+         if "processes" not in pipeline_dict:
+             raise ValueError(
+                 "NetworkX executor requires 'processes' section in pipeline configuration. "
+                 "Legacy 'main_flow' syntax is no longer supported. "
+                 "Please use the NetworkX format with processes and steps."
+             )
+
+         networkx_config = parse_networkx_pipeline_from_config(pipeline_dict)
+
+         try:
+             project_id = getattr(getattr(self, "run_context", None), "project_id", None) or (
+                 self.project_path.name if self.project_path else "default"
+             )
+         except Exception:
+             project_id = self.project_path.name if self.project_path else "default"
+
+         context_factory = get_context_factory()
+         global_cfg = _as_dict(self.config.parameters)
+         overrides = kwargs.get("global_config_overrides")
+         if isinstance(overrides, dict) and overrides:
+             global_cfg = {**global_cfg, **overrides}
+
+         context = context_factory.create_context(
+             project_id=project_id,
+             run_id=run_id,
+             tracker=tracker,
+             global_config=global_cfg,
+             data_paths=data_paths,
+             checkpoint_dir=self.project_path / "artifacts" / "checkpoints" if self.project_path else Path("artifacts/checkpoints")
+         )
+
+         set_current_context(context)
+
+         try:
+             resume_from_process = kwargs.get("resume_from_process")
+             config_hash = self.step_state_manager._compute_config_hash(pipeline_dict)
+
+             if resume_from_process and self.step_state_manager.can_resume_from_step(run_id, resume_from_process, config_hash):
+                 self.logger.info(f"Resuming execution from process: {resume_from_process}")
+                 context.step_results = self.step_state_manager.get_step_results(run_id)
+             else:
+                 self.step_state_manager.start_pipeline_execution(run_id, pipeline_dict)
+
+             single_process = kwargs.get("single_process", False)
+             execution_results = self.networkx_executor.execute_graph(
+                 networkx_config,
+                 context,
+                 run_id=run_id,
+                 resume_from_process=resume_from_process,
+                 stop_after_process=single_process,
+             )
+
+             results = {}
+
+             for process_name, process_result in execution_results.items():
+                 if process_result.error is None:  # success check for simplified result
+                     if process_result.result:
+                         result_dict = process_result.result
+                         results[f"{process_name}_result"] = result_dict
+
+                         if process_name == "model_training" and result_dict:
+                             training_summary = {}
+                             if 'model' in result_dict:
+                                 model = result_dict['model']
+                                 if hasattr(model, 'get_training_metrics'):
+                                     training_summary['model_metrics'] = model.get_training_metrics()
+                                 training_summary['model_type'] = type(model).__name__
+                             results[f"{process_name}_summary"] = training_summary
+
+                         elif process_name == "evaluate_model" and result_dict:
+                             evaluation_summary = {}
+                             if 'evaluation_metrics' in result_dict:
+                                 evaluation_summary['final_metrics'] = result_dict['evaluation_metrics']
+                             if 'predictions' in result_dict:
+                                 evaluation_summary['prediction_count'] = len(result_dict['predictions'])
+                             results[f"{process_name}_summary"] = evaluation_summary
+
+                 else:
+                     results[f"{process_name}_error"] = process_result.error
+                     self.logger.error(f"Process {process_name} failed: {process_result.error}")
+
+             return results
+
+         finally:
+             set_current_context(None)
+
+     def run(self, data_paths: Dict[str, Path] | None = None, **kwargs) -> Dict[str, Any]:
+         """Run the pipeline once according to the configured processes/steps."""
+         run_id = kwargs.pop("run_id", f"run_{int(time.time())}")
+         tracker = kwargs.pop("tracker", self.tracker)
+         normalized_paths: Dict[str, Path] = data_paths or {}
+         return self._execute_step_graph(run_id=run_id, data_paths=normalized_paths, tracker=tracker, **kwargs)
+
+     def save_model(self, model_path: str, **kwargs) -> None:
+         """Save model artifacts (handled automatically by step system)."""
+         self.logger.info(f"Model artifacts will be saved automatically by the step system to: {model_path}")
+
+     def load_model(self, model_path: str, **kwargs) -> Any:
+         """Load model artifacts (handled automatically by step system)."""
+         self.logger.info(f"Model artifacts will be loaded automatically by the step system from: {model_path}")
+         return None
+
+
+
+     @classmethod
+     def validate_config(cls, config: AdapterConfig) -> bool:
+         """Validate the adapter configuration."""
+         try:
+             if not config.parameters.custom_script_path:
+                 return False
+             if not config.parameters.pipeline:
+                 return False
+             pipeline_dict = config.parameters.pipeline.model_dump() if hasattr(config.parameters.pipeline, 'model_dump') else config.parameters.pipeline
+             if "processes" not in pipeline_dict:
+                 return False
+             return True
+         except Exception:
+             return False
+
+     def save(self, path: Path) -> None:
+         """Save adapter state."""
+         pass
+
+     def load(self, path: Path) -> None:
+         """Load adapter state."""
+         pass
+
+ Adapter = CustomModelAdapter
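
Editor's note: the sketch below outlines the shape of the `parameters` object that `initialize()` reads above ("cache" drives the StepStateManager/KV-store setup, "executor" drives the Dask executor). It is an assumption reconstructed from the getattr()/dict.get() calls in the diff; the real schema lives in mlops/adapters/config_schema.py (not shown), and every value here (script path, class name, fractions, codec) is illustrative only.

# Illustrative sketch only; keys mirror the lookups in initialize(), values are made up.
parameters_sketch = {
    "custom_script_path": "models/premier_league_model.py",  # resolved relative to the project or workspace root
    "custom_target": "PremierLeagueModel",                    # hypothetical class name inside that script
    "cache": {
        "ttl_hours": 24,                                      # becomes cache_ttl_hours
        "backend": {},                                        # forwarded to create_kv_store()
    },
    "executor": {
        "n_workers": 2,                                       # overridden by the MLOPS_N_WORKERS env var
        "scheduler_address": None,                            # or DASK_SCHEDULER_ADDRESS -> distributed mode
        "min_workers": None,
        "wait_for_workers_sec": None,
        "dask": {
            "comm": {"compression": "lz4"},                   # -> distributed.comm.compression
            "memory": {
                "worker_target_fraction": 0.6,                # -> distributed.worker.memory.target
                "worker_spill_fraction": 0.7,                 # -> distributed.worker.memory.spill
                "worker_pause_fraction": 0.8,                 # -> distributed.worker.memory.pause
            },
            "overrides": {"distributed.scheduler.work-stealing": True},
        },
    },
    "pipeline": {"processes": {}, "steps": {}},               # required by _execute_step_graph()
}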
mlops/adapters/plugin_manager.py
@@ -0,0 +1,113 @@
+ import importlib
+ import pkgutil
+ from pathlib import Path
+ from typing import Dict, Type, Optional
+ from .base import ModelAdapter, ModelConfig
+ from .config_schema import AdapterConfig
+
+ class AdapterPluginManager:
+     """Manages the discovery and loading of model adapters."""
+
+     def __init__(self):
+         self._adapters: Dict[str, Type[ModelAdapter]] = {}
+
+     def discover_adapters(self, package_path: str) -> None:
+         """Discover and load all adapter plugins in the given package.
+
+         Tries both the provided path and a 'src.'-prefixed or de-prefixed variant
+         to support source-tree layouts (src/mlops) and installed packages (mlops).
+
+         Args:
+             package_path: Dotted path to the package containing adapters
+                 (e.g., 'mlops.adapters' or 'src.mlops.adapters').
+         """
+         candidate_paths = [package_path]
+         # Add fallback variants for common source layout differences
+         if package_path.startswith("src."):
+             candidate_paths.append(package_path[len("src."):])
+         else:
+             candidate_paths.append(f"src.{package_path}")
+
+         for pkg in candidate_paths:
+             try:
+                 package = importlib.import_module(pkg)
+             except Exception:
+                 continue
+             for _, name, is_pkg in pkgutil.iter_modules(package.__path__):
+                 if not is_pkg:
+                     continue
+                 try:
+                     module = importlib.import_module(f"{pkg}.{name}")
+                     if hasattr(module, "Adapter"):
+                         adapter_class = getattr(module, "Adapter")
+                         if issubclass(adapter_class, ModelAdapter):
+                             self._adapters[name] = adapter_class
+                 except Exception as e:
+                     print(f"Failed to load adapter {name}: {e}")
+
+     def get_adapter(self, name: str) -> Optional[Type[ModelAdapter]]:
+         """Get an adapter class by name.
+
+         Args:
+             name: Name of the adapter
+
+         Returns:
+             The adapter class if found, None otherwise
+         """
+         return self._adapters.get(name)
+
+     def list_adapters(self) -> Dict[str, Type[ModelAdapter]]:
+         """List all available adapters.
+
+         Returns:
+             Dictionary mapping adapter names to their classes
+         """
+         return self._adapters.copy()
+
+     def create_adapter(
+         self,
+         name: str,
+         config: AdapterConfig,
+         python_interpreter: Optional[str] = None,
+         environment_name: Optional[str] = None,
+         conda_env_name: Optional[str] = None,
+         project_path: Optional[Path] = None,
+         run_context: Optional[object] = None,
+     ) -> Optional[ModelAdapter]:
+         """
+         Create an adapter by name with the given configuration.
+
+         Args:
+             name: Name of the adapter.
+             config: Configuration for the adapter.
+             python_interpreter: Path to the python interpreter in the environment.
+             environment_name: Name of the environment (supports all types: conda, venv, etc.).
+             conda_env_name: Legacy parameter name for backward compatibility.
+             project_path: Path to the project directory for project-specific artifact storage.
+             run_context: Optional run-scoped context object (passed through when supported by the adapter).
+
+         Returns:
+             An instance of the adapter or None if not found.
+         """
+         if name not in self._adapters:
+             print(f"Adapter '{name}' not found.")
+             return None
+
+         adapter_class = self._adapters[name]
+
+         # Support both new and legacy parameter names
+         env_name = environment_name or conda_env_name
+
+         # Check if the adapter class supports optional parameters
+         import inspect
+         signature = inspect.signature(adapter_class.__init__)
+         kwargs = {
+             "python_interpreter": python_interpreter,
+             "environment_name": env_name,
+             "conda_env_name": env_name,
+         }
+         if "project_path" in signature.parameters:
+             kwargs["project_path"] = project_path
+         if "run_context" in signature.parameters:
+             kwargs["run_context"] = run_context
+         return adapter_class(config, **kwargs)
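
Editor's note: a minimal usage sketch of the plugin manager above, assuming the installed package layout `mlops.adapters` from the file list. `create_adapter` additionally needs an `AdapterConfig` instance, which is omitted here because its schema is not shown in this diff.

# Hypothetical usage, not shipped in the package: discover adapter subpackages that
# export an `Adapter` class (custom/, sklearn/ above) and list them by name.
from mlops.adapters.plugin_manager import AdapterPluginManager

manager = AdapterPluginManager()
manager.discover_adapters("mlops.adapters")      # also probes the "src.mlops.adapters" variant
for name, adapter_cls in manager.list_adapters().items():
    print(f"{name}: {adapter_cls.__name__}")     # e.g. "custom: CustomModelAdapter", "sklearn: Adapter"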
mlops/adapters/sklearn/__init__.py
@@ -0,0 +1,3 @@
+ from .adapter import Adapter
+
+ __all__ = ["Adapter"]
mlops/adapters/sklearn/adapter.py
@@ -0,0 +1,94 @@
+ from pathlib import Path
+ from typing import Any, Dict, Optional
+ import joblib
+ import numpy as np
+ from sklearn.base import BaseEstimator
+ from sklearn.model_selection import train_test_split
+ import importlib
+
+ from ..base import ModelAdapter
+ from ..config_schema import AdapterConfig
+
+ class Adapter(ModelAdapter):
+     """Adapter for scikit-learn models."""
+
+     def __init__(self, config: AdapterConfig, python_interpreter: Optional[str] = None, environment_name: Optional[str] = None, conda_env_name: Optional[str] = None):
+         super().__init__(config, python_interpreter=python_interpreter, environment_name=environment_name, conda_env_name=conda_env_name)
+         self.model: BaseEstimator = None
+
+     def initialize(self) -> None:
+         """Initialize the scikit-learn model."""
+         module_name, class_name_val = self.config.parameters.class_name.rsplit('.', 1)
+         try:
+             ModelClass = getattr(importlib.import_module(module_name), class_name_val)
+         except (ImportError, AttributeError) as e:
+             raise ValueError(f"Could not import class_name '{self.config.parameters.class_name}': {e}")
+
+         if not issubclass(ModelClass, BaseEstimator):
+             raise ValueError(f"class_name '{self.config.parameters.class_name}' must be a scikit-learn BaseEstimator.")
+
+         model_hyperparams = self.config.parameters.hyperparameters if self.config.parameters.hyperparameters is not None else {}
+         self.model = ModelClass(**model_hyperparams)
+
+     def run(self, data_paths: Dict[str, Path] | None = None, **kwargs) -> Dict[str, Any]:
+         """Run a minimal fit/evaluate loop depending on provided data paths."""
+         if self.model is None:
+             self.initialize()
+
+         data_paths = data_paths or {}
+         results: Dict[str, Any] = {}
+
+         def _load_xy(csv_path: Path):
+             import pandas as pd
+             df = pd.read_csv(csv_path)
+             if 'label' not in df.columns:
+                 raise ValueError("Target column 'label' not found in the CSV.")
+             X = df.drop('label', axis=1).values
+             y = df['label'].values
+             return X, y
+
+         if 'training' in data_paths and data_paths['training'] is not None:
+             X, y = _load_xy(data_paths['training'])
+             self.model.fit(X, y)
+             results['model_type'] = self.model.__class__.__name__
+             results['parameters'] = self.model.get_params()
+
+         if 'validation' in data_paths and data_paths['validation'] is not None:
+             Xv, yv = _load_xy(data_paths['validation'])
+             results['validation_score'] = self.model.score(Xv, yv)
+
+         return results
+
+     def save(self, path: Path) -> None:
+         """Save the scikit-learn model."""
+         if self.model is None:
+             raise ValueError("No model to save")
+         path.mkdir(parents=True, exist_ok=True)
+         joblib.dump(self.model, path / "model.joblib")
+
+     def load(self, path: Path) -> None:
+         """Load the scikit-learn model."""
+         self.model = joblib.load(path / "model.joblib")
+
+     def predict(self, data: Any) -> Any:
+         """Make predictions using the scikit-learn model."""
+         if self.model is None:
+             raise ValueError("Model not initialized")
+         return self.model.predict(data)
+
+     @classmethod
+     def validate_config(cls, config: AdapterConfig) -> bool:
+         """Validate the configuration for this adapter."""
+         if not hasattr(config, 'parameters') or config.parameters is None:
+             print("Validation failed: config has no parameters attribute or it is None")
+             return False
+
+         if not hasattr(config.parameters, 'class_name') or not isinstance(config.parameters.class_name, str) or not config.parameters.class_name:
+             print(f"Validation failed: class_name issue. Has attr: {hasattr(config.parameters, 'class_name')}, Is str: {isinstance(config.parameters.class_name, str) if hasattr(config.parameters, 'class_name') else 'N/A'}, Value: {config.parameters.class_name if hasattr(config.parameters, 'class_name') else 'N/A'}")
+             return False
+
+         if not hasattr(config.parameters, 'hyperparameters') or not isinstance(config.parameters.hyperparameters, dict):
+             print(f"Validation failed: hyperparameters issue. Has attr: {hasattr(config.parameters, 'hyperparameters')}, Is dict: {isinstance(config.parameters.hyperparameters, dict) if hasattr(config.parameters, 'hyperparameters') else 'N/A'}")
+             return False
+
+         return True
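
Editor's note: the dynamic import in `initialize()` above is the standard dotted-path pattern. The short sketch below shows it in isolation with an illustrative `class_name` and hyperparameters; the values are assumptions, not taken from any shipped config.

# Standalone sketch of the class_name -> estimator resolution used by initialize().
import importlib

from sklearn.base import BaseEstimator

class_name = "sklearn.ensemble.RandomForestClassifier"    # illustrative value
hyperparameters = {"n_estimators": 100, "max_depth": 5}    # illustrative values

module_name, cls_name = class_name.rsplit(".", 1)
ModelClass = getattr(importlib.import_module(module_name), cls_name)
if not issubclass(ModelClass, BaseEstimator):
    raise ValueError(f"{class_name} is not a scikit-learn estimator")
model = ModelClass(**hyperparameters)                      # equivalent of self.model in the adapter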
mlops/cluster/__init__.py
@@ -0,0 +1,3 @@
+ # Cluster providers package
+
+