expops-0.1.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. expops-0.1.3.dist-info/METADATA +826 -0
  2. expops-0.1.3.dist-info/RECORD +86 -0
  3. expops-0.1.3.dist-info/WHEEL +5 -0
  4. expops-0.1.3.dist-info/entry_points.txt +3 -0
  5. expops-0.1.3.dist-info/licenses/LICENSE +674 -0
  6. expops-0.1.3.dist-info/top_level.txt +1 -0
  7. mlops/__init__.py +0 -0
  8. mlops/__main__.py +11 -0
  9. mlops/_version.py +34 -0
  10. mlops/adapters/__init__.py +12 -0
  11. mlops/adapters/base.py +86 -0
  12. mlops/adapters/config_schema.py +89 -0
  13. mlops/adapters/custom/__init__.py +3 -0
  14. mlops/adapters/custom/custom_adapter.py +447 -0
  15. mlops/adapters/plugin_manager.py +113 -0
  16. mlops/adapters/sklearn/__init__.py +3 -0
  17. mlops/adapters/sklearn/adapter.py +94 -0
  18. mlops/cluster/__init__.py +3 -0
  19. mlops/cluster/controller.py +496 -0
  20. mlops/cluster/process_runner.py +91 -0
  21. mlops/cluster/providers.py +258 -0
  22. mlops/core/__init__.py +95 -0
  23. mlops/core/custom_model_base.py +38 -0
  24. mlops/core/dask_networkx_executor.py +1265 -0
  25. mlops/core/executor_worker.py +1239 -0
  26. mlops/core/experiment_tracker.py +81 -0
  27. mlops/core/graph_types.py +64 -0
  28. mlops/core/networkx_parser.py +135 -0
  29. mlops/core/payload_spill.py +278 -0
  30. mlops/core/pipeline_utils.py +162 -0
  31. mlops/core/process_hashing.py +216 -0
  32. mlops/core/step_state_manager.py +1298 -0
  33. mlops/core/step_system.py +956 -0
  34. mlops/core/workspace.py +99 -0
  35. mlops/environment/__init__.py +10 -0
  36. mlops/environment/base.py +43 -0
  37. mlops/environment/conda_manager.py +307 -0
  38. mlops/environment/factory.py +70 -0
  39. mlops/environment/pyenv_manager.py +146 -0
  40. mlops/environment/setup_env.py +31 -0
  41. mlops/environment/system_manager.py +66 -0
  42. mlops/environment/utils.py +105 -0
  43. mlops/environment/venv_manager.py +134 -0
  44. mlops/main.py +527 -0
  45. mlops/managers/project_manager.py +400 -0
  46. mlops/managers/reproducibility_manager.py +575 -0
  47. mlops/platform.py +996 -0
  48. mlops/reporting/__init__.py +16 -0
  49. mlops/reporting/context.py +187 -0
  50. mlops/reporting/entrypoint.py +292 -0
  51. mlops/reporting/kv_utils.py +77 -0
  52. mlops/reporting/registry.py +50 -0
  53. mlops/runtime/__init__.py +9 -0
  54. mlops/runtime/context.py +34 -0
  55. mlops/runtime/env_export.py +113 -0
  56. mlops/storage/__init__.py +12 -0
  57. mlops/storage/adapters/__init__.py +9 -0
  58. mlops/storage/adapters/gcp_kv_store.py +778 -0
  59. mlops/storage/adapters/gcs_object_store.py +96 -0
  60. mlops/storage/adapters/memory_store.py +240 -0
  61. mlops/storage/adapters/redis_store.py +438 -0
  62. mlops/storage/factory.py +199 -0
  63. mlops/storage/interfaces/__init__.py +6 -0
  64. mlops/storage/interfaces/kv_store.py +118 -0
  65. mlops/storage/path_utils.py +38 -0
  66. mlops/templates/premier-league/charts/plot_metrics.js +70 -0
  67. mlops/templates/premier-league/charts/plot_metrics.py +145 -0
  68. mlops/templates/premier-league/charts/requirements.txt +6 -0
  69. mlops/templates/premier-league/configs/cluster_config.yaml +13 -0
  70. mlops/templates/premier-league/configs/project_config.yaml +207 -0
  71. mlops/templates/premier-league/data/England CSV.csv +12154 -0
  72. mlops/templates/premier-league/models/premier_league_model.py +638 -0
  73. mlops/templates/premier-league/requirements.txt +8 -0
  74. mlops/templates/sklearn-basic/README.md +22 -0
  75. mlops/templates/sklearn-basic/charts/plot_metrics.py +85 -0
  76. mlops/templates/sklearn-basic/charts/requirements.txt +3 -0
  77. mlops/templates/sklearn-basic/configs/project_config.yaml +64 -0
  78. mlops/templates/sklearn-basic/data/train.csv +14 -0
  79. mlops/templates/sklearn-basic/models/model.py +62 -0
  80. mlops/templates/sklearn-basic/requirements.txt +10 -0
  81. mlops/web/__init__.py +3 -0
  82. mlops/web/server.py +585 -0
  83. mlops/web/ui/index.html +52 -0
  84. mlops/web/ui/mlops-charts.js +357 -0
  85. mlops/web/ui/script.js +1244 -0
  86. mlops/web/ui/styles.css +248 -0
mlops/storage/adapters/gcs_object_store.py
@@ -0,0 +1,96 @@
+ from __future__ import annotations
+
+ from typing import Optional
+
+ from ..interfaces.kv_store import ObjectStore
+
+
+ class GCSObjectStore(ObjectStore):
+     """Google Cloud Storage implementation of ObjectStore.
+
+     URIs use the form gs://bucket/path/to/object.
+     The instance is initialized with a default bucket and an optional prefix.
+     If a provided URI already includes gs://bucket, that bucket is used instead.
+     """
+
+     def __init__(self, bucket: str, prefix: Optional[str] = None) -> None:
+         from google.cloud import storage  # type: ignore
+
+         self._client = storage.Client()
+         self._bucket = self._client.bucket(bucket)
+         self._prefix = prefix.strip("/") if prefix else None
+
+     # ----- helpers -----
+     def _split_gs_uri(self, uri: str) -> tuple[str, str]:
+         assert uri.startswith("gs://"), f"Not a GCS URI: {uri}"
+         without = uri[len("gs://"):]
+         parts = without.split("/", 1)
+         bucket = parts[0]
+         key = parts[1] if len(parts) > 1 else ""
+         return bucket, key
+
+     def _blob_for(self, uri_or_key: str):
+         if uri_or_key.startswith("gs://"):
+             bkt, key = self._split_gs_uri(uri_or_key)
+             return self._client.bucket(bkt).blob(key)
+         key = uri_or_key.lstrip("/")
+         if self._prefix:
+             if not key:
+                 key = self._prefix
+             elif key == self._prefix or key.startswith(f"{self._prefix}/"):
+                 pass  # already carries the prefix
+             else:
+                 key = f"{self._prefix}/{key}"
+         return self._bucket.blob(key)
+
+     def put_bytes(self, uri: str, data: bytes, content_type: Optional[str] = None) -> None:
+         blob = self._blob_for(uri)
+         blob.upload_from_string(data, content_type=content_type)
+
+     def get_bytes(self, uri: str) -> bytes:
+         blob = self._blob_for(uri)
+         return blob.download_as_bytes()
+
+     def put_file(self, uri: str, file_path: str, content_type: Optional[str] = None) -> None:
+         blob = self._blob_for(uri)
+         # Stream the upload directly from the file to avoid large in-memory buffers
+         blob.upload_from_filename(file_path, content_type=content_type)
+
+     def exists(self, uri: str) -> bool:
+         blob = self._blob_for(uri)
+         return bool(blob.exists())
+
+     def build_uri(self, *parts: str) -> str:
+         if not parts:
+             return f"gs://{self._bucket.name}/{self._prefix}" if self._prefix else f"gs://{self._bucket.name}"
+         # If the first part is already a gs:// URI, treat the rest as path segments
+         if parts[0].startswith("gs://"):
+             base = parts[0].rstrip("/")
+             rest = "/".join([p.strip("/") for p in parts[1:]])
+             return f"{base}/{rest}" if rest else base
+         key = "/".join([p.strip("/") for p in parts])
+         if self._prefix:
+             # Avoid a double prefix if the caller-provided parts already start with it
+             if not key:
+                 key = self._prefix
+             elif key == self._prefix or key.startswith(f"{self._prefix}/"):
+                 pass
+             else:
+                 key = f"{self._prefix}/{key}"
+         return f"gs://{self._bucket.name}/{key}"
+
+     # --------- pickling support (avoid shipping live clients) ---------
+     def __getstate__(self) -> dict:
+         return {
+             "_bucket_name": getattr(self._bucket, "name", None),
+             "_prefix": self._prefix,
+         }
+
+     def __setstate__(self, state: dict) -> None:
+         from google.cloud import storage  # type: ignore
+
+         self._prefix = state.get("_prefix")
+         bucket_name = state.get("_bucket_name")
+         self._client = storage.Client()
+         self._bucket = self._client.bucket(bucket_name) if bucket_name else None
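
For orientation, a minimal usage sketch of the adapter above. The bucket name, prefix, and key parts are invented for illustration; the methods and signatures are those defined in the diff:

    # Hypothetical example; "example-bucket" and the key segments are made up.
    store = GCSObjectStore(bucket="example-bucket", prefix="artifacts")

    # Keys are resolved against the default bucket and prefix:
    uri = store.build_uri("runs", "42", "model.pkl")
    # -> "gs://example-bucket/artifacts/runs/42/model.pkl"

    store.put_bytes(uri, b"model-bytes", content_type="application/octet-stream")
    assert store.exists(uri)
    restored = store.get_bytes(uri)

Note the `__getstate__`/`__setstate__` pair at the bottom of the class: it drops the live `storage.Client` when pickling and reconnects on unpickling, presumably so instances can be shipped to worker processes without serializing network handles.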
mlops/storage/adapters/memory_store.py
@@ -0,0 +1,240 @@
+ from __future__ import annotations
+
+ from typing import Any, Dict, Optional
+ import json
+ import logging
+ import time
+
+ from ..interfaces.kv_store import KeyValueEventStore
+ from ..path_utils import encode_probe_path
+
+ logger = logging.getLogger(__name__)
+
+
+ class InMemoryStore(KeyValueEventStore):
+     """Simple in-memory implementation for dev/tests.
+
+     Not persistent, with no real pub/sub; events are appended to a local list.
+     """
+
+     def __init__(self, project_id: str) -> None:
+         self.project_id = project_id
+         self._kv: Dict[str, Any] = {}
+         self._events: list[Dict[str, Any]] = []
+
+     # Helpers
+     def _json_set(self, key: str, value: Dict[str, Any]) -> None:
+         # Round-trip through JSON so stored values are plain, detached copies
+         self._kv[key] = json.loads(json.dumps(value, default=str))
+
+     def _json_get(self, key: str) -> Optional[Dict[str, Any]]:
+         val = self._kv.get(key)
+         if val is None:
+             return None
+         return json.loads(json.dumps(val))
+
+     # Cache indices
+     # Key layout: steps:<process>:<step>:<input_hash>:<config_hash>:<function_hash>
+     def set_step_cache_record(self, process_name: str, step_name: str, input_hash: str, config_hash: str,
+                               function_hash: Optional[str], record: Dict[str, Any], ttl_seconds: Optional[int] = None) -> None:
+         key = f"steps:{process_name}:{step_name}:{input_hash}:{config_hash}:{function_hash or 'none'}"
+         self._json_set(key, record)
+
+     def get_step_cache_path(self, process_name: str, step_name: str, input_hash: Optional[str], config_hash: Optional[str], function_hash: Optional[str]) -> Optional[str]:
+         if not input_hash or not config_hash:
+             return None
+         key = f"steps:{process_name}:{step_name}:{input_hash}:{config_hash}:{function_hash or 'none'}"
+         rec = self._json_get(key)
+         if rec and rec.get("status") in ("completed", "cached") and rec.get("cache_path"):
+             return rec["cache_path"]
+         return None
+
+     def get_step_cache_record(self, process_name: str, step_name: str, input_hash: Optional[str], config_hash: Optional[str], function_hash: Optional[str]) -> Optional[Dict[str, Any]]:
+         if not input_hash or not config_hash:
+             return None
+         key = f"steps:{process_name}:{step_name}:{input_hash}:{config_hash}:{function_hash or 'none'}"
+         return self._json_get(key)
+
+     def set_process_cache_record(self, process_name: str, input_hash: str, config_hash: str, function_hash: Optional[str], record: Dict[str, Any], ttl_seconds: Optional[int] = None) -> None:
+         key = f"process:{process_name}:{input_hash}:{config_hash}:{function_hash or 'none'}"
+         self._json_set(key, record)
+
+     def get_process_cache_path(self, process_name: str, input_hash: Optional[str], config_hash: Optional[str], function_hash: Optional[str]) -> Optional[str]:
+         if not input_hash or not config_hash:
+             return None
+         key = f"process:{process_name}:{input_hash}:{config_hash}:{function_hash or 'none'}"
+         rec = self._json_get(key)
+         if rec and rec.get("status") in ("completed", "cached") and rec.get("cache_path"):
+             return rec["cache_path"]
+         return None
+
+     def get_process_cache_record(self, process_name: str, input_hash: Optional[str], config_hash: Optional[str], function_hash: Optional[str]) -> Optional[Dict[str, Any]]:
+         if not input_hash or not config_hash:
+             return None
+         key = f"process:{process_name}:{input_hash}:{config_hash}:{function_hash or 'none'}"
+         return self._json_get(key)
+
+     def get_process_cache_paths_batch(self, lookups: list[tuple[str, Optional[str], Optional[str], Optional[str]]]) -> dict[str, Optional[str]]:
+         """In-memory batched lookup over the local dict; returns a composite-key map."""
+         out: dict[str, Optional[str]] = {}
+         for process_name, ih, ch, fh in lookups or []:
+             fhash = fh or "none"
+             comp = f"{process_name}|{ih}|{ch}|{fhash}"
+             if not ih or not ch:
+                 out[comp] = None
+                 continue
+             key = f"process:{process_name}:{ih}:{ch}:{fhash}"
+             rec = self._json_get(key)
+             if rec and rec.get("status") in ("completed", "cached") and rec.get("cache_path"):
+                 out[comp] = rec.get("cache_path")
+             else:
+                 out[comp] = None
+         return out
+
+     # Run lifecycle + metrics
+     def mark_pipeline_started(self, run_id: str) -> None:
+         self._kv[f"runs:{run_id}:status"] = "running"
+         self._json_set(f"runs:{run_id}:timestamps", {"start": time.time(), "end": None})
+         self.publish_event({"type": "pipeline.started", "run_id": run_id, "status": "running"})
+
+     def mark_pipeline_completed(self, run_id: str, success: bool) -> None:
+         status = "completed" if success else "failed"
+         self._kv[f"runs:{run_id}:status"] = status
+         # Preserve the recorded start time when stamping the end time
+         ts = self._json_get(f"runs:{run_id}:timestamps") or {"start": None, "end": None}
+         ts["end"] = time.time()
+         self._json_set(f"runs:{run_id}:timestamps", ts)
+         self.publish_event({"type": "pipeline.completed", "run_id": run_id, "status": status})
+
+     # Events
+     def publish_event(self, event: Dict[str, Any]) -> None:
+         self._events.append(json.loads(json.dumps(event, default=str)))
+
+     def get_run_status(self, run_id: str) -> Optional[str]:
+         status = self._kv.get(f"runs:{run_id}:status")
+         if status is None:
+             return None
+         if isinstance(status, (bytes, bytearray)):
+             try:
+                 status = status.decode()
+             except Exception:
+                 return None
+         return str(status).lower() if isinstance(status, str) else None
+
+     # Per-run step bookkeeping
+     def record_run_step(self, run_id: str, process_name: str, step_name: str, record: Dict[str, Any]) -> None:
+         self._json_set(f"runs:{run_id}:steps:{process_name}:{step_name}", record)
+
+     def list_run_steps(self, run_id: str) -> Dict[str, Dict[str, Any]]:
+         prefix = f"runs:{run_id}:steps:"
+         out: Dict[str, Dict[str, Any]] = {}
+         for key in self._kv:
+             if isinstance(key, str) and key.startswith(prefix):
+                 _, _, _, process, step = key.split(":", 4)
+                 out[f"{process}.{step}"] = self._json_get(key) or {}
+         return out
+
+     # Stats
+     def increment_stat(self, run_id: str, name: str, amount: int = 1) -> None:
+         hkey = f"runs:{run_id}:stats:{name}"
+         self._kv[hkey] = int(self._kv.get(hkey, 0)) + amount
+
+     def get_pipeline_stats(self, run_id: str) -> Dict[str, Any]:
+         prefix = f"runs:{run_id}:stats:"
+         return {key[len(prefix):]: int(val) for key, val in self._kv.items() if isinstance(key, str) and key.startswith(prefix)}
+
+     # Charts index
+     def record_run_chart_artifacts(self, run_id: str, chart_name: str, artifacts: list[dict[str, Any]]) -> None:
+         idx_key = f"runs:{run_id}:charts:{chart_name}"
+         self._json_set(idx_key, {"items": artifacts})
+
+     def list_run_charts(self, run_id: str) -> Dict[str, Any]:
+         # Debug trace
+         logger.info(f"[InMemoryStore] list_run_charts(run_id={run_id})")
+         prefix = f"runs:{run_id}:charts:"
+         out: Dict[str, Any] = {}
+         for key in self._kv:
+             if isinstance(key, str) and key.startswith(prefix):
+                 name = key[len(prefix):]
+                 data = self._json_get(key) or {}
+                 items = data.get("items", [])
+                 # Derive chart type from the first item's chart_type if available
+                 ctype = None
+                 if isinstance(items, list) and items and isinstance(items[0], dict):
+                     ctype = items[0].get("chart_type")
+                 out[name] = {"type": ctype or "static", "items": items}
+         logger.info(f"[InMemoryStore] list_run_charts -> {list(out.keys())}")
+         return out
+
+     def copy_run_chart_artifacts(self, from_run_id: str, to_run_id: str, chart_name: str) -> bool:
+         """Copy chart artifacts from one run to another.
+
+         Args:
+             from_run_id: Source run ID
+             to_run_id: Destination run ID
+             chart_name: Name of the chart to copy
+
+         Returns:
+             True if the copy succeeded, False otherwise
+         """
+         try:
+             # Read chart artifacts from the source run
+             from_key = f"runs:{from_run_id}:charts:{chart_name}"
+             from_data = self._json_get(from_key)
+             if not from_data:
+                 logger.info(f"[InMemoryStore] copy_run_chart_artifacts: chart {chart_name} not found in run {from_run_id}")
+                 return False
+
+             # Write to the destination run
+             to_key = f"runs:{to_run_id}:charts:{chart_name}"
+             self._json_set(to_key, from_data)
+             logger.info(f"[InMemoryStore] copy_run_chart_artifacts: copied chart {chart_name} from {from_run_id} to {to_run_id}")
+             return True
+         except Exception as e:
+             logger.warning(f"[InMemoryStore] copy_run_chart_artifacts failed: {e}")
+             return False
+
+     def save_probe_metrics_by_path(self, run_id: str, probe_path: str, metrics: Dict[str, Any]) -> None:
+         enc = encode_probe_path(probe_path)
+         self._json_set(f"metric:{run_id}:probe_path:{enc}", metrics)
+         try:
+             self.publish_event({"type": "probe_metrics.updated", "run_id": run_id, "probe_path": probe_path, "metrics": metrics})
+         except Exception:
+             pass
+
+     def get_probe_metrics_by_path(self, run_id: str, probe_path: str) -> Dict[str, Any]:
+         enc = encode_probe_path(probe_path)
+         return self._json_get(f"metric:{run_id}:probe_path:{enc}") or {}
+
+     # Run listing (for UI)
+     def list_runs(self, limit: int = 100) -> list[str]:
+         prefix = "runs:"
+         ids: list[str] = []
+         for key in self._kv.keys():
+             if isinstance(key, str) and key.startswith(prefix) and key.endswith(":status"):
+                 ids.append(key[len(prefix):-len(":status")])
+         # Dict preserves insertion order, so this approximates "first seen" order
+         return ids[:limit]
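
As with the GCS adapter, a minimal usage sketch of the in-memory store. The project and run identifiers are invented; the calls mirror the method definitions above:

    # Hypothetical example; "demo" and "run-1" are made-up identifiers.
    store = InMemoryStore(project_id="demo")

    store.mark_pipeline_started("run-1")
    store.record_run_step("run-1", "train", "fit", {"status": "completed"})
    store.increment_stat("run-1", "steps_completed")
    store.mark_pipeline_completed("run-1", success=True)

    assert store.get_run_status("run-1") == "completed"
    assert store.get_pipeline_stats("run-1") == {"steps_completed": 1}
    assert store.list_runs() == ["run-1"]

Because everything lives in a single process-local dict, this store is only suitable for development and tests, as its docstring says; the redis_store.py and gcp_kv_store.py adapters in the file index above appear to be the persistent counterparts.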