expops 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. expops-0.1.3.dist-info/METADATA +826 -0
  2. expops-0.1.3.dist-info/RECORD +86 -0
  3. expops-0.1.3.dist-info/WHEEL +5 -0
  4. expops-0.1.3.dist-info/entry_points.txt +3 -0
  5. expops-0.1.3.dist-info/licenses/LICENSE +674 -0
  6. expops-0.1.3.dist-info/top_level.txt +1 -0
  7. mlops/__init__.py +0 -0
  8. mlops/__main__.py +11 -0
  9. mlops/_version.py +34 -0
  10. mlops/adapters/__init__.py +12 -0
  11. mlops/adapters/base.py +86 -0
  12. mlops/adapters/config_schema.py +89 -0
  13. mlops/adapters/custom/__init__.py +3 -0
  14. mlops/adapters/custom/custom_adapter.py +447 -0
  15. mlops/adapters/plugin_manager.py +113 -0
  16. mlops/adapters/sklearn/__init__.py +3 -0
  17. mlops/adapters/sklearn/adapter.py +94 -0
  18. mlops/cluster/__init__.py +3 -0
  19. mlops/cluster/controller.py +496 -0
  20. mlops/cluster/process_runner.py +91 -0
  21. mlops/cluster/providers.py +258 -0
  22. mlops/core/__init__.py +95 -0
  23. mlops/core/custom_model_base.py +38 -0
  24. mlops/core/dask_networkx_executor.py +1265 -0
  25. mlops/core/executor_worker.py +1239 -0
  26. mlops/core/experiment_tracker.py +81 -0
  27. mlops/core/graph_types.py +64 -0
  28. mlops/core/networkx_parser.py +135 -0
  29. mlops/core/payload_spill.py +278 -0
  30. mlops/core/pipeline_utils.py +162 -0
  31. mlops/core/process_hashing.py +216 -0
  32. mlops/core/step_state_manager.py +1298 -0
  33. mlops/core/step_system.py +956 -0
  34. mlops/core/workspace.py +99 -0
  35. mlops/environment/__init__.py +10 -0
  36. mlops/environment/base.py +43 -0
  37. mlops/environment/conda_manager.py +307 -0
  38. mlops/environment/factory.py +70 -0
  39. mlops/environment/pyenv_manager.py +146 -0
  40. mlops/environment/setup_env.py +31 -0
  41. mlops/environment/system_manager.py +66 -0
  42. mlops/environment/utils.py +105 -0
  43. mlops/environment/venv_manager.py +134 -0
  44. mlops/main.py +527 -0
  45. mlops/managers/project_manager.py +400 -0
  46. mlops/managers/reproducibility_manager.py +575 -0
  47. mlops/platform.py +996 -0
  48. mlops/reporting/__init__.py +16 -0
  49. mlops/reporting/context.py +187 -0
  50. mlops/reporting/entrypoint.py +292 -0
  51. mlops/reporting/kv_utils.py +77 -0
  52. mlops/reporting/registry.py +50 -0
  53. mlops/runtime/__init__.py +9 -0
  54. mlops/runtime/context.py +34 -0
  55. mlops/runtime/env_export.py +113 -0
  56. mlops/storage/__init__.py +12 -0
  57. mlops/storage/adapters/__init__.py +9 -0
  58. mlops/storage/adapters/gcp_kv_store.py +778 -0
  59. mlops/storage/adapters/gcs_object_store.py +96 -0
  60. mlops/storage/adapters/memory_store.py +240 -0
  61. mlops/storage/adapters/redis_store.py +438 -0
  62. mlops/storage/factory.py +199 -0
  63. mlops/storage/interfaces/__init__.py +6 -0
  64. mlops/storage/interfaces/kv_store.py +118 -0
  65. mlops/storage/path_utils.py +38 -0
  66. mlops/templates/premier-league/charts/plot_metrics.js +70 -0
  67. mlops/templates/premier-league/charts/plot_metrics.py +145 -0
  68. mlops/templates/premier-league/charts/requirements.txt +6 -0
  69. mlops/templates/premier-league/configs/cluster_config.yaml +13 -0
  70. mlops/templates/premier-league/configs/project_config.yaml +207 -0
  71. mlops/templates/premier-league/data/England CSV.csv +12154 -0
  72. mlops/templates/premier-league/models/premier_league_model.py +638 -0
  73. mlops/templates/premier-league/requirements.txt +8 -0
  74. mlops/templates/sklearn-basic/README.md +22 -0
  75. mlops/templates/sklearn-basic/charts/plot_metrics.py +85 -0
  76. mlops/templates/sklearn-basic/charts/requirements.txt +3 -0
  77. mlops/templates/sklearn-basic/configs/project_config.yaml +64 -0
  78. mlops/templates/sklearn-basic/data/train.csv +14 -0
  79. mlops/templates/sklearn-basic/models/model.py +62 -0
  80. mlops/templates/sklearn-basic/requirements.txt +10 -0
  81. mlops/web/__init__.py +3 -0
  82. mlops/web/server.py +585 -0
  83. mlops/web/ui/index.html +52 -0
  84. mlops/web/ui/mlops-charts.js +357 -0
  85. mlops/web/ui/script.js +1244 -0
  86. mlops/web/ui/styles.css +248 -0
@@ -0,0 +1,162 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Dict, List, Optional, Any
4
+ from pathlib import Path
5
+
6
+
7
def _load_project_config(project_dir: Path | str, project_id: str) -> Dict[str, Any]:
    """Read and parse a project's ``project_config.yaml``.

    Returns the parsed mapping, or ``{}`` when the file is empty.
    Raises ``FileNotFoundError`` if the config file does not exist.
    """
    import yaml  # Deferred so the module imports cleanly when YAML support is unused

    root = Path(project_dir).resolve()
    config_path = root / "projects" / project_id / "configs" / "project_config.yaml"
    parsed = yaml.safe_load(config_path.read_text(encoding="utf-8"))
    return parsed if parsed else {}
13
+
14
+
15
+ def _get_pipeline_config(cfg: Dict[str, Any]) -> Dict[str, Any]:
16
+ return (cfg.get("model", {}).get("parameters", {}).get("pipeline", {}) or {})
17
+
18
+
19
def _parse_processes_from_pipeline(pipeline_config: Dict[str, Any]) -> List[str]:
    """Collect unique process names in first-seen order.

    Names are gathered from two sources: the explicit ``processes`` list
    (each entry's ``name`` key) and the NetworkX-style ``process_adjlist``.

    Args:
        pipeline_config: The ``pipeline`` section of a project config.

    Returns:
        Ordered, de-duplicated list of process names.
    """
    processes: List[str] = []

    # From explicit processes list; tolerate malformed (non-dict) entries,
    # e.g. a bare string in the YAML list, instead of raising AttributeError.
    # Mirrors the isinstance guard used in process_hashing.
    for p in pipeline_config.get("processes", []) or []:
        if not isinstance(p, dict):
            continue
        name = p.get("name")
        if name and name not in processes:
            processes.append(name)

    # From adjacency list (NetworkX-like string or list)
    for src, tgt in _iter_adjlist_edges(pipeline_config.get("process_adjlist")):
        if src and src not in processes:
            processes.append(src)
        if tgt and tgt not in processes:
            processes.append(tgt)

    return processes
36
+
37
+
38
+ def _iter_adjlist_edges(adjlist: Any) -> List[tuple[str, str]]:
39
+ """Parse a NetworkX-style adjacency list into directed edges (src, tgt)."""
40
+ lines: List[str] = []
41
+ if isinstance(adjlist, str):
42
+ lines = adjlist.splitlines()
43
+ elif isinstance(adjlist, list):
44
+ lines = [str(x) for x in adjlist]
45
+
46
+ edges: List[tuple[str, str]] = []
47
+ for raw in lines:
48
+ line = str(raw).strip()
49
+ if not line:
50
+ continue
51
+ if "#" in line:
52
+ line = line.split("#", 1)[0].strip()
53
+ if not line:
54
+ continue
55
+ parts = line.split()
56
+ if len(parts) < 2:
57
+ # No outgoing edges on this line
58
+ continue
59
+ src = parts[0]
60
+ for tgt in parts[1:]:
61
+ edges.append((src, tgt))
62
+ return edges
63
+
64
+
65
def _build_process_adjacency(pipeline_config: Dict[str, Any]) -> Dict[str, List[str]]:
    """Build a directed adjacency map ``{process: [successors]}``.

    Edges come from each process's ``depends_on`` list (dependency -> dependent)
    and from ``process_adjlist`` entries. Duplicate edges are not added twice;
    every known process appears as a key even with no successors.
    """
    processes = _parse_processes_from_pipeline(pipeline_config)
    adj: Dict[str, List[str]] = {p: [] for p in processes}

    # From explicit processes depends_on (edge direction: dependency -> dependent)
    for p in pipeline_config.get("processes", []) or []:
        if not isinstance(p, dict):
            # Tolerate malformed entries (e.g. bare strings) rather than raising.
            continue
        name = p.get("name")
        if not name:
            # A nameless entry cannot be an edge target; skipping avoids
            # appending None into successor lists.
            continue
        for dep in p.get("depends_on", []) or []:
            adj.setdefault(dep, [])
            if name not in adj[dep]:
                adj[dep].append(name)

    # From adjacency list
    for src, tgt in _iter_adjlist_edges(pipeline_config.get("process_adjlist")):
        adj.setdefault(src, [])
        if tgt not in adj[src]:
            adj[src].append(tgt)

    return adj
85
+
86
+
87
+
88
def parse_networkx_config_from_project(project_dir: Path | str, project_id: str) -> Dict[str, Any]:
    """Return a lightweight parsed view: {processes: [names], adj: {u:[v,...]}, steps_by_process: {proc:[step_names]}}"""
    cfg = _load_project_config(project_dir, project_id)
    pipeline_cfg = _get_pipeline_config(cfg)

    names = _parse_processes_from_pipeline(pipeline_cfg)

    # Manual-step mode: steps are neither configured nor auto-discovered here,
    # so every process maps to an empty step list.
    return {
        "processes": names,
        "adj": _build_process_adjacency(pipeline_cfg),
        "steps_by_process": {proc: [] for proc in names},
        "global_config": cfg.get("model", {}).get("parameters", {}) or {},
    }
105
+
106
+
107
def get_process_graph_summary(config_like: Dict[str, Any]) -> Dict[str, Any]:
    """Summarize a parsed config into ``{nodes, adj, indeg}``.

    ``nodes`` is the union of declared processes and every endpoint mentioned
    in the adjacency; ``indeg`` maps each node to its in-degree.
    """
    declared: List[str] = list(config_like.get("processes", []) or [])
    adjacency: Dict[str, List[str]] = dict(config_like.get("adj", {}) or {})

    # Union of declared processes and all adjacency endpoints.
    all_nodes = set(declared)
    all_nodes.update(adjacency.keys())
    for successors in adjacency.values():
        all_nodes.update(successors)

    nodes = list(all_nodes)
    indeg: Dict[str, int] = dict.fromkeys(nodes, 0)
    for successors in adjacency.values():
        for v in successors:
            indeg[v] = indeg.get(v, 0) + 1

    return {"nodes": nodes, "adj": adjacency, "indeg": indeg}
124
+
125
+
126
def get_process_graph_summary_from_project(project_dir: Path | str, project_id: str) -> Dict[str, Any]:
    """Convenience wrapper: parse the project's config, then summarize its process graph."""
    return get_process_graph_summary(parse_networkx_config_from_project(project_dir, project_id))
129
+
130
+
131
+
132
+
133
def setup_environment_and_write_interpreter(
    project_dir: Path | str,
    project_id: str,
    env_file: Path | str,
) -> str:
    """Provision the project's Python environment and persist the interpreter path.

    Loads the project config via ``ReproducibilityManager``; when the config's
    ``environment`` section has a ``venv`` entry without a ``name``, the venv
    name defaults to the project id. Then runs environment setup and writes
    the interpreter path to ``env_file`` (overwriting it).

    Args:
        project_dir: Workspace root containing the ``projects/`` tree.
        project_id: Project whose environment should be prepared.
        env_file: File that receives the interpreter path.

    Returns:
        The interpreter path reported by ``ReproducibilityManager``.
    """
    # Use a relative import to work whether invoked as `mlops.*` or `src.mlops.*`
    from ..managers.reproducibility_manager import ReproducibilityManager

    project_dir = Path(project_dir).resolve()
    env_file = Path(env_file)

    # Compute the project path once; the config lives under its configs/ dir.
    project_path = project_dir / "projects" / project_id
    config_path = project_path / "configs" / "project_config.yaml"
    rm = ReproducibilityManager(str(config_path), project_path=project_path)
    cfg = rm.config or {}
    env_cfg = cfg.get("environment", {}) if isinstance(cfg.get("environment", {}), dict) else {}

    if "venv" in env_cfg:
        vcfg = env_cfg.get("venv") or {}
        if not isinstance(vcfg, dict):
            vcfg = {}
        if not vcfg.get("name"):
            # Namespace the venv per project when no explicit name is given.
            vcfg["name"] = project_id
        env_cfg["venv"] = vcfg
        cfg["environment"] = env_cfg
        rm.config = cfg

    rm.setup_environment()
    py = rm.python_interpreter
    # env_file is already a Path (no re-wrap needed); write explicitly as
    # UTF-8 so non-ASCII interpreter paths round-trip regardless of locale.
    env_file.write_text(py, encoding="utf-8")
    return py
@@ -0,0 +1,216 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Optional
4
+
5
+
6
def compute_process_hashes(
    state_manager: Any,
    context: Any,
    process_name: str,
    dependency_map: dict[str, list[str]],
    lookup_name: Optional[str] = None,
) -> tuple[Optional[str], Optional[str], Optional[str]]:
    """Compute (input_hash, config_hash, function_hash) deterministically for a process.

    Determinism requirements:
    - Predecessors are traversed using a sorted order
    - Upstream signatures are dictionaries with stable key order (sorted by name)
    - Config hashing uses a filtered, ordered payload
    - Function hash is augmented with nested step AST and referenced step function hashes

    Args:
        state_manager: Object providing ``_compute_hash`` and
            ``_compute_function_hash`` (private API of the step state
            manager); any element of the result is None when it is falsy
            or when hashing fails.
        context: Carries ``global_config`` (dict) and ``project_id``,
            both read via getattr with defaults.
        process_name: Configured name of the process to hash.
        dependency_map: name -> list of upstream process names.
        lookup_name: Optional registry key override, applied only to
            ``process_name`` itself when no ``code_function`` mapping exists.

    Returns:
        Tuple ``(input_hash, config_hash, function_hash)``; each element may
        be None if its computation failed (all failures are swallowed).
    """
    # Registry accessors are optional: if step_system cannot be imported,
    # function hashing degrades to None rather than raising.
    try:
        from .step_system import get_process_registry, get_step_registry
    except Exception:
        get_process_registry = None  # type: ignore[assignment]
        get_step_registry = None  # type: ignore[assignment]

    # Build a stable mapping of configured process names -> code function names (registry keys)
    _lookup_map: dict[str, str] = {}
    try:
        global_cfg = getattr(context, "global_config", {}) or {}
        pipeline_cfg = (global_cfg.get("pipeline", {}) or {}) if isinstance(global_cfg, dict) else {}
        for p in (pipeline_cfg.get("processes", []) or []):
            if not isinstance(p, dict):
                continue
            name = p.get("name")
            code_fn = p.get("code_function")
            if name and code_fn:
                _lookup_map[str(name)] = str(code_fn)
    except Exception:
        _lookup_map = {}

    def _filtered_global_settings(gc: Any) -> dict[str, Any]:
        # Global config minus the pipeline graph and the synthetic
        # full-config hash, so edits to either do not invalidate every process.
        try:
            if not isinstance(gc, dict):
                return {}
            return {k: v for k, v in gc.items() if k not in ("pipeline", "project_config_file_hash")}
        except Exception:
            return {}

    # 1) Build upstream_signatures recursively (ih/ch/fh) using dependency_map
    def _sorted_preds(name: str) -> list[str]:
        # Sorted + de-duplicated predecessors for deterministic traversal.
        try:
            preds = list(dependency_map.get(name, []) or [])
            preds = sorted(set(preds))
            return preds
        except Exception:
            return []

    def _sig_for(up_proc: str) -> dict[str, Optional[str]]:
        # Signature triple for one upstream process. NOTE: calls _compute_for,
        # which is defined below — valid because resolution happens at call time.
        ih_u, ch_u, fh_u = _compute_for(up_proc)
        return {"ih": ih_u, "ch": ch_u, "fh": fh_u}

    # Memoizes per-process results; also bounds the recursion on shared
    # ancestors. NOTE(review): a dependency cycle would still recurse
    # indefinitely — presumably the pipeline graph is acyclic; verify upstream.
    memo: dict[str, tuple[Optional[str], Optional[str], Optional[str]]] = {}

    def _compute_for(name: str) -> tuple[Optional[str], Optional[str], Optional[str]]:
        cached = memo.get(name)
        if cached is not None:
            return cached

        # --- input hash (ih): identity of the process's input surface ---
        # Recursively compute signature for an upstream process
        try:
            upstream_signatures: dict[str, dict[str, Optional[str]]] = {}
            for p in _sorted_preds(name):
                try:
                    upstream_signatures[p] = _sig_for(p)
                except Exception:
                    continue
            input_surface = {
                "global_config_keys": sorted(list((getattr(context, "global_config", {}) or {}).keys())),
                "project_id": getattr(context, "project_id", None),
                "upstream_signatures": {k: upstream_signatures[k] for k in sorted(upstream_signatures.keys())},
            }
            ih = state_manager._compute_hash(input_surface) if state_manager else None
        except Exception:
            ih = None

        # --- config hash (ch): process-scoped configuration identity ---
        try:
            global_config = getattr(context, "global_config", {}) or {}
            # Build process-scoped config hash: (global_without_pipeline, current process hyperparameters, process name)
            process_hparams: dict[str, Any] = {}
            try:
                pipeline_cfg = global_config.get("pipeline", {}) if isinstance(global_config, dict) else {}
                for proc_cfg in (pipeline_cfg.get("processes", []) or []):
                    if isinstance(proc_cfg, dict) and proc_cfg.get("name") == name:
                        maybe = proc_cfg.get("hyperparameters", {}) or {}
                        process_hparams = dict(maybe) if isinstance(maybe, dict) else {}
                        break
            except Exception:
                process_hparams = {}
            # Exclude the pipeline graph and synthetic full-config hash to avoid global invalidations
            enhanced_config = {
                "global_config": _filtered_global_settings(global_config),
                "process_hyperparameters": process_hparams,
                "process_name": name,
            }
            ch = state_manager._compute_hash(enhanced_config) if state_manager else None
        except Exception:
            try:
                # Last resort fallback: minimal global-only hash without pipeline
                minimal = _filtered_global_settings(getattr(context, "global_config", {}) or {})
                ch = state_manager._compute_hash(minimal) if state_manager else None
            except Exception:
                ch = None

        # --- function hash (fh): process code + any step functions it uses ---
        try:
            pr = get_process_registry() if callable(get_process_registry) else None
            # Prefer the configured code_function mapping; the lookup_name
            # override applies only to the process this call was made for.
            _node_lookup = _lookup_map.get(name)
            if not _node_lookup and name == process_name:
                _node_lookup = lookup_name
            pdef = pr.get_process(_node_lookup or name) if pr else None
            # Hash the undecorated function when available, else the runner.
            orig_fn = getattr(pdef, "original_func", None) if pdef else None
            fh = state_manager._compute_function_hash(orig_fn or getattr(pdef, "runner", None)) if (state_manager and pdef) else None

            # Augment fh with hashes of steps the process references, found by
            # statically scanning its source.
            try:
                sr = get_step_registry() if callable(get_step_registry) else None
                used_step_names = set()
                try:
                    import inspect as _inspect, ast as _ast
                    src = _inspect.getsource(orig_fn or getattr(pdef, "runner", None)) if pdef else ""
                    tree = _ast.parse(src) if src else None

                    # Collects every called name (bare or attribute) in the
                    # process body; these are candidate step references.
                    class _CallVisitor(_ast.NodeVisitor):
                        def __init__(self):
                            self.names = set()
                        def visit_Call(self, node):
                            try:
                                if isinstance(node.func, _ast.Name):
                                    self.names.add(node.func.id)
                                elif isinstance(node.func, _ast.Attribute):
                                    self.names.add(node.func.attr)
                            except Exception:
                                pass
                            self.generic_visit(node)

                    # Collects functions nested inside the process that are
                    # decorated with @step or @step(...).
                    class _NestedStepVisitor(_ast.NodeVisitor):
                        def __init__(self):
                            self.func_nodes = {}
                        def visit_FunctionDef(self, node):
                            try:
                                has_step = False
                                for deco in (node.decorator_list or []):
                                    if isinstance(deco, _ast.Name) and deco.id == "step":
                                        has_step = True
                                    elif isinstance(deco, _ast.Call) and isinstance(deco.func, _ast.Name) and deco.func.id == "step":
                                        has_step = True
                                if has_step and isinstance(node.name, str):
                                    self.func_nodes[node.name] = node
                            except Exception:
                                pass
                            self.generic_visit(node)

                    nv = None
                    if tree is not None:
                        cv = _CallVisitor()
                        cv.visit(tree)
                        used_step_names = set(cv.names or set())
                        nv = _NestedStepVisitor()
                        nv.visit(tree)
                except Exception:
                    used_step_names = set()
                    nv = None

                step_hashes: dict[str, str] = {}
                # 1) Nested steps (AST): hash the normalized AST dump so
                # formatting-only edits do not change the hash.
                try:
                    import ast as _ast
                    if nv and getattr(nv, "func_nodes", None):
                        for _nm in sorted(nv.func_nodes.keys()):
                            try:
                                _node = nv.func_nodes[_nm]
                                normalized = _ast.dump(_node, annotate_fields=True, include_attributes=False)
                                s_hash = state_manager._compute_hash({"ast": normalized}) if state_manager else None
                                if s_hash:
                                    step_hashes[_nm] = s_hash
                            except Exception:
                                continue
                except Exception:
                    pass
                # 2) Registered steps referenced by call name (nested AST
                # hashes above take precedence for the same name).
                for _nm in sorted(list(used_step_names)):
                    if _nm in step_hashes:
                        continue
                    try:
                        sdef = sr.get_step(_nm) if sr else None
                        if sdef is not None:
                            s_orig = getattr(sdef, "original_func", None) or getattr(sdef, "func", None)
                            s_hash = state_manager._compute_function_hash(s_orig) if (state_manager and s_orig) else None
                            if s_hash:
                                step_hashes[_nm] = s_hash
                    except Exception:
                        continue
                if step_hashes and fh:
                    # Stable combination by sorting keys
                    ordered = {k: step_hashes[k] for k in sorted(step_hashes.keys())}
                    fh = state_manager._compute_hash({"proc": fh, "steps": ordered})
            except Exception:
                pass
        except Exception:
            fh = None

        out = (ih, ch, fh)
        memo[name] = out
        return out

    return _compute_for(process_name)
215
+
216
+