expops 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. expops-0.1.3.dist-info/METADATA +826 -0
  2. expops-0.1.3.dist-info/RECORD +86 -0
  3. expops-0.1.3.dist-info/WHEEL +5 -0
  4. expops-0.1.3.dist-info/entry_points.txt +3 -0
  5. expops-0.1.3.dist-info/licenses/LICENSE +674 -0
  6. expops-0.1.3.dist-info/top_level.txt +1 -0
  7. mlops/__init__.py +0 -0
  8. mlops/__main__.py +11 -0
  9. mlops/_version.py +34 -0
  10. mlops/adapters/__init__.py +12 -0
  11. mlops/adapters/base.py +86 -0
  12. mlops/adapters/config_schema.py +89 -0
  13. mlops/adapters/custom/__init__.py +3 -0
  14. mlops/adapters/custom/custom_adapter.py +447 -0
  15. mlops/adapters/plugin_manager.py +113 -0
  16. mlops/adapters/sklearn/__init__.py +3 -0
  17. mlops/adapters/sklearn/adapter.py +94 -0
  18. mlops/cluster/__init__.py +3 -0
  19. mlops/cluster/controller.py +496 -0
  20. mlops/cluster/process_runner.py +91 -0
  21. mlops/cluster/providers.py +258 -0
  22. mlops/core/__init__.py +95 -0
  23. mlops/core/custom_model_base.py +38 -0
  24. mlops/core/dask_networkx_executor.py +1265 -0
  25. mlops/core/executor_worker.py +1239 -0
  26. mlops/core/experiment_tracker.py +81 -0
  27. mlops/core/graph_types.py +64 -0
  28. mlops/core/networkx_parser.py +135 -0
  29. mlops/core/payload_spill.py +278 -0
  30. mlops/core/pipeline_utils.py +162 -0
  31. mlops/core/process_hashing.py +216 -0
  32. mlops/core/step_state_manager.py +1298 -0
  33. mlops/core/step_system.py +956 -0
  34. mlops/core/workspace.py +99 -0
  35. mlops/environment/__init__.py +10 -0
  36. mlops/environment/base.py +43 -0
  37. mlops/environment/conda_manager.py +307 -0
  38. mlops/environment/factory.py +70 -0
  39. mlops/environment/pyenv_manager.py +146 -0
  40. mlops/environment/setup_env.py +31 -0
  41. mlops/environment/system_manager.py +66 -0
  42. mlops/environment/utils.py +105 -0
  43. mlops/environment/venv_manager.py +134 -0
  44. mlops/main.py +527 -0
  45. mlops/managers/project_manager.py +400 -0
  46. mlops/managers/reproducibility_manager.py +575 -0
  47. mlops/platform.py +996 -0
  48. mlops/reporting/__init__.py +16 -0
  49. mlops/reporting/context.py +187 -0
  50. mlops/reporting/entrypoint.py +292 -0
  51. mlops/reporting/kv_utils.py +77 -0
  52. mlops/reporting/registry.py +50 -0
  53. mlops/runtime/__init__.py +9 -0
  54. mlops/runtime/context.py +34 -0
  55. mlops/runtime/env_export.py +113 -0
  56. mlops/storage/__init__.py +12 -0
  57. mlops/storage/adapters/__init__.py +9 -0
  58. mlops/storage/adapters/gcp_kv_store.py +778 -0
  59. mlops/storage/adapters/gcs_object_store.py +96 -0
  60. mlops/storage/adapters/memory_store.py +240 -0
  61. mlops/storage/adapters/redis_store.py +438 -0
  62. mlops/storage/factory.py +199 -0
  63. mlops/storage/interfaces/__init__.py +6 -0
  64. mlops/storage/interfaces/kv_store.py +118 -0
  65. mlops/storage/path_utils.py +38 -0
  66. mlops/templates/premier-league/charts/plot_metrics.js +70 -0
  67. mlops/templates/premier-league/charts/plot_metrics.py +145 -0
  68. mlops/templates/premier-league/charts/requirements.txt +6 -0
  69. mlops/templates/premier-league/configs/cluster_config.yaml +13 -0
  70. mlops/templates/premier-league/configs/project_config.yaml +207 -0
  71. mlops/templates/premier-league/data/England CSV.csv +12154 -0
  72. mlops/templates/premier-league/models/premier_league_model.py +638 -0
  73. mlops/templates/premier-league/requirements.txt +8 -0
  74. mlops/templates/sklearn-basic/README.md +22 -0
  75. mlops/templates/sklearn-basic/charts/plot_metrics.py +85 -0
  76. mlops/templates/sklearn-basic/charts/requirements.txt +3 -0
  77. mlops/templates/sklearn-basic/configs/project_config.yaml +64 -0
  78. mlops/templates/sklearn-basic/data/train.csv +14 -0
  79. mlops/templates/sklearn-basic/models/model.py +62 -0
  80. mlops/templates/sklearn-basic/requirements.txt +10 -0
  81. mlops/web/__init__.py +3 -0
  82. mlops/web/server.py +585 -0
  83. mlops/web/ui/index.html +52 -0
  84. mlops/web/ui/mlops-charts.js +357 -0
  85. mlops/web/ui/script.js +1244 -0
  86. mlops/web/ui/styles.css +248 -0
@@ -0,0 +1,162 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Dict, List, Optional, Any
4
+ from pathlib import Path
5
+
6
+
7
def _load_project_config(project_dir: Path | str, project_id: str) -> Dict[str, Any]:
    """Read and parse a project's ``project_config.yaml``.

    Returns the parsed mapping, or ``{}`` when the file is empty.
    Raises ``FileNotFoundError`` if the config file does not exist.
    """
    import yaml  # Deferred so the module imports cleanly when YAML support is unused

    root = Path(project_dir).resolve()
    config_path = root / "projects" / project_id / "configs" / "project_config.yaml"
    parsed = yaml.safe_load(config_path.read_text(encoding="utf-8"))
    return parsed if parsed else {}
13
+
14
+
15
+ def _get_pipeline_config(cfg: Dict[str, Any]) -> Dict[str, Any]:
16
+ return (cfg.get("model", {}).get("parameters", {}).get("pipeline", {}) or {})
17
+
18
+
19
def _parse_processes_from_pipeline(pipeline_config: Dict[str, Any]) -> List[str]:
    """Collect unique process names in first-seen order.

    Names are gathered from two sources: the explicit ``processes`` list
    (each entry's ``name`` key) and the NetworkX-style ``process_adjlist``.

    Args:
        pipeline_config: The ``pipeline`` section of a project config.

    Returns:
        Ordered, de-duplicated list of process names.
    """
    processes: List[str] = []

    # From explicit processes list; tolerate malformed (non-dict) entries,
    # e.g. a bare string in the YAML list, instead of raising AttributeError.
    # Mirrors the isinstance guard used in process_hashing.
    for p in pipeline_config.get("processes", []) or []:
        if not isinstance(p, dict):
            continue
        name = p.get("name")
        if name and name not in processes:
            processes.append(name)

    # From adjacency list (NetworkX-like string or list)
    for src, tgt in _iter_adjlist_edges(pipeline_config.get("process_adjlist")):
        if src and src not in processes:
            processes.append(src)
        if tgt and tgt not in processes:
            processes.append(tgt)

    return processes
36
+
37
+
38
+ def _iter_adjlist_edges(adjlist: Any) -> List[tuple[str, str]]:
39
+ """Parse a NetworkX-style adjacency list into directed edges (src, tgt)."""
40
+ lines: List[str] = []
41
+ if isinstance(adjlist, str):
42
+ lines = adjlist.splitlines()
43
+ elif isinstance(adjlist, list):
44
+ lines = [str(x) for x in adjlist]
45
+
46
+ edges: List[tuple[str, str]] = []
47
+ for raw in lines:
48
+ line = str(raw).strip()
49
+ if not line:
50
+ continue
51
+ if "#" in line:
52
+ line = line.split("#", 1)[0].strip()
53
+ if not line:
54
+ continue
55
+ parts = line.split()
56
+ if len(parts) < 2:
57
+ # No outgoing edges on this line
58
+ continue
59
+ src = parts[0]
60
+ for tgt in parts[1:]:
61
+ edges.append((src, tgt))
62
+ return edges
63
+
64
+
65
def _build_process_adjacency(pipeline_config: Dict[str, Any]) -> Dict[str, List[str]]:
    """Build a directed adjacency map ``{process: [successors]}``.

    Edges come from each process's ``depends_on`` list (dependency -> dependent)
    and from ``process_adjlist`` entries. Duplicate edges are not added twice;
    every known process appears as a key even with no successors.
    """
    processes = _parse_processes_from_pipeline(pipeline_config)
    adj: Dict[str, List[str]] = {p: [] for p in processes}

    # From explicit processes depends_on (edge direction: dependency -> dependent)
    for p in pipeline_config.get("processes", []) or []:
        if not isinstance(p, dict):
            # Tolerate malformed entries (e.g. bare strings) rather than raising.
            continue
        name = p.get("name")
        if not name:
            # A nameless entry cannot be an edge target; skipping avoids
            # appending None into successor lists.
            continue
        for dep in p.get("depends_on", []) or []:
            adj.setdefault(dep, [])
            if name not in adj[dep]:
                adj[dep].append(name)

    # From adjacency list
    for src, tgt in _iter_adjlist_edges(pipeline_config.get("process_adjlist")):
        adj.setdefault(src, [])
        if tgt not in adj[src]:
            adj[src].append(tgt)

    return adj
85
+
86
+
87
+
88
def parse_networkx_config_from_project(project_dir: Path | str, project_id: str) -> Dict[str, Any]:
    """Return a lightweight parsed view: {processes: [names], adj: {u:[v,...]}, steps_by_process: {proc:[step_names]}}"""
    cfg = _load_project_config(project_dir, project_id)
    pipeline_cfg = _get_pipeline_config(cfg)

    names = _parse_processes_from_pipeline(pipeline_cfg)

    # Manual-step mode: steps are neither configured nor auto-discovered here,
    # so every process maps to an empty step list.
    return {
        "processes": names,
        "adj": _build_process_adjacency(pipeline_cfg),
        "steps_by_process": {proc: [] for proc in names},
        "global_config": cfg.get("model", {}).get("parameters", {}) or {},
    }
105
+
106
+
107
def get_process_graph_summary(config_like: Dict[str, Any]) -> Dict[str, Any]:
    """Summarize a parsed config into ``{nodes, adj, indeg}``.

    ``nodes`` is the union of declared processes and every endpoint mentioned
    in the adjacency; ``indeg`` maps each node to its in-degree.
    """
    declared: List[str] = list(config_like.get("processes", []) or [])
    adjacency: Dict[str, List[str]] = dict(config_like.get("adj", {}) or {})

    # Union of declared processes and all adjacency endpoints.
    all_nodes = set(declared)
    all_nodes.update(adjacency.keys())
    for successors in adjacency.values():
        all_nodes.update(successors)

    nodes = list(all_nodes)
    indeg: Dict[str, int] = dict.fromkeys(nodes, 0)
    for successors in adjacency.values():
        for v in successors:
            indeg[v] = indeg.get(v, 0) + 1

    return {"nodes": nodes, "adj": adjacency, "indeg": indeg}
124
+
125
+
126
def get_process_graph_summary_from_project(project_dir: Path | str, project_id: str) -> Dict[str, Any]:
    """Convenience wrapper: parse the project's config, then summarize its process graph."""
    return get_process_graph_summary(parse_networkx_config_from_project(project_dir, project_id))
129
+
130
+
131
+
132
+
133
def setup_environment_and_write_interpreter(
    project_dir: Path | str,
    project_id: str,
    env_file: Path | str,
) -> str:
    """Provision the project's Python environment and persist the interpreter path.

    Loads the project config via ``ReproducibilityManager``; when the config's
    ``environment`` section has a ``venv`` entry without a ``name``, the venv
    name defaults to the project id. Then runs environment setup and writes
    the interpreter path to ``env_file`` (overwriting it).

    Args:
        project_dir: Workspace root containing the ``projects/`` tree.
        project_id: Project whose environment should be prepared.
        env_file: File that receives the interpreter path.

    Returns:
        The interpreter path reported by ``ReproducibilityManager``.
    """
    # Use a relative import to work whether invoked as `mlops.*` or `src.mlops.*`
    from ..managers.reproducibility_manager import ReproducibilityManager

    project_dir = Path(project_dir).resolve()
    env_file = Path(env_file)

    # Compute the project path once; the config lives under its configs/ dir.
    project_path = project_dir / "projects" / project_id
    config_path = project_path / "configs" / "project_config.yaml"
    rm = ReproducibilityManager(str(config_path), project_path=project_path)
    cfg = rm.config or {}
    env_cfg = cfg.get("environment", {}) if isinstance(cfg.get("environment", {}), dict) else {}

    if "venv" in env_cfg:
        vcfg = env_cfg.get("venv") or {}
        if not isinstance(vcfg, dict):
            vcfg = {}
        if not vcfg.get("name"):
            # Namespace the venv per project when no explicit name is given.
            vcfg["name"] = project_id
        env_cfg["venv"] = vcfg
        cfg["environment"] = env_cfg
        rm.config = cfg

    rm.setup_environment()
    py = rm.python_interpreter
    # env_file is already a Path (no re-wrap needed); write explicitly as
    # UTF-8 so non-ASCII interpreter paths round-trip regardless of locale.
    env_file.write_text(py, encoding="utf-8")
    return py
@@ -0,0 +1,216 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Optional
4
+
5
+
6
def compute_process_hashes(
    state_manager: Any,
    context: Any,
    process_name: str,
    dependency_map: dict[str, list[str]],
    lookup_name: Optional[str] = None,
) -> tuple[Optional[str], Optional[str], Optional[str]]:
    """Compute (input_hash, config_hash, function_hash) deterministically for a process.

    Determinism requirements:
    - Predecessors are traversed using a sorted order
    - Upstream signatures are dictionaries with stable key order (sorted by name)
    - Config hashing uses a filtered, ordered payload
    - Function hash is augmented with nested step AST and referenced step function hashes

    Args:
        state_manager: Object providing ``_compute_hash`` and
            ``_compute_function_hash`` (private API of the step state
            manager); any element of the result is None when it is falsy
            or when hashing fails.
        context: Carries ``global_config`` (dict) and ``project_id``,
            both read via getattr with defaults.
        process_name: Configured name of the process to hash.
        dependency_map: name -> list of upstream process names.
        lookup_name: Optional registry key override, applied only to
            ``process_name`` itself when no ``code_function`` mapping exists.

    Returns:
        Tuple ``(input_hash, config_hash, function_hash)``; each element may
        be None if its computation failed (all failures are swallowed).
    """
    # Registry accessors are optional: if step_system cannot be imported,
    # function hashing degrades to None rather than raising.
    try:
        from .step_system import get_process_registry, get_step_registry
    except Exception:
        get_process_registry = None  # type: ignore[assignment]
        get_step_registry = None  # type: ignore[assignment]

    # Build a stable mapping of configured process names -> code function names (registry keys)
    _lookup_map: dict[str, str] = {}
    try:
        global_cfg = getattr(context, "global_config", {}) or {}
        pipeline_cfg = (global_cfg.get("pipeline", {}) or {}) if isinstance(global_cfg, dict) else {}
        for p in (pipeline_cfg.get("processes", []) or []):
            if not isinstance(p, dict):
                continue
            name = p.get("name")
            code_fn = p.get("code_function")
            if name and code_fn:
                _lookup_map[str(name)] = str(code_fn)
    except Exception:
        _lookup_map = {}

    def _filtered_global_settings(gc: Any) -> dict[str, Any]:
        # Global config minus the pipeline graph and the synthetic
        # full-config hash, so edits to either do not invalidate every process.
        try:
            if not isinstance(gc, dict):
                return {}
            return {k: v for k, v in gc.items() if k not in ("pipeline", "project_config_file_hash")}
        except Exception:
            return {}

    # 1) Build upstream_signatures recursively (ih/ch/fh) using dependency_map
    def _sorted_preds(name: str) -> list[str]:
        # Sorted + de-duplicated predecessors for deterministic traversal.
        try:
            preds = list(dependency_map.get(name, []) or [])
            preds = sorted(set(preds))
            return preds
        except Exception:
            return []

    def _sig_for(up_proc: str) -> dict[str, Optional[str]]:
        # Signature triple for one upstream process. NOTE: calls _compute_for,
        # which is defined below — valid because resolution happens at call time.
        ih_u, ch_u, fh_u = _compute_for(up_proc)
        return {"ih": ih_u, "ch": ch_u, "fh": fh_u}

    # Memoizes per-process results; also bounds the recursion on shared
    # ancestors. NOTE(review): a dependency cycle would still recurse
    # indefinitely — presumably the pipeline graph is acyclic; verify upstream.
    memo: dict[str, tuple[Optional[str], Optional[str], Optional[str]]] = {}

    def _compute_for(name: str) -> tuple[Optional[str], Optional[str], Optional[str]]:
        cached = memo.get(name)
        if cached is not None:
            return cached

        # --- input hash (ih): identity of the process's input surface ---
        # Recursively compute signature for an upstream process
        try:
            upstream_signatures: dict[str, dict[str, Optional[str]]] = {}
            for p in _sorted_preds(name):
                try:
                    upstream_signatures[p] = _sig_for(p)
                except Exception:
                    continue
            input_surface = {
                "global_config_keys": sorted(list((getattr(context, "global_config", {}) or {}).keys())),
                "project_id": getattr(context, "project_id", None),
                "upstream_signatures": {k: upstream_signatures[k] for k in sorted(upstream_signatures.keys())},
            }
            ih = state_manager._compute_hash(input_surface) if state_manager else None
        except Exception:
            ih = None

        # --- config hash (ch): process-scoped configuration identity ---
        try:
            global_config = getattr(context, "global_config", {}) or {}
            # Build process-scoped config hash: (global_without_pipeline, current process hyperparameters, process name)
            process_hparams: dict[str, Any] = {}
            try:
                pipeline_cfg = global_config.get("pipeline", {}) if isinstance(global_config, dict) else {}
                for proc_cfg in (pipeline_cfg.get("processes", []) or []):
                    if isinstance(proc_cfg, dict) and proc_cfg.get("name") == name:
                        maybe = proc_cfg.get("hyperparameters", {}) or {}
                        process_hparams = dict(maybe) if isinstance(maybe, dict) else {}
                        break
            except Exception:
                process_hparams = {}
            # Exclude the pipeline graph and synthetic full-config hash to avoid global invalidations
            enhanced_config = {
                "global_config": _filtered_global_settings(global_config),
                "process_hyperparameters": process_hparams,
                "process_name": name,
            }
            ch = state_manager._compute_hash(enhanced_config) if state_manager else None
        except Exception:
            try:
                # Last resort fallback: minimal global-only hash without pipeline
                minimal = _filtered_global_settings(getattr(context, "global_config", {}) or {})
                ch = state_manager._compute_hash(minimal) if state_manager else None
            except Exception:
                ch = None

        # --- function hash (fh): process code + any step functions it uses ---
        try:
            pr = get_process_registry() if callable(get_process_registry) else None
            # Prefer the configured code_function mapping; the lookup_name
            # override applies only to the process this call was made for.
            _node_lookup = _lookup_map.get(name)
            if not _node_lookup and name == process_name:
                _node_lookup = lookup_name
            pdef = pr.get_process(_node_lookup or name) if pr else None
            # Hash the undecorated function when available, else the runner.
            orig_fn = getattr(pdef, "original_func", None) if pdef else None
            fh = state_manager._compute_function_hash(orig_fn or getattr(pdef, "runner", None)) if (state_manager and pdef) else None

            # Augment fh with hashes of steps the process references, found by
            # statically scanning its source.
            try:
                sr = get_step_registry() if callable(get_step_registry) else None
                used_step_names = set()
                try:
                    import inspect as _inspect, ast as _ast
                    src = _inspect.getsource(orig_fn or getattr(pdef, "runner", None)) if pdef else ""
                    tree = _ast.parse(src) if src else None

                    # Collects every called name (bare or attribute) in the
                    # process body; these are candidate step references.
                    class _CallVisitor(_ast.NodeVisitor):
                        def __init__(self):
                            self.names = set()
                        def visit_Call(self, node):
                            try:
                                if isinstance(node.func, _ast.Name):
                                    self.names.add(node.func.id)
                                elif isinstance(node.func, _ast.Attribute):
                                    self.names.add(node.func.attr)
                            except Exception:
                                pass
                            self.generic_visit(node)

                    # Collects functions nested inside the process that are
                    # decorated with @step or @step(...).
                    class _NestedStepVisitor(_ast.NodeVisitor):
                        def __init__(self):
                            self.func_nodes = {}
                        def visit_FunctionDef(self, node):
                            try:
                                has_step = False
                                for deco in (node.decorator_list or []):
                                    if isinstance(deco, _ast.Name) and deco.id == "step":
                                        has_step = True
                                    elif isinstance(deco, _ast.Call) and isinstance(deco.func, _ast.Name) and deco.func.id == "step":
                                        has_step = True
                                if has_step and isinstance(node.name, str):
                                    self.func_nodes[node.name] = node
                            except Exception:
                                pass
                            self.generic_visit(node)

                    nv = None
                    if tree is not None:
                        cv = _CallVisitor()
                        cv.visit(tree)
                        used_step_names = set(cv.names or set())
                        nv = _NestedStepVisitor()
                        nv.visit(tree)
                except Exception:
                    used_step_names = set()
                    nv = None

                step_hashes: dict[str, str] = {}
                # 1) Nested steps (AST): hash the normalized AST dump so
                # formatting-only edits do not change the hash.
                try:
                    import ast as _ast
                    if nv and getattr(nv, "func_nodes", None):
                        for _nm in sorted(nv.func_nodes.keys()):
                            try:
                                _node = nv.func_nodes[_nm]
                                normalized = _ast.dump(_node, annotate_fields=True, include_attributes=False)
                                s_hash = state_manager._compute_hash({"ast": normalized}) if state_manager else None
                                if s_hash:
                                    step_hashes[_nm] = s_hash
                            except Exception:
                                continue
                except Exception:
                    pass
                # 2) Registered steps referenced by call name (nested AST
                # hashes above take precedence for the same name).
                for _nm in sorted(list(used_step_names)):
                    if _nm in step_hashes:
                        continue
                    try:
                        sdef = sr.get_step(_nm) if sr else None
                        if sdef is not None:
                            s_orig = getattr(sdef, "original_func", None) or getattr(sdef, "func", None)
                            s_hash = state_manager._compute_function_hash(s_orig) if (state_manager and s_orig) else None
                            if s_hash:
                                step_hashes[_nm] = s_hash
                    except Exception:
                        continue
                if step_hashes and fh:
                    # Stable combination by sorting keys
                    ordered = {k: step_hashes[k] for k in sorted(step_hashes.keys())}
                    fh = state_manager._compute_hash({"proc": fh, "steps": ordered})
            except Exception:
                pass
        except Exception:
            fh = None

        out = (ih, ch, fh)
        memo[name] = out
        return out

    return _compute_for(process_name)
215
+
216
+