expops 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. expops-0.1.3.dist-info/METADATA +826 -0
  2. expops-0.1.3.dist-info/RECORD +86 -0
  3. expops-0.1.3.dist-info/WHEEL +5 -0
  4. expops-0.1.3.dist-info/entry_points.txt +3 -0
  5. expops-0.1.3.dist-info/licenses/LICENSE +674 -0
  6. expops-0.1.3.dist-info/top_level.txt +1 -0
  7. mlops/__init__.py +0 -0
  8. mlops/__main__.py +11 -0
  9. mlops/_version.py +34 -0
  10. mlops/adapters/__init__.py +12 -0
  11. mlops/adapters/base.py +86 -0
  12. mlops/adapters/config_schema.py +89 -0
  13. mlops/adapters/custom/__init__.py +3 -0
  14. mlops/adapters/custom/custom_adapter.py +447 -0
  15. mlops/adapters/plugin_manager.py +113 -0
  16. mlops/adapters/sklearn/__init__.py +3 -0
  17. mlops/adapters/sklearn/adapter.py +94 -0
  18. mlops/cluster/__init__.py +3 -0
  19. mlops/cluster/controller.py +496 -0
  20. mlops/cluster/process_runner.py +91 -0
  21. mlops/cluster/providers.py +258 -0
  22. mlops/core/__init__.py +95 -0
  23. mlops/core/custom_model_base.py +38 -0
  24. mlops/core/dask_networkx_executor.py +1265 -0
  25. mlops/core/executor_worker.py +1239 -0
  26. mlops/core/experiment_tracker.py +81 -0
  27. mlops/core/graph_types.py +64 -0
  28. mlops/core/networkx_parser.py +135 -0
  29. mlops/core/payload_spill.py +278 -0
  30. mlops/core/pipeline_utils.py +162 -0
  31. mlops/core/process_hashing.py +216 -0
  32. mlops/core/step_state_manager.py +1298 -0
  33. mlops/core/step_system.py +956 -0
  34. mlops/core/workspace.py +99 -0
  35. mlops/environment/__init__.py +10 -0
  36. mlops/environment/base.py +43 -0
  37. mlops/environment/conda_manager.py +307 -0
  38. mlops/environment/factory.py +70 -0
  39. mlops/environment/pyenv_manager.py +146 -0
  40. mlops/environment/setup_env.py +31 -0
  41. mlops/environment/system_manager.py +66 -0
  42. mlops/environment/utils.py +105 -0
  43. mlops/environment/venv_manager.py +134 -0
  44. mlops/main.py +527 -0
  45. mlops/managers/project_manager.py +400 -0
  46. mlops/managers/reproducibility_manager.py +575 -0
  47. mlops/platform.py +996 -0
  48. mlops/reporting/__init__.py +16 -0
  49. mlops/reporting/context.py +187 -0
  50. mlops/reporting/entrypoint.py +292 -0
  51. mlops/reporting/kv_utils.py +77 -0
  52. mlops/reporting/registry.py +50 -0
  53. mlops/runtime/__init__.py +9 -0
  54. mlops/runtime/context.py +34 -0
  55. mlops/runtime/env_export.py +113 -0
  56. mlops/storage/__init__.py +12 -0
  57. mlops/storage/adapters/__init__.py +9 -0
  58. mlops/storage/adapters/gcp_kv_store.py +778 -0
  59. mlops/storage/adapters/gcs_object_store.py +96 -0
  60. mlops/storage/adapters/memory_store.py +240 -0
  61. mlops/storage/adapters/redis_store.py +438 -0
  62. mlops/storage/factory.py +199 -0
  63. mlops/storage/interfaces/__init__.py +6 -0
  64. mlops/storage/interfaces/kv_store.py +118 -0
  65. mlops/storage/path_utils.py +38 -0
  66. mlops/templates/premier-league/charts/plot_metrics.js +70 -0
  67. mlops/templates/premier-league/charts/plot_metrics.py +145 -0
  68. mlops/templates/premier-league/charts/requirements.txt +6 -0
  69. mlops/templates/premier-league/configs/cluster_config.yaml +13 -0
  70. mlops/templates/premier-league/configs/project_config.yaml +207 -0
  71. mlops/templates/premier-league/data/England CSV.csv +12154 -0
  72. mlops/templates/premier-league/models/premier_league_model.py +638 -0
  73. mlops/templates/premier-league/requirements.txt +8 -0
  74. mlops/templates/sklearn-basic/README.md +22 -0
  75. mlops/templates/sklearn-basic/charts/plot_metrics.py +85 -0
  76. mlops/templates/sklearn-basic/charts/requirements.txt +3 -0
  77. mlops/templates/sklearn-basic/configs/project_config.yaml +64 -0
  78. mlops/templates/sklearn-basic/data/train.csv +14 -0
  79. mlops/templates/sklearn-basic/models/model.py +62 -0
  80. mlops/templates/sklearn-basic/requirements.txt +10 -0
  81. mlops/web/__init__.py +3 -0
  82. mlops/web/server.py +585 -0
  83. mlops/web/ui/index.html +52 -0
  84. mlops/web/ui/mlops-charts.js +357 -0
  85. mlops/web/ui/script.js +1244 -0
  86. mlops/web/ui/styles.css +248 -0
@@ -0,0 +1,400 @@
1
+ import os
2
+ import shutil
3
+ import yaml
4
+ from pathlib import Path
5
+ from typing import Dict, Any, List, Optional, Union
6
+ from datetime import datetime
7
+ import json
8
+
9
+
10
class ProjectManager:
    """Manages MLOps projects with isolated state, caching, and configurations.

    Each project lives in ``<projects_root>/<project_id>/`` and is registered in
    ``<projects_root>/projects_index.json``. Per-project metadata (description,
    timestamps, run records) is stored in ``project_info.json`` inside the
    project directory.
    """

    # Sub-directories created for every new project.
    _PROJECT_SUBDIRS = ("configs", "artifacts", "logs", "data", "keys", "models", "charts")
    # Template files with these suffixes are treated as UTF-8 text and get the
    # project-id placeholder expanded; everything else is copied verbatim.
    _TEMPLATE_TEXT_SUFFIXES = frozenset({".yaml", ".yml", ".py", ".md", ".txt", ".csv", ".json"})
    _PROJECT_ID_PLACEHOLDER = "{{PROJECT_ID}}"

    def __init__(self, projects_root: Optional[Union[str, Path]] = None):
        """Resolve the projects root, create it, and ensure the index file exists.

        Args:
            projects_root: Directory holding all projects. ``None`` asks
                ``mlops.core.workspace.get_projects_root()`` and falls back to
                ``./projects`` if that fails. A relative path is interpreted
                relative to the workspace root when it can be resolved.
        """
        # Interpret projects_root relative to workspace root so callers can pass
        # `--workspace` / `MLOPS_WORKSPACE_DIR` and still work from any CWD.
        if projects_root is None:
            try:
                from mlops.core.workspace import get_projects_root

                self.projects_root = Path(get_projects_root())
            except Exception:
                # Best-effort: the workspace helper is optional here.
                self.projects_root = Path("projects")
        else:
            root = Path(projects_root)
            if not root.is_absolute():
                try:
                    from mlops.core.workspace import get_workspace_root

                    root = get_workspace_root() / root
                except Exception:
                    # Keep the path relative to the CWD if no workspace is available.
                    pass
            self.projects_root = root

        self.projects_root.mkdir(parents=True, exist_ok=True)
        self.projects_index_file = self.projects_root / "projects_index.json"
        self._ensure_projects_index()

    # ------------------------------------------------------------------
    # JSON / index helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _read_json(path: Path) -> Any:
        """Load and return the JSON document stored at ``path``."""
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    @staticmethod
    def _write_json(path: Path, payload: Any) -> None:
        """Write ``payload`` as indented JSON to ``path`` without truncating it on failure.

        The document is written to a sibling ``*.tmp`` file and then moved over
        the target with ``os.replace``, so an interrupted or failed dump leaves
        the previous file contents untouched.
        """
        path = Path(path)
        tmp_path = path.with_name(path.name + ".tmp")
        try:
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(payload, f, indent=2)
            os.replace(tmp_path, path)
        except Exception:
            tmp_path.unlink(missing_ok=True)
            raise

    @staticmethod
    def _project_info_from_index(project_id: str, entry: Dict[str, Any]) -> Dict[str, Any]:
        """Build a minimal project-info dict from an index entry.

        Used when a project's ``project_info.json`` is missing.
        """
        return {
            "project_id": project_id,
            "description": entry.get("description", ""),
            "created_at": entry.get("created_at", ""),
            "project_path": entry["project_path"],
        }

    @staticmethod
    def _validate_project_id(project_id: str) -> None:
        """Reject project IDs that cannot safely be used as a single directory name.

        Raises:
            ValueError: If the ID is empty, is ``.``/``..``, or contains a path
                separator or NUL byte. Such an ID would make the project
                directory resolve outside (or onto) the projects root.
        """
        if not isinstance(project_id, str) or not project_id.strip():
            raise ValueError("Project ID must be a non-empty string")
        if project_id in {".", ".."} or any(ch in project_id for ch in ("/", "\\", "\0")):
            raise ValueError(
                f"Invalid project ID '{project_id}': it must be a single directory name "
                "without path separators"
            )

    def _ensure_projects_index(self) -> None:
        """Ensure the projects index file exists."""
        if not self.projects_index_file.exists():
            self._save_projects_index({})

    def _load_projects_index(self) -> Dict[str, Any]:
        """Load the projects index, or return an empty dict if the file is absent."""
        try:
            return self._read_json(self.projects_index_file)
        except FileNotFoundError:
            return {}

    def _save_projects_index(self, index: Dict[str, Any]) -> None:
        """Save the projects index."""
        self._write_json(self.projects_index_file, index)

    # ------------------------------------------------------------------
    # Templates
    # ------------------------------------------------------------------

    def _list_available_templates(self) -> List[str]:
        """List built-in templates shipped with the package (best-effort).

        Returns:
            Sorted template names; an empty list if the package resources
            cannot be inspected.
        """
        templates: List[str] = []
        try:
            from importlib import resources

            root = resources.files("mlops") / "templates"
            if not root.is_dir():
                return []
            for child in root.iterdir():
                try:
                    if not child.is_dir():
                        continue
                    # New format: templates/<name>/configs/project_config.yaml
                    # Legacy format: templates/<name>/project_config.yaml
                    if (child / "configs" / "project_config.yaml").is_file() or (
                        child / "project_config.yaml"
                    ).is_file():
                        templates.append(child.name)
                except Exception:
                    # Skip entries that cannot be inspected.
                    continue
        except Exception:
            return []
        return sorted({name for name in templates if name})

    def _require_known_template(self, template: str) -> None:
        """Raise ``ValueError`` unless ``template`` is one of the built-in templates."""
        available = self._list_available_templates()
        if template not in available:
            raise ValueError(
                f"Unknown template '{template}'. Available templates: {', '.join(available) if available else '(none found)'}"
            )

    def _copy_template_tree(self, src_dir: Any, dst_dir: Path, project_id: str) -> None:
        """Recursively copy an importlib.resources Traversable dir into ``dst_dir``.

        Copying is best-effort: entries that cannot be listed or named are
        skipped, and a file that cannot be read is written out empty. Files
        with a known text suffix that decode as UTF-8 get the project-id
        placeholder replaced by ``project_id``.
        """
        try:
            entries = list(src_dir.iterdir())
        except Exception:
            entries = []

        for entry in entries:
            try:
                name = entry.name
            except Exception:
                continue
            dest_path = dst_dir / Path(name)

            try:
                if entry.is_dir():
                    self._copy_template_tree(entry, dest_path, project_id)
                    continue
            except Exception:
                # Treat unknown entries as files
                pass

            dest_path.parent.mkdir(parents=True, exist_ok=True)

            # Read bytes from resource file
            try:
                with entry.open("rb") as f:
                    raw = f.read()
            except Exception:
                raw = b""

            payload = raw
            if dest_path.suffix.lower() in self._TEMPLATE_TEXT_SUFFIXES:
                try:
                    text = raw.decode("utf-8")
                    payload = text.replace(self._PROJECT_ID_PLACEHOLDER, project_id).encode("utf-8")
                except UnicodeError:
                    # Not valid UTF-8 after all: copy the original bytes untouched.
                    payload = raw

            # Written as bytes so the template's line endings are preserved on
            # every platform (text-mode writes would translate newlines).
            dest_path.write_bytes(payload)

    def _apply_template(self, project_path: Path, project_id: str, template: str) -> Path:
        """Copy a built-in template into the project directory and return config path.

        Args:
            project_path: Destination project directory.
            project_id: Value substituted for ``{{PROJECT_ID}}`` in text files.
            template: Name of a built-in template.

        Returns:
            Path of the project's ``configs/project_config.yaml``.

        Raises:
            ValueError: If the template is unknown or does not provide a
                project config.
        """
        from importlib import resources

        self._require_known_template(template)

        root = resources.files("mlops") / "templates" / template

        # New format: full project folder skeleton under the template root.
        new_cfg = root / "configs" / "project_config.yaml"
        legacy_cfg = root / "project_config.yaml"

        if new_cfg.is_file():
            # Copy the entire template tree into the project root (configs/, data/, models/, charts/, etc.)
            self._copy_template_tree(root, project_path, project_id)
            config_dest = project_path / "configs" / "project_config.yaml"
            if not config_dest.exists():
                raise ValueError(f"Template '{template}' did not produce configs/project_config.yaml")
            return config_dest

        # Legacy fallback: copy flat files into expected project folders.
        if legacy_cfg.is_file():
            # Write config (with placeholder expansion)
            try:
                with legacy_cfg.open("r", encoding="utf-8") as f:
                    cfg_text = f.read()
            except TypeError:
                # Some Traversable implementations do not accept ``encoding``.
                with legacy_cfg.open("r") as f:
                    cfg_text = f.read()
            cfg_text = cfg_text.replace(self._PROJECT_ID_PLACEHOLDER, project_id)

            config_dest = project_path / "configs" / "project_config.yaml"
            config_dest.parent.mkdir(parents=True, exist_ok=True)
            config_dest.write_text(cfg_text, encoding="utf-8")

            data_res = root / "train.csv"
            if data_res.is_file():
                data_dest = project_path / "data" / "train.csv"
                data_dest.parent.mkdir(parents=True, exist_ok=True)
                with data_res.open("rb") as f:
                    data_dest.write_bytes(f.read())
            return config_dest

        raise ValueError(f"Template '{template}' is missing configs/project_config.yaml")

    # ------------------------------------------------------------------
    # Project lifecycle
    # ------------------------------------------------------------------

    def create_project(
        self,
        project_id: str,
        base_config_path: Optional[str] = None,
        description: str = "",
        template: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Create a new project with isolated workspace.

        Args:
            project_id: Unique identifier for the project; used as the
                directory name, so it must not contain path separators
            base_config_path: Optional path to base configuration to copy
            description: Project description
            template: Optional built-in template name to scaffold a runnable starter project

        Returns:
            Project information dictionary

        Raises:
            ValueError: If the ID is invalid, the project already exists, both
                ``template`` and ``base_config_path`` are given, or the
                template is unknown.
        """
        self._validate_project_id(project_id)

        if self.project_exists(project_id):
            raise ValueError(f"Project '{project_id}' already exists")

        if template and base_config_path:
            raise ValueError("Use either 'template' or 'base_config_path', not both")

        # Validate the template before touching the filesystem so that a typo
        # does not leave a half-created, unindexed project directory behind.
        if template:
            self._require_known_template(template)

        # Create project directory structure
        project_path = self.projects_root / project_id
        project_path.mkdir(exist_ok=True)

        # Create subdirectories for isolation (state and cache no longer created locally)
        for subdir in self._PROJECT_SUBDIRS:
            (project_path / subdir).mkdir(exist_ok=True)

        # Single timestamp so created_at and last_modified agree at creation time.
        now = datetime.now().isoformat()
        project_info: Dict[str, Any] = {
            "project_id": project_id,
            "description": description,
            "created_at": now,
            "last_modified": now,
            "base_config_path": base_config_path,
            "project_path": str(project_path),
            "runs": [],
        }

        if template:
            # Apply built-in template (copies config + optional sample data)
            config_dest = self._apply_template(project_path, project_id, template)
            project_info["active_config"] = str(config_dest)
            project_info["base_config_path"] = f"template:{template}"
        elif base_config_path:
            # Copy base configuration if provided
            if Path(base_config_path).exists():
                config_dest = project_path / "configs" / "project_config.yaml"
                shutil.copy2(base_config_path, config_dest)
                project_info["active_config"] = str(config_dest)
            else:
                # The project is still created (as before), but the missing
                # config is no longer ignored silently.
                print(f"⚠️ Base config '{base_config_path}' not found; project created without an active config")

        # Save project info
        self._write_json(project_path / "project_info.json", project_info)

        # Update projects index
        projects_index = self._load_projects_index()
        projects_index[project_id] = {
            "project_path": str(project_path),
            "created_at": project_info["created_at"],
            "description": description,
        }
        self._save_projects_index(projects_index)

        print(f"✅ Project '{project_id}' created successfully at: {project_path}")
        return project_info

    def delete_project(self, project_id: str, confirm: bool = False) -> bool:
        """
        Delete a project and all its associated data.

        Args:
            project_id: Project to delete
            confirm: If True, skip confirmation prompt

        Returns:
            True if project was deleted, False otherwise

        Raises:
            ValueError: If the indexed project path is the projects root itself
                or one of its ancestors.
        """
        if not self.project_exists(project_id):
            print(f"❌ Project '{project_id}' does not exist")
            return False

        project_path = self.get_project_path(project_id)

        # Never remove the projects root (or anything above it), which an index
        # entry created from an ID such as ".." would point at.
        root_resolved = self.projects_root.resolve()
        target_resolved = project_path.resolve()
        if target_resolved == root_resolved or target_resolved in root_resolved.parents:
            raise ValueError(f"Refusing to delete '{project_path}': it would remove the projects root")

        if not confirm:
            response = input(f"⚠️ Are you sure you want to delete project '{project_id}' and all its data? [y/N]: ")
            if response.strip().lower() != 'y':
                print("❌ Project deletion cancelled")
                return False

        # Remove project directory. It may already be gone (e.g. removed by
        # hand); the index entry must still be cleaned up in that case.
        if project_path.exists():
            shutil.rmtree(project_path)

        # Update projects index
        projects_index = self._load_projects_index()
        projects_index.pop(project_id, None)
        self._save_projects_index(projects_index)

        print(f"✅ Project '{project_id}' deleted successfully")
        return True

    # ------------------------------------------------------------------
    # Queries
    # ------------------------------------------------------------------

    def project_exists(self, project_id: str) -> bool:
        """Check if a project exists (i.e. is registered in the index)."""
        return project_id in self._load_projects_index()

    def get_project_path(self, project_id: str) -> Path:
        """Get the path to a project.

        Raises:
            ValueError: If the project is not registered in the index.
        """
        projects_index = self._load_projects_index()
        if project_id not in projects_index:
            raise ValueError(f"Project '{project_id}' does not exist")
        return Path(projects_index[project_id]["project_path"])

    def list_projects(self) -> List[Dict[str, Any]]:
        """List all projects.

        Returns:
            One info dict per indexed project. Projects whose
            ``project_info.json`` is missing are described from the index;
            projects whose info cannot be loaded are skipped with a warning.
        """
        projects: List[Dict[str, Any]] = []

        for project_id, entry in self._load_projects_index().items():
            try:
                info_file = Path(entry["project_path"]) / "project_info.json"
                if info_file.exists():
                    projects.append(self._read_json(info_file))
                else:
                    # Fallback to index info if project_info.json is missing
                    projects.append(self._project_info_from_index(project_id, entry))
            except Exception as e:
                print(f"Warning: Could not load info for project '{project_id}': {e}")

        return projects

    def get_project_info(self, project_id: str) -> Dict[str, Any]:
        """Get detailed information about a project.

        Falls back to the index entry (same shape as in ``list_projects``) when
        the project's ``project_info.json`` is missing.

        Raises:
            ValueError: If the project is not registered in the index.
        """
        projects_index = self._load_projects_index()
        if project_id not in projects_index:
            raise ValueError(f"Project '{project_id}' does not exist")

        entry = projects_index[project_id]
        info_file = Path(entry["project_path"]) / "project_info.json"
        if info_file.exists():
            return self._read_json(info_file)
        return self._project_info_from_index(project_id, entry)

    def get_project_config_path(self, project_id: str) -> Path:
        """Get the path to project's active configuration.

        Raises:
            ValueError: If the project is not registered in the index.
        """
        return self.get_project_path(project_id) / "configs" / "project_config.yaml"

    # ------------------------------------------------------------------
    # Mutations
    # ------------------------------------------------------------------

    def update_project_config(self, project_id: str, config_updates: Dict[str, Any]) -> None:
        """Update project configuration.

        Deep-merges ``config_updates`` into ``configs/project_config.yaml``
        (creating it if absent) and refreshes ``last_modified`` and
        ``active_config`` in the project info.

        Raises:
            ValueError: If the project does not exist or the existing config's
                top level is not a mapping.
        """
        project_path = self.get_project_path(project_id)
        config_file = project_path / "configs" / "project_config.yaml"

        # Load existing config or create new one. Templates are written as
        # UTF-8, so read with the same encoding rather than the locale default.
        if config_file.exists():
            with open(config_file, 'r', encoding="utf-8") as f:
                config = yaml.safe_load(f) or {}
        else:
            config = {}

        if not isinstance(config, dict):
            raise ValueError(f"Project config '{config_file}' must contain a mapping at the top level")

        # Deep merge config updates
        self._deep_merge(config, config_updates)

        # Save updated config
        config_file.parent.mkdir(parents=True, exist_ok=True)
        with open(config_file, 'w', encoding="utf-8") as f:
            yaml.dump(config, f, default_flow_style=False)

        # Update project info
        project_info = self.get_project_info(project_id)
        project_info["last_modified"] = datetime.now().isoformat()
        project_info["active_config"] = str(config_file)
        self._write_json(project_path / "project_info.json", project_info)

        print(f"✅ Project '{project_id}' configuration updated")

    def _deep_merge(self, base_dict: Dict[str, Any], update_dict: Dict[str, Any]) -> None:
        """Deep merge ``update_dict`` into ``base_dict`` in place.

        Nested dicts present on both sides are merged recursively; any other
        value from ``update_dict`` replaces the one in ``base_dict``.
        """
        for key, value in update_dict.items():
            current = base_dict.get(key)
            if isinstance(current, dict) and isinstance(value, dict):
                self._deep_merge(current, value)
            else:
                base_dict[key] = value

    def add_run_to_project(self, project_id: str, run_id: str, config_hash: str) -> None:
        """Add a run record to the project.

        Raises:
            ValueError: If the project is not registered in the index.
        """
        project_info = self.get_project_info(project_id)
        now = datetime.now().isoformat()
        # ``runs`` may be absent if the info was rebuilt from the index.
        project_info.setdefault("runs", []).append({
            "run_id": run_id,
            "timestamp": now,
            "config_hash": config_hash,
        })
        project_info["last_modified"] = now

        project_path = self.get_project_path(project_id)
        self._write_json(project_path / "project_info.json", project_info)