expops 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. expops-0.1.3.dist-info/METADATA +826 -0
  2. expops-0.1.3.dist-info/RECORD +86 -0
  3. expops-0.1.3.dist-info/WHEEL +5 -0
  4. expops-0.1.3.dist-info/entry_points.txt +3 -0
  5. expops-0.1.3.dist-info/licenses/LICENSE +674 -0
  6. expops-0.1.3.dist-info/top_level.txt +1 -0
  7. mlops/__init__.py +0 -0
  8. mlops/__main__.py +11 -0
  9. mlops/_version.py +34 -0
  10. mlops/adapters/__init__.py +12 -0
  11. mlops/adapters/base.py +86 -0
  12. mlops/adapters/config_schema.py +89 -0
  13. mlops/adapters/custom/__init__.py +3 -0
  14. mlops/adapters/custom/custom_adapter.py +447 -0
  15. mlops/adapters/plugin_manager.py +113 -0
  16. mlops/adapters/sklearn/__init__.py +3 -0
  17. mlops/adapters/sklearn/adapter.py +94 -0
  18. mlops/cluster/__init__.py +3 -0
  19. mlops/cluster/controller.py +496 -0
  20. mlops/cluster/process_runner.py +91 -0
  21. mlops/cluster/providers.py +258 -0
  22. mlops/core/__init__.py +95 -0
  23. mlops/core/custom_model_base.py +38 -0
  24. mlops/core/dask_networkx_executor.py +1265 -0
  25. mlops/core/executor_worker.py +1239 -0
  26. mlops/core/experiment_tracker.py +81 -0
  27. mlops/core/graph_types.py +64 -0
  28. mlops/core/networkx_parser.py +135 -0
  29. mlops/core/payload_spill.py +278 -0
  30. mlops/core/pipeline_utils.py +162 -0
  31. mlops/core/process_hashing.py +216 -0
  32. mlops/core/step_state_manager.py +1298 -0
  33. mlops/core/step_system.py +956 -0
  34. mlops/core/workspace.py +99 -0
  35. mlops/environment/__init__.py +10 -0
  36. mlops/environment/base.py +43 -0
  37. mlops/environment/conda_manager.py +307 -0
  38. mlops/environment/factory.py +70 -0
  39. mlops/environment/pyenv_manager.py +146 -0
  40. mlops/environment/setup_env.py +31 -0
  41. mlops/environment/system_manager.py +66 -0
  42. mlops/environment/utils.py +105 -0
  43. mlops/environment/venv_manager.py +134 -0
  44. mlops/main.py +527 -0
  45. mlops/managers/project_manager.py +400 -0
  46. mlops/managers/reproducibility_manager.py +575 -0
  47. mlops/platform.py +996 -0
  48. mlops/reporting/__init__.py +16 -0
  49. mlops/reporting/context.py +187 -0
  50. mlops/reporting/entrypoint.py +292 -0
  51. mlops/reporting/kv_utils.py +77 -0
  52. mlops/reporting/registry.py +50 -0
  53. mlops/runtime/__init__.py +9 -0
  54. mlops/runtime/context.py +34 -0
  55. mlops/runtime/env_export.py +113 -0
  56. mlops/storage/__init__.py +12 -0
  57. mlops/storage/adapters/__init__.py +9 -0
  58. mlops/storage/adapters/gcp_kv_store.py +778 -0
  59. mlops/storage/adapters/gcs_object_store.py +96 -0
  60. mlops/storage/adapters/memory_store.py +240 -0
  61. mlops/storage/adapters/redis_store.py +438 -0
  62. mlops/storage/factory.py +199 -0
  63. mlops/storage/interfaces/__init__.py +6 -0
  64. mlops/storage/interfaces/kv_store.py +118 -0
  65. mlops/storage/path_utils.py +38 -0
  66. mlops/templates/premier-league/charts/plot_metrics.js +70 -0
  67. mlops/templates/premier-league/charts/plot_metrics.py +145 -0
  68. mlops/templates/premier-league/charts/requirements.txt +6 -0
  69. mlops/templates/premier-league/configs/cluster_config.yaml +13 -0
  70. mlops/templates/premier-league/configs/project_config.yaml +207 -0
  71. mlops/templates/premier-league/data/England CSV.csv +12154 -0
  72. mlops/templates/premier-league/models/premier_league_model.py +638 -0
  73. mlops/templates/premier-league/requirements.txt +8 -0
  74. mlops/templates/sklearn-basic/README.md +22 -0
  75. mlops/templates/sklearn-basic/charts/plot_metrics.py +85 -0
  76. mlops/templates/sklearn-basic/charts/requirements.txt +3 -0
  77. mlops/templates/sklearn-basic/configs/project_config.yaml +64 -0
  78. mlops/templates/sklearn-basic/data/train.csv +14 -0
  79. mlops/templates/sklearn-basic/models/model.py +62 -0
  80. mlops/templates/sklearn-basic/requirements.txt +10 -0
  81. mlops/web/__init__.py +3 -0
  82. mlops/web/server.py +585 -0
  83. mlops/web/ui/index.html +52 -0
  84. mlops/web/ui/mlops-charts.js +357 -0
  85. mlops/web/ui/script.js +1244 -0
  86. mlops/web/ui/styles.css +248 -0
mlops/web/server.py ADDED
@@ -0,0 +1,585 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Dict, List, Optional
4
+ from pathlib import Path
5
+ import os
6
+
7
+ from fastapi import FastAPI, HTTPException, Response
8
+ from fastapi.middleware.cors import CORSMiddleware
9
+ from fastapi.staticfiles import StaticFiles
10
+ from datetime import datetime
11
+ import mimetypes
12
+
13
+ from mlops.core.workspace import get_projects_root, get_workspace_root
14
+ from mlops.storage.factory import create_kv_store as _create_kv_store
15
+
16
# Both paths are computed once, when this module is imported.
# PROJECTS_DIR is derived from WORKSPACE_ROOT via get_projects_root().
+ WORKSPACE_ROOT = get_workspace_root()
17
+ PROJECTS_DIR = get_projects_root(WORKSPACE_ROOT)
18
+
19
+
20
def _app_version() -> str:
    """Return the installed distribution version, or ``"0.0.0"`` when unknown.

    The primary distribution name ``expops`` is tried first, followed by the
    legacy aliases kept for older builds. Every lookup failure is swallowed so
    that importing this module never fails because of packaging metadata.
    """
    try:
        from importlib import metadata  # type: ignore
    except Exception:
        return "0.0.0"

    # Primary distribution name is `expops`; keep legacy aliases for older builds.
    for candidate in ("expops", "mlops-platform", "mlops_platform"):
        try:
            found = metadata.version(candidate)
            if found:
                return str(found)
        except Exception:
            # Covers PackageNotFoundError too: try the next alias.
            continue
    return "0.0.0"
36
+
37
+
38
# ASGI application object; the advertised API version is whatever
# _app_version() reports for the installed package.
app = FastAPI(title="MLOps Platform UI API", version=_app_version())
# CORS is left wide open for local development. Credentials are disabled
# because a wildcard origin must not be combined with allow_credentials=True
# (see the FastAPI CORS documentation); none of the routes visible in this
# module read cookies or authorization headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # local dev only
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
46
+
47
+
48
def _read_projects_index() -> Dict[str, Any]:
    """Return the projects index as a mapping of project id to metadata.

    If ``PROJECTS_DIR/projects_index.json`` exists, its parsed content is
    returned. An unreadable, empty or non-mapping index yields ``{}`` because
    callers treat the result as a dict (for example ``.keys()``).

    If there is no index file, every sub-directory of ``PROJECTS_DIR`` is
    reported as a project with an empty description.
    """
    import json

    idx_path = PROJECTS_DIR / "projects_index.json"
    if idx_path.exists():
        try:
            loaded = json.loads(idx_path.read_text())
        except Exception:
            return {}
        # A JSON list/scalar would break callers that expect a mapping.
        return loaded if isinstance(loaded, dict) else {}

    # Fallback: enumerate subdirectories
    if not PROJECTS_DIR.exists():
        return {}
    return {
        child.name: {"project_path": f"projects/{child.name}", "description": ""}
        for child in sorted(PROJECTS_DIR.iterdir())
        if child.is_dir()
    }
63
+
64
+
65
def _parse_graph(project_id: str) -> Dict[str, Any]:
    """Parse the project's pipeline config and return its process-graph summary."""
    # Lazy import to avoid runtime import for users not using the UI
    from mlops.core.pipeline_utils import (
        get_process_graph_summary,
        parse_networkx_config_from_project,
    )

    workspace = str(WORKSPACE_ROOT)
    return get_process_graph_summary(
        parse_networkx_config_from_project(workspace, project_id)
    )
70
+
71
+
72
def _load_kv_cfg_from_project_config(project_id: str) -> Dict[str, Any]:
    """Best-effort load of KV backend config from projects/<id>/configs/project_config.yaml.

    Reads ``model.parameters.cache.backend``; any missing or falsy level is
    treated as an empty mapping. Every failure (missing file, YAML error,
    unexpected structure) yields ``{}``.
    """
    try:
        import yaml

        config_file = PROJECTS_DIR / project_id / "configs" / "project_config.yaml"
        if not config_file.exists():
            return {}
        node = yaml.safe_load(config_file.read_text()) or {}
        # Descend one level at a time, replacing falsy values with {}.
        for section in ("model", "parameters", "cache", "backend"):
            node = node.get(section, {}) or {}
        return node
    except Exception:
        return {}
85
+
86
+
87
def _load_object_store_cfg_from_project_config(project_id: str) -> Dict[str, Any]:
    """Load object_store config from projects/<id>/configs/project_config.yaml.

    Reads ``model.parameters.cache.object_store``; any missing or falsy level
    is treated as an empty mapping. Every failure yields ``{}``.
    """
    try:
        import yaml

        config_file = PROJECTS_DIR / project_id / "configs" / "project_config.yaml"
        if not config_file.exists():
            return {}
        node = yaml.safe_load(config_file.read_text()) or {}
        # Descend one level at a time, replacing falsy values with {}.
        for section in ("model", "parameters", "cache", "object_store"):
            node = node.get(section, {}) or {}
        return node
    except Exception:
        return {}
100
+
101
+
102
def _kv_for_project(project_id: str):
    """Create the KV store for ``project_id`` from its project config.

    When the project config names a non-empty backend ``type``, the factory
    receives a copy of the process environment with ``MLOPS_KV_BACKEND``
    removed; otherwise it receives ``os.environ`` itself.
    """
    backend_cfg = _load_kv_cfg_from_project_config(project_id)
    cfg_is_mapping = isinstance(backend_cfg, dict)

    factory_env: dict[str, str] | os._Environ[str] = os.environ
    try:
        if cfg_is_mapping and str(backend_cfg.get("type") or "").strip():
            factory_env = dict(os.environ)
            factory_env.pop("MLOPS_KV_BACKEND", None)
    except Exception:
        factory_env = os.environ

    return _create_kv_store(
        project_id,
        backend_cfg if cfg_is_mapping else {},
        env=factory_env,
        workspace_root=WORKSPACE_ROOT,
        project_root=PROJECTS_DIR / project_id,
    )
119
+
120
def _norm_backend_type(value: Any) -> str:
    """Normalize backend type strings (aligns with mlops.storage.factory).

    The value is stringified, stripped and lower-cased. The in-memory
    spellings map to ``"memory"`` and ``"firestore"`` maps to ``"gcp"``;
    anything else is returned as-is. Falsy input gives ``""``.
    """
    try:
        key = str(value or "").strip().lower()
    except Exception:
        return ""
    if key in ("mem", "inmem", "in-memory", "inmemory"):
        return "memory"
    if key == "firestore":
        return "gcp"
    return key
134
+
135
+
136
def _list_runs_fs(project_id: str) -> List[str]:
    """List run ids by scanning ``<project>/artifacts/charts`` on disk.

    Fallback discovery: the names of the sub-directories of the charts folder
    are returned in sorted order. Logs are deliberately not parsed.
    """
    charts_root = PROJECTS_DIR / project_id / "artifacts" / "charts"
    if not charts_root.exists():
        return []
    return [entry.name for entry in sorted(charts_root.iterdir()) if entry.is_dir()]
147
+
148
+
149
@app.get("/api/projects")
def list_projects() -> Dict[str, Any]:
    """Return the sorted project identifiers found in the projects index."""
    project_ids = sorted(_read_projects_index().keys())
    return {"projects": project_ids}
153
+
154
+
155
def _object_store_for_project(project_id: str):
    """Return a GCS object store for the project, or ``None``.

    ``None`` is returned when the object-store config is not a mapping, its
    ``type`` is not ``gcs``, no ``bucket`` is configured, or initialization
    fails (the failure is logged).

    Side effect: if the KV backend config names ``credentials_json``, that
    file exists under the project directory, and
    ``GOOGLE_APPLICATION_CREDENTIALS`` is unset, the variable is set to the
    resolved path of that file.
    """
    store_cfg = _load_object_store_cfg_from_project_config(project_id)
    if not isinstance(store_cfg, dict):
        return None
    if str(store_cfg.get("type", "")).lower() != "gcs":
        return None

    try:
        # Set up credentials if provided in the KV backend config (same credentials used for both)
        kv_cfg = _load_kv_cfg_from_project_config(project_id)
        relative_creds = kv_cfg.get("credentials_json") if isinstance(kv_cfg, dict) else None
        if relative_creds and not os.getenv("GOOGLE_APPLICATION_CREDENTIALS"):
            creds_file = PROJECTS_DIR / project_id / relative_creds
            if creds_file.exists():
                os.environ.setdefault("GOOGLE_APPLICATION_CREDENTIALS", str(creds_file.resolve()))

        from mlops.storage.adapters.gcs_object_store import GCSObjectStore  # type: ignore

        bucket_name = store_cfg.get("bucket")
        key_prefix = store_cfg.get("prefix")
        if not bucket_name:
            return None
        return GCSObjectStore(bucket=bucket_name, prefix=key_prefix)
    except Exception as exc:
        import logging

        logging.getLogger(__name__).error(f"Failed to initialize GCS object store: {exc}")
        return None
182
+
183
+
184
@app.get("/api/projects/{project_id}/runs")
def list_runs(project_id: str) -> Dict[str, Any]:
    """List run ids for a project.

    The KV backend is asked first (up to 100 runs). Only when it returns
    nothing and the configured backend is the local ``memory`` type (also the
    default when no type is configured) are runs discovered from the
    filesystem. For remote backends the configured backend is the source of
    truth.
    """
    kv_cfg = _load_kv_cfg_from_project_config(project_id)
    backend_kind = _norm_backend_type((kv_cfg or {}).get("type")) or "memory"
    store = _kv_for_project(project_id)

    run_ids: List[str] = []
    try:
        if store and hasattr(store, "list_runs"):
            run_ids = store.list_runs(limit=100) or []
    except Exception:
        run_ids = []

    if run_ids or backend_kind != "memory":
        return {"runs": run_ids}

    # Local filesystem discovery is a legacy/local-mode convenience.
    try:
        return {"runs": _list_runs_fs(project_id)}
    except Exception:
        return {"runs": []}
206
+
207
+
208
@app.get("/api/projects/{project_id}/graph")
def get_graph(project_id: str) -> Dict[str, Any]:
    """Return the process-graph summary for a project.

    Raises:
        HTTPException: 400 when the graph cannot be parsed.
    """
    try:
        return _parse_graph(project_id)
    except Exception as exc:
        raise HTTPException(status_code=400, detail=f"Failed to parse graph: {exc}")
215
+
216
+
217
+ @app.get("/api/projects/{project_id}/runs/{run_id}/status")
217
+ def get_run_status(project_id: str, run_id: str) -> Dict[str, Any]:
# Build the status payload for one run: the run-level status plus per-process
# status and timing aggregated from the step records returned by the KV store.
# Returns {"status", "steps", "process_status", "process_info"}; "status"
# falls back to "unknown" when the store yields nothing.
218
+ kv = _kv_for_project(project_id)
219
+ status = None
220
+ steps: Dict[str, Any] = {}
221
# Best-effort reads: any KV error is swallowed, so whatever has not been read
# yet keeps its default (status=None, steps={}).
+ try:
222
+ if kv and hasattr(kv, "get_run_status"):
223
+ status = kv.get_run_status(run_id)
224
+ if kv and hasattr(kv, "list_run_steps"):
225
+ steps = kv.list_run_steps(run_id) or {}
226
+ except Exception:
227
+ pass
228
+ process_status: Dict[str, str] = {}
229
+ process_info: Dict[str, Dict[str, Any]] = {}
230
+
231
# Convert a timestamp to epoch seconds. Numbers pass through as float; strings
# are tried as ISO format (datetime.fromisoformat) and then as a numeric
# literal. Falsy values (including 0) and unparseable values give None.
+ def _parse_ts(val: Any) -> Optional[float]:
232
+ if not val:
233
+ return None
234
+ try:
235
+ if isinstance(val, (int, float)):
236
+ return float(val)
237
+ s = str(val)
238
+ try:
239
+ return datetime.fromisoformat(s).timestamp()
240
+ except Exception:
241
+ return float(s)
242
+ except Exception:
243
+ return None
244
+
245
# First pass: fold every step record into its process entry. The process name
# comes from the record's "process_name", else from the part of the key before
# the first "."; records with no resolvable process name are skipped.
+ for key, rec in steps.items():
246
+ proc = str(rec.get("process_name") or (key.split(".")[0] if isinstance(key, str) and "." in key else ""))
247
+ st = str(rec.get("status") or "").lower()
248
+ if not proc:
249
+ continue
250
+ prev = process_status.get(proc)
251
# Precedence for merging statuses: completed/failed/cached rank 4, running
# ranks 3, every other value (including "pending") ranks 0. A status replaces
# the stored one only when it ranks strictly higher.
+ rank = {"completed": 4, "failed": 4, "cached": 4, "running": 3, "": 0}
252
+ if prev == "running" and st in ("", "pending"):
253
+ st_effective = "running"
254
+ else:
255
+ st_effective = st
256
+ if prev is None or rank.get(st_effective, 0) > rank.get(prev, 0):
257
+ process_status[proc] = st_effective
258
+
259
+ info = process_info.setdefault(proc, {"status": "pending", "started_at": None, "ended_at": None, "duration_sec": None})
260
+ step_name = str(rec.get("step_name") or (key.split(".", 1)[1] if isinstance(key, str) and "." in key else ""))
261
+
262
+ # Extract cached_run_id from step record (first encountered for this process)
263
+ cached_run_id = rec.get("cached_run_id")
264
+ if cached_run_id and "cached_run_id" not in info:
265
+ info["cached_run_id"] = cached_run_id
266
+
267
+ # Extract cached timing information for cached steps
268
+ cached_started_at = rec.get("cached_started_at")
269
+ cached_ended_at = rec.get("cached_ended_at")
270
+ cached_execution_time = rec.get("cached_execution_time")
271
+
272
+ # For cached steps, store the original timing information
273
+ if st == "cached" and cached_started_at is not None and "cached_started_at" not in info:
274
+ info["cached_started_at"] = _parse_ts(cached_started_at)
275
+ if st == "cached" and cached_ended_at is not None and "cached_ended_at" not in info:
276
+ info["cached_ended_at"] = _parse_ts(cached_ended_at)
277
+ if st == "cached" and cached_execution_time is not None and "cached_execution_time" not in info:
278
+ try:
279
+ info["cached_execution_time"] = float(cached_execution_time)
280
+ except Exception:
281
+ pass
282
+
283
+ # Only use started_at/ended_at from the process summary record (__process__)
284
+ # to avoid step records overwriting process end time.
285
+ is_process_summary = (step_name == "__process__")
286
+ ts_start = _parse_ts(rec.get("started_at")) if is_process_summary else None
287
+ ts_end = _parse_ts(rec.get("ended_at")) if is_process_summary else None
288
+ # Update timestamps only when coming from the process summary
289
+ if is_process_summary and ts_start is not None:
290
+ cur = info.get("started_at")
291
+ info["started_at"] = min(cur, ts_start) if isinstance(cur, (int, float)) else ts_start
292
+ if is_process_summary and ts_end is not None and st in ("completed", "cached", "failed"):
293
+ cur = info.get("ended_at")
294
+ info["ended_at"] = max(cur, ts_end) if isinstance(cur, (int, float)) else ts_end
295
+
296
+ # Handle execution time
297
+ exec_time = rec.get("execution_time")
298
+ try:
299
+ exec_time = float(exec_time) if exec_time is not None else None
300
+ except Exception:
301
+ exec_time = None
302
+
303
+ if step_name == "__process__":
304
+ if st:
305
+ info["status"] = st
306
+ # For terminal states, prefer execution_time when it's positive; avoid
307
+ # writing 0.0 which can mask a valid (ended-started) duration.
308
+ if st in ("completed", "cached", "failed"):
309
+ if isinstance(exec_time, (int, float)) and exec_time > 0:
310
+ info["duration_sec"] = exec_time
311
+ # Ensure start/end timestamps from the __process__ summary are reflected
312
+ if isinstance(ts_start, (int, float)):
313
+ cur = info.get("started_at")
314
+ info["started_at"] = min(cur, ts_start) if isinstance(cur, (int, float)) else ts_start
315
+ if isinstance(ts_end, (int, float)):
316
+ cur = info.get("ended_at")
317
+ info["ended_at"] = max(cur, ts_end) if isinstance(cur, (int, float)) else ts_end
318
+ else:
319
+ # Do not modify duration based on individual steps; process duration comes
320
+ # from the __process__ record or (ended_at - started_at) fallback.
321
+
322
+ if st == "failed":
323
+ info["status"] = "failed"
324
+ elif st in ("completed", "cached") and info["status"] not in ("failed", "completed", "cached"):
325
+ if isinstance(info.get("started_at"), (int, float)):
326
+ info["status"] = "running"
327
+ elif st == "running":
328
+ info["status"] = "running"
329
+
330
# Second pass: reconcile each process entry with process_status and fill in
# durations from the collected timestamps.
+ for proc, info in process_info.items():
331
+ # Ensure consistency: use process_status as the source of truth for status
332
+ if proc in process_status:
333
+ info["status"] = process_status[proc]
334
+
335
+ # Clear ended_at for running processes to avoid showing same start/end time.
336
+ # Keep ended_at for pending (unknown) so UI can display '-' based on None.
337
+ current_status = str(info.get("status") or "").lower()
338
+ if current_status == "running":
339
+ info["ended_at"] = None
340
+
341
+ # Prefer (ended-started) when execution_time is missing or non-positive
342
+ if isinstance(info.get("started_at"), (int, float)) and isinstance(info.get("ended_at"), (int, float)):
343
+ _diff = max(0.0, float(info["ended_at"]) - float(info["started_at"]))
344
+ cur_dur = info.get("duration_sec")
345
+ try:
346
+ cur_dur_val = float(cur_dur) if cur_dur is not None else None
347
+ except Exception:
348
+ cur_dur_val = None
349
+ if cur_dur_val is None or cur_dur_val <= 0.0:
350
+ info["duration_sec"] = _diff
351
+
352
+ # Calculate live duration if running and we have a start time, or if duration is
353
+ # non-positive due to an initial 0.0 execution_time write.
354
+ if current_status == "running" and isinstance(info.get("started_at"), (int, float)):
355
+ try:
356
+ now_ts = datetime.now().timestamp()
357
+ live = max(0.0, float(now_ts) - float(info["started_at"]))
358
+ cur_dur = info.get("duration_sec")
359
+ try:
360
+ cur_dur_val = float(cur_dur) if cur_dur is not None else None
361
+ except Exception:
362
+ cur_dur_val = None
363
+ if cur_dur_val is None or cur_dur_val <= 0.0 or live > cur_dur_val:
364
+ info["duration_sec"] = live
365
+ except Exception:
366
+ pass
367
+ if not info.get("status"):
368
+ info["status"] = "pending"
369
+
370
+ return {"status": status or "unknown", "steps": steps, "process_status": process_status, "process_info": process_info}
372
+
373
+
374
@app.get("/api/projects/{project_id}/chart-config")
def get_chart_config(project_id: str) -> Dict[str, Any]:
    """Get chart configuration from project_config.yaml including probe_paths.

    Returns chart definitions needed by frontend to render dynamic charts.
    Response:
    {
        "charts": [
            {
                "name": "chart_name",
                "type": "dynamic"|"static",
                "probe_paths": { "key": "path", ... }
            }, ...
        ],
        "entrypoint": "path/to/charts.js"  # User's chart file
    }

    On any failure the response is ``{"charts": [], "entrypoint": None,
    "error": "<message>"}``.
    """
    try:
        import yaml

        config_file = PROJECTS_DIR / project_id / "configs" / "project_config.yaml"
        if not config_file.exists():
            return {"charts": [], "entrypoint": None}

        reporting_cfg = (yaml.safe_load(config_file.read_text()) or {}).get("reporting", {}) or {}

        def _setting(key: str) -> str:
            # Configured value as a stripped string ("" when absent or falsy).
            return str(reporting_cfg.get(key, "") or "").strip()

        def _found(candidate: Path) -> bool:
            # A path counts as present if it exists as given or under the workspace root.
            return candidate.exists() or (WORKSPACE_ROOT / candidate).exists()

        chart_defs = reporting_cfg.get("charts", []) or []
        # New config keys: prefer explicit dynamic_entrypoint for web UI JS, then
        # derive from static_entrypoint, then legacy entrypoint.
        dynamic_js = _setting("dynamic_entrypoint")
        python_entries = (_setting("static_entrypoint"), _setting("entrypoint"))

        # 1) Explicit dynamic_entrypoint (expected to be a .js path under projects/<id>/charts)
        resolved = None
        if dynamic_js and _found(Path(dynamic_js)):
            resolved = dynamic_js
        # 2) static_entrypoint, then 3) legacy entrypoint: derive the .js sibling of the .py file
        for py_entry in python_entries:
            if resolved or not py_entry:
                continue
            js_candidate = Path(py_entry).with_suffix('.js')
            if _found(js_candidate):
                resolved = str(js_candidate)

        chart_summaries = [
            {
                "name": definition.get("name", ""),
                "type": definition.get("type", "static"),
                "probe_paths": definition.get("probe_paths", {}),
            }
            for definition in chart_defs
            if isinstance(definition, dict)
        ]
        return {"charts": chart_summaries, "entrypoint": resolved}
    except Exception as exc:
        return {"charts": [], "entrypoint": None, "error": str(exc)}
441
+
442
+
443
def _normalize_charts_index(raw: Any) -> Optional[Dict[str, Any]]:
    """Normalize a raw ``charts`` index payload into the API response shape.

    Dict entries keep their ``items``; their type is the entry's ``type``, else
    the first item's ``chart_type``, lower-cased, defaulting to ``"static"``.
    List entries become static charts. Entries of any other kind are dropped.
    Returns ``None`` when ``raw`` is not a mapping.
    """
    if not isinstance(raw, dict):
        return None
    normalized: Dict[str, Any] = {}
    for name, val in raw.items():
        if isinstance(val, dict):
            ctype = val.get('type')
            items = val.get('items') or []
            if not ctype and isinstance(items, list) and items and isinstance(items[0], dict):
                ctype = items[0].get('chart_type')
            normalized[name] = {
                'type': str(ctype).lower() if isinstance(ctype, str) else 'static',
                'items': items,
            }
        elif isinstance(val, list):
            normalized[name] = {'type': 'static', 'items': val}
    return normalized


@app.get("/api/projects/{project_id}/runs/{run_id}/charts")
def list_charts(project_id: str, run_id: str) -> Dict[str, Any]:
    """List charts for a run from KV store, including static/dynamic and cache paths.

    Response shape:
    {
        "charts": {
            "<chart_name>": {
                "type": "static"|"dynamic",
                "items": [ {"title","object_path","cache_path","mime_type","size_bytes","created_at"} ]
            }, ...
        }
    }

    When the KV store returns nothing and the project's backend type
    normalizes to ``gcp`` (which includes the ``firestore`` alias), the
    Firestore document
    ``mlops_projects/<project>/runs/<run>/charts_index/index`` is read directly
    as a best-effort fallback.
    """
    kv = _kv_for_project(project_id)
    charts: Dict[str, Any] = {}
    try:
        if kv and hasattr(kv, "list_run_charts"):
            charts = kv.list_run_charts(run_id) or {}
    except Exception:
        charts = {}
    if charts:
        return {"charts": charts}

    try:
        backend_cfg = _load_kv_cfg_from_project_config(project_id)
        # Use the shared normalizer so aliases, case and padding are handled the
        # same way as in list_runs.
        uses_gcp = isinstance(backend_cfg, dict) and _norm_backend_type(backend_cfg.get("type")) == "gcp"
        if uses_gcp:
            from google.cloud import firestore  # type: ignore

            client = firestore.Client()
            ref = (
                client.collection('mlops_projects').document(project_id)
                .collection('runs').document(run_id)
                .collection('charts_index').document('index')
            )
            snap = ref.get()
            if getattr(snap, 'exists', False):
                normalized = _normalize_charts_index((snap.to_dict() or {}).get('charts', {}))
                if normalized is not None:
                    charts = normalized
    except Exception:
        # Best-effort fallback: keep the empty result, but leave a trace for debugging.
        import logging

        logging.getLogger(__name__).debug("Firestore charts index fallback failed", exc_info=True)
    return {"charts": charts}
495
+
496
+
497
@app.get("/api/projects/{project_id}/runs/{run_id}/metrics/{probe_path:path}")
def get_probe_metrics(project_id: str, run_id: str, probe_path: str) -> Dict[str, Any]:
    """Get current metrics for a specific probe path.

    This endpoint enables frontend dynamic charts to poll for metrics updates.

    Raises:
        HTTPException: 503 when the KV store is unavailable or lacks
            ``get_probe_metrics_by_path``; 500 when the lookup itself fails.
    """
    store = _kv_for_project(project_id)
    if not (store and hasattr(store, "get_probe_metrics_by_path")):
        raise HTTPException(status_code=503, detail="KV store not available")

    try:
        payload = store.get_probe_metrics_by_path(run_id, probe_path)
        return {"metrics": payload or {}}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Failed to fetch metrics: {exc}")
512
+
513
+
514
def _resolve_chart_file(raw_path: str) -> Optional[Path]:
    """Return ``raw_path`` resolved, if it is a regular file this server may serve.

    The path is resolved (symlinks and ``..`` collapsed; relative paths are
    taken relative to the current working directory) and accepted only when it
    lies inside ``WORKSPACE_ROOT`` or ``PROJECTS_DIR``. Anything else,
    including paths that cannot be resolved, yields ``None``.
    """
    try:
        resolved = Path(raw_path).resolve()
        for root in (WORKSPACE_ROOT, PROJECTS_DIR):
            try:
                resolved.relative_to(Path(root).resolve())
            except ValueError:
                continue
            return resolved if resolved.is_file() else None
    except Exception:
        return None
    return None


@app.get("/api/projects/{project_id}/runs/{run_id}/charts/fetch")
def fetch_chart(project_id: str, run_id: str, uri: str = "", cache_path: str = "") -> Response:
    """Fetch a single chart image.

    Prefer cache_path (local filesystem) when provided and exists. Otherwise, if
    uri is a gs:// path and object store is configured, fetch from object store.
    If uri is a local path, try to read it directly.

    ``cache_path`` and ``uri`` are request parameters, so local paths are only
    served when they resolve to a file inside the workspace or projects
    directory; other paths are treated as not found.

    Raises:
        HTTPException: 400 when no usable reference is given; 404 when the
            object store is not configured or the chart cannot be found/read.
    """
    if cache_path and isinstance(cache_path, str):
        cached = _resolve_chart_file(cache_path)
        if cached is not None:
            try:
                data = cached.read_bytes()
                mime, _ = mimetypes.guess_type(Path(cache_path).name)
                return Response(content=data, media_type=mime or "image/png")
            except Exception:
                # Unreadable cache entry: fall through to the uri handling below.
                pass

    # If no uri provided, fail
    if not uri or not isinstance(uri, str):
        raise HTTPException(status_code=400, detail="Invalid chart reference")

    # If uri is a gs:// path, use object store
    if uri.startswith("gs://"):
        store = _object_store_for_project(project_id)
        if not store or not hasattr(store, "get_bytes"):
            raise HTTPException(status_code=404, detail="Object store not configured")
        try:
            data = store.get_bytes(uri)
        except Exception as e:
            raise HTTPException(status_code=404, detail=f"Object not found: {e}")
        mime, _ = mimetypes.guess_type(uri)
        return Response(content=data, media_type=mime or "image/png")

    # Otherwise, treat uri as a local file path (fallback when no object store)
    local = _resolve_chart_file(uri)
    if local is None:
        raise HTTPException(status_code=404, detail="Chart not found")
    try:
        data = local.read_bytes()
    except Exception as e:
        raise HTTPException(status_code=404, detail=f"Local file not found: {e}")
    mime, _ = mimetypes.guess_type(Path(uri).name)
    return Response(content=data, media_type=mime or "image/png")
559
+
560
# Static mounts are registered at import time, after every API route above.
# PROJECTS_STATIC_DIR is an alias of PROJECTS_DIR used as the /projects root.
+ PROJECTS_STATIC_DIR = PROJECTS_DIR
561
+ if PROJECTS_STATIC_DIR.exists():
562
+ # Mount /projects BEFORE mounting / so that requests to /projects/... resolve here
563
# NOTE(review): this serves every file under PROJECTS_DIR over HTTP — confirm
# that no secrets (e.g. credential JSON files) are kept in project folders.
+ app.mount("/projects", StaticFiles(directory=str(PROJECTS_STATIC_DIR)), name="projects-static")
564
+
565
+ # Serve static frontend (dev fallback; packaged UI is handled separately)
566
# The "ui" directory next to this file is preferred; WORKSPACE_ROOT/web-ui is
# used only when that directory does not exist.
+ PKG_UI_DIR = Path(__file__).resolve().parent / "ui"
567
+ if PKG_UI_DIR.exists():
568
+ app.mount("/", StaticFiles(directory=str(PKG_UI_DIR), html=True), name="static")
569
+ else:
570
+ STATIC_DIR = WORKSPACE_ROOT / "web-ui"
571
+ if STATIC_DIR.exists():
572
+ app.mount("/", StaticFiles(directory=str(STATIC_DIR), html=True), name="static")
573
+
574
+
575
def create_app() -> FastAPI:
    """Return the module-level FastAPI application.

    No new application is built; the same ``app`` instance is returned on
    every call.
    """
    return app
577
+
578
+
579
if __name__ == "__main__":
    # Direct execution: serve the app with uvicorn. HOST and PORT come from the
    # environment and default to 127.0.0.1:8000; auto-reload is off.
    import uvicorn

    uvicorn.run(
        "mlops.web.server:app",
        host=os.getenv("HOST", "127.0.0.1"),
        port=int(os.getenv("PORT", "8000")),
        reload=False,
    )
584
+
585
+
@@ -0,0 +1,52 @@
1
+ <!doctype html>
2
+ <html>
3
+ <head>
4
+ <meta charset="utf-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1" />
6
+ <title>MLOps Platform UI</title>
7
+ <link rel="stylesheet" href="/styles.css" />
8
+ <!-- Chart.js for rendering dynamic charts -->
9
+ <script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.0/dist/chart.umd.min.js"></script>
10
+ </head>
11
+ <body>
12
+ <div id="app">
13
+ <aside id="sidebar">
14
+ <div class="section">
15
+ <label for="themeToggle">Theme</label>
16
+ <button id="themeToggle" class="theme-toggle" aria-label="Toggle theme" title="Switch theme">🌙</button>
17
+ </div>
18
+ <div class="section">
19
+ <label for="projectSelect">Project</label>
20
+ <select id="projectSelect"></select>
21
+ </div>
22
+ <div class="section">
23
+ <label for="runList">Run ID</label>
24
+ <ul id="runList" class="run-list"></ul>
25
+ </div>
26
+ <div class="hint">Status: <span id="statusText">-</span></div>
27
+ </aside>
28
+ <main id="main">
29
+ <h2 id="graphTitle">Process Graph</h2>
30
+ <svg id="graph" width="100%" height="100%"></svg>
31
+ <!-- Dynamic charts section (kept) -->
32
+ <div id="dynamicChartsContainer" style="display: none;">
33
+ <h3>Dynamic Charts</h3>
34
+ <div id="dynamicChartsGrid"></div>
35
+ </div>
36
+ </main>
37
+ </div>
38
+ <!-- Modal for chart preview -->
39
+ <div id="chartModal" class="modal hidden">
40
+ <div class="modal-content">
41
+ <button id="chartModalClose" class="modal-close" aria-label="Close">×</button>
42
+ <div class="modal-body">
43
+ <img id="chartModalImage" alt="chart" />
44
+ <div id="chartModalMessage" class="chart-message" style="display:none;">No chart available</div>
45
+ </div>
46
+ </div>
47
+ </div>
48
+ <script type="module" src="/script.js"></script>
49
+ </body>
50
+ </html>
51
+
52
+