flowyml 1.2.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. flowyml/__init__.py +3 -0
  2. flowyml/assets/base.py +10 -0
  3. flowyml/assets/metrics.py +6 -0
  4. flowyml/cli/main.py +108 -2
  5. flowyml/cli/run.py +9 -2
  6. flowyml/core/execution_status.py +52 -0
  7. flowyml/core/hooks.py +106 -0
  8. flowyml/core/observability.py +210 -0
  9. flowyml/core/orchestrator.py +274 -0
  10. flowyml/core/pipeline.py +193 -231
  11. flowyml/core/project.py +34 -2
  12. flowyml/core/remote_orchestrator.py +109 -0
  13. flowyml/core/resources.py +22 -5
  14. flowyml/core/retry_policy.py +80 -0
  15. flowyml/core/step.py +18 -1
  16. flowyml/core/submission_result.py +53 -0
  17. flowyml/integrations/keras.py +95 -22
  18. flowyml/monitoring/alerts.py +2 -2
  19. flowyml/stacks/__init__.py +15 -0
  20. flowyml/stacks/aws.py +599 -0
  21. flowyml/stacks/azure.py +295 -0
  22. flowyml/stacks/components.py +24 -2
  23. flowyml/stacks/gcp.py +158 -11
  24. flowyml/stacks/local.py +5 -0
  25. flowyml/storage/artifacts.py +15 -5
  26. flowyml/storage/materializers/__init__.py +2 -0
  27. flowyml/storage/materializers/cloudpickle.py +74 -0
  28. flowyml/storage/metadata.py +166 -5
  29. flowyml/ui/backend/main.py +41 -1
  30. flowyml/ui/backend/routers/assets.py +356 -15
  31. flowyml/ui/backend/routers/client.py +46 -0
  32. flowyml/ui/backend/routers/execution.py +13 -2
  33. flowyml/ui/backend/routers/experiments.py +48 -12
  34. flowyml/ui/backend/routers/metrics.py +213 -0
  35. flowyml/ui/backend/routers/pipelines.py +63 -7
  36. flowyml/ui/backend/routers/projects.py +33 -7
  37. flowyml/ui/backend/routers/runs.py +150 -8
  38. flowyml/ui/frontend/dist/assets/index-DcYwrn2j.css +1 -0
  39. flowyml/ui/frontend/dist/assets/index-Dlz_ygOL.js +592 -0
  40. flowyml/ui/frontend/dist/index.html +2 -2
  41. flowyml/ui/frontend/src/App.jsx +4 -1
  42. flowyml/ui/frontend/src/app/assets/page.jsx +260 -230
  43. flowyml/ui/frontend/src/app/dashboard/page.jsx +38 -7
  44. flowyml/ui/frontend/src/app/experiments/page.jsx +61 -314
  45. flowyml/ui/frontend/src/app/observability/page.jsx +277 -0
  46. flowyml/ui/frontend/src/app/pipelines/page.jsx +79 -402
  47. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectArtifactsList.jsx +151 -0
  48. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectExperimentsList.jsx +145 -0
  49. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectHeader.jsx +45 -0
  50. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectHierarchy.jsx +467 -0
  51. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectMetricsPanel.jsx +253 -0
  52. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectPipelinesList.jsx +105 -0
  53. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectRelations.jsx +189 -0
  54. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectRunsList.jsx +136 -0
  55. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectTabs.jsx +95 -0
  56. flowyml/ui/frontend/src/app/projects/[projectId]/page.jsx +326 -0
  57. flowyml/ui/frontend/src/app/projects/page.jsx +13 -3
  58. flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +79 -10
  59. flowyml/ui/frontend/src/app/runs/page.jsx +82 -424
  60. flowyml/ui/frontend/src/app/settings/page.jsx +1 -0
  61. flowyml/ui/frontend/src/app/tokens/page.jsx +62 -16
  62. flowyml/ui/frontend/src/components/AssetDetailsPanel.jsx +373 -0
  63. flowyml/ui/frontend/src/components/AssetLineageGraph.jsx +291 -0
  64. flowyml/ui/frontend/src/components/AssetStatsDashboard.jsx +302 -0
  65. flowyml/ui/frontend/src/components/AssetTreeHierarchy.jsx +477 -0
  66. flowyml/ui/frontend/src/components/ExperimentDetailsPanel.jsx +227 -0
  67. flowyml/ui/frontend/src/components/NavigationTree.jsx +401 -0
  68. flowyml/ui/frontend/src/components/PipelineDetailsPanel.jsx +239 -0
  69. flowyml/ui/frontend/src/components/PipelineGraph.jsx +67 -3
  70. flowyml/ui/frontend/src/components/ProjectSelector.jsx +115 -0
  71. flowyml/ui/frontend/src/components/RunDetailsPanel.jsx +298 -0
  72. flowyml/ui/frontend/src/components/header/Header.jsx +48 -1
  73. flowyml/ui/frontend/src/components/plugins/ZenMLIntegration.jsx +106 -0
  74. flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +52 -26
  75. flowyml/ui/frontend/src/components/ui/DataView.jsx +35 -17
  76. flowyml/ui/frontend/src/components/ui/ErrorBoundary.jsx +118 -0
  77. flowyml/ui/frontend/src/contexts/ProjectContext.jsx +2 -2
  78. flowyml/ui/frontend/src/contexts/ToastContext.jsx +116 -0
  79. flowyml/ui/frontend/src/layouts/MainLayout.jsx +5 -1
  80. flowyml/ui/frontend/src/router/index.jsx +4 -0
  81. flowyml/ui/frontend/src/utils/date.js +10 -0
  82. flowyml/ui/frontend/src/utils/downloads.js +11 -0
  83. flowyml/utils/config.py +6 -0
  84. flowyml/utils/stack_config.py +45 -3
  85. {flowyml-1.2.0.dist-info → flowyml-1.3.0.dist-info}/METADATA +42 -4
  86. {flowyml-1.2.0.dist-info → flowyml-1.3.0.dist-info}/RECORD +89 -52
  87. {flowyml-1.2.0.dist-info → flowyml-1.3.0.dist-info}/licenses/LICENSE +1 -1
  88. flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +0 -448
  89. flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +0 -1
  90. {flowyml-1.2.0.dist-info → flowyml-1.3.0.dist-info}/WHEEL +0 -0
  91. {flowyml-1.2.0.dist-info → flowyml-1.3.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,213 @@
1
+ """Model metrics logging API endpoints."""
2
+
3
+ from fastapi import APIRouter, HTTPException, Depends, Security, Query
4
+ from fastapi.security import HTTPAuthorizationCredentials
5
+ from pydantic import BaseModel, Field
6
+ from typing import Any
7
+
8
+ from flowyml.ui.backend.auth import verify_api_token, security
9
+ from flowyml.storage.metadata import SQLiteMetadataStore
10
+ from flowyml.core.project import ProjectManager
11
+ from flowyml.utils.config import get_config
12
+
13
+ router = APIRouter()
14
+
15
+
16
def require_permission(permission: str):
    """Build a FastAPI dependency bound to a single permission.

    The returned coroutine function obtains the request credentials through
    ``Security(security)`` and hands them to ``verify_api_token`` together
    with ``permission``. Whatever ``verify_api_token`` returns becomes the
    value of the dependency.

    Args:
        permission: Permission name forwarded as ``required_permission``.

    Returns:
        An async callable suitable for ``Depends(...)``.
    """

    async def _check_token(
        credentials: HTTPAuthorizationCredentials = Security(security),
    ):
        token_data = await verify_api_token(
            credentials,
            required_permission=permission,
        )
        return token_data

    return _check_token
23
+
24
+
25
def get_project_manager() -> ProjectManager:
    """Return a ``ProjectManager`` rooted at the configured ``projects_dir``.

    The directory is read from ``get_config()`` on every call and passed to
    the manager as a string.
    """
    projects_root = str(get_config().projects_dir)
    return ProjectManager(projects_root)
29
+
30
+
31
def get_global_store() -> SQLiteMetadataStore:
    """Return the shared metadata store backed by the configured ``metadata_db``.

    The database location is read from ``get_config()`` on every call and
    passed to ``SQLiteMetadataStore`` as a string.
    """
    database_path = str(get_config().metadata_db)
    return SQLiteMetadataStore(database_path)
35
+
36
+
37
class MetricsLogRequest(BaseModel):
    """Request body accepted by the model-metrics logging endpoint."""

    # Required: which project and model the metrics belong to.
    project: str = Field(
        ...,
        description="Project identifier",
    )
    model_name: str = Field(
        ...,
        description="Name of the model emitting metrics",
    )

    # Required: the metric values themselves, keyed by metric name.
    metrics: dict[str, float] = Field(
        ...,
        description="Dictionary of metric_name -> value",
    )

    # Optional context attached to the metrics.
    run_id: str | None = Field(
        None,
        description="Related run identifier (optional)",
    )
    environment: str | None = Field(
        None,
        description="Environment label (e.g., prod, staging)",
    )
    # Defaults to a fresh empty dict per request when omitted.
    tags: dict[str, Any] | None = Field(
        default_factory=dict,
        description="Optional metadata tags",
    )
46
+
47
+
48
@router.post("/log")
async def log_model_metrics(
    payload: MetricsLogRequest,
    token_data: dict = Depends(require_permission("write")),
):
    """Log production metrics for a model.

    Requires tokens with the `write` permission. Project-scoped tokens
    may only submit metrics for their project.

    The metrics are written twice: once to the global metadata store and once
    through the project object. A project that does not exist yet is created
    on the fly.

    Args:
        payload: Validated request body describing the model and its metrics.
        token_data: Value produced by the ``write`` permission dependency;
            only its optional ``"project"`` entry is read here.

    Returns:
        A dict echoing the project, the model name and the names of the
        logged metrics, plus a confirmation message.

    Raises:
        HTTPException: 403 when the token is scoped to a different project;
            400 when ``metrics`` is empty or holds a non-numeric value.
    """
    token_project = token_data.get("project")
    if token_project and token_project != payload.project:
        raise HTTPException(
            status_code=403,
            detail=f"Token is scoped to project '{token_project}'",
        )

    if not payload.metrics:
        raise HTTPException(status_code=400, detail="metrics dictionary cannot be empty")

    numeric_metrics = {}
    for name, value in payload.metrics.items():
        try:
            numeric_metrics[name] = float(value)
        except (TypeError, ValueError) as exc:
            # Chain the cause so the conversion error stays visible in logs.
            raise HTTPException(
                status_code=400,
                detail=f"Metric '{name}' must be numeric.",
            ) from exc

    project_manager = get_project_manager()
    project = project_manager.get_project(payload.project)
    if not project:
        project = project_manager.create_project(payload.project)

    # One record feeds both writes so the two stores cannot drift apart.
    record = {
        "model_name": payload.model_name,
        "metrics": numeric_metrics,
        "run_id": payload.run_id,
        "environment": payload.environment,
        "tags": payload.tags,
    }

    shared_store = get_global_store()
    shared_store.log_model_metrics(project=payload.project, **record)
    project.log_model_metrics(**record)

    return {
        "project": payload.project,
        "model_name": payload.model_name,
        "logged_metrics": list(numeric_metrics.keys()),
        "message": "Metrics logged successfully",
    }
106
+
107
+
108
@router.get("")
async def list_model_metrics(
    project: str | None = Query(default=None, description="Filter by project"),
    model_name: str | None = Query(default=None, description="Filter by model"),
    limit: int = Query(default=100, ge=1, le=500),
    token_data: dict = Depends(require_permission("read")),
):
    """Return logged model metrics, honouring project-scoped tokens.

    A token carrying a ``"project"`` entry forces the query onto that
    project; asking for a different one is rejected. With a project selected
    the project's own metadata store is queried, otherwise the global store.

    Raises:
        HTTPException: 403 when the token is scoped to another project, 404
            when the selected project does not exist.
    """
    scoped_project = token_data.get("project")
    if scoped_project:
        if project and scoped_project != project:
            raise HTTPException(
                status_code=403,
                detail=f"Token is scoped to project '{scoped_project}'",
            )
        project = scoped_project

    if not project:
        store = get_global_store()
    else:
        project_obj = get_project_manager().get_project(project)
        if not project_obj:
            raise HTTPException(status_code=404, detail=f"Project '{project}' not found")
        store = project_obj.metadata_store

    return {
        "metrics": store.list_model_metrics(
            project=project,
            model_name=model_name,
            limit=limit,
        ),
    }
136
+
137
+
138
@router.get("/observability/orchestrator")
async def get_orchestrator_metrics():
    """Get orchestrator-level performance metrics.

    Aggregates the ``runs`` table of the global metadata store over the last
    30 days, measured from the naive local ``datetime.now()``.

    Returns:
        A dict with the run count, the share of runs whose status is
        ``"completed"``, the mean ``duration`` (0 when none is recorded), the
        per-status counts and the window length in days.
    """
    # NOTE(review): unlike the other endpoints in this module, this one has no
    # permission dependency - confirm that is intentional.
    from contextlib import closing
    from datetime import datetime, timedelta
    import sqlite3

    store = get_global_store()
    thirty_days_ago = (datetime.now() - timedelta(days=30)).isoformat()

    # ``closing`` guarantees the connection is released even if a query raises;
    # a bare ``with sqlite3.connect(...)`` only manages the transaction.
    with closing(sqlite3.connect(store.db_path)) as conn:
        cursor = conn.cursor()

        cursor.execute("SELECT COUNT(*) FROM runs WHERE created_at >= ?", (thirty_days_ago,))
        total_runs = cursor.fetchone()[0]

        cursor.execute(
            "SELECT status, COUNT(*) FROM runs WHERE created_at >= ? GROUP BY status",
            (thirty_days_ago,),
        )
        status_counts = dict(cursor.fetchall())

        cursor.execute(
            "SELECT AVG(duration) FROM runs WHERE created_at >= ? AND duration IS NOT NULL",
            (thirty_days_ago,),
        )
        # AVG over zero rows yields NULL; report that as 0.
        avg_duration = cursor.fetchone()[0] or 0

    completed = status_counts.get("completed", 0)
    success_rate = completed / total_runs if total_runs > 0 else 0

    return {
        "total_runs": total_runs,
        "success_rate": success_rate,
        "avg_duration_seconds": avg_duration,
        "status_distribution": status_counts,
        "period_days": 30,
    }
176
+
177
+
178
@router.get("/observability/cache")
async def get_cache_metrics():
    """Get cache performance metrics.

    Reads the ``metadata`` column of every run created in the last 30 days
    (measured from the naive local ``datetime.now()``), decodes it as JSON and
    counts the entries under ``"steps"``, and how many of them have a truthy
    ``"cached"`` flag.

    Returns:
        A dict with the total step count, the cached step count, their ratio
        (0 when there are no steps) and the window length in days.
    """
    # NOTE(review): unlike the other endpoints in this module, this one has no
    # permission dependency - confirm that is intentional.
    from contextlib import closing
    from datetime import datetime, timedelta
    import sqlite3
    import json as json_lib

    store = get_global_store()
    thirty_days_ago = (datetime.now() - timedelta(days=30)).isoformat()

    # ``closing`` guarantees the connection is released even if the query raises.
    with closing(sqlite3.connect(store.db_path)) as conn:
        cursor = conn.cursor()
        cursor.execute("SELECT metadata FROM runs WHERE created_at >= ?", (thirty_days_ago,))
        rows = cursor.fetchall()

    total_steps, cached_steps = 0, 0
    for row in rows:
        if not row[0]:
            continue
        try:
            metadata = json_lib.loads(row[0])
            for step_data in metadata.get("steps", {}).values():
                total_steps += 1
                if step_data.get("cached"):
                    cached_steps += 1
        except (ValueError, TypeError, AttributeError):
            # Best effort: undecodable JSON or an unexpected shape makes us
            # stop processing this run and move on to the next one.
            continue

    cache_hit_rate = cached_steps / total_steps if total_steps > 0 else 0

    return {
        "total_steps": total_steps,
        "cached_steps": cached_steps,
        "cache_hit_rate": cache_hit_rate,
        "period_days": 30,
    }
@@ -1,25 +1,81 @@
1
1
  from fastapi import APIRouter, HTTPException
2
2
  from pydantic import BaseModel
3
3
  from flowyml.storage.metadata import SQLiteMetadataStore
4
+ from flowyml.core.project import ProjectManager
4
5
  from flowyml.utils.config import get_config
6
+ from typing import Optional
5
7
 
6
8
  router = APIRouter()
7
9
 
8
10
 
9
11
def get_store():
    """Return a ``SQLiteMetadataStore`` created with its default arguments.

    ``get_config()`` is called first and its result is discarded; the store
    itself is not parameterised from the configuration.
    """
    get_config()
    store = SQLiteMetadataStore()
    return store
14
14
 
15
15
 
16
def _iter_metadata_stores():
    """Return ``(project_name, store)`` pairs for the global and project stores.

    The first pair is always ``(None, <default store>)``. One pair per project
    follows, for every project that ``ProjectManager`` lists with a name and
    can resolve. Project discovery is best effort: a failure stops the
    enumeration and the pairs collected so far are still returned.

    Returns:
        A list of ``(project_name | None, metadata_store)`` tuples.
    """
    import logging

    stores = [(None, SQLiteMetadataStore())]
    try:
        manager = ProjectManager()
        for project_meta in manager.list_projects():
            name = project_meta.get("name")
            if not name:
                continue
            project = manager.get_project(name)
            if project:
                stores.append((name, project.metadata_store))
    except Exception:
        # Keep the endpoint working on the stores we have, but leave a trace
        # instead of hiding the failure.
        logging.getLogger(__name__).warning(
            "Could not enumerate project metadata stores; continuing with those collected so far",
            exc_info=True,
        )
    return stores
31
+
32
+
16
33
@router.get("/")
async def list_pipelines(project: Optional[str] = None, limit: int = 100):
    """List all unique pipelines with details, optionally filtered by project.

    Every metadata store returned by ``_iter_metadata_stores`` is scanned. For
    each pipeline name with at least one run, a summary is built from the
    first run that ``store.query`` returns (presumably the most recent one;
    verify against the store's ordering).

    Args:
        project: When given, only pipelines attributed to this project are kept.
        limit: Maximum number of summaries returned.

    Returns:
        ``{"pipelines": [...]}`` where each item carries name, created,
        version, status, run_count, last_run_id and project.

    Raises:
        HTTPException: 500 wrapping any error raised while reading the stores.
    """
    try:
        pipeline_map = {}  # "<name>:<project>" (or bare name) -> summary

        for project_name, store in _iter_metadata_stores():
            # A project store that belongs to another project cannot match.
            if project and project_name and project != project_name:
                continue

            for name in store.list_pipelines():
                runs = store.query(pipeline_name=name)
                if not runs:
                    continue

                last_run = runs[0]
                run_project = last_run.get("project") or project_name

                # The run's own project wins over the store it was found in.
                if project and run_project != project:
                    continue

                # Key on name and project so equally named pipelines from
                # different projects stay separate; the first one seen wins.
                key = f"{name}:{run_project}" if run_project else name
                if key in pipeline_map:
                    continue

                git_sha = last_run.get("git_sha")
                pipeline_map[key] = {
                    "name": name,
                    "created": last_run.get("start_time"),
                    "version": git_sha[:7] if git_sha else "1.0",
                    "status": last_run.get("status", "unknown"),
                    "run_count": len(runs),
                    "last_run_id": last_run.get("run_id"),
                    "project": run_project,
                }

        enriched_pipelines = list(pipeline_map.values())[:limit]
        return {"pipelines": enriched_pipelines}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
25
81
 
@@ -1,17 +1,23 @@
1
- from fastapi import APIRouter, HTTPException
1
+ from fastapi import APIRouter, HTTPException, Depends
2
2
  from flowyml.core.project import ProjectManager
3
+ from flowyml.utils.config import get_config
3
4
  from pydantic import BaseModel
4
5
 
5
6
  router = APIRouter()
6
- manager = ProjectManager()
7
+
8
+
9
def get_projects_manager() -> ProjectManager:
    """Create a ``ProjectManager`` for the currently configured ``projects_dir``.

    Used as a FastAPI dependency by the endpoints in this module, so the
    configuration is read on every call rather than once at import time.
    """
    projects_root = str(get_config().projects_dir)
    return ProjectManager(projects_root)
7
13
 
8
14
 
9
15
@router.get("/")
async def list_projects(manager: ProjectManager = Depends(get_projects_manager)):
    """List all projects.

    Returns:
        ``{"projects": [...]}`` with whatever ``manager.list_projects()`` yields.

    Raises:
        HTTPException: 500 wrapping any error raised by the manager.
    """
    try:
        projects = manager.list_projects()
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"projects": projects}
17
23
 
@@ -22,7 +28,7 @@ class ProjectCreate(BaseModel):
22
28
 
23
29
 
24
30
  @router.post("/")
25
- async def create_project(project: ProjectCreate):
31
+ async def create_project(project: ProjectCreate, manager: ProjectManager = Depends(get_projects_manager)):
26
32
  """Create a new project."""
27
33
  created_project = manager.create_project(project.name, project.description)
28
34
  return {
@@ -33,7 +39,7 @@ async def create_project(project: ProjectCreate):
33
39
 
34
40
 
35
41
  @router.get("/{project_name}")
36
- async def get_project(project_name: str):
42
+ async def get_project(project_name: str, manager: ProjectManager = Depends(get_projects_manager)):
37
43
  """Get project details."""
38
44
  project = manager.get_project(project_name)
39
45
  if not project:
@@ -53,6 +59,7 @@ async def get_project_runs(
53
59
  project_name: str,
54
60
  pipeline_name: str | None = None,
55
61
  limit: int = 100,
62
+ manager: ProjectManager = Depends(get_projects_manager),
56
63
  ):
57
64
  """Get runs for a project."""
58
65
  project = manager.get_project(project_name)
@@ -68,6 +75,7 @@ async def get_project_artifacts(
68
75
  project_name: str,
69
76
  artifact_type: str | None = None,
70
77
  limit: int = 100,
78
+ manager: ProjectManager = Depends(get_projects_manager),
71
79
  ):
72
80
  """Get artifacts for a project."""
73
81
  project = manager.get_project(project_name)
@@ -78,8 +86,26 @@ async def get_project_artifacts(
78
86
  return artifacts
79
87
 
80
88
 
89
@router.get("/{project_name}/metrics")
async def get_project_metrics(
    project_name: str,
    model_name: str | None = None,
    limit: int = 100,
    manager: ProjectManager = Depends(get_projects_manager),
):
    """Return the production metrics logged for one project.

    Args:
        project_name: Project to look up through ``manager``.
        model_name: Optional model filter passed to the project.
        limit: Maximum number of records requested from the project.

    Raises:
        HTTPException: 404 when the project cannot be found.
    """
    project = manager.get_project(project_name)
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    logged_metrics = project.list_model_metrics(model_name=model_name, limit=limit)
    return {"project": project_name, "metrics": logged_metrics}
105
+
106
+
81
107
  @router.delete("/{project_name}")
82
- async def delete_project(project_name: str):
108
+ async def delete_project(project_name: str, manager: ProjectManager = Depends(get_projects_manager)):
83
109
  """Delete a project."""
84
110
  manager.delete_project(project_name, confirm=True)
85
111
  return {"deleted": True}
@@ -1,6 +1,9 @@
1
1
  from fastapi import APIRouter, HTTPException
2
2
  from pydantic import BaseModel
3
3
  from flowyml.storage.metadata import SQLiteMetadataStore
4
+ from flowyml.core.project import ProjectManager
5
+ from typing import Optional
6
+ import json
4
7
 
5
8
  router = APIRouter()
6
9
 
@@ -9,17 +12,62 @@ def get_store():
9
12
  return SQLiteMetadataStore()
10
13
 
11
14
 
15
def _iter_metadata_stores():
    """Return ``(project_name, store)`` pairs for the global and project stores.

    The first pair is always ``(None, <default store>)``. One pair per project
    follows, for every project that ``ProjectManager`` lists with a name and
    can resolve. Project discovery is best effort: a failure stops the
    enumeration and the pairs collected so far are still returned.

    Returns:
        A list of ``(project_name | None, metadata_store)`` tuples.
    """
    import logging

    stores: list[tuple[Optional[str], SQLiteMetadataStore]] = [(None, SQLiteMetadataStore())]
    try:
        manager = ProjectManager()
        for project_meta in manager.list_projects():
            name = project_meta.get("name")
            if not name:
                continue
            project = manager.get_project(name)
            if project:
                stores.append((name, project.metadata_store))
    except Exception:
        # Keep the endpoint working on the stores we have, but leave a trace
        # instead of hiding the failure.
        logging.getLogger(__name__).warning(
            "Could not enumerate project metadata stores; continuing with those collected so far",
            exc_info=True,
        )
    return stores
30
+
31
+
32
+ def _deduplicate_runs(runs):
33
+ seen = {}
34
+ for run, project_name in runs:
35
+ run_id = run.get("run_id") or f"{project_name}:{len(seen)}"
36
+ if run_id in seen:
37
+ continue
38
+ entry = dict(run)
39
+ if project_name and not entry.get("project"):
40
+ entry["project"] = project_name
41
+ seen[run_id] = entry
42
+ return list(seen.values())
43
+
44
+
45
+ def _sort_runs(runs):
46
+ def sort_key(run):
47
+ return run.get("start_time") or run.get("created_at") or ""
48
+
49
+ return sorted(runs, key=sort_key, reverse=True)
50
+
51
+
12
52
@router.get("/")
async def list_runs(limit: int = 20, project: str = None):
    """Return the most recent runs across all metadata stores.

    Up to ``limit`` runs are read from each store, merged, de-duplicated,
    optionally narrowed to ``project``, sorted newest first and cut to
    ``limit``. Any failure is reported in the payload rather than raised.

    Returns:
        ``{"runs": [...]}`` on success, ``{"runs": [], "error": "..."}`` on failure.
    """
    try:
        # A store owned by a different project cannot contribute matches; the
        # global store (no project name) is always consulted.
        combined = [
            (run, project_name)
            for project_name, store in _iter_metadata_stores()
            if not (project and project_name and project != project_name)
            for run in store.list_runs(limit=limit)
        ]

        runs = _deduplicate_runs(combined)
        if project:
            runs = [candidate for candidate in runs if candidate.get("project") == project]

        return {"runs": _sort_runs(runs)[:limit]}
    except Exception as e:
        return {"runs": [], "error": str(e)}
@@ -28,8 +76,7 @@ async def list_runs(limit: int = 20, project: str = None):
28
76
@router.get("/{run_id}")
async def get_run(run_id: str):
    """Return the stored record of one run, searching every metadata store.

    Raises:
        HTTPException: 404 when no store knows the run.
    """
    run = _find_run(run_id)[0]
    if run:
        return run
    raise HTTPException(status_code=404, detail="Run not found")
@@ -38,7 +85,7 @@ async def get_run(run_id: str):
38
85
@router.get("/{run_id}/metrics")
async def get_run_metrics(run_id: str):
    """Return the metrics recorded for a run, read from the store that holds it."""
    owning_store = _find_store_for_run(run_id)
    return {"metrics": owning_store.get_metrics(run_id)}
44
91
 
@@ -46,7 +93,7 @@ async def get_run_metrics(run_id: str):
46
93
  @router.get("/{run_id}/artifacts")
47
94
  async def get_run_artifacts(run_id: str):
48
95
  """Get artifacts for a specific run."""
49
- store = get_store()
96
+ store = _find_store_for_run(run_id)
50
97
  artifacts = store.list_assets(run_id=run_id)
51
98
  return {"artifacts": artifacts}
52
99
 
@@ -58,9 +105,104 @@ class ProjectUpdate(BaseModel):
58
105
@router.put("/{run_id}/project")
async def update_run_project(run_id: str, update: ProjectUpdate):
    """Update the project for a run.

    The run is located first, so the change is applied in the metadata store
    that actually holds it.

    Returns:
        ``{"status": "success", "project": <new project name>}``.

    Raises:
        HTTPException: 404 (from the store lookup) when the run is unknown;
            500 wrapping any error raised by the store update.
    """
    store = _find_store_for_run(run_id)
    try:
        store.update_run_project(run_id, update.project_name)
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"status": "success", "project": update.project_name}
114
+
115
+
116
def _find_run(run_id: str):
    """Locate a run in the first metadata store that contains it.

    Stores are probed in the order given by ``_iter_metadata_stores``. When
    the run comes from a project store and carries no ``"project"`` value,
    the store's project name is written into the returned record.

    Returns:
        ``(run, store)`` for the first hit, or ``(None, None)`` when no store
        has the run.
    """
    for project_name, store in _iter_metadata_stores():
        run = store.load_run(run_id)
        if not run:
            continue
        if project_name and not run.get("project"):
            run["project"] = project_name
        return run, store
    return None, None
124
+
125
+
126
def _find_store_for_run(run_id: str) -> SQLiteMetadataStore:
    """Return the metadata store that holds ``run_id``.

    Raises:
        HTTPException: 404 when ``_find_run`` reports no store for the run.
    """
    store = _find_run(run_id)[1]
    if not store:
        raise HTTPException(status_code=404, detail="Run not found")
    return store
131
+
132
+
133
@router.get("/{run_id}/cloud-status")
async def get_cloud_status(run_id: str):
    """Get real-time status from cloud orchestrator for remote runs.

    Returns cloud provider status if the run is remote, otherwise returns
    status from metadata store.

    A run counts as local when its metadata has no ``orchestrator_type`` or
    the value is ``"local"``. For any other value the active stack's
    orchestrator is asked for the run status. Problems on that path are
    reported in ``cloud_error`` instead of failing the request.

    Raises:
        HTTPException: 404 when the run is not found in any metadata store.
    """
    run, store = _find_run(run_id)
    if not run:
        raise HTTPException(status_code=404, detail="Run not found")

    # Metadata may be missing, null, a JSON string or already a mapping;
    # anything that does not end up as a dict is treated as "no metadata".
    metadata = run.get("metadata") or {}
    if isinstance(metadata, str):
        try:
            metadata = json.loads(metadata)
        except (TypeError, ValueError):
            metadata = {}
    if not isinstance(metadata, dict):
        metadata = {}

    orchestrator_type = metadata.get("orchestrator_type", "local")

    # Local runs have no remote side to query.
    if orchestrator_type == "local":
        return {
            "run_id": run_id,
            "status": run.get("status", "unknown"),
            "orchestrator_type": "local",
            "is_remote": False,
            "cloud_status": None,
        }

    cloud_status = None
    cloud_error = None

    try:
        # Imported lazily so missing cloud SDKs only affect this endpoint.
        from flowyml.utils.stack_config import load_active_stack

        stack = load_active_stack()
        if not stack or not stack.orchestrator:
            cloud_error = "No active stack or orchestrator configured"
        else:
            orchestrator = stack.orchestrator

            if hasattr(orchestrator, "get_run_status"):
                from flowyml.core.execution_status import ExecutionStatus

                status = orchestrator.get_run_status(run_id)

                if isinstance(status, ExecutionStatus):
                    cloud_status = {
                        "status": status.value,
                        "is_finished": status.is_finished,
                        "is_successful": status.is_successful,
                    }
                else:
                    # Unknown status objects are reported by their string form.
                    cloud_status = {"status": str(status)}
            else:
                cloud_error = f"Orchestrator {orchestrator_type} does not support status queries"

    except ImportError as e:
        cloud_error = f"Cloud SDK not available: {e}"
    except Exception as e:
        cloud_error = f"Error querying cloud status: {e}"

    return {
        "run_id": run_id,
        "status": run.get("status", "unknown"),
        "orchestrator_type": orchestrator_type,
        "is_remote": True,
        "cloud_status": cloud_status,
        "cloud_error": cloud_error,
    }