flowyml 1.2.0__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/__init__.py +3 -0
- flowyml/assets/base.py +10 -0
- flowyml/assets/metrics.py +6 -0
- flowyml/cli/main.py +108 -2
- flowyml/cli/run.py +9 -2
- flowyml/core/execution_status.py +52 -0
- flowyml/core/hooks.py +106 -0
- flowyml/core/observability.py +210 -0
- flowyml/core/orchestrator.py +274 -0
- flowyml/core/pipeline.py +193 -231
- flowyml/core/project.py +34 -2
- flowyml/core/remote_orchestrator.py +109 -0
- flowyml/core/resources.py +22 -5
- flowyml/core/retry_policy.py +80 -0
- flowyml/core/step.py +18 -1
- flowyml/core/submission_result.py +53 -0
- flowyml/integrations/keras.py +95 -22
- flowyml/monitoring/alerts.py +2 -2
- flowyml/stacks/__init__.py +15 -0
- flowyml/stacks/aws.py +599 -0
- flowyml/stacks/azure.py +295 -0
- flowyml/stacks/components.py +24 -2
- flowyml/stacks/gcp.py +158 -11
- flowyml/stacks/local.py +5 -0
- flowyml/storage/artifacts.py +15 -5
- flowyml/storage/materializers/__init__.py +2 -0
- flowyml/storage/materializers/cloudpickle.py +74 -0
- flowyml/storage/metadata.py +166 -5
- flowyml/ui/backend/main.py +41 -1
- flowyml/ui/backend/routers/assets.py +356 -15
- flowyml/ui/backend/routers/client.py +46 -0
- flowyml/ui/backend/routers/execution.py +13 -2
- flowyml/ui/backend/routers/experiments.py +48 -12
- flowyml/ui/backend/routers/metrics.py +213 -0
- flowyml/ui/backend/routers/pipelines.py +63 -7
- flowyml/ui/backend/routers/projects.py +33 -7
- flowyml/ui/backend/routers/runs.py +150 -8
- flowyml/ui/frontend/dist/assets/index-DcYwrn2j.css +1 -0
- flowyml/ui/frontend/dist/assets/index-Dlz_ygOL.js +592 -0
- flowyml/ui/frontend/dist/index.html +2 -2
- flowyml/ui/frontend/src/App.jsx +4 -1
- flowyml/ui/frontend/src/app/assets/page.jsx +260 -230
- flowyml/ui/frontend/src/app/dashboard/page.jsx +38 -7
- flowyml/ui/frontend/src/app/experiments/page.jsx +61 -314
- flowyml/ui/frontend/src/app/observability/page.jsx +277 -0
- flowyml/ui/frontend/src/app/pipelines/page.jsx +79 -402
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectArtifactsList.jsx +151 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectExperimentsList.jsx +145 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectHeader.jsx +45 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectHierarchy.jsx +467 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectMetricsPanel.jsx +253 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectPipelinesList.jsx +105 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectRelations.jsx +189 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectRunsList.jsx +136 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectTabs.jsx +95 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/page.jsx +326 -0
- flowyml/ui/frontend/src/app/projects/page.jsx +13 -3
- flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +79 -10
- flowyml/ui/frontend/src/app/runs/page.jsx +82 -424
- flowyml/ui/frontend/src/app/settings/page.jsx +1 -0
- flowyml/ui/frontend/src/app/tokens/page.jsx +62 -16
- flowyml/ui/frontend/src/components/AssetDetailsPanel.jsx +373 -0
- flowyml/ui/frontend/src/components/AssetLineageGraph.jsx +291 -0
- flowyml/ui/frontend/src/components/AssetStatsDashboard.jsx +302 -0
- flowyml/ui/frontend/src/components/AssetTreeHierarchy.jsx +477 -0
- flowyml/ui/frontend/src/components/ExperimentDetailsPanel.jsx +227 -0
- flowyml/ui/frontend/src/components/NavigationTree.jsx +401 -0
- flowyml/ui/frontend/src/components/PipelineDetailsPanel.jsx +239 -0
- flowyml/ui/frontend/src/components/PipelineGraph.jsx +67 -3
- flowyml/ui/frontend/src/components/ProjectSelector.jsx +115 -0
- flowyml/ui/frontend/src/components/RunDetailsPanel.jsx +298 -0
- flowyml/ui/frontend/src/components/header/Header.jsx +48 -1
- flowyml/ui/frontend/src/components/plugins/ZenMLIntegration.jsx +106 -0
- flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +52 -26
- flowyml/ui/frontend/src/components/ui/DataView.jsx +35 -17
- flowyml/ui/frontend/src/components/ui/ErrorBoundary.jsx +118 -0
- flowyml/ui/frontend/src/contexts/ProjectContext.jsx +2 -2
- flowyml/ui/frontend/src/contexts/ToastContext.jsx +116 -0
- flowyml/ui/frontend/src/layouts/MainLayout.jsx +5 -1
- flowyml/ui/frontend/src/router/index.jsx +4 -0
- flowyml/ui/frontend/src/utils/date.js +10 -0
- flowyml/ui/frontend/src/utils/downloads.js +11 -0
- flowyml/utils/config.py +6 -0
- flowyml/utils/stack_config.py +45 -3
- {flowyml-1.2.0.dist-info → flowyml-1.3.0.dist-info}/METADATA +42 -4
- {flowyml-1.2.0.dist-info → flowyml-1.3.0.dist-info}/RECORD +89 -52
- {flowyml-1.2.0.dist-info → flowyml-1.3.0.dist-info}/licenses/LICENSE +1 -1
- flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +0 -448
- flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +0 -1
- {flowyml-1.2.0.dist-info → flowyml-1.3.0.dist-info}/WHEEL +0 -0
- {flowyml-1.2.0.dist-info → flowyml-1.3.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
"""Model metrics logging API endpoints."""
|
|
2
|
+
|
|
3
|
+
from fastapi import APIRouter, HTTPException, Depends, Security, Query
|
|
4
|
+
from fastapi.security import HTTPAuthorizationCredentials
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from flowyml.ui.backend.auth import verify_api_token, security
|
|
9
|
+
from flowyml.storage.metadata import SQLiteMetadataStore
|
|
10
|
+
from flowyml.core.project import ProjectManager
|
|
11
|
+
from flowyml.utils.config import get_config
|
|
12
|
+
|
|
13
|
+
router = APIRouter()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def require_permission(permission: str):
    """Build a FastAPI dependency that validates the caller's API token.

    Args:
        permission: Value forwarded to ``verify_api_token`` as
            ``required_permission``.

    Returns:
        An async callable that obtains the request credentials through
        ``Security(security)`` and returns the result of
        ``verify_api_token``.
    """

    async def _check_token(credentials: HTTPAuthorizationCredentials = Security(security)):
        token_data = await verify_api_token(credentials, required_permission=permission)
        return token_data

    return _check_token
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def get_project_manager() -> ProjectManager:
    """Return a ``ProjectManager`` built from the configured ``projects_dir``."""
    projects_root = str(get_config().projects_dir)
    return ProjectManager(projects_root)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def get_global_store() -> SQLiteMetadataStore:
    """Return a ``SQLiteMetadataStore`` opened on the configured ``metadata_db``."""
    database_path = str(get_config().metadata_db)
    return SQLiteMetadataStore(database_path)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class MetricsLogRequest(BaseModel):
    """Request body accepted by the metrics logging endpoint."""

    # Required fields.
    project: str = Field(..., description="Project identifier")
    model_name: str = Field(..., description="Name of the model emitting metrics")
    metrics: dict[str, float] = Field(..., description="Dictionary of metric_name -> value")

    # Optional fields; the first two default to None, tags to an empty dict.
    run_id: str | None = Field(default=None, description="Related run identifier (optional)")
    environment: str | None = Field(default=None, description="Environment label (e.g., prod, staging)")
    tags: dict[str, Any] | None = Field(default_factory=dict, description="Optional metadata tags")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@router.post("/log")
async def log_model_metrics(
    payload: MetricsLogRequest,
    token_data: dict = Depends(require_permission("write")),
):
    """Log production metrics for a model.

    Requires tokens with the `write` permission. Project-scoped tokens
    may only submit metrics for their project.

    The metrics are written twice: once to the global metadata store and
    once to the project itself. A project that does not exist yet is
    created before logging.

    Args:
        payload: Validated request body.
        token_data: Token information produced by the ``write`` permission
            dependency; its ``project`` entry, when set, scopes the token.

    Returns:
        A dict with the project, the model name, the names of the logged
        metrics and a confirmation message.

    Raises:
        HTTPException: 403 when the token is scoped to another project,
            400 when ``metrics`` is empty or a value is not numeric.
    """
    scoped_project = token_data.get("project")
    if scoped_project and scoped_project != payload.project:
        raise HTTPException(
            status_code=403,
            detail=f"Token is scoped to project '{scoped_project}'",
        )

    if not payload.metrics:
        raise HTTPException(status_code=400, detail="metrics dictionary cannot be empty")

    numeric_metrics = {}
    for name, value in payload.metrics.items():
        try:
            numeric_metrics[name] = float(value)
        except (TypeError, ValueError) as exc:
            # Chain the cause so the original conversion error stays visible
            # in tracebacks and logs.
            raise HTTPException(
                status_code=400,
                detail=f"Metric '{name}' must be numeric.",
            ) from exc

    project_manager = get_project_manager()
    project = project_manager.get_project(payload.project)
    if not project:
        project = project_manager.create_project(payload.project)

    # Both writes receive exactly the same record.
    record = {
        "model_name": payload.model_name,
        "metrics": numeric_metrics,
        "run_id": payload.run_id,
        "environment": payload.environment,
        "tags": payload.tags,
    }

    shared_store = get_global_store()
    shared_store.log_model_metrics(project=payload.project, **record)
    project.log_model_metrics(**record)

    return {
        "project": payload.project,
        "model_name": payload.model_name,
        "logged_metrics": list(numeric_metrics.keys()),
        "message": "Metrics logged successfully",
    }
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@router.get("")
async def list_model_metrics(
    project: str | None = Query(default=None, description="Filter by project"),
    model_name: str | None = Query(default=None, description="Filter by model"),
    limit: int = Query(default=100, ge=1, le=500),
    token_data: dict = Depends(require_permission("read")),
):
    """Return the most recently logged model metrics.

    A project-scoped token is restricted to its own project: asking for a
    different project yields 403, and omitting the filter falls back to the
    token's project. With a project filter the project's own metadata store
    is queried; otherwise the global store is used.

    Raises:
        HTTPException: 403 on a project scope mismatch, 404 when the
            requested project does not exist.
    """
    scoped_project = token_data.get("project")
    if scoped_project:
        if project and project != scoped_project:
            raise HTTPException(
                status_code=403,
                detail=f"Token is scoped to project '{scoped_project}'",
            )
        project = scoped_project

    if not project:
        store = get_global_store()
    else:
        project_obj = get_project_manager().get_project(project)
        if not project_obj:
            raise HTTPException(status_code=404, detail=f"Project '{project}' not found")
        store = project_obj.metadata_store

    return {
        "metrics": store.list_model_metrics(project=project, model_name=model_name, limit=limit),
    }
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
@router.get("/observability/orchestrator")
async def get_orchestrator_metrics():
    """Get orchestrator-level performance metrics.

    Aggregates the rows of the ``runs`` table in the global metadata
    database whose ``created_at`` lies within the last 30 days.

    Returns:
        A dict with ``total_runs``, ``success_rate`` (share of runs with
        status ``"completed"``; 0 when there are no runs),
        ``avg_duration_seconds`` (0 when no run has a duration),
        ``status_distribution`` (status -> count) and ``period_days``.
    """
    from contextlib import closing
    from datetime import datetime, timedelta
    import sqlite3

    period_days = 30
    store = get_global_store()
    cutoff = (datetime.now() - timedelta(days=period_days)).isoformat()

    # closing() releases the connection even when a query raises; the
    # connection's own context manager only handles the transaction.
    with closing(sqlite3.connect(store.db_path)) as conn:
        cursor = conn.cursor()

        cursor.execute("SELECT COUNT(*) FROM runs WHERE created_at >= ?", (cutoff,))
        total_runs = cursor.fetchone()[0]

        cursor.execute(
            "SELECT status, COUNT(*) FROM runs WHERE created_at >= ? GROUP BY status",
            (cutoff,),
        )
        status_counts = dict(cursor.fetchall())

        cursor.execute(
            "SELECT AVG(duration) FROM runs WHERE created_at >= ? AND duration IS NOT NULL",
            (cutoff,),
        )
        # AVG() yields NULL when no row matches; report 0 in that case.
        avg_duration = cursor.fetchone()[0] or 0

    completed = status_counts.get("completed", 0)
    success_rate = completed / total_runs if total_runs > 0 else 0

    return {
        "total_runs": total_runs,
        "success_rate": success_rate,
        "avg_duration_seconds": avg_duration,
        "status_distribution": status_counts,
        "period_days": period_days,
    }
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
@router.get("/observability/cache")
async def get_cache_metrics():
    """Get cache performance metrics.

    Reads the ``metadata`` column of every run created in the last 30 days
    from the global metadata database, parses it as JSON and counts the
    entries under its ``steps`` mapping, noting how many have a truthy
    ``cached`` flag.

    Returns:
        A dict with ``total_steps``, ``cached_steps``, ``cache_hit_rate``
        (0 when no steps were found) and ``period_days``.
    """
    from contextlib import closing
    from datetime import datetime, timedelta
    import sqlite3
    import json as json_lib

    period_days = 30
    store = get_global_store()
    cutoff = (datetime.now() - timedelta(days=period_days)).isoformat()

    # closing() releases the connection even when the query raises; the
    # connection's own context manager only handles the transaction.
    with closing(sqlite3.connect(store.db_path)) as conn:
        cursor = conn.cursor()
        cursor.execute("SELECT metadata FROM runs WHERE created_at >= ?", (cutoff,))
        rows = cursor.fetchall()

    total_steps, cached_steps = 0, 0
    for row in rows:
        if not row[0]:
            continue
        try:
            metadata = json_lib.loads(row[0])
            for step_data in metadata.get("steps", {}).values():
                total_steps += 1
                if step_data.get("cached"):
                    cached_steps += 1
        except (ValueError, TypeError, AttributeError):
            # Best effort: skip runs whose metadata is not valid JSON or does
            # not have the expected mapping shape. Steps counted before the
            # failure stay counted.
            continue

    cache_hit_rate = cached_steps / total_steps if total_steps > 0 else 0

    return {
        "total_steps": total_steps,
        "cached_steps": cached_steps,
        "cache_hit_rate": cache_hit_rate,
        "period_days": period_days,
    }
|
|
@@ -1,25 +1,81 @@
|
|
|
1
1
|
from fastapi import APIRouter, HTTPException
|
|
2
2
|
from pydantic import BaseModel
|
|
3
3
|
from flowyml.storage.metadata import SQLiteMetadataStore
|
|
4
|
+
from flowyml.core.project import ProjectManager
|
|
4
5
|
from flowyml.utils.config import get_config
|
|
6
|
+
from typing import Optional
|
|
5
7
|
|
|
6
8
|
router = APIRouter()
|
|
7
9
|
|
|
8
10
|
|
|
9
11
|
def get_store():
    """Return a ``SQLiteMetadataStore`` constructed with its default arguments."""
    # The configuration is loaded first, but its return value is not used here.
    get_config()
    default_store = SQLiteMetadataStore()
    return default_store
|
|
14
14
|
|
|
15
15
|
|
|
16
|
+
def _iter_metadata_stores():
    """Return ``(project_name, store)`` pairs for the global and project stores.

    The first pair is always ``(None, SQLiteMetadataStore())``. One pair per
    named project reported by ``ProjectManager`` follows, using that
    project's ``metadata_store``.

    Returns:
        A list of ``(project_name or None, store)`` tuples. Enumerating the
        projects is best effort: on failure the pairs gathered so far are
        returned and the error is logged.
    """
    import logging

    stores = [(None, SQLiteMetadataStore())]
    try:
        manager = ProjectManager()
        for project_meta in manager.list_projects():
            name = project_meta.get("name")
            if not name:
                continue
            project = manager.get_project(name)
            if project:
                stores.append((name, project.metadata_store))
    except Exception:
        # Keep the endpoint usable with whatever was collected, but leave a
        # trace instead of hiding the failure entirely.
        logging.getLogger(__name__).warning(
            "Failed to enumerate project metadata stores",
            exc_info=True,
        )
    return stores
|
|
31
|
+
|
|
32
|
+
|
|
16
33
|
@router.get("/")
async def list_pipelines(project: Optional[str] = None, limit: int = 100):
    """List all unique pipelines with details, optionally filtered by project.

    Every metadata store returned by ``_iter_metadata_stores`` is scanned.
    For each pipeline name the first run returned by ``store.query``
    supplies the summary fields.

    Args:
        project: When given, only pipelines whose run project (or, failing
            that, store project) equals this value are returned.
        limit: Maximum number of pipelines to return; values below zero are
            treated as zero.

    Returns:
        ``{"pipelines": [...]}`` where each entry carries ``name``,
        ``created``, ``version``, ``status``, ``run_count``, ``last_run_id``
        and ``project``.

    Raises:
        HTTPException: 500 wrapping any error raised while collecting.
    """
    try:
        pipeline_map = {}  # "<name>:<project>" (or bare name) -> summary

        for project_name, store in _iter_metadata_stores():
            # A store that belongs to a different project cannot match.
            if project and project_name and project != project_name:
                continue

            for name in store.list_pipelines():
                runs = store.query(pipeline_name=name)
                if not runs:
                    continue

                last_run = runs[0]
                run_project = last_run.get("project") or project_name

                # The global store can hold runs of any project, so the
                # filter is applied again per run.
                if project and run_project != project:
                    continue

                # The project is part of the key so equally named pipelines
                # from different projects are listed separately.
                key = f"{name}:{run_project}" if run_project else name
                if key in pipeline_map:
                    continue

                git_sha = last_run.get("git_sha")
                pipeline_map[key] = {
                    "name": name,
                    "created": last_run.get("start_time"),
                    "version": git_sha[:7] if git_sha else "1.0",
                    "status": last_run.get("status", "unknown"),
                    "run_count": len(runs),
                    "last_run_id": last_run.get("run_id"),
                    "project": run_project,
                }

        # A negative slice bound would silently drop trailing items instead
        # of limiting the result, so clamp it.
        enriched_pipelines = list(pipeline_map.values())[: max(limit, 0)]
        return {"pipelines": enriched_pipelines}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
25
81
|
|
|
@@ -1,17 +1,23 @@
|
|
|
1
|
-
from fastapi import APIRouter, HTTPException
|
|
1
|
+
from fastapi import APIRouter, HTTPException, Depends
|
|
2
2
|
from flowyml.core.project import ProjectManager
|
|
3
|
+
from flowyml.utils.config import get_config
|
|
3
4
|
from pydantic import BaseModel
|
|
4
5
|
|
|
5
6
|
router = APIRouter()
|
|
6
|
-
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def get_projects_manager() -> ProjectManager:
    """Return a ``ProjectManager`` built from the configured ``projects_dir``."""
    projects_root = str(get_config().projects_dir)
    return ProjectManager(projects_root)
|
|
7
13
|
|
|
8
14
|
|
|
9
15
|
@router.get("/")
async def list_projects(manager: ProjectManager = Depends(get_projects_manager)):
    """Return every project known to the manager under the ``projects`` key.

    Raises:
        HTTPException: 500 carrying the message of any error raised by
            ``manager.list_projects``.
    """
    try:
        return {"projects": manager.list_projects()}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
|
17
23
|
|
|
@@ -22,7 +28,7 @@ class ProjectCreate(BaseModel):
|
|
|
22
28
|
|
|
23
29
|
|
|
24
30
|
@router.post("/")
|
|
25
|
-
async def create_project(project: ProjectCreate):
|
|
31
|
+
async def create_project(project: ProjectCreate, manager: ProjectManager = Depends(get_projects_manager)):
|
|
26
32
|
"""Create a new project."""
|
|
27
33
|
created_project = manager.create_project(project.name, project.description)
|
|
28
34
|
return {
|
|
@@ -33,7 +39,7 @@ async def create_project(project: ProjectCreate):
|
|
|
33
39
|
|
|
34
40
|
|
|
35
41
|
@router.get("/{project_name}")
|
|
36
|
-
async def get_project(project_name: str):
|
|
42
|
+
async def get_project(project_name: str, manager: ProjectManager = Depends(get_projects_manager)):
|
|
37
43
|
"""Get project details."""
|
|
38
44
|
project = manager.get_project(project_name)
|
|
39
45
|
if not project:
|
|
@@ -53,6 +59,7 @@ async def get_project_runs(
|
|
|
53
59
|
project_name: str,
|
|
54
60
|
pipeline_name: str | None = None,
|
|
55
61
|
limit: int = 100,
|
|
62
|
+
manager: ProjectManager = Depends(get_projects_manager),
|
|
56
63
|
):
|
|
57
64
|
"""Get runs for a project."""
|
|
58
65
|
project = manager.get_project(project_name)
|
|
@@ -68,6 +75,7 @@ async def get_project_artifacts(
|
|
|
68
75
|
project_name: str,
|
|
69
76
|
artifact_type: str | None = None,
|
|
70
77
|
limit: int = 100,
|
|
78
|
+
manager: ProjectManager = Depends(get_projects_manager),
|
|
71
79
|
):
|
|
72
80
|
"""Get artifacts for a project."""
|
|
73
81
|
project = manager.get_project(project_name)
|
|
@@ -78,8 +86,26 @@ async def get_project_artifacts(
|
|
|
78
86
|
return artifacts
|
|
79
87
|
|
|
80
88
|
|
|
89
|
+
@router.get("/{project_name}/metrics")
async def get_project_metrics(
    project_name: str,
    model_name: str | None = None,
    limit: int = 100,
    manager: ProjectManager = Depends(get_projects_manager),
):
    """Return the production metrics logged for one project.

    Raises:
        HTTPException: 404 when the project does not exist.
    """
    project = manager.get_project(project_name)
    if project:
        logged = project.list_model_metrics(model_name=model_name, limit=limit)
        return {"project": project_name, "metrics": logged}

    raise HTTPException(status_code=404, detail="Project not found")
|
|
105
|
+
|
|
106
|
+
|
|
81
107
|
@router.delete("/{project_name}")
async def delete_project(project_name: str, manager: ProjectManager = Depends(get_projects_manager)):
    """Delete the named project and report success."""
    # The manager is called with confirm=True.
    manager.delete_project(project_name, confirm=True)
    response = {"deleted": True}
    return response
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
from fastapi import APIRouter, HTTPException
|
|
2
2
|
from pydantic import BaseModel
|
|
3
3
|
from flowyml.storage.metadata import SQLiteMetadataStore
|
|
4
|
+
from flowyml.core.project import ProjectManager
|
|
5
|
+
from typing import Optional
|
|
6
|
+
import json
|
|
4
7
|
|
|
5
8
|
router = APIRouter()
|
|
6
9
|
|
|
@@ -9,17 +12,62 @@ def get_store():
|
|
|
9
12
|
return SQLiteMetadataStore()
|
|
10
13
|
|
|
11
14
|
|
|
15
|
+
def _iter_metadata_stores():
    """Return ``(project_name, store)`` pairs for the global and project stores.

    The first pair is always ``(None, SQLiteMetadataStore())``. One pair per
    named project reported by ``ProjectManager`` follows, using that
    project's ``metadata_store``.

    Returns:
        A list of ``(project_name or None, store)`` tuples. Enumerating the
        projects is best effort: on failure the pairs gathered so far are
        returned and the error is logged.
    """
    import logging

    stores: list[tuple[Optional[str], SQLiteMetadataStore]] = [(None, SQLiteMetadataStore())]
    try:
        manager = ProjectManager()
        for project_meta in manager.list_projects():
            name = project_meta.get("name")
            if not name:
                continue
            project = manager.get_project(name)
            if project:
                stores.append((name, project.metadata_store))
    except Exception:
        # Keep the endpoint usable with whatever was collected, but leave a
        # trace instead of hiding the failure entirely.
        logging.getLogger(__name__).warning(
            "Failed to enumerate project metadata stores",
            exc_info=True,
        )
    return stores
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _deduplicate_runs(runs):
|
|
33
|
+
seen = {}
|
|
34
|
+
for run, project_name in runs:
|
|
35
|
+
run_id = run.get("run_id") or f"{project_name}:{len(seen)}"
|
|
36
|
+
if run_id in seen:
|
|
37
|
+
continue
|
|
38
|
+
entry = dict(run)
|
|
39
|
+
if project_name and not entry.get("project"):
|
|
40
|
+
entry["project"] = project_name
|
|
41
|
+
seen[run_id] = entry
|
|
42
|
+
return list(seen.values())
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _sort_runs(runs):
|
|
46
|
+
def sort_key(run):
|
|
47
|
+
return run.get("start_time") or run.get("created_at") or ""
|
|
48
|
+
|
|
49
|
+
return sorted(runs, key=sort_key, reverse=True)
|
|
50
|
+
|
|
51
|
+
|
|
12
52
|
@router.get("/")
async def list_runs(limit: int = 20, project: str = None):
    """Return the newest runs across all metadata stores.

    Up to ``limit`` runs are read from each store, merged, de-duplicated,
    optionally filtered by ``project``, sorted newest first and cut to
    ``limit``. Any error yields an empty list plus an ``error`` message
    rather than an HTTP error.
    """
    try:
        tagged_runs = []
        for store_project, store in _iter_metadata_stores():
            # A store that belongs to a different project cannot match.
            foreign_store = bool(project and store_project and project != store_project)
            if not foreign_store:
                tagged_runs.extend((run, store_project) for run in store.list_runs(limit=limit))

        merged = _deduplicate_runs(tagged_runs)
        if project:
            merged = [run for run in merged if run.get("project") == project]

        return {"runs": _sort_runs(merged)[:limit]}
    except Exception as e:
        return {"runs": [], "error": str(e)}
|
|
@@ -28,8 +76,7 @@ async def list_runs(limit: int = 20, project: str = None):
|
|
|
28
76
|
@router.get("/{run_id}")
async def get_run(run_id: str):
    """Return the stored record of one run.

    Raises:
        HTTPException: 404 when no store holds the run.
    """
    found = _find_run(run_id)[0]
    if found:
        return found
    raise HTTPException(status_code=404, detail="Run not found")
|
|
@@ -38,7 +85,7 @@ async def get_run(run_id: str):
|
|
|
38
85
|
@router.get("/{run_id}/metrics")
async def get_run_metrics(run_id: str):
    """Return the metrics recorded for one run, read from the store holding it."""
    owning_store = _find_store_for_run(run_id)
    return {"metrics": owning_store.get_metrics(run_id)}
|
|
44
91
|
|
|
@@ -46,7 +93,7 @@ async def get_run_metrics(run_id: str):
|
|
|
46
93
|
@router.get("/{run_id}/artifacts")
async def get_run_artifacts(run_id: str):
    """Return the assets listed for one run, read from the store holding it."""
    owning_store = _find_store_for_run(run_id)
    return {"artifacts": owning_store.list_assets(run_id=run_id)}
|
|
52
99
|
|
|
@@ -58,9 +105,104 @@ class ProjectUpdate(BaseModel):
|
|
|
58
105
|
@router.put("/{run_id}/project")
async def update_run_project(run_id: str, update: ProjectUpdate):
    """Reassign a run to another project.

    The store lookup happens outside the ``try`` so that its own 404 is not
    converted into a 500.

    Raises:
        HTTPException: 500 carrying the message of any error raised by the
            store update.
    """
    owning_store = _find_store_for_run(run_id)
    new_project = update.project_name
    try:
        owning_store.update_run_project(run_id, new_project)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return {"status": "success", "project": new_project}
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _find_run(run_id: str):
    """Locate a run in the first metadata store that contains it.

    Returns:
        ``(run, store)`` for the first store whose ``load_run`` returns a
        truthy record, or ``(None, None)`` when no store has it. The
        record's ``project`` is set to the store's project name when the
        record has none and the store is project-bound.
    """
    for owner, candidate in _iter_metadata_stores():
        record = candidate.load_run(run_id)
        if not record:
            continue
        if owner and not record.get("project"):
            record["project"] = owner
        return record, candidate
    return None, None
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _find_store_for_run(run_id: str) -> SQLiteMetadataStore:
    """Return the metadata store that holds ``run_id``.

    Raises:
        HTTPException: 404 when ``_find_run`` yields no store.
    """
    owning_store = _find_run(run_id)[1]
    if not owning_store:
        raise HTTPException(status_code=404, detail="Run not found")
    return owning_store
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@router.get("/{run_id}/cloud-status")
async def get_cloud_status(run_id: str):
    """Get real-time status from cloud orchestrator for remote runs.

    Returns cloud provider status if the run is remote, otherwise returns
    status from metadata store.

    The run's ``metadata`` may be a dict or a JSON string; anything else
    (missing, ``None``, malformed JSON, non-object JSON) is treated as
    empty, which makes the run count as local.

    Returns:
        A dict with ``run_id``, ``status``, ``orchestrator_type``,
        ``is_remote`` and ``cloud_status``; remote runs additionally carry
        ``cloud_error``, which describes why no cloud status is available.

    Raises:
        HTTPException: 404 when the run is not found in any store.
    """
    run, _ = _find_run(run_id)
    if not run:
        raise HTTPException(status_code=404, detail="Run not found")

    # Get orchestrator info from run metadata.
    metadata = run.get("metadata") or {}
    if isinstance(metadata, str):
        try:
            metadata = json.loads(metadata)
        except ValueError:
            metadata = {}
    if not isinstance(metadata, dict):
        # Valid JSON that is not an object has no orchestrator info.
        metadata = {}

    orchestrator_type = metadata.get("orchestrator_type", "local")

    # If local run, just return metadata store status.
    if orchestrator_type == "local":
        return {
            "run_id": run_id,
            "status": run.get("status", "unknown"),
            "orchestrator_type": "local",
            "is_remote": False,
            "cloud_status": None,
        }

    # For remote runs, try to query the cloud orchestrator. Failures are
    # reported in the response rather than raised.
    cloud_status = None
    cloud_error = None

    try:
        # Import orchestrators dynamically to avoid errors if cloud SDKs aren't installed.
        from flowyml.utils.stack_config import load_active_stack

        stack = load_active_stack()
        if not stack or not stack.orchestrator:
            cloud_error = "No active stack or orchestrator configured"
        else:
            orchestrator = stack.orchestrator

            # Not every orchestrator exposes a status query.
            if hasattr(orchestrator, "get_run_status"):
                from flowyml.core.execution_status import ExecutionStatus

                status = orchestrator.get_run_status(run_id)

                # Convert ExecutionStatus to dict; other values are stringified.
                if isinstance(status, ExecutionStatus):
                    cloud_status = {
                        "status": status.value,
                        "is_finished": status.is_finished,
                        "is_successful": status.is_successful,
                    }
                else:
                    cloud_status = {"status": str(status)}
            else:
                cloud_error = f"Orchestrator {orchestrator_type} does not support status queries"

    except ImportError as e:
        cloud_error = f"Cloud SDK not available: {str(e)}"
    except Exception as e:
        cloud_error = f"Error querying cloud status: {str(e)}"

    return {
        "run_id": run_id,
        "status": run.get("status", "unknown"),
        "orchestrator_type": orchestrator_type,
        "is_remote": True,
        "cloud_status": cloud_status,
        "cloud_error": cloud_error,
    }
|