flowyml 1.3.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/core/execution_status.py +1 -0
- flowyml/core/executor.py +175 -3
- flowyml/core/observability.py +7 -7
- flowyml/core/resources.py +12 -12
- flowyml/core/retry_policy.py +2 -2
- flowyml/core/scheduler.py +9 -9
- flowyml/core/scheduler_config.py +2 -3
- flowyml/core/submission_result.py +4 -4
- flowyml/stacks/bridge.py +9 -9
- flowyml/stacks/plugins.py +2 -2
- flowyml/stacks/registry.py +21 -0
- flowyml/storage/materializers/base.py +33 -0
- flowyml/storage/metadata.py +3 -1042
- flowyml/storage/remote.py +590 -0
- flowyml/storage/sql.py +951 -0
- flowyml/ui/backend/dependencies.py +28 -0
- flowyml/ui/backend/main.py +4 -79
- flowyml/ui/backend/routers/assets.py +170 -9
- flowyml/ui/backend/routers/client.py +6 -6
- flowyml/ui/backend/routers/execution.py +2 -2
- flowyml/ui/backend/routers/experiments.py +53 -6
- flowyml/ui/backend/routers/metrics.py +23 -68
- flowyml/ui/backend/routers/pipelines.py +19 -10
- flowyml/ui/backend/routers/runs.py +287 -9
- flowyml/ui/backend/routers/schedules.py +5 -21
- flowyml/ui/backend/routers/stats.py +14 -0
- flowyml/ui/backend/routers/traces.py +37 -53
- flowyml/ui/backend/routers/websocket.py +121 -0
- flowyml/ui/frontend/dist/assets/index-CBUXOWze.css +1 -0
- flowyml/ui/frontend/dist/assets/index-DF8dJaFL.js +629 -0
- flowyml/ui/frontend/dist/index.html +2 -2
- flowyml/ui/frontend/package-lock.json +289 -0
- flowyml/ui/frontend/package.json +1 -0
- flowyml/ui/frontend/src/app/compare/page.jsx +213 -0
- flowyml/ui/frontend/src/app/experiments/compare/page.jsx +289 -0
- flowyml/ui/frontend/src/app/experiments/page.jsx +61 -1
- flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +418 -203
- flowyml/ui/frontend/src/app/runs/page.jsx +64 -3
- flowyml/ui/frontend/src/app/settings/page.jsx +1 -1
- flowyml/ui/frontend/src/app/tokens/page.jsx +8 -6
- flowyml/ui/frontend/src/components/ArtifactViewer.jsx +159 -0
- flowyml/ui/frontend/src/components/NavigationTree.jsx +26 -9
- flowyml/ui/frontend/src/components/PipelineGraph.jsx +26 -24
- flowyml/ui/frontend/src/components/RunDetailsPanel.jsx +42 -14
- flowyml/ui/frontend/src/router/index.jsx +4 -0
- {flowyml-1.3.0.dist-info → flowyml-1.5.0.dist-info}/METADATA +3 -1
- {flowyml-1.3.0.dist-info → flowyml-1.5.0.dist-info}/RECORD +50 -42
- flowyml/ui/frontend/dist/assets/index-DcYwrn2j.css +0 -1
- flowyml/ui/frontend/dist/assets/index-Dlz_ygOL.js +0 -592
- {flowyml-1.3.0.dist-info → flowyml-1.5.0.dist-info}/WHEEL +0 -0
- {flowyml-1.3.0.dist-info → flowyml-1.5.0.dist-info}/entry_points.txt +0 -0
- {flowyml-1.3.0.dist-info → flowyml-1.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""Backend dependencies."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from flowyml.storage.sql import SQLMetadataStore
|
|
5
|
+
from flowyml.utils.config import get_config
|
|
6
|
+
|
|
7
|
+
_store = None
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def get_store() -> SQLMetadataStore:
|
|
11
|
+
"""Get the metadata store instance.
|
|
12
|
+
|
|
13
|
+
Uses FLOWYML_DATABASE_URL if set, otherwise defaults to local SQLite.
|
|
14
|
+
"""
|
|
15
|
+
global _store
|
|
16
|
+
if _store is None:
|
|
17
|
+
config = get_config()
|
|
18
|
+
db_url = os.environ.get("FLOWYML_DATABASE_URL")
|
|
19
|
+
|
|
20
|
+
# If no explicit URL, use the config's metadata_db path
|
|
21
|
+
if not db_url:
|
|
22
|
+
db_path = config.metadata_db
|
|
23
|
+
# Ensure it's a string path for SQLMetadataStore
|
|
24
|
+
_store = SQLMetadataStore(db_path=str(db_path))
|
|
25
|
+
else:
|
|
26
|
+
_store = SQLMetadataStore(db_url=db_url)
|
|
27
|
+
|
|
28
|
+
return _store
|
flowyml/ui/backend/main.py
CHANGED
|
@@ -23,6 +23,8 @@ from flowyml.ui.backend.routers import (
|
|
|
23
23
|
plugins,
|
|
24
24
|
metrics,
|
|
25
25
|
client,
|
|
26
|
+
stats,
|
|
27
|
+
websocket,
|
|
26
28
|
)
|
|
27
29
|
|
|
28
30
|
app = FastAPI(
|
|
@@ -75,85 +77,8 @@ app.include_router(execution.router, prefix="/api/execution", tags=["execution"]
|
|
|
75
77
|
app.include_router(metrics.router, prefix="/api/metrics", tags=["metrics"])
|
|
76
78
|
app.include_router(plugins.router, prefix="/api", tags=["plugins"])
|
|
77
79
|
app.include_router(client.router, prefix="/api/client", tags=["client"])
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
# Stats endpoint for dashboard
|
|
81
|
-
@app.get("/api/stats")
|
|
82
|
-
async def get_stats(project: str = None):
|
|
83
|
-
"""Get overall statistics for the dashboard, optionally filtered by project."""
|
|
84
|
-
try:
|
|
85
|
-
from flowyml.storage.metadata import SQLiteMetadataStore
|
|
86
|
-
|
|
87
|
-
store = SQLiteMetadataStore()
|
|
88
|
-
|
|
89
|
-
# Get base stats
|
|
90
|
-
stats = store.get_statistics()
|
|
91
|
-
|
|
92
|
-
# Get run status counts (not in get_statistics yet)
|
|
93
|
-
# We can add this to get_statistics later, but for now let's query efficiently
|
|
94
|
-
import sqlite3
|
|
95
|
-
|
|
96
|
-
conn = sqlite3.connect(store.db_path)
|
|
97
|
-
cursor = conn.cursor()
|
|
98
|
-
|
|
99
|
-
if project:
|
|
100
|
-
cursor.execute(
|
|
101
|
-
"SELECT COUNT(*) FROM runs WHERE project = ? AND status = 'completed'",
|
|
102
|
-
[project],
|
|
103
|
-
)
|
|
104
|
-
completed_runs = cursor.fetchone()[0]
|
|
105
|
-
|
|
106
|
-
cursor.execute(
|
|
107
|
-
"SELECT COUNT(*) FROM runs WHERE project = ? AND status = 'failed'",
|
|
108
|
-
[project],
|
|
109
|
-
)
|
|
110
|
-
failed_runs = cursor.fetchone()[0]
|
|
111
|
-
|
|
112
|
-
cursor.execute(
|
|
113
|
-
"SELECT AVG(duration) FROM runs WHERE project = ? AND duration IS NOT NULL",
|
|
114
|
-
[project],
|
|
115
|
-
)
|
|
116
|
-
avg_duration = cursor.fetchone()[0] or 0
|
|
117
|
-
|
|
118
|
-
cursor.execute(
|
|
119
|
-
"SELECT COUNT(*) FROM runs WHERE project = ?",
|
|
120
|
-
[project],
|
|
121
|
-
)
|
|
122
|
-
total_runs = cursor.fetchone()[0]
|
|
123
|
-
else:
|
|
124
|
-
cursor.execute("SELECT COUNT(*) FROM runs WHERE status = 'completed'")
|
|
125
|
-
completed_runs = cursor.fetchone()[0]
|
|
126
|
-
|
|
127
|
-
cursor.execute("SELECT COUNT(*) FROM runs WHERE status = 'failed'")
|
|
128
|
-
failed_runs = cursor.fetchone()[0]
|
|
129
|
-
|
|
130
|
-
cursor.execute("SELECT AVG(duration) FROM runs WHERE duration IS NOT NULL")
|
|
131
|
-
avg_duration = cursor.fetchone()[0] or 0
|
|
132
|
-
|
|
133
|
-
cursor.execute("SELECT COUNT(*) FROM runs")
|
|
134
|
-
total_runs = cursor.fetchone()[0]
|
|
135
|
-
|
|
136
|
-
conn.close()
|
|
137
|
-
|
|
138
|
-
return {
|
|
139
|
-
"runs": total_runs if project else stats.get("total_runs", 0),
|
|
140
|
-
"completed_runs": completed_runs,
|
|
141
|
-
"failed_runs": failed_runs,
|
|
142
|
-
"pipelines": stats.get("total_pipelines", 0), # TODO: filter by project
|
|
143
|
-
"artifacts": stats.get("total_artifacts", 0), # TODO: filter by project
|
|
144
|
-
"avg_duration": avg_duration,
|
|
145
|
-
}
|
|
146
|
-
except Exception as e:
|
|
147
|
-
# Return default stats if there's an error
|
|
148
|
-
return {
|
|
149
|
-
"runs": 0,
|
|
150
|
-
"completed_runs": 0,
|
|
151
|
-
"failed_runs": 0,
|
|
152
|
-
"pipelines": 0,
|
|
153
|
-
"artifacts": 0,
|
|
154
|
-
"avg_duration": 0,
|
|
155
|
-
"error": str(e),
|
|
156
|
-
}
|
|
80
|
+
app.include_router(stats.router, prefix="/api/stats", tags=["stats"])
|
|
81
|
+
app.include_router(websocket.router, tags=["websocket"])
|
|
157
82
|
|
|
158
83
|
|
|
159
84
|
# Static file serving for frontend
|
|
@@ -1,16 +1,20 @@
|
|
|
1
|
-
from fastapi import APIRouter, HTTPException
|
|
1
|
+
from fastapi import APIRouter, HTTPException, UploadFile, File
|
|
2
2
|
from fastapi.responses import FileResponse
|
|
3
|
-
from pydantic import BaseModel
|
|
3
|
+
from pydantic import BaseModel, Field
|
|
4
4
|
from flowyml.storage.metadata import SQLiteMetadataStore
|
|
5
5
|
from flowyml.core.project import ProjectManager
|
|
6
|
-
from typing import Optional
|
|
7
6
|
from pathlib import Path
|
|
7
|
+
from flowyml.ui.backend.dependencies import get_store
|
|
8
|
+
import shutil
|
|
9
|
+
import asyncio
|
|
10
|
+
import contextlib
|
|
8
11
|
|
|
9
12
|
router = APIRouter()
|
|
10
13
|
|
|
11
14
|
|
|
12
|
-
def
|
|
13
|
-
|
|
15
|
+
def _save_file_sync(src, dst):
|
|
16
|
+
with open(dst, "wb") as buffer:
|
|
17
|
+
shutil.copyfileobj(src, buffer)
|
|
14
18
|
|
|
15
19
|
|
|
16
20
|
def _iter_metadata_stores():
|
|
@@ -73,8 +77,129 @@ async def list_assets(limit: int = 50, asset_type: str = None, run_id: str = Non
|
|
|
73
77
|
return {"assets": [], "error": str(e)}
|
|
74
78
|
|
|
75
79
|
|
|
80
|
+
class AssetCreate(BaseModel):
|
|
81
|
+
artifact_id: str
|
|
82
|
+
name: str
|
|
83
|
+
asset_type: str = Field(..., alias="type")
|
|
84
|
+
run_id: str
|
|
85
|
+
step: str
|
|
86
|
+
project: str | None = None
|
|
87
|
+
metadata: dict = {}
|
|
88
|
+
value: str | None = None
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@router.post("/")
|
|
92
|
+
async def create_asset(asset: AssetCreate):
|
|
93
|
+
"""Create or update an asset metadata."""
|
|
94
|
+
try:
|
|
95
|
+
store = get_store()
|
|
96
|
+
|
|
97
|
+
# Prepare metadata
|
|
98
|
+
metadata = asset.metadata.copy()
|
|
99
|
+
metadata.update(
|
|
100
|
+
{
|
|
101
|
+
"name": asset.name,
|
|
102
|
+
"type": asset.asset_type,
|
|
103
|
+
"run_id": asset.run_id,
|
|
104
|
+
"step": asset.step,
|
|
105
|
+
"project": asset.project,
|
|
106
|
+
"value": asset.value,
|
|
107
|
+
},
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
store.save_artifact(asset.artifact_id, metadata)
|
|
111
|
+
return {"status": "success", "artifact_id": asset.artifact_id}
|
|
112
|
+
except Exception as e:
|
|
113
|
+
raise HTTPException(status_code=500, detail=str(e))
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
@router.post("/{artifact_id}/upload")
|
|
117
|
+
async def upload_asset_content(artifact_id: str, file: UploadFile = File(...)):
|
|
118
|
+
"""Upload content for an artifact."""
|
|
119
|
+
try:
|
|
120
|
+
store = get_store()
|
|
121
|
+
|
|
122
|
+
# Get existing metadata to find path or create a new one
|
|
123
|
+
existing = store.load_artifact(artifact_id)
|
|
124
|
+
|
|
125
|
+
if not existing:
|
|
126
|
+
raise HTTPException(status_code=404, detail="Artifact metadata not found. Create metadata first.")
|
|
127
|
+
|
|
128
|
+
# Determine storage path
|
|
129
|
+
# We use the LocalArtifactStore logic here since the backend is running locally relative to itself
|
|
130
|
+
from flowyml.storage.artifacts import LocalArtifactStore
|
|
131
|
+
from flowyml.utils.config import get_config
|
|
132
|
+
|
|
133
|
+
config = get_config()
|
|
134
|
+
artifact_store = LocalArtifactStore(base_path=config.artifacts_dir)
|
|
135
|
+
|
|
136
|
+
# Construct a path if not present
|
|
137
|
+
if not existing.get("path"):
|
|
138
|
+
# Create a path structure: project/run_id/artifact_id/filename
|
|
139
|
+
project = existing.get("project", "default")
|
|
140
|
+
run_id = existing.get("run_id", "unknown")
|
|
141
|
+
filename = file.filename or "content"
|
|
142
|
+
rel_path = f"{project}/{run_id}/{artifact_id}/{filename}"
|
|
143
|
+
else:
|
|
144
|
+
rel_path = existing.get("path")
|
|
145
|
+
# If path is absolute, make it relative to artifacts dir if possible, or just use it
|
|
146
|
+
# But LocalArtifactStore expects relative paths usually, or handles absolute ones
|
|
147
|
+
|
|
148
|
+
# Save the file
|
|
149
|
+
full_path = artifact_store.base_path / rel_path
|
|
150
|
+
full_path.parent.mkdir(parents=True, exist_ok=True)
|
|
151
|
+
|
|
152
|
+
loop = asyncio.get_running_loop()
|
|
153
|
+
await loop.run_in_executor(None, _save_file_sync, file.file, full_path)
|
|
154
|
+
|
|
155
|
+
# Update metadata with path
|
|
156
|
+
existing["path"] = str(rel_path)
|
|
157
|
+
store.save_artifact(artifact_id, existing)
|
|
158
|
+
|
|
159
|
+
return {"status": "success", "path": str(rel_path)}
|
|
160
|
+
|
|
161
|
+
except HTTPException:
|
|
162
|
+
raise
|
|
163
|
+
except Exception as e:
|
|
164
|
+
raise HTTPException(status_code=500, detail=str(e))
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
@router.delete("/{artifact_id}")
|
|
168
|
+
async def delete_asset(artifact_id: str):
|
|
169
|
+
"""Delete an asset and its file."""
|
|
170
|
+
try:
|
|
171
|
+
store = get_store()
|
|
172
|
+
|
|
173
|
+
# Get metadata to find path
|
|
174
|
+
asset = store.load_artifact(artifact_id)
|
|
175
|
+
if not asset:
|
|
176
|
+
raise HTTPException(status_code=404, detail="Asset not found")
|
|
177
|
+
|
|
178
|
+
# Delete file if it exists locally (since backend is local to itself)
|
|
179
|
+
path = asset.get("path")
|
|
180
|
+
if path:
|
|
181
|
+
from flowyml.storage.artifacts import LocalArtifactStore
|
|
182
|
+
from flowyml.utils.config import get_config
|
|
183
|
+
|
|
184
|
+
config = get_config()
|
|
185
|
+
artifact_store = LocalArtifactStore(base_path=config.artifacts_dir)
|
|
186
|
+
|
|
187
|
+
with contextlib.suppress(Exception):
|
|
188
|
+
artifact_store.delete(path)
|
|
189
|
+
|
|
190
|
+
# Delete metadata
|
|
191
|
+
store.delete_artifact(artifact_id)
|
|
192
|
+
|
|
193
|
+
return {"status": "success", "artifact_id": artifact_id}
|
|
194
|
+
|
|
195
|
+
except HTTPException:
|
|
196
|
+
raise
|
|
197
|
+
except Exception as e:
|
|
198
|
+
raise HTTPException(status_code=500, detail=str(e))
|
|
199
|
+
|
|
200
|
+
|
|
76
201
|
@router.get("/stats")
|
|
77
|
-
async def get_asset_stats(project: Optional[str] = None):
|
|
202
|
+
async def get_asset_stats(project: str | None = None):
|
|
78
203
|
"""Get statistics about assets for the dashboard."""
|
|
79
204
|
try:
|
|
80
205
|
combined_assets = []
|
|
@@ -129,7 +254,7 @@ async def get_asset_stats(project: Optional[str] = None):
|
|
|
129
254
|
|
|
130
255
|
|
|
131
256
|
@router.get("/search")
|
|
132
|
-
async def search_assets(q: str, limit: int = 50, project: Optional[str] = None):
|
|
257
|
+
async def search_assets(q: str, limit: int = 50, project: str | None = None):
|
|
133
258
|
"""Search assets by name or properties."""
|
|
134
259
|
try:
|
|
135
260
|
combined_assets = []
|
|
@@ -170,8 +295,8 @@ async def search_assets(q: str, limit: int = 50, project: Optional[str] = None):
|
|
|
170
295
|
|
|
171
296
|
@router.get("/lineage")
|
|
172
297
|
async def get_asset_lineage(
|
|
173
|
-
asset_id:
|
|
174
|
-
project:
|
|
298
|
+
asset_id: str | None = None,
|
|
299
|
+
project: str | None = None,
|
|
175
300
|
depth: int = 3,
|
|
176
301
|
):
|
|
177
302
|
"""
|
|
@@ -351,6 +476,42 @@ async def download_asset(artifact_id: str):
|
|
|
351
476
|
)
|
|
352
477
|
|
|
353
478
|
|
|
479
|
+
@router.get("/{artifact_id}/content")
|
|
480
|
+
async def get_asset_content(artifact_id: str):
|
|
481
|
+
"""Get the artifact content for inline viewing."""
|
|
482
|
+
import mimetypes
|
|
483
|
+
|
|
484
|
+
asset, _ = _find_asset_with_store(artifact_id)
|
|
485
|
+
if not asset:
|
|
486
|
+
raise HTTPException(status_code=404, detail="Asset not found")
|
|
487
|
+
|
|
488
|
+
artifact_path = asset.get("path")
|
|
489
|
+
if not artifact_path:
|
|
490
|
+
raise HTTPException(status_code=404, detail="Artifact path not available")
|
|
491
|
+
|
|
492
|
+
# Handle relative paths for local store
|
|
493
|
+
from flowyml.utils.config import get_config
|
|
494
|
+
|
|
495
|
+
config = get_config()
|
|
496
|
+
|
|
497
|
+
file_path = Path(artifact_path)
|
|
498
|
+
if not file_path.is_absolute():
|
|
499
|
+
file_path = config.artifacts_dir / file_path
|
|
500
|
+
|
|
501
|
+
if not file_path.exists():
|
|
502
|
+
raise HTTPException(status_code=404, detail="Artifact file not found on disk")
|
|
503
|
+
|
|
504
|
+
# Guess mime type
|
|
505
|
+
mime_type, _ = mimetypes.guess_type(file_path.name)
|
|
506
|
+
if not mime_type:
|
|
507
|
+
mime_type = "text/plain" # Default fallback
|
|
508
|
+
|
|
509
|
+
return FileResponse(
|
|
510
|
+
path=file_path,
|
|
511
|
+
media_type=mime_type,
|
|
512
|
+
)
|
|
513
|
+
|
|
514
|
+
|
|
354
515
|
class ProjectUpdate(BaseModel):
|
|
355
516
|
project_name: str
|
|
356
517
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from fastapi import APIRouter, Request
|
|
2
2
|
from pydantic import BaseModel
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any
|
|
4
4
|
from flowyml.monitoring.alerts import alert_manager, AlertLevel
|
|
5
5
|
|
|
6
6
|
router = APIRouter()
|
|
@@ -8,11 +8,11 @@ router = APIRouter()
|
|
|
8
8
|
|
|
9
9
|
class ClientError(BaseModel):
|
|
10
10
|
message: str
|
|
11
|
-
stack:
|
|
12
|
-
component_stack:
|
|
13
|
-
url:
|
|
14
|
-
user_agent:
|
|
15
|
-
additional_info:
|
|
11
|
+
stack: str | None = None
|
|
12
|
+
component_stack: str | None = None
|
|
13
|
+
url: str | None = None
|
|
14
|
+
user_agent: str | None = None
|
|
15
|
+
additional_info: dict[str, Any] | None = None
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
@router.post("/errors")
|
|
@@ -97,10 +97,10 @@ async def execute_pipeline(
|
|
|
97
97
|
run_kwargs = request.parameters.copy()
|
|
98
98
|
|
|
99
99
|
if request.retry_count > 0:
|
|
100
|
-
from flowyml.core.
|
|
100
|
+
from flowyml.core.retry_policy import OrchestratorRetryPolicy
|
|
101
101
|
|
|
102
102
|
run_kwargs["retry_policy"] = OrchestratorRetryPolicy(
|
|
103
|
-
|
|
103
|
+
max_attempts=min(request.retry_count, 5), # Cap at 5
|
|
104
104
|
)
|
|
105
105
|
|
|
106
106
|
result = pipeline.run(**run_kwargs)
|
|
@@ -1,15 +1,12 @@
|
|
|
1
1
|
from fastapi import APIRouter, HTTPException
|
|
2
2
|
from flowyml.storage.metadata import SQLiteMetadataStore
|
|
3
3
|
from flowyml.core.project import ProjectManager
|
|
4
|
-
from
|
|
4
|
+
from flowyml.ui.backend.dependencies import get_store
|
|
5
|
+
from pydantic import BaseModel
|
|
5
6
|
|
|
6
7
|
router = APIRouter()
|
|
7
8
|
|
|
8
9
|
|
|
9
|
-
def get_store():
|
|
10
|
-
return SQLiteMetadataStore()
|
|
11
|
-
|
|
12
|
-
|
|
13
10
|
def _iter_metadata_stores():
|
|
14
11
|
"""Yield tuples of (project_name, store) including global and project stores."""
|
|
15
12
|
stores = [(None, SQLiteMetadataStore())]
|
|
@@ -28,7 +25,7 @@ def _iter_metadata_stores():
|
|
|
28
25
|
|
|
29
26
|
|
|
30
27
|
@router.get("/")
|
|
31
|
-
async def list_experiments(project:
|
|
28
|
+
async def list_experiments(project: str | None = None):
|
|
32
29
|
"""List all experiments, optionally filtered by project."""
|
|
33
30
|
try:
|
|
34
31
|
combined_experiments = []
|
|
@@ -83,3 +80,53 @@ async def update_experiment_project(experiment_name: str, project_update: dict):
|
|
|
83
80
|
return {"message": f"Updated experiment {experiment_name} to project {project_name}"}
|
|
84
81
|
except Exception as e:
|
|
85
82
|
raise HTTPException(status_code=500, detail=str(e))
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class ExperimentCreate(BaseModel):
|
|
86
|
+
experiment_id: str
|
|
87
|
+
name: str
|
|
88
|
+
description: str = ""
|
|
89
|
+
tags: dict = {}
|
|
90
|
+
project: str | None = None
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@router.post("/")
|
|
94
|
+
async def create_experiment(experiment: ExperimentCreate):
|
|
95
|
+
"""Create or update an experiment."""
|
|
96
|
+
try:
|
|
97
|
+
store = get_store()
|
|
98
|
+
store.save_experiment(
|
|
99
|
+
experiment_id=experiment.experiment_id,
|
|
100
|
+
name=experiment.name,
|
|
101
|
+
description=experiment.description,
|
|
102
|
+
tags=experiment.tags,
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
if experiment.project:
|
|
106
|
+
store.update_experiment_project(experiment.name, experiment.project)
|
|
107
|
+
|
|
108
|
+
return {"status": "success", "experiment_id": experiment.experiment_id}
|
|
109
|
+
except Exception as e:
|
|
110
|
+
raise HTTPException(status_code=500, detail=str(e))
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class ExperimentRunLog(BaseModel):
|
|
114
|
+
run_id: str
|
|
115
|
+
metrics: dict | None = None
|
|
116
|
+
parameters: dict | None = None
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
@router.post("/{experiment_id}/runs")
|
|
120
|
+
async def log_experiment_run(experiment_id: str, log: ExperimentRunLog):
|
|
121
|
+
"""Log a run to an experiment."""
|
|
122
|
+
try:
|
|
123
|
+
store = get_store()
|
|
124
|
+
store.log_experiment_run(
|
|
125
|
+
experiment_id=experiment_id,
|
|
126
|
+
run_id=log.run_id,
|
|
127
|
+
metrics=log.metrics,
|
|
128
|
+
parameters=log.parameters,
|
|
129
|
+
)
|
|
130
|
+
return {"status": "success"}
|
|
131
|
+
except Exception as e:
|
|
132
|
+
raise HTTPException(status_code=500, detail=str(e))
|
|
@@ -6,6 +6,7 @@ from pydantic import BaseModel, Field
|
|
|
6
6
|
from typing import Any
|
|
7
7
|
|
|
8
8
|
from flowyml.ui.backend.auth import verify_api_token, security
|
|
9
|
+
from flowyml.ui.backend.dependencies import get_store
|
|
9
10
|
from flowyml.storage.metadata import SQLiteMetadataStore
|
|
10
11
|
from flowyml.core.project import ProjectManager
|
|
11
12
|
from flowyml.utils.config import get_config
|
|
@@ -45,6 +46,24 @@ class MetricsLogRequest(BaseModel):
|
|
|
45
46
|
tags: dict[str, Any] | None = Field(default_factory=dict, description="Optional metadata tags")
|
|
46
47
|
|
|
47
48
|
|
|
49
|
+
class MetricLog(BaseModel):
|
|
50
|
+
run_id: str
|
|
51
|
+
name: str
|
|
52
|
+
value: float
|
|
53
|
+
step: int = 0
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@router.post("/")
|
|
57
|
+
async def log_metric(metric: MetricLog):
|
|
58
|
+
"""Log a single metric."""
|
|
59
|
+
try:
|
|
60
|
+
store = get_global_store()
|
|
61
|
+
store.save_metric(metric.run_id, metric.name, metric.value, metric.step)
|
|
62
|
+
return {"status": "success"}
|
|
63
|
+
except Exception as e:
|
|
64
|
+
raise HTTPException(status_code=500, detail=str(e))
|
|
65
|
+
|
|
66
|
+
|
|
48
67
|
@router.post("/log")
|
|
49
68
|
async def log_model_metrics(
|
|
50
69
|
payload: MetricsLogRequest,
|
|
@@ -138,76 +157,12 @@ async def list_model_metrics(
|
|
|
138
157
|
@router.get("/observability/orchestrator")
|
|
139
158
|
async def get_orchestrator_metrics():
|
|
140
159
|
"""Get orchestrator-level performance metrics."""
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
store = get_global_store()
|
|
145
|
-
conn = sqlite3.connect(store.db_path)
|
|
146
|
-
cursor = conn.cursor()
|
|
147
|
-
|
|
148
|
-
thirty_days_ago = (datetime.now() - timedelta(days=30)).isoformat()
|
|
149
|
-
cursor.execute("SELECT COUNT(*) FROM runs WHERE created_at >= ?", (thirty_days_ago,))
|
|
150
|
-
total_runs = cursor.fetchone()[0]
|
|
151
|
-
|
|
152
|
-
cursor.execute(
|
|
153
|
-
"SELECT status, COUNT(*) FROM runs WHERE created_at >= ? GROUP BY status",
|
|
154
|
-
(thirty_days_ago,),
|
|
155
|
-
)
|
|
156
|
-
status_counts = dict(cursor.fetchall())
|
|
157
|
-
|
|
158
|
-
cursor.execute(
|
|
159
|
-
"SELECT AVG(duration) FROM runs WHERE created_at >= ? AND duration IS NOT NULL",
|
|
160
|
-
(thirty_days_ago,),
|
|
161
|
-
)
|
|
162
|
-
avg_duration = cursor.fetchone()[0] or 0
|
|
163
|
-
|
|
164
|
-
conn.close()
|
|
165
|
-
|
|
166
|
-
completed = status_counts.get("completed", 0)
|
|
167
|
-
success_rate = completed / total_runs if total_runs > 0 else 0
|
|
168
|
-
|
|
169
|
-
return {
|
|
170
|
-
"total_runs": total_runs,
|
|
171
|
-
"success_rate": success_rate,
|
|
172
|
-
"avg_duration_seconds": avg_duration,
|
|
173
|
-
"status_distribution": status_counts,
|
|
174
|
-
"period_days": 30,
|
|
175
|
-
}
|
|
160
|
+
store = get_store()
|
|
161
|
+
return store.get_orchestrator_metrics(days=30)
|
|
176
162
|
|
|
177
163
|
|
|
178
164
|
@router.get("/observability/cache")
|
|
179
165
|
async def get_cache_metrics():
|
|
180
166
|
"""Get cache performance metrics."""
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
import json as json_lib
|
|
184
|
-
|
|
185
|
-
store = get_global_store()
|
|
186
|
-
conn = sqlite3.connect(store.db_path)
|
|
187
|
-
cursor = conn.cursor()
|
|
188
|
-
|
|
189
|
-
thirty_days_ago = (datetime.now() - timedelta(days=30)).isoformat()
|
|
190
|
-
cursor.execute("SELECT metadata FROM runs WHERE created_at >= ?", (thirty_days_ago,))
|
|
191
|
-
|
|
192
|
-
total_steps, cached_steps = 0, 0
|
|
193
|
-
for row in cursor.fetchall():
|
|
194
|
-
if not row[0]:
|
|
195
|
-
continue
|
|
196
|
-
try:
|
|
197
|
-
metadata = json_lib.loads(row[0])
|
|
198
|
-
for step_data in metadata.get("steps", {}).values():
|
|
199
|
-
total_steps += 1
|
|
200
|
-
if step_data.get("cached"):
|
|
201
|
-
cached_steps += 1
|
|
202
|
-
except Exception:
|
|
203
|
-
continue
|
|
204
|
-
|
|
205
|
-
conn.close()
|
|
206
|
-
cache_hit_rate = cached_steps / total_steps if total_steps > 0 else 0
|
|
207
|
-
|
|
208
|
-
return {
|
|
209
|
-
"total_steps": total_steps,
|
|
210
|
-
"cached_steps": cached_steps,
|
|
211
|
-
"cache_hit_rate": cache_hit_rate,
|
|
212
|
-
"period_days": 30,
|
|
213
|
-
}
|
|
167
|
+
store = get_store()
|
|
168
|
+
return store.get_cache_metrics(days=30)
|
|
@@ -1,21 +1,14 @@
|
|
|
1
1
|
from fastapi import APIRouter, HTTPException
|
|
2
2
|
from pydantic import BaseModel
|
|
3
|
-
from flowyml.
|
|
3
|
+
from flowyml.ui.backend.dependencies import get_store
|
|
4
4
|
from flowyml.core.project import ProjectManager
|
|
5
|
-
from flowyml.utils.config import get_config
|
|
6
|
-
from typing import Optional
|
|
7
5
|
|
|
8
6
|
router = APIRouter()
|
|
9
7
|
|
|
10
8
|
|
|
11
|
-
def get_store():
|
|
12
|
-
get_config()
|
|
13
|
-
return SQLiteMetadataStore()
|
|
14
|
-
|
|
15
|
-
|
|
16
9
|
def _iter_metadata_stores():
|
|
17
10
|
"""Yield tuples of (project_name, store) including global and project stores."""
|
|
18
|
-
stores = [(None,
|
|
11
|
+
stores = [(None, get_store())]
|
|
19
12
|
try:
|
|
20
13
|
manager = ProjectManager()
|
|
21
14
|
for project_meta in manager.list_projects():
|
|
@@ -31,7 +24,7 @@ def _iter_metadata_stores():
|
|
|
31
24
|
|
|
32
25
|
|
|
33
26
|
@router.get("/")
|
|
34
|
-
async def list_pipelines(project:
|
|
27
|
+
async def list_pipelines(project: str | None = None, limit: int = 100):
|
|
35
28
|
"""List all unique pipelines with details, optionally filtered by project."""
|
|
36
29
|
try:
|
|
37
30
|
pipeline_map = {} # pipeline_name -> data
|
|
@@ -164,3 +157,19 @@ async def update_pipeline_project(pipeline_name: str, update: ProjectUpdate):
|
|
|
164
157
|
return {"status": "success", "project": update.project_name}
|
|
165
158
|
except Exception as e:
|
|
166
159
|
raise HTTPException(status_code=500, detail=str(e))
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class PipelineDefinitionCreate(BaseModel):
|
|
163
|
+
pipeline_name: str
|
|
164
|
+
definition: dict
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
@router.post("/")
|
|
168
|
+
async def save_pipeline_definition(data: PipelineDefinitionCreate):
|
|
169
|
+
"""Save a pipeline definition."""
|
|
170
|
+
try:
|
|
171
|
+
store = get_store()
|
|
172
|
+
store.save_pipeline_definition(data.pipeline_name, data.definition)
|
|
173
|
+
return {"status": "success", "pipeline_name": data.pipeline_name}
|
|
174
|
+
except Exception as e:
|
|
175
|
+
raise HTTPException(status_code=500, detail=str(e))
|