flowyml 1.7.1__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/assets/base.py +15 -0
- flowyml/assets/dataset.py +570 -17
- flowyml/assets/metrics.py +5 -0
- flowyml/assets/model.py +1052 -15
- flowyml/cli/main.py +709 -0
- flowyml/cli/stack_cli.py +138 -25
- flowyml/core/__init__.py +17 -0
- flowyml/core/executor.py +231 -37
- flowyml/core/image_builder.py +129 -0
- flowyml/core/log_streamer.py +227 -0
- flowyml/core/orchestrator.py +59 -4
- flowyml/core/pipeline.py +65 -13
- flowyml/core/routing.py +558 -0
- flowyml/core/scheduler.py +88 -5
- flowyml/core/step.py +9 -1
- flowyml/core/step_grouping.py +49 -35
- flowyml/core/types.py +407 -0
- flowyml/integrations/keras.py +247 -82
- flowyml/monitoring/alerts.py +10 -0
- flowyml/monitoring/notifications.py +104 -25
- flowyml/monitoring/slack_blocks.py +323 -0
- flowyml/plugins/__init__.py +251 -0
- flowyml/plugins/alerters/__init__.py +1 -0
- flowyml/plugins/alerters/slack.py +168 -0
- flowyml/plugins/base.py +752 -0
- flowyml/plugins/config.py +478 -0
- flowyml/plugins/deployers/__init__.py +22 -0
- flowyml/plugins/deployers/gcp_cloud_run.py +200 -0
- flowyml/plugins/deployers/sagemaker.py +306 -0
- flowyml/plugins/deployers/vertex.py +290 -0
- flowyml/plugins/integration.py +369 -0
- flowyml/plugins/manager.py +510 -0
- flowyml/plugins/model_registries/__init__.py +22 -0
- flowyml/plugins/model_registries/mlflow.py +159 -0
- flowyml/plugins/model_registries/sagemaker.py +489 -0
- flowyml/plugins/model_registries/vertex.py +386 -0
- flowyml/plugins/orchestrators/__init__.py +13 -0
- flowyml/plugins/orchestrators/sagemaker.py +443 -0
- flowyml/plugins/orchestrators/vertex_ai.py +461 -0
- flowyml/plugins/registries/__init__.py +13 -0
- flowyml/plugins/registries/ecr.py +321 -0
- flowyml/plugins/registries/gcr.py +313 -0
- flowyml/plugins/registry.py +454 -0
- flowyml/plugins/stack.py +494 -0
- flowyml/plugins/stack_config.py +537 -0
- flowyml/plugins/stores/__init__.py +13 -0
- flowyml/plugins/stores/gcs.py +460 -0
- flowyml/plugins/stores/s3.py +453 -0
- flowyml/plugins/trackers/__init__.py +11 -0
- flowyml/plugins/trackers/mlflow.py +316 -0
- flowyml/plugins/validators/__init__.py +3 -0
- flowyml/plugins/validators/deepchecks.py +119 -0
- flowyml/registry/__init__.py +2 -1
- flowyml/registry/model_environment.py +109 -0
- flowyml/registry/model_registry.py +241 -96
- flowyml/serving/__init__.py +17 -0
- flowyml/serving/model_server.py +628 -0
- flowyml/stacks/__init__.py +60 -0
- flowyml/stacks/aws.py +93 -0
- flowyml/stacks/base.py +62 -0
- flowyml/stacks/components.py +12 -0
- flowyml/stacks/gcp.py +44 -9
- flowyml/stacks/plugins.py +115 -0
- flowyml/stacks/registry.py +2 -1
- flowyml/storage/sql.py +401 -12
- flowyml/tracking/experiment.py +8 -5
- flowyml/ui/backend/Dockerfile +87 -16
- flowyml/ui/backend/auth.py +12 -2
- flowyml/ui/backend/main.py +149 -5
- flowyml/ui/backend/routers/ai_context.py +226 -0
- flowyml/ui/backend/routers/assets.py +23 -4
- flowyml/ui/backend/routers/auth.py +96 -0
- flowyml/ui/backend/routers/deployments.py +660 -0
- flowyml/ui/backend/routers/model_explorer.py +597 -0
- flowyml/ui/backend/routers/plugins.py +103 -51
- flowyml/ui/backend/routers/projects.py +91 -8
- flowyml/ui/backend/routers/runs.py +132 -1
- flowyml/ui/backend/routers/schedules.py +54 -29
- flowyml/ui/backend/routers/templates.py +319 -0
- flowyml/ui/backend/routers/websocket.py +2 -2
- flowyml/ui/frontend/Dockerfile +55 -6
- flowyml/ui/frontend/dist/assets/index-B5AsPTSz.css +1 -0
- flowyml/ui/frontend/dist/assets/index-dFbZ8wD8.js +753 -0
- flowyml/ui/frontend/dist/index.html +2 -2
- flowyml/ui/frontend/dist/logo.png +0 -0
- flowyml/ui/frontend/nginx.conf +65 -4
- flowyml/ui/frontend/package-lock.json +1415 -74
- flowyml/ui/frontend/package.json +4 -0
- flowyml/ui/frontend/public/logo.png +0 -0
- flowyml/ui/frontend/src/App.jsx +10 -7
- flowyml/ui/frontend/src/app/assets/page.jsx +890 -321
- flowyml/ui/frontend/src/app/auth/Login.jsx +90 -0
- flowyml/ui/frontend/src/app/dashboard/page.jsx +8 -8
- flowyml/ui/frontend/src/app/deployments/page.jsx +786 -0
- flowyml/ui/frontend/src/app/model-explorer/page.jsx +1031 -0
- flowyml/ui/frontend/src/app/pipelines/page.jsx +12 -2
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectExperimentsList.jsx +19 -6
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectMetricsPanel.jsx +1 -1
- flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +601 -101
- flowyml/ui/frontend/src/app/runs/page.jsx +8 -2
- flowyml/ui/frontend/src/app/settings/page.jsx +267 -253
- flowyml/ui/frontend/src/components/ArtifactViewer.jsx +62 -2
- flowyml/ui/frontend/src/components/AssetDetailsPanel.jsx +424 -29
- flowyml/ui/frontend/src/components/AssetTreeHierarchy.jsx +119 -11
- flowyml/ui/frontend/src/components/DatasetViewer.jsx +753 -0
- flowyml/ui/frontend/src/components/Layout.jsx +6 -0
- flowyml/ui/frontend/src/components/PipelineGraph.jsx +79 -29
- flowyml/ui/frontend/src/components/RunDetailsPanel.jsx +36 -6
- flowyml/ui/frontend/src/components/RunMetaPanel.jsx +113 -0
- flowyml/ui/frontend/src/components/TrainingHistoryChart.jsx +514 -0
- flowyml/ui/frontend/src/components/TrainingMetricsPanel.jsx +175 -0
- flowyml/ui/frontend/src/components/ai/AIAssistantButton.jsx +71 -0
- flowyml/ui/frontend/src/components/ai/AIAssistantPanel.jsx +420 -0
- flowyml/ui/frontend/src/components/header/Header.jsx +22 -0
- flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +4 -4
- flowyml/ui/frontend/src/components/plugins/{ZenMLIntegration.jsx → StackImport.jsx} +38 -12
- flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +36 -13
- flowyml/ui/frontend/src/contexts/AIAssistantContext.jsx +245 -0
- flowyml/ui/frontend/src/contexts/AuthContext.jsx +108 -0
- flowyml/ui/frontend/src/hooks/useAIContext.js +156 -0
- flowyml/ui/frontend/src/hooks/useWebGPU.js +54 -0
- flowyml/ui/frontend/src/layouts/MainLayout.jsx +6 -0
- flowyml/ui/frontend/src/router/index.jsx +47 -20
- flowyml/ui/frontend/src/services/pluginService.js +3 -1
- flowyml/ui/server_manager.py +5 -5
- flowyml/ui/utils.py +157 -39
- flowyml/utils/config.py +37 -15
- flowyml/utils/model_introspection.py +123 -0
- flowyml/utils/observability.py +30 -0
- flowyml-1.8.0.dist-info/METADATA +174 -0
- {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/RECORD +134 -73
- {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/WHEEL +1 -1
- flowyml/ui/frontend/dist/assets/index-BqDQvp63.js +0 -630
- flowyml/ui/frontend/dist/assets/index-By4trVyv.css +0 -1
- flowyml-1.7.1.dist-info/METADATA +0 -477
- {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/entry_points.txt +0 -0
- {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -34,7 +34,7 @@ class ImportStackRequest(BaseModel):
|
|
|
34
34
|
|
|
35
35
|
@router.get("/available", response_model=list[PluginInfo])
|
|
36
36
|
async def get_available_plugins():
|
|
37
|
-
"""Get list of available plugins."""
|
|
37
|
+
"""Get list of available FlowyML plugins."""
|
|
38
38
|
import importlib.metadata
|
|
39
39
|
|
|
40
40
|
# Helper to check if package is installed
|
|
@@ -45,51 +45,95 @@ async def get_available_plugins():
|
|
|
45
45
|
except importlib.metadata.PackageNotFoundError:
|
|
46
46
|
return False
|
|
47
47
|
|
|
48
|
-
#
|
|
48
|
+
# FlowyML Native Plugins
|
|
49
49
|
plugins = [
|
|
50
50
|
PluginInfo(
|
|
51
|
-
plugin_id="
|
|
52
|
-
name="
|
|
53
|
-
version="
|
|
54
|
-
author="
|
|
55
|
-
description="
|
|
56
|
-
downloads="
|
|
57
|
-
stars="
|
|
58
|
-
tags=["orchestrator", "
|
|
59
|
-
installed=is_installed("
|
|
51
|
+
plugin_id="flowyml-gcp",
|
|
52
|
+
name="FlowyML GCP",
|
|
53
|
+
version="1.8.0",
|
|
54
|
+
author="FlowyML",
|
|
55
|
+
description="Google Cloud Platform integration: Vertex AI orchestrator, GCS artifact store, and Cloud Run deployer.",
|
|
56
|
+
downloads="5.2k",
|
|
57
|
+
stars="180",
|
|
58
|
+
tags=["orchestrator", "artifact-store", "gcp", "vertex-ai"],
|
|
59
|
+
installed=is_installed("google-cloud-aiplatform"),
|
|
60
60
|
),
|
|
61
61
|
PluginInfo(
|
|
62
|
-
plugin_id="
|
|
63
|
-
name="
|
|
64
|
-
version="
|
|
65
|
-
author="
|
|
66
|
-
description="
|
|
62
|
+
plugin_id="flowyml-aws",
|
|
63
|
+
name="FlowyML AWS",
|
|
64
|
+
version="1.8.0",
|
|
65
|
+
author="FlowyML",
|
|
66
|
+
description="AWS integration: SageMaker orchestrator, S3 artifact store, and ECR container registry.",
|
|
67
|
+
downloads="4.8k",
|
|
68
|
+
stars="165",
|
|
69
|
+
tags=["orchestrator", "artifact-store", "aws", "sagemaker"],
|
|
70
|
+
installed=is_installed("boto3"),
|
|
71
|
+
),
|
|
72
|
+
PluginInfo(
|
|
73
|
+
plugin_id="flowyml-kubernetes",
|
|
74
|
+
name="FlowyML Kubernetes",
|
|
75
|
+
version="1.8.0",
|
|
76
|
+
author="FlowyML",
|
|
77
|
+
description="Kubernetes orchestrator for running pipelines on K8s clusters with auto-scaling.",
|
|
78
|
+
downloads="3.5k",
|
|
79
|
+
stars="145",
|
|
80
|
+
tags=["orchestrator", "kubernetes", "container"],
|
|
81
|
+
installed=is_installed("kubernetes"),
|
|
82
|
+
),
|
|
83
|
+
PluginInfo(
|
|
84
|
+
plugin_id="flowyml-mlflow",
|
|
85
|
+
name="FlowyML MLflow",
|
|
86
|
+
version="1.8.0",
|
|
87
|
+
author="FlowyML",
|
|
88
|
+
description="MLflow integration for experiment tracking, model registry, and deployment.",
|
|
89
|
+
downloads="6.1k",
|
|
90
|
+
stars="220",
|
|
91
|
+
tags=["tracking", "model-registry", "mlflow"],
|
|
92
|
+
installed=is_installed("mlflow"),
|
|
93
|
+
),
|
|
94
|
+
PluginInfo(
|
|
95
|
+
plugin_id="flowyml-wandb",
|
|
96
|
+
name="FlowyML Weights & Biases",
|
|
97
|
+
version="1.8.0",
|
|
98
|
+
author="FlowyML",
|
|
99
|
+
description="W&B integration for experiment tracking, artifact versioning, and collaboration.",
|
|
100
|
+
downloads="4.2k",
|
|
101
|
+
stars="195",
|
|
102
|
+
tags=["tracking", "wandb", "experiment"],
|
|
103
|
+
installed=is_installed("wandb"),
|
|
104
|
+
),
|
|
105
|
+
PluginInfo(
|
|
106
|
+
plugin_id="flowyml-pytorch",
|
|
107
|
+
name="FlowyML PyTorch",
|
|
108
|
+
version="1.8.0",
|
|
109
|
+
author="FlowyML",
|
|
110
|
+
description="PyTorch integration with automatic model serialization and distributed training support.",
|
|
67
111
|
downloads="8.5k",
|
|
68
|
-
stars="
|
|
69
|
-
tags=["
|
|
70
|
-
installed=is_installed("
|
|
112
|
+
stars="310",
|
|
113
|
+
tags=["framework", "pytorch", "deep-learning"],
|
|
114
|
+
installed=is_installed("torch"),
|
|
71
115
|
),
|
|
72
116
|
PluginInfo(
|
|
73
|
-
plugin_id="
|
|
74
|
-
name="
|
|
75
|
-
version="
|
|
76
|
-
author="
|
|
77
|
-
description="
|
|
78
|
-
downloads="
|
|
79
|
-
stars="
|
|
80
|
-
tags=["
|
|
81
|
-
installed=is_installed("
|
|
117
|
+
plugin_id="flowyml-tensorflow",
|
|
118
|
+
name="FlowyML TensorFlow",
|
|
119
|
+
version="1.8.0",
|
|
120
|
+
author="FlowyML",
|
|
121
|
+
description="TensorFlow/Keras integration with automatic callbacks and model tracking.",
|
|
122
|
+
downloads="7.8k",
|
|
123
|
+
stars="290",
|
|
124
|
+
tags=["framework", "tensorflow", "keras"],
|
|
125
|
+
installed=is_installed("tensorflow"),
|
|
82
126
|
),
|
|
83
127
|
PluginInfo(
|
|
84
|
-
plugin_id="
|
|
85
|
-
name="
|
|
86
|
-
version="1.
|
|
87
|
-
author="
|
|
88
|
-
description="
|
|
89
|
-
downloads="
|
|
90
|
-
stars="
|
|
91
|
-
tags=["
|
|
92
|
-
installed=is_installed("
|
|
128
|
+
plugin_id="flowyml-sklearn",
|
|
129
|
+
name="FlowyML Scikit-Learn",
|
|
130
|
+
version="1.8.0",
|
|
131
|
+
author="FlowyML",
|
|
132
|
+
description="Scikit-learn integration with automatic model serialization and metrics extraction.",
|
|
133
|
+
downloads="9.2k",
|
|
134
|
+
stars="340",
|
|
135
|
+
tags=["framework", "sklearn", "ml"],
|
|
136
|
+
installed=is_installed("scikit-learn"),
|
|
93
137
|
),
|
|
94
138
|
]
|
|
95
139
|
|
|
@@ -98,33 +142,41 @@ async def get_available_plugins():
|
|
|
98
142
|
|
|
99
143
|
@router.get("/installed", response_model=list[dict[str, Any]])
|
|
100
144
|
async def get_installed_plugins():
|
|
101
|
-
"""Get list of installed plugins."""
|
|
145
|
+
"""Get list of installed FlowyML plugins and integrations."""
|
|
102
146
|
import importlib.metadata
|
|
103
147
|
|
|
104
148
|
# Get all installed packages that could be plugins
|
|
105
149
|
installed = []
|
|
106
150
|
|
|
107
|
-
#
|
|
151
|
+
# FlowyML-related plugin packages
|
|
108
152
|
potential_plugins = [
|
|
109
|
-
|
|
110
|
-
"
|
|
111
|
-
"
|
|
112
|
-
"
|
|
113
|
-
"
|
|
114
|
-
|
|
115
|
-
"
|
|
116
|
-
"
|
|
117
|
-
|
|
118
|
-
"
|
|
153
|
+
# Cloud providers
|
|
154
|
+
("google-cloud-aiplatform", "FlowyML GCP"),
|
|
155
|
+
("google-cloud-storage", "GCS Storage"),
|
|
156
|
+
("boto3", "FlowyML AWS"),
|
|
157
|
+
("sagemaker", "AWS SageMaker"),
|
|
158
|
+
# Orchestrators
|
|
159
|
+
("kubernetes", "FlowyML Kubernetes"),
|
|
160
|
+
("kfp", "Kubeflow Pipelines"),
|
|
161
|
+
# Tracking & Registry
|
|
162
|
+
("mlflow", "FlowyML MLflow"),
|
|
163
|
+
("wandb", "FlowyML W&B"),
|
|
164
|
+
# ML Frameworks
|
|
165
|
+
("torch", "FlowyML PyTorch"),
|
|
166
|
+
("tensorflow", "FlowyML TensorFlow"),
|
|
167
|
+
("keras", "FlowyML Keras"),
|
|
168
|
+
("scikit-learn", "FlowyML Scikit-Learn"),
|
|
169
|
+
# Core
|
|
170
|
+
("flowyml", "FlowyML Core"),
|
|
119
171
|
]
|
|
120
172
|
|
|
121
|
-
for package_name in potential_plugins:
|
|
173
|
+
for package_name, display_name in potential_plugins:
|
|
122
174
|
try:
|
|
123
175
|
dist = importlib.metadata.distribution(package_name)
|
|
124
176
|
installed.append(
|
|
125
177
|
{
|
|
126
178
|
"id": package_name,
|
|
127
|
-
"name":
|
|
179
|
+
"name": display_name,
|
|
128
180
|
"version": dist.version,
|
|
129
181
|
"description": dist.metadata.get("Summary", ""),
|
|
130
182
|
"status": "active",
|
|
@@ -14,10 +14,42 @@ def get_projects_manager() -> ProjectManager:
|
|
|
14
14
|
|
|
15
15
|
@router.get("/")
|
|
16
16
|
async def list_projects(manager: ProjectManager = Depends(get_projects_manager)):
|
|
17
|
-
"""List all projects."""
|
|
17
|
+
"""List all projects, including those discovered from run metadata."""
|
|
18
18
|
try:
|
|
19
|
-
|
|
20
|
-
|
|
19
|
+
# Get explicitly created projects
|
|
20
|
+
explicit_projects = manager.list_projects()
|
|
21
|
+
project_names = {p.get("name") for p in explicit_projects if p.get("name")}
|
|
22
|
+
|
|
23
|
+
# Also discover projects from run metadata in global store
|
|
24
|
+
from flowyml.ui.backend.dependencies import get_store
|
|
25
|
+
|
|
26
|
+
store = get_store()
|
|
27
|
+
|
|
28
|
+
discovered_projects = []
|
|
29
|
+
try:
|
|
30
|
+
# Get all runs and extract unique project names
|
|
31
|
+
runs = store.list_runs(limit=1000)
|
|
32
|
+
for run in runs:
|
|
33
|
+
project_name = run.get("project")
|
|
34
|
+
if project_name and project_name not in project_names:
|
|
35
|
+
project_names.add(project_name)
|
|
36
|
+
# Create a synthetic project entry for discovered projects
|
|
37
|
+
discovered_projects.append(
|
|
38
|
+
{
|
|
39
|
+
"name": project_name,
|
|
40
|
+
"description": "Auto-discovered from pipeline runs",
|
|
41
|
+
"created_at": run.get("start_time"),
|
|
42
|
+
"pipelines": [],
|
|
43
|
+
"tags": {},
|
|
44
|
+
"discovered": True, # Flag to indicate this wasn't explicitly created
|
|
45
|
+
},
|
|
46
|
+
)
|
|
47
|
+
except Exception:
|
|
48
|
+
pass # Store might not be initialized
|
|
49
|
+
|
|
50
|
+
# Combine explicit and discovered projects
|
|
51
|
+
all_projects = explicit_projects + discovered_projects
|
|
52
|
+
return {"projects": all_projects}
|
|
21
53
|
except Exception as e:
|
|
22
54
|
raise HTTPException(status_code=500, detail=str(e))
|
|
23
55
|
|
|
@@ -93,14 +125,65 @@ async def get_project_metrics(
|
|
|
93
125
|
limit: int = 100,
|
|
94
126
|
manager: ProjectManager = Depends(get_projects_manager),
|
|
95
127
|
):
|
|
96
|
-
"""Get logged
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
128
|
+
"""Get logged metrics for a project (from model_metrics table and Metrics artifacts)."""
|
|
129
|
+
metrics = []
|
|
130
|
+
|
|
131
|
+
from flowyml.ui.backend.dependencies import get_store
|
|
132
|
+
|
|
133
|
+
store = get_store()
|
|
134
|
+
|
|
135
|
+
try:
|
|
136
|
+
# Get all runs for this project
|
|
137
|
+
all_runs = store.list_runs(limit=1000)
|
|
138
|
+
project_run_ids = {r.get("run_id") for r in all_runs if r.get("project") == project_name}
|
|
139
|
+
|
|
140
|
+
# 1. Try to get metrics from model_metrics table
|
|
141
|
+
all_model_metrics = store.list_model_metrics(limit=limit * 2)
|
|
142
|
+
for m in all_model_metrics:
|
|
143
|
+
if m.get("run_id") in project_run_ids or m.get("project") == project_name:
|
|
144
|
+
metrics.append(m)
|
|
145
|
+
|
|
146
|
+
# 2. Also extract metrics from Metrics artifacts
|
|
147
|
+
all_assets = store.list_assets(limit=500)
|
|
148
|
+
for asset in all_assets:
|
|
149
|
+
# Check if it's a metrics artifact for this project (case-insensitive type check)
|
|
150
|
+
asset_type = str(asset.get("type", "")).lower()
|
|
151
|
+
if asset_type == "metrics" and asset.get("run_id") in project_run_ids:
|
|
152
|
+
# Get properties which contain the metric values
|
|
153
|
+
props = asset.get("properties", {})
|
|
154
|
+
created_at = asset.get("created_at", "")
|
|
155
|
+
run_id = asset.get("run_id", "")
|
|
156
|
+
asset_name = asset.get("name", "evaluation")
|
|
157
|
+
|
|
158
|
+
# Convert artifact properties to metric entries
|
|
159
|
+
for key, value in props.items():
|
|
160
|
+
if isinstance(value, (int, float)) and key not in ["samples"]:
|
|
161
|
+
metrics.append(
|
|
162
|
+
{
|
|
163
|
+
"project": project_name,
|
|
164
|
+
"model_name": asset_name,
|
|
165
|
+
"run_id": run_id,
|
|
166
|
+
"metric_name": key,
|
|
167
|
+
"metric_value": value,
|
|
168
|
+
"environment": "evaluation",
|
|
169
|
+
"tags": {"source": "artifact"},
|
|
170
|
+
"created_at": created_at,
|
|
171
|
+
},
|
|
172
|
+
)
|
|
173
|
+
except Exception as e:
|
|
174
|
+
import logging
|
|
175
|
+
|
|
176
|
+
logging.getLogger(__name__).warning(f"Error fetching metrics: {e}")
|
|
177
|
+
|
|
178
|
+
# Try explicit project as fallback
|
|
179
|
+
if not metrics:
|
|
180
|
+
project = manager.get_project(project_name)
|
|
181
|
+
if project:
|
|
182
|
+
metrics = project.list_model_metrics(model_name=model_name, limit=limit)
|
|
100
183
|
|
|
101
184
|
return {
|
|
102
185
|
"project": project_name,
|
|
103
|
-
"metrics":
|
|
186
|
+
"metrics": metrics[:limit],
|
|
104
187
|
}
|
|
105
188
|
|
|
106
189
|
|
|
@@ -163,6 +163,9 @@ async def get_run(run_id: str):
|
|
|
163
163
|
for step_name, ts in _heartbeat_timestamps[run_id].items():
|
|
164
164
|
if step_name in run.get("steps", {}):
|
|
165
165
|
run["steps"][step_name]["last_heartbeat"] = ts
|
|
166
|
+
for step_name, metrics in _step_metrics.get(run_id, {}).items():
|
|
167
|
+
if step_name in run.get("steps", {}):
|
|
168
|
+
run["steps"][step_name]["metrics"] = metrics
|
|
166
169
|
|
|
167
170
|
return run
|
|
168
171
|
|
|
@@ -296,11 +299,14 @@ async def get_cloud_status(run_id: str):
|
|
|
296
299
|
class HeartbeatRequest(BaseModel):
|
|
297
300
|
step_name: str
|
|
298
301
|
status: str = "running"
|
|
302
|
+
metrics: dict | None = None
|
|
299
303
|
|
|
300
304
|
|
|
301
|
-
# In-memory storage for heartbeat timestamps
|
|
305
|
+
# In-memory storage for heartbeat timestamps and metrics
|
|
302
306
|
# Format: {run_id: {step_name: last_heartbeat_timestamp}}
|
|
303
307
|
_heartbeat_timestamps: dict[str, dict[str, float]] = {}
|
|
308
|
+
# Format: {run_id: {step_name: metrics_dict}}
|
|
309
|
+
_step_metrics: dict[str, dict[str, dict]] = {}
|
|
304
310
|
_heartbeat_lock = __import__("threading").Lock()
|
|
305
311
|
|
|
306
312
|
# Heartbeat interval in seconds (should match executor's interval)
|
|
@@ -319,6 +325,14 @@ def _record_heartbeat(run_id: str, step_name: str) -> None:
|
|
|
319
325
|
_heartbeat_timestamps[run_id][step_name] = time.time()
|
|
320
326
|
|
|
321
327
|
|
|
328
|
+
def _record_step_metrics(run_id: str, step_name: str, metrics: dict) -> None:
|
|
329
|
+
"""Record metrics for a step."""
|
|
330
|
+
with _heartbeat_lock:
|
|
331
|
+
if run_id not in _step_metrics:
|
|
332
|
+
_step_metrics[run_id] = {}
|
|
333
|
+
_step_metrics[run_id][step_name] = metrics
|
|
334
|
+
|
|
335
|
+
|
|
322
336
|
def _get_dead_steps(run_id: str) -> list[str]:
|
|
323
337
|
"""Get list of steps that have missed too many heartbeats."""
|
|
324
338
|
import time
|
|
@@ -342,6 +356,7 @@ def _cleanup_heartbeats(run_id: str) -> None:
|
|
|
342
356
|
"""Remove heartbeat tracking for a completed run."""
|
|
343
357
|
with _heartbeat_lock:
|
|
344
358
|
_heartbeat_timestamps.pop(run_id, None)
|
|
359
|
+
_step_metrics.pop(run_id, None)
|
|
345
360
|
|
|
346
361
|
|
|
347
362
|
@router.post("/{run_id}/steps/{step_name}/heartbeat")
|
|
@@ -356,6 +371,10 @@ async def step_heartbeat(run_id: str, step_name: str, heartbeat: HeartbeatReques
|
|
|
356
371
|
# Record heartbeat timestamp
|
|
357
372
|
_record_heartbeat(run_id, step_name)
|
|
358
373
|
|
|
374
|
+
# Record metrics if present
|
|
375
|
+
if heartbeat.metrics:
|
|
376
|
+
_record_step_metrics(run_id, step_name, heartbeat.metrics)
|
|
377
|
+
|
|
359
378
|
# Check if run is marked for stopping
|
|
360
379
|
run = store.load_run(run_id)
|
|
361
380
|
if not run:
|
|
@@ -484,3 +503,115 @@ async def get_run_logs(run_id: str):
|
|
|
484
503
|
logs = await anyio.to_thread.run_sync(read_all_logs)
|
|
485
504
|
|
|
486
505
|
return {"logs": logs}
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
@router.get("/{run_id}/training-history")
|
|
509
|
+
async def get_training_history(run_id: str):
|
|
510
|
+
"""Get training history (per-epoch metrics) for a run.
|
|
511
|
+
|
|
512
|
+
This combines:
|
|
513
|
+
1. Training history from model artifacts (saved by FlowymlKerasCallback)
|
|
514
|
+
2. Per-epoch metrics saved in the metrics table
|
|
515
|
+
|
|
516
|
+
Returns a consolidated training history suitable for visualization.
|
|
517
|
+
"""
|
|
518
|
+
store = _find_store_for_run(run_id)
|
|
519
|
+
|
|
520
|
+
# Get per-epoch metrics from the metrics table
|
|
521
|
+
metrics = store.get_metrics(run_id)
|
|
522
|
+
|
|
523
|
+
# Build training history from metrics table
|
|
524
|
+
# Group metrics by step (epoch) and name
|
|
525
|
+
epoch_metrics = {}
|
|
526
|
+
for m in metrics:
|
|
527
|
+
step = m.get("step", 0)
|
|
528
|
+
name = m.get("name", "unknown")
|
|
529
|
+
value = m.get("value", 0)
|
|
530
|
+
|
|
531
|
+
if step not in epoch_metrics:
|
|
532
|
+
epoch_metrics[step] = {}
|
|
533
|
+
epoch_metrics[step][name] = value
|
|
534
|
+
|
|
535
|
+
# Convert to chart-friendly format
|
|
536
|
+
training_history_from_metrics = {
|
|
537
|
+
"epochs": [],
|
|
538
|
+
"train_loss": [],
|
|
539
|
+
"val_loss": [],
|
|
540
|
+
"train_accuracy": [],
|
|
541
|
+
"val_accuracy": [],
|
|
542
|
+
"mae": [],
|
|
543
|
+
"val_mae": [],
|
|
544
|
+
}
|
|
545
|
+
|
|
546
|
+
# Standard metric name mappings
|
|
547
|
+
metric_mappings = {
|
|
548
|
+
"loss": "train_loss",
|
|
549
|
+
"val_loss": "val_loss",
|
|
550
|
+
"accuracy": "train_accuracy",
|
|
551
|
+
"acc": "train_accuracy",
|
|
552
|
+
"val_accuracy": "val_accuracy",
|
|
553
|
+
"val_acc": "val_accuracy",
|
|
554
|
+
"mae": "mae",
|
|
555
|
+
"val_mae": "val_mae",
|
|
556
|
+
}
|
|
557
|
+
|
|
558
|
+
# Track custom metrics
|
|
559
|
+
custom_metrics = set()
|
|
560
|
+
|
|
561
|
+
if epoch_metrics:
|
|
562
|
+
sorted_epochs = sorted(epoch_metrics.keys())
|
|
563
|
+
for epoch in sorted_epochs:
|
|
564
|
+
training_history_from_metrics["epochs"].append(epoch + 1) # 1-indexed for display
|
|
565
|
+
|
|
566
|
+
epoch_data = epoch_metrics[epoch]
|
|
567
|
+
for metric_name, value in epoch_data.items():
|
|
568
|
+
# Map to standard name or track as custom
|
|
569
|
+
standard_name = metric_mappings.get(metric_name)
|
|
570
|
+
if standard_name:
|
|
571
|
+
training_history_from_metrics[standard_name].append(value)
|
|
572
|
+
else:
|
|
573
|
+
# Custom metric
|
|
574
|
+
if metric_name not in custom_metrics:
|
|
575
|
+
custom_metrics.add(metric_name)
|
|
576
|
+
training_history_from_metrics[metric_name] = []
|
|
577
|
+
training_history_from_metrics[metric_name].append(value)
|
|
578
|
+
|
|
579
|
+
# Also try to get training history from model artifacts
|
|
580
|
+
artifacts = store.list_assets(run_id=run_id)
|
|
581
|
+
artifact_history = None
|
|
582
|
+
|
|
583
|
+
for artifact in artifacts:
|
|
584
|
+
# Check if artifact has training_history
|
|
585
|
+
if artifact.get("training_history"):
|
|
586
|
+
artifact_history = artifact.get("training_history")
|
|
587
|
+
break
|
|
588
|
+
# Also check in metadata/properties
|
|
589
|
+
metadata = artifact.get("metadata", {})
|
|
590
|
+
if isinstance(metadata, str):
|
|
591
|
+
try:
|
|
592
|
+
metadata = json.loads(metadata)
|
|
593
|
+
except Exception:
|
|
594
|
+
metadata = {}
|
|
595
|
+
if metadata.get("training_history"):
|
|
596
|
+
artifact_history = metadata.get("training_history")
|
|
597
|
+
break
|
|
598
|
+
|
|
599
|
+
# Prefer artifact history if it has more data, otherwise use metrics
|
|
600
|
+
if artifact_history and len(artifact_history.get("epochs", [])) > len(
|
|
601
|
+
training_history_from_metrics.get("epochs", []),
|
|
602
|
+
):
|
|
603
|
+
final_history = artifact_history
|
|
604
|
+
elif training_history_from_metrics.get("epochs"):
|
|
605
|
+
final_history = training_history_from_metrics
|
|
606
|
+
else:
|
|
607
|
+
final_history = artifact_history or {}
|
|
608
|
+
|
|
609
|
+
# Clean up empty arrays
|
|
610
|
+
cleaned_history = {k: v for k, v in final_history.items() if v and (not isinstance(v, list) or len(v) > 0)}
|
|
611
|
+
|
|
612
|
+
return {
|
|
613
|
+
"training_history": cleaned_history,
|
|
614
|
+
"has_history": len(cleaned_history.get("epochs", [])) > 0,
|
|
615
|
+
"total_epochs": len(cleaned_history.get("epochs", [])),
|
|
616
|
+
"source": "artifact" if artifact_history else "metrics",
|
|
617
|
+
}
|
|
@@ -4,11 +4,16 @@ from flowyml.core.scheduler import PipelineScheduler
|
|
|
4
4
|
from flowyml.registry.pipeline_registry import pipeline_registry
|
|
5
5
|
|
|
6
6
|
router = APIRouter()
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
7
|
+
_scheduler = None
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def get_scheduler():
|
|
11
|
+
"""Get or initialize the scheduler singleton."""
|
|
12
|
+
global _scheduler
|
|
13
|
+
if _scheduler is None:
|
|
14
|
+
_scheduler = PipelineScheduler()
|
|
15
|
+
_scheduler.start()
|
|
16
|
+
return _scheduler
|
|
12
17
|
|
|
13
18
|
|
|
14
19
|
class ScheduleRequest(BaseModel):
|
|
@@ -25,27 +30,47 @@ class ScheduleRequest(BaseModel):
|
|
|
25
30
|
|
|
26
31
|
@router.get("/")
|
|
27
32
|
async def list_schedules():
|
|
28
|
-
"""List all active schedules.
|
|
29
|
-
|
|
30
|
-
schedules
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
33
|
+
"""List all active schedules.
|
|
34
|
+
|
|
35
|
+
This reads schedules from the shared database, so schedules created
|
|
36
|
+
by user code (e.g., in scripts) are visible in the UI.
|
|
37
|
+
"""
|
|
38
|
+
# First, get schedules from the in-memory scheduler
|
|
39
|
+
memory_schedules = []
|
|
40
|
+
for s in get_scheduler().list_schedules():
|
|
41
|
+
memory_schedules.append(
|
|
42
|
+
{
|
|
43
|
+
"pipeline_name": s.pipeline_name,
|
|
44
|
+
"schedule_type": s.schedule_type,
|
|
45
|
+
"schedule_value": s.schedule_value,
|
|
46
|
+
"enabled": s.enabled,
|
|
47
|
+
"last_run": s.last_run.isoformat() if s.last_run else None,
|
|
48
|
+
"next_run": s.next_run.isoformat() if s.next_run else None,
|
|
49
|
+
"timezone": s.timezone,
|
|
50
|
+
},
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
# Also read directly from the persistence database to get schedules
|
|
54
|
+
# created by other processes (e.g., user scripts)
|
|
55
|
+
db_schedules = []
|
|
56
|
+
if get_scheduler()._persistence:
|
|
57
|
+
db_schedules = get_scheduler()._persistence.list_all_schedules()
|
|
58
|
+
|
|
59
|
+
# Merge: prefer memory schedules (more up-to-date), but include db-only ones
|
|
60
|
+
memory_names = {s["pipeline_name"] for s in memory_schedules}
|
|
61
|
+
result = list(memory_schedules)
|
|
62
|
+
|
|
63
|
+
for db_sched in db_schedules:
|
|
64
|
+
if db_sched.get("pipeline_name") not in memory_names:
|
|
65
|
+
result.append(db_sched)
|
|
66
|
+
|
|
67
|
+
return result
|
|
43
68
|
|
|
44
69
|
|
|
45
70
|
@router.get("/health")
|
|
46
71
|
async def get_scheduler_health():
|
|
47
72
|
"""Get scheduler health metrics."""
|
|
48
|
-
return
|
|
73
|
+
return get_scheduler().health_check()
|
|
49
74
|
|
|
50
75
|
|
|
51
76
|
@router.post("/")
|
|
@@ -113,7 +138,7 @@ async def create_schedule(schedule: ScheduleRequest):
|
|
|
113
138
|
# 2. Schedule it
|
|
114
139
|
try:
|
|
115
140
|
if schedule.schedule_type == "daily":
|
|
116
|
-
|
|
141
|
+
get_scheduler().schedule_daily(
|
|
117
142
|
name=schedule.name,
|
|
118
143
|
pipeline_func=pipeline_func,
|
|
119
144
|
hour=schedule.hour,
|
|
@@ -121,14 +146,14 @@ async def create_schedule(schedule: ScheduleRequest):
|
|
|
121
146
|
timezone=schedule.timezone,
|
|
122
147
|
)
|
|
123
148
|
elif schedule.schedule_type == "hourly":
|
|
124
|
-
|
|
149
|
+
get_scheduler().schedule_hourly(
|
|
125
150
|
name=schedule.name,
|
|
126
151
|
pipeline_func=pipeline_func,
|
|
127
152
|
minute=schedule.minute,
|
|
128
153
|
timezone=schedule.timezone,
|
|
129
154
|
)
|
|
130
155
|
elif schedule.schedule_type == "interval":
|
|
131
|
-
|
|
156
|
+
get_scheduler().schedule_interval(
|
|
132
157
|
name=schedule.name,
|
|
133
158
|
pipeline_func=pipeline_func,
|
|
134
159
|
seconds=schedule.interval_seconds,
|
|
@@ -137,7 +162,7 @@ async def create_schedule(schedule: ScheduleRequest):
|
|
|
137
162
|
elif schedule.schedule_type == "cron":
|
|
138
163
|
if not schedule.cron_expression:
|
|
139
164
|
raise HTTPException(status_code=400, detail="Cron expression required for cron schedule")
|
|
140
|
-
|
|
165
|
+
get_scheduler().schedule_cron(
|
|
141
166
|
name=schedule.name,
|
|
142
167
|
pipeline_func=pipeline_func,
|
|
143
168
|
cron_expression=schedule.cron_expression,
|
|
@@ -157,28 +182,28 @@ async def create_schedule(schedule: ScheduleRequest):
|
|
|
157
182
|
@router.delete("/{schedule_name}")
|
|
158
183
|
async def delete_schedule(schedule_name: str):
|
|
159
184
|
"""Remove a schedule."""
|
|
160
|
-
|
|
185
|
+
get_scheduler().unschedule(schedule_name)
|
|
161
186
|
return {"status": "success", "message": f"Schedule {schedule_name} removed"}
|
|
162
187
|
|
|
163
188
|
|
|
164
189
|
@router.post("/{schedule_name}/enable")
|
|
165
190
|
async def enable_schedule(schedule_name: str):
|
|
166
191
|
"""Enable a schedule."""
|
|
167
|
-
|
|
192
|
+
get_scheduler().enable(schedule_name)
|
|
168
193
|
return {"status": "success"}
|
|
169
194
|
|
|
170
195
|
|
|
171
196
|
@router.post("/{schedule_name}/disable")
|
|
172
197
|
async def disable_schedule(schedule_name: str):
|
|
173
198
|
"""Disable a schedule."""
|
|
174
|
-
|
|
199
|
+
get_scheduler().disable(schedule_name)
|
|
175
200
|
return {"status": "success"}
|
|
176
201
|
|
|
177
202
|
|
|
178
203
|
@router.get("/{schedule_name}/history")
|
|
179
204
|
async def get_schedule_history(schedule_name: str, limit: int = 50):
|
|
180
205
|
"""Get execution history for a schedule."""
|
|
181
|
-
return
|
|
206
|
+
return get_scheduler().get_history(schedule_name, limit)
|
|
182
207
|
|
|
183
208
|
|
|
184
209
|
@router.get("/registered-pipelines")
|