flowyml 1.7.2__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/assets/base.py +15 -0
- flowyml/assets/metrics.py +5 -0
- flowyml/cli/main.py +709 -0
- flowyml/cli/stack_cli.py +138 -25
- flowyml/core/__init__.py +17 -0
- flowyml/core/executor.py +161 -26
- flowyml/core/image_builder.py +129 -0
- flowyml/core/log_streamer.py +227 -0
- flowyml/core/orchestrator.py +22 -2
- flowyml/core/pipeline.py +34 -10
- flowyml/core/routing.py +558 -0
- flowyml/core/step.py +9 -1
- flowyml/core/step_grouping.py +49 -35
- flowyml/core/types.py +407 -0
- flowyml/monitoring/alerts.py +10 -0
- flowyml/monitoring/notifications.py +104 -25
- flowyml/monitoring/slack_blocks.py +323 -0
- flowyml/plugins/__init__.py +251 -0
- flowyml/plugins/alerters/__init__.py +1 -0
- flowyml/plugins/alerters/slack.py +168 -0
- flowyml/plugins/base.py +752 -0
- flowyml/plugins/config.py +478 -0
- flowyml/plugins/deployers/__init__.py +22 -0
- flowyml/plugins/deployers/gcp_cloud_run.py +200 -0
- flowyml/plugins/deployers/sagemaker.py +306 -0
- flowyml/plugins/deployers/vertex.py +290 -0
- flowyml/plugins/integration.py +369 -0
- flowyml/plugins/manager.py +510 -0
- flowyml/plugins/model_registries/__init__.py +22 -0
- flowyml/plugins/model_registries/mlflow.py +159 -0
- flowyml/plugins/model_registries/sagemaker.py +489 -0
- flowyml/plugins/model_registries/vertex.py +386 -0
- flowyml/plugins/orchestrators/__init__.py +13 -0
- flowyml/plugins/orchestrators/sagemaker.py +443 -0
- flowyml/plugins/orchestrators/vertex_ai.py +461 -0
- flowyml/plugins/registries/__init__.py +13 -0
- flowyml/plugins/registries/ecr.py +321 -0
- flowyml/plugins/registries/gcr.py +313 -0
- flowyml/plugins/registry.py +454 -0
- flowyml/plugins/stack.py +494 -0
- flowyml/plugins/stack_config.py +537 -0
- flowyml/plugins/stores/__init__.py +13 -0
- flowyml/plugins/stores/gcs.py +460 -0
- flowyml/plugins/stores/s3.py +453 -0
- flowyml/plugins/trackers/__init__.py +11 -0
- flowyml/plugins/trackers/mlflow.py +316 -0
- flowyml/plugins/validators/__init__.py +3 -0
- flowyml/plugins/validators/deepchecks.py +119 -0
- flowyml/registry/__init__.py +2 -1
- flowyml/registry/model_environment.py +109 -0
- flowyml/registry/model_registry.py +241 -96
- flowyml/serving/__init__.py +17 -0
- flowyml/serving/model_server.py +628 -0
- flowyml/stacks/__init__.py +60 -0
- flowyml/stacks/aws.py +93 -0
- flowyml/stacks/base.py +62 -0
- flowyml/stacks/components.py +12 -0
- flowyml/stacks/gcp.py +44 -9
- flowyml/stacks/plugins.py +115 -0
- flowyml/stacks/registry.py +2 -1
- flowyml/storage/sql.py +401 -12
- flowyml/tracking/experiment.py +8 -5
- flowyml/ui/backend/Dockerfile +87 -16
- flowyml/ui/backend/auth.py +12 -2
- flowyml/ui/backend/main.py +149 -5
- flowyml/ui/backend/routers/ai_context.py +226 -0
- flowyml/ui/backend/routers/assets.py +23 -4
- flowyml/ui/backend/routers/auth.py +96 -0
- flowyml/ui/backend/routers/deployments.py +660 -0
- flowyml/ui/backend/routers/model_explorer.py +597 -0
- flowyml/ui/backend/routers/plugins.py +103 -51
- flowyml/ui/backend/routers/projects.py +91 -8
- flowyml/ui/backend/routers/runs.py +20 -1
- flowyml/ui/backend/routers/schedules.py +22 -17
- flowyml/ui/backend/routers/templates.py +319 -0
- flowyml/ui/backend/routers/websocket.py +2 -2
- flowyml/ui/frontend/Dockerfile +55 -6
- flowyml/ui/frontend/dist/assets/index-B5AsPTSz.css +1 -0
- flowyml/ui/frontend/dist/assets/index-dFbZ8wD8.js +753 -0
- flowyml/ui/frontend/dist/index.html +2 -2
- flowyml/ui/frontend/dist/logo.png +0 -0
- flowyml/ui/frontend/nginx.conf +65 -4
- flowyml/ui/frontend/package-lock.json +1404 -74
- flowyml/ui/frontend/package.json +3 -0
- flowyml/ui/frontend/public/logo.png +0 -0
- flowyml/ui/frontend/src/App.jsx +10 -7
- flowyml/ui/frontend/src/app/auth/Login.jsx +90 -0
- flowyml/ui/frontend/src/app/dashboard/page.jsx +8 -8
- flowyml/ui/frontend/src/app/deployments/page.jsx +786 -0
- flowyml/ui/frontend/src/app/model-explorer/page.jsx +1031 -0
- flowyml/ui/frontend/src/app/pipelines/page.jsx +12 -2
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectExperimentsList.jsx +19 -6
- flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +36 -24
- flowyml/ui/frontend/src/app/runs/page.jsx +8 -2
- flowyml/ui/frontend/src/app/settings/page.jsx +267 -253
- flowyml/ui/frontend/src/components/AssetDetailsPanel.jsx +29 -7
- flowyml/ui/frontend/src/components/Layout.jsx +6 -0
- flowyml/ui/frontend/src/components/PipelineGraph.jsx +79 -29
- flowyml/ui/frontend/src/components/RunDetailsPanel.jsx +36 -6
- flowyml/ui/frontend/src/components/RunMetaPanel.jsx +113 -0
- flowyml/ui/frontend/src/components/ai/AIAssistantButton.jsx +71 -0
- flowyml/ui/frontend/src/components/ai/AIAssistantPanel.jsx +420 -0
- flowyml/ui/frontend/src/components/header/Header.jsx +22 -0
- flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +4 -4
- flowyml/ui/frontend/src/components/plugins/{ZenMLIntegration.jsx → StackImport.jsx} +38 -12
- flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +36 -13
- flowyml/ui/frontend/src/contexts/AIAssistantContext.jsx +245 -0
- flowyml/ui/frontend/src/contexts/AuthContext.jsx +108 -0
- flowyml/ui/frontend/src/hooks/useAIContext.js +156 -0
- flowyml/ui/frontend/src/hooks/useWebGPU.js +54 -0
- flowyml/ui/frontend/src/layouts/MainLayout.jsx +6 -0
- flowyml/ui/frontend/src/router/index.jsx +47 -20
- flowyml/ui/frontend/src/services/pluginService.js +3 -1
- flowyml/ui/server_manager.py +5 -5
- flowyml/ui/utils.py +157 -39
- flowyml/utils/config.py +37 -15
- flowyml/utils/model_introspection.py +123 -0
- flowyml/utils/observability.py +30 -0
- flowyml-1.8.0.dist-info/METADATA +174 -0
- {flowyml-1.7.2.dist-info → flowyml-1.8.0.dist-info}/RECORD +123 -65
- {flowyml-1.7.2.dist-info → flowyml-1.8.0.dist-info}/WHEEL +1 -1
- flowyml/ui/frontend/dist/assets/index-B40RsQDq.css +0 -1
- flowyml/ui/frontend/dist/assets/index-CjI0zKCn.js +0 -685
- flowyml-1.7.2.dist-info/METADATA +0 -477
- {flowyml-1.7.2.dist-info → flowyml-1.8.0.dist-info}/entry_points.txt +0 -0
- {flowyml-1.7.2.dist-info → flowyml-1.8.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -34,7 +34,7 @@ class ImportStackRequest(BaseModel):
|
|
|
34
34
|
|
|
35
35
|
@router.get("/available", response_model=list[PluginInfo])
|
|
36
36
|
async def get_available_plugins():
|
|
37
|
-
"""Get list of available plugins."""
|
|
37
|
+
"""Get list of available FlowyML plugins."""
|
|
38
38
|
import importlib.metadata
|
|
39
39
|
|
|
40
40
|
# Helper to check if package is installed
|
|
@@ -45,51 +45,95 @@ async def get_available_plugins():
|
|
|
45
45
|
except importlib.metadata.PackageNotFoundError:
|
|
46
46
|
return False
|
|
47
47
|
|
|
48
|
-
#
|
|
48
|
+
# FlowyML Native Plugins
|
|
49
49
|
plugins = [
|
|
50
50
|
PluginInfo(
|
|
51
|
-
plugin_id="
|
|
52
|
-
name="
|
|
53
|
-
version="
|
|
54
|
-
author="
|
|
55
|
-
description="
|
|
56
|
-
downloads="
|
|
57
|
-
stars="
|
|
58
|
-
tags=["orchestrator", "
|
|
59
|
-
installed=is_installed("
|
|
51
|
+
plugin_id="flowyml-gcp",
|
|
52
|
+
name="FlowyML GCP",
|
|
53
|
+
version="1.8.0",
|
|
54
|
+
author="FlowyML",
|
|
55
|
+
description="Google Cloud Platform integration: Vertex AI orchestrator, GCS artifact store, and Cloud Run deployer.",
|
|
56
|
+
downloads="5.2k",
|
|
57
|
+
stars="180",
|
|
58
|
+
tags=["orchestrator", "artifact-store", "gcp", "vertex-ai"],
|
|
59
|
+
installed=is_installed("google-cloud-aiplatform"),
|
|
60
60
|
),
|
|
61
61
|
PluginInfo(
|
|
62
|
-
plugin_id="
|
|
63
|
-
name="
|
|
64
|
-
version="
|
|
65
|
-
author="
|
|
66
|
-
description="
|
|
62
|
+
plugin_id="flowyml-aws",
|
|
63
|
+
name="FlowyML AWS",
|
|
64
|
+
version="1.8.0",
|
|
65
|
+
author="FlowyML",
|
|
66
|
+
description="AWS integration: SageMaker orchestrator, S3 artifact store, and ECR container registry.",
|
|
67
|
+
downloads="4.8k",
|
|
68
|
+
stars="165",
|
|
69
|
+
tags=["orchestrator", "artifact-store", "aws", "sagemaker"],
|
|
70
|
+
installed=is_installed("boto3"),
|
|
71
|
+
),
|
|
72
|
+
PluginInfo(
|
|
73
|
+
plugin_id="flowyml-kubernetes",
|
|
74
|
+
name="FlowyML Kubernetes",
|
|
75
|
+
version="1.8.0",
|
|
76
|
+
author="FlowyML",
|
|
77
|
+
description="Kubernetes orchestrator for running pipelines on K8s clusters with auto-scaling.",
|
|
78
|
+
downloads="3.5k",
|
|
79
|
+
stars="145",
|
|
80
|
+
tags=["orchestrator", "kubernetes", "container"],
|
|
81
|
+
installed=is_installed("kubernetes"),
|
|
82
|
+
),
|
|
83
|
+
PluginInfo(
|
|
84
|
+
plugin_id="flowyml-mlflow",
|
|
85
|
+
name="FlowyML MLflow",
|
|
86
|
+
version="1.8.0",
|
|
87
|
+
author="FlowyML",
|
|
88
|
+
description="MLflow integration for experiment tracking, model registry, and deployment.",
|
|
89
|
+
downloads="6.1k",
|
|
90
|
+
stars="220",
|
|
91
|
+
tags=["tracking", "model-registry", "mlflow"],
|
|
92
|
+
installed=is_installed("mlflow"),
|
|
93
|
+
),
|
|
94
|
+
PluginInfo(
|
|
95
|
+
plugin_id="flowyml-wandb",
|
|
96
|
+
name="FlowyML Weights & Biases",
|
|
97
|
+
version="1.8.0",
|
|
98
|
+
author="FlowyML",
|
|
99
|
+
description="W&B integration for experiment tracking, artifact versioning, and collaboration.",
|
|
100
|
+
downloads="4.2k",
|
|
101
|
+
stars="195",
|
|
102
|
+
tags=["tracking", "wandb", "experiment"],
|
|
103
|
+
installed=is_installed("wandb"),
|
|
104
|
+
),
|
|
105
|
+
PluginInfo(
|
|
106
|
+
plugin_id="flowyml-pytorch",
|
|
107
|
+
name="FlowyML PyTorch",
|
|
108
|
+
version="1.8.0",
|
|
109
|
+
author="FlowyML",
|
|
110
|
+
description="PyTorch integration with automatic model serialization and distributed training support.",
|
|
67
111
|
downloads="8.5k",
|
|
68
|
-
stars="
|
|
69
|
-
tags=["
|
|
70
|
-
installed=is_installed("
|
|
112
|
+
stars="310",
|
|
113
|
+
tags=["framework", "pytorch", "deep-learning"],
|
|
114
|
+
installed=is_installed("torch"),
|
|
71
115
|
),
|
|
72
116
|
PluginInfo(
|
|
73
|
-
plugin_id="
|
|
74
|
-
name="
|
|
75
|
-
version="
|
|
76
|
-
author="
|
|
77
|
-
description="
|
|
78
|
-
downloads="
|
|
79
|
-
stars="
|
|
80
|
-
tags=["
|
|
81
|
-
installed=is_installed("
|
|
117
|
+
plugin_id="flowyml-tensorflow",
|
|
118
|
+
name="FlowyML TensorFlow",
|
|
119
|
+
version="1.8.0",
|
|
120
|
+
author="FlowyML",
|
|
121
|
+
description="TensorFlow/Keras integration with automatic callbacks and model tracking.",
|
|
122
|
+
downloads="7.8k",
|
|
123
|
+
stars="290",
|
|
124
|
+
tags=["framework", "tensorflow", "keras"],
|
|
125
|
+
installed=is_installed("tensorflow"),
|
|
82
126
|
),
|
|
83
127
|
PluginInfo(
|
|
84
|
-
plugin_id="
|
|
85
|
-
name="
|
|
86
|
-
version="1.
|
|
87
|
-
author="
|
|
88
|
-
description="
|
|
89
|
-
downloads="
|
|
90
|
-
stars="
|
|
91
|
-
tags=["
|
|
92
|
-
installed=is_installed("
|
|
128
|
+
plugin_id="flowyml-sklearn",
|
|
129
|
+
name="FlowyML Scikit-Learn",
|
|
130
|
+
version="1.8.0",
|
|
131
|
+
author="FlowyML",
|
|
132
|
+
description="Scikit-learn integration with automatic model serialization and metrics extraction.",
|
|
133
|
+
downloads="9.2k",
|
|
134
|
+
stars="340",
|
|
135
|
+
tags=["framework", "sklearn", "ml"],
|
|
136
|
+
installed=is_installed("scikit-learn"),
|
|
93
137
|
),
|
|
94
138
|
]
|
|
95
139
|
|
|
@@ -98,33 +142,41 @@ async def get_available_plugins():
|
|
|
98
142
|
|
|
99
143
|
@router.get("/installed", response_model=list[dict[str, Any]])
|
|
100
144
|
async def get_installed_plugins():
|
|
101
|
-
"""Get list of installed plugins."""
|
|
145
|
+
"""Get list of installed FlowyML plugins and integrations."""
|
|
102
146
|
import importlib.metadata
|
|
103
147
|
|
|
104
148
|
# Get all installed packages that could be plugins
|
|
105
149
|
installed = []
|
|
106
150
|
|
|
107
|
-
#
|
|
151
|
+
# FlowyML-related plugin packages
|
|
108
152
|
potential_plugins = [
|
|
109
|
-
|
|
110
|
-
"
|
|
111
|
-
"
|
|
112
|
-
"
|
|
113
|
-
"
|
|
114
|
-
|
|
115
|
-
"
|
|
116
|
-
"
|
|
117
|
-
|
|
118
|
-
"
|
|
153
|
+
# Cloud providers
|
|
154
|
+
("google-cloud-aiplatform", "FlowyML GCP"),
|
|
155
|
+
("google-cloud-storage", "GCS Storage"),
|
|
156
|
+
("boto3", "FlowyML AWS"),
|
|
157
|
+
("sagemaker", "AWS SageMaker"),
|
|
158
|
+
# Orchestrators
|
|
159
|
+
("kubernetes", "FlowyML Kubernetes"),
|
|
160
|
+
("kfp", "Kubeflow Pipelines"),
|
|
161
|
+
# Tracking & Registry
|
|
162
|
+
("mlflow", "FlowyML MLflow"),
|
|
163
|
+
("wandb", "FlowyML W&B"),
|
|
164
|
+
# ML Frameworks
|
|
165
|
+
("torch", "FlowyML PyTorch"),
|
|
166
|
+
("tensorflow", "FlowyML TensorFlow"),
|
|
167
|
+
("keras", "FlowyML Keras"),
|
|
168
|
+
("scikit-learn", "FlowyML Scikit-Learn"),
|
|
169
|
+
# Core
|
|
170
|
+
("flowyml", "FlowyML Core"),
|
|
119
171
|
]
|
|
120
172
|
|
|
121
|
-
for package_name in potential_plugins:
|
|
173
|
+
for package_name, display_name in potential_plugins:
|
|
122
174
|
try:
|
|
123
175
|
dist = importlib.metadata.distribution(package_name)
|
|
124
176
|
installed.append(
|
|
125
177
|
{
|
|
126
178
|
"id": package_name,
|
|
127
|
-
"name":
|
|
179
|
+
"name": display_name,
|
|
128
180
|
"version": dist.version,
|
|
129
181
|
"description": dist.metadata.get("Summary", ""),
|
|
130
182
|
"status": "active",
|
|
@@ -14,10 +14,42 @@ def get_projects_manager() -> ProjectManager:
|
|
|
14
14
|
|
|
15
15
|
@router.get("/")
|
|
16
16
|
async def list_projects(manager: ProjectManager = Depends(get_projects_manager)):
|
|
17
|
-
"""List all projects."""
|
|
17
|
+
"""List all projects, including those discovered from run metadata."""
|
|
18
18
|
try:
|
|
19
|
-
|
|
20
|
-
|
|
19
|
+
# Get explicitly created projects
|
|
20
|
+
explicit_projects = manager.list_projects()
|
|
21
|
+
project_names = {p.get("name") for p in explicit_projects if p.get("name")}
|
|
22
|
+
|
|
23
|
+
# Also discover projects from run metadata in global store
|
|
24
|
+
from flowyml.ui.backend.dependencies import get_store
|
|
25
|
+
|
|
26
|
+
store = get_store()
|
|
27
|
+
|
|
28
|
+
discovered_projects = []
|
|
29
|
+
try:
|
|
30
|
+
# Get all runs and extract unique project names
|
|
31
|
+
runs = store.list_runs(limit=1000)
|
|
32
|
+
for run in runs:
|
|
33
|
+
project_name = run.get("project")
|
|
34
|
+
if project_name and project_name not in project_names:
|
|
35
|
+
project_names.add(project_name)
|
|
36
|
+
# Create a synthetic project entry for discovered projects
|
|
37
|
+
discovered_projects.append(
|
|
38
|
+
{
|
|
39
|
+
"name": project_name,
|
|
40
|
+
"description": "Auto-discovered from pipeline runs",
|
|
41
|
+
"created_at": run.get("start_time"),
|
|
42
|
+
"pipelines": [],
|
|
43
|
+
"tags": {},
|
|
44
|
+
"discovered": True, # Flag to indicate this wasn't explicitly created
|
|
45
|
+
},
|
|
46
|
+
)
|
|
47
|
+
except Exception:
|
|
48
|
+
pass # Store might not be initialized
|
|
49
|
+
|
|
50
|
+
# Combine explicit and discovered projects
|
|
51
|
+
all_projects = explicit_projects + discovered_projects
|
|
52
|
+
return {"projects": all_projects}
|
|
21
53
|
except Exception as e:
|
|
22
54
|
raise HTTPException(status_code=500, detail=str(e))
|
|
23
55
|
|
|
@@ -93,14 +125,65 @@ async def get_project_metrics(
|
|
|
93
125
|
limit: int = 100,
|
|
94
126
|
manager: ProjectManager = Depends(get_projects_manager),
|
|
95
127
|
):
|
|
96
|
-
"""Get logged
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
128
|
+
"""Get logged metrics for a project (from model_metrics table and Metrics artifacts)."""
|
|
129
|
+
metrics = []
|
|
130
|
+
|
|
131
|
+
from flowyml.ui.backend.dependencies import get_store
|
|
132
|
+
|
|
133
|
+
store = get_store()
|
|
134
|
+
|
|
135
|
+
try:
|
|
136
|
+
# Get all runs for this project
|
|
137
|
+
all_runs = store.list_runs(limit=1000)
|
|
138
|
+
project_run_ids = {r.get("run_id") for r in all_runs if r.get("project") == project_name}
|
|
139
|
+
|
|
140
|
+
# 1. Try to get metrics from model_metrics table
|
|
141
|
+
all_model_metrics = store.list_model_metrics(limit=limit * 2)
|
|
142
|
+
for m in all_model_metrics:
|
|
143
|
+
if m.get("run_id") in project_run_ids or m.get("project") == project_name:
|
|
144
|
+
metrics.append(m)
|
|
145
|
+
|
|
146
|
+
# 2. Also extract metrics from Metrics artifacts
|
|
147
|
+
all_assets = store.list_assets(limit=500)
|
|
148
|
+
for asset in all_assets:
|
|
149
|
+
# Check if it's a metrics artifact for this project (case-insensitive type check)
|
|
150
|
+
asset_type = str(asset.get("type", "")).lower()
|
|
151
|
+
if asset_type == "metrics" and asset.get("run_id") in project_run_ids:
|
|
152
|
+
# Get properties which contain the metric values
|
|
153
|
+
props = asset.get("properties", {})
|
|
154
|
+
created_at = asset.get("created_at", "")
|
|
155
|
+
run_id = asset.get("run_id", "")
|
|
156
|
+
asset_name = asset.get("name", "evaluation")
|
|
157
|
+
|
|
158
|
+
# Convert artifact properties to metric entries
|
|
159
|
+
for key, value in props.items():
|
|
160
|
+
if isinstance(value, (int, float)) and key not in ["samples"]:
|
|
161
|
+
metrics.append(
|
|
162
|
+
{
|
|
163
|
+
"project": project_name,
|
|
164
|
+
"model_name": asset_name,
|
|
165
|
+
"run_id": run_id,
|
|
166
|
+
"metric_name": key,
|
|
167
|
+
"metric_value": value,
|
|
168
|
+
"environment": "evaluation",
|
|
169
|
+
"tags": {"source": "artifact"},
|
|
170
|
+
"created_at": created_at,
|
|
171
|
+
},
|
|
172
|
+
)
|
|
173
|
+
except Exception as e:
|
|
174
|
+
import logging
|
|
175
|
+
|
|
176
|
+
logging.getLogger(__name__).warning(f"Error fetching metrics: {e}")
|
|
177
|
+
|
|
178
|
+
# Try explicit project as fallback
|
|
179
|
+
if not metrics:
|
|
180
|
+
project = manager.get_project(project_name)
|
|
181
|
+
if project:
|
|
182
|
+
metrics = project.list_model_metrics(model_name=model_name, limit=limit)
|
|
100
183
|
|
|
101
184
|
return {
|
|
102
185
|
"project": project_name,
|
|
103
|
-
"metrics":
|
|
186
|
+
"metrics": metrics[:limit],
|
|
104
187
|
}
|
|
105
188
|
|
|
106
189
|
|
|
@@ -163,6 +163,9 @@ async def get_run(run_id: str):
|
|
|
163
163
|
for step_name, ts in _heartbeat_timestamps[run_id].items():
|
|
164
164
|
if step_name in run.get("steps", {}):
|
|
165
165
|
run["steps"][step_name]["last_heartbeat"] = ts
|
|
166
|
+
for step_name, metrics in _step_metrics.get(run_id, {}).items():
|
|
167
|
+
if step_name in run.get("steps", {}):
|
|
168
|
+
run["steps"][step_name]["metrics"] = metrics
|
|
166
169
|
|
|
167
170
|
return run
|
|
168
171
|
|
|
@@ -296,11 +299,14 @@ async def get_cloud_status(run_id: str):
|
|
|
296
299
|
class HeartbeatRequest(BaseModel):
|
|
297
300
|
step_name: str
|
|
298
301
|
status: str = "running"
|
|
302
|
+
metrics: dict | None = None
|
|
299
303
|
|
|
300
304
|
|
|
301
|
-
# In-memory storage for heartbeat timestamps
|
|
305
|
+
# In-memory storage for heartbeat timestamps and metrics
|
|
302
306
|
# Format: {run_id: {step_name: last_heartbeat_timestamp}}
|
|
303
307
|
_heartbeat_timestamps: dict[str, dict[str, float]] = {}
|
|
308
|
+
# Format: {run_id: {step_name: metrics_dict}}
|
|
309
|
+
_step_metrics: dict[str, dict[str, dict]] = {}
|
|
304
310
|
_heartbeat_lock = __import__("threading").Lock()
|
|
305
311
|
|
|
306
312
|
# Heartbeat interval in seconds (should match executor's interval)
|
|
@@ -319,6 +325,14 @@ def _record_heartbeat(run_id: str, step_name: str) -> None:
|
|
|
319
325
|
_heartbeat_timestamps[run_id][step_name] = time.time()
|
|
320
326
|
|
|
321
327
|
|
|
328
|
+
def _record_step_metrics(run_id: str, step_name: str, metrics: dict) -> None:
|
|
329
|
+
"""Record metrics for a step."""
|
|
330
|
+
with _heartbeat_lock:
|
|
331
|
+
if run_id not in _step_metrics:
|
|
332
|
+
_step_metrics[run_id] = {}
|
|
333
|
+
_step_metrics[run_id][step_name] = metrics
|
|
334
|
+
|
|
335
|
+
|
|
322
336
|
def _get_dead_steps(run_id: str) -> list[str]:
|
|
323
337
|
"""Get list of steps that have missed too many heartbeats."""
|
|
324
338
|
import time
|
|
@@ -342,6 +356,7 @@ def _cleanup_heartbeats(run_id: str) -> None:
|
|
|
342
356
|
"""Remove heartbeat tracking for a completed run."""
|
|
343
357
|
with _heartbeat_lock:
|
|
344
358
|
_heartbeat_timestamps.pop(run_id, None)
|
|
359
|
+
_step_metrics.pop(run_id, None)
|
|
345
360
|
|
|
346
361
|
|
|
347
362
|
@router.post("/{run_id}/steps/{step_name}/heartbeat")
|
|
@@ -356,6 +371,10 @@ async def step_heartbeat(run_id: str, step_name: str, heartbeat: HeartbeatReques
|
|
|
356
371
|
# Record heartbeat timestamp
|
|
357
372
|
_record_heartbeat(run_id, step_name)
|
|
358
373
|
|
|
374
|
+
# Record metrics if present
|
|
375
|
+
if heartbeat.metrics:
|
|
376
|
+
_record_step_metrics(run_id, step_name, heartbeat.metrics)
|
|
377
|
+
|
|
359
378
|
# Check if run is marked for stopping
|
|
360
379
|
run = store.load_run(run_id)
|
|
361
380
|
if not run:
|
|
@@ -4,11 +4,16 @@ from flowyml.core.scheduler import PipelineScheduler
|
|
|
4
4
|
from flowyml.registry.pipeline_registry import pipeline_registry
|
|
5
5
|
|
|
6
6
|
router = APIRouter()
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
7
|
+
_scheduler = None
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def get_scheduler():
|
|
11
|
+
"""Get or initialize the scheduler singleton."""
|
|
12
|
+
global _scheduler
|
|
13
|
+
if _scheduler is None:
|
|
14
|
+
_scheduler = PipelineScheduler()
|
|
15
|
+
_scheduler.start()
|
|
16
|
+
return _scheduler
|
|
12
17
|
|
|
13
18
|
|
|
14
19
|
class ScheduleRequest(BaseModel):
|
|
@@ -32,7 +37,7 @@ async def list_schedules():
|
|
|
32
37
|
"""
|
|
33
38
|
# First, get schedules from the in-memory scheduler
|
|
34
39
|
memory_schedules = []
|
|
35
|
-
for s in
|
|
40
|
+
for s in get_scheduler().list_schedules():
|
|
36
41
|
memory_schedules.append(
|
|
37
42
|
{
|
|
38
43
|
"pipeline_name": s.pipeline_name,
|
|
@@ -48,8 +53,8 @@ async def list_schedules():
|
|
|
48
53
|
# Also read directly from the persistence database to get schedules
|
|
49
54
|
# created by other processes (e.g., user scripts)
|
|
50
55
|
db_schedules = []
|
|
51
|
-
if
|
|
52
|
-
db_schedules =
|
|
56
|
+
if get_scheduler()._persistence:
|
|
57
|
+
db_schedules = get_scheduler()._persistence.list_all_schedules()
|
|
53
58
|
|
|
54
59
|
# Merge: prefer memory schedules (more up-to-date), but include db-only ones
|
|
55
60
|
memory_names = {s["pipeline_name"] for s in memory_schedules}
|
|
@@ -65,7 +70,7 @@ async def list_schedules():
|
|
|
65
70
|
@router.get("/health")
|
|
66
71
|
async def get_scheduler_health():
|
|
67
72
|
"""Get scheduler health metrics."""
|
|
68
|
-
return
|
|
73
|
+
return get_scheduler().health_check()
|
|
69
74
|
|
|
70
75
|
|
|
71
76
|
@router.post("/")
|
|
@@ -133,7 +138,7 @@ async def create_schedule(schedule: ScheduleRequest):
|
|
|
133
138
|
# 2. Schedule it
|
|
134
139
|
try:
|
|
135
140
|
if schedule.schedule_type == "daily":
|
|
136
|
-
|
|
141
|
+
get_scheduler().schedule_daily(
|
|
137
142
|
name=schedule.name,
|
|
138
143
|
pipeline_func=pipeline_func,
|
|
139
144
|
hour=schedule.hour,
|
|
@@ -141,14 +146,14 @@ async def create_schedule(schedule: ScheduleRequest):
|
|
|
141
146
|
timezone=schedule.timezone,
|
|
142
147
|
)
|
|
143
148
|
elif schedule.schedule_type == "hourly":
|
|
144
|
-
|
|
149
|
+
get_scheduler().schedule_hourly(
|
|
145
150
|
name=schedule.name,
|
|
146
151
|
pipeline_func=pipeline_func,
|
|
147
152
|
minute=schedule.minute,
|
|
148
153
|
timezone=schedule.timezone,
|
|
149
154
|
)
|
|
150
155
|
elif schedule.schedule_type == "interval":
|
|
151
|
-
|
|
156
|
+
get_scheduler().schedule_interval(
|
|
152
157
|
name=schedule.name,
|
|
153
158
|
pipeline_func=pipeline_func,
|
|
154
159
|
seconds=schedule.interval_seconds,
|
|
@@ -157,7 +162,7 @@ async def create_schedule(schedule: ScheduleRequest):
|
|
|
157
162
|
elif schedule.schedule_type == "cron":
|
|
158
163
|
if not schedule.cron_expression:
|
|
159
164
|
raise HTTPException(status_code=400, detail="Cron expression required for cron schedule")
|
|
160
|
-
|
|
165
|
+
get_scheduler().schedule_cron(
|
|
161
166
|
name=schedule.name,
|
|
162
167
|
pipeline_func=pipeline_func,
|
|
163
168
|
cron_expression=schedule.cron_expression,
|
|
@@ -177,28 +182,28 @@ async def create_schedule(schedule: ScheduleRequest):
|
|
|
177
182
|
@router.delete("/{schedule_name}")
|
|
178
183
|
async def delete_schedule(schedule_name: str):
|
|
179
184
|
"""Remove a schedule."""
|
|
180
|
-
|
|
185
|
+
get_scheduler().unschedule(schedule_name)
|
|
181
186
|
return {"status": "success", "message": f"Schedule {schedule_name} removed"}
|
|
182
187
|
|
|
183
188
|
|
|
184
189
|
@router.post("/{schedule_name}/enable")
|
|
185
190
|
async def enable_schedule(schedule_name: str):
|
|
186
191
|
"""Enable a schedule."""
|
|
187
|
-
|
|
192
|
+
get_scheduler().enable(schedule_name)
|
|
188
193
|
return {"status": "success"}
|
|
189
194
|
|
|
190
195
|
|
|
191
196
|
@router.post("/{schedule_name}/disable")
|
|
192
197
|
async def disable_schedule(schedule_name: str):
|
|
193
198
|
"""Disable a schedule."""
|
|
194
|
-
|
|
199
|
+
get_scheduler().disable(schedule_name)
|
|
195
200
|
return {"status": "success"}
|
|
196
201
|
|
|
197
202
|
|
|
198
203
|
@router.get("/{schedule_name}/history")
|
|
199
204
|
async def get_schedule_history(schedule_name: str, limit: int = 50):
|
|
200
205
|
"""Get execution history for a schedule."""
|
|
201
|
-
return
|
|
206
|
+
return get_scheduler().get_history(schedule_name, limit)
|
|
202
207
|
|
|
203
208
|
|
|
204
209
|
@router.get("/registered-pipelines")
|