flowyml-1.7.1-py3-none-any.whl → flowyml-1.8.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. flowyml/assets/base.py +15 -0
  2. flowyml/assets/dataset.py +570 -17
  3. flowyml/assets/metrics.py +5 -0
  4. flowyml/assets/model.py +1052 -15
  5. flowyml/cli/main.py +709 -0
  6. flowyml/cli/stack_cli.py +138 -25
  7. flowyml/core/__init__.py +17 -0
  8. flowyml/core/executor.py +231 -37
  9. flowyml/core/image_builder.py +129 -0
  10. flowyml/core/log_streamer.py +227 -0
  11. flowyml/core/orchestrator.py +59 -4
  12. flowyml/core/pipeline.py +65 -13
  13. flowyml/core/routing.py +558 -0
  14. flowyml/core/scheduler.py +88 -5
  15. flowyml/core/step.py +9 -1
  16. flowyml/core/step_grouping.py +49 -35
  17. flowyml/core/types.py +407 -0
  18. flowyml/integrations/keras.py +247 -82
  19. flowyml/monitoring/alerts.py +10 -0
  20. flowyml/monitoring/notifications.py +104 -25
  21. flowyml/monitoring/slack_blocks.py +323 -0
  22. flowyml/plugins/__init__.py +251 -0
  23. flowyml/plugins/alerters/__init__.py +1 -0
  24. flowyml/plugins/alerters/slack.py +168 -0
  25. flowyml/plugins/base.py +752 -0
  26. flowyml/plugins/config.py +478 -0
  27. flowyml/plugins/deployers/__init__.py +22 -0
  28. flowyml/plugins/deployers/gcp_cloud_run.py +200 -0
  29. flowyml/plugins/deployers/sagemaker.py +306 -0
  30. flowyml/plugins/deployers/vertex.py +290 -0
  31. flowyml/plugins/integration.py +369 -0
  32. flowyml/plugins/manager.py +510 -0
  33. flowyml/plugins/model_registries/__init__.py +22 -0
  34. flowyml/plugins/model_registries/mlflow.py +159 -0
  35. flowyml/plugins/model_registries/sagemaker.py +489 -0
  36. flowyml/plugins/model_registries/vertex.py +386 -0
  37. flowyml/plugins/orchestrators/__init__.py +13 -0
  38. flowyml/plugins/orchestrators/sagemaker.py +443 -0
  39. flowyml/plugins/orchestrators/vertex_ai.py +461 -0
  40. flowyml/plugins/registries/__init__.py +13 -0
  41. flowyml/plugins/registries/ecr.py +321 -0
  42. flowyml/plugins/registries/gcr.py +313 -0
  43. flowyml/plugins/registry.py +454 -0
  44. flowyml/plugins/stack.py +494 -0
  45. flowyml/plugins/stack_config.py +537 -0
  46. flowyml/plugins/stores/__init__.py +13 -0
  47. flowyml/plugins/stores/gcs.py +460 -0
  48. flowyml/plugins/stores/s3.py +453 -0
  49. flowyml/plugins/trackers/__init__.py +11 -0
  50. flowyml/plugins/trackers/mlflow.py +316 -0
  51. flowyml/plugins/validators/__init__.py +3 -0
  52. flowyml/plugins/validators/deepchecks.py +119 -0
  53. flowyml/registry/__init__.py +2 -1
  54. flowyml/registry/model_environment.py +109 -0
  55. flowyml/registry/model_registry.py +241 -96
  56. flowyml/serving/__init__.py +17 -0
  57. flowyml/serving/model_server.py +628 -0
  58. flowyml/stacks/__init__.py +60 -0
  59. flowyml/stacks/aws.py +93 -0
  60. flowyml/stacks/base.py +62 -0
  61. flowyml/stacks/components.py +12 -0
  62. flowyml/stacks/gcp.py +44 -9
  63. flowyml/stacks/plugins.py +115 -0
  64. flowyml/stacks/registry.py +2 -1
  65. flowyml/storage/sql.py +401 -12
  66. flowyml/tracking/experiment.py +8 -5
  67. flowyml/ui/backend/Dockerfile +87 -16
  68. flowyml/ui/backend/auth.py +12 -2
  69. flowyml/ui/backend/main.py +149 -5
  70. flowyml/ui/backend/routers/ai_context.py +226 -0
  71. flowyml/ui/backend/routers/assets.py +23 -4
  72. flowyml/ui/backend/routers/auth.py +96 -0
  73. flowyml/ui/backend/routers/deployments.py +660 -0
  74. flowyml/ui/backend/routers/model_explorer.py +597 -0
  75. flowyml/ui/backend/routers/plugins.py +103 -51
  76. flowyml/ui/backend/routers/projects.py +91 -8
  77. flowyml/ui/backend/routers/runs.py +132 -1
  78. flowyml/ui/backend/routers/schedules.py +54 -29
  79. flowyml/ui/backend/routers/templates.py +319 -0
  80. flowyml/ui/backend/routers/websocket.py +2 -2
  81. flowyml/ui/frontend/Dockerfile +55 -6
  82. flowyml/ui/frontend/dist/assets/index-B5AsPTSz.css +1 -0
  83. flowyml/ui/frontend/dist/assets/index-dFbZ8wD8.js +753 -0
  84. flowyml/ui/frontend/dist/index.html +2 -2
  85. flowyml/ui/frontend/dist/logo.png +0 -0
  86. flowyml/ui/frontend/nginx.conf +65 -4
  87. flowyml/ui/frontend/package-lock.json +1415 -74
  88. flowyml/ui/frontend/package.json +4 -0
  89. flowyml/ui/frontend/public/logo.png +0 -0
  90. flowyml/ui/frontend/src/App.jsx +10 -7
  91. flowyml/ui/frontend/src/app/assets/page.jsx +890 -321
  92. flowyml/ui/frontend/src/app/auth/Login.jsx +90 -0
  93. flowyml/ui/frontend/src/app/dashboard/page.jsx +8 -8
  94. flowyml/ui/frontend/src/app/deployments/page.jsx +786 -0
  95. flowyml/ui/frontend/src/app/model-explorer/page.jsx +1031 -0
  96. flowyml/ui/frontend/src/app/pipelines/page.jsx +12 -2
  97. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectExperimentsList.jsx +19 -6
  98. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectMetricsPanel.jsx +1 -1
  99. flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +601 -101
  100. flowyml/ui/frontend/src/app/runs/page.jsx +8 -2
  101. flowyml/ui/frontend/src/app/settings/page.jsx +267 -253
  102. flowyml/ui/frontend/src/components/ArtifactViewer.jsx +62 -2
  103. flowyml/ui/frontend/src/components/AssetDetailsPanel.jsx +424 -29
  104. flowyml/ui/frontend/src/components/AssetTreeHierarchy.jsx +119 -11
  105. flowyml/ui/frontend/src/components/DatasetViewer.jsx +753 -0
  106. flowyml/ui/frontend/src/components/Layout.jsx +6 -0
  107. flowyml/ui/frontend/src/components/PipelineGraph.jsx +79 -29
  108. flowyml/ui/frontend/src/components/RunDetailsPanel.jsx +36 -6
  109. flowyml/ui/frontend/src/components/RunMetaPanel.jsx +113 -0
  110. flowyml/ui/frontend/src/components/TrainingHistoryChart.jsx +514 -0
  111. flowyml/ui/frontend/src/components/TrainingMetricsPanel.jsx +175 -0
  112. flowyml/ui/frontend/src/components/ai/AIAssistantButton.jsx +71 -0
  113. flowyml/ui/frontend/src/components/ai/AIAssistantPanel.jsx +420 -0
  114. flowyml/ui/frontend/src/components/header/Header.jsx +22 -0
  115. flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +4 -4
  116. flowyml/ui/frontend/src/components/plugins/{ZenMLIntegration.jsx → StackImport.jsx} +38 -12
  117. flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +36 -13
  118. flowyml/ui/frontend/src/contexts/AIAssistantContext.jsx +245 -0
  119. flowyml/ui/frontend/src/contexts/AuthContext.jsx +108 -0
  120. flowyml/ui/frontend/src/hooks/useAIContext.js +156 -0
  121. flowyml/ui/frontend/src/hooks/useWebGPU.js +54 -0
  122. flowyml/ui/frontend/src/layouts/MainLayout.jsx +6 -0
  123. flowyml/ui/frontend/src/router/index.jsx +47 -20
  124. flowyml/ui/frontend/src/services/pluginService.js +3 -1
  125. flowyml/ui/server_manager.py +5 -5
  126. flowyml/ui/utils.py +157 -39
  127. flowyml/utils/config.py +37 -15
  128. flowyml/utils/model_introspection.py +123 -0
  129. flowyml/utils/observability.py +30 -0
  130. flowyml-1.8.0.dist-info/METADATA +174 -0
  131. {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/RECORD +134 -73
  132. {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/WHEEL +1 -1
  133. flowyml/ui/frontend/dist/assets/index-BqDQvp63.js +0 -630
  134. flowyml/ui/frontend/dist/assets/index-By4trVyv.css +0 -1
  135. flowyml-1.7.1.dist-info/METADATA +0 -477
  136. {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/entry_points.txt +0 -0
  137. {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/licenses/LICENSE +0 -0

flowyml/ui/backend/routers/plugins.py
@@ -34,7 +34,7 @@ class ImportStackRequest(BaseModel):
 
 @router.get("/available", response_model=list[PluginInfo])
 async def get_available_plugins():
-    """Get list of available plugins."""
+    """Get list of available FlowyML plugins."""
     import importlib.metadata
 
     # Helper to check if package is installed
@@ -45,51 +45,95 @@ async def get_available_plugins():
         except importlib.metadata.PackageNotFoundError:
             return False
 
-    # Mock data for now - in production this would query a plugin registry
+    # FlowyML Native Plugins
     plugins = [
         PluginInfo(
-            plugin_id="zenml-kubernetes",
-            name="zenml-kubernetes",
-            version="0.45.0",
-            author="ZenML",
-            description="Kubernetes orchestrator integration from ZenML ecosystem.",
-            downloads="12k",
-            stars="450",
-            tags=["orchestrator", "kubernetes", "zenml"],
-            installed=is_installed("zenml-kubernetes"),
+            plugin_id="flowyml-gcp",
+            name="FlowyML GCP",
+            version="1.8.0",
+            author="FlowyML",
+            description="Google Cloud Platform integration: Vertex AI orchestrator, GCS artifact store, and Cloud Run deployer.",
+            downloads="5.2k",
+            stars="180",
+            tags=["orchestrator", "artifact-store", "gcp", "vertex-ai"],
+            installed=is_installed("google-cloud-aiplatform"),
         ),
         PluginInfo(
-            plugin_id="zenml-mlflow",
-            name="zenml-mlflow",
-            version="0.45.0",
-            author="ZenML",
-            description="MLflow integration for experiment tracking and model deployment.",
+            plugin_id="flowyml-aws",
+            name="FlowyML AWS",
+            version="1.8.0",
+            author="FlowyML",
+            description="AWS integration: SageMaker orchestrator, S3 artifact store, and ECR container registry.",
+            downloads="4.8k",
+            stars="165",
+            tags=["orchestrator", "artifact-store", "aws", "sagemaker"],
+            installed=is_installed("boto3"),
+        ),
+        PluginInfo(
+            plugin_id="flowyml-kubernetes",
+            name="FlowyML Kubernetes",
+            version="1.8.0",
+            author="FlowyML",
+            description="Kubernetes orchestrator for running pipelines on K8s clusters with auto-scaling.",
+            downloads="3.5k",
+            stars="145",
+            tags=["orchestrator", "kubernetes", "container"],
+            installed=is_installed("kubernetes"),
+        ),
+        PluginInfo(
+            plugin_id="flowyml-mlflow",
+            name="FlowyML MLflow",
+            version="1.8.0",
+            author="FlowyML",
+            description="MLflow integration for experiment tracking, model registry, and deployment.",
+            downloads="6.1k",
+            stars="220",
+            tags=["tracking", "model-registry", "mlflow"],
+            installed=is_installed("mlflow"),
+        ),
+        PluginInfo(
+            plugin_id="flowyml-wandb",
+            name="FlowyML Weights & Biases",
+            version="1.8.0",
+            author="FlowyML",
+            description="W&B integration for experiment tracking, artifact versioning, and collaboration.",
+            downloads="4.2k",
+            stars="195",
+            tags=["tracking", "wandb", "experiment"],
+            installed=is_installed("wandb"),
+        ),
+        PluginInfo(
+            plugin_id="flowyml-pytorch",
+            name="FlowyML PyTorch",
+            version="1.8.0",
+            author="FlowyML",
+            description="PyTorch integration with automatic model serialization and distributed training support.",
             downloads="8.5k",
-            stars="320",
-            tags=["tracking", "mlflow", "zenml"],
-            installed=is_installed("zenml-mlflow"),
+            stars="310",
+            tags=["framework", "pytorch", "deep-learning"],
+            installed=is_installed("torch"),
         ),
         PluginInfo(
-            plugin_id="airflow-providers-google",
-            name="airflow-providers-google",
-            version="10.1.0",
-            author="Apache Airflow",
-            description="Google Cloud Platform providers for Airflow.",
-            downloads="50k",
-            stars="1.2k",
-            tags=["orchestrator", "gcp", "airflow"],
-            installed=is_installed("airflow-providers-google"),
+            plugin_id="flowyml-tensorflow",
+            name="FlowyML TensorFlow",
+            version="1.8.0",
+            author="FlowyML",
+            description="TensorFlow/Keras integration with automatic callbacks and model tracking.",
+            downloads="7.8k",
+            stars="290",
+            tags=["framework", "tensorflow", "keras"],
+            installed=is_installed("tensorflow"),
         ),
         PluginInfo(
-            plugin_id="aws-s3",
-            name="aws-s3",
-            version="1.0.0",
-            author="AWS",
-            description="S3 artifact store integration.",
-            downloads="15k",
-            stars="200",
-            tags=["artifact-store", "aws"],
-            installed=is_installed("aws-s3"),
+            plugin_id="flowyml-sklearn",
+            name="FlowyML Scikit-Learn",
+            version="1.8.0",
+            author="FlowyML",
+            description="Scikit-learn integration with automatic model serialization and metrics extraction.",
+            downloads="9.2k",
+            stars="340",
+            tags=["framework", "sklearn", "ml"],
+            installed=is_installed("scikit-learn"),
         ),
     ]
 
@@ -98,33 +142,41 @@ async def get_available_plugins():
 
 @router.get("/installed", response_model=list[dict[str, Any]])
 async def get_installed_plugins():
-    """Get list of installed plugins."""
+    """Get list of installed FlowyML plugins and integrations."""
     import importlib.metadata
 
     # Get all installed packages that could be plugins
     installed = []
 
-    # List of known plugin packages (you can expand this)
+    # FlowyML-related plugin packages
    potential_plugins = [
-        "zenml",
-        "zenml-kubernetes",
-        "zenml-mlflow",
-        "zenml-s3",
-        "airflow",
-        "airflow-providers-google",
-        "airflow-providers-aws",
-        "aws-s3",
-        "boto3",
-        "kubernetes",
+        # Cloud providers
+        ("google-cloud-aiplatform", "FlowyML GCP"),
+        ("google-cloud-storage", "GCS Storage"),
+        ("boto3", "FlowyML AWS"),
+        ("sagemaker", "AWS SageMaker"),
+        # Orchestrators
+        ("kubernetes", "FlowyML Kubernetes"),
+        ("kfp", "Kubeflow Pipelines"),
+        # Tracking & Registry
+        ("mlflow", "FlowyML MLflow"),
+        ("wandb", "FlowyML W&B"),
+        # ML Frameworks
+        ("torch", "FlowyML PyTorch"),
+        ("tensorflow", "FlowyML TensorFlow"),
+        ("keras", "FlowyML Keras"),
+        ("scikit-learn", "FlowyML Scikit-Learn"),
+        # Core
+        ("flowyml", "FlowyML Core"),
     ]
 
-    for package_name in potential_plugins:
+    for package_name, display_name in potential_plugins:
         try:
             dist = importlib.metadata.distribution(package_name)
             installed.append(
                 {
                     "id": package_name,
-                    "name": package_name,
+                    "name": display_name,
                     "version": dist.version,
                     "description": dist.metadata.get("Summary", ""),
                     "status": "active",

flowyml/ui/backend/routers/projects.py
@@ -14,10 +14,42 @@ def get_projects_manager() -> ProjectManager:
 
 @router.get("/")
 async def list_projects(manager: ProjectManager = Depends(get_projects_manager)):
-    """List all projects."""
+    """List all projects, including those discovered from run metadata."""
     try:
-        projects = manager.list_projects()
-        return {"projects": projects}
+        # Get explicitly created projects
+        explicit_projects = manager.list_projects()
+        project_names = {p.get("name") for p in explicit_projects if p.get("name")}
+
+        # Also discover projects from run metadata in global store
+        from flowyml.ui.backend.dependencies import get_store
+
+        store = get_store()
+
+        discovered_projects = []
+        try:
+            # Get all runs and extract unique project names
+            runs = store.list_runs(limit=1000)
+            for run in runs:
+                project_name = run.get("project")
+                if project_name and project_name not in project_names:
+                    project_names.add(project_name)
+                    # Create a synthetic project entry for discovered projects
+                    discovered_projects.append(
+                        {
+                            "name": project_name,
+                            "description": "Auto-discovered from pipeline runs",
+                            "created_at": run.get("start_time"),
+                            "pipelines": [],
+                            "tags": {},
+                            "discovered": True,  # Flag to indicate this wasn't explicitly created
+                        },
+                    )
+        except Exception:
+            pass  # Store might not be initialized
+
+        # Combine explicit and discovered projects
+        all_projects = explicit_projects + discovered_projects
+        return {"projects": all_projects}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
@@ -93,14 +125,65 @@ async def get_project_metrics(
     limit: int = 100,
     manager: ProjectManager = Depends(get_projects_manager),
 ):
-    """Get logged production metrics for a project."""
-    project = manager.get_project(project_name)
-    if not project:
-        raise HTTPException(status_code=404, detail="Project not found")
+    """Get logged metrics for a project (from model_metrics table and Metrics artifacts)."""
+    metrics = []
+
+    from flowyml.ui.backend.dependencies import get_store
+
+    store = get_store()
+
+    try:
+        # Get all runs for this project
+        all_runs = store.list_runs(limit=1000)
+        project_run_ids = {r.get("run_id") for r in all_runs if r.get("project") == project_name}
+
+        # 1. Try to get metrics from model_metrics table
+        all_model_metrics = store.list_model_metrics(limit=limit * 2)
+        for m in all_model_metrics:
+            if m.get("run_id") in project_run_ids or m.get("project") == project_name:
+                metrics.append(m)
+
+        # 2. Also extract metrics from Metrics artifacts
+        all_assets = store.list_assets(limit=500)
+        for asset in all_assets:
+            # Check if it's a metrics artifact for this project (case-insensitive type check)
+            asset_type = str(asset.get("type", "")).lower()
+            if asset_type == "metrics" and asset.get("run_id") in project_run_ids:
+                # Get properties which contain the metric values
+                props = asset.get("properties", {})
+                created_at = asset.get("created_at", "")
+                run_id = asset.get("run_id", "")
+                asset_name = asset.get("name", "evaluation")
+
+                # Convert artifact properties to metric entries
+                for key, value in props.items():
+                    if isinstance(value, (int, float)) and key not in ["samples"]:
+                        metrics.append(
+                            {
+                                "project": project_name,
+                                "model_name": asset_name,
+                                "run_id": run_id,
+                                "metric_name": key,
+                                "metric_value": value,
+                                "environment": "evaluation",
+                                "tags": {"source": "artifact"},
+                                "created_at": created_at,
+                            },
+                        )
+    except Exception as e:
+        import logging
+
+        logging.getLogger(__name__).warning(f"Error fetching metrics: {e}")
+
+    # Try explicit project as fallback
+    if not metrics:
+        project = manager.get_project(project_name)
+        if project:
+            metrics = project.list_model_metrics(model_name=model_name, limit=limit)
 
     return {
         "project": project_name,
-        "metrics": project.list_model_metrics(model_name=model_name, limit=limit),
+        "metrics": metrics[:limit],
     }
 
 
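For orientation, this is the shape of the metric entry the rewritten endpoint synthesizes from each numeric property of a Metrics artifact; the field names come from the hunk above, while the concrete values are invented for illustration:

# One entry per numeric property (non-numeric values and "samples" are skipped)
example_metric_entry = {
    "project": "demo-project",            # hypothetical project name
    "model_name": "evaluation",           # asset name, defaulting to "evaluation"
    "run_id": "run-0001",                 # hypothetical run id
    "metric_name": "accuracy",
    "metric_value": 0.93,
    "environment": "evaluation",
    "tags": {"source": "artifact"},
    "created_at": "2025-01-01T12:00:00",  # hypothetical timestamp
}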

flowyml/ui/backend/routers/runs.py
@@ -163,6 +163,9 @@ async def get_run(run_id: str):
         for step_name, ts in _heartbeat_timestamps[run_id].items():
             if step_name in run.get("steps", {}):
                 run["steps"][step_name]["last_heartbeat"] = ts
+    for step_name, metrics in _step_metrics.get(run_id, {}).items():
+        if step_name in run.get("steps", {}):
+            run["steps"][step_name]["metrics"] = metrics
 
     return run
 
@@ -296,11 +299,14 @@ async def get_cloud_status(run_id: str):
 class HeartbeatRequest(BaseModel):
     step_name: str
     status: str = "running"
+    metrics: dict | None = None
 
 
-# In-memory storage for heartbeat timestamps
+# In-memory storage for heartbeat timestamps and metrics
 # Format: {run_id: {step_name: last_heartbeat_timestamp}}
 _heartbeat_timestamps: dict[str, dict[str, float]] = {}
+# Format: {run_id: {step_name: metrics_dict}}
+_step_metrics: dict[str, dict[str, dict]] = {}
 _heartbeat_lock = __import__("threading").Lock()
 
 # Heartbeat interval in seconds (should match executor's interval)
@@ -319,6 +325,14 @@ def _record_heartbeat(run_id: str, step_name: str) -> None:
         _heartbeat_timestamps[run_id][step_name] = time.time()
 
 
+def _record_step_metrics(run_id: str, step_name: str, metrics: dict) -> None:
+    """Record metrics for a step."""
+    with _heartbeat_lock:
+        if run_id not in _step_metrics:
+            _step_metrics[run_id] = {}
+        _step_metrics[run_id][step_name] = metrics
+
+
 def _get_dead_steps(run_id: str) -> list[str]:
     """Get list of steps that have missed too many heartbeats."""
     import time
@@ -342,6 +356,7 @@ def _cleanup_heartbeats(run_id: str) -> None:
     """Remove heartbeat tracking for a completed run."""
     with _heartbeat_lock:
         _heartbeat_timestamps.pop(run_id, None)
+        _step_metrics.pop(run_id, None)
 
 
 @router.post("/{run_id}/steps/{step_name}/heartbeat")
@@ -356,6 +371,10 @@ async def step_heartbeat(run_id: str, step_name: str, heartbeat: HeartbeatReques
     # Record heartbeat timestamp
     _record_heartbeat(run_id, step_name)
 
+    # Record metrics if present
+    if heartbeat.metrics:
+        _record_step_metrics(run_id, step_name, heartbeat.metrics)
+
     # Check if run is marked for stopping
     run = store.load_run(run_id)
     if not run:
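
With the optional metrics field on HeartbeatRequest, a worker can piggyback live step metrics onto its heartbeat. A hedged client-side sketch: only the path suffix and the payload fields come from the hunks above, while the base URL, the /api/runs prefix, and the use of the requests library are assumptions:

import requests  # assumed HTTP client, not part of the diff

run_id = "run-0001"        # hypothetical
step_name = "train_model"  # hypothetical

# Payload mirrors HeartbeatRequest: step_name, status, optional metrics dict
requests.post(
    f"http://localhost:8000/api/runs/{run_id}/steps/{step_name}/heartbeat",  # prefix assumed
    json={"step_name": step_name, "status": "running", "metrics": {"epoch": 3, "loss": 0.42}},
    timeout=5,
)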
@@ -484,3 +503,115 @@ async def get_run_logs(run_id: str):
     logs = await anyio.to_thread.run_sync(read_all_logs)
 
     return {"logs": logs}
+
+
+@router.get("/{run_id}/training-history")
+async def get_training_history(run_id: str):
+    """Get training history (per-epoch metrics) for a run.
+
+    This combines:
+    1. Training history from model artifacts (saved by FlowymlKerasCallback)
+    2. Per-epoch metrics saved in the metrics table
+
+    Returns a consolidated training history suitable for visualization.
+    """
+    store = _find_store_for_run(run_id)
+
+    # Get per-epoch metrics from the metrics table
+    metrics = store.get_metrics(run_id)
+
+    # Build training history from metrics table
+    # Group metrics by step (epoch) and name
+    epoch_metrics = {}
+    for m in metrics:
+        step = m.get("step", 0)
+        name = m.get("name", "unknown")
+        value = m.get("value", 0)
+
+        if step not in epoch_metrics:
+            epoch_metrics[step] = {}
+        epoch_metrics[step][name] = value
+
+    # Convert to chart-friendly format
+    training_history_from_metrics = {
+        "epochs": [],
+        "train_loss": [],
+        "val_loss": [],
+        "train_accuracy": [],
+        "val_accuracy": [],
+        "mae": [],
+        "val_mae": [],
+    }
+
+    # Standard metric name mappings
+    metric_mappings = {
+        "loss": "train_loss",
+        "val_loss": "val_loss",
+        "accuracy": "train_accuracy",
+        "acc": "train_accuracy",
+        "val_accuracy": "val_accuracy",
+        "val_acc": "val_accuracy",
+        "mae": "mae",
+        "val_mae": "val_mae",
+    }
+
+    # Track custom metrics
+    custom_metrics = set()
+
+    if epoch_metrics:
+        sorted_epochs = sorted(epoch_metrics.keys())
+        for epoch in sorted_epochs:
+            training_history_from_metrics["epochs"].append(epoch + 1)  # 1-indexed for display
+
+            epoch_data = epoch_metrics[epoch]
+            for metric_name, value in epoch_data.items():
+                # Map to standard name or track as custom
+                standard_name = metric_mappings.get(metric_name)
+                if standard_name:
+                    training_history_from_metrics[standard_name].append(value)
+                else:
+                    # Custom metric
+                    if metric_name not in custom_metrics:
+                        custom_metrics.add(metric_name)
+                        training_history_from_metrics[metric_name] = []
+                    training_history_from_metrics[metric_name].append(value)
+
+    # Also try to get training history from model artifacts
+    artifacts = store.list_assets(run_id=run_id)
+    artifact_history = None
+
+    for artifact in artifacts:
+        # Check if artifact has training_history
+        if artifact.get("training_history"):
+            artifact_history = artifact.get("training_history")
+            break
+        # Also check in metadata/properties
+        metadata = artifact.get("metadata", {})
+        if isinstance(metadata, str):
+            try:
+                metadata = json.loads(metadata)
+            except Exception:
+                metadata = {}
+        if metadata.get("training_history"):
+            artifact_history = metadata.get("training_history")
+            break
+
+    # Prefer artifact history if it has more data, otherwise use metrics
+    if artifact_history and len(artifact_history.get("epochs", [])) > len(
+        training_history_from_metrics.get("epochs", []),
+    ):
+        final_history = artifact_history
+    elif training_history_from_metrics.get("epochs"):
+        final_history = training_history_from_metrics
+    else:
+        final_history = artifact_history or {}
+
+    # Clean up empty arrays
+    cleaned_history = {k: v for k, v in final_history.items() if v and (not isinstance(v, list) or len(v) > 0)}
+
+    return {
+        "training_history": cleaned_history,
+        "has_history": len(cleaned_history.get("epochs", [])) > 0,
+        "total_epochs": len(cleaned_history.get("epochs", [])),
+        "source": "artifact" if artifact_history else "metrics",
+    }
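
To make the grouping concrete, here is a small worked sketch of the transformation the endpoint performs on rows from the metrics table; the input rows are invented, while the key names and the 1-indexing match the code above:

# Rows as they might come back from store.get_metrics(run_id)
rows = [
    {"step": 0, "name": "loss", "value": 0.90},
    {"step": 0, "name": "val_loss", "value": 0.95},
    {"step": 1, "name": "loss", "value": 0.70},
    {"step": 1, "name": "val_loss", "value": 0.80},
]

# Group by epoch, map "loss" -> "train_loss", and 1-index the epochs for display
epochs = sorted({r["step"] for r in rows})
history = {
    "epochs": [e + 1 for e in epochs],
    "train_loss": [r["value"] for r in rows if r["name"] == "loss"],
    "val_loss": [r["value"] for r in rows if r["name"] == "val_loss"],
}
assert history == {"epochs": [1, 2], "train_loss": [0.9, 0.7], "val_loss": [0.95, 0.8]}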

flowyml/ui/backend/routers/schedules.py
@@ -4,11 +4,16 @@ from flowyml.core.scheduler import PipelineScheduler
 from flowyml.registry.pipeline_registry import pipeline_registry
 
 router = APIRouter()
-# Note: In a real app, the scheduler instance should be a singleton managed by the app state
-# For now, we instantiate it here, but it might not persist state across reloads if not handled carefully.
-# Ideally, the scheduler is started when the backend starts.
-scheduler = PipelineScheduler()
-scheduler.start()  # Start the scheduler thread
+_scheduler = None
+
+
+def get_scheduler():
+    """Get or initialize the scheduler singleton."""
+    global _scheduler
+    if _scheduler is None:
+        _scheduler = PipelineScheduler()
+        _scheduler.start()
+    return _scheduler
 
 
 class ScheduleRequest(BaseModel):
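
The router now defers scheduler construction to the first request that needs it, so importing the module no longer starts a background thread. A minimal sketch of the same lazy-singleton pattern in isolation; DummyScheduler is a stand-in for illustration, not a flowyml class:

_instance = None

class DummyScheduler:  # stand-in for PipelineScheduler, for illustration only
    def start(self) -> None:
        print("scheduler thread started")

def get_instance() -> DummyScheduler:
    """Create and start the scheduler on first use, then reuse it."""
    global _instance
    if _instance is None:
        _instance = DummyScheduler()
        _instance.start()
    return _instance

assert get_instance() is get_instance()  # started once, same object on every call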
@@ -25,27 +30,47 @@ class ScheduleRequest(BaseModel):
 
 @router.get("/")
 async def list_schedules():
-    """List all active schedules."""
-    # Convert Schedule objects to dicts for JSON serialization
-    schedules = scheduler.list_schedules()
-    return [
-        {
-            "pipeline_name": s.pipeline_name,
-            "schedule_type": s.schedule_type,
-            "schedule_value": s.schedule_value,
-            "enabled": s.enabled,
-            "last_run": s.last_run,
-            "next_run": s.next_run,
-            "timezone": s.timezone,
-        }
-        for s in schedules
-    ]
+    """List all active schedules.
+
+    This reads schedules from the shared database, so schedules created
+    by user code (e.g., in scripts) are visible in the UI.
+    """
+    # First, get schedules from the in-memory scheduler
+    memory_schedules = []
+    for s in get_scheduler().list_schedules():
+        memory_schedules.append(
+            {
+                "pipeline_name": s.pipeline_name,
+                "schedule_type": s.schedule_type,
+                "schedule_value": s.schedule_value,
+                "enabled": s.enabled,
+                "last_run": s.last_run.isoformat() if s.last_run else None,
+                "next_run": s.next_run.isoformat() if s.next_run else None,
+                "timezone": s.timezone,
+            },
+        )
+
+    # Also read directly from the persistence database to get schedules
+    # created by other processes (e.g., user scripts)
+    db_schedules = []
+    if get_scheduler()._persistence:
+        db_schedules = get_scheduler()._persistence.list_all_schedules()
+
+    # Merge: prefer memory schedules (more up-to-date), but include db-only ones
+    memory_names = {s["pipeline_name"] for s in memory_schedules}
+    result = list(memory_schedules)
+
+    for db_sched in db_schedules:
+        if db_sched.get("pipeline_name") not in memory_names:
+            result.append(db_sched)
+
+    return result
 
 
 @router.get("/health")
 async def get_scheduler_health():
     """Get scheduler health metrics."""
-    return scheduler.health_check()
+    return get_scheduler().health_check()
 
 
 @router.post("/")
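
The merge in list_schedules prefers the in-memory entries and appends any schedules found only in the persistence database. A compact sketch of that precedence rule with invented data:

memory_schedules = [{"pipeline_name": "daily_train", "enabled": True}]   # invented
db_schedules = [
    {"pipeline_name": "daily_train", "enabled": False},  # duplicate of an in-memory entry, skipped
    {"pipeline_name": "hourly_sync", "enabled": True},   # present only in the database, kept
]

memory_names = {s["pipeline_name"] for s in memory_schedules}
result = list(memory_schedules)
result += [s for s in db_schedules if s.get("pipeline_name") not in memory_names]
# result keeps the in-memory "daily_train" entry and adds the db-only "hourly_sync" entry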
@@ -113,7 +138,7 @@ async def create_schedule(schedule: ScheduleRequest):
     # 2. Schedule it
     try:
         if schedule.schedule_type == "daily":
-            scheduler.schedule_daily(
+            get_scheduler().schedule_daily(
                 name=schedule.name,
                 pipeline_func=pipeline_func,
                 hour=schedule.hour,
@@ -121,14 +146,14 @@ async def create_schedule(schedule: ScheduleRequest):
                 timezone=schedule.timezone,
             )
         elif schedule.schedule_type == "hourly":
-            scheduler.schedule_hourly(
+            get_scheduler().schedule_hourly(
                 name=schedule.name,
                 pipeline_func=pipeline_func,
                 minute=schedule.minute,
                 timezone=schedule.timezone,
             )
         elif schedule.schedule_type == "interval":
-            scheduler.schedule_interval(
+            get_scheduler().schedule_interval(
                 name=schedule.name,
                 pipeline_func=pipeline_func,
                 seconds=schedule.interval_seconds,
@@ -137,7 +162,7 @@
         elif schedule.schedule_type == "cron":
             if not schedule.cron_expression:
                 raise HTTPException(status_code=400, detail="Cron expression required for cron schedule")
-            scheduler.schedule_cron(
+            get_scheduler().schedule_cron(
                 name=schedule.name,
                 pipeline_func=pipeline_func,
                 cron_expression=schedule.cron_expression,
@@ -157,28 +182,28 @@ async def create_schedule(schedule: ScheduleRequest):
 @router.delete("/{schedule_name}")
 async def delete_schedule(schedule_name: str):
     """Remove a schedule."""
-    scheduler.unschedule(schedule_name)
+    get_scheduler().unschedule(schedule_name)
     return {"status": "success", "message": f"Schedule {schedule_name} removed"}
 
 
 @router.post("/{schedule_name}/enable")
 async def enable_schedule(schedule_name: str):
     """Enable a schedule."""
-    scheduler.enable(schedule_name)
+    get_scheduler().enable(schedule_name)
     return {"status": "success"}
 
 
 @router.post("/{schedule_name}/disable")
 async def disable_schedule(schedule_name: str):
     """Disable a schedule."""
-    scheduler.disable(schedule_name)
+    get_scheduler().disable(schedule_name)
     return {"status": "success"}
 
 
 @router.get("/{schedule_name}/history")
 async def get_schedule_history(schedule_name: str, limit: int = 50):
     """Get execution history for a schedule."""
-    return scheduler.get_history(schedule_name, limit)
+    return get_scheduler().get_history(schedule_name, limit)
 
 
 @router.get("/registered-pipelines")