flowyml 1.7.1__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/assets/base.py +15 -0
- flowyml/assets/dataset.py +570 -17
- flowyml/assets/metrics.py +5 -0
- flowyml/assets/model.py +1052 -15
- flowyml/cli/main.py +709 -0
- flowyml/cli/stack_cli.py +138 -25
- flowyml/core/__init__.py +17 -0
- flowyml/core/executor.py +231 -37
- flowyml/core/image_builder.py +129 -0
- flowyml/core/log_streamer.py +227 -0
- flowyml/core/orchestrator.py +59 -4
- flowyml/core/pipeline.py +65 -13
- flowyml/core/routing.py +558 -0
- flowyml/core/scheduler.py +88 -5
- flowyml/core/step.py +9 -1
- flowyml/core/step_grouping.py +49 -35
- flowyml/core/types.py +407 -0
- flowyml/integrations/keras.py +247 -82
- flowyml/monitoring/alerts.py +10 -0
- flowyml/monitoring/notifications.py +104 -25
- flowyml/monitoring/slack_blocks.py +323 -0
- flowyml/plugins/__init__.py +251 -0
- flowyml/plugins/alerters/__init__.py +1 -0
- flowyml/plugins/alerters/slack.py +168 -0
- flowyml/plugins/base.py +752 -0
- flowyml/plugins/config.py +478 -0
- flowyml/plugins/deployers/__init__.py +22 -0
- flowyml/plugins/deployers/gcp_cloud_run.py +200 -0
- flowyml/plugins/deployers/sagemaker.py +306 -0
- flowyml/plugins/deployers/vertex.py +290 -0
- flowyml/plugins/integration.py +369 -0
- flowyml/plugins/manager.py +510 -0
- flowyml/plugins/model_registries/__init__.py +22 -0
- flowyml/plugins/model_registries/mlflow.py +159 -0
- flowyml/plugins/model_registries/sagemaker.py +489 -0
- flowyml/plugins/model_registries/vertex.py +386 -0
- flowyml/plugins/orchestrators/__init__.py +13 -0
- flowyml/plugins/orchestrators/sagemaker.py +443 -0
- flowyml/plugins/orchestrators/vertex_ai.py +461 -0
- flowyml/plugins/registries/__init__.py +13 -0
- flowyml/plugins/registries/ecr.py +321 -0
- flowyml/plugins/registries/gcr.py +313 -0
- flowyml/plugins/registry.py +454 -0
- flowyml/plugins/stack.py +494 -0
- flowyml/plugins/stack_config.py +537 -0
- flowyml/plugins/stores/__init__.py +13 -0
- flowyml/plugins/stores/gcs.py +460 -0
- flowyml/plugins/stores/s3.py +453 -0
- flowyml/plugins/trackers/__init__.py +11 -0
- flowyml/plugins/trackers/mlflow.py +316 -0
- flowyml/plugins/validators/__init__.py +3 -0
- flowyml/plugins/validators/deepchecks.py +119 -0
- flowyml/registry/__init__.py +2 -1
- flowyml/registry/model_environment.py +109 -0
- flowyml/registry/model_registry.py +241 -96
- flowyml/serving/__init__.py +17 -0
- flowyml/serving/model_server.py +628 -0
- flowyml/stacks/__init__.py +60 -0
- flowyml/stacks/aws.py +93 -0
- flowyml/stacks/base.py +62 -0
- flowyml/stacks/components.py +12 -0
- flowyml/stacks/gcp.py +44 -9
- flowyml/stacks/plugins.py +115 -0
- flowyml/stacks/registry.py +2 -1
- flowyml/storage/sql.py +401 -12
- flowyml/tracking/experiment.py +8 -5
- flowyml/ui/backend/Dockerfile +87 -16
- flowyml/ui/backend/auth.py +12 -2
- flowyml/ui/backend/main.py +149 -5
- flowyml/ui/backend/routers/ai_context.py +226 -0
- flowyml/ui/backend/routers/assets.py +23 -4
- flowyml/ui/backend/routers/auth.py +96 -0
- flowyml/ui/backend/routers/deployments.py +660 -0
- flowyml/ui/backend/routers/model_explorer.py +597 -0
- flowyml/ui/backend/routers/plugins.py +103 -51
- flowyml/ui/backend/routers/projects.py +91 -8
- flowyml/ui/backend/routers/runs.py +132 -1
- flowyml/ui/backend/routers/schedules.py +54 -29
- flowyml/ui/backend/routers/templates.py +319 -0
- flowyml/ui/backend/routers/websocket.py +2 -2
- flowyml/ui/frontend/Dockerfile +55 -6
- flowyml/ui/frontend/dist/assets/index-B5AsPTSz.css +1 -0
- flowyml/ui/frontend/dist/assets/index-dFbZ8wD8.js +753 -0
- flowyml/ui/frontend/dist/index.html +2 -2
- flowyml/ui/frontend/dist/logo.png +0 -0
- flowyml/ui/frontend/nginx.conf +65 -4
- flowyml/ui/frontend/package-lock.json +1415 -74
- flowyml/ui/frontend/package.json +4 -0
- flowyml/ui/frontend/public/logo.png +0 -0
- flowyml/ui/frontend/src/App.jsx +10 -7
- flowyml/ui/frontend/src/app/assets/page.jsx +890 -321
- flowyml/ui/frontend/src/app/auth/Login.jsx +90 -0
- flowyml/ui/frontend/src/app/dashboard/page.jsx +8 -8
- flowyml/ui/frontend/src/app/deployments/page.jsx +786 -0
- flowyml/ui/frontend/src/app/model-explorer/page.jsx +1031 -0
- flowyml/ui/frontend/src/app/pipelines/page.jsx +12 -2
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectExperimentsList.jsx +19 -6
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectMetricsPanel.jsx +1 -1
- flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +601 -101
- flowyml/ui/frontend/src/app/runs/page.jsx +8 -2
- flowyml/ui/frontend/src/app/settings/page.jsx +267 -253
- flowyml/ui/frontend/src/components/ArtifactViewer.jsx +62 -2
- flowyml/ui/frontend/src/components/AssetDetailsPanel.jsx +424 -29
- flowyml/ui/frontend/src/components/AssetTreeHierarchy.jsx +119 -11
- flowyml/ui/frontend/src/components/DatasetViewer.jsx +753 -0
- flowyml/ui/frontend/src/components/Layout.jsx +6 -0
- flowyml/ui/frontend/src/components/PipelineGraph.jsx +79 -29
- flowyml/ui/frontend/src/components/RunDetailsPanel.jsx +36 -6
- flowyml/ui/frontend/src/components/RunMetaPanel.jsx +113 -0
- flowyml/ui/frontend/src/components/TrainingHistoryChart.jsx +514 -0
- flowyml/ui/frontend/src/components/TrainingMetricsPanel.jsx +175 -0
- flowyml/ui/frontend/src/components/ai/AIAssistantButton.jsx +71 -0
- flowyml/ui/frontend/src/components/ai/AIAssistantPanel.jsx +420 -0
- flowyml/ui/frontend/src/components/header/Header.jsx +22 -0
- flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +4 -4
- flowyml/ui/frontend/src/components/plugins/{ZenMLIntegration.jsx → StackImport.jsx} +38 -12
- flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +36 -13
- flowyml/ui/frontend/src/contexts/AIAssistantContext.jsx +245 -0
- flowyml/ui/frontend/src/contexts/AuthContext.jsx +108 -0
- flowyml/ui/frontend/src/hooks/useAIContext.js +156 -0
- flowyml/ui/frontend/src/hooks/useWebGPU.js +54 -0
- flowyml/ui/frontend/src/layouts/MainLayout.jsx +6 -0
- flowyml/ui/frontend/src/router/index.jsx +47 -20
- flowyml/ui/frontend/src/services/pluginService.js +3 -1
- flowyml/ui/server_manager.py +5 -5
- flowyml/ui/utils.py +157 -39
- flowyml/utils/config.py +37 -15
- flowyml/utils/model_introspection.py +123 -0
- flowyml/utils/observability.py +30 -0
- flowyml-1.8.0.dist-info/METADATA +174 -0
- {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/RECORD +134 -73
- {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/WHEEL +1 -1
- flowyml/ui/frontend/dist/assets/index-BqDQvp63.js +0 -630
- flowyml/ui/frontend/dist/assets/index-By4trVyv.css +0 -1
- flowyml-1.7.1.dist-info/METADATA +0 -477
- {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/entry_points.txt +0 -0
- {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/licenses/LICENSE +0 -0
flowyml/integrations/keras.py
CHANGED
|
@@ -1,4 +1,9 @@
|
|
|
1
|
-
"""Keras integration for flowyml.
|
|
1
|
+
"""Keras integration for flowyml.
|
|
2
|
+
|
|
3
|
+
This module provides seamless integration between Keras and FlowyML,
|
|
4
|
+
enabling automatic tracking of training metrics, model artifacts, and
|
|
5
|
+
interactive visualization in the FlowyML dashboard.
|
|
6
|
+
"""
|
|
2
7
|
|
|
3
8
|
from pathlib import Path
|
|
4
9
|
from datetime import datetime
|
|
@@ -19,17 +24,42 @@ from flowyml.storage.metadata import SQLiteMetadataStore
|
|
|
19
24
|
class FlowymlKerasCallback(keras.callbacks.Callback if keras else object):
|
|
20
25
|
"""Keras callback for flowyml tracking with automatic training history collection.
|
|
21
26
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
-
|
|
27
|
+
This callback integrates Keras training with FlowyML's tracking and visualization
|
|
28
|
+
system. It **automatically and dynamically** captures ALL metrics that Keras logs
|
|
29
|
+
during training - no configuration needed!
|
|
30
|
+
|
|
31
|
+
Features:
|
|
32
|
+
- **Automatic metric capture**: Whatever metrics you compile your model with
|
|
33
|
+
(loss, accuracy, mae, f1_score, custom metrics) are automatically tracked
|
|
34
|
+
- **Dynamic chart generation**: The UI generates charts for all captured metrics
|
|
35
|
+
- **Real-time updates**: Training progress is visible in the dashboard as it happens
|
|
36
|
+
- **Zero configuration**: Just add the callback and everything works automatically
|
|
28
37
|
|
|
29
38
|
Example:
|
|
30
39
|
>>> from flowyml.integrations.keras import FlowymlKerasCallback
|
|
31
|
-
>>>
|
|
32
|
-
>>>
|
|
40
|
+
>>>
|
|
41
|
+
>>> # Create callback - that's all you need!
|
|
42
|
+
>>> callback = FlowymlKerasCallback(
|
|
43
|
+
... experiment_name="my-experiment",
|
|
44
|
+
... project="my-project",
|
|
45
|
+
... )
|
|
46
|
+
>>>
|
|
47
|
+
>>> # Compile with any metrics you want - they'll all be tracked
|
|
48
|
+
>>> model.compile(
|
|
49
|
+
... optimizer="adam",
|
|
50
|
+
... loss="mse",
|
|
51
|
+
... metrics=["mae", "mape"], # All automatically captured!
|
|
52
|
+
... )
|
|
53
|
+
>>>
|
|
54
|
+
>>> # Train with validation data - both train & val metrics captured
|
|
55
|
+
>>> history = model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=50, callbacks=[callback])
|
|
56
|
+
>>>
|
|
57
|
+
>>> # Get training history for use in your step output
|
|
58
|
+
>>> training_history = callback.get_training_history()
|
|
59
|
+
|
|
60
|
+
The training metrics will be visible in the FlowyML dashboard at:
|
|
61
|
+
- Run Details page → Training Metrics section
|
|
62
|
+
- Model artifacts → Training History charts
|
|
33
63
|
"""
|
|
34
64
|
|
|
35
65
|
def __init__(
|
|
@@ -40,16 +70,22 @@ class FlowymlKerasCallback(keras.callbacks.Callback if keras else object):
|
|
|
40
70
|
log_model: bool = True,
|
|
41
71
|
log_every_epoch: bool = True,
|
|
42
72
|
auto_log_history: bool = True,
|
|
73
|
+
live_update_interval: int = 1,
|
|
43
74
|
metadata_store: SQLiteMetadataStore | None = None,
|
|
44
75
|
):
|
|
45
|
-
"""
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
76
|
+
"""Initialize the FlowyML Keras callback.
|
|
77
|
+
|
|
78
|
+
Args:
|
|
79
|
+
experiment_name: Name of the experiment for grouping runs.
|
|
80
|
+
run_name: Optional run name (defaults to timestamp-based name).
|
|
81
|
+
project: Project name for organizing runs in the dashboard.
|
|
82
|
+
log_model: Whether to save the model as an artifact after training.
|
|
83
|
+
log_every_epoch: Whether to log metrics to the database every epoch.
|
|
84
|
+
auto_log_history: Whether to automatically collect training history
|
|
85
|
+
for visualization. Highly recommended for dashboard charts.
|
|
86
|
+
live_update_interval: How often (in epochs) to update the live
|
|
87
|
+
training history artifact. Set to 1 for real-time updates.
|
|
88
|
+
metadata_store: Optional metadata store override for custom storage.
|
|
53
89
|
"""
|
|
54
90
|
if keras is None:
|
|
55
91
|
raise ImportError("Keras is not installed. Please install tensorflow or keras.")
|
|
@@ -61,6 +97,7 @@ class FlowymlKerasCallback(keras.callbacks.Callback if keras else object):
|
|
|
61
97
|
self.log_model = log_model
|
|
62
98
|
self.log_every_epoch = log_every_epoch
|
|
63
99
|
self.auto_log_history = auto_log_history
|
|
100
|
+
self.live_update_interval = live_update_interval
|
|
64
101
|
|
|
65
102
|
self.metadata_store = metadata_store or SQLiteMetadataStore()
|
|
66
103
|
|
|
@@ -70,15 +107,12 @@ class FlowymlKerasCallback(keras.callbacks.Callback if keras else object):
|
|
|
70
107
|
# Track params
|
|
71
108
|
self.params_logged = False
|
|
72
109
|
|
|
73
|
-
# Training history
|
|
74
|
-
self.
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
"val_accuracy": [],
|
|
80
|
-
}
|
|
81
|
-
self.custom_metrics = set()
|
|
110
|
+
# Training history artifact ID (for live updates)
|
|
111
|
+
self._history_artifact_id = f"{self.run_name}_training_history"
|
|
112
|
+
|
|
113
|
+
# DYNAMIC training history - only epochs is pre-initialized
|
|
114
|
+
# All other metrics are added dynamically as Keras logs them
|
|
115
|
+
self._training_history = {"epochs": []}
|
|
82
116
|
|
|
83
117
|
def on_train_begin(self, logs=None) -> None:
|
|
84
118
|
"""Log initial parameters."""
|
|
@@ -117,51 +151,190 @@ class FlowymlKerasCallback(keras.callbacks.Callback if keras else object):
|
|
|
117
151
|
self.params_logged = True
|
|
118
152
|
|
|
119
153
|
def on_epoch_end(self, epoch, logs=None) -> None:
|
|
120
|
-
"""
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
# Update experiment run
|
|
133
|
-
self.metadata_store.log_experiment_run(
|
|
134
|
-
experiment_id=self.experiment_name,
|
|
154
|
+
"""Dynamically capture ALL metrics at the end of each epoch.
|
|
155
|
+
|
|
156
|
+
This method automatically captures whatever metrics Keras logs,
|
|
157
|
+
without requiring any configuration or hardcoded metric names.
|
|
158
|
+
"""
|
|
159
|
+
if not logs:
|
|
160
|
+
return
|
|
161
|
+
|
|
162
|
+
# Log metrics to DB
|
|
163
|
+
if self.log_every_epoch:
|
|
164
|
+
for k, v in logs.items():
|
|
165
|
+
self.metadata_store.save_metric(
|
|
135
166
|
run_id=self.run_name,
|
|
136
|
-
|
|
167
|
+
name=k,
|
|
168
|
+
value=float(v),
|
|
169
|
+
step=epoch,
|
|
137
170
|
)
|
|
138
171
|
|
|
139
|
-
#
|
|
140
|
-
|
|
141
|
-
self.
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
172
|
+
# Update experiment run
|
|
173
|
+
self.metadata_store.log_experiment_run(
|
|
174
|
+
experiment_id=self.experiment_name,
|
|
175
|
+
run_id=self.run_name,
|
|
176
|
+
metrics=logs,
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
# Accumulate training history for visualization
|
|
180
|
+
if self.auto_log_history:
|
|
181
|
+
# Record epoch number (1-indexed for display)
|
|
182
|
+
self._training_history["epochs"].append(epoch + 1)
|
|
183
|
+
|
|
184
|
+
# DYNAMICALLY capture ALL metrics from Keras logs
|
|
185
|
+
for metric_name, value in logs.items():
|
|
186
|
+
# Normalize metric name for consistent display
|
|
187
|
+
display_name = self._normalize_metric_name(metric_name)
|
|
188
|
+
|
|
189
|
+
# Initialize list if this is a new metric
|
|
190
|
+
if display_name not in self._training_history:
|
|
191
|
+
self._training_history[display_name] = []
|
|
192
|
+
|
|
193
|
+
# Append the value
|
|
194
|
+
self._training_history[display_name].append(float(value))
|
|
195
|
+
|
|
196
|
+
# Save live training history artifact for real-time UI updates
|
|
197
|
+
if (epoch + 1) % self.live_update_interval == 0:
|
|
198
|
+
self._save_live_training_history()
|
|
199
|
+
|
|
200
|
+
def _normalize_metric_name(self, name: str) -> str:
|
|
201
|
+
"""Normalize metric names for consistent display.
|
|
202
|
+
|
|
203
|
+
Converts Keras metric names to user-friendly display names:
|
|
204
|
+
- 'loss' -> 'train_loss'
|
|
205
|
+
- 'val_loss' -> 'val_loss' (unchanged)
|
|
206
|
+
- 'mae' -> 'train_mae'
|
|
207
|
+
- 'val_mae' -> 'val_mae' (unchanged)
|
|
208
|
+
- 'accuracy' -> 'train_accuracy'
|
|
209
|
+
- 'acc' -> 'train_accuracy'
|
|
210
|
+
"""
|
|
211
|
+
# Validation metrics (val_*) stay as-is
|
|
212
|
+
if name.startswith("val_"):
|
|
213
|
+
return name
|
|
214
|
+
|
|
215
|
+
# Special case: 'acc' -> 'train_accuracy'
|
|
216
|
+
if name == "acc":
|
|
217
|
+
return "train_accuracy"
|
|
218
|
+
|
|
219
|
+
# Training metrics: add 'train_' prefix for clarity
|
|
220
|
+
if name == "loss":
|
|
221
|
+
return "train_loss"
|
|
222
|
+
|
|
223
|
+
# For other metrics (mae, accuracy, custom), add 'train_' prefix
|
|
224
|
+
return f"train_{name}"
|
|
225
|
+
|
|
226
|
+
def get_training_history(self) -> dict:
|
|
227
|
+
"""Get the accumulated training history for use in step outputs.
|
|
228
|
+
|
|
229
|
+
This is the recommended way to include training history in your
|
|
230
|
+
Model asset, ensuring it's linked to the pipeline run.
|
|
231
|
+
|
|
232
|
+
Returns:
|
|
233
|
+
dict: Training history with epochs and all captured metrics.
|
|
234
|
+
Only includes metrics that have data (non-empty lists).
|
|
235
|
+
|
|
236
|
+
Example:
|
|
237
|
+
>>> callback = FlowymlKerasCallback(...)
|
|
238
|
+
>>> model.fit(..., callbacks=[callback])
|
|
239
|
+
>>> history = callback.get_training_history()
|
|
240
|
+
>>> return Model.create(
|
|
241
|
+
... data=model,
|
|
242
|
+
... name="my_model",
|
|
243
|
+
... training_history=history, # Automatically displayed in UI!
|
|
244
|
+
... )
|
|
245
|
+
"""
|
|
246
|
+
# Return cleaned history (only non-empty metrics)
|
|
247
|
+
return {k: v for k, v in self._training_history.items() if v and len(v) > 0}
|
|
248
|
+
|
|
249
|
+
def _save_live_training_history(self) -> None:
|
|
250
|
+
"""Save current training history as an artifact for live UI updates."""
|
|
251
|
+
cleaned_history = self.get_training_history()
|
|
252
|
+
|
|
253
|
+
if not cleaned_history.get("epochs"):
|
|
254
|
+
return # Nothing to save yet
|
|
255
|
+
|
|
256
|
+
# Calculate summary metrics dynamically
|
|
257
|
+
summary_metrics = {}
|
|
258
|
+
for key, values in cleaned_history.items():
|
|
259
|
+
if key == "epochs" or not values:
|
|
260
|
+
continue
|
|
261
|
+
|
|
262
|
+
# For loss-like metrics (lower is better)
|
|
263
|
+
if "loss" in key or "mae" in key or "mse" in key or "error" in key.lower():
|
|
264
|
+
summary_metrics[f"final_{key}"] = values[-1]
|
|
265
|
+
summary_metrics[f"best_{key}"] = min(values)
|
|
266
|
+
# For accuracy-like metrics (higher is better)
|
|
267
|
+
elif "accuracy" in key or "acc" in key or "f1" in key or "precision" in key or "recall" in key:
|
|
268
|
+
summary_metrics[f"final_{key}"] = values[-1]
|
|
269
|
+
summary_metrics[f"best_{key}"] = max(values)
|
|
270
|
+
# For other metrics, just store final value
|
|
271
|
+
else:
|
|
272
|
+
summary_metrics[f"final_{key}"] = values[-1]
|
|
273
|
+
|
|
274
|
+
# Save/update the training history artifact
|
|
275
|
+
self.metadata_store.save_artifact(
|
|
276
|
+
artifact_id=self._history_artifact_id,
|
|
277
|
+
metadata={
|
|
278
|
+
"artifact_id": self._history_artifact_id,
|
|
279
|
+
"name": f"training-history-{self.experiment_name}",
|
|
280
|
+
"type": "training_history",
|
|
281
|
+
"run_id": self.run_name,
|
|
282
|
+
"project": self.project,
|
|
283
|
+
"properties": {
|
|
284
|
+
"experiment": self.experiment_name,
|
|
285
|
+
"epochs_completed": len(cleaned_history.get("epochs", [])),
|
|
286
|
+
"status": "training",
|
|
287
|
+
**summary_metrics,
|
|
288
|
+
},
|
|
289
|
+
"training_history": cleaned_history,
|
|
290
|
+
"created_at": datetime.now().isoformat(),
|
|
291
|
+
"updated_at": datetime.now().isoformat(),
|
|
292
|
+
},
|
|
293
|
+
)
|
|
162
294
|
|
|
163
295
|
def on_train_end(self, logs=None) -> None:
|
|
164
|
-
"""Save model at the end of training
|
|
296
|
+
"""Save model and finalize training history at the end of training."""
|
|
297
|
+
cleaned_history = self.get_training_history()
|
|
298
|
+
|
|
299
|
+
# Calculate final metrics dynamically
|
|
300
|
+
final_metrics = {}
|
|
301
|
+
for key, values in cleaned_history.items():
|
|
302
|
+
if key == "epochs" or not values:
|
|
303
|
+
continue
|
|
304
|
+
|
|
305
|
+
# For loss-like metrics
|
|
306
|
+
if "loss" in key or "mae" in key or "mse" in key or "error" in key.lower():
|
|
307
|
+
final_metrics[f"final_{key}"] = values[-1]
|
|
308
|
+
final_metrics[f"best_{key}"] = min(values)
|
|
309
|
+
# For accuracy-like metrics
|
|
310
|
+
elif "accuracy" in key or "acc" in key or "f1" in key:
|
|
311
|
+
final_metrics[f"final_{key}"] = values[-1]
|
|
312
|
+
final_metrics[f"best_{key}"] = max(values)
|
|
313
|
+
else:
|
|
314
|
+
final_metrics[f"final_{key}"] = values[-1]
|
|
315
|
+
|
|
316
|
+
# Update training history artifact with final status
|
|
317
|
+
self.metadata_store.save_artifact(
|
|
318
|
+
artifact_id=self._history_artifact_id,
|
|
319
|
+
metadata={
|
|
320
|
+
"artifact_id": self._history_artifact_id,
|
|
321
|
+
"name": f"training-history-{self.experiment_name}",
|
|
322
|
+
"type": "training_history",
|
|
323
|
+
"run_id": self.run_name,
|
|
324
|
+
"project": self.project,
|
|
325
|
+
"properties": {
|
|
326
|
+
"experiment": self.experiment_name,
|
|
327
|
+
"epochs_completed": len(cleaned_history.get("epochs", [])),
|
|
328
|
+
"status": "completed",
|
|
329
|
+
**final_metrics,
|
|
330
|
+
},
|
|
331
|
+
"training_history": cleaned_history,
|
|
332
|
+
"created_at": datetime.now().isoformat(),
|
|
333
|
+
"updated_at": datetime.now().isoformat(),
|
|
334
|
+
},
|
|
335
|
+
)
|
|
336
|
+
|
|
337
|
+
# Save model artifact if enabled
|
|
165
338
|
if self.log_model:
|
|
166
339
|
# Create artifacts directory
|
|
167
340
|
artifact_dir = Path(f".flowyml/artifacts/{self.run_name}")
|
|
@@ -170,21 +343,7 @@ class FlowymlKerasCallback(keras.callbacks.Callback if keras else object):
|
|
|
170
343
|
model_path = artifact_dir / "model.keras"
|
|
171
344
|
self.model.save(model_path)
|
|
172
345
|
|
|
173
|
-
#
|
|
174
|
-
cleaned_history = {
|
|
175
|
-
k: v
|
|
176
|
-
for k, v in self.training_history.items()
|
|
177
|
-
if v # Only include non-empty lists
|
|
178
|
-
}
|
|
179
|
-
|
|
180
|
-
# Calculate final metrics
|
|
181
|
-
final_metrics = {}
|
|
182
|
-
if "train_loss" in cleaned_history and cleaned_history["train_loss"]:
|
|
183
|
-
final_metrics["loss"] = cleaned_history["train_loss"][-1]
|
|
184
|
-
if "train_accuracy" in cleaned_history and cleaned_history["train_accuracy"]:
|
|
185
|
-
final_metrics["accuracy"] = cleaned_history["train_accuracy"][-1]
|
|
186
|
-
|
|
187
|
-
# Save model artifact with training history
|
|
346
|
+
# Save model artifact with training history attached
|
|
188
347
|
artifact_id = str(uuid.uuid4())
|
|
189
348
|
self.metadata_store.save_artifact(
|
|
190
349
|
artifact_id=artifact_id,
|
|
@@ -201,7 +360,13 @@ class FlowymlKerasCallback(keras.callbacks.Callback if keras else object):
|
|
|
201
360
|
"optimizer": str(self.model.optimizer.__class__.__name__),
|
|
202
361
|
**final_metrics,
|
|
203
362
|
},
|
|
204
|
-
"training_history": cleaned_history,
|
|
363
|
+
"training_history": cleaned_history,
|
|
205
364
|
"created_at": datetime.now().isoformat(),
|
|
206
365
|
},
|
|
207
366
|
)
|
|
367
|
+
|
|
368
|
+
# Expose training_history as property for backwards compatibility
|
|
369
|
+
@property
|
|
370
|
+
def training_history(self) -> dict:
|
|
371
|
+
"""Return the accumulated training history as a dictionary."""
|
|
372
|
+
return self.get_training_history()
|
flowyml/monitoring/alerts.py
CHANGED
|
@@ -52,6 +52,16 @@ class AlertManager:
|
|
|
52
52
|
except Exception as e:
|
|
53
53
|
logger.error(f"Failed to handle alert: {e}")
|
|
54
54
|
|
|
55
|
+
def alert(
|
|
56
|
+
self,
|
|
57
|
+
message: str,
|
|
58
|
+
title: str = "Pipeline Alert",
|
|
59
|
+
level: AlertLevel = AlertLevel.INFO,
|
|
60
|
+
metadata: dict = None,
|
|
61
|
+
) -> None:
|
|
62
|
+
"""Convenience method for sending alerts."""
|
|
63
|
+
self.send_alert(title=title, message=message, level=level, metadata=metadata)
|
|
64
|
+
|
|
55
65
|
|
|
56
66
|
# Global instance
|
|
57
67
|
alert_manager = AlertManager()
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
"""Notification system for pipeline events."""
|
|
2
2
|
|
|
3
|
+
import contextlib
|
|
3
4
|
import os
|
|
4
|
-
from typing import Any
|
|
5
5
|
from abc import ABC, abstractmethod
|
|
6
6
|
from dataclasses import dataclass
|
|
7
7
|
from datetime import datetime
|
|
8
|
-
import
|
|
8
|
+
from typing import Any
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
@dataclass
|
|
@@ -43,10 +43,11 @@ class ConsoleNotifier(NotificationChannel):
|
|
|
43
43
|
|
|
44
44
|
|
|
45
45
|
class SlackNotifier(NotificationChannel):
|
|
46
|
-
"""Send notifications to Slack."""
|
|
46
|
+
"""Send notifications to Slack with rich Block Kit formatting."""
|
|
47
47
|
|
|
48
|
-
def __init__(self, webhook_url: str | None = None):
|
|
48
|
+
def __init__(self, webhook_url: str | None = None, ui_url: str | None = None):
|
|
49
49
|
self.webhook_url = webhook_url or os.getenv("SLACK_WEBHOOK_URL")
|
|
50
|
+
self.ui_url = ui_url or os.getenv("FLOWYML_UI_URL", "http://localhost:5173")
|
|
50
51
|
|
|
51
52
|
def send(self, notification: Notification) -> bool:
|
|
52
53
|
if not self.webhook_url:
|
|
@@ -55,26 +56,104 @@ class SlackNotifier(NotificationChannel):
|
|
|
55
56
|
try:
|
|
56
57
|
import requests
|
|
57
58
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
59
|
+
from flowyml.monitoring.slack_blocks import build_simple_message
|
|
60
|
+
|
|
61
|
+
# Use rich Block Kit message
|
|
62
|
+
payload = build_simple_message(
|
|
63
|
+
title=notification.title,
|
|
64
|
+
message=notification.message,
|
|
65
|
+
level=notification.level,
|
|
66
|
+
metadata=notification.metadata if notification.metadata else None,
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
response = requests.post(self.webhook_url, json=payload, timeout=10)
|
|
70
|
+
return response.status_code == 200
|
|
71
|
+
except Exception:
|
|
72
|
+
return False
|
|
73
|
+
|
|
74
|
+
def send_pipeline_success(
|
|
75
|
+
self,
|
|
76
|
+
pipeline_name: str,
|
|
77
|
+
run_id: str,
|
|
78
|
+
duration: float,
|
|
79
|
+
metrics: dict[str, float] | None = None,
|
|
80
|
+
) -> bool:
|
|
81
|
+
"""Send a rich pipeline success notification."""
|
|
82
|
+
if not self.webhook_url:
|
|
83
|
+
return False
|
|
84
|
+
|
|
85
|
+
try:
|
|
86
|
+
import requests
|
|
87
|
+
|
|
88
|
+
from flowyml.monitoring.slack_blocks import build_pipeline_success_message
|
|
89
|
+
|
|
90
|
+
payload = build_pipeline_success_message(
|
|
91
|
+
pipeline_name=pipeline_name,
|
|
92
|
+
run_id=run_id,
|
|
93
|
+
duration=duration,
|
|
94
|
+
metrics=metrics,
|
|
95
|
+
ui_url=self.ui_url,
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
response = requests.post(self.webhook_url, json=payload, timeout=10)
|
|
99
|
+
return response.status_code == 200
|
|
100
|
+
except Exception:
|
|
101
|
+
return False
|
|
102
|
+
|
|
103
|
+
def send_pipeline_failure(
|
|
104
|
+
self,
|
|
105
|
+
pipeline_name: str,
|
|
106
|
+
run_id: str,
|
|
107
|
+
error: str,
|
|
108
|
+
step_name: str | None = None,
|
|
109
|
+
) -> bool:
|
|
110
|
+
"""Send a rich pipeline failure notification."""
|
|
111
|
+
if not self.webhook_url:
|
|
112
|
+
return False
|
|
113
|
+
|
|
114
|
+
try:
|
|
115
|
+
import requests
|
|
116
|
+
|
|
117
|
+
from flowyml.monitoring.slack_blocks import build_pipeline_failure_message
|
|
118
|
+
|
|
119
|
+
payload = build_pipeline_failure_message(
|
|
120
|
+
pipeline_name=pipeline_name,
|
|
121
|
+
run_id=run_id,
|
|
122
|
+
error=error,
|
|
123
|
+
step_name=step_name,
|
|
124
|
+
ui_url=self.ui_url,
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
response = requests.post(self.webhook_url, json=payload, timeout=10)
|
|
128
|
+
return response.status_code == 200
|
|
129
|
+
except Exception:
|
|
130
|
+
return False
|
|
131
|
+
|
|
132
|
+
def send_drift_warning(
|
|
133
|
+
self,
|
|
134
|
+
feature: str,
|
|
135
|
+
psi: float,
|
|
136
|
+
threshold: float = 0.2,
|
|
137
|
+
model_name: str | None = None,
|
|
138
|
+
) -> bool:
|
|
139
|
+
"""Send a rich drift warning notification."""
|
|
140
|
+
if not self.webhook_url:
|
|
141
|
+
return False
|
|
142
|
+
|
|
143
|
+
try:
|
|
144
|
+
import requests
|
|
145
|
+
|
|
146
|
+
from flowyml.monitoring.slack_blocks import build_drift_warning_message
|
|
147
|
+
|
|
148
|
+
payload = build_drift_warning_message(
|
|
149
|
+
feature=feature,
|
|
150
|
+
psi=psi,
|
|
151
|
+
threshold=threshold,
|
|
152
|
+
model_name=model_name,
|
|
153
|
+
ui_url=self.ui_url,
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
response = requests.post(self.webhook_url, json=payload, timeout=10)
|
|
78
157
|
return response.status_code == 200
|
|
79
158
|
except Exception:
|
|
80
159
|
return False
|
|
@@ -105,8 +184,8 @@ class EmailNotifier(NotificationChannel):
|
|
|
105
184
|
|
|
106
185
|
try:
|
|
107
186
|
import smtplib
|
|
108
|
-
from email.mime.text import MIMEText
|
|
109
187
|
from email.mime.multipart import MIMEMultipart
|
|
188
|
+
from email.mime.text import MIMEText
|
|
110
189
|
|
|
111
190
|
msg = MIMEMultipart()
|
|
112
191
|
msg["From"] = self.from_addr
|