flowyml 1.7.0-py3-none-any.whl → 1.7.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/assets/dataset.py +570 -17
- flowyml/assets/model.py +1052 -15
- flowyml/core/executor.py +70 -11
- flowyml/core/orchestrator.py +37 -2
- flowyml/core/pipeline.py +32 -4
- flowyml/core/scheduler.py +88 -5
- flowyml/integrations/keras.py +247 -82
- flowyml/storage/sql.py +24 -6
- flowyml/ui/backend/routers/runs.py +112 -0
- flowyml/ui/backend/routers/schedules.py +35 -15
- flowyml/ui/frontend/dist/assets/index-B40RsQDq.css +1 -0
- flowyml/ui/frontend/dist/assets/index-CjI0zKCn.js +685 -0
- flowyml/ui/frontend/dist/index.html +2 -2
- flowyml/ui/frontend/package-lock.json +11 -0
- flowyml/ui/frontend/package.json +1 -0
- flowyml/ui/frontend/src/app/assets/page.jsx +890 -321
- flowyml/ui/frontend/src/app/dashboard/page.jsx +1 -1
- flowyml/ui/frontend/src/app/experiments/[experimentId]/page.jsx +1 -1
- flowyml/ui/frontend/src/app/leaderboard/page.jsx +1 -1
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectMetricsPanel.jsx +1 -1
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectRunsList.jsx +3 -3
- flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +590 -102
- flowyml/ui/frontend/src/components/ArtifactViewer.jsx +62 -2
- flowyml/ui/frontend/src/components/AssetDetailsPanel.jsx +401 -28
- flowyml/ui/frontend/src/components/AssetTreeHierarchy.jsx +119 -11
- flowyml/ui/frontend/src/components/DatasetViewer.jsx +753 -0
- flowyml/ui/frontend/src/components/TrainingHistoryChart.jsx +514 -0
- flowyml/ui/frontend/src/components/TrainingMetricsPanel.jsx +175 -0
- {flowyml-1.7.0.dist-info → flowyml-1.7.2.dist-info}/METADATA +1 -1
- {flowyml-1.7.0.dist-info → flowyml-1.7.2.dist-info}/RECORD +33 -30
- flowyml/ui/frontend/dist/assets/index-By4trVyv.css +0 -1
- flowyml/ui/frontend/dist/assets/index-CX5RV2C9.js +0 -630
- {flowyml-1.7.0.dist-info → flowyml-1.7.2.dist-info}/WHEEL +0 -0
- {flowyml-1.7.0.dist-info → flowyml-1.7.2.dist-info}/entry_points.txt +0 -0
- {flowyml-1.7.0.dist-info → flowyml-1.7.2.dist-info}/licenses/LICENSE +0 -0
flowyml/integrations/keras.py
CHANGED

@@ -1,4 +1,9 @@
-"""Keras integration for flowyml.
+"""Keras integration for flowyml.
+
+This module provides seamless integration between Keras and FlowyML,
+enabling automatic tracking of training metrics, model artifacts, and
+interactive visualization in the FlowyML dashboard.
+"""
 
 from pathlib import Path
 from datetime import datetime
@@ -19,17 +24,42 @@ from flowyml.storage.metadata import SQLiteMetadataStore
 class FlowymlKerasCallback(keras.callbacks.Callback if keras else object):
     """Keras callback for flowyml tracking with automatic training history collection.
 
-
-
-
-
-
-
+    This callback integrates Keras training with FlowyML's tracking and visualization
+    system. It **automatically and dynamically** captures ALL metrics that Keras logs
+    during training - no configuration needed!
+
+    Features:
+    - **Automatic metric capture**: Whatever metrics you compile your model with
+      (loss, accuracy, mae, f1_score, custom metrics) are automatically tracked
+    - **Dynamic chart generation**: The UI generates charts for all captured metrics
+    - **Real-time updates**: Training progress is visible in the dashboard as it happens
+    - **Zero configuration**: Just add the callback and everything works automatically
 
     Example:
         >>> from flowyml.integrations.keras import FlowymlKerasCallback
-        >>>
-        >>>
+        >>>
+        >>> # Create callback - that's all you need!
+        >>> callback = FlowymlKerasCallback(
+        ...     experiment_name="my-experiment",
+        ...     project="my-project",
+        ... )
+        >>>
+        >>> # Compile with any metrics you want - they'll all be tracked
+        >>> model.compile(
+        ...     optimizer="adam",
+        ...     loss="mse",
+        ...     metrics=["mae", "mape"],  # All automatically captured!
+        ... )
+        >>>
+        >>> # Train with validation data - both train & val metrics captured
+        >>> history = model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=50, callbacks=[callback])
+        >>>
+        >>> # Get training history for use in your step output
+        >>> training_history = callback.get_training_history()
+
+    The training metrics will be visible in the FlowyML dashboard at:
+    - Run Details page → Training Metrics section
+    - Model artifacts → Training History charts
     """
 
     def __init__(
@@ -40,16 +70,22 @@ class FlowymlKerasCallback(keras.callbacks.Callback if keras else object):
         log_model: bool = True,
         log_every_epoch: bool = True,
         auto_log_history: bool = True,
+        live_update_interval: int = 1,
         metadata_store: SQLiteMetadataStore | None = None,
     ):
-        """
-
-
-
-
-
-
-
+        """Initialize the FlowyML Keras callback.
+
+        Args:
+            experiment_name: Name of the experiment for grouping runs.
+            run_name: Optional run name (defaults to timestamp-based name).
+            project: Project name for organizing runs in the dashboard.
+            log_model: Whether to save the model as an artifact after training.
+            log_every_epoch: Whether to log metrics to the database every epoch.
+            auto_log_history: Whether to automatically collect training history
+                for visualization. Highly recommended for dashboard charts.
+            live_update_interval: How often (in epochs) to update the live
+                training history artifact. Set to 1 for real-time updates.
+            metadata_store: Optional metadata store override for custom storage.
         """
         if keras is None:
             raise ImportError("Keras is not installed. Please install tensorflow or keras.")
@@ -61,6 +97,7 @@ class FlowymlKerasCallback(keras.callbacks.Callback if keras else object):
         self.log_model = log_model
         self.log_every_epoch = log_every_epoch
         self.auto_log_history = auto_log_history
+        self.live_update_interval = live_update_interval
 
         self.metadata_store = metadata_store or SQLiteMetadataStore()
 
@@ -70,15 +107,12 @@ class FlowymlKerasCallback(keras.callbacks.Callback if keras else object):
         # Track params
         self.params_logged = False
 
-        # Training history
-        self.
-
-
-
-
-            "val_accuracy": [],
-        }
-        self.custom_metrics = set()
+        # Training history artifact ID (for live updates)
+        self._history_artifact_id = f"{self.run_name}_training_history"
+
+        # DYNAMIC training history - only epochs is pre-initialized
+        # All other metrics are added dynamically as Keras logs them
+        self._training_history = {"epochs": []}
 
     def on_train_begin(self, logs=None) -> None:
         """Log initial parameters."""
@@ -117,51 +151,190 @@ class FlowymlKerasCallback(keras.callbacks.Callback if keras else object):
         self.params_logged = True
 
     def on_epoch_end(self, epoch, logs=None) -> None:
-        """
-
-
-
-
-
-
-
-
-
-
-        # Update experiment run
-        self.metadata_store.log_experiment_run(
-            experiment_id=self.experiment_name,
+        """Dynamically capture ALL metrics at the end of each epoch.
+
+        This method automatically captures whatever metrics Keras logs,
+        without requiring any configuration or hardcoded metric names.
+        """
+        if not logs:
+            return
+
+        # Log metrics to DB
+        if self.log_every_epoch:
+            for k, v in logs.items():
+                self.metadata_store.save_metric(
                     run_id=self.run_name,
-
+                    name=k,
+                    value=float(v),
+                    step=epoch,
                 )
 
-        #
-
-        self.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Update experiment run
+        self.metadata_store.log_experiment_run(
+            experiment_id=self.experiment_name,
+            run_id=self.run_name,
+            metrics=logs,
+        )
+
+        # Accumulate training history for visualization
+        if self.auto_log_history:
+            # Record epoch number (1-indexed for display)
+            self._training_history["epochs"].append(epoch + 1)
+
+            # DYNAMICALLY capture ALL metrics from Keras logs
+            for metric_name, value in logs.items():
+                # Normalize metric name for consistent display
+                display_name = self._normalize_metric_name(metric_name)
+
+                # Initialize list if this is a new metric
+                if display_name not in self._training_history:
+                    self._training_history[display_name] = []
+
+                # Append the value
+                self._training_history[display_name].append(float(value))
+
+            # Save live training history artifact for real-time UI updates
+            if (epoch + 1) % self.live_update_interval == 0:
+                self._save_live_training_history()
+
+    def _normalize_metric_name(self, name: str) -> str:
+        """Normalize metric names for consistent display.
+
+        Converts Keras metric names to user-friendly display names:
+        - 'loss' -> 'train_loss'
+        - 'val_loss' -> 'val_loss' (unchanged)
+        - 'mae' -> 'train_mae'
+        - 'val_mae' -> 'val_mae' (unchanged)
+        - 'accuracy' -> 'train_accuracy'
+        - 'acc' -> 'train_accuracy'
+        """
+        # Validation metrics (val_*) stay as-is
+        if name.startswith("val_"):
+            return name
+
+        # Special case: 'acc' -> 'train_accuracy'
+        if name == "acc":
+            return "train_accuracy"
+
+        # Training metrics: add 'train_' prefix for clarity
+        if name == "loss":
+            return "train_loss"
+
+        # For other metrics (mae, accuracy, custom), add 'train_' prefix
+        return f"train_{name}"
+
+    def get_training_history(self) -> dict:
+        """Get the accumulated training history for use in step outputs.
+
+        This is the recommended way to include training history in your
+        Model asset, ensuring it's linked to the pipeline run.
+
+        Returns:
+            dict: Training history with epochs and all captured metrics.
+                Only includes metrics that have data (non-empty lists).
+
+        Example:
+            >>> callback = FlowymlKerasCallback(...)
+            >>> model.fit(..., callbacks=[callback])
+            >>> history = callback.get_training_history()
+            >>> return Model.create(
+            ...     data=model,
+            ...     name="my_model",
+            ...     training_history=history,  # Automatically displayed in UI!
+            ... )
+        """
+        # Return cleaned history (only non-empty metrics)
+        return {k: v for k, v in self._training_history.items() if v and len(v) > 0}
+
+    def _save_live_training_history(self) -> None:
+        """Save current training history as an artifact for live UI updates."""
+        cleaned_history = self.get_training_history()
+
+        if not cleaned_history.get("epochs"):
+            return  # Nothing to save yet
+
+        # Calculate summary metrics dynamically
+        summary_metrics = {}
+        for key, values in cleaned_history.items():
+            if key == "epochs" or not values:
+                continue
+
+            # For loss-like metrics (lower is better)
+            if "loss" in key or "mae" in key or "mse" in key or "error" in key.lower():
+                summary_metrics[f"final_{key}"] = values[-1]
+                summary_metrics[f"best_{key}"] = min(values)
+            # For accuracy-like metrics (higher is better)
+            elif "accuracy" in key or "acc" in key or "f1" in key or "precision" in key or "recall" in key:
+                summary_metrics[f"final_{key}"] = values[-1]
+                summary_metrics[f"best_{key}"] = max(values)
+            # For other metrics, just store final value
+            else:
+                summary_metrics[f"final_{key}"] = values[-1]
+
+        # Save/update the training history artifact
+        self.metadata_store.save_artifact(
+            artifact_id=self._history_artifact_id,
+            metadata={
+                "artifact_id": self._history_artifact_id,
+                "name": f"training-history-{self.experiment_name}",
+                "type": "training_history",
+                "run_id": self.run_name,
+                "project": self.project,
+                "properties": {
+                    "experiment": self.experiment_name,
+                    "epochs_completed": len(cleaned_history.get("epochs", [])),
+                    "status": "training",
+                    **summary_metrics,
+                },
+                "training_history": cleaned_history,
+                "created_at": datetime.now().isoformat(),
+                "updated_at": datetime.now().isoformat(),
+            },
+        )
 
     def on_train_end(self, logs=None) -> None:
-        """Save model at the end of training
+        """Save model and finalize training history at the end of training."""
+        cleaned_history = self.get_training_history()
+
+        # Calculate final metrics dynamically
+        final_metrics = {}
+        for key, values in cleaned_history.items():
+            if key == "epochs" or not values:
+                continue
+
+            # For loss-like metrics
+            if "loss" in key or "mae" in key or "mse" in key or "error" in key.lower():
+                final_metrics[f"final_{key}"] = values[-1]
+                final_metrics[f"best_{key}"] = min(values)
+            # For accuracy-like metrics
+            elif "accuracy" in key or "acc" in key or "f1" in key:
+                final_metrics[f"final_{key}"] = values[-1]
+                final_metrics[f"best_{key}"] = max(values)
+            else:
+                final_metrics[f"final_{key}"] = values[-1]
+
+        # Update training history artifact with final status
+        self.metadata_store.save_artifact(
+            artifact_id=self._history_artifact_id,
+            metadata={
+                "artifact_id": self._history_artifact_id,
+                "name": f"training-history-{self.experiment_name}",
+                "type": "training_history",
+                "run_id": self.run_name,
+                "project": self.project,
+                "properties": {
+                    "experiment": self.experiment_name,
+                    "epochs_completed": len(cleaned_history.get("epochs", [])),
+                    "status": "completed",
+                    **final_metrics,
+                },
+                "training_history": cleaned_history,
+                "created_at": datetime.now().isoformat(),
+                "updated_at": datetime.now().isoformat(),
+            },
+        )
+
+        # Save model artifact if enabled
         if self.log_model:
            # Create artifacts directory
            artifact_dir = Path(f".flowyml/artifacts/{self.run_name}")
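
The summary logic above classifies each captured series by substring matching on its name: loss-like names (loss, mae, mse, error) are treated as lower-is-better, accuracy-like names (accuracy, acc, f1, plus precision/recall in the live-update path) as higher-is-better, and everything else just reports its final value. A standalone restatement of that heuristic, with illustrative values:

# Restatement of the summary-metric heuristic used in
# _save_live_training_history() and on_train_end(); values are illustrative.
history = {
    "epochs": [1, 2, 3],
    "train_loss": [0.9, 0.5, 0.6],
    "val_accuracy": [0.70, 0.85, 0.82],
}

summary = {}
for key, values in history.items():
    if key == "epochs" or not values:
        continue
    if "loss" in key or "mae" in key or "mse" in key or "error" in key.lower():
        summary[f"final_{key}"] = values[-1]
        summary[f"best_{key}"] = min(values)  # lower is better
    elif "accuracy" in key or "acc" in key or "f1" in key:
        summary[f"final_{key}"] = values[-1]
        summary[f"best_{key}"] = max(values)  # higher is better
    else:
        summary[f"final_{key}"] = values[-1]

print(summary)
# {'final_train_loss': 0.6, 'best_train_loss': 0.5,
#  'final_val_accuracy': 0.82, 'best_val_accuracy': 0.85}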
@@ -170,21 +343,7 @@ class FlowymlKerasCallback(keras.callbacks.Callback if keras else object):
             model_path = artifact_dir / "model.keras"
             self.model.save(model_path)
 
-            #
-            cleaned_history = {
-                k: v
-                for k, v in self.training_history.items()
-                if v  # Only include non-empty lists
-            }
-
-            # Calculate final metrics
-            final_metrics = {}
-            if "train_loss" in cleaned_history and cleaned_history["train_loss"]:
-                final_metrics["loss"] = cleaned_history["train_loss"][-1]
-            if "train_accuracy" in cleaned_history and cleaned_history["train_accuracy"]:
-                final_metrics["accuracy"] = cleaned_history["train_accuracy"][-1]
-
-            # Save model artifact with training history
+            # Save model artifact with training history attached
             artifact_id = str(uuid.uuid4())
             self.metadata_store.save_artifact(
                 artifact_id=artifact_id,
@@ -201,7 +360,13 @@ class FlowymlKerasCallback(keras.callbacks.Callback if keras else object):
                     "optimizer": str(self.model.optimizer.__class__.__name__),
                     **final_metrics,
                 },
-                "training_history": cleaned_history,
+                "training_history": cleaned_history,
                 "created_at": datetime.now().isoformat(),
             },
         )
+
+    # Expose training_history as property for backwards compatibility
+    @property
+    def training_history(self) -> dict:
+        """Return the accumulated training history as a dictionary."""
+        return self.get_training_history()
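
Taken together, the callback changes mean a training script needs nothing beyond attaching the callback. A minimal end-to-end sketch, assuming tensorflow and flowyml are installed; the experiment/project names and the synthetic data are illustrative:

# Minimal end-to-end sketch; "demo-experiment"/"demo-project" and the
# synthetic data are illustrative, not part of the package.
import numpy as np
from tensorflow import keras
from flowyml.integrations.keras import FlowymlKerasCallback

x = np.random.rand(128, 4).astype("float32")
y = x.sum(axis=1, keepdims=True)

model = keras.Sequential([keras.layers.Dense(8, activation="relu"), keras.layers.Dense(1)])
model.compile(optimizer="adam", loss="mse", metrics=["mae", "mape"])

callback = FlowymlKerasCallback(
    experiment_name="demo-experiment",
    project="demo-project",
    live_update_interval=1,  # new in 1.7.2: refresh the live history artifact every epoch
)
model.fit(x, y, validation_split=0.25, epochs=5, callbacks=[callback], verbose=0)

history = callback.get_training_history()
print(history["epochs"])  # [1, 2, 3, 4, 5] - 1-indexed for display
print(sorted(history))    # epochs plus train_/val_ prefixed series, e.g. train_mape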
flowyml/storage/sql.py
CHANGED

@@ -249,6 +249,9 @@ class SQLMetadataStore(MetadataStore):
             stmt = select(self.runs).where(self.runs.c.run_id == run_id)
             existing = conn.execute(stmt).fetchone()
 
+            # Ensure run_id is included in metadata for consistency
+            metadata_with_id = {**metadata, "run_id": run_id}
+
             values = {
                 "run_id": run_id,
                 "pipeline_name": metadata.get("pipeline_name"),
@@ -256,7 +259,7 @@ class SQLMetadataStore(MetadataStore):
                 "start_time": metadata.get("start_time"),
                 "end_time": metadata.get("end_time"),
                 "duration": metadata.get("duration"),
-                "metadata": json.dumps(
+                "metadata": json.dumps(metadata_with_id),
                 "project": metadata.get("project"),
             }
 
@@ -299,7 +302,10 @@ class SQLMetadataStore(MetadataStore):
             stmt = select(self.runs.c.metadata).where(self.runs.c.run_id == run_id)
             row = conn.execute(stmt).fetchone()
             if row:
-
+                data = json.loads(row[0])
+                # Ensure run_id is always included in the returned dict
+                data["run_id"] = run_id
+                return data
             return None
 
     def update_run_project(self, run_id: str, project_name: str) -> None:
@@ -325,12 +331,18 @@ class SQLMetadataStore(MetadataStore):
     def list_runs(self, limit: int | None = None) -> list[dict]:
         """List all runs."""
         with self.engine.connect() as conn:
-            stmt = select(self.runs.c.metadata).order_by(self.runs.c.created_at.desc())
+            stmt = select(self.runs.c.run_id, self.runs.c.metadata).order_by(self.runs.c.created_at.desc())
             if limit:
                 stmt = stmt.limit(limit)
 
             rows = conn.execute(stmt).fetchall()
-
+            runs = []
+            for row in rows:
+                data = json.loads(row[1])
+                # Ensure run_id is always included in the returned dict
+                data["run_id"] = row[0]
+                runs.append(data)
+            return runs
 
     def list_pipelines(self, project: str = None) -> list[str]:
         """List all unique pipeline names."""
@@ -402,7 +414,7 @@ class SQLMetadataStore(MetadataStore):
     def query(self, **filters) -> list[dict]:
         """Query runs with filters."""
         with self.engine.connect() as conn:
-            stmt = select(self.runs.c.metadata)
+            stmt = select(self.runs.c.run_id, self.runs.c.metadata)
 
             for key, value in filters.items():
                 if hasattr(self.runs.c, key):
@@ -410,7 +422,13 @@ class SQLMetadataStore(MetadataStore):
 
             stmt = stmt.order_by(self.runs.c.created_at.desc())
             rows = conn.execute(stmt).fetchall()
-
+            runs = []
+            for row in rows:
+                data = json.loads(row[1])
+                # Ensure run_id is always included in the returned dict
+                data["run_id"] = row[0]
+                runs.append(data)
+            return runs
 
     def save_metric(self, run_id: str, name: str, value: float, step: int = 0) -> None:
         """Save a single metric value."""
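
The storage change is the same in all three read paths (get_run, list_runs, query): the run_id column is now selected alongside the JSON metadata blob and merged into each returned dict, so rows written before 1.7.2 whose JSON lacks run_id still come back with one. A pure-Python restatement of the row handling, where the tuple stands in for a SQLAlchemy result row:

# Pure-Python restatement of the new row handling in list_runs()/query();
# the tuple stands in for a SQLAlchemy result row (run_id, metadata JSON).
import json

rows = [("run-123", json.dumps({"pipeline_name": "train", "status": "completed"}))]

runs = []
for row in rows:
    data = json.loads(row[1])  # the stored metadata blob
    data["run_id"] = row[0]    # backfilled from the column, so pre-1.7.2 rows
                               # whose JSON lacks run_id still come back with one
    runs.append(data)

assert runs[0]["run_id"] == "run-123"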

flowyml/ui/backend/routers/runs.py
CHANGED

@@ -484,3 +484,115 @@ async def get_run_logs(run_id: str):
     logs = await anyio.to_thread.run_sync(read_all_logs)
 
     return {"logs": logs}
+
+
+@router.get("/{run_id}/training-history")
+async def get_training_history(run_id: str):
+    """Get training history (per-epoch metrics) for a run.
+
+    This combines:
+    1. Training history from model artifacts (saved by FlowymlKerasCallback)
+    2. Per-epoch metrics saved in the metrics table
+
+    Returns a consolidated training history suitable for visualization.
+    """
+    store = _find_store_for_run(run_id)
+
+    # Get per-epoch metrics from the metrics table
+    metrics = store.get_metrics(run_id)
+
+    # Build training history from metrics table
+    # Group metrics by step (epoch) and name
+    epoch_metrics = {}
+    for m in metrics:
+        step = m.get("step", 0)
+        name = m.get("name", "unknown")
+        value = m.get("value", 0)
+
+        if step not in epoch_metrics:
+            epoch_metrics[step] = {}
+        epoch_metrics[step][name] = value
+
+    # Convert to chart-friendly format
+    training_history_from_metrics = {
+        "epochs": [],
+        "train_loss": [],
+        "val_loss": [],
+        "train_accuracy": [],
+        "val_accuracy": [],
+        "mae": [],
+        "val_mae": [],
+    }
+
+    # Standard metric name mappings
+    metric_mappings = {
+        "loss": "train_loss",
+        "val_loss": "val_loss",
+        "accuracy": "train_accuracy",
+        "acc": "train_accuracy",
+        "val_accuracy": "val_accuracy",
+        "val_acc": "val_accuracy",
+        "mae": "mae",
+        "val_mae": "val_mae",
+    }
+
+    # Track custom metrics
+    custom_metrics = set()
+
+    if epoch_metrics:
+        sorted_epochs = sorted(epoch_metrics.keys())
+        for epoch in sorted_epochs:
+            training_history_from_metrics["epochs"].append(epoch + 1)  # 1-indexed for display
+
+            epoch_data = epoch_metrics[epoch]
+            for metric_name, value in epoch_data.items():
+                # Map to standard name or track as custom
+                standard_name = metric_mappings.get(metric_name)
+                if standard_name:
+                    training_history_from_metrics[standard_name].append(value)
+                else:
+                    # Custom metric
+                    if metric_name not in custom_metrics:
+                        custom_metrics.add(metric_name)
+                        training_history_from_metrics[metric_name] = []
+                    training_history_from_metrics[metric_name].append(value)
+
+    # Also try to get training history from model artifacts
+    artifacts = store.list_assets(run_id=run_id)
+    artifact_history = None
+
+    for artifact in artifacts:
+        # Check if artifact has training_history
+        if artifact.get("training_history"):
+            artifact_history = artifact.get("training_history")
+            break
+        # Also check in metadata/properties
+        metadata = artifact.get("metadata", {})
+        if isinstance(metadata, str):
+            try:
+                metadata = json.loads(metadata)
+            except Exception:
+                metadata = {}
+        if metadata.get("training_history"):
+            artifact_history = metadata.get("training_history")
+            break
+
+    # Prefer artifact history if it has more data, otherwise use metrics
+    if artifact_history and len(artifact_history.get("epochs", [])) > len(
+        training_history_from_metrics.get("epochs", []),
+    ):
+        final_history = artifact_history
+    elif training_history_from_metrics.get("epochs"):
+        final_history = training_history_from_metrics
+    else:
+        final_history = artifact_history or {}
+
+    # Clean up empty arrays
+    cleaned_history = {k: v for k, v in final_history.items() if v and (not isinstance(v, list) or len(v) > 0)}
+
+    return {
+        "training_history": cleaned_history,
+        "has_history": len(cleaned_history.get("epochs", [])) > 0,
+        "total_epochs": len(cleaned_history.get("epochs", [])),
+        "source": "artifact" if artifact_history else "metrics",
+    }
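
A sketch of consuming the new endpoint from a client. The host, port, and /api/runs mount prefix are assumptions; only the /{run_id}/training-history route suffix is defined in this diff:

# Client-side sketch; host, port, and the "/api/runs" mount prefix are
# assumptions - only the "/{run_id}/training-history" suffix is in this diff.
import requests

resp = requests.get("http://localhost:8000/api/runs/run-123/training-history")
payload = resp.json()

if payload["has_history"]:
    # "source" reports whether the artifact or the metrics table supplied the data
    print(payload["source"], payload["total_epochs"])
    print(payload["training_history"]["epochs"])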
flowyml/ui/backend/routers/schedules.py
CHANGED

@@ -25,21 +25,41 @@ class ScheduleRequest(BaseModel):
 
 @router.get("/")
 async def list_schedules():
-    """List all active schedules.
-
-    schedules
-
-
-
-
-
-
-
-
-
-
+    """List all active schedules.
+
+    This reads schedules from the shared database, so schedules created
+    by user code (e.g., in scripts) are visible in the UI.
+    """
+    # First, get schedules from the in-memory scheduler
+    memory_schedules = []
+    for s in scheduler.list_schedules():
+        memory_schedules.append(
+            {
+                "pipeline_name": s.pipeline_name,
+                "schedule_type": s.schedule_type,
+                "schedule_value": s.schedule_value,
+                "enabled": s.enabled,
+                "last_run": s.last_run.isoformat() if s.last_run else None,
+                "next_run": s.next_run.isoformat() if s.next_run else None,
+                "timezone": s.timezone,
+            },
+        )
+
+    # Also read directly from the persistence database to get schedules
+    # created by other processes (e.g., user scripts)
+    db_schedules = []
+    if scheduler._persistence:
+        db_schedules = scheduler._persistence.list_all_schedules()
+
+    # Merge: prefer memory schedules (more up-to-date), but include db-only ones
+    memory_names = {s["pipeline_name"] for s in memory_schedules}
+    result = list(memory_schedules)
+
+    for db_sched in db_schedules:
+        if db_sched.get("pipeline_name") not in memory_names:
+            result.append(db_sched)
+
+    return result
 
 
 @router.get("/health")
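
The merge rule is worth restating on its own: schedules held by the in-memory scheduler win, and database rows created by other processes are appended only when no in-memory schedule has the same pipeline name. A standalone restatement with illustrative names:

# Standalone restatement of the merge rule; pipeline names are illustrative.
memory_schedules = [{"pipeline_name": "nightly-train", "enabled": True}]
db_schedules = [
    {"pipeline_name": "nightly-train", "enabled": False},  # shadowed by memory copy
    {"pipeline_name": "weekly-report", "enabled": True},   # db-only, kept
]

memory_names = {s["pipeline_name"] for s in memory_schedules}
result = list(memory_schedules)
for db_sched in db_schedules:
    if db_sched.get("pipeline_name") not in memory_names:
        result.append(db_sched)

assert [s["pipeline_name"] for s in result] == ["nightly-train", "weekly-report"]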