ml-dash 0.6.12__tar.gz → 0.6.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ml_dash-0.6.12 → ml_dash-0.6.14}/PKG-INFO +1 -1
- {ml_dash-0.6.12 → ml_dash-0.6.14}/pyproject.toml +1 -1
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/__init__.py +15 -13
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/buffer.py +28 -39
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/cli_commands/upload.py +3 -2
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/client.py +14 -4
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/experiment.py +37 -66
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/metric.py +10 -6
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/run.py +65 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/LICENSE +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/README.md +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/auth/__init__.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/auth/constants.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/auth/device_flow.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/auth/device_secret.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/auth/exceptions.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/auth/token_storage.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/auto_start.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/cli.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/cli_commands/__init__.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/cli_commands/api.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/cli_commands/create.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/cli_commands/download.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/cli_commands/list.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/cli_commands/login.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/cli_commands/logout.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/cli_commands/profile.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/cli_commands/remove.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/config.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/files.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/log.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/params.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/py.typed +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/remote_auto_start.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/snowflake.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/storage.py +0 -0
- {ml_dash-0.6.12 → ml_dash-0.6.14}/src/ml_dash/track.py +0 -0
|
@@ -43,18 +43,19 @@ from .params import ParametersBuilder
|
|
|
43
43
|
from .run import RUN
|
|
44
44
|
from .storage import LocalStorage
|
|
45
45
|
|
|
46
|
-
__version__ = "0.6.
|
|
46
|
+
__version__ = "0.6.14"
|
|
47
47
|
|
|
48
|
-
#
|
|
49
|
-
|
|
48
|
+
# Required version - MUST match exactly (blocks all older versions)
|
|
49
|
+
# Update this with EVERY release to force users to upgrade
|
|
50
|
+
REQUIRED_VERSION = "0.6.14"
|
|
50
51
|
|
|
51
52
|
|
|
52
53
|
def _check_version_compatibility():
|
|
53
54
|
"""
|
|
54
|
-
Enforce
|
|
55
|
+
Enforce strict version requirement.
|
|
55
56
|
|
|
56
|
-
Raises ImportError if installed version
|
|
57
|
-
This ensures users
|
|
57
|
+
Raises ImportError if installed version doesn't match the required version.
|
|
58
|
+
This ensures all users are on the latest version with newest features and bug fixes.
|
|
58
59
|
"""
|
|
59
60
|
try:
|
|
60
61
|
from packaging import version
|
|
@@ -64,25 +65,26 @@ def _check_version_compatibility():
|
|
|
64
65
|
return
|
|
65
66
|
|
|
66
67
|
current = version.parse(__version__)
|
|
67
|
-
|
|
68
|
+
required = version.parse(REQUIRED_VERSION)
|
|
68
69
|
|
|
69
|
-
if current <
|
|
70
|
+
if current < required:
|
|
70
71
|
raise ImportError(
|
|
71
72
|
f"\n"
|
|
72
73
|
f"{'=' * 80}\n"
|
|
73
|
-
f"ERROR: ml-dash version {__version__} is
|
|
74
|
+
f"ERROR: ml-dash version {__version__} is outdated!\n"
|
|
74
75
|
f"{'=' * 80}\n"
|
|
75
76
|
f"\n"
|
|
76
|
-
f"
|
|
77
|
-
f"
|
|
77
|
+
f"Your installed version ({__version__}) is no longer supported.\n"
|
|
78
|
+
f"Required version: {REQUIRED_VERSION}\n"
|
|
78
79
|
f"\n"
|
|
79
80
|
f"Please upgrade to the latest version:\n"
|
|
80
81
|
f"\n"
|
|
81
82
|
f" pip install --upgrade ml-dash\n"
|
|
82
83
|
f"\n"
|
|
83
|
-
f"Or
|
|
84
|
+
f"Or with uv:\n"
|
|
84
85
|
f"\n"
|
|
85
|
-
f" pip install ml-dash
|
|
86
|
+
f" uv pip install --upgrade ml-dash\n"
|
|
87
|
+
f" uv sync --upgrade-package ml-dash\n"
|
|
86
88
|
f"\n"
|
|
87
89
|
f"{'=' * 80}\n"
|
|
88
90
|
)
|
|
@@ -458,12 +458,10 @@ class BackgroundBufferManager:
|
|
|
458
458
|
logs=batch,
|
|
459
459
|
)
|
|
460
460
|
except Exception as e:
|
|
461
|
-
|
|
462
|
-
f"Failed to flush {len(batch)} logs to remote server: {e}
|
|
463
|
-
f"
|
|
464
|
-
|
|
465
|
-
stacklevel=3,
|
|
466
|
-
)
|
|
461
|
+
raise RuntimeError(
|
|
462
|
+
f"Failed to flush {len(batch)} logs to remote server: {e}\n"
|
|
463
|
+
f"Data loss occurred. Check your network connection and server status."
|
|
464
|
+
) from e
|
|
467
465
|
|
|
468
466
|
if self._experiment.run._storage:
|
|
469
467
|
# Local storage writes one at a time (no batch API)
|
|
@@ -479,11 +477,10 @@ class BackgroundBufferManager:
|
|
|
479
477
|
timestamp=log_entry["timestamp"],
|
|
480
478
|
)
|
|
481
479
|
except Exception as e:
|
|
482
|
-
|
|
483
|
-
f"Failed to write log to local storage: {e}"
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
)
|
|
480
|
+
raise RuntimeError(
|
|
481
|
+
f"Failed to write log to local storage: {e}\n"
|
|
482
|
+
f"Check disk space and file permissions."
|
|
483
|
+
) from e
|
|
487
484
|
|
|
488
485
|
self._last_log_flush = time.time()
|
|
489
486
|
|
|
@@ -535,12 +532,10 @@ class BackgroundBufferManager:
|
|
|
535
532
|
)
|
|
536
533
|
except Exception as e:
|
|
537
534
|
metric_display = f"'{metric_name}'" if metric_name else "unnamed metric"
|
|
538
|
-
|
|
539
|
-
f"Failed to flush {len(batch)} points to {metric_display} on remote server: {e}
|
|
540
|
-
f"
|
|
541
|
-
|
|
542
|
-
stacklevel=3,
|
|
543
|
-
)
|
|
535
|
+
raise RuntimeError(
|
|
536
|
+
f"Failed to flush {len(batch)} points to {metric_display} on remote server: {e}\n"
|
|
537
|
+
f"Data loss occurred. Check your network connection and server status."
|
|
538
|
+
) from e
|
|
544
539
|
|
|
545
540
|
if self._experiment.run._storage:
|
|
546
541
|
try:
|
|
@@ -556,11 +551,10 @@ class BackgroundBufferManager:
|
|
|
556
551
|
)
|
|
557
552
|
except Exception as e:
|
|
558
553
|
metric_display = f"'{metric_name}'" if metric_name else "unnamed metric"
|
|
559
|
-
|
|
560
|
-
f"Failed to flush {len(batch)} points to {metric_display} in local storage: {e}"
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
)
|
|
554
|
+
raise RuntimeError(
|
|
555
|
+
f"Failed to flush {len(batch)} points to {metric_display} in local storage: {e}\n"
|
|
556
|
+
f"Check disk space and file permissions."
|
|
557
|
+
) from e
|
|
564
558
|
|
|
565
559
|
self._last_metric_flush[metric_name] = time.time()
|
|
566
560
|
|
|
@@ -597,12 +591,10 @@ class BackgroundBufferManager:
|
|
|
597
591
|
entries=batch,
|
|
598
592
|
)
|
|
599
593
|
except Exception as e:
|
|
600
|
-
|
|
601
|
-
f"Failed to flush {len(batch)} entries to track '{topic}' on remote server: {e}
|
|
602
|
-
f"
|
|
603
|
-
|
|
604
|
-
stacklevel=3,
|
|
605
|
-
)
|
|
594
|
+
raise RuntimeError(
|
|
595
|
+
f"Failed to flush {len(batch)} entries to track '{topic}' on remote server: {e}\n"
|
|
596
|
+
f"Data loss occurred. Check your network connection and server status."
|
|
597
|
+
) from e
|
|
606
598
|
|
|
607
599
|
# Write to local storage
|
|
608
600
|
if self._experiment.run._storage:
|
|
@@ -615,11 +607,10 @@ class BackgroundBufferManager:
|
|
|
615
607
|
entries=batch,
|
|
616
608
|
)
|
|
617
609
|
except Exception as e:
|
|
618
|
-
|
|
619
|
-
f"Failed to flush {len(batch)} entries to track '{topic}' in local storage: {e}"
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
)
|
|
610
|
+
raise RuntimeError(
|
|
611
|
+
f"Failed to flush {len(batch)} entries to track '{topic}' in local storage: {e}\n"
|
|
612
|
+
f"Check disk space and file permissions."
|
|
613
|
+
) from e
|
|
623
614
|
|
|
624
615
|
self._last_track_flush[topic] = time.time()
|
|
625
616
|
|
|
@@ -663,12 +654,10 @@ class BackgroundBufferManager:
|
|
|
663
654
|
if total_files > 1:
|
|
664
655
|
print(f"[ML-Dash] [{completed}/{total_files}] Uploaded {file_entry['filename']}", flush=True)
|
|
665
656
|
except Exception as e:
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
f"
|
|
669
|
-
|
|
670
|
-
stacklevel=3,
|
|
671
|
-
)
|
|
657
|
+
raise RuntimeError(
|
|
658
|
+
f"Failed to upload file {file_entry['filename']}: {e}\n"
|
|
659
|
+
f"File upload failed. Check network connection and file permissions."
|
|
660
|
+
) from e
|
|
672
661
|
|
|
673
662
|
def _upload_single_file(self, file_entry: Dict[str, Any]) -> None:
|
|
674
663
|
"""
|
|
@@ -306,7 +306,8 @@ def discover_experiments(
|
|
|
306
306
|
with open(exp_json, "r") as f:
|
|
307
307
|
metadata = json.load(f)
|
|
308
308
|
prefix = metadata.get("prefix")
|
|
309
|
-
except:
|
|
309
|
+
except (FileNotFoundError, json.JSONDecodeError, KeyError) as e:
|
|
310
|
+
# Metadata file missing or invalid - will use path-based prefix
|
|
310
311
|
pass
|
|
311
312
|
|
|
312
313
|
# Extract project and experiment names from PREFIX (not path)
|
|
@@ -1217,7 +1218,7 @@ def cmd_upload(args: argparse.Namespace) -> int:
|
|
|
1217
1218
|
Exit code (0 for success, 1 for error)
|
|
1218
1219
|
"""
|
|
1219
1220
|
# Handle track upload if --tracks is specified
|
|
1220
|
-
if args
|
|
1221
|
+
if getattr(args, 'tracks', False):
|
|
1221
1222
|
return cmd_upload_track(args)
|
|
1222
1223
|
|
|
1223
1224
|
# Load config
|
|
@@ -226,8 +226,13 @@ class RemoteClient:
|
|
|
226
226
|
result = self.graphql_query(query)
|
|
227
227
|
username = result.get("me", {}).get("username")
|
|
228
228
|
return username
|
|
229
|
-
except Exception:
|
|
230
|
-
|
|
229
|
+
except Exception as e:
|
|
230
|
+
# Re-raise authentication errors
|
|
231
|
+
from .auth.exceptions import AuthenticationError
|
|
232
|
+
if isinstance(e, AuthenticationError):
|
|
233
|
+
raise
|
|
234
|
+
# For other errors, raise a clear exception
|
|
235
|
+
raise RuntimeError(f"Failed to fetch namespace from server: {e}") from e
|
|
231
236
|
|
|
232
237
|
def get_current_user(self) -> Optional[Dict[str, Any]]:
|
|
233
238
|
"""
|
|
@@ -264,8 +269,13 @@ class RemoteClient:
|
|
|
264
269
|
"""
|
|
265
270
|
result = self.graphql_query(query)
|
|
266
271
|
return result.get("me")
|
|
267
|
-
except Exception:
|
|
268
|
-
|
|
272
|
+
except Exception as e:
|
|
273
|
+
# Re-raise authentication errors
|
|
274
|
+
from .auth.exceptions import AuthenticationError
|
|
275
|
+
if isinstance(e, AuthenticationError):
|
|
276
|
+
raise
|
|
277
|
+
# For other errors, raise a clear exception
|
|
278
|
+
raise RuntimeError(f"Failed to fetch current user from server: {e}") from e
|
|
269
279
|
|
|
270
280
|
def _ensure_authenticated(self):
|
|
271
281
|
"""Check if authenticated, raise error if not."""
|
|
@@ -399,8 +399,11 @@ class Experiment:
|
|
|
399
399
|
print(f"View results at: {experiment_url}")
|
|
400
400
|
|
|
401
401
|
except Exception as e:
|
|
402
|
-
#
|
|
403
|
-
|
|
402
|
+
# Raise on status update failure
|
|
403
|
+
raise RuntimeError(
|
|
404
|
+
f"Failed to update experiment status to COMPLETED: {e}\n"
|
|
405
|
+
f"Experiment may not be marked as completed on the server."
|
|
406
|
+
) from e
|
|
404
407
|
|
|
405
408
|
self._is_open = False
|
|
406
409
|
|
|
@@ -554,15 +557,10 @@ class Experiment:
|
|
|
554
557
|
logs=[log_entry], # Single log in array
|
|
555
558
|
)
|
|
556
559
|
except Exception as e:
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
f"Failed to write log to remote server: {e}. Training will continue.",
|
|
562
|
-
RuntimeWarning,
|
|
563
|
-
stacklevel=4,
|
|
564
|
-
)
|
|
565
|
-
# Fall through to local storage if available
|
|
560
|
+
raise RuntimeError(
|
|
561
|
+
f"Failed to write log to remote server: {e}\n"
|
|
562
|
+
f"Data loss occurred. Check your network connection and server status."
|
|
563
|
+
) from e
|
|
566
564
|
|
|
567
565
|
if self.run._storage:
|
|
568
566
|
# Local mode: write to file immediately
|
|
@@ -577,11 +575,10 @@ class Experiment:
|
|
|
577
575
|
timestamp=log_entry["timestamp"],
|
|
578
576
|
)
|
|
579
577
|
except Exception as e:
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
)
|
|
578
|
+
raise RuntimeError(
|
|
579
|
+
f"Failed to write log to local storage: {e}\n"
|
|
580
|
+
f"Check disk space and file permissions."
|
|
581
|
+
) from e
|
|
585
582
|
|
|
586
583
|
def _print_log(
|
|
587
584
|
self, message: str, level: str, metadata: Optional[Dict[str, Any]]
|
|
@@ -1072,17 +1069,11 @@ class Experiment:
|
|
|
1072
1069
|
metadata=metadata,
|
|
1073
1070
|
)
|
|
1074
1071
|
except Exception as e:
|
|
1075
|
-
# Log warning but don't crash training
|
|
1076
|
-
import warnings
|
|
1077
|
-
|
|
1078
1072
|
metric_display = f"'{name}'" if name else "unnamed metric"
|
|
1079
|
-
|
|
1080
|
-
f"Failed to log {metric_display} to remote server: {e}
|
|
1081
|
-
f"
|
|
1082
|
-
|
|
1083
|
-
stacklevel=3,
|
|
1084
|
-
)
|
|
1085
|
-
# Fall through to local storage if available
|
|
1073
|
+
raise RuntimeError(
|
|
1074
|
+
f"Failed to log {metric_display} to remote server: {e}\n"
|
|
1075
|
+
f"Data loss occurred. Check your network connection and server status."
|
|
1076
|
+
) from e
|
|
1086
1077
|
|
|
1087
1078
|
if self.run._storage:
|
|
1088
1079
|
# Local mode: append to local storage
|
|
@@ -1098,14 +1089,11 @@ class Experiment:
|
|
|
1098
1089
|
metadata=metadata,
|
|
1099
1090
|
)
|
|
1100
1091
|
except Exception as e:
|
|
1101
|
-
import warnings
|
|
1102
|
-
|
|
1103
1092
|
metric_display = f"'{name}'" if name else "unnamed metric"
|
|
1104
|
-
|
|
1105
|
-
f"Failed to log {metric_display} to local storage: {e}"
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
)
|
|
1093
|
+
raise RuntimeError(
|
|
1094
|
+
f"Failed to log {metric_display} to local storage: {e}\n"
|
|
1095
|
+
f"Check disk space and file permissions."
|
|
1096
|
+
) from e
|
|
1109
1097
|
|
|
1110
1098
|
return result
|
|
1111
1099
|
|
|
@@ -1141,15 +1129,10 @@ class Experiment:
|
|
|
1141
1129
|
entries=[{"timestamp": timestamp, **data}],
|
|
1142
1130
|
)
|
|
1143
1131
|
except Exception as e:
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
f"Failed to log track '{topic}' to remote server: {e}. "
|
|
1149
|
-
f"Training will continue.",
|
|
1150
|
-
RuntimeWarning,
|
|
1151
|
-
stacklevel=3,
|
|
1152
|
-
)
|
|
1132
|
+
raise RuntimeError(
|
|
1133
|
+
f"Failed to log track '{topic}' to remote server: {e}\n"
|
|
1134
|
+
f"Data loss occurred. Check your network connection and server status."
|
|
1135
|
+
) from e
|
|
1153
1136
|
|
|
1154
1137
|
if self.run._storage:
|
|
1155
1138
|
# Local mode: append to local storage
|
|
@@ -1162,13 +1145,10 @@ class Experiment:
|
|
|
1162
1145
|
entries=[{"timestamp": timestamp, **data}],
|
|
1163
1146
|
)
|
|
1164
1147
|
except Exception as e:
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
RuntimeWarning,
|
|
1170
|
-
stacklevel=3,
|
|
1171
|
-
)
|
|
1148
|
+
raise RuntimeError(
|
|
1149
|
+
f"Failed to log track '{topic}' to local storage: {e}\n"
|
|
1150
|
+
f"Check disk space and file permissions."
|
|
1151
|
+
) from e
|
|
1172
1152
|
|
|
1173
1153
|
def _append_batch_to_metric(
|
|
1174
1154
|
self,
|
|
@@ -1205,17 +1185,11 @@ class Experiment:
|
|
|
1205
1185
|
metadata=metadata,
|
|
1206
1186
|
)
|
|
1207
1187
|
except Exception as e:
|
|
1208
|
-
# Log warning but don't crash training
|
|
1209
|
-
import warnings
|
|
1210
|
-
|
|
1211
1188
|
metric_display = f"'{name}'" if name else "unnamed metric"
|
|
1212
|
-
|
|
1213
|
-
f"Failed to log batch to {metric_display} on remote server: {e}
|
|
1214
|
-
f"
|
|
1215
|
-
|
|
1216
|
-
stacklevel=3,
|
|
1217
|
-
)
|
|
1218
|
-
# Fall through to local storage if available
|
|
1189
|
+
raise RuntimeError(
|
|
1190
|
+
f"Failed to log batch to {metric_display} on remote server: {e}\n"
|
|
1191
|
+
f"Data loss occurred. Check your network connection and server status."
|
|
1192
|
+
) from e
|
|
1219
1193
|
|
|
1220
1194
|
if self.run._storage:
|
|
1221
1195
|
# Local mode: append batch to local storage
|
|
@@ -1231,14 +1205,11 @@ class Experiment:
|
|
|
1231
1205
|
metadata=metadata,
|
|
1232
1206
|
)
|
|
1233
1207
|
except Exception as e:
|
|
1234
|
-
import warnings
|
|
1235
|
-
|
|
1236
1208
|
metric_display = f"'{name}'" if name else "unnamed metric"
|
|
1237
|
-
|
|
1238
|
-
f"Failed to log batch to {metric_display} in local storage: {e}"
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
)
|
|
1209
|
+
raise RuntimeError(
|
|
1210
|
+
f"Failed to log batch to {metric_display} in local storage: {e}\n"
|
|
1211
|
+
f"Check disk space and file permissions."
|
|
1212
|
+
) from e
|
|
1242
1213
|
|
|
1243
1214
|
return result
|
|
1244
1215
|
|
|
@@ -69,9 +69,11 @@ class BufferManager:
|
|
|
69
69
|
value = float('nan')
|
|
70
70
|
try:
|
|
71
71
|
self._buffers[prefix][key].append(float(value))
|
|
72
|
-
except (TypeError, ValueError):
|
|
73
|
-
|
|
74
|
-
|
|
72
|
+
except (TypeError, ValueError) as e:
|
|
73
|
+
raise ValueError(
|
|
74
|
+
f"Cannot buffer non-numeric value for '{key}': {value!r} (type: {type(value).__name__})\n"
|
|
75
|
+
f"Metrics must be numeric (int, float). Use exp.log() for non-numeric values."
|
|
76
|
+
) from e
|
|
75
77
|
|
|
76
78
|
def _compute_stats(self, values: List[float], aggs: tuple) -> Dict[str, float]:
|
|
77
79
|
"""
|
|
@@ -248,9 +250,11 @@ class SummaryCache:
|
|
|
248
250
|
value = float('nan')
|
|
249
251
|
try:
|
|
250
252
|
self._buffer[key].append(float(value))
|
|
251
|
-
except (TypeError, ValueError):
|
|
252
|
-
|
|
253
|
-
|
|
253
|
+
except (TypeError, ValueError) as e:
|
|
254
|
+
raise ValueError(
|
|
255
|
+
f"Cannot store non-numeric value for '{key}': {value!r} (type: {type(value).__name__})\n"
|
|
256
|
+
f"SummaryCache only accepts numeric values. Use exp.log() for non-numeric data."
|
|
257
|
+
) from e
|
|
254
258
|
|
|
255
259
|
def set(self, **kwargs) -> None:
|
|
256
260
|
"""
|
|
@@ -158,6 +158,16 @@ class RUN:
|
|
|
158
158
|
now = datetime.now()
|
|
159
159
|
"""Timestamp at import time. Does not change during the session."""
|
|
160
160
|
|
|
161
|
+
@property
|
|
162
|
+
def date(self) -> str:
|
|
163
|
+
"""Date string in YYYYMMDD format."""
|
|
164
|
+
return self.now.strftime("%Y%m%d")
|
|
165
|
+
|
|
166
|
+
@property
|
|
167
|
+
def datetime_str(self) -> str:
|
|
168
|
+
"""DateTime string in YYYYMMDD.HHMMSS format."""
|
|
169
|
+
return self.now.strftime("%Y%m%d.%H%M%S")
|
|
170
|
+
|
|
161
171
|
timestamp: str = None
|
|
162
172
|
"""Timestamp created at instantiation"""
|
|
163
173
|
|
|
@@ -277,6 +287,61 @@ class RUN:
|
|
|
277
287
|
# self.name is the last segment
|
|
278
288
|
self.name = parts[-1] if len(parts) > 2 else parts[1]
|
|
279
289
|
|
|
290
|
+
def __setattr__(self, name: str, value):
|
|
291
|
+
"""
|
|
292
|
+
Intercept attribute setting to expand {EXP.attr} templates in prefix.
|
|
293
|
+
|
|
294
|
+
When prefix is set, expands any {EXP.name}, {EXP.id}, {EXP.date}, etc. templates
|
|
295
|
+
using current instance's attributes. Also syncs back to class-level RUN attributes.
|
|
296
|
+
"""
|
|
297
|
+
# Prevent prefix changes after experiment has started
|
|
298
|
+
if name == "prefix" and isinstance(value, str):
|
|
299
|
+
experiment = getattr(self, "_experiment", None)
|
|
300
|
+
if experiment is not None and getattr(experiment, "_is_open", False):
|
|
301
|
+
raise RuntimeError(
|
|
302
|
+
"Cannot change prefix after experiment has been initialized. "
|
|
303
|
+
"Set prefix before calling experiment.run.start() or entering the context manager."
|
|
304
|
+
)
|
|
305
|
+
|
|
306
|
+
# Expand templates if setting prefix
|
|
307
|
+
if name == "prefix" and isinstance(value, str):
|
|
308
|
+
# Check if value contains {EXP. templates
|
|
309
|
+
if "{EXP." in value:
|
|
310
|
+
import re
|
|
311
|
+
|
|
312
|
+
def replace_match(match):
|
|
313
|
+
attr_name = match.group(1)
|
|
314
|
+
# Special handling for id - generate if needed
|
|
315
|
+
if attr_name == "id" and not getattr(self, "id", None):
|
|
316
|
+
from ml_dash.snowflake import generate_id
|
|
317
|
+
object.__setattr__(self, "id", generate_id())
|
|
318
|
+
|
|
319
|
+
# Get attribute, raising error if not found
|
|
320
|
+
try:
|
|
321
|
+
attr_value = getattr(self, attr_name)
|
|
322
|
+
if attr_value is None:
|
|
323
|
+
raise AttributeError(f"Attribute '{attr_name}' is None")
|
|
324
|
+
return str(attr_value)
|
|
325
|
+
except AttributeError:
|
|
326
|
+
raise AttributeError(f"RUN has no attribute '{attr_name}'")
|
|
327
|
+
|
|
328
|
+
# Match {EXP.attr_name} pattern
|
|
329
|
+
pattern = r"\{EXP\.(\w+)\}"
|
|
330
|
+
value = re.sub(pattern, replace_match, value)
|
|
331
|
+
|
|
332
|
+
# Always update _folder_path when prefix changes
|
|
333
|
+
object.__setattr__(self, "_folder_path", value)
|
|
334
|
+
|
|
335
|
+
# Parse and update owner, project, name from new prefix
|
|
336
|
+
parts = value.strip("/").split("/")
|
|
337
|
+
if len(parts) >= 2:
|
|
338
|
+
object.__setattr__(self, "owner", parts[0])
|
|
339
|
+
object.__setattr__(self, "project", parts[1])
|
|
340
|
+
object.__setattr__(self, "name", parts[-1] if len(parts) > 2 else parts[1])
|
|
341
|
+
|
|
342
|
+
# Use object.__setattr__ to set the value
|
|
343
|
+
object.__setattr__(self, name, value)
|
|
344
|
+
|
|
280
345
|
def start(self) -> "Experiment":
|
|
281
346
|
"""
|
|
282
347
|
Start the experiment (sets status to RUNNING).
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|