ml_dash-0.6.1-py3-none-any.whl → ml_dash-0.6.2-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- ml_dash/__init__.py +37 -63
- ml_dash/auth/token_storage.py +267 -226
- ml_dash/auto_start.py +28 -15
- ml_dash/cli.py +16 -2
- ml_dash/cli_commands/api.py +165 -0
- ml_dash/cli_commands/download.py +757 -667
- ml_dash/cli_commands/list.py +146 -13
- ml_dash/cli_commands/login.py +190 -183
- ml_dash/cli_commands/profile.py +92 -0
- ml_dash/cli_commands/upload.py +1291 -1141
- ml_dash/client.py +79 -6
- ml_dash/config.py +119 -119
- ml_dash/experiment.py +1242 -995
- ml_dash/files.py +1051 -340
- ml_dash/log.py +7 -7
- ml_dash/metric.py +359 -100
- ml_dash/params.py +6 -6
- ml_dash/remote_auto_start.py +20 -17
- ml_dash/run.py +231 -0
- ml_dash/snowflake.py +173 -0
- ml_dash/storage.py +1051 -1079
- {ml_dash-0.6.1.dist-info → ml_dash-0.6.2.dist-info}/METADATA +45 -20
- ml_dash-0.6.2.dist-info/RECORD +33 -0
- ml_dash-0.6.1.dist-info/RECORD +0 -29
- {ml_dash-0.6.1.dist-info → ml_dash-0.6.2.dist-info}/WHEEL +0 -0
- {ml_dash-0.6.1.dist-info → ml_dash-0.6.2.dist-info}/entry_points.txt +0 -0
ml_dash/log.py
CHANGED
@@ -69,8 +69,8 @@ class LogBuilder:
     the log level method is called to write the log.

     Example:
-
-
+        exp.logs.info("Training started", epoch=1)
+        exp.logs.error("Failed", error_code=500)
     """

     def __init__(self, experiment: 'Experiment', metadata: Optional[Dict[str, Any]] = None):

@@ -93,8 +93,8 @@ class LogBuilder:
             **extra_metadata: Additional metadata as keyword arguments

         Example:
-
-
+            exp.log("Training started", level="info")
+            exp.log("Epoch complete", level="info", epoch=1, loss=0.5)
         """
         self._write(LogLevel.INFO.value, message, extra_metadata)

@@ -107,7 +107,7 @@ class LogBuilder:
             **extra_metadata: Additional metadata as keyword arguments

         Example:
-
+            exp.logs.warn("High loss detected", loss=1.5)
         """
         self._write(LogLevel.WARN.value, message, extra_metadata)

@@ -120,7 +120,7 @@ class LogBuilder:
             **extra_metadata: Additional metadata as keyword arguments

         Example:
-
+            exp.logs.error("Failed to save", path="/models/checkpoint.pth")
         """
         self._write(LogLevel.ERROR.value, message, extra_metadata)

@@ -133,7 +133,7 @@ class LogBuilder:
             **extra_metadata: Additional metadata as keyword arguments

         Example:
-
+            exp.logs.debug("Memory usage", memory_mb=2500)
         """
         self._write(LogLevel.DEBUG.value, message, extra_metadata)
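Taken together, the restored docstrings describe a single pattern: each level method (`info`, `warn`, `error`, `debug`) forwards the message plus arbitrary keyword metadata to `_write` with the matching `LogLevel`. A usage sketch of that API, assuming `exp` is an already-open ml-dash experiment handle (the setup itself is not shown in this diff):

    # Sketch only; assumes `exp` is an open Experiment exposing a LogBuilder as `exp.logs`.
    exp.logs.info("Training started", epoch=1)
    exp.logs.warn("High loss detected", loss=1.5)
    exp.logs.error("Failed to save", path="/models/checkpoint.pth")
    exp.logs.debug("Memory usage", memory_mb=2500)
    exp.log("Epoch complete", level="info", epoch=1, loss=0.5)  # generic entry point per the exp.log docstring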
ml_dash/metric.py
CHANGED
@@ -1,5 +1,5 @@
 """
-Metric API - Time-series data
+Metric API - Time-series data logging for ML experiments.

 Metrics are used for storing continuous data series like training metrics,
 validation losses, system measurements, etc.
@@ -13,6 +13,203 @@ if TYPE_CHECKING:
     from .experiment import Experiment


+class BufferManager:
+    """
+    Global buffer manager for collecting metric values across prefixes.
+
+    Accumulates values via metrics("prefix").buffer(...) and computes
+    statistics when log_summary() is called.
+
+    Usage:
+        # Accumulate with prefix
+        metrics("train").buffer(loss=0.5, accuracy=0.81)
+        metrics("val").buffer(loss=0.6, accuracy=0.78)
+
+        # Log summaries (all buffered prefixes)
+        metrics.buffer.log_summary()  # default: "mean"
+        metrics.buffer.log_summary("mean", "std", "p95")
+
+        # Log non-buffered values directly
+        metrics.log(epoch=epoch, lr=lr)
+
+        # Final flush to storage
+        metrics.flush()
+    """
+
+    # Supported aggregation functions
+    SUPPORTED_AGGS = {
+        "mean", "std", "min", "max", "count",
+        "median", "sum",
+        "p50", "p90", "p95", "p99",
+        "last", "first"
+    }
+
+    def __init__(self, metrics_manager: 'MetricsManager'):
+        """
+        Initialize BufferManager.
+
+        Args:
+            metrics_manager: Parent MetricsManager instance
+        """
+        self._metrics_manager = metrics_manager
+        # Buffers per prefix: {prefix: {key: [values]}}
+        self._buffers: Dict[Optional[str], Dict[str, List[float]]] = defaultdict(lambda: defaultdict(list))
+
+    def _store(self, prefix: Optional[str], **kwargs) -> None:
+        """
+        Store values in buffer for a specific prefix.
+
+        Args:
+            prefix: Metric prefix (e.g., "train", "val")
+            **kwargs: Metric values to buffer (e.g., loss=0.5, accuracy=0.9)
+        """
+        for key, value in kwargs.items():
+            # Handle None values gracefully
+            if value is None:
+                value = float('nan')
+            try:
+                self._buffers[prefix][key].append(float(value))
+            except (TypeError, ValueError):
+                # Skip non-numeric values silently
+                continue
+
+    def _compute_stats(self, values: List[float], aggs: tuple) -> Dict[str, float]:
+        """
+        Compute statistics for a list of values.
+
+        Args:
+            values: List of numeric values
+            aggs: Tuple of aggregation names
+
+        Returns:
+            Dict with computed statistics
+        """
+        # Filter out NaN values
+        clean_values = [v for v in values if not (isinstance(v, float) and v != v)]
+
+        if not clean_values:
+            return {}
+
+        stats = {}
+        for agg in aggs:
+            if agg == "mean":
+                stats["mean"] = statistics.mean(clean_values)
+            elif agg == "std":
+                if len(clean_values) >= 2:
+                    stats["std"] = statistics.stdev(clean_values)
+                else:
+                    stats["std"] = 0.0
+            elif agg == "min":
+                stats["min"] = min(clean_values)
+            elif agg == "max":
+                stats["max"] = max(clean_values)
+            elif agg == "count":
+                stats["count"] = len(clean_values)
+            elif agg == "median" or agg == "p50":
+                stats[agg] = statistics.median(clean_values)
+            elif agg == "sum":
+                stats["sum"] = sum(clean_values)
+            elif agg == "p90":
+                stats["p90"] = self._percentile(clean_values, 90)
+            elif agg == "p95":
+                stats["p95"] = self._percentile(clean_values, 95)
+            elif agg == "p99":
+                stats["p99"] = self._percentile(clean_values, 99)
+            elif agg == "last":
+                stats["last"] = clean_values[-1]
+            elif agg == "first":
+                stats["first"] = clean_values[0]
+
+        return stats
+
+    def _percentile(self, values: List[float], p: int) -> float:
+        """Compute percentile of values."""
+        sorted_vals = sorted(values)
+        k = (len(sorted_vals) - 1) * p / 100
+        f = int(k)
+        c = f + 1 if f + 1 < len(sorted_vals) else f
+        return sorted_vals[f] + (k - f) * (sorted_vals[c] - sorted_vals[f])
+
+    def log_summary(self, *aggs: str) -> None:
+        """
+        Compute statistics from buffered values and log them.
+
+        Args:
+            *aggs: Aggregation functions to compute. Defaults to ("mean",).
+                Supported: "mean", "std", "min", "max", "count",
+                "median", "sum", "p50", "p90", "p95", "p99",
+                "last", "first"
+
+        Example:
+            metrics.buffer.log_summary()               # default: mean
+            metrics.buffer.log_summary("mean", "std")  # mean and std
+            metrics.buffer.log_summary("mean", "p95")  # mean and 95th percentile
+        """
+        # Default to mean
+        if not aggs:
+            aggs = ("mean",)
+
+        # Validate aggregations
+        for agg in aggs:
+            if agg not in self.SUPPORTED_AGGS:
+                raise ValueError(f"Unsupported aggregation: {agg}. Supported: {self.SUPPORTED_AGGS}")
+
+        # Process each prefix's buffer
+        for prefix, buffer in list(self._buffers.items()):
+            if not buffer:
+                continue
+
+            output_data = {}
+
+            for key, values in buffer.items():
+                if not values:
+                    continue
+
+                stats = self._compute_stats(values, aggs)
+
+                # Add stats with hierarchical naming (key.agg)
+                for stat_name, stat_value in stats.items():
+                    output_data[f"{key}.{stat_name}"] = stat_value
+
+            if output_data:
+                # Log to the appropriate metric
+                self._metrics_manager(prefix).log(**output_data)
+
+        # Clear all buffers
+        self._buffers.clear()
+
+    def peek(self, prefix: Optional[str] = None, *keys: str, limit: int = 5) -> Dict[str, List[float]]:
+        """
+        Non-destructive inspection of buffered values.
+
+        Args:
+            prefix: Specific prefix to peek at (None for all)
+            *keys: Optional specific keys to peek at. If empty, shows all.
+            limit: Number of most recent values to show (default 5)
+
+        Returns:
+            Dict of buffered values (truncated to last `limit` items)
+        """
+        if prefix is not None:
+            buffer = self._buffers.get(prefix, {})
+            keys_to_show = keys if keys else buffer.keys()
+            return {
+                k: buffer[k][-limit:] if limit else buffer[k]
+                for k in keys_to_show
+                if k in buffer and buffer[k]
+            }
+        else:
+            # Return all buffers
+            result = {}
+            for p, buffer in self._buffers.items():
+                prefix_str = p if p else "(default)"
+                keys_to_show = keys if keys else buffer.keys()
+                for k in keys_to_show:
+                    if k in buffer and buffer[k]:
+                        result[f"{prefix_str}/{k}"] = buffer[k][-limit:] if limit else buffer[k]
+            return result
+
+
 class SummaryCache:
     """
     Buffer for collecting metric values and computing statistics periodically.
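One detail of the new class worth noting: `_percentile` interpolates linearly between the two closest ranks instead of snapping to the nearest element, so `p50` of an even-length series falls between the two middle values. A standalone sketch of the same formula (an illustrative reimplementation, not an import from the package):

    # Linear-interpolation percentile, mirroring BufferManager._percentile above.
    def percentile(values, p):
        sorted_vals = sorted(values)
        k = (len(sorted_vals) - 1) * p / 100           # fractional rank
        f = int(k)                                     # lower neighbor
        c = f + 1 if f + 1 < len(sorted_vals) else f   # upper neighbor, clamped at the end
        return sorted_vals[f] + (k - f) * (sorted_vals[c] - sorted_vals[f])

    assert percentile([1, 2, 3, 4], 50) == 2.5  # halfway between the two middle values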
@@ -135,8 +332,8 @@ class SummaryCache:
         if not output_data:
             return

-        #
-        self._metric_builder.
+        # Log combined data as a single metric data point
+        self._metric_builder.log(**output_data)

         # Clear buffer if requested (default behavior for "tiled" mode)
         if clear:
@@ -169,20 +366,23 @@ class MetricsManager:
     """
     Manager for metric operations that supports both named and unnamed usage.

-    Supports
-    1. Named via call: experiment.metrics("
-    2.
-    3. Unnamed: experiment.metrics.append(value=0.5, step=1)  # name=None
+    Supports two usage patterns:
+    1. Named via call: experiment.metrics("train").log(loss=0.5, accuracy=0.9)
+    2. Unnamed: experiment.metrics.log(epoch=1).flush()

     Usage:
         # With explicit metric name (via call)
-        experiment.metrics("
+        experiment.metrics("train").log(loss=0.5, accuracy=0.9)

-        # With
-        experiment.metrics.
+        # With epoch context (unnamed metric)
+        experiment.metrics.log(epoch=epoch).flush()

-        #
-        experiment.metrics.
+        # Nested dict pattern (single call for all metrics)
+        experiment.metrics.log(
+            epoch=100,
+            train=dict(loss=0.142, accuracy=0.80),
+            eval=dict(loss=0.201, accuracy=0.76)
+        )
     """

     def __init__(self, experiment: 'Experiment'):
@@ -194,6 +394,31 @@ class MetricsManager:
         """
         self._experiment = experiment
         self._metric_builders: Dict[str, 'MetricBuilder'] = {}  # Cache for MetricBuilder instances
+        self._buffer_manager: Optional[BufferManager] = None  # Lazy initialization
+
+    @property
+    def buffer(self) -> BufferManager:
+        """
+        Get the global BufferManager for buffered metric operations.
+
+        The buffer manager collects values across prefixes and computes
+        statistics when log_summary() is called.
+
+        Returns:
+            BufferManager instance
+
+        Example:
+            # Accumulate values
+            metrics("train").buffer(loss=0.5, accuracy=0.81)
+            metrics("val").buffer(loss=0.6, accuracy=0.78)
+
+            # Log summaries
+            metrics.buffer.log_summary()  # default: mean
+            metrics.buffer.log_summary("mean", "std", "p95")
+        """
+        if self._buffer_manager is None:
+            self._buffer_manager = BufferManager(self)
+        return self._buffer_manager

     def __call__(self, name: str, description: Optional[str] = None,
                  tags: Optional[List[str]] = None, metadata: Optional[Dict[str, Any]] = None) -> 'MetricBuilder':
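The lazily created `buffer` property ties the two halves of the new API together: `metrics("train").buffer(...)` accumulates raw values per prefix, and a single `log_summary()` call aggregates and clears them all. A sketch of the intended loop, where `exp`, `num_epochs`, `dataloader`, and `train_step` are assumed placeholders:

    metrics = exp.metrics  # MetricsManager of an open experiment (assumed handle)

    for epoch in range(num_epochs):
        for batch in dataloader:
            loss, acc = train_step(batch)              # hypothetical training step
            metrics("train").buffer(loss=loss, acc=acc)
        metrics.buffer.log_summary("mean", "std")      # logs loss.mean, loss.std, acc.mean, acc.std
        metrics.log(epoch=epoch).flush()               # unbuffered context values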
@@ -210,7 +435,7 @@ class MetricsManager:
             MetricBuilder instance for the named metric (same instance on repeated calls)

         Examples:
-            experiment.metrics("
+            experiment.metrics("train").log(loss=0.5, accuracy=0.9)

         Note:
             MetricBuilder instances are cached by name, so repeated calls with the
@@ -219,92 +444,110 @@ class MetricBuilder:
         """
         # Cache key includes name only (description/tags/metadata are set once on first call)
         if name not in self._metric_builders:
-            self._metric_builders[name] = MetricBuilder(
+            self._metric_builders[name] = MetricBuilder(
+                self._experiment, name, description, tags, metadata,
+                metrics_manager=self
+            )
         return self._metric_builders[name]

-    def
+    def log(self, _flush: bool = False, **kwargs) -> 'MetricsManager':
         """
-
+        Log a data point to the unnamed (root) metric.
+
+        Supports two patterns:
+
+        1. Simple key-value pairs:
+            experiment.metrics.log(epoch=epoch).flush()
+
+        2. Nested dict pattern (logs to multiple prefixed metrics):
+            experiment.metrics.log(
+                epoch=100,
+                train=dict(loss=0.142, accuracy=0.80),
+                eval=dict(loss=0.201, accuracy=0.76)
+            )

         Args:
-
-
-            **kwargs: Data as keyword arguments
+            _flush: If True, flush after logging (equivalent to calling .flush())
+            **kwargs: Data point fields. Dict values are expanded to prefixed metrics.

         Returns:
-
+            Self for method chaining

         Examples:
-
-            experiment.metrics.
-
+            # Log epoch context and flush
+            experiment.metrics.log(epoch=epoch).flush()
+
+            # Log with nested dicts (single call for all metrics)
+            experiment.metrics.log(
+                epoch=100,
+                train=dict(loss=0.142, accuracy=0.80),
+                eval=dict(loss=0.201, accuracy=0.76)
+            )
+
+            # Equivalent to _flush=True
+            experiment.metrics.log(epoch=100, _flush=True)
         """
-
-
-
+        # Separate nested dicts from scalar values
+        scalar_data = {}
+        nested_data = {}
+
+        for key, value in kwargs.items():
+            if isinstance(value, dict):
+                nested_data[key] = value
+            else:
+                scalar_data[key] = value
+
+        # Log scalar data to unnamed metric
+        if scalar_data:
+            self._experiment._append_to_metric(None, scalar_data, None, None, None)
+
+        # Log nested dicts to their respective prefixed metrics
+        for prefix, data in nested_data.items():
+            # Include scalar data (like epoch) with each nested metric
+            combined_data = {**scalar_data, **data}
+            self(prefix).log(**combined_data)
+
+        if _flush:
+            self.flush()

-
-
-
-                     metadata: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
+        return self
+
+    def flush(self) -> 'MetricsManager':
         """
-
+        Flush buffered data (for method chaining).

-
-
-
-            description: Optional metric description
-            tags: Optional tags for categorization
-            metadata: Optional structured metadata
+        Currently a no-op as data is written immediately, but supports
+        the fluent API pattern:
+            experiment.metrics.log(epoch=epoch).flush()

         Returns:
-
-
-        Examples:
-            experiment.metrics.append_batch(
-                name="loss",
-                data_points=[
-                    {"value": 0.5, "step": 1},
-                    {"value": 0.4, "step": 2}
-                ]
-            )
-            experiment.metrics.append_batch(
-                data_points=[
-                    {"value": 0.5, "step": 1},
-                    {"value": 0.4, "step": 2}
-                ]
-            )  # name=None
+            Self for method chaining
         """
-
-
-            return self
+        # Data is written immediately, so nothing to flush
+        # This method exists for API consistency and chaining
+        return self


 class MetricBuilder:
     """
     Builder for metric operations.

-    Provides fluent API for
+    Provides fluent API for logging, reading, and querying metric data.

     Usage:
-        #
-        experiment.
-
-        # Append batch
-        experiment.metric(name="train_loss").append_batch([
-            {"value": 0.5, "step": 100},
-            {"value": 0.45, "step": 101}
-        ])
+        # Log single data point
+        experiment.metrics("train").log(loss=0.5, accuracy=0.9)

         # Read data
-        data = experiment.
+        data = experiment.metrics("train").read(start_index=0, limit=100)

         # Get statistics
-        stats = experiment.
+        stats = experiment.metrics("train").stats()
     """

     def __init__(self, experiment: 'Experiment', name: str, description: Optional[str] = None,
-                 tags: Optional[List[str]] = None, metadata: Optional[Dict[str, Any]] = None
+                 tags: Optional[List[str]] = None, metadata: Optional[Dict[str, Any]] = None,
+                 metrics_manager: Optional['MetricsManager'] = None):
         """
         Initialize MetricBuilder.


@@ -314,71 +557,87 @@ class MetricBuilder:
             description: Optional metric description
             tags: Optional tags for categorization
             metadata: Optional structured metadata (units, type, etc.)
+            metrics_manager: Parent MetricsManager (for buffer access)
         """
         self._experiment = experiment
         self._name = name
         self._description = description
         self._tags = tags
         self._metadata = metadata
+        self._metrics_manager = metrics_manager
         self._summary_cache = None  # Lazy initialization

-    def
+    def buffer(self, **kwargs) -> 'MetricBuilder':
+        """
+        Buffer values for later aggregation via metrics.buffer.log_summary().
+
+        Values are accumulated and statistics are computed when log_summary() is called.
+
+        Args:
+            **kwargs: Metric values to buffer (e.g., loss=0.5, accuracy=0.9)
+
+        Returns:
+            Self for method chaining
+
+        Example:
+            # Accumulate values during training
+            for batch in dataloader:
+                metrics("train").buffer(loss=loss, acc=acc)
+
+            # Log summary at end of epoch
+            metrics.buffer.log_summary()  # logs loss.mean, acc.mean
+            metrics.buffer.log_summary("mean", "std")  # logs loss.mean, loss.std, etc.
+        """
+        if self._metrics_manager is None:
+            raise RuntimeError("buffer() requires MetricsManager reference")
+        self._metrics_manager.buffer._store(self._name, **kwargs)
+        return self
+
+    def log(self, **kwargs) -> 'MetricBuilder':
         """
-
+        Log a single data point to the metric.

         The data point can have any structure - common patterns:
+        - {loss: 0.3, accuracy: 0.92}
         - {value: 0.5, step: 100}
-        - {loss: 0.3, accuracy: 0.92, epoch: 5}
         - {timestamp: "...", temperature: 25.5, humidity: 60}

+        Supports method chaining for fluent API:
+            experiment.metrics("train").log(loss=0.5, accuracy=0.9)
+
         Args:
             **kwargs: Data point fields (flexible schema)

         Returns:
-
+            Self for method chaining

         Example:
-
-
+            experiment.metrics("train").log(loss=0.5, accuracy=0.9)
+            experiment.metrics.log(epoch=epoch).flush()
         """
-
+        self._experiment._append_to_metric(
             name=self._name,
             data=kwargs,
             description=self._description,
             tags=self._tags,
             metadata=self._metadata
         )
-        return
+        return self

-    def
+    def flush(self) -> 'MetricBuilder':
         """
-
+        Flush buffered data (for method chaining).

-
-
+        Currently a no-op as data is written immediately, but supports
+        the fluent API pattern:
+            experiment.metrics.log(epoch=epoch).flush()

         Returns:
-
-
-        Example:
-            result = experiment.metric(name="metrics").append_batch([
-                {"loss": 0.5, "acc": 0.8, "step": 1},
-                {"loss": 0.4, "acc": 0.85, "step": 2},
-                {"loss": 0.3, "acc": 0.9, "step": 3}
-            ])
-            print(f"Appended {result['count']} points")
+            Self for method chaining
         """
-
-
-
-        result = self._experiment._append_batch_to_metric(
-            name=self._name,
-            data_points=data_points,
-            description=self._description,
-            tags=self._tags,
-            metadata=self._metadata
-        )
-        return result
+        # Data is written immediately, so nothing to flush
+        # This method exists for API consistency and chaining
+        return self

     def read(self, start_index: int = 0, limit: int = 1000) -> Dict[str, Any]:
         """
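The nested-dict branch of `MetricsManager.log()` is worth spelling out: dict-valued kwargs are re-logged to the prefixed metric via `self(prefix).log(...)`, with the scalar kwargs merged into each. Per the code above, the docstring example therefore expands roughly to the following (a sketch of the equivalence, not captured library output):

    exp.metrics.log(
        epoch=100,
        train=dict(loss=0.142, accuracy=0.80),
        eval=dict(loss=0.201, accuracy=0.76),
    )
    # behaves like:
    exp.metrics.log(epoch=100)                                      # scalars go to the root metric
    exp.metrics("train").log(epoch=100, loss=0.142, accuracy=0.80)  # scalars merged into each prefix
    exp.metrics("eval").log(epoch=100, loss=0.201, accuracy=0.76)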
ml_dash/params.py
CHANGED
@@ -17,9 +17,9 @@ class ParametersBuilder:
     Fluent interface for parameter operations.

     Usage:
-
-        params =
-        params_nested =
+        exp.params.set(model={"lr": 0.001}, optimizer="adam")
+        params = exp.params.get()
+        params_nested = exp.params.get(flatten=False)
     """

     def __init__(self, experiment: 'Experiment'):

@@ -51,16 +51,16 @@ class ParametersBuilder:

         Examples:
             # Set nested parameters
-
+            exp.params.set(
                 model={"lr": 0.001, "batch_size": 32},
                 optimizer="adam"
             )

             # Merge/update specific parameters
-
+            exp.params.set(model={"lr": 0.0001})  # Only updates model.lr

             # Set flat parameters with dot notation
-
+            exp.params.set(**{"model.lr": 0.001, "model.batch_size": 32})
         """
         if not self._experiment._is_open:
             raise RuntimeError(