ml-dash 0.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml_dash/__init__.py +85 -0
- ml_dash/auth/__init__.py +51 -0
- ml_dash/auth/constants.py +10 -0
- ml_dash/auth/device_flow.py +237 -0
- ml_dash/auth/device_secret.py +49 -0
- ml_dash/auth/exceptions.py +31 -0
- ml_dash/auth/token_storage.py +262 -0
- ml_dash/auto_start.py +52 -0
- ml_dash/cli.py +79 -0
- ml_dash/cli_commands/__init__.py +1 -0
- ml_dash/cli_commands/download.py +769 -0
- ml_dash/cli_commands/list.py +319 -0
- ml_dash/cli_commands/login.py +225 -0
- ml_dash/cli_commands/logout.py +54 -0
- ml_dash/cli_commands/upload.py +1248 -0
- ml_dash/client.py +1003 -0
- ml_dash/config.py +133 -0
- ml_dash/experiment.py +1116 -0
- ml_dash/files.py +785 -0
- ml_dash/log.py +181 -0
- ml_dash/metric.py +481 -0
- ml_dash/params.py +277 -0
- ml_dash/py.typed +0 -0
- ml_dash/remote_auto_start.py +55 -0
- ml_dash/storage.py +1127 -0
- ml_dash-0.6.1.dist-info/METADATA +248 -0
- ml_dash-0.6.1.dist-info/RECORD +29 -0
- ml_dash-0.6.1.dist-info/WHEEL +4 -0
- ml_dash-0.6.1.dist-info/entry_points.txt +3 -0
ml_dash/log.py
ADDED
@@ -0,0 +1,181 @@
"""
Log API for ML-Dash SDK.

Provides fluent interface for structured logging with validation.
"""

from typing import Optional, Dict, Any, TYPE_CHECKING
from datetime import datetime
from enum import Enum

if TYPE_CHECKING:
    from .experiment import Experiment


class LogLevel(Enum):
    """
    Standard log levels for ML-Dash.

    Supported levels:
    - INFO: Informational messages
    - WARN: Warning messages
    - ERROR: Error messages
    - DEBUG: Debug messages
    - FATAL: Fatal error messages
    """
    INFO = "info"
    WARN = "warn"
    ERROR = "error"
    DEBUG = "debug"
    FATAL = "fatal"

    @classmethod
    def validate(cls, level: str) -> str:
        """
        Validate and normalize log level.

        Args:
            level: Log level string (case-insensitive)

        Returns:
            Normalized log level string (lowercase)

        Raises:
            ValueError: If level is not one of the 5 standard levels

        Example:
            >>> LogLevel.validate("INFO")
            "info"
            >>> LogLevel.validate("invalid")
            ValueError: Invalid log level: 'invalid'. Must be one of: info, warn, error, debug, fatal
        """
        level_lower = level.lower()
        try:
            return cls[level_lower.upper()].value
        except KeyError:
            valid_levels = ", ".join([l.value for l in cls])
            raise ValueError(
                f"Invalid log level: '{level}'. "
                f"Must be one of: {valid_levels}"
            )


class LogBuilder:
    """
    Fluent builder for creating log entries.

    This class is returned by Experiment.log() when no message is provided.
    It allows for a fluent API style where metadata is set first, then
    the log level method is called to write the log.

    Example:
        experiment.log(metadata={"epoch": 1}).info("Training started")
        experiment.log().error("Failed", error_code=500)
    """

    def __init__(self, experiment: 'Experiment', metadata: Optional[Dict[str, Any]] = None):
        """
        Initialize LogBuilder.

        Args:
            experiment: Parent Experiment instance
            metadata: Optional metadata dict from log() call
        """
        self._experiment = experiment
        self._metadata = metadata

    def info(self, message: str, **extra_metadata) -> None:
        """
        Write info level log.

        Args:
            message: Log message
            **extra_metadata: Additional metadata as keyword arguments

        Example:
            experiment.log().info("Training started")
            experiment.log().info("Epoch complete", epoch=1, loss=0.5)
        """
        self._write(LogLevel.INFO.value, message, extra_metadata)

    def warn(self, message: str, **extra_metadata) -> None:
        """
        Write warning level log.

        Args:
            message: Log message
            **extra_metadata: Additional metadata as keyword arguments

        Example:
            experiment.log().warn("High loss detected", loss=1.5)
        """
        self._write(LogLevel.WARN.value, message, extra_metadata)

    def error(self, message: str, **extra_metadata) -> None:
        """
        Write error level log.

        Args:
            message: Log message
            **extra_metadata: Additional metadata as keyword arguments

        Example:
            experiment.log().error("Failed to save", path="/models/checkpoint.pth")
        """
        self._write(LogLevel.ERROR.value, message, extra_metadata)

    def debug(self, message: str, **extra_metadata) -> None:
        """
        Write debug level log.

        Args:
            message: Log message
            **extra_metadata: Additional metadata as keyword arguments

        Example:
            experiment.log().debug("Memory usage", memory_mb=2500)
        """
        self._write(LogLevel.DEBUG.value, message, extra_metadata)

    def fatal(self, message: str, **extra_metadata) -> None:
        """
        Write fatal level log.

        Args:
            message: Log message
            **extra_metadata: Additional metadata as keyword arguments

        Example:
            experiment.log().fatal("Unrecoverable error", exit_code=1)
        """
        self._write(LogLevel.FATAL.value, message, extra_metadata)

    def _write(self, level: str, message: str, extra_metadata: Dict[str, Any]) -> None:
        """
        Internal: Execute the actual log write.

        Merges metadata from log() call with metadata from level method,
        then writes immediately (no buffering).

        Args:
            level: Log level (already validated)
            message: Log message
            extra_metadata: Additional metadata from level method kwargs
        """
        # Merge metadata from log() call with metadata from level method
        if self._metadata and extra_metadata:
            final_metadata = {**self._metadata, **extra_metadata}
        elif self._metadata:
            final_metadata = self._metadata
        elif extra_metadata:
            final_metadata = extra_metadata
        else:
            final_metadata = None

        # Write immediately (no buffering)
        self._experiment._write_log(
            message=message,
            level=level,
            metadata=final_metadata,
            timestamp=None
        )
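Read as a whole, log.py gives Experiment.log() a two-step fluent form: optional metadata first, then a level method (info/warn/error/debug/fatal) that merges its kwargs into that metadata and writes immediately through Experiment._write_log(). A minimal usage sketch, assuming an `experiment` handle has already been obtained from the SDK elsewhere (its construction is not part of this file); `run_batch` is a hypothetical stand-in for training code:

# Sketch only: `experiment` is assumed to be an ml-dash Experiment created elsewhere.
def train_one_epoch(experiment, epoch, batches):
    # Metadata passed to log() is merged with kwargs given to the level method.
    experiment.log(metadata={"epoch": epoch}).info("Epoch started")
    for step, batch in enumerate(batches):
        loss = run_batch(batch)  # hypothetical helper, not part of ml-dash
        if loss > 10.0:
            # Extra kwargs become per-entry metadata on the log record.
            experiment.log().warn("Loss spike detected", step=step, loss=loss)
    experiment.log(metadata={"epoch": epoch}).info("Epoch finished")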
ml_dash/metric.py
ADDED
@@ -0,0 +1,481 @@
"""
Metric API - Time-series data tracking for ML experiments.

Metrics are used for storing continuous data series like training metrics,
validation losses, system measurements, etc.
"""

from typing import Dict, Any, List, Optional, TYPE_CHECKING
from collections import defaultdict
import statistics

if TYPE_CHECKING:
    from .experiment import Experiment


class SummaryCache:
    """
    Buffer for collecting metric values and computing statistics periodically.

    Inspired by ml-logger's SummaryCache design:
    - Lazy computation: Store raw values, compute stats on demand
    - Hierarchical naming: Stats get suffixes (loss.mean, loss.std)
    - Robust handling: Converts None → NaN, filters before stats
    """

    def __init__(self, metric_builder: 'MetricBuilder'):
        """
        Initialize SummaryCache.

        Args:
            metric_builder: Parent MetricBuilder instance
        """
        self._metric_builder = metric_builder
        self._buffer: Dict[str, List[float]] = defaultdict(list)
        self._metadata: Dict[str, Any] = {}  # For set() metadata

    def store(self, **kwargs) -> None:
        """
        Store values in buffer without immediate logging (deferred computation).

        Args:
            **kwargs: Metric values to buffer (e.g., loss=0.5, accuracy=0.9)

        Example:
            cache.store(loss=0.5, accuracy=0.9)
            cache.store(loss=0.48)  # Accumulates
        """
        for key, value in kwargs.items():
            # Handle None values gracefully
            if value is None:
                value = float('nan')
            try:
                self._buffer[key].append(float(value))
            except (TypeError, ValueError):
                # Skip non-numeric values silently
                continue

    def set(self, **kwargs) -> None:
        """
        Set metadata values without aggregation (replaces previous values).

        Used for contextual metadata like learning rate, epoch number, etc.
        These values are included in the final data point when summarize() is called.

        Args:
            **kwargs: Metadata to set (e.g., lr=0.001, epoch=5)

        Example:
            cache.set(lr=0.001, epoch=5)
            cache.set(lr=0.0005)  # Replaces lr, keeps epoch
        """
        self._metadata.update(kwargs)

    def _compute_stats(self) -> Dict[str, float]:
        """
        Compute statistics from buffered values (idempotent, read-only).

        Returns:
            Dict with hierarchical metric names (key.mean, key.std, etc.)

        Note: This is idempotent - can be called multiple times without side effects.
        """
        stats_data = {}

        for key, values in self._buffer.items():
            if not values:
                continue

            # Filter out NaN values (ml-logger pattern)
            clean_values = [v for v in values if not (isinstance(v, float) and v != v)]

            if not clean_values:
                continue

            # Compute statistics with hierarchical naming
            stats_data[f"{key}.mean"] = statistics.mean(clean_values)
            stats_data[f"{key}.min"] = min(clean_values)
            stats_data[f"{key}.max"] = max(clean_values)
            stats_data[f"{key}.count"] = len(clean_values)

            # Std dev requires at least 2 values
            if len(clean_values) >= 2:
                stats_data[f"{key}.std"] = statistics.stdev(clean_values)
            else:
                stats_data[f"{key}.std"] = 0.0

        return stats_data

    def summarize(self, clear: bool = True) -> None:
        """
        Compute statistics from buffered values and log them (non-idempotent).

        Args:
            clear: If True (default), clear buffer after computing statistics.
                This creates a "rolling window" behavior matching ml-logger's "tiled" mode.

        Example:
            # After storing 10 loss values and setting lr=0.001:
            cache.store(loss=0.5)
            cache.set(lr=0.001, epoch=5)
            cache.summarize()
            # Logs: {lr: 0.001, epoch: 5, loss.mean: 0.5, loss.std: 0.0, ...}

        Note: This is non-idempotent - calling it multiple times has side effects.
        """
        if not self._buffer and not self._metadata:
            return

        # Compute statistics (delegated to idempotent method)
        stats_data = self._compute_stats()

        # Merge metadata with statistics
        output_data = {**self._metadata, **stats_data}

        if not output_data:
            return

        # Append combined data as a single metric data point
        self._metric_builder.append(**output_data)

        # Clear buffer if requested (default behavior for "tiled" mode)
        if clear:
            self._buffer.clear()
            self._metadata.clear()  # Also clear metadata

    def peek(self, *keys: str, limit: int = 5) -> Dict[str, List[float]]:
        """
        Non-destructive inspection of buffered values (idempotent, read-only).

        Args:
            *keys: Optional specific keys to peek at. If empty, shows all.
            limit: Number of most recent values to show (default 5)

        Returns:
            Dict of buffered values (truncated to last `limit` items)

        Example:
            cache.peek('loss', limit=3)  # {'loss': [0.5, 0.48, 0.52]}
        """
        keys_to_show = keys if keys else self._buffer.keys()
        return {
            k: self._buffer[k][-limit:] if limit else self._buffer[k]
            for k in keys_to_show
            if k in self._buffer and self._buffer[k]
        }


class MetricsManager:
    """
    Manager for metric operations that supports both named and unnamed usage.

    Supports three usage patterns:
    1. Named via call: experiment.metrics("loss").append(value=0.5, step=1)
    2. Named via argument: experiment.metrics.append(name="loss", value=0.5, step=1)
    3. Unnamed: experiment.metrics.append(value=0.5, step=1)  # name=None

    Usage:
        # With explicit metric name (via call)
        experiment.metrics("train_loss").append(value=0.5, step=100)

        # With explicit metric name (via argument)
        experiment.metrics.append(name="train_loss", value=0.5, step=100)

        # Without name (uses None as metric name)
        experiment.metrics.append(value=0.5, step=100)
    """

    def __init__(self, experiment: 'Experiment'):
        """
        Initialize MetricsManager.

        Args:
            experiment: Parent Experiment instance
        """
        self._experiment = experiment
        self._metric_builders: Dict[str, 'MetricBuilder'] = {}  # Cache for MetricBuilder instances

    def __call__(self, name: str, description: Optional[str] = None,
                 tags: Optional[List[str]] = None, metadata: Optional[Dict[str, Any]] = None) -> 'MetricBuilder':
        """
        Get a MetricBuilder for a specific metric name (cached for reuse).

        Args:
            name: Metric name (unique within experiment)
            description: Optional metric description
            tags: Optional tags for categorization
            metadata: Optional structured metadata

        Returns:
            MetricBuilder instance for the named metric (same instance on repeated calls)

        Examples:
            experiment.metrics("loss").append(value=0.5, step=1)

        Note:
            MetricBuilder instances are cached by name, so repeated calls with the
            same name return the same instance. This ensures summary_cache works
            correctly when called multiple times within a loop.
        """
        # Cache key includes name only (description/tags/metadata are set once on first call)
        if name not in self._metric_builders:
            self._metric_builders[name] = MetricBuilder(self._experiment, name, description, tags, metadata)
        return self._metric_builders[name]

    def append(self, name: Optional[str] = None, data: Optional[Dict[str, Any]] = None, **kwargs) -> Dict[str, Any]:
        """
        Append a data point to a metric (name can be optional).

        Args:
            name: Metric name (optional, can be None for unnamed metrics)
            data: Data dict (alternative to kwargs)
            **kwargs: Data as keyword arguments

        Returns:
            Response dict with metric metadata

        Examples:
            experiment.metrics.append(name="loss", value=0.5, step=1)
            experiment.metrics.append(value=0.5, step=1)  # name=None
            experiment.metrics.append(name="loss", data={"value": 0.5, "step": 1})
        """
        if data is None:
            data = kwargs
        return self._experiment._append_to_metric(name, data, None, None, None)

    def append_batch(self, name: Optional[str] = None, data_points: Optional[List[Dict[str, Any]]] = None,
                     description: Optional[str] = None,
                     tags: Optional[List[str]] = None,
                     metadata: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Append multiple data points to a metric.

        Args:
            name: Metric name (optional, can be None for unnamed metrics)
            data_points: List of data point dicts
            description: Optional metric description
            tags: Optional tags for categorization
            metadata: Optional structured metadata

        Returns:
            Response dict with metric metadata

        Examples:
            experiment.metrics.append_batch(
                name="loss",
                data_points=[
                    {"value": 0.5, "step": 1},
                    {"value": 0.4, "step": 2}
                ]
            )
            experiment.metrics.append_batch(
                data_points=[
                    {"value": 0.5, "step": 1},
                    {"value": 0.4, "step": 2}
                ]
            )  # name=None
        """
        if data_points is None:
            data_points = []
        return self._experiment._append_batch_to_metric(name, data_points, description, tags, metadata)


class MetricBuilder:
    """
    Builder for metric operations.

    Provides fluent API for appending, reading, and querying metric data.

    Usage:
        # Append single data point
        experiment.metric(name="train_loss").append(value=0.5, step=100)

        # Append batch
        experiment.metric(name="train_loss").append_batch([
            {"value": 0.5, "step": 100},
            {"value": 0.45, "step": 101}
        ])

        # Read data
        data = experiment.metric(name="train_loss").read(start_index=0, limit=100)

        # Get statistics
        stats = experiment.metric(name="train_loss").stats()
    """

    def __init__(self, experiment: 'Experiment', name: str, description: Optional[str] = None,
                 tags: Optional[List[str]] = None, metadata: Optional[Dict[str, Any]] = None):
        """
        Initialize MetricBuilder.

        Args:
            experiment: Parent Experiment instance
            name: Metric name (unique within experiment)
            description: Optional metric description
            tags: Optional tags for categorization
            metadata: Optional structured metadata (units, type, etc.)
        """
        self._experiment = experiment
        self._name = name
        self._description = description
        self._tags = tags
        self._metadata = metadata
        self._summary_cache = None  # Lazy initialization

    def append(self, **kwargs) -> Dict[str, Any]:
        """
        Append a single data point to the metric.

        The data point can have any structure - common patterns:
        - {value: 0.5, step: 100}
        - {loss: 0.3, accuracy: 0.92, epoch: 5}
        - {timestamp: "...", temperature: 25.5, humidity: 60}

        Args:
            **kwargs: Data point fields (flexible schema)

        Returns:
            Dict with metricId, index, bufferedDataPoints, chunkSize

        Example:
            result = experiment.metric(name="train_loss").append(value=0.5, step=100, epoch=1)
            print(f"Appended at index {result['index']}")
        """
        result = self._experiment._append_to_metric(
            name=self._name,
            data=kwargs,
            description=self._description,
            tags=self._tags,
            metadata=self._metadata
        )
        return result

    def append_batch(self, data_points: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Append multiple data points in batch (more efficient than multiple append calls).

        Args:
            data_points: List of data point dicts

        Returns:
            Dict with metricId, startIndex, endIndex, count, bufferedDataPoints, chunkSize

        Example:
            result = experiment.metric(name="metrics").append_batch([
                {"loss": 0.5, "acc": 0.8, "step": 1},
                {"loss": 0.4, "acc": 0.85, "step": 2},
                {"loss": 0.3, "acc": 0.9, "step": 3}
            ])
            print(f"Appended {result['count']} points")
        """
        if not data_points:
            raise ValueError("data_points cannot be empty")

        result = self._experiment._append_batch_to_metric(
            name=self._name,
            data_points=data_points,
            description=self._description,
            tags=self._tags,
            metadata=self._metadata
        )
        return result

    def read(self, start_index: int = 0, limit: int = 1000) -> Dict[str, Any]:
        """
        Read data points from the metric by index range.

        Args:
            start_index: Starting index (inclusive, default 0)
            limit: Maximum number of points to read (default 1000, max 10000)

        Returns:
            Dict with keys:
            - data: List of {index: str, data: dict, createdAt: str}
            - startIndex: Starting index
            - endIndex: Ending index
            - total: Number of points returned
            - hasMore: Whether more data exists beyond this range

        Example:
            result = experiment.metric(name="train_loss").read(start_index=0, limit=100)
            for point in result['data']:
                print(f"Index {point['index']}: {point['data']}")
        """
        return self._experiment._read_metric_data(
            name=self._name,
            start_index=start_index,
            limit=limit
        )

    def stats(self) -> Dict[str, Any]:
        """
        Get metric statistics and metadata.

        Returns:
            Dict with metric info:
            - metricId: Unique metric ID
            - name: Metric name
            - description: Metric description (if set)
            - tags: Tags list
            - metadata: User metadata
            - totalDataPoints: Total points (buffered + chunked)
            - bufferedDataPoints: Points in MongoDB (hot storage)
            - chunkedDataPoints: Points in S3 (cold storage)
            - totalChunks: Number of chunks in S3
            - chunkSize: Chunking threshold
            - firstDataAt: Timestamp of first point (if data has timestamp)
            - lastDataAt: Timestamp of last point (if data has timestamp)
            - createdAt: Metric creation time
            - updatedAt: Last update time

        Example:
            stats = experiment.metric(name="train_loss").stats()
            print(f"Total points: {stats['totalDataPoints']}")
            print(f"Buffered: {stats['bufferedDataPoints']}, Chunked: {stats['chunkedDataPoints']}")
        """
        return self._experiment._get_metric_stats(name=self._name)

    def list_all(self) -> List[Dict[str, Any]]:
        """
        List all metrics in the experiment.

        Returns:
            List of metric summaries with keys:
            - metricId: Unique metric ID
            - name: Metric name
            - description: Metric description
            - tags: Tags list
            - totalDataPoints: Total data points
            - createdAt: Creation timestamp

        Example:
            metrics = experiment.metric().list_all()
            for metric in metrics:
                print(f"{metric['name']}: {metric['totalDataPoints']} points")
        """
        return self._experiment._list_metrics()

    @property
    def summary_cache(self) -> SummaryCache:
        """
        Get summary cache for this metric (lazy initialization).

        The summary cache allows buffering values and computing statistics
        periodically, which is much more efficient than logging every value.

        Returns:
            SummaryCache instance for this metric

        Example:
            metric = experiment.metrics("train")
            # Store values every batch
            metric.summary_cache.store(loss=0.5)
            metric.summary_cache.store(loss=0.48)
            # Set metadata
            metric.summary_cache.set(lr=0.001, epoch=1)
            # Compute stats and log periodically
            metric.summary_cache.summarize()
        """
        if self._summary_cache is None:
            self._summary_cache = SummaryCache(self)
        return self._summary_cache
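metric.py pairs direct append/read/stats calls with the buffered summary-cache path, which is the low-overhead way to record high-frequency values: store raw numbers per step, then emit one aggregated data point per epoch. A minimal sketch under the same assumption that an `experiment` handle already exists; only the calls documented in the docstrings above are used, and `compute_loss` is a hypothetical placeholder for the training step:

# Sketch only: `experiment` is assumed to be an ml-dash Experiment created elsewhere.
def train(experiment, epochs, batches, lr=0.001):
    train_metric = experiment.metrics("train")  # cached MetricBuilder, same instance each call
    for epoch in range(epochs):
        for batch in batches:
            loss = compute_loss(batch)  # hypothetical placeholder
            train_metric.summary_cache.store(loss=loss)  # buffered, nothing written yet
        train_metric.summary_cache.set(lr=lr, epoch=epoch)  # contextual metadata, not aggregated
        # Writes a single data point: lr, epoch, loss.mean, loss.std, loss.min, loss.max, loss.count
        train_metric.summary_cache.summarize()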