ml-dash 0.6.1__py3-none-any.whl → 0.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml_dash/__init__.py +37 -63
- ml_dash/auth/token_storage.py +267 -226
- ml_dash/auto_start.py +28 -15
- ml_dash/cli.py +16 -2
- ml_dash/cli_commands/api.py +165 -0
- ml_dash/cli_commands/download.py +757 -667
- ml_dash/cli_commands/list.py +146 -13
- ml_dash/cli_commands/login.py +190 -183
- ml_dash/cli_commands/profile.py +92 -0
- ml_dash/cli_commands/upload.py +1291 -1141
- ml_dash/client.py +79 -6
- ml_dash/config.py +119 -119
- ml_dash/experiment.py +1242 -995
- ml_dash/files.py +1051 -340
- ml_dash/log.py +7 -7
- ml_dash/metric.py +359 -100
- ml_dash/params.py +6 -6
- ml_dash/remote_auto_start.py +20 -17
- ml_dash/run.py +231 -0
- ml_dash/snowflake.py +173 -0
- ml_dash/storage.py +1051 -1079
- {ml_dash-0.6.1.dist-info → ml_dash-0.6.2.dist-info}/METADATA +45 -20
- ml_dash-0.6.2.dist-info/RECORD +33 -0
- ml_dash-0.6.1.dist-info/RECORD +0 -29
- {ml_dash-0.6.1.dist-info → ml_dash-0.6.2.dist-info}/WHEEL +0 -0
- {ml_dash-0.6.1.dist-info → ml_dash-0.6.2.dist-info}/entry_points.txt +0 -0
ml_dash/experiment.py
CHANGED
|
@@ -3,1114 +3,1361 @@ Experiment class for ML-Dash SDK.
|
|
|
3
3
|
|
|
4
4
|
Supports three usage styles:
|
|
5
5
|
1. Decorator: @ml_dash_experiment(...)
|
|
6
|
-
2. Context manager: with Experiment(...) as exp:
|
|
6
|
+
2. Context manager: with Experiment(...).run as exp:
|
|
7
7
|
3. Direct instantiation: exp = Experiment(...)
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
-
from typing import Optional, Dict, Any, List, Callable
|
|
11
|
-
from enum import Enum
|
|
12
10
|
import functools
|
|
13
|
-
from pathlib import Path
|
|
14
11
|
from datetime import datetime
|
|
12
|
+
from enum import Enum
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, Callable, Dict, List, Optional, Union, Unpack
|
|
15
15
|
|
|
16
16
|
from .client import RemoteClient
|
|
17
|
-
from .
|
|
18
|
-
from .log import
|
|
17
|
+
from .files import BindrsBuilder, FilesAccessor
|
|
18
|
+
from .log import LogBuilder, LogLevel
|
|
19
19
|
from .params import ParametersBuilder
|
|
20
|
-
from .
|
|
20
|
+
from .run import RUN
|
|
21
|
+
from .storage import LocalStorage
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _expand_exp_template(template: str) -> str:
|
|
25
|
+
"""
|
|
26
|
+
Expand {EXP.attr} placeholders in template string.
|
|
27
|
+
|
|
28
|
+
Handles both regular attributes and property descriptors on the RUN class.
|
|
29
|
+
|
|
30
|
+
Args:
|
|
31
|
+
template: String containing {EXP.attr} placeholders
|
|
32
|
+
|
|
33
|
+
Returns:
|
|
34
|
+
String with placeholders expanded to actual values
|
|
35
|
+
"""
|
|
36
|
+
import re
|
|
37
|
+
|
|
38
|
+
def replace_match(match):
|
|
39
|
+
attr_name = match.group(1)
|
|
40
|
+
# Get the attribute from the class __dict__, handling properties correctly
|
|
41
|
+
# RUN is a params_proto class where properties are stored in RUN.__dict__
|
|
42
|
+
attr = RUN.__dict__.get(attr_name)
|
|
43
|
+
if isinstance(attr, property):
|
|
44
|
+
# For properties, call the getter with RUN as self
|
|
45
|
+
return str(attr.fget(RUN))
|
|
46
|
+
else:
|
|
47
|
+
# For regular attributes, access via getattr
|
|
48
|
+
return str(getattr(RUN, attr_name))
|
|
49
|
+
|
|
50
|
+
# Match {EXP.attr_name} pattern
|
|
51
|
+
pattern = r"\{EXP\.(\w+)\}"
|
|
52
|
+
return re.sub(pattern, replace_match, template)
|
|
21
53
|
|
|
22
54
|
|
|
23
55
|
class OperationMode(Enum):
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
56
|
+
"""Operation mode for the experiment."""
|
|
57
|
+
|
|
58
|
+
LOCAL = "local"
|
|
59
|
+
REMOTE = "remote"
|
|
60
|
+
HYBRID = "hybrid" # Future: sync local to remote
|
|
28
61
|
|
|
29
62
|
|
|
30
63
|
class RunManager:
|
|
64
|
+
"""
|
|
65
|
+
Lifecycle manager for experiments.
|
|
66
|
+
|
|
67
|
+
Supports three usage patterns:
|
|
68
|
+
1. Method calls: experiment.run.start(), experiment.run.complete()
|
|
69
|
+
2. Context manager: with Experiment(...).run as exp:
|
|
70
|
+
3. Decorator: @exp.run or @Experiment(...).run
|
|
71
|
+
"""
|
|
72
|
+
|
|
73
|
+
def __init__(self, experiment: "Experiment"):
|
|
31
74
|
"""
|
|
32
|
-
|
|
75
|
+
Initialize RunManager.
|
|
33
76
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
2. Context manager: with Experiment(...).run as exp:
|
|
37
|
-
3. Decorator: @exp.run or @Experiment(...).run
|
|
77
|
+
Args:
|
|
78
|
+
experiment: Parent Experiment instance
|
|
38
79
|
"""
|
|
80
|
+
self._experiment = experiment
|
|
39
81
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
82
|
+
def start(self) -> "Experiment":
|
|
83
|
+
"""
|
|
84
|
+
Start the experiment (sets status to RUNNING).
|
|
85
|
+
|
|
86
|
+
Returns:
|
|
87
|
+
The experiment instance for chaining
|
|
88
|
+
"""
|
|
89
|
+
return self._experiment._open()
|
|
90
|
+
|
|
91
|
+
def complete(self) -> None:
|
|
92
|
+
"""Mark experiment as completed (status: COMPLETED)."""
|
|
93
|
+
self._experiment._close(status="COMPLETED")
|
|
43
94
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
self._experiment = experiment
|
|
95
|
+
def fail(self) -> None:
|
|
96
|
+
"""Mark experiment as failed (status: FAILED)."""
|
|
97
|
+
self._experiment._close(status="FAILED")
|
|
48
98
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
99
|
+
def cancel(self) -> None:
|
|
100
|
+
"""Mark experiment as cancelled (status: CANCELLED)."""
|
|
101
|
+
self._experiment._close(status="CANCELLED")
|
|
52
102
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
103
|
+
@property
|
|
104
|
+
def prefix(self) -> Optional[str]:
|
|
105
|
+
"""
|
|
106
|
+
Get the current folder prefix for this experiment.
|
|
107
|
+
|
|
108
|
+
Returns:
|
|
109
|
+
Current folder prefix path or None
|
|
57
110
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
111
|
+
Example:
|
|
112
|
+
current_prefix = exp.run.prefix
|
|
113
|
+
"""
|
|
114
|
+
return self._experiment._folder_path
|
|
61
115
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
116
|
+
@prefix.setter
|
|
117
|
+
def prefix(self, value: Optional[str]) -> None:
|
|
118
|
+
"""
|
|
119
|
+
Set the folder prefix for this experiment before initialization.
|
|
65
120
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
self._experiment._close(status="CANCELLED")
|
|
121
|
+
This can ONLY be set before the experiment is started (initialized).
|
|
122
|
+
Once the experiment is opened, the prefix cannot be changed.
|
|
69
123
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
Get the current folder for this experiment.
|
|
124
|
+
Supports template variables:
|
|
125
|
+
- {EXP.name} - Experiment name
|
|
126
|
+
- {EXP.id} - Experiment ID
|
|
74
127
|
|
|
75
|
-
|
|
76
|
-
|
|
128
|
+
Args:
|
|
129
|
+
value: Folder prefix path with optional template variables
|
|
130
|
+
(e.g., "ge/myproject/{EXP.name}" or None)
|
|
77
131
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
"""
|
|
81
|
-
return self._experiment.folder
|
|
132
|
+
Raises:
|
|
133
|
+
RuntimeError: If experiment is already initialized/open
|
|
82
134
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
"""
|
|
86
|
-
Set the folder for this experiment before initialization.
|
|
135
|
+
Examples:
|
|
136
|
+
from ml_dash import dxp
|
|
87
137
|
|
|
88
|
-
|
|
89
|
-
|
|
138
|
+
# Static folder
|
|
139
|
+
dxp.run.prefix = "ge/myproject/experiments/resnet"
|
|
90
140
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
- {RUN.project} - Project name
|
|
141
|
+
# Template with experiment name
|
|
142
|
+
dxp.run.prefix = "ge/iclr_2024/{EXP.name}"
|
|
94
143
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
144
|
+
# Now start the experiment
|
|
145
|
+
with dxp.run:
|
|
146
|
+
dxp.params.set(lr=0.001)
|
|
147
|
+
"""
|
|
148
|
+
if self._experiment._is_open:
|
|
149
|
+
raise RuntimeError(
|
|
150
|
+
"Cannot change prefix after experiment is initialized. "
|
|
151
|
+
"Set prefix before calling start() or entering 'with' block."
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
if value:
|
|
155
|
+
# Sync RUN with this experiment's values
|
|
156
|
+
RUN.name = self._experiment.name
|
|
157
|
+
RUN.description = self._experiment.description
|
|
158
|
+
# Generate id/timestamp if not already set
|
|
159
|
+
if RUN.id is None:
|
|
160
|
+
RUN._init_run()
|
|
161
|
+
# Expand {EXP.attr} placeholders from RUN - use helper to expand properties correctly
|
|
162
|
+
value = _expand_exp_template(value)
|
|
163
|
+
|
|
164
|
+
# Update the folder on the experiment
|
|
165
|
+
self._experiment._folder_path = value
|
|
166
|
+
|
|
167
|
+
def __enter__(self) -> "Experiment":
|
|
168
|
+
"""Context manager entry - starts the experiment."""
|
|
169
|
+
return self.start()
|
|
170
|
+
|
|
171
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
172
|
+
"""Context manager exit - completes or fails the experiment."""
|
|
173
|
+
if exc_type is not None:
|
|
174
|
+
self.fail()
|
|
175
|
+
else:
|
|
176
|
+
self.complete()
|
|
177
|
+
return False
|
|
178
|
+
|
|
179
|
+
def __call__(self, func: Callable) -> Callable:
|
|
180
|
+
"""
|
|
181
|
+
Decorator support for wrapping functions with experiment lifecycle.
|
|
98
182
|
|
|
99
|
-
|
|
100
|
-
|
|
183
|
+
Usage:
|
|
184
|
+
@exp.run
|
|
185
|
+
def train(exp):
|
|
186
|
+
exp.log("Training...")
|
|
187
|
+
"""
|
|
101
188
|
|
|
102
|
-
|
|
103
|
-
|
|
189
|
+
@functools.wraps(func)
|
|
190
|
+
def wrapper(*args, **kwargs):
|
|
191
|
+
with self as exp:
|
|
192
|
+
return func(exp, *args, **kwargs)
|
|
104
193
|
|
|
105
|
-
|
|
106
|
-
dxp.run.folder = "experiments/vision/resnet"
|
|
194
|
+
return wrapper
|
|
107
195
|
|
|
108
|
-
# Template with experiment name
|
|
109
|
-
dxp.run.folder = "/iclr_2024/{RUN.name}"
|
|
110
196
|
|
|
111
|
-
|
|
112
|
-
|
|
197
|
+
class Experiment:
|
|
198
|
+
"""
|
|
199
|
+
ML-Dash experiment for tracking experiments.
|
|
200
|
+
|
|
201
|
+
Prefix format: {owner}/{project}/path.../[name]
|
|
202
|
+
- owner: First segment (e.g., your username)
|
|
203
|
+
- project: Second segment (e.g., project name)
|
|
204
|
+
- path: Remaining segments form the folder structure
|
|
205
|
+
- name: Derived from last segment (may be a seed/id)
|
|
206
|
+
|
|
207
|
+
Usage examples:
|
|
208
|
+
|
|
209
|
+
# Local mode (default)
|
|
210
|
+
experiment = Experiment(prefix="ge/my-project/experiments/exp1")
|
|
211
|
+
|
|
212
|
+
# Custom local storage directory
|
|
213
|
+
experiment = Experiment(
|
|
214
|
+
prefix="ge/my-project/experiments/exp1",
|
|
215
|
+
dash_root=".dash"
|
|
216
|
+
)
|
|
217
|
+
|
|
218
|
+
# Remote mode with custom server
|
|
219
|
+
experiment = Experiment(
|
|
220
|
+
prefix="ge/my-project/experiments/exp1",
|
|
221
|
+
dash_url="https://custom-server.com"
|
|
222
|
+
)
|
|
223
|
+
|
|
224
|
+
# Context manager
|
|
225
|
+
with Experiment(prefix="ge/my-project/exp1").run as exp:
|
|
226
|
+
exp.logs.info("Training started")
|
|
227
|
+
|
|
228
|
+
# Decorator
|
|
229
|
+
@ml_dash_experiment(prefix="ge/ws/experiments/exp", dash_url="https://api.dash.ml")
|
|
230
|
+
def train():
|
|
231
|
+
...
|
|
232
|
+
"""
|
|
233
|
+
|
|
234
|
+
def __init__(
|
|
235
|
+
self,
|
|
236
|
+
prefix: Optional[str] = None,
|
|
237
|
+
*,
|
|
238
|
+
readme: Optional[str] = None,
|
|
239
|
+
# Ge: this is an instance only property
|
|
240
|
+
tags: Optional[List[str]] = None,
|
|
241
|
+
# Ge: Bindrs is an instance-only property, it is not set inside the RUN namespace.
|
|
242
|
+
bindrs: Optional[List[str]] = None,
|
|
243
|
+
# Ge: This is also instance-only
|
|
244
|
+
metadata: Optional[Dict[str, Any]] = None,
|
|
245
|
+
# Mode configuration
|
|
246
|
+
dash_url: Optional[Union[str, bool]] = None,
|
|
247
|
+
dash_root: Optional[str] = ".dash",
|
|
248
|
+
# Deprecated parameters (for backward compatibility)
|
|
249
|
+
remote: Optional[Union[str, bool]] = None,
|
|
250
|
+
local_path: Optional[str] = None,
|
|
251
|
+
# Internal parameters
|
|
252
|
+
_write_protected: bool = False,
|
|
253
|
+
# The rest of the params go directly to populate the RUN object.
|
|
254
|
+
**run_params: Unpack[RUN],
|
|
255
|
+
):
|
|
256
|
+
"""
|
|
257
|
+
Initialize an ML-Dash experiment.
|
|
258
|
+
|
|
259
|
+
Args:
|
|
260
|
+
prefix: Full experiment path like "owner/project/folder.../name" (defaults to DASH_PREFIX env var).
|
|
261
|
+
Format: {owner}/{project}/path.../[name]
|
|
262
|
+
- owner: First segment (e.g., username)
|
|
263
|
+
- project: Second segment (e.g., project name)
|
|
264
|
+
- path: Remaining segments form the folder path
|
|
265
|
+
- name: Derived from last segment (may be a seed/id, not always meaningful)
|
|
266
|
+
readme: Optional experiment readme/description
|
|
267
|
+
tags: Optional list of tags
|
|
268
|
+
bindrs: Optional list of bindrs
|
|
269
|
+
metadata: Optional metadata dict
|
|
270
|
+
dash_url: Remote API URL. True=use RUN.api_url, str=custom URL, None=no remote. Token auto-loaded from ~/.dash/token.enc
|
|
271
|
+
dash_root: Local storage root path (defaults to ".dash"). Set to None for remote-only mode.
|
|
272
|
+
remote: (Deprecated) Use dash_url instead
|
|
273
|
+
local_path: (Deprecated) Use dash_root instead
|
|
274
|
+
_write_protected: Internal parameter - if True, experiment becomes immutable after creation
|
|
275
|
+
|
|
276
|
+
Mode Selection:
|
|
277
|
+
- Default (no dash_url): Local-only mode (writes to ".dash/")
|
|
278
|
+
- dash_url + dash_root: Hybrid mode (local + remote)
|
|
279
|
+
- dash_url + dash_root=None: Remote-only mode
|
|
280
|
+
"""
|
|
281
|
+
import os
|
|
282
|
+
import warnings
|
|
283
|
+
|
|
284
|
+
# Handle backward compatibility
|
|
285
|
+
if remote is not None:
|
|
286
|
+
warnings.warn(
|
|
287
|
+
"Parameter 'remote' is deprecated. Use 'dash_url' instead.",
|
|
288
|
+
DeprecationWarning,
|
|
289
|
+
stacklevel=2
|
|
290
|
+
)
|
|
291
|
+
if dash_url is None:
|
|
292
|
+
dash_url = remote
|
|
293
|
+
|
|
294
|
+
if local_path is not None:
|
|
295
|
+
warnings.warn(
|
|
296
|
+
"Parameter 'local_path' is deprecated. Use 'dash_root' instead.",
|
|
297
|
+
DeprecationWarning,
|
|
298
|
+
stacklevel=2
|
|
299
|
+
)
|
|
300
|
+
if dash_root == ".dash": # Only override if dash_root is default
|
|
301
|
+
dash_root = local_path
|
|
302
|
+
|
|
303
|
+
# Resolve prefix from environment variable if not provided
|
|
304
|
+
self._folder_path = prefix or os.getenv("DASH_PREFIX")
|
|
305
|
+
|
|
306
|
+
if not self._folder_path:
|
|
307
|
+
raise ValueError("prefix (or DASH_PREFIX env var) must be provided")
|
|
308
|
+
|
|
309
|
+
# Parse prefix: {owner}/{project}/path.../[name]
|
|
310
|
+
parts = self._folder_path.strip("/").split("/")
|
|
311
|
+
if len(parts) < 2:
|
|
312
|
+
raise ValueError(
|
|
313
|
+
f"prefix must have at least owner/project: got '{self._folder_path}'"
|
|
314
|
+
)
|
|
315
|
+
|
|
316
|
+
self.owner = parts[0]
|
|
317
|
+
self.project = parts[1]
|
|
318
|
+
# Name is the last segment (may be a seed/id, not always a meaningful name)
|
|
319
|
+
self.name = parts[-1] if len(parts) > 2 else parts[1]
|
|
320
|
+
|
|
321
|
+
self.readme = readme
|
|
322
|
+
self.tags = tags
|
|
323
|
+
self._bindrs_list = bindrs
|
|
324
|
+
self._write_protected = _write_protected
|
|
325
|
+
self.metadata = metadata
|
|
326
|
+
|
|
327
|
+
# Initialize RUN with experiment values
|
|
328
|
+
RUN.name = self.name
|
|
329
|
+
if readme:
|
|
330
|
+
RUN.readme = readme
|
|
331
|
+
|
|
332
|
+
# Determine operation mode
|
|
333
|
+
# dash_root defaults to ".dash", dash_url defaults to None
|
|
334
|
+
if dash_url and dash_root:
|
|
335
|
+
self.mode = OperationMode.HYBRID
|
|
336
|
+
elif dash_url:
|
|
337
|
+
self.mode = OperationMode.REMOTE
|
|
338
|
+
else:
|
|
339
|
+
self.mode = OperationMode.LOCAL
|
|
340
|
+
|
|
341
|
+
# Initialize backend
|
|
342
|
+
self._client: Optional[RemoteClient] = None
|
|
343
|
+
self._storage: Optional[LocalStorage] = None
|
|
344
|
+
self._experiment_id: Optional[str] = None
|
|
345
|
+
self._experiment_data: Optional[Dict[str, Any]] = None
|
|
346
|
+
self._is_open = False
|
|
347
|
+
self._metrics_manager: Optional["MetricsManager"] = None # Cached metrics manager
|
|
348
|
+
|
|
349
|
+
if self.mode in (OperationMode.REMOTE, OperationMode.HYBRID):
|
|
350
|
+
# RemoteClient will auto-load token from ~/.dash/token.enc
|
|
351
|
+
# Use RUN.api_url if dash_url=True (boolean), otherwise use the provided URL
|
|
352
|
+
api_url = RUN.api_url if dash_url is True else dash_url
|
|
353
|
+
self._client = RemoteClient(base_url=api_url)
|
|
354
|
+
|
|
355
|
+
if self.mode in (OperationMode.LOCAL, OperationMode.HYBRID):
|
|
356
|
+
self._storage = LocalStorage(root_path=Path(dash_root))
|
|
357
|
+
|
|
358
|
+
def _open(self) -> "Experiment":
|
|
359
|
+
"""
|
|
360
|
+
Internal method to open the experiment (create or update on server/filesystem).
|
|
113
361
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
362
|
+
Returns:
|
|
363
|
+
self for chaining
|
|
364
|
+
"""
|
|
365
|
+
if self._is_open:
|
|
366
|
+
return self
|
|
367
|
+
|
|
368
|
+
if self._client:
|
|
369
|
+
# Remote mode: create/update experiment via API
|
|
370
|
+
try:
|
|
371
|
+
response = self._client.create_or_update_experiment(
|
|
372
|
+
project=self.project,
|
|
373
|
+
name=self.name,
|
|
374
|
+
description=self.readme,
|
|
375
|
+
tags=self.tags,
|
|
376
|
+
bindrs=self._bindrs_list,
|
|
377
|
+
prefix=self._folder_path,
|
|
378
|
+
write_protected=self._write_protected,
|
|
379
|
+
metadata=self.metadata,
|
|
380
|
+
)
|
|
381
|
+
self._experiment_data = response
|
|
382
|
+
self._experiment_id = response["experiment"]["id"]
|
|
383
|
+
|
|
384
|
+
# Display message about viewing data online
|
|
385
|
+
try:
|
|
386
|
+
from rich.console import Console
|
|
387
|
+
|
|
388
|
+
console = Console()
|
|
389
|
+
console.print(
|
|
390
|
+
f"[dim]✓ Experiment started: [bold]{self.name}[/bold] (project: {self.project})[/dim]\n"
|
|
391
|
+
f"[dim]View your data, statistics, and plots online at:[/dim] "
|
|
392
|
+
f"[link=https://dash.ml]https://dash.ml[/link]"
|
|
393
|
+
)
|
|
394
|
+
except ImportError:
|
|
395
|
+
# Fallback if rich is not available
|
|
396
|
+
print(f"✓ Experiment started: {self.name} (project: {self.project})")
|
|
397
|
+
print("View your data at: https://dash.ml")
|
|
398
|
+
|
|
399
|
+
except Exception as e:
|
|
400
|
+
# Check if it's an authentication error
|
|
401
|
+
from .auth.exceptions import AuthenticationError
|
|
402
|
+
|
|
403
|
+
if isinstance(e, AuthenticationError):
|
|
404
|
+
try:
|
|
405
|
+
from rich.console import Console
|
|
406
|
+
from rich.panel import Panel
|
|
407
|
+
|
|
408
|
+
console = Console()
|
|
409
|
+
|
|
410
|
+
message = (
|
|
411
|
+
"[bold red]Authentication Required[/bold red]\n\n"
|
|
412
|
+
"You need to authenticate before using remote experiments.\n\n"
|
|
413
|
+
"[bold]To authenticate:[/bold]\n"
|
|
414
|
+
" [cyan]ml-dash login[/cyan]\n\n"
|
|
415
|
+
"[dim]This will open your browser for secure OAuth2 authentication.\n"
|
|
416
|
+
"Your token will be stored securely in your system keychain.[/dim]\n\n"
|
|
417
|
+
"[bold]Alternative:[/bold]\n"
|
|
418
|
+
" Use [cyan]local_path[/cyan] instead of [cyan]remote[/cyan] for offline experiments"
|
|
122
419
|
)
|
|
123
420
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
"""Context manager entry - starts the experiment."""
|
|
149
|
-
return self.start()
|
|
150
|
-
|
|
151
|
-
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
152
|
-
"""Context manager exit - completes or fails the experiment."""
|
|
153
|
-
if exc_type is not None:
|
|
154
|
-
self.fail()
|
|
421
|
+
panel = Panel(
|
|
422
|
+
message,
|
|
423
|
+
title="[bold yellow]⚠ Not Authenticated[/bold yellow]",
|
|
424
|
+
border_style="yellow",
|
|
425
|
+
expand=False,
|
|
426
|
+
)
|
|
427
|
+
console.print("\n")
|
|
428
|
+
console.print(panel)
|
|
429
|
+
console.print("\n")
|
|
430
|
+
except ImportError:
|
|
431
|
+
# Fallback if rich is not available
|
|
432
|
+
print("\n" + "=" * 60)
|
|
433
|
+
print("⚠ Authentication Required")
|
|
434
|
+
print("=" * 60)
|
|
435
|
+
print("\nYou need to authenticate before using remote experiments.\n")
|
|
436
|
+
print("To authenticate:")
|
|
437
|
+
print(" ml-dash login\n")
|
|
438
|
+
print("Alternative:")
|
|
439
|
+
print(" Use local_path instead of remote for offline experiments\n")
|
|
440
|
+
print("=" * 60 + "\n")
|
|
441
|
+
|
|
442
|
+
import sys
|
|
443
|
+
|
|
444
|
+
sys.exit(1)
|
|
155
445
|
else:
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
446
|
+
# Re-raise other exceptions
|
|
447
|
+
raise
|
|
448
|
+
|
|
449
|
+
if self._storage:
|
|
450
|
+
# Local mode: create experiment directory structure
|
|
451
|
+
self._storage.create_experiment(
|
|
452
|
+
owner=self.owner,
|
|
453
|
+
project=self.project,
|
|
454
|
+
prefix=self._folder_path,
|
|
455
|
+
description=self.readme,
|
|
456
|
+
tags=self.tags,
|
|
457
|
+
bindrs=self._bindrs_list,
|
|
458
|
+
metadata=self.metadata,
|
|
459
|
+
)
|
|
460
|
+
|
|
461
|
+
self._is_open = True
|
|
462
|
+
return self
|
|
463
|
+
|
|
464
|
+
def _close(self, status: str = "COMPLETED"):
|
|
465
|
+
"""
|
|
466
|
+
Internal method to close the experiment and update status.
|
|
162
467
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
468
|
+
Args:
|
|
469
|
+
status: Status to set - "COMPLETED" (default), "FAILED", or "CANCELLED"
|
|
470
|
+
"""
|
|
471
|
+
if not self._is_open:
|
|
472
|
+
return
|
|
473
|
+
|
|
474
|
+
# Flush any pending writes
|
|
475
|
+
if self._storage:
|
|
476
|
+
self._storage.flush()
|
|
477
|
+
|
|
478
|
+
# Update experiment status in remote mode
|
|
479
|
+
if self._client and self._experiment_id:
|
|
480
|
+
try:
|
|
481
|
+
self._client.update_experiment_status(
|
|
482
|
+
experiment_id=self._experiment_id, status=status
|
|
483
|
+
)
|
|
173
484
|
|
|
485
|
+
# Display completion message with link to view results
|
|
486
|
+
status_emoji = {"COMPLETED": "✓", "FAILED": "✗", "CANCELLED": "⊘"}.get(
|
|
487
|
+
status, "•"
|
|
488
|
+
)
|
|
174
489
|
|
|
175
|
-
|
|
490
|
+
status_color = {
|
|
491
|
+
"COMPLETED": "green",
|
|
492
|
+
"FAILED": "red",
|
|
493
|
+
"CANCELLED": "yellow",
|
|
494
|
+
}.get(status, "white")
|
|
495
|
+
|
|
496
|
+
try:
|
|
497
|
+
from rich.console import Console
|
|
498
|
+
|
|
499
|
+
console = Console()
|
|
500
|
+
console.print(
|
|
501
|
+
f"[{status_color}]{status_emoji} Experiment {status.lower()}: "
|
|
502
|
+
f"[bold]{self.name}[/bold] (project: {self.project})[/{status_color}]\n"
|
|
503
|
+
f"[dim]View results, statistics, and plots online at:[/dim] "
|
|
504
|
+
f"[link=https://dash.ml]https://dash.ml[/link]"
|
|
505
|
+
)
|
|
506
|
+
except ImportError:
|
|
507
|
+
# Fallback if rich is not available
|
|
508
|
+
print(
|
|
509
|
+
f"{status_emoji} Experiment {status.lower()}: {self.name} (project: {self.project})"
|
|
510
|
+
)
|
|
511
|
+
print("View results at: https://dash.ml")
|
|
512
|
+
|
|
513
|
+
except Exception as e:
|
|
514
|
+
# Log error but don't fail the close operation
|
|
515
|
+
print(f"Warning: Failed to update experiment status: {e}")
|
|
516
|
+
|
|
517
|
+
self._is_open = False
|
|
518
|
+
|
|
519
|
+
# Reset RUN for next experiment
|
|
520
|
+
# TODO: RUN._reset() - method doesn't exist
|
|
521
|
+
# RUN._reset()
|
|
522
|
+
|
|
523
|
+
@property
|
|
524
|
+
def run(self) -> RunManager:
|
|
176
525
|
"""
|
|
177
|
-
|
|
526
|
+
Get the RunManager for lifecycle operations.
|
|
178
527
|
|
|
179
|
-
Usage
|
|
528
|
+
Usage:
|
|
529
|
+
# Method calls
|
|
530
|
+
experiment.run.start()
|
|
531
|
+
experiment.run.complete()
|
|
180
532
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
project="my-project",
|
|
185
|
-
remote="https://api.dash.ml",
|
|
186
|
-
api_key="your-jwt-token"
|
|
187
|
-
)
|
|
533
|
+
# Context manager
|
|
534
|
+
with Experiment(...).run as exp:
|
|
535
|
+
exp.log("Training...")
|
|
188
536
|
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
local_path=".ml-dash"
|
|
194
|
-
)
|
|
537
|
+
# Decorator
|
|
538
|
+
@experiment.run
|
|
539
|
+
def train(exp):
|
|
540
|
+
exp.log("Training...")
|
|
195
541
|
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
542
|
+
Returns:
|
|
543
|
+
RunManager instance
|
|
544
|
+
"""
|
|
545
|
+
return RunManager(self)
|
|
199
546
|
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
def train():
|
|
203
|
-
...
|
|
547
|
+
@property
|
|
548
|
+
def params(self) -> ParametersBuilder:
|
|
204
549
|
"""
|
|
550
|
+
Get a ParametersBuilder for parameter operations.
|
|
205
551
|
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
project: str,
|
|
210
|
-
*,
|
|
211
|
-
description: Optional[str] = None,
|
|
212
|
-
tags: Optional[List[str]] = None,
|
|
213
|
-
bindrs: Optional[List[str]] = None,
|
|
214
|
-
folder: Optional[str] = None,
|
|
215
|
-
metadata: Optional[Dict[str, Any]] = None,
|
|
216
|
-
# Mode configuration
|
|
217
|
-
remote: Optional[str] = None,
|
|
218
|
-
api_key: Optional[str] = None,
|
|
219
|
-
local_path: Optional[str] = None,
|
|
220
|
-
# Internal parameters
|
|
221
|
-
_write_protected: bool = False,
|
|
222
|
-
):
|
|
223
|
-
"""
|
|
224
|
-
Initialize an ML-Dash experiment.
|
|
225
|
-
|
|
226
|
-
Args:
|
|
227
|
-
name: Experiment name (unique within project)
|
|
228
|
-
project: Project name
|
|
229
|
-
description: Optional experiment description
|
|
230
|
-
tags: Optional list of tags
|
|
231
|
-
bindrs: Optional list of bindrs
|
|
232
|
-
folder: Optional folder path (e.g., "/experiments/baseline")
|
|
233
|
-
metadata: Optional metadata dict
|
|
234
|
-
remote: Remote API URL (e.g., "https://api.dash.ml")
|
|
235
|
-
api_key: JWT token for authentication (auto-loaded from storage if not provided)
|
|
236
|
-
local_path: Local storage root path (for local mode)
|
|
237
|
-
_write_protected: Internal parameter - if True, experiment becomes immutable after creation
|
|
238
|
-
"""
|
|
239
|
-
self.name = name
|
|
240
|
-
self.project = project
|
|
241
|
-
self.description = description
|
|
242
|
-
self.tags = tags
|
|
243
|
-
self.bindrs = bindrs
|
|
244
|
-
self.folder = folder
|
|
245
|
-
self._write_protected = _write_protected
|
|
246
|
-
self.metadata = metadata
|
|
247
|
-
|
|
248
|
-
# Determine operation mode
|
|
249
|
-
if remote and local_path:
|
|
250
|
-
self.mode = OperationMode.HYBRID
|
|
251
|
-
elif remote:
|
|
252
|
-
self.mode = OperationMode.REMOTE
|
|
253
|
-
elif local_path:
|
|
254
|
-
self.mode = OperationMode.LOCAL
|
|
255
|
-
else:
|
|
256
|
-
raise ValueError(
|
|
257
|
-
"Must specify either 'remote' (with api_key) or 'local_path'"
|
|
258
|
-
)
|
|
552
|
+
Usage:
|
|
553
|
+
# Set parameters
|
|
554
|
+
experiment.params.set(lr=0.001, batch_size=32)
|
|
259
555
|
|
|
260
|
-
#
|
|
261
|
-
|
|
262
|
-
self._storage: Optional[LocalStorage] = None
|
|
263
|
-
self._experiment_id: Optional[str] = None
|
|
264
|
-
self._experiment_data: Optional[Dict[str, Any]] = None
|
|
265
|
-
self._is_open = False
|
|
266
|
-
self._metrics_manager: Optional['MetricsManager'] = None # Cached metrics manager
|
|
267
|
-
|
|
268
|
-
if self.mode in (OperationMode.REMOTE, OperationMode.HYBRID):
|
|
269
|
-
# api_key can be None - RemoteClient will auto-load from storage
|
|
270
|
-
self._client = RemoteClient(base_url=remote, api_key=api_key)
|
|
271
|
-
|
|
272
|
-
if self.mode in (OperationMode.LOCAL, OperationMode.HYBRID):
|
|
273
|
-
if not local_path:
|
|
274
|
-
raise ValueError("local_path is required for local mode")
|
|
275
|
-
self._storage = LocalStorage(root_path=Path(local_path))
|
|
276
|
-
|
|
277
|
-
def _open(self) -> "Experiment":
|
|
278
|
-
"""
|
|
279
|
-
Internal method to open the experiment (create or update on server/filesystem).
|
|
280
|
-
|
|
281
|
-
Returns:
|
|
282
|
-
self for chaining
|
|
283
|
-
"""
|
|
284
|
-
if self._is_open:
|
|
285
|
-
return self
|
|
286
|
-
|
|
287
|
-
if self._client:
|
|
288
|
-
# Remote mode: create/update experiment via API
|
|
289
|
-
response = self._client.create_or_update_experiment(
|
|
290
|
-
project=self.project,
|
|
291
|
-
name=self.name,
|
|
292
|
-
description=self.description,
|
|
293
|
-
tags=self.tags,
|
|
294
|
-
bindrs=self.bindrs,
|
|
295
|
-
folder=self.folder,
|
|
296
|
-
write_protected=self._write_protected,
|
|
297
|
-
metadata=self.metadata,
|
|
298
|
-
)
|
|
299
|
-
self._experiment_data = response
|
|
300
|
-
self._experiment_id = response["experiment"]["id"]
|
|
301
|
-
|
|
302
|
-
if self._storage:
|
|
303
|
-
# Local mode: create experiment directory structure
|
|
304
|
-
self._storage.create_experiment(
|
|
305
|
-
project=self.project,
|
|
306
|
-
name=self.name,
|
|
307
|
-
description=self.description,
|
|
308
|
-
tags=self.tags,
|
|
309
|
-
bindrs=self.bindrs,
|
|
310
|
-
folder=self.folder,
|
|
311
|
-
metadata=self.metadata,
|
|
312
|
-
)
|
|
556
|
+
# Get parameters
|
|
557
|
+
params = experiment.params.get()
|
|
313
558
|
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
def _close(self, status: str = "COMPLETED"):
|
|
318
|
-
"""
|
|
319
|
-
Internal method to close the experiment and update status.
|
|
320
|
-
|
|
321
|
-
Args:
|
|
322
|
-
status: Status to set - "COMPLETED" (default), "FAILED", or "CANCELLED"
|
|
323
|
-
"""
|
|
324
|
-
if not self._is_open:
|
|
325
|
-
return
|
|
326
|
-
|
|
327
|
-
# Flush any pending writes
|
|
328
|
-
if self._storage:
|
|
329
|
-
self._storage.flush()
|
|
330
|
-
|
|
331
|
-
# Update experiment status in remote mode
|
|
332
|
-
if self._client and self._experiment_id:
|
|
333
|
-
try:
|
|
334
|
-
self._client.update_experiment_status(
|
|
335
|
-
experiment_id=self._experiment_id,
|
|
336
|
-
status=status
|
|
337
|
-
)
|
|
338
|
-
except Exception as e:
|
|
339
|
-
# Log error but don't fail the close operation
|
|
340
|
-
print(f"Warning: Failed to update experiment status: {e}")
|
|
341
|
-
|
|
342
|
-
self._is_open = False
|
|
343
|
-
|
|
344
|
-
@property
|
|
345
|
-
def run(self) -> RunManager:
|
|
346
|
-
"""
|
|
347
|
-
Get the RunManager for lifecycle operations.
|
|
348
|
-
|
|
349
|
-
Usage:
|
|
350
|
-
# Method calls
|
|
351
|
-
experiment.run.start()
|
|
352
|
-
experiment.run.complete()
|
|
353
|
-
|
|
354
|
-
# Context manager
|
|
355
|
-
with Experiment(...).run as exp:
|
|
356
|
-
exp.log("Training...")
|
|
357
|
-
|
|
358
|
-
# Decorator
|
|
359
|
-
@experiment.run
|
|
360
|
-
def train(exp):
|
|
361
|
-
exp.log("Training...")
|
|
362
|
-
|
|
363
|
-
Returns:
|
|
364
|
-
RunManager instance
|
|
365
|
-
"""
|
|
366
|
-
return RunManager(self)
|
|
367
|
-
|
|
368
|
-
@property
|
|
369
|
-
def params(self) -> ParametersBuilder:
|
|
370
|
-
"""
|
|
371
|
-
Get a ParametersBuilder for parameter operations.
|
|
372
|
-
|
|
373
|
-
Usage:
|
|
374
|
-
# Set parameters
|
|
375
|
-
experiment.params.set(lr=0.001, batch_size=32)
|
|
376
|
-
|
|
377
|
-
# Get parameters
|
|
378
|
-
params = experiment.params.get()
|
|
379
|
-
|
|
380
|
-
Returns:
|
|
381
|
-
ParametersBuilder instance
|
|
382
|
-
|
|
383
|
-
Raises:
|
|
384
|
-
RuntimeError: If experiment is not open
|
|
385
|
-
"""
|
|
386
|
-
if not self._is_open:
|
|
387
|
-
raise RuntimeError(
|
|
388
|
-
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
389
|
-
"Example:\n"
|
|
390
|
-
" with dxp.run:\n"
|
|
391
|
-
" dxp.params.set(lr=0.001)"
|
|
392
|
-
)
|
|
559
|
+
Returns:
|
|
560
|
+
ParametersBuilder instance
|
|
393
561
|
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
Examples:
|
|
412
|
-
experiment.log(metadata={"epoch": 1}).info("Training started")
|
|
413
|
-
experiment.log().error("Failed", error_code=500)
|
|
414
|
-
|
|
415
|
-
2. Traditional style (message provided):
|
|
416
|
-
Writes the log immediately and returns None.
|
|
417
|
-
|
|
418
|
-
Examples:
|
|
419
|
-
experiment.log("Training started", level="info", epoch=1)
|
|
420
|
-
experiment.log("Training started") # Defaults to "info"
|
|
421
|
-
|
|
422
|
-
Args:
|
|
423
|
-
message: Optional log message (for traditional style)
|
|
424
|
-
level: Optional log level (for traditional style, defaults to "info")
|
|
425
|
-
metadata: Optional metadata dict
|
|
426
|
-
**extra_metadata: Additional metadata as keyword arguments
|
|
427
|
-
|
|
428
|
-
Returns:
|
|
429
|
-
LogBuilder if no message provided (fluent mode)
|
|
430
|
-
None if log was written directly (traditional mode)
|
|
431
|
-
|
|
432
|
-
Raises:
|
|
433
|
-
RuntimeError: If experiment is not open
|
|
434
|
-
ValueError: If log level is invalid
|
|
435
|
-
"""
|
|
436
|
-
if not self._is_open:
|
|
437
|
-
raise RuntimeError(
|
|
438
|
-
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
439
|
-
"Example:\n"
|
|
440
|
-
" with dxp.run:\n"
|
|
441
|
-
" dxp.log().info('Training started')"
|
|
442
|
-
)
|
|
562
|
+
Raises:
|
|
563
|
+
RuntimeError: If experiment is not open
|
|
564
|
+
"""
|
|
565
|
+
if not self._is_open:
|
|
566
|
+
raise RuntimeError(
|
|
567
|
+
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
568
|
+
"Example:\n"
|
|
569
|
+
" with dxp.run:\n"
|
|
570
|
+
" dxp.params.set(lr=0.001)"
|
|
571
|
+
)
|
|
572
|
+
|
|
573
|
+
return ParametersBuilder(self)
|
|
574
|
+
|
|
575
|
+
@property
|
|
576
|
+
def logs(self) -> LogBuilder:
|
|
577
|
+
"""
|
|
578
|
+
Get a LogBuilder for fluent-style logging.
|
|
443
579
|
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
combined_metadata = {**(metadata or {}), **extra_metadata}
|
|
447
|
-
return LogBuilder(self, combined_metadata if combined_metadata else None)
|
|
448
|
-
|
|
449
|
-
# Traditional mode: write immediately
|
|
450
|
-
level = level or LogLevel.INFO.value # Default to "info"
|
|
451
|
-
level = LogLevel.validate(level) # Validate level
|
|
452
|
-
|
|
453
|
-
combined_metadata = {**(metadata or {}), **extra_metadata}
|
|
454
|
-
self._write_log(
|
|
455
|
-
message=message,
|
|
456
|
-
level=level,
|
|
457
|
-
metadata=combined_metadata if combined_metadata else None,
|
|
458
|
-
timestamp=None
|
|
459
|
-
)
|
|
460
|
-
return None
|
|
461
|
-
|
|
462
|
-
def _write_log(
|
|
463
|
-
self,
|
|
464
|
-
message: str,
|
|
465
|
-
level: str,
|
|
466
|
-
metadata: Optional[Dict[str, Any]],
|
|
467
|
-
timestamp: Optional[datetime]
|
|
468
|
-
) -> None:
|
|
469
|
-
"""
|
|
470
|
-
Internal method to write a log entry immediately.
|
|
471
|
-
No buffering - writes directly to storage/remote AND stdout/stderr.
|
|
472
|
-
|
|
473
|
-
Args:
|
|
474
|
-
message: Log message
|
|
475
|
-
level: Log level (already validated)
|
|
476
|
-
metadata: Optional metadata dict
|
|
477
|
-
timestamp: Optional custom timestamp (defaults to now)
|
|
478
|
-
"""
|
|
479
|
-
log_entry = {
|
|
480
|
-
"timestamp": (timestamp or datetime.utcnow()).isoformat() + "Z",
|
|
481
|
-
"level": level,
|
|
482
|
-
"message": message,
|
|
483
|
-
}
|
|
484
|
-
|
|
485
|
-
if metadata:
|
|
486
|
-
log_entry["metadata"] = metadata
|
|
487
|
-
|
|
488
|
-
# Mirror to stdout/stderr before writing to storage
|
|
489
|
-
self._print_log(message, level, metadata)
|
|
490
|
-
|
|
491
|
-
# Write immediately (no buffering)
|
|
492
|
-
if self._client:
|
|
493
|
-
# Remote mode: send to API (wrapped in array for batch API)
|
|
494
|
-
self._client.create_log_entries(
|
|
495
|
-
experiment_id=self._experiment_id,
|
|
496
|
-
logs=[log_entry] # Single log in array
|
|
497
|
-
)
|
|
580
|
+
Returns a LogBuilder that allows chaining with level methods like
|
|
581
|
+
.info(), .warn(), .error(), .debug(), .fatal().
|
|
498
582
|
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
self._storage.write_log(
|
|
502
|
-
project=self.project,
|
|
503
|
-
experiment=self.name,
|
|
504
|
-
folder=self.folder,
|
|
505
|
-
message=log_entry["message"],
|
|
506
|
-
level=log_entry["level"],
|
|
507
|
-
metadata=log_entry.get("metadata"),
|
|
508
|
-
timestamp=log_entry["timestamp"]
|
|
509
|
-
)
|
|
583
|
+
Returns:
|
|
584
|
+
LogBuilder instance for fluent logging
|
|
510
585
|
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
message: str,
|
|
514
|
-
level: str,
|
|
515
|
-
metadata: Optional[Dict[str, Any]]
|
|
516
|
-
) -> None:
|
|
517
|
-
"""
|
|
518
|
-
Print log to stdout or stderr based on level.
|
|
519
|
-
|
|
520
|
-
ERROR and FATAL go to stderr, all others go to stdout.
|
|
521
|
-
|
|
522
|
-
Args:
|
|
523
|
-
message: Log message
|
|
524
|
-
level: Log level
|
|
525
|
-
metadata: Optional metadata dict
|
|
526
|
-
"""
|
|
527
|
-
import sys
|
|
528
|
-
|
|
529
|
-
# Format the log message
|
|
530
|
-
level_upper = level.upper()
|
|
531
|
-
|
|
532
|
-
# Build metadata string if present
|
|
533
|
-
metadata_str = ""
|
|
534
|
-
if metadata:
|
|
535
|
-
# Format metadata as key=value pairs
|
|
536
|
-
pairs = [f"{k}={v}" for k, v in metadata.items()]
|
|
537
|
-
metadata_str = f" [{', '.join(pairs)}]"
|
|
538
|
-
|
|
539
|
-
# Format: [LEVEL] message [key=value, ...]
|
|
540
|
-
formatted_message = f"[{level_upper}] {message}{metadata_str}"
|
|
541
|
-
|
|
542
|
-
# Route to stdout or stderr based on level
|
|
543
|
-
if level in ("error", "fatal"):
|
|
544
|
-
print(formatted_message, file=sys.stderr)
|
|
545
|
-
else:
|
|
546
|
-
print(formatted_message, file=sys.stdout)
|
|
547
|
-
|
|
548
|
-
def files(self, **kwargs) -> FileBuilder:
|
|
549
|
-
"""
|
|
550
|
-
Get a FileBuilder for fluent file operations.
|
|
551
|
-
|
|
552
|
-
Returns:
|
|
553
|
-
FileBuilder instance for chaining
|
|
554
|
-
|
|
555
|
-
Raises:
|
|
556
|
-
RuntimeError: If experiment is not open
|
|
557
|
-
|
|
558
|
-
Examples:
|
|
559
|
-
# Upload file
|
|
560
|
-
experiment.files(file_path="./model.pt", prefix="/models").save()
|
|
561
|
-
|
|
562
|
-
# List files
|
|
563
|
-
files = experiment.files().list()
|
|
564
|
-
files = experiment.files(prefix="/models").list()
|
|
565
|
-
|
|
566
|
-
# Download file
|
|
567
|
-
experiment.files(file_id="123").download()
|
|
568
|
-
|
|
569
|
-
# Delete file
|
|
570
|
-
experiment.files(file_id="123").delete()
|
|
571
|
-
"""
|
|
572
|
-
if not self._is_open:
|
|
573
|
-
raise RuntimeError(
|
|
574
|
-
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
575
|
-
"Example:\n"
|
|
576
|
-
" with dxp.run:\n"
|
|
577
|
-
" dxp.files().save()"
|
|
578
|
-
)
|
|
586
|
+
Raises:
|
|
587
|
+
RuntimeError: If experiment is not open
|
|
579
588
|
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
content_type: MIME type
|
|
606
|
-
size_bytes: File size in bytes
|
|
607
|
-
|
|
608
|
-
Returns:
|
|
609
|
-
File metadata dict
|
|
610
|
-
"""
|
|
611
|
-
result = None
|
|
612
|
-
|
|
613
|
-
if self._client:
|
|
614
|
-
# Remote mode: upload to API
|
|
615
|
-
result = self._client.upload_file(
|
|
616
|
-
experiment_id=self._experiment_id,
|
|
617
|
-
file_path=file_path,
|
|
618
|
-
prefix=prefix,
|
|
619
|
-
filename=filename,
|
|
620
|
-
description=description,
|
|
621
|
-
tags=tags,
|
|
622
|
-
metadata=metadata,
|
|
623
|
-
checksum=checksum,
|
|
624
|
-
content_type=content_type,
|
|
625
|
-
size_bytes=size_bytes
|
|
626
|
-
)
|
|
589
|
+
Examples:
|
|
590
|
+
exp.logs.info("Training started", epoch=1)
|
|
591
|
+
exp.logs.error("Failed to load data", error_code=500)
|
|
592
|
+
exp.logs.warn("GPU memory low", memory_available="1GB")
|
|
593
|
+
exp.logs.debug("Debug info", step=100)
|
|
594
|
+
"""
|
|
595
|
+
if not self._is_open:
|
|
596
|
+
raise RuntimeError(
|
|
597
|
+
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
598
|
+
"Example:\n"
|
|
599
|
+
" with dxp.run:\n"
|
|
600
|
+
" dxp.logs.info('Training started')"
|
|
601
|
+
)
|
|
602
|
+
|
|
603
|
+
return LogBuilder(self, metadata=None)
|
|
604
|
+
|
|
605
|
+
def log(
|
|
606
|
+
self,
|
|
607
|
+
message: Optional[str] = None,
|
|
608
|
+
level: Optional[str] = None,
|
|
609
|
+
metadata: Optional[Dict[str, Any]] = None,
|
|
610
|
+
**extra_metadata,
|
|
611
|
+
) -> Optional[LogBuilder]:
|
|
612
|
+
"""
|
|
613
|
+
Create a log entry (traditional style).
|
|
627
614
|
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
project=self.project,
|
|
632
|
-
experiment=self.name,
|
|
633
|
-
folder=self.folder,
|
|
634
|
-
file_path=file_path,
|
|
635
|
-
prefix=prefix,
|
|
636
|
-
filename=filename,
|
|
637
|
-
description=description,
|
|
638
|
-
tags=tags,
|
|
639
|
-
metadata=metadata,
|
|
640
|
-
checksum=checksum,
|
|
641
|
-
content_type=content_type,
|
|
642
|
-
size_bytes=size_bytes
|
|
643
|
-
)
|
|
615
|
+
.. deprecated::
|
|
616
|
+
The fluent style (calling without message) is deprecated.
|
|
617
|
+
Use the `logs` property instead: `exp.logs.info("message")`
|
|
644
618
|
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
self,
|
|
649
|
-
prefix: Optional[str] = None,
|
|
650
|
-
tags: Optional[List[str]] = None
|
|
651
|
-
) -> List[Dict[str, Any]]:
|
|
652
|
-
"""
|
|
653
|
-
Internal method to list files.
|
|
654
|
-
|
|
655
|
-
Args:
|
|
656
|
-
prefix: Optional prefix filter
|
|
657
|
-
tags: Optional tags filter
|
|
658
|
-
|
|
659
|
-
Returns:
|
|
660
|
-
List of file metadata dicts
|
|
661
|
-
"""
|
|
662
|
-
files = []
|
|
663
|
-
|
|
664
|
-
if self._client:
|
|
665
|
-
# Remote mode: fetch from API
|
|
666
|
-
files = self._client.list_files(
|
|
667
|
-
experiment_id=self._experiment_id,
|
|
668
|
-
prefix=prefix,
|
|
669
|
-
tags=tags
|
|
670
|
-
)
|
|
619
|
+
Recommended usage:
|
|
620
|
+
exp.logs.info("Training started", epoch=1)
|
|
621
|
+
exp.logs.error("Failed", error_code=500)
|
|
671
622
|
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
project=self.project,
|
|
676
|
-
experiment=self.name,
|
|
677
|
-
prefix=prefix,
|
|
678
|
-
tags=tags
|
|
679
|
-
)
|
|
623
|
+
Traditional style (still supported):
|
|
624
|
+
experiment.log("Training started", level="info", epoch=1)
|
|
625
|
+
experiment.log("Training started") # Defaults to "info"
|
|
680
626
|
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
dest_path: Optional[str] = None
|
|
687
|
-
) -> str:
|
|
688
|
-
"""
|
|
689
|
-
Internal method to download a file.
|
|
690
|
-
|
|
691
|
-
Args:
|
|
692
|
-
file_id: File ID
|
|
693
|
-
dest_path: Optional destination path (defaults to original filename)
|
|
694
|
-
|
|
695
|
-
Returns:
|
|
696
|
-
Path to downloaded file
|
|
697
|
-
"""
|
|
698
|
-
if self._client:
|
|
699
|
-
# Remote mode: download from API
|
|
700
|
-
return self._client.download_file(
|
|
701
|
-
experiment_id=self._experiment_id,
|
|
702
|
-
file_id=file_id,
|
|
703
|
-
dest_path=dest_path
|
|
704
|
-
)
|
|
627
|
+
Args:
|
|
628
|
+
message: Log message (required for recommended usage)
|
|
629
|
+
level: Log level (defaults to "info")
|
|
630
|
+
metadata: Optional metadata dict
|
|
631
|
+
**extra_metadata: Additional metadata as keyword arguments
|
|
705
632
|
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
project=self.project,
|
|
710
|
-
experiment=self.name,
|
|
711
|
-
file_id=file_id,
|
|
712
|
-
dest_path=dest_path
|
|
713
|
-
)
|
|
633
|
+
Returns:
|
|
634
|
+
None when used in traditional style (message provided)
|
|
635
|
+
LogBuilder when used in deprecated fluent style (message=None)
|
|
714
636
|
|
|
715
|
-
|
|
637
|
+
Raises:
|
|
638
|
+
RuntimeError: If experiment is not open
|
|
639
|
+
ValueError: If log level is invalid
|
|
640
|
+
"""
|
|
641
|
+
if not self._is_open:
|
|
642
|
+
raise RuntimeError(
|
|
643
|
+
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
644
|
+
"Example:\n"
|
|
645
|
+
" with dxp.run:\n"
|
|
646
|
+
" dxp.logs.info('Training started')"
|
|
647
|
+
)
|
|
648
|
+
|
|
649
|
+
# Fluent mode: return LogBuilder (deprecated)
|
|
650
|
+
if message is None:
|
|
651
|
+
import warnings
|
|
652
|
+
warnings.warn(
|
|
653
|
+
"Using exp.log() without a message is deprecated. "
|
|
654
|
+
"Use exp.logs.info('message') instead.",
|
|
655
|
+
DeprecationWarning,
|
|
656
|
+
stacklevel=2
|
|
657
|
+
)
|
|
658
|
+
combined_metadata = {**(metadata or {}), **extra_metadata}
|
|
659
|
+
return LogBuilder(self, combined_metadata if combined_metadata else None)
|
|
660
|
+
|
|
661
|
+
# Traditional mode: write immediately
|
|
662
|
+
level = level or LogLevel.INFO.value # Default to "info"
|
|
663
|
+
level = LogLevel.validate(level) # Validate level
|
|
664
|
+
|
|
665
|
+
combined_metadata = {**(metadata or {}), **extra_metadata}
|
|
666
|
+
self._write_log(
|
|
667
|
+
message=message,
|
|
668
|
+
level=level,
|
|
669
|
+
metadata=combined_metadata if combined_metadata else None,
|
|
670
|
+
timestamp=None,
|
|
671
|
+
)
|
|
672
|
+
return None
|
|
673
|
+
|
|
674
|
+
def _write_log(
|
|
675
|
+
self,
|
|
676
|
+
message: str,
|
|
677
|
+
level: str,
|
|
678
|
+
metadata: Optional[Dict[str, Any]],
|
|
679
|
+
timestamp: Optional[datetime],
|
|
680
|
+
) -> None:
|
|
681
|
+
"""
|
|
682
|
+
Internal method to write a log entry immediately.
|
|
683
|
+
No buffering - writes directly to storage/remote AND stdout/stderr.
|
|
684
|
+
|
|
685
|
+
Args:
|
|
686
|
+
message: Log message
|
|
687
|
+
level: Log level (already validated)
|
|
688
|
+
metadata: Optional metadata dict
|
|
689
|
+
timestamp: Optional custom timestamp (defaults to now)
|
|
690
|
+
"""
|
|
691
|
+
log_entry = {
|
|
692
|
+
"timestamp": (timestamp or datetime.utcnow()).isoformat() + "Z",
|
|
693
|
+
"level": level,
|
|
694
|
+
"message": message,
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
if metadata:
|
|
698
|
+
log_entry["metadata"] = metadata
|
|
699
|
+
|
|
700
|
+
# Mirror to stdout/stderr before writing to storage
|
|
701
|
+
self._print_log(message, level, metadata)
|
|
702
|
+
|
|
703
|
+
# Write immediately (no buffering)
|
|
704
|
+
if self._client:
|
|
705
|
+
# Remote mode: send to API (wrapped in array for batch API)
|
|
706
|
+
self._client.create_log_entries(
|
|
707
|
+
experiment_id=self._experiment_id,
|
|
708
|
+
logs=[log_entry], # Single log in array
|
|
709
|
+
)
|
|
710
|
+
|
|
711
|
+
if self._storage:
|
|
712
|
+
# Local mode: write to file immediately
|
|
713
|
+
self._storage.write_log(
|
|
714
|
+
owner=self.owner,
|
|
715
|
+
project=self.project,
|
|
716
|
+
prefix=self._folder_path,
|
|
717
|
+
message=log_entry["message"],
|
|
718
|
+
level=log_entry["level"],
|
|
719
|
+
metadata=log_entry.get("metadata"),
|
|
720
|
+
timestamp=log_entry["timestamp"],
|
|
721
|
+
)
|
|
722
|
+
|
|
723
|
+
def _print_log(
|
|
724
|
+
self, message: str, level: str, metadata: Optional[Dict[str, Any]]
|
|
725
|
+
) -> None:
|
|
726
|
+
"""
|
|
727
|
+
Print log to stdout or stderr based on level.
|
|
716
728
|
|
|
717
|
-
|
|
718
|
-
"""
|
|
719
|
-
Internal method to delete a file.
|
|
729
|
+
ERROR and FATAL go to stderr, all others go to stdout.
|
|
720
730
|
|
|
721
|
-
|
|
722
|
-
|
|
731
|
+
Args:
|
|
732
|
+
message: Log message
|
|
733
|
+
level: Log level
|
|
734
|
+
metadata: Optional metadata dict
|
|
735
|
+
"""
|
|
736
|
+
import sys
|
|
723
737
|
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
"""
|
|
727
|
-
result = None
|
|
738
|
+
# Format the log message
|
|
739
|
+
level_upper = level.upper()
|
|
728
740
|
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
741
|
+
# Build metadata string if present
|
|
742
|
+
metadata_str = ""
|
|
743
|
+
if metadata:
|
|
744
|
+
# Format metadata as key=value pairs
|
|
745
|
+
pairs = [f"{k}={v}" for k, v in metadata.items()]
|
|
746
|
+
metadata_str = f" [{', '.join(pairs)}]"
|
|
735
747
|
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
result = self._storage.delete_file(
|
|
739
|
-
project=self.project,
|
|
740
|
-
experiment=self.name,
|
|
741
|
-
file_id=file_id
|
|
742
|
-
)
|
|
748
|
+
# Format: [LEVEL] message [key=value, ...]
|
|
749
|
+
formatted_message = f"[{level_upper}] {message}{metadata_str}"
|
|
743
750
|
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
description: Optional[str],
|
|
750
|
-
tags: Optional[List[str]],
|
|
751
|
-
metadata: Optional[Dict[str, Any]]
|
|
752
|
-
) -> Dict[str, Any]:
|
|
753
|
-
"""
|
|
754
|
-
Internal method to update file metadata.
|
|
755
|
-
|
|
756
|
-
Args:
|
|
757
|
-
file_id: File ID
|
|
758
|
-
description: Optional description
|
|
759
|
-
tags: Optional tags
|
|
760
|
-
metadata: Optional metadata
|
|
761
|
-
|
|
762
|
-
Returns:
|
|
763
|
-
Updated file metadata dict
|
|
764
|
-
"""
|
|
765
|
-
result = None
|
|
766
|
-
|
|
767
|
-
if self._client:
|
|
768
|
-
# Remote mode: update via API
|
|
769
|
-
result = self._client.update_file(
|
|
770
|
-
experiment_id=self._experiment_id,
|
|
771
|
-
file_id=file_id,
|
|
772
|
-
description=description,
|
|
773
|
-
tags=tags,
|
|
774
|
-
metadata=metadata
|
|
775
|
-
)
|
|
751
|
+
# Route to stdout or stderr based on level
|
|
752
|
+
if level in ("error", "fatal"):
|
|
753
|
+
print(formatted_message, file=sys.stderr)
|
|
754
|
+
else:
|
|
755
|
+
print(formatted_message, file=sys.stdout)
|
|
776
756
|
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
experiment=self.name,
|
|
782
|
-
file_id=file_id,
|
|
783
|
-
description=description,
|
|
784
|
-
tags=tags,
|
|
785
|
-
metadata=metadata
|
|
786
|
-
)
|
|
757
|
+
@property
|
|
758
|
+
def files(self) -> FilesAccessor:
|
|
759
|
+
"""
|
|
760
|
+
Get a FilesAccessor for fluent file operations.
|
|
787
761
|
|
|
788
|
-
|
|
762
|
+
Returns:
|
|
763
|
+
FilesAccessor instance for chaining
|
|
789
764
|
|
|
765
|
+
Raises:
|
|
766
|
+
RuntimeError: If experiment is not open
|
|
790
767
|
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
768
|
+
Examples:
|
|
769
|
+
# Upload file - supports flexible syntax
|
|
770
|
+
experiment.files("checkpoints").upload("./model.pt", to="checkpoint.pt")
|
|
771
|
+
experiment.files(prefix="checkpoints").upload("./model.pt")
|
|
772
|
+
experiment.files().upload("./model.pt", to="models/model.pt") # root
|
|
794
773
|
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
""
|
|
798
|
-
if self._client:
|
|
799
|
-
# Remote mode: send to API
|
|
800
|
-
self._client.set_parameters(
|
|
801
|
-
experiment_id=self._experiment_id,
|
|
802
|
-
data=flattened_params
|
|
803
|
-
)
|
|
774
|
+
# List files
|
|
775
|
+
files = experiment.files("/some/location").list()
|
|
776
|
+
files = experiment.files("/models").list()
|
|
804
777
|
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
project=self.project,
|
|
809
|
-
experiment=self.name,
|
|
810
|
-
folder=self.folder,
|
|
811
|
-
data=flattened_params
|
|
812
|
-
)
|
|
778
|
+
# Download file
|
|
779
|
+
experiment.files("some.text").download()
|
|
780
|
+
experiment.files("some.text").download(to="./model.pt")
|
|
813
781
|
|
|
814
|
-
|
|
815
|
-
"""
|
|
816
|
-
|
|
782
|
+
# Download files via glob pattern
|
|
783
|
+
file_paths = experiment.files("images").list("*.png")
|
|
784
|
+
experiment.files("images").download("*.png")
|
|
817
785
|
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
"""
|
|
821
|
-
params = None
|
|
786
|
+
# This is equivalent to downloading to a directory
|
|
787
|
+
experiment.files.download("images/*.png", to="local_images")
|
|
822
788
|
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
params = self._client.get_parameters(experiment_id=self._experiment_id)
|
|
827
|
-
except Exception:
|
|
828
|
-
# Parameters don't exist yet
|
|
829
|
-
params = None
|
|
830
|
-
|
|
831
|
-
if self._storage:
|
|
832
|
-
# Local mode: read from file
|
|
833
|
-
params = self._storage.read_parameters(
|
|
834
|
-
project=self.project,
|
|
835
|
-
experiment=self.name
|
|
836
|
-
)
|
|
789
|
+
# Delete files
|
|
790
|
+
experiment.files("some.text").delete()
|
|
791
|
+
experiment.files.delete("some.text")
|
|
837
792
|
|
|
838
|
-
|
|
793
|
+
# Specific file types
|
|
794
|
+
dxp.files.save_text("content", to="view.yaml")
|
|
795
|
+
dxp.files.save_json(dict(hey="yo"), to="config.json")
|
|
796
|
+
dxp.files.save_blob(b"xxx", to="data.bin")
|
|
797
|
+
"""
|
|
798
|
+
if not self._is_open:
|
|
799
|
+
raise RuntimeError(
|
|
800
|
+
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
801
|
+
"Example:\n"
|
|
802
|
+
" with dxp.run:\n"
|
|
803
|
+
" dxp.files('path').upload()"
|
|
804
|
+
)
|
|
839
805
|
|
|
840
|
-
|
|
841
|
-
def metrics(self) -> 'MetricsManager':
|
|
842
|
-
"""
|
|
843
|
-
Get a MetricsManager for metric operations.
|
|
806
|
+
return FilesAccessor(self)
|
|
844
807
|
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
808
|
+
def bindrs(self, bindr_name: str) -> BindrsBuilder:
|
|
809
|
+
"""
|
|
810
|
+
Get a BindrsBuilder for working with file collections (bindrs).
|
|
848
811
|
|
|
849
|
-
|
|
850
|
-
MetricsManager instance
|
|
812
|
+
Bindrs are collections of files that can span multiple prefixes.
|
|
851
813
|
|
|
852
|
-
|
|
853
|
-
|
|
814
|
+
Args:
|
|
815
|
+
bindr_name: Name of the bindr (collection)
|
|
854
816
|
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
experiment.metrics("train_loss").append(value=0.5, step=100)
|
|
817
|
+
Returns:
|
|
818
|
+
BindrsBuilder instance for chaining
|
|
858
819
|
|
|
859
|
-
|
|
860
|
-
|
|
820
|
+
Raises:
|
|
821
|
+
RuntimeError: If experiment is not open
|
|
861
822
|
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
{"loss": 0.4, "acc": 0.85, "step": 2}
|
|
866
|
-
])
|
|
823
|
+
Examples:
|
|
824
|
+
# List files in a bindr
|
|
825
|
+
file_paths = experiment.bindrs("some-bindr").list()
|
|
867
826
|
|
|
868
|
-
|
|
869
|
-
|
|
827
|
+
Note:
|
|
828
|
+
This is a placeholder for future bindr functionality.
|
|
829
|
+
"""
|
|
830
|
+
if not self._is_open:
|
|
831
|
+
raise RuntimeError(
|
|
832
|
+
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
833
|
+
"Example:\n"
|
|
834
|
+
" with dxp.run:\n"
|
|
835
|
+
" files = dxp.bindrs('my-bindr').list()"
|
|
836
|
+
)
|
|
837
|
+
|
|
838
|
+
return BindrsBuilder(self, bindr_name)
|
|
839
|
+
|
|
840
|
+
def _upload_file(
|
|
841
|
+
self,
|
|
842
|
+
file_path: str,
|
|
843
|
+
prefix: str,
|
|
844
|
+
filename: str,
|
|
845
|
+
description: Optional[str],
|
|
846
|
+
tags: Optional[List[str]],
|
|
847
|
+
metadata: Optional[Dict[str, Any]],
|
|
848
|
+
checksum: str,
|
|
849
|
+
content_type: str,
|
|
850
|
+
size_bytes: int,
|
|
851
|
+
) -> Dict[str, Any]:
|
|
852
|
+
"""
|
|
853
|
+
Internal method to upload a file.
|
|
854
|
+
|
|
855
|
+
Args:
|
|
856
|
+
file_path: Local file path
|
|
857
|
+
prefix: Logical path prefix
|
|
858
|
+
filename: Original filename
|
|
859
|
+
description: Optional description
|
|
860
|
+
tags: Optional tags
|
|
861
|
+
metadata: Optional metadata
|
|
862
|
+
checksum: SHA256 checksum
|
|
863
|
+
content_type: MIME type
|
|
864
|
+
size_bytes: File size in bytes
|
|
865
|
+
|
|
866
|
+
Returns:
|
|
867
|
+
File metadata dict
|
|
868
|
+
"""
|
|
869
|
+
result = None
|
|
870
|
+
|
|
871
|
+
if self._client:
|
|
872
|
+
# Remote mode: upload to API
|
|
873
|
+
result = self._client.upload_file(
|
|
874
|
+
experiment_id=self._experiment_id,
|
|
875
|
+
file_path=file_path,
|
|
876
|
+
prefix=prefix,
|
|
877
|
+
filename=filename,
|
|
878
|
+
description=description,
|
|
879
|
+
tags=tags,
|
|
880
|
+
metadata=metadata,
|
|
881
|
+
checksum=checksum,
|
|
882
|
+
content_type=content_type,
|
|
883
|
+
size_bytes=size_bytes,
|
|
884
|
+
)
|
|
885
|
+
|
|
886
|
+
if self._storage:
|
|
887
|
+
# Local mode: copy to local storage
|
|
888
|
+
result = self._storage.write_file(
|
|
889
|
+
owner=self.owner,
|
|
890
|
+
project=self.project,
|
|
891
|
+
prefix=self._folder_path,
|
|
892
|
+
file_path=file_path,
|
|
893
|
+
path=prefix,
|
|
894
|
+
filename=filename,
|
|
895
|
+
description=description,
|
|
896
|
+
tags=tags,
|
|
897
|
+
metadata=metadata,
|
|
898
|
+
checksum=checksum,
|
|
899
|
+
content_type=content_type,
|
|
900
|
+
size_bytes=size_bytes,
|
|
901
|
+
)
|
|
902
|
+
|
|
903
|
+
return result
|
|
904
|
+
|
|
905
|
+
def _list_files(
|
|
906
|
+
self, prefix: Optional[str] = None, tags: Optional[List[str]] = None
|
|
907
|
+
) -> List[Dict[str, Any]]:
|
|
908
|
+
"""
|
|
909
|
+
Internal method to list files.
|
|
870
910
|
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
from .metric import MetricsManager
|
|
911
|
+
Args:
|
|
912
|
+
prefix: Optional prefix filter
|
|
913
|
+
tags: Optional tags filter
|
|
875
914
|
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
915
|
+
Returns:
|
|
916
|
+
List of file metadata dicts
|
|
917
|
+
"""
|
|
918
|
+
files = []
|
|
919
|
+
|
|
920
|
+
if self._client:
|
|
921
|
+
# Remote mode: fetch from API
|
|
922
|
+
files = self._client.list_files(
|
|
923
|
+
experiment_id=self._experiment_id, prefix=prefix, tags=tags
|
|
924
|
+
)
|
|
925
|
+
|
|
926
|
+
if self._storage:
|
|
927
|
+
# Local mode: read from metadata file
|
|
928
|
+
files = self._storage.list_files(
|
|
929
|
+
owner=self.owner,
|
|
930
|
+
project=self.project,
|
|
931
|
+
prefix=self._folder_path,
|
|
932
|
+
path_prefix=prefix,
|
|
933
|
+
tags=tags,
|
|
934
|
+
)
|
|
935
|
+
|
|
936
|
+
return files
|
|
937
|
+
|
|
938
|
+
def _download_file(self, file_id: str, dest_path: Optional[str] = None) -> str:
|
|
939
|
+
"""
|
|
940
|
+
Internal method to download a file.
|
|
881
941
|
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
return self._metrics_manager
|
|
886
|
-
|
|
887
|
-
def _append_to_metric(
|
|
888
|
-
self,
|
|
889
|
-
name: Optional[str],
|
|
890
|
-
data: Dict[str, Any],
|
|
891
|
-
description: Optional[str],
|
|
892
|
-
tags: Optional[List[str]],
|
|
893
|
-
metadata: Optional[Dict[str, Any]]
|
|
894
|
-
) -> Dict[str, Any]:
|
|
895
|
-
"""
|
|
896
|
-
Internal method to append a single data point to a metric.
|
|
897
|
-
|
|
898
|
-
Args:
|
|
899
|
-
name: Metric name (can be None for unnamed metrics)
|
|
900
|
-
data: Data point (flexible schema)
|
|
901
|
-
description: Optional metric description
|
|
902
|
-
tags: Optional tags
|
|
903
|
-
metadata: Optional metadata
|
|
904
|
-
|
|
905
|
-
Returns:
|
|
906
|
-
Dict with metricId, index, bufferedDataPoints, chunkSize
|
|
907
|
-
"""
|
|
908
|
-
result = None
|
|
909
|
-
|
|
910
|
-
if self._client:
|
|
911
|
-
# Remote mode: append via API
|
|
912
|
-
result = self._client.append_to_metric(
|
|
913
|
-
experiment_id=self._experiment_id,
|
|
914
|
-
metric_name=name,
|
|
915
|
-
data=data,
|
|
916
|
-
description=description,
|
|
917
|
-
tags=tags,
|
|
918
|
-
metadata=metadata
|
|
919
|
-
)
|
|
942
|
+
Args:
|
|
943
|
+
file_id: File ID
|
|
944
|
+
dest_path: Optional destination path (defaults to original filename)
|
|
920
945
|
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
946
|
+
Returns:
|
|
947
|
+
Path to downloaded file
|
|
948
|
+
"""
|
|
949
|
+
if self._client:
|
|
950
|
+
# Remote mode: download from API
|
|
951
|
+
return self._client.download_file(
|
|
952
|
+
experiment_id=self._experiment_id, file_id=file_id, dest_path=dest_path
|
|
953
|
+
)
|
|
954
|
+
|
|
955
|
+
if self._storage:
|
|
956
|
+
# Local mode: copy from local storage
|
|
957
|
+
return self._storage.read_file(
|
|
958
|
+
owner=self.owner,
|
|
959
|
+
project=self.project,
|
|
960
|
+
prefix=self._folder_path,
|
|
961
|
+
file_id=file_id,
|
|
962
|
+
dest_path=dest_path,
|
|
963
|
+
)
|
|
964
|
+
|
|
965
|
+
raise RuntimeError("No client or storage configured")
|
|
966
|
+
|
|
967
|
+
def _delete_file(self, file_id: str) -> Dict[str, Any]:
|
|
968
|
+
"""
|
|
969
|
+
Internal method to delete a file.
|
|
933
970
|
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
def _append_batch_to_metric(
|
|
937
|
-
self,
|
|
938
|
-
name: Optional[str],
|
|
939
|
-
data_points: List[Dict[str, Any]],
|
|
940
|
-
description: Optional[str],
|
|
941
|
-
tags: Optional[List[str]],
|
|
942
|
-
metadata: Optional[Dict[str, Any]]
|
|
943
|
-
) -> Dict[str, Any]:
|
|
944
|
-
"""
|
|
945
|
-
Internal method to append multiple data points to a metric.
|
|
946
|
-
|
|
947
|
-
Args:
|
|
948
|
-
name: Metric name (can be None for unnamed metrics)
|
|
949
|
-
data_points: List of data points
|
|
950
|
-
description: Optional metric description
|
|
951
|
-
tags: Optional tags
|
|
952
|
-
metadata: Optional metadata
|
|
953
|
-
|
|
954
|
-
Returns:
|
|
955
|
-
Dict with metricId, startIndex, endIndex, count
|
|
956
|
-
"""
|
|
957
|
-
result = None
|
|
958
|
-
|
|
959
|
-
if self._client:
|
|
960
|
-
# Remote mode: append batch via API
|
|
961
|
-
result = self._client.append_batch_to_metric(
|
|
962
|
-
experiment_id=self._experiment_id,
|
|
963
|
-
metric_name=name,
|
|
964
|
-
data_points=data_points,
|
|
965
|
-
description=description,
|
|
966
|
-
tags=tags,
|
|
967
|
-
metadata=metadata
|
|
968
|
-
)
|
|
971
|
+
Args:
|
|
972
|
+
file_id: File ID
|
|
969
973
|
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
974
|
+
Returns:
|
|
975
|
+
Dict with id and deletedAt
|
|
976
|
+
"""
|
|
977
|
+
result = None
|
|
978
|
+
|
|
979
|
+
if self._client:
|
|
980
|
+
# Remote mode: delete via API
|
|
981
|
+
result = self._client.delete_file(
|
|
982
|
+
experiment_id=self._experiment_id, file_id=file_id
|
|
983
|
+
)
|
|
984
|
+
|
|
985
|
+
if self._storage:
|
|
986
|
+
# Local mode: soft delete in metadata
|
|
987
|
+
result = self._storage.delete_file(
|
|
988
|
+
owner=self.owner,
|
|
989
|
+
project=self.project,
|
|
990
|
+
prefix=self._folder_path,
|
|
991
|
+
file_id=file_id,
|
|
992
|
+
)
|
|
993
|
+
|
|
994
|
+
return result
|
|
995
|
+
|
|
996
|
+
def _update_file(
|
|
997
|
+
self,
|
|
998
|
+
file_id: str,
|
|
999
|
+
description: Optional[str],
|
|
1000
|
+
tags: Optional[List[str]],
|
|
1001
|
+
metadata: Optional[Dict[str, Any]],
|
|
1002
|
+
) -> Dict[str, Any]:
|
|
1003
|
+
"""
|
|
1004
|
+
Internal method to update file metadata.
|
|
981
1005
|
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
start_index: int,
|
|
988
|
-
limit: int
|
|
989
|
-
) -> Dict[str, Any]:
|
|
990
|
-
"""
|
|
991
|
-
Internal method to read data points from a metric.
|
|
992
|
-
|
|
993
|
-
Args:
|
|
994
|
-
name: Metric name
|
|
995
|
-
start_index: Starting index
|
|
996
|
-
limit: Max points to read
|
|
997
|
-
|
|
998
|
-
Returns:
|
|
999
|
-
Dict with data, startIndex, endIndex, total, hasMore
|
|
1000
|
-
"""
|
|
1001
|
-
result = None
|
|
1002
|
-
|
|
1003
|
-
if self._client:
|
|
1004
|
-
# Remote mode: read via API
|
|
1005
|
-
result = self._client.read_metric_data(
|
|
1006
|
-
experiment_id=self._experiment_id,
|
|
1007
|
-
metric_name=name,
|
|
1008
|
-
start_index=start_index,
|
|
1009
|
-
limit=limit
|
|
1010
|
-
)
|
|
1006
|
+
Args:
|
|
1007
|
+
file_id: File ID
|
|
1008
|
+
description: Optional description
|
|
1009
|
+
tags: Optional tags
|
|
1010
|
+
metadata: Optional metadata
|
|
1011
1011
|
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1012
|
+
Returns:
|
|
1013
|
+
Updated file metadata dict
|
|
1014
|
+
"""
|
|
1015
|
+
result = None
|
|
1016
|
+
|
|
1017
|
+
if self._client:
|
|
1018
|
+
# Remote mode: update via API
|
|
1019
|
+
result = self._client.update_file(
|
|
1020
|
+
experiment_id=self._experiment_id,
|
|
1021
|
+
file_id=file_id,
|
|
1022
|
+
description=description,
|
|
1023
|
+
tags=tags,
|
|
1024
|
+
metadata=metadata,
|
|
1025
|
+
)
|
|
1026
|
+
|
|
1027
|
+
if self._storage:
|
|
1028
|
+
# Local mode: update in metadata file
|
|
1029
|
+
result = self._storage.update_file_metadata(
|
|
1030
|
+
owner=self.owner,
|
|
1031
|
+
project=self.project,
|
|
1032
|
+
prefix=self._folder_path,
|
|
1033
|
+
file_id=file_id,
|
|
1034
|
+
description=description,
|
|
1035
|
+
tags=tags,
|
|
1036
|
+
metadata=metadata,
|
|
1037
|
+
)
|
|
1038
|
+
|
|
1039
|
+
return result
|
|
1040
|
+
|
|
1041
|
+
def _write_params(self, flattened_params: Dict[str, Any]) -> None:
|
|
1042
|
+
"""
|
|
1043
|
+
Internal method to write/merge parameters.
|
|
1021
1044
|
|
|
1022
|
-
|
|
1045
|
+
Args:
|
|
1046
|
+
flattened_params: Already-flattened parameter dict with dot notation
|
|
1047
|
+
"""
|
|
1048
|
+
if self._client:
|
|
1049
|
+
# Remote mode: send to API
|
|
1050
|
+
self._client.set_parameters(
|
|
1051
|
+
experiment_id=self._experiment_id, data=flattened_params
|
|
1052
|
+
)
|
|
1053
|
+
|
|
1054
|
+
if self._storage:
|
|
1055
|
+
# Local mode: write to file
|
|
1056
|
+
self._storage.write_parameters(
|
|
1057
|
+
owner=self.owner,
|
|
1058
|
+
project=self.project,
|
|
1059
|
+
prefix=self._folder_path,
|
|
1060
|
+
data=flattened_params,
|
|
1061
|
+
)
|
|
1062
|
+
|
|
1063
|
+
def _read_params(self) -> Optional[Dict[str, Any]]:
|
|
1064
|
+
"""
|
|
1065
|
+
Internal method to read parameters.
|
|
1023
1066
|
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1067
|
+
Returns:
|
|
1068
|
+
Flattened parameters dict, or None if no parameters exist
|
|
1069
|
+
"""
|
|
1070
|
+
params = None
|
|
1071
|
+
|
|
1072
|
+
if self._client:
|
|
1073
|
+
# Remote mode: fetch from API
|
|
1074
|
+
try:
|
|
1075
|
+
params = self._client.get_parameters(experiment_id=self._experiment_id)
|
|
1076
|
+
except Exception:
|
|
1077
|
+
# Parameters don't exist yet
|
|
1078
|
+
params = None
|
|
1027
1079
|
|
|
1028
|
-
|
|
1029
|
-
|
|
1080
|
+
if self._storage:
|
|
1081
|
+
# Local mode: read from file
|
|
1082
|
+
params = self._storage.read_parameters(
|
|
1083
|
+
owner=self.owner, project=self.project, prefix=self._folder_path
|
|
1084
|
+
)
|
|
1030
1085
|
|
|
1031
|
-
|
|
1032
|
-
Dict with metric stats
|
|
1033
|
-
"""
|
|
1034
|
-
result = None
|
|
1086
|
+
return params
|
|
1035
1087
|
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
metric_name=name
|
|
1041
|
-
)
|
|
1088
|
+
@property
def metrics(self) -> "MetricsManager":
    """
    Return the experiment's MetricsManager, creating it on first access.

    Usage patterns shown by the original documentation:
        1. Named:   experiment.metrics("train").log(loss=0.5, accuracy=0.9)
        2. Unnamed: experiment.metrics.log(epoch=epoch).flush()

    Returns:
        MetricsManager instance (the same object on every access)

    Raises:
        RuntimeError: If experiment is not open

    Examples:
        # Named metric with multi-field logging
        experiment.metrics("train").log(loss=0.5, accuracy=0.9)
        experiment.metrics("eval").log(loss=0.6, accuracy=0.85)
        experiment.metrics.log(epoch=epoch).flush()

        # Nested dict pattern (single call for all metrics)
        experiment.metrics.log(
            epoch=100,
            train=dict(loss=0.142, accuracy=0.80),
            eval=dict(loss=0.201, accuracy=0.76)
        )

        # Read data
        data = experiment.metrics("train").read(start_index=0, limit=100)

        # Get statistics
        stats = experiment.metrics("train").stats()
    """
    from .metric import MetricsManager

    if not self._is_open:
        message = (
            "Cannot use metrics on closed experiment. "
            "Use 'with Experiment(...).run as experiment:' or call experiment.run.start() first."
        )
        raise RuntimeError(message)

    # One manager per experiment; per the original note, this keeps the
    # manager's MetricBuilder cache alive across calls.
    manager = self._metrics_manager
    if manager is None:
        manager = MetricsManager(self)
        self._metrics_manager = manager
    return manager
|
|
1134
|
+
|
|
1135
|
+
def _append_to_metric(
|
|
1136
|
+
self,
|
|
1137
|
+
name: Optional[str],
|
|
1138
|
+
data: Dict[str, Any],
|
|
1139
|
+
description: Optional[str],
|
|
1140
|
+
tags: Optional[List[str]],
|
|
1141
|
+
metadata: Optional[Dict[str, Any]],
|
|
1142
|
+
) -> Dict[str, Any]:
|
|
1143
|
+
"""
|
|
1144
|
+
Internal method to append a single data point to a metric.
|
|
1074
1145
|
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1146
|
+
Args:
|
|
1147
|
+
name: Metric name (can be None for unnamed metrics)
|
|
1148
|
+
data: Data point (flexible schema)
|
|
1149
|
+
description: Optional metric description
|
|
1150
|
+
tags: Optional tags
|
|
1151
|
+
metadata: Optional metadata
|
|
1079
1152
|
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1153
|
+
Returns:
|
|
1154
|
+
Dict with metricId, index, bufferedDataPoints, chunkSize
|
|
1155
|
+
"""
|
|
1156
|
+
result = None
|
|
1157
|
+
|
|
1158
|
+
if self._client:
|
|
1159
|
+
# Remote mode: append via API
|
|
1160
|
+
result = self._client.append_to_metric(
|
|
1161
|
+
experiment_id=self._experiment_id,
|
|
1162
|
+
metric_name=name,
|
|
1163
|
+
data=data,
|
|
1164
|
+
description=description,
|
|
1165
|
+
tags=tags,
|
|
1166
|
+
metadata=metadata,
|
|
1167
|
+
)
|
|
1168
|
+
|
|
1169
|
+
if self._storage:
|
|
1170
|
+
# Local mode: append to local storage
|
|
1171
|
+
result = self._storage.append_to_metric(
|
|
1172
|
+
owner=self.owner,
|
|
1173
|
+
project=self.project,
|
|
1174
|
+
prefix=self._folder_path,
|
|
1175
|
+
metric_name=name,
|
|
1176
|
+
data=data,
|
|
1177
|
+
description=description,
|
|
1178
|
+
tags=tags,
|
|
1179
|
+
metadata=metadata,
|
|
1180
|
+
)
|
|
1181
|
+
|
|
1182
|
+
return result
|
|
1183
|
+
|
|
1184
|
+
def _append_batch_to_metric(
|
|
1185
|
+
self,
|
|
1186
|
+
name: Optional[str],
|
|
1187
|
+
data_points: List[Dict[str, Any]],
|
|
1188
|
+
description: Optional[str],
|
|
1189
|
+
tags: Optional[List[str]],
|
|
1190
|
+
metadata: Optional[Dict[str, Any]],
|
|
1191
|
+
) -> Dict[str, Any]:
|
|
1192
|
+
"""
|
|
1193
|
+
Internal method to append multiple data points to a metric.
|
|
1084
1194
|
|
|
1195
|
+
Args:
|
|
1196
|
+
name: Metric name (can be None for unnamed metrics)
|
|
1197
|
+
data_points: List of data points
|
|
1198
|
+
description: Optional metric description
|
|
1199
|
+
tags: Optional tags
|
|
1200
|
+
metadata: Optional metadata
|
|
1085
1201
|
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1202
|
+
Returns:
|
|
1203
|
+
Dict with metricId, startIndex, endIndex, count
|
|
1204
|
+
"""
|
|
1205
|
+
result = None
|
|
1206
|
+
|
|
1207
|
+
if self._client:
|
|
1208
|
+
# Remote mode: append batch via API
|
|
1209
|
+
result = self._client.append_batch_to_metric(
|
|
1210
|
+
experiment_id=self._experiment_id,
|
|
1211
|
+
metric_name=name,
|
|
1212
|
+
data_points=data_points,
|
|
1213
|
+
description=description,
|
|
1214
|
+
tags=tags,
|
|
1215
|
+
metadata=metadata,
|
|
1216
|
+
)
|
|
1217
|
+
|
|
1218
|
+
if self._storage:
|
|
1219
|
+
# Local mode: append batch to local storage
|
|
1220
|
+
result = self._storage.append_batch_to_metric(
|
|
1221
|
+
owner=self.owner,
|
|
1222
|
+
project=self.project,
|
|
1223
|
+
prefix=self._folder_path,
|
|
1224
|
+
metric_name=name,
|
|
1225
|
+
data_points=data_points,
|
|
1226
|
+
description=description,
|
|
1227
|
+
tags=tags,
|
|
1228
|
+
metadata=metadata,
|
|
1229
|
+
)
|
|
1230
|
+
|
|
1231
|
+
return result
|
|
1232
|
+
|
|
1233
|
+
def _read_metric_data(
|
|
1234
|
+
self, name: str, start_index: int, limit: int
|
|
1235
|
+
) -> Dict[str, Any]:
|
|
1091
1236
|
"""
|
|
1092
|
-
|
|
1237
|
+
Internal method to read data points from a metric.
|
|
1093
1238
|
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1239
|
+
Args:
|
|
1240
|
+
name: Metric name
|
|
1241
|
+
start_index: Starting index
|
|
1242
|
+
limit: Max points to read
|
|
1243
|
+
|
|
1244
|
+
Returns:
|
|
1245
|
+
Dict with data, startIndex, endIndex, total, hasMore
|
|
1246
|
+
"""
|
|
1247
|
+
result = None
|
|
1248
|
+
|
|
1249
|
+
if self._client:
|
|
1250
|
+
# Remote mode: read via API
|
|
1251
|
+
result = self._client.read_metric_data(
|
|
1252
|
+
experiment_id=self._experiment_id,
|
|
1253
|
+
metric_name=name,
|
|
1254
|
+
start_index=start_index,
|
|
1255
|
+
limit=limit,
|
|
1256
|
+
)
|
|
1257
|
+
|
|
1258
|
+
if self._storage:
|
|
1259
|
+
# Local mode: read from local storage
|
|
1260
|
+
result = self._storage.read_metric_data(
|
|
1261
|
+
owner=self.owner,
|
|
1262
|
+
project=self.project,
|
|
1263
|
+
prefix=self._folder_path,
|
|
1264
|
+
metric_name=name,
|
|
1265
|
+
start_index=start_index,
|
|
1266
|
+
limit=limit,
|
|
1267
|
+
)
|
|
1268
|
+
|
|
1269
|
+
return result
|
|
1270
|
+
|
|
1271
|
+
def _get_metric_stats(self, name: str) -> Dict[str, Any]:
|
|
1272
|
+
"""
|
|
1273
|
+
Internal method to get metric statistics.
|
|
1274
|
+
|
|
1275
|
+
Args:
|
|
1276
|
+
name: Metric name
|
|
1277
|
+
|
|
1278
|
+
Returns:
|
|
1279
|
+
Dict with metric stats
|
|
1280
|
+
"""
|
|
1281
|
+
result = None
|
|
1282
|
+
|
|
1283
|
+
if self._client:
|
|
1284
|
+
# Remote mode: get stats via API
|
|
1285
|
+
result = self._client.get_metric_stats(
|
|
1286
|
+
experiment_id=self._experiment_id, metric_name=name
|
|
1287
|
+
)
|
|
1288
|
+
|
|
1289
|
+
if self._storage:
|
|
1290
|
+
# Local mode: get stats from local storage
|
|
1291
|
+
result = self._storage.get_metric_stats(
|
|
1292
|
+
owner=self.owner,
|
|
1293
|
+
project=self.project,
|
|
1294
|
+
prefix=self._folder_path,
|
|
1295
|
+
metric_name=name,
|
|
1296
|
+
)
|
|
1297
|
+
|
|
1298
|
+
return result
|
|
1299
|
+
|
|
1300
|
+
def _list_metrics(self) -> List[Dict[str, Any]]:
|
|
1301
|
+
"""
|
|
1302
|
+
Internal method to list all metrics in experiment.
|
|
1104
1303
|
|
|
1105
|
-
|
|
1106
|
-
|
|
1304
|
+
Returns:
|
|
1305
|
+
List of metric summaries
|
|
1107
1306
|
"""
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1307
|
+
result = None
|
|
1308
|
+
|
|
1309
|
+
if self._client:
|
|
1310
|
+
# Remote mode: list via API
|
|
1311
|
+
result = self._client.list_metrics(experiment_id=self._experiment_id)
|
|
1312
|
+
|
|
1313
|
+
if self._storage:
|
|
1314
|
+
# Local mode: list from local storage
|
|
1315
|
+
result = self._storage.list_metrics(
|
|
1316
|
+
owner=self.owner, project=self.project, prefix=self._folder_path
|
|
1317
|
+
)
|
|
1318
|
+
|
|
1319
|
+
return result or []
|
|
1320
|
+
|
|
1321
|
+
@property
def id(self) -> Optional[str]:
    """Return the stored experiment ID (only available after open in remote mode)."""
    experiment_id = self._experiment_id
    return experiment_id
|
|
1325
|
+
|
|
1326
|
+
@property
def data(self) -> Optional[Dict[str, Any]]:
    """Return the stored experiment record (only available after open in remote mode)."""
    experiment_record = self._experiment_data
    return experiment_record
|
|
1330
|
+
|
|
1331
|
+
|
|
1332
|
+
def ml_dash_experiment(prefix: str, **kwargs) -> Callable:
    """
    Decorator for wrapping functions with an ML-Dash experiment.

    On every call of the decorated function, an
    ``Experiment(prefix=prefix, **kwargs)`` is created, its ``.run`` context
    is entered, and the value bound by that context is passed to the
    function as the ``experiment`` keyword argument. The context is exited
    when the function returns or raises.

    Args:
        prefix: Full experiment path like "owner/project/folder.../name"
        **kwargs: Additional arguments passed to Experiment constructor

    Returns:
        A decorator; the wrapped function keeps the original's name and
        docstring (via functools.wraps) and returns whatever it returns.

    Usage:
        @ml_dash_experiment(
            prefix="ge/my-project/experiments/my-experiment",
            dash_url="https://api.dash.ml"
        )
        def train_model(experiment):
            # Function code here
            pass

    The decorated function must accept an 'experiment' keyword argument
    (or **kwargs): it is always passed, so a function without one raises
    TypeError when called. An 'experiment' keyword supplied by the caller
    is overwritten by the injected one.
    """

    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        def wrapper(*args, **func_kwargs):
            with Experiment(prefix=prefix, **kwargs).run as experiment:
                # Inject experiment into function kwargs; this deliberately
                # replaces any 'experiment' value the caller passed in.
                func_kwargs["experiment"] = experiment
                return func(*args, **func_kwargs)

        return wrapper

    return decorator
|