ml-dash 0.6.2rc1__py3-none-any.whl → 0.6.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml_dash/__init__.py +36 -64
- ml_dash/auth/token_storage.py +267 -226
- ml_dash/auto_start.py +28 -15
- ml_dash/cli.py +16 -2
- ml_dash/cli_commands/api.py +174 -0
- ml_dash/cli_commands/download.py +773 -666
- ml_dash/cli_commands/list.py +164 -14
- ml_dash/cli_commands/login.py +190 -183
- ml_dash/cli_commands/profile.py +92 -0
- ml_dash/cli_commands/upload.py +1312 -1141
- ml_dash/client.py +335 -82
- ml_dash/config.py +119 -119
- ml_dash/experiment.py +1293 -1033
- ml_dash/files.py +339 -224
- ml_dash/log.py +7 -7
- ml_dash/metric.py +359 -100
- ml_dash/params.py +6 -6
- ml_dash/remote_auto_start.py +20 -17
- ml_dash/run.py +211 -65
- ml_dash/snowflake.py +173 -0
- ml_dash/storage.py +1051 -1081
- {ml_dash-0.6.2rc1.dist-info → ml_dash-0.6.4.dist-info}/METADATA +12 -14
- ml_dash-0.6.4.dist-info/RECORD +33 -0
- {ml_dash-0.6.2rc1.dist-info → ml_dash-0.6.4.dist-info}/WHEEL +1 -1
- ml_dash-0.6.2rc1.dist-info/RECORD +0 -30
- {ml_dash-0.6.2rc1.dist-info → ml_dash-0.6.4.dist-info}/entry_points.txt +0 -0
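
The bulk of this release is the rewrite of ml_dash/experiment.py shown below: the constructor now takes a single prefix ("{owner}/{project}/path.../name") plus dash_url/dash_root (the old remote/local_path arguments are deprecated), and the run lifecycle is driven through Experiment(...).run. A minimal usage sketch, assuming only the names that appear in the diff below (Experiment, prefix, dash_root, run, params.set, logs.info) — not an official quick-start:

    from ml_dash import Experiment

    # Local-only mode: writes under ".dash/" (the default dash_root)
    experiment = Experiment(prefix="ge/my-project/experiments/exp1", dash_root=".dash")

    # The run manager doubles as a context manager (and a decorator)
    with experiment.run as exp:
        exp.params.set(lr=0.001, batch_size=32)     # record hyperparameters
        exp.logs.info("Training started", epoch=1)  # structured log entry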
ml_dash/experiment.py
CHANGED
@@ -3,1161 +3,1421 @@ Experiment class for ML-Dash SDK.
|
|
|
3
3
|
|
|
4
4
|
Supports three usage styles:
|
|
5
5
|
1. Decorator: @ml_dash_experiment(...)
|
|
6
|
-
2. Context manager: with Experiment(...) as exp:
|
|
6
|
+
2. Context manager: with Experiment(...).run as exp:
|
|
7
7
|
3. Direct instantiation: exp = Experiment(...)
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
-
from typing import Optional, Dict, Any, List, Callable
|
|
11
|
-
from enum import Enum
|
|
12
10
|
import functools
|
|
13
|
-
from pathlib import Path
|
|
14
11
|
from datetime import datetime
|
|
12
|
+
from enum import Enum
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, Callable, Dict, List, Optional, Union, Unpack
|
|
15
15
|
|
|
16
16
|
from .client import RemoteClient
|
|
17
|
-
from .
|
|
18
|
-
from .log import
|
|
17
|
+
from .files import BindrsBuilder, FilesAccessor
|
|
18
|
+
from .log import LogBuilder, LogLevel
|
|
19
19
|
from .params import ParametersBuilder
|
|
20
|
-
from .files import FilesAccessor, BindrsBuilder
|
|
21
20
|
from .run import RUN
|
|
21
|
+
from .storage import LocalStorage
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
REMOTE = "remote"
|
|
28
|
-
HYBRID = "hybrid" # Future: sync local to remote
|
|
24
|
+
def _expand_exp_template(template: str) -> str:
|
|
25
|
+
"""
|
|
26
|
+
Expand {EXP.attr} placeholders in template string.
|
|
29
27
|
|
|
28
|
+
Handles both regular attributes and property descriptors on the EXP class.
|
|
30
29
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
Lifecycle manager for experiments.
|
|
30
|
+
Args:
|
|
31
|
+
template: String containing {EXP.attr} placeholders
|
|
34
32
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
"""
|
|
33
|
+
Returns:
|
|
34
|
+
String with placeholders expanded to actual values
|
|
35
|
+
"""
|
|
36
|
+
import re
|
|
40
37
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
38
|
+
def replace_match(match):
|
|
39
|
+
attr_name = match.group(1)
|
|
40
|
+
# Get the attribute from the class __dict__, handling properties correctly
|
|
41
|
+
# EXP is a params_proto class where properties are stored in EXP.__dict__
|
|
42
|
+
attr = RUN.__dict__.get(attr_name)
|
|
43
|
+
if isinstance(attr, property):
|
|
44
|
+
# For properties, call the getter with EXP as self
|
|
45
|
+
return str(attr.fget(RUN))
|
|
46
|
+
else:
|
|
47
|
+
# For regular attributes, access via getattr
|
|
48
|
+
return str(getattr(RUN, attr_name))
|
|
44
49
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
self._experiment = experiment
|
|
50
|
+
# Match {EXP.attr_name} pattern
|
|
51
|
+
pattern = r"\{EXP\.(\w+)\}"
|
|
52
|
+
return re.sub(pattern, replace_match, template)
|
|
49
53
|
|
|
50
|
-
def start(self) -> "Experiment":
|
|
51
|
-
"""
|
|
52
|
-
Start the experiment (sets status to RUNNING).
|
|
53
54
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
"""
|
|
57
|
-
return self._experiment._open()
|
|
55
|
+
class OperationMode(Enum):
|
|
56
|
+
"""Operation mode for the experiment."""
|
|
58
57
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
58
|
+
LOCAL = "local"
|
|
59
|
+
REMOTE = "remote"
|
|
60
|
+
HYBRID = "hybrid" # Future: sync local to remote
|
|
62
61
|
|
|
63
|
-
def fail(self) -> None:
|
|
64
|
-
"""Mark experiment as failed (status: FAILED)."""
|
|
65
|
-
self._experiment._close(status="FAILED")
|
|
66
62
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
63
|
+
class RunManager:
|
|
64
|
+
"""
|
|
65
|
+
Lifecycle manager for experiments.
|
|
70
66
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
67
|
+
Supports three usage patterns:
|
|
68
|
+
1. Method calls: experiment.run.start(), experiment.run.complete()
|
|
69
|
+
2. Context manager: with Experiment(...).run as exp:
|
|
70
|
+
3. Decorator: @exp.run or @Experiment(...).run
|
|
71
|
+
"""
|
|
75
72
|
|
|
76
|
-
|
|
77
|
-
|
|
73
|
+
def __init__(self, experiment: "Experiment"):
|
|
74
|
+
"""
|
|
75
|
+
Initialize RunManager.
|
|
78
76
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
77
|
+
Args:
|
|
78
|
+
experiment: Parent Experiment instance
|
|
79
|
+
"""
|
|
80
|
+
self._experiment = experiment
|
|
83
81
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
Set the folder for this experiment before initialization.
|
|
82
|
+
def start(self) -> "Experiment":
|
|
83
|
+
"""
|
|
84
|
+
Start the experiment (sets status to RUNNING).
|
|
88
85
|
|
|
89
|
-
|
|
90
|
-
|
|
86
|
+
Returns:
|
|
87
|
+
The experiment instance for chaining
|
|
88
|
+
"""
|
|
89
|
+
return self._experiment._open()
|
|
91
90
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
91
|
+
def complete(self) -> None:
|
|
92
|
+
"""Mark experiment as completed (status: COMPLETED)."""
|
|
93
|
+
self._experiment._close(status="COMPLETED")
|
|
95
94
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
95
|
+
def fail(self) -> None:
|
|
96
|
+
"""Mark experiment as failed (status: FAILED)."""
|
|
97
|
+
self._experiment._close(status="FAILED")
|
|
99
98
|
|
|
100
|
-
|
|
101
|
-
|
|
99
|
+
def cancel(self) -> None:
|
|
100
|
+
"""Mark experiment as cancelled (status: CANCELLED)."""
|
|
101
|
+
self._experiment._close(status="CANCELLED")
|
|
102
102
|
|
|
103
|
-
|
|
104
|
-
|
|
103
|
+
@property
|
|
104
|
+
def prefix(self) -> Optional[str]:
|
|
105
|
+
"""
|
|
106
|
+
Get the current folder prefix for this experiment.
|
|
105
107
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
+
Returns:
|
|
109
|
+
Current folder prefix path or None
|
|
108
110
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
+
Example:
|
|
112
|
+
current_prefix = exp.run.prefix
|
|
113
|
+
"""
|
|
114
|
+
return self._experiment._folder_path
|
|
111
115
|
|
|
112
|
-
|
|
113
|
-
|
|
116
|
+
@prefix.setter
|
|
117
|
+
def prefix(self, value: Optional[str]) -> None:
|
|
118
|
+
"""
|
|
119
|
+
Set the folder prefix for this experiment before initialization.
|
|
114
120
|
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
dxp.params.set(lr=0.001)
|
|
118
|
-
"""
|
|
119
|
-
if self._experiment._is_open:
|
|
120
|
-
raise RuntimeError(
|
|
121
|
-
"Cannot change folder after experiment is initialized. "
|
|
122
|
-
"Set folder before calling start() or entering 'with' block."
|
|
123
|
-
)
|
|
121
|
+
This can ONLY be set before the experiment is started (initialized).
|
|
122
|
+
Once the experiment is opened, the prefix cannot be changed.
|
|
124
123
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
self._experiment._folder_template = value
|
|
129
|
-
else:
|
|
130
|
-
# Static folder - set directly
|
|
131
|
-
self._experiment.folder = value
|
|
124
|
+
Supports template variables:
|
|
125
|
+
- {EXP.name} - Experiment name
|
|
126
|
+
- {EXP.id} - Experiment ID
|
|
132
127
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
128
|
+
Args:
|
|
129
|
+
value: Folder prefix path with optional template variables
|
|
130
|
+
(e.g., "ge/myproject/{EXP.name}" or None)
|
|
136
131
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
if exc_type is not None:
|
|
140
|
-
self.fail()
|
|
141
|
-
else:
|
|
142
|
-
self.complete()
|
|
143
|
-
return False
|
|
132
|
+
Raises:
|
|
133
|
+
RuntimeError: If experiment is already initialized/open
|
|
144
134
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
Decorator support for wrapping functions with experiment lifecycle.
|
|
135
|
+
Examples:
|
|
136
|
+
from ml_dash import dxp
|
|
148
137
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
def train(exp):
|
|
152
|
-
exp.log("Training...")
|
|
153
|
-
"""
|
|
154
|
-
@functools.wraps(func)
|
|
155
|
-
def wrapper(*args, **kwargs):
|
|
156
|
-
with self as exp:
|
|
157
|
-
return func(exp, *args, **kwargs)
|
|
158
|
-
return wrapper
|
|
138
|
+
# Static folder
|
|
139
|
+
dxp.run.prefix = "ge/myproject/experiments/resnet"
|
|
159
140
|
|
|
141
|
+
# Template with experiment name
|
|
142
|
+
dxp.run.prefix = "ge/iclr_2024/{EXP.name}"
|
|
160
143
|
|
|
161
|
-
|
|
144
|
+
# Now start the experiment
|
|
145
|
+
with dxp.run:
|
|
146
|
+
dxp.params.set(lr=0.001)
|
|
162
147
|
"""
|
|
163
|
-
|
|
148
|
+
if self._experiment._is_open:
|
|
149
|
+
raise RuntimeError(
|
|
150
|
+
"Cannot change prefix after experiment is initialized. "
|
|
151
|
+
"Set prefix before calling start() or entering 'with' block."
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
if value:
|
|
155
|
+
# Sync EXP with this experiment's values
|
|
156
|
+
RUN.name = self._experiment.name
|
|
157
|
+
RUN.description = self._experiment.description
|
|
158
|
+
# Generate id/timestamp if not already set
|
|
159
|
+
if RUN.id is None:
|
|
160
|
+
RUN._init_run()
|
|
161
|
+
# Format with EXP - use helper to expand properties correctly
|
|
162
|
+
value = _expand_exp_template(value)
|
|
163
|
+
|
|
164
|
+
# Update the folder on the experiment
|
|
165
|
+
self._experiment._folder_path = value
|
|
166
|
+
|
|
167
|
+
def __enter__(self) -> "Experiment":
|
|
168
|
+
"""Context manager entry - starts the experiment."""
|
|
169
|
+
return self.start()
|
|
170
|
+
|
|
171
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
172
|
+
"""Context manager exit - completes or fails the experiment."""
|
|
173
|
+
if exc_type is not None:
|
|
174
|
+
self.fail()
|
|
175
|
+
else:
|
|
176
|
+
self.complete()
|
|
177
|
+
return False
|
|
178
|
+
|
|
179
|
+
def __call__(self, func: Callable) -> Callable:
|
|
180
|
+
"""
|
|
181
|
+
Decorator support for wrapping functions with experiment lifecycle.
|
|
164
182
|
|
|
165
|
-
Usage
|
|
183
|
+
Usage:
|
|
184
|
+
@exp.run
|
|
185
|
+
def train(exp):
|
|
186
|
+
exp.log("Training...")
|
|
187
|
+
"""
|
|
166
188
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
remote="https://api.dash.ml",
|
|
172
|
-
api_key="your-jwt-token"
|
|
173
|
-
)
|
|
189
|
+
@functools.wraps(func)
|
|
190
|
+
def wrapper(*args, **kwargs):
|
|
191
|
+
with self as exp:
|
|
192
|
+
return func(exp, *args, **kwargs)
|
|
174
193
|
|
|
175
|
-
|
|
176
|
-
experiment = Experiment(
|
|
177
|
-
name="my-experiment",
|
|
178
|
-
project="my-project",
|
|
179
|
-
local_path=".ml-dash"
|
|
180
|
-
)
|
|
194
|
+
return wrapper
|
|
181
195
|
|
|
182
|
-
# Context manager
|
|
183
|
-
with Experiment(...) as exp:
|
|
184
|
-
exp.log(...)
|
|
185
196
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
197
|
+
class Experiment:
|
|
198
|
+
"""
|
|
199
|
+
ML-Dash experiment for metricing experiments.
|
|
200
|
+
|
|
201
|
+
Prefix format: {owner}/{project}/path.../[name]
|
|
202
|
+
- owner: First segment (e.g., your username)
|
|
203
|
+
- project: Second segment (e.g., project name)
|
|
204
|
+
- path: Remaining segments form the folder structure
|
|
205
|
+
- name: Derived from last segment (may be a seed/id)
|
|
206
|
+
|
|
207
|
+
Usage examples:
|
|
208
|
+
|
|
209
|
+
# Local mode (default)
|
|
210
|
+
experiment = Experiment(prefix="ge/my-project/experiments/exp1")
|
|
211
|
+
|
|
212
|
+
# Custom local storage directory
|
|
213
|
+
experiment = Experiment(
|
|
214
|
+
prefix="ge/my-project/experiments/exp1",
|
|
215
|
+
dash_root=".dash"
|
|
216
|
+
)
|
|
217
|
+
|
|
218
|
+
# Remote mode with custom server
|
|
219
|
+
experiment = Experiment(
|
|
220
|
+
prefix="ge/my-project/experiments/exp1",
|
|
221
|
+
dash_url="https://custom-server.com"
|
|
222
|
+
)
|
|
223
|
+
|
|
224
|
+
# Context manager
|
|
225
|
+
with Experiment(prefix="ge/my-project/exp1").run as exp:
|
|
226
|
+
exp.logs.info("Training started")
|
|
227
|
+
|
|
228
|
+
# Decorator
|
|
229
|
+
@ml_dash_experiment(prefix="ge/ws/experiments/exp", dash_url="https://api.dash.ml")
|
|
230
|
+
def train():
|
|
231
|
+
...
|
|
232
|
+
"""
|
|
233
|
+
|
|
234
|
+
def __init__(
|
|
235
|
+
self,
|
|
236
|
+
prefix: Optional[str] = None,
|
|
237
|
+
*,
|
|
238
|
+
readme: Optional[str] = None,
|
|
239
|
+
# Ge: this is an instance only property
|
|
240
|
+
tags: Optional[List[str]] = None,
|
|
241
|
+
# Ge: Bindrs is an instance-only property, it is not set inside the RUN namespace.
|
|
242
|
+
bindrs: Optional[List[str]] = None,
|
|
243
|
+
# Ge: This is also instance-only
|
|
244
|
+
metadata: Optional[Dict[str, Any]] = None,
|
|
245
|
+
# Mode configuration
|
|
246
|
+
dash_url: Optional[Union[str, bool]] = None,
|
|
247
|
+
dash_root: Optional[str] = ".dash",
|
|
248
|
+
# Deprecated parameters (for backward compatibility)
|
|
249
|
+
remote: Optional[Union[str, bool]] = None,
|
|
250
|
+
local_path: Optional[str] = None,
|
|
251
|
+
# Internal parameters
|
|
252
|
+
_write_protected: bool = False,
|
|
253
|
+
# The rest of the params go directly to populate the RUN object.
|
|
254
|
+
**run_params: Unpack[RUN],
|
|
255
|
+
):
|
|
190
256
|
"""
|
|
257
|
+
Initialize an ML-Dash experiment.
|
|
258
|
+
|
|
259
|
+
Args:
|
|
260
|
+
prefix: Full experiment path like "owner/project/folder.../name" (defaults to DASH_PREFIX env var).
|
|
261
|
+
Format: {owner}/{project}/path.../[name]
|
|
262
|
+
- owner: First segment (e.g., username)
|
|
263
|
+
- project: Second segment (e.g., project name)
|
|
264
|
+
- path: Remaining segments form the folder path
|
|
265
|
+
- name: Derived from last segment (may be a seed/id, not always meaningful)
|
|
266
|
+
readme: Optional experiment readme/description
|
|
267
|
+
tags: Optional list of tags
|
|
268
|
+
bindrs: Optional list of bindrs
|
|
269
|
+
metadata: Optional metadata dict
|
|
270
|
+
dash_url: Remote API URL. True=use EXP.API_URL, str=custom URL, None=no remote. Token auto-loaded from ~/.dash/token.enc
|
|
271
|
+
dash_root: Local storage root path (defaults to ".dash"). Set to None for remote-only mode.
|
|
272
|
+
remote: (Deprecated) Use dash_url instead
|
|
273
|
+
local_path: (Deprecated) Use dash_root instead
|
|
274
|
+
_write_protected: Internal parameter - if True, experiment becomes immutable after creation
|
|
275
|
+
|
|
276
|
+
Mode Selection:
|
|
277
|
+
- Default (no dash_url): Local-only mode (writes to ".dash/")
|
|
278
|
+
- dash_url + dash_root: Hybrid mode (local + remote)
|
|
279
|
+
- dash_url + dash_root=None: Remote-only mode
|
|
280
|
+
"""
|
|
281
|
+
import os
|
|
282
|
+
import warnings
|
|
283
|
+
|
|
284
|
+
# Handle backward compatibility
|
|
285
|
+
if remote is not None:
|
|
286
|
+
warnings.warn(
|
|
287
|
+
"Parameter 'remote' is deprecated. Use 'dash_url' instead.",
|
|
288
|
+
DeprecationWarning,
|
|
289
|
+
stacklevel=2
|
|
290
|
+
)
|
|
291
|
+
if dash_url is None:
|
|
292
|
+
dash_url = remote
|
|
293
|
+
|
|
294
|
+
if local_path is not None:
|
|
295
|
+
warnings.warn(
|
|
296
|
+
"Parameter 'local_path' is deprecated. Use 'dash_root' instead.",
|
|
297
|
+
DeprecationWarning,
|
|
298
|
+
stacklevel=2
|
|
299
|
+
)
|
|
300
|
+
if dash_root == ".dash": # Only override if dash_root is default
|
|
301
|
+
dash_root = local_path
|
|
302
|
+
|
|
303
|
+
# Resolve prefix from environment variable if not provided
|
|
304
|
+
self._folder_path = prefix or os.getenv("DASH_PREFIX")
|
|
305
|
+
|
|
306
|
+
if not self._folder_path:
|
|
307
|
+
raise ValueError("prefix (or DASH_PREFIX env var) must be provided")
|
|
308
|
+
|
|
309
|
+
# Parse prefix: {owner}/{project}/path.../[name]
|
|
310
|
+
parts = self._folder_path.strip("/").split("/")
|
|
311
|
+
if len(parts) < 2:
|
|
312
|
+
raise ValueError(
|
|
313
|
+
f"prefix must have at least owner/project: got '{self._folder_path}'"
|
|
314
|
+
)
|
|
315
|
+
|
|
316
|
+
self.owner = parts[0]
|
|
317
|
+
self.project = parts[1]
|
|
318
|
+
# Name is the last segment (may be a seed/id, not always a meaningful name)
|
|
319
|
+
self.name = parts[-1] if len(parts) > 2 else parts[1]
|
|
320
|
+
|
|
321
|
+
self.readme = readme
|
|
322
|
+
self.tags = tags
|
|
323
|
+
self._bindrs_list = bindrs
|
|
324
|
+
self._write_protected = _write_protected
|
|
325
|
+
self.metadata = metadata
|
|
326
|
+
|
|
327
|
+
# Initialize RUN with experiment values
|
|
328
|
+
RUN.name = self.name
|
|
329
|
+
if readme:
|
|
330
|
+
RUN.readme = readme
|
|
331
|
+
|
|
332
|
+
# Determine operation mode
|
|
333
|
+
# dash_root defaults to ".dash", dash_url defaults to None
|
|
334
|
+
if dash_url and dash_root:
|
|
335
|
+
self.mode = OperationMode.HYBRID
|
|
336
|
+
elif dash_url:
|
|
337
|
+
self.mode = OperationMode.REMOTE
|
|
338
|
+
else:
|
|
339
|
+
self.mode = OperationMode.LOCAL
|
|
340
|
+
|
|
341
|
+
# Initialize backend
|
|
342
|
+
self._client: Optional[RemoteClient] = None
|
|
343
|
+
self._storage: Optional[LocalStorage] = None
|
|
344
|
+
self._experiment_id: Optional[str] = None
|
|
345
|
+
self._experiment_data: Optional[Dict[str, Any]] = None
|
|
346
|
+
self._is_open = False
|
|
347
|
+
self._metrics_manager: Optional["MetricsManager"] = None # Cached metrics manager
|
|
348
|
+
|
|
349
|
+
if self.mode in (OperationMode.REMOTE, OperationMode.HYBRID):
|
|
350
|
+
# RemoteClient will auto-load token from ~/.dash/token.enc
|
|
351
|
+
# Use RUN.api_url if dash_url=True (boolean), otherwise use the provided URL
|
|
352
|
+
api_url = RUN.api_url if dash_url is True else dash_url
|
|
353
|
+
self._client = RemoteClient(base_url=api_url, namespace=self.owner)
|
|
354
|
+
|
|
355
|
+
if self.mode in (OperationMode.LOCAL, OperationMode.HYBRID):
|
|
356
|
+
self._storage = LocalStorage(root_path=Path(dash_root))
|
|
357
|
+
|
|
358
|
+
def _open(self) -> "Experiment":
|
|
359
|
+
"""
|
|
360
|
+
Internal method to open the experiment (create or update on server/filesystem).
|
|
191
361
|
|
|
192
|
-
|
|
193
|
-
self
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
self._experiment_id: Optional[str] = None
|
|
250
|
-
self._experiment_data: Optional[Dict[str, Any]] = None
|
|
251
|
-
self._is_open = False
|
|
252
|
-
self._metrics_manager: Optional['MetricsManager'] = None # Cached metrics manager
|
|
253
|
-
self._folder_template: Optional[str] = None # Template for folder path
|
|
254
|
-
|
|
255
|
-
if self.mode in (OperationMode.REMOTE, OperationMode.HYBRID):
|
|
256
|
-
# api_key can be None - RemoteClient will auto-load from storage
|
|
257
|
-
self._client = RemoteClient(base_url=remote, api_key=api_key)
|
|
258
|
-
|
|
259
|
-
if self.mode in (OperationMode.LOCAL, OperationMode.HYBRID):
|
|
260
|
-
if not local_path:
|
|
261
|
-
raise ValueError("local_path is required for local mode")
|
|
262
|
-
self._storage = LocalStorage(root_path=Path(local_path))
|
|
263
|
-
|
|
264
|
-
def _open(self) -> "Experiment":
|
|
265
|
-
"""
|
|
266
|
-
Internal method to open the experiment (create or update on server/filesystem).
|
|
267
|
-
|
|
268
|
-
Returns:
|
|
269
|
-
self for chaining
|
|
270
|
-
"""
|
|
271
|
-
if self._is_open:
|
|
272
|
-
return self
|
|
273
|
-
|
|
274
|
-
# Initialize RUN with experiment values
|
|
275
|
-
RUN.name = self.name
|
|
276
|
-
RUN.project = self.project
|
|
277
|
-
RUN.description = self.description
|
|
278
|
-
RUN._init_run() # Generate id and timestamp
|
|
279
|
-
|
|
280
|
-
# Format folder template if present
|
|
281
|
-
if self._folder_template:
|
|
282
|
-
self.folder = RUN._format(self._folder_template)
|
|
283
|
-
|
|
284
|
-
if self._client:
|
|
285
|
-
# Remote mode: create/update experiment via API
|
|
286
|
-
response = self._client.create_or_update_experiment(
|
|
287
|
-
project=self.project,
|
|
288
|
-
name=self.name,
|
|
289
|
-
description=self.description,
|
|
290
|
-
tags=self.tags,
|
|
291
|
-
bindrs=self._bindrs_list,
|
|
292
|
-
folder=self.folder,
|
|
293
|
-
write_protected=self._write_protected,
|
|
294
|
-
metadata=self.metadata,
|
|
295
|
-
)
|
|
296
|
-
self._experiment_data = response
|
|
297
|
-
self._experiment_id = response["experiment"]["id"]
|
|
298
|
-
|
|
299
|
-
if self._storage:
|
|
300
|
-
# Local mode: create experiment directory structure
|
|
301
|
-
self._storage.create_experiment(
|
|
302
|
-
project=self.project,
|
|
303
|
-
name=self.name,
|
|
304
|
-
description=self.description,
|
|
305
|
-
tags=self.tags,
|
|
306
|
-
bindrs=self._bindrs_list,
|
|
307
|
-
folder=self.folder,
|
|
308
|
-
metadata=self.metadata,
|
|
362
|
+
Returns:
|
|
363
|
+
self for chaining
|
|
364
|
+
"""
|
|
365
|
+
if self._is_open:
|
|
366
|
+
return self
|
|
367
|
+
|
|
368
|
+
if self._client:
|
|
369
|
+
# Remote mode: create/update experiment via API
|
|
370
|
+
try:
|
|
371
|
+
response = self._client.create_or_update_experiment(
|
|
372
|
+
project=self.project,
|
|
373
|
+
name=self.name,
|
|
374
|
+
description=self.readme,
|
|
375
|
+
tags=self.tags,
|
|
376
|
+
bindrs=self._bindrs_list,
|
|
377
|
+
prefix=self._folder_path,
|
|
378
|
+
write_protected=self._write_protected,
|
|
379
|
+
metadata=self.metadata,
|
|
380
|
+
)
|
|
381
|
+
self._experiment_data = response
|
|
382
|
+
self._experiment_id = response["experiment"]["id"]
|
|
383
|
+
|
|
384
|
+
# Display message about viewing data online
|
|
385
|
+
try:
|
|
386
|
+
from rich.console import Console
|
|
387
|
+
|
|
388
|
+
console = Console()
|
|
389
|
+
console.print(
|
|
390
|
+
f"[dim]✓ Experiment started: [bold]{self.name}[/bold] (project: {self.project})[/dim]\n"
|
|
391
|
+
f"[dim]View your data, statistics, and plots online at:[/dim] "
|
|
392
|
+
f"[link=https://dash.ml]https://dash.ml[/link]"
|
|
393
|
+
)
|
|
394
|
+
except ImportError:
|
|
395
|
+
# Fallback if rich is not available
|
|
396
|
+
print(f"✓ Experiment started: {self.name} (project: {self.project})")
|
|
397
|
+
print("View your data at: https://dash.ml")
|
|
398
|
+
|
|
399
|
+
except Exception as e:
|
|
400
|
+
# Check if it's an authentication error
|
|
401
|
+
from .auth.exceptions import AuthenticationError
|
|
402
|
+
|
|
403
|
+
if isinstance(e, AuthenticationError):
|
|
404
|
+
try:
|
|
405
|
+
from rich.console import Console
|
|
406
|
+
from rich.panel import Panel
|
|
407
|
+
|
|
408
|
+
console = Console()
|
|
409
|
+
|
|
410
|
+
message = (
|
|
411
|
+
"[bold red]Authentication Required[/bold red]\n\n"
|
|
412
|
+
"You need to authenticate before using remote experiments.\n\n"
|
|
413
|
+
"[bold]To authenticate:[/bold]\n"
|
|
414
|
+
" [cyan]ml-dash login[/cyan]\n\n"
|
|
415
|
+
"[dim]This will open your browser for secure OAuth2 authentication.\n"
|
|
416
|
+
"Your token will be stored securely in your system keychain.[/dim]\n\n"
|
|
417
|
+
"[bold]Alternative:[/bold]\n"
|
|
418
|
+
" Use [cyan]local_path[/cyan] instead of [cyan]remote[/cyan] for offline experiments"
|
|
309
419
|
)
|
|
310
420
|
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
Internal method to close the experiment and update status.
|
|
317
|
-
|
|
318
|
-
Args:
|
|
319
|
-
status: Status to set - "COMPLETED" (default), "FAILED", or "CANCELLED"
|
|
320
|
-
"""
|
|
321
|
-
if not self._is_open:
|
|
322
|
-
return
|
|
323
|
-
|
|
324
|
-
# Flush any pending writes
|
|
325
|
-
if self._storage:
|
|
326
|
-
self._storage.flush()
|
|
327
|
-
|
|
328
|
-
# Update experiment status in remote mode
|
|
329
|
-
if self._client and self._experiment_id:
|
|
330
|
-
try:
|
|
331
|
-
self._client.update_experiment_status(
|
|
332
|
-
experiment_id=self._experiment_id,
|
|
333
|
-
status=status
|
|
334
|
-
)
|
|
335
|
-
except Exception as e:
|
|
336
|
-
# Log error but don't fail the close operation
|
|
337
|
-
print(f"Warning: Failed to update experiment status: {e}")
|
|
338
|
-
|
|
339
|
-
self._is_open = False
|
|
340
|
-
|
|
341
|
-
# Reset RUN for next experiment
|
|
342
|
-
RUN._reset()
|
|
343
|
-
|
|
344
|
-
@property
|
|
345
|
-
def run(self) -> RunManager:
|
|
346
|
-
"""
|
|
347
|
-
Get the RunManager for lifecycle operations.
|
|
348
|
-
|
|
349
|
-
Usage:
|
|
350
|
-
# Method calls
|
|
351
|
-
experiment.run.start()
|
|
352
|
-
experiment.run.complete()
|
|
353
|
-
|
|
354
|
-
# Context manager
|
|
355
|
-
with Experiment(...).run as exp:
|
|
356
|
-
exp.log("Training...")
|
|
357
|
-
|
|
358
|
-
# Decorator
|
|
359
|
-
@experiment.run
|
|
360
|
-
def train(exp):
|
|
361
|
-
exp.log("Training...")
|
|
362
|
-
|
|
363
|
-
Returns:
|
|
364
|
-
RunManager instance
|
|
365
|
-
"""
|
|
366
|
-
return RunManager(self)
|
|
367
|
-
|
|
368
|
-
@property
|
|
369
|
-
def params(self) -> ParametersBuilder:
|
|
370
|
-
"""
|
|
371
|
-
Get a ParametersBuilder for parameter operations.
|
|
372
|
-
|
|
373
|
-
Usage:
|
|
374
|
-
# Set parameters
|
|
375
|
-
experiment.params.set(lr=0.001, batch_size=32)
|
|
376
|
-
|
|
377
|
-
# Get parameters
|
|
378
|
-
params = experiment.params.get()
|
|
379
|
-
|
|
380
|
-
Returns:
|
|
381
|
-
ParametersBuilder instance
|
|
382
|
-
|
|
383
|
-
Raises:
|
|
384
|
-
RuntimeError: If experiment is not open
|
|
385
|
-
"""
|
|
386
|
-
if not self._is_open:
|
|
387
|
-
raise RuntimeError(
|
|
388
|
-
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
389
|
-
"Example:\n"
|
|
390
|
-
" with dxp.run:\n"
|
|
391
|
-
" dxp.params.set(lr=0.001)"
|
|
421
|
+
panel = Panel(
|
|
422
|
+
message,
|
|
423
|
+
title="[bold yellow]⚠ Not Authenticated[/bold yellow]",
|
|
424
|
+
border_style="yellow",
|
|
425
|
+
expand=False,
|
|
392
426
|
)
|
|
427
|
+
console.print("\n")
|
|
428
|
+
console.print(panel)
|
|
429
|
+
console.print("\n")
|
|
430
|
+
except ImportError:
|
|
431
|
+
# Fallback if rich is not available
|
|
432
|
+
print("\n" + "=" * 60)
|
|
433
|
+
print("⚠ Authentication Required")
|
|
434
|
+
print("=" * 60)
|
|
435
|
+
print("\nYou need to authenticate before using remote experiments.\n")
|
|
436
|
+
print("To authenticate:")
|
|
437
|
+
print(" ml-dash login\n")
|
|
438
|
+
print("Alternative:")
|
|
439
|
+
print(" Use local_path instead of remote for offline experiments\n")
|
|
440
|
+
print("=" * 60 + "\n")
|
|
441
|
+
|
|
442
|
+
import sys
|
|
443
|
+
|
|
444
|
+
sys.exit(1)
|
|
445
|
+
else:
|
|
446
|
+
# Re-raise other exceptions
|
|
447
|
+
raise
|
|
448
|
+
|
|
449
|
+
if self._storage:
|
|
450
|
+
# Local mode: create experiment directory structure
|
|
451
|
+
self._storage.create_experiment(
|
|
452
|
+
owner=self.owner,
|
|
453
|
+
project=self.project,
|
|
454
|
+
prefix=self._folder_path,
|
|
455
|
+
description=self.readme,
|
|
456
|
+
tags=self.tags,
|
|
457
|
+
bindrs=self._bindrs_list,
|
|
458
|
+
metadata=self.metadata,
|
|
459
|
+
)
|
|
460
|
+
|
|
461
|
+
self._is_open = True
|
|
462
|
+
return self
|
|
463
|
+
|
|
464
|
+
def _close(self, status: str = "COMPLETED"):
|
|
465
|
+
"""
|
|
466
|
+
Internal method to close the experiment and update status.
|
|
393
467
|
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
Examples:
|
|
412
|
-
experiment.log(metadata={"epoch": 1}).info("Training started")
|
|
413
|
-
experiment.log().error("Failed", error_code=500)
|
|
414
|
-
|
|
415
|
-
2. Traditional style (message provided):
|
|
416
|
-
Writes the log immediately and returns None.
|
|
417
|
-
|
|
418
|
-
Examples:
|
|
419
|
-
experiment.log("Training started", level="info", epoch=1)
|
|
420
|
-
experiment.log("Training started") # Defaults to "info"
|
|
421
|
-
|
|
422
|
-
Args:
|
|
423
|
-
message: Optional log message (for traditional style)
|
|
424
|
-
level: Optional log level (for traditional style, defaults to "info")
|
|
425
|
-
metadata: Optional metadata dict
|
|
426
|
-
**extra_metadata: Additional metadata as keyword arguments
|
|
427
|
-
|
|
428
|
-
Returns:
|
|
429
|
-
LogBuilder if no message provided (fluent mode)
|
|
430
|
-
None if log was written directly (traditional mode)
|
|
431
|
-
|
|
432
|
-
Raises:
|
|
433
|
-
RuntimeError: If experiment is not open
|
|
434
|
-
ValueError: If log level is invalid
|
|
435
|
-
"""
|
|
436
|
-
if not self._is_open:
|
|
437
|
-
raise RuntimeError(
|
|
438
|
-
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
439
|
-
"Example:\n"
|
|
440
|
-
" with dxp.run:\n"
|
|
441
|
-
" dxp.log().info('Training started')"
|
|
442
|
-
)
|
|
468
|
+
Args:
|
|
469
|
+
status: Status to set - "COMPLETED" (default), "FAILED", or "CANCELLED"
|
|
470
|
+
"""
|
|
471
|
+
if not self._is_open:
|
|
472
|
+
return
|
|
473
|
+
|
|
474
|
+
# Flush any pending writes
|
|
475
|
+
if self._storage:
|
|
476
|
+
self._storage.flush()
|
|
477
|
+
|
|
478
|
+
# Update experiment status in remote mode
|
|
479
|
+
if self._client and self._experiment_id:
|
|
480
|
+
try:
|
|
481
|
+
self._client.update_experiment_status(
|
|
482
|
+
experiment_id=self._experiment_id, status=status
|
|
483
|
+
)
|
|
443
484
|
|
|
444
|
-
#
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
return LogBuilder(self, combined_metadata if combined_metadata else None)
|
|
448
|
-
|
|
449
|
-
# Traditional mode: write immediately
|
|
450
|
-
level = level or LogLevel.INFO.value # Default to "info"
|
|
451
|
-
level = LogLevel.validate(level) # Validate level
|
|
452
|
-
|
|
453
|
-
combined_metadata = {**(metadata or {}), **extra_metadata}
|
|
454
|
-
self._write_log(
|
|
455
|
-
message=message,
|
|
456
|
-
level=level,
|
|
457
|
-
metadata=combined_metadata if combined_metadata else None,
|
|
458
|
-
timestamp=None
|
|
485
|
+
# Display completion message with link to view results
|
|
486
|
+
status_emoji = {"COMPLETED": "✓", "FAILED": "✗", "CANCELLED": "⊘"}.get(
|
|
487
|
+
status, "•"
|
|
459
488
|
)
|
|
460
|
-
return None
|
|
461
|
-
|
|
462
|
-
def _write_log(
|
|
463
|
-
self,
|
|
464
|
-
message: str,
|
|
465
|
-
level: str,
|
|
466
|
-
metadata: Optional[Dict[str, Any]],
|
|
467
|
-
timestamp: Optional[datetime]
|
|
468
|
-
) -> None:
|
|
469
|
-
"""
|
|
470
|
-
Internal method to write a log entry immediately.
|
|
471
|
-
No buffering - writes directly to storage/remote AND stdout/stderr.
|
|
472
|
-
|
|
473
|
-
Args:
|
|
474
|
-
message: Log message
|
|
475
|
-
level: Log level (already validated)
|
|
476
|
-
metadata: Optional metadata dict
|
|
477
|
-
timestamp: Optional custom timestamp (defaults to now)
|
|
478
|
-
"""
|
|
479
|
-
log_entry = {
|
|
480
|
-
"timestamp": (timestamp or datetime.utcnow()).isoformat() + "Z",
|
|
481
|
-
"level": level,
|
|
482
|
-
"message": message,
|
|
483
|
-
}
|
|
484
|
-
|
|
485
|
-
if metadata:
|
|
486
|
-
log_entry["metadata"] = metadata
|
|
487
|
-
|
|
488
|
-
# Mirror to stdout/stderr before writing to storage
|
|
489
|
-
self._print_log(message, level, metadata)
|
|
490
|
-
|
|
491
|
-
# Write immediately (no buffering)
|
|
492
|
-
if self._client:
|
|
493
|
-
# Remote mode: send to API (wrapped in array for batch API)
|
|
494
|
-
self._client.create_log_entries(
|
|
495
|
-
experiment_id=self._experiment_id,
|
|
496
|
-
logs=[log_entry] # Single log in array
|
|
497
|
-
)
|
|
498
489
|
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
490
|
+
status_color = {
|
|
491
|
+
"COMPLETED": "green",
|
|
492
|
+
"FAILED": "red",
|
|
493
|
+
"CANCELLED": "yellow",
|
|
494
|
+
}.get(status, "white")
|
|
495
|
+
|
|
496
|
+
try:
|
|
497
|
+
from rich.console import Console
|
|
498
|
+
|
|
499
|
+
console = Console()
|
|
500
|
+
console.print(
|
|
501
|
+
f"[{status_color}]{status_emoji} Experiment {status.lower()}: "
|
|
502
|
+
f"[bold]{self.name}[/bold] (project: {self.project})[/{status_color}]\n"
|
|
503
|
+
f"[dim]View results, statistics, and plots online at:[/dim] "
|
|
504
|
+
f"[link=https://dash.ml]https://dash.ml[/link]"
|
|
505
|
+
)
|
|
506
|
+
except ImportError:
|
|
507
|
+
# Fallback if rich is not available
|
|
508
|
+
print(
|
|
509
|
+
f"{status_emoji} Experiment {status.lower()}: {self.name} (project: {self.project})"
|
|
510
|
+
)
|
|
511
|
+
print("View results at: https://dash.ml")
|
|
512
|
+
|
|
513
|
+
except Exception as e:
|
|
514
|
+
# Log error but don't fail the close operation
|
|
515
|
+
print(f"Warning: Failed to update experiment status: {e}")
|
|
516
|
+
|
|
517
|
+
self._is_open = False
|
|
518
|
+
|
|
519
|
+
# Reset RUN for next experiment
|
|
520
|
+
# TODO: RUN._reset() - method doesn't exist
|
|
521
|
+
# RUN._reset()
|
|
522
|
+
|
|
523
|
+
@property
|
|
524
|
+
def run(self) -> RunManager:
|
|
525
|
+
"""
|
|
526
|
+
Get the RunManager for lifecycle operations.
|
|
510
527
|
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
metadata: Optional[Dict[str, Any]]
|
|
516
|
-
) -> None:
|
|
517
|
-
"""
|
|
518
|
-
Print log to stdout or stderr based on level.
|
|
519
|
-
|
|
520
|
-
ERROR and FATAL go to stderr, all others go to stdout.
|
|
521
|
-
|
|
522
|
-
Args:
|
|
523
|
-
message: Log message
|
|
524
|
-
level: Log level
|
|
525
|
-
metadata: Optional metadata dict
|
|
526
|
-
"""
|
|
527
|
-
import sys
|
|
528
|
-
|
|
529
|
-
# Format the log message
|
|
530
|
-
level_upper = level.upper()
|
|
531
|
-
|
|
532
|
-
# Build metadata string if present
|
|
533
|
-
metadata_str = ""
|
|
534
|
-
if metadata:
|
|
535
|
-
# Format metadata as key=value pairs
|
|
536
|
-
pairs = [f"{k}={v}" for k, v in metadata.items()]
|
|
537
|
-
metadata_str = f" [{', '.join(pairs)}]"
|
|
538
|
-
|
|
539
|
-
# Format: [LEVEL] message [key=value, ...]
|
|
540
|
-
formatted_message = f"[{level_upper}] {message}{metadata_str}"
|
|
541
|
-
|
|
542
|
-
# Route to stdout or stderr based on level
|
|
543
|
-
if level in ("error", "fatal"):
|
|
544
|
-
print(formatted_message, file=sys.stderr)
|
|
545
|
-
else:
|
|
546
|
-
print(formatted_message, file=sys.stdout)
|
|
547
|
-
|
|
548
|
-
@property
|
|
549
|
-
def files(self) -> FilesAccessor:
|
|
550
|
-
"""
|
|
551
|
-
Get a FilesAccessor for fluent file operations.
|
|
552
|
-
|
|
553
|
-
Returns:
|
|
554
|
-
FilesAccessor instance for chaining
|
|
555
|
-
|
|
556
|
-
Raises:
|
|
557
|
-
RuntimeError: If experiment is not open
|
|
558
|
-
|
|
559
|
-
Examples:
|
|
560
|
-
# Upload file
|
|
561
|
-
experiment.files("checkpoints").save(net, to="checkpoint.pt")
|
|
562
|
-
|
|
563
|
-
# List files
|
|
564
|
-
files = experiment.files("/some/location").list()
|
|
565
|
-
files = experiment.files("/models").list()
|
|
566
|
-
|
|
567
|
-
# Download file
|
|
568
|
-
experiment.files("some.text").download()
|
|
569
|
-
experiment.files("some.text").download(to="./model.pt")
|
|
570
|
-
|
|
571
|
-
# Download Files via Glob Pattern
|
|
572
|
-
file_paths = experiment.files("images").list("*.png")
|
|
573
|
-
experiment.files("images").download("*.png")
|
|
574
|
-
|
|
575
|
-
# This is equivalent to downloading to a directory
|
|
576
|
-
experiment.files.download("images/*.png", to="local_images")
|
|
577
|
-
|
|
578
|
-
# Delete files
|
|
579
|
-
experiment.files("some.text").delete()
|
|
580
|
-
experiment.files.delete("some.text")
|
|
581
|
-
|
|
582
|
-
# Specific File Types
|
|
583
|
-
dxp.files.save_text("content", to="view.yaml")
|
|
584
|
-
dxp.files.save_json(dict(hey="yo"), to="config.json")
|
|
585
|
-
dxp.files.save_blob(b"xxx", to="data.bin")
|
|
586
|
-
"""
|
|
587
|
-
if not self._is_open:
|
|
588
|
-
raise RuntimeError(
|
|
589
|
-
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
590
|
-
"Example:\n"
|
|
591
|
-
" with dxp.run:\n"
|
|
592
|
-
" dxp.files('path').save()"
|
|
593
|
-
)
|
|
528
|
+
Usage:
|
|
529
|
+
# Method calls
|
|
530
|
+
experiment.run.start()
|
|
531
|
+
experiment.run.complete()
|
|
594
532
|
|
|
595
|
-
|
|
533
|
+
# Context manager
|
|
534
|
+
with Experiment(...).run as exp:
|
|
535
|
+
exp.log("Training...")
|
|
596
536
|
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
537
|
+
# Decorator
|
|
538
|
+
@experiment.run
|
|
539
|
+
def train(exp):
|
|
540
|
+
exp.log("Training...")
|
|
600
541
|
|
|
601
|
-
|
|
542
|
+
Returns:
|
|
543
|
+
RunManager instance
|
|
544
|
+
"""
|
|
545
|
+
return RunManager(self)
|
|
602
546
|
|
|
603
|
-
|
|
604
|
-
|
|
547
|
+
@property
|
|
548
|
+
def params(self) -> ParametersBuilder:
|
|
549
|
+
"""
|
|
550
|
+
Get a ParametersBuilder for parameter operations.
|
|
605
551
|
|
|
606
|
-
|
|
607
|
-
|
|
552
|
+
Usage:
|
|
553
|
+
# Set parameters
|
|
554
|
+
experiment.params.set(lr=0.001, batch_size=32)
|
|
608
555
|
|
|
609
|
-
|
|
610
|
-
|
|
556
|
+
# Get parameters
|
|
557
|
+
params = experiment.params.get()
|
|
611
558
|
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
file_paths = experiment.bindrs("some-bindr").list()
|
|
559
|
+
Returns:
|
|
560
|
+
ParametersBuilder instance
|
|
615
561
|
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
562
|
+
Raises:
|
|
563
|
+
RuntimeError: If experiment is not open
|
|
564
|
+
"""
|
|
565
|
+
if not self._is_open:
|
|
566
|
+
raise RuntimeError(
|
|
567
|
+
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
568
|
+
"Example:\n"
|
|
569
|
+
" with dxp.run:\n"
|
|
570
|
+
" dxp.params.set(lr=0.001)"
|
|
571
|
+
)
|
|
572
|
+
|
|
573
|
+
return ParametersBuilder(self)
|
|
574
|
+
|
|
575
|
+
@property
|
|
576
|
+
def logs(self) -> LogBuilder:
|
|
577
|
+
"""
|
|
578
|
+
Get a LogBuilder for fluent-style logging.
|
|
626
579
|
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
def _upload_file(
|
|
630
|
-
self,
|
|
631
|
-
file_path: str,
|
|
632
|
-
prefix: str,
|
|
633
|
-
filename: str,
|
|
634
|
-
description: Optional[str],
|
|
635
|
-
tags: Optional[List[str]],
|
|
636
|
-
metadata: Optional[Dict[str, Any]],
|
|
637
|
-
checksum: str,
|
|
638
|
-
content_type: str,
|
|
639
|
-
size_bytes: int
|
|
640
|
-
) -> Dict[str, Any]:
|
|
641
|
-
"""
|
|
642
|
-
Internal method to upload a file.
|
|
643
|
-
|
|
644
|
-
Args:
|
|
645
|
-
file_path: Local file path
|
|
646
|
-
prefix: Logical path prefix
|
|
647
|
-
filename: Original filename
|
|
648
|
-
description: Optional description
|
|
649
|
-
tags: Optional tags
|
|
650
|
-
metadata: Optional metadata
|
|
651
|
-
checksum: SHA256 checksum
|
|
652
|
-
content_type: MIME type
|
|
653
|
-
size_bytes: File size in bytes
|
|
654
|
-
|
|
655
|
-
Returns:
|
|
656
|
-
File metadata dict
|
|
657
|
-
"""
|
|
658
|
-
result = None
|
|
659
|
-
|
|
660
|
-
if self._client:
|
|
661
|
-
# Remote mode: upload to API
|
|
662
|
-
result = self._client.upload_file(
|
|
663
|
-
experiment_id=self._experiment_id,
|
|
664
|
-
file_path=file_path,
|
|
665
|
-
prefix=prefix,
|
|
666
|
-
filename=filename,
|
|
667
|
-
description=description,
|
|
668
|
-
tags=tags,
|
|
669
|
-
metadata=metadata,
|
|
670
|
-
checksum=checksum,
|
|
671
|
-
content_type=content_type,
|
|
672
|
-
size_bytes=size_bytes
|
|
673
|
-
)
|
|
580
|
+
Returns a LogBuilder that allows chaining with level methods like
|
|
581
|
+
.info(), .warn(), .error(), .debug(), .fatal().
|
|
674
582
|
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
result = self._storage.write_file(
|
|
678
|
-
project=self.project,
|
|
679
|
-
experiment=self.name,
|
|
680
|
-
folder=self.folder,
|
|
681
|
-
file_path=file_path,
|
|
682
|
-
prefix=prefix,
|
|
683
|
-
filename=filename,
|
|
684
|
-
description=description,
|
|
685
|
-
tags=tags,
|
|
686
|
-
metadata=metadata,
|
|
687
|
-
checksum=checksum,
|
|
688
|
-
content_type=content_type,
|
|
689
|
-
size_bytes=size_bytes
|
|
690
|
-
)
|
|
583
|
+
Returns:
|
|
584
|
+
LogBuilder instance for fluent logging
|
|
691
585
|
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
def _list_files(
|
|
695
|
-
self,
|
|
696
|
-
prefix: Optional[str] = None,
|
|
697
|
-
tags: Optional[List[str]] = None
|
|
698
|
-
) -> List[Dict[str, Any]]:
|
|
699
|
-
"""
|
|
700
|
-
Internal method to list files.
|
|
701
|
-
|
|
702
|
-
Args:
|
|
703
|
-
prefix: Optional prefix filter
|
|
704
|
-
tags: Optional tags filter
|
|
705
|
-
|
|
706
|
-
Returns:
|
|
707
|
-
List of file metadata dicts
|
|
708
|
-
"""
|
|
709
|
-
files = []
|
|
710
|
-
|
|
711
|
-
if self._client:
|
|
712
|
-
# Remote mode: fetch from API
|
|
713
|
-
files = self._client.list_files(
|
|
714
|
-
experiment_id=self._experiment_id,
|
|
715
|
-
prefix=prefix,
|
|
716
|
-
tags=tags
|
|
717
|
-
)
|
|
586
|
+
Raises:
|
|
587
|
+
RuntimeError: If experiment is not open
|
|
718
588
|
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
589
|
+
Examples:
|
|
590
|
+
exp.logs.info("Training started", epoch=1)
|
|
591
|
+
exp.logs.error("Failed to load data", error_code=500)
|
|
592
|
+
exp.logs.warn("GPU memory low", memory_available="1GB")
|
|
593
|
+
exp.logs.debug("Debug info", step=100)
|
|
594
|
+
"""
|
|
595
|
+
if not self._is_open:
|
|
596
|
+
raise RuntimeError(
|
|
597
|
+
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
598
|
+
"Example:\n"
|
|
599
|
+
" with dxp.run:\n"
|
|
600
|
+
" dxp.logs.info('Training started')"
|
|
601
|
+
)
|
|
602
|
+
|
|
603
|
+
return LogBuilder(self, metadata=None)
|
|
604
|
+
|
|
605
|
+
def log(
|
|
606
|
+
self,
|
|
607
|
+
message: Optional[str] = None,
|
|
608
|
+
level: Optional[str] = None,
|
|
609
|
+
metadata: Optional[Dict[str, Any]] = None,
|
|
610
|
+
**extra_metadata,
|
|
611
|
+
) -> Optional[LogBuilder]:
|
|
612
|
+
"""
|
|
613
|
+
Create a log entry (traditional style).
|
|
727
614
|
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
self,
|
|
732
|
-
file_id: str,
|
|
733
|
-
dest_path: Optional[str] = None
|
|
734
|
-
) -> str:
|
|
735
|
-
"""
|
|
736
|
-
Internal method to download a file.
|
|
737
|
-
|
|
738
|
-
Args:
|
|
739
|
-
file_id: File ID
|
|
740
|
-
dest_path: Optional destination path (defaults to original filename)
|
|
741
|
-
|
|
742
|
-
Returns:
|
|
743
|
-
Path to downloaded file
|
|
744
|
-
"""
|
|
745
|
-
if self._client:
|
|
746
|
-
# Remote mode: download from API
|
|
747
|
-
return self._client.download_file(
|
|
748
|
-
experiment_id=self._experiment_id,
|
|
749
|
-
file_id=file_id,
|
|
750
|
-
dest_path=dest_path
|
|
751
|
-
)
|
|
615
|
+
.. deprecated::
|
|
616
|
+
The fluent style (calling without message) is deprecated.
|
|
617
|
+
Use the `logs` property instead: `exp.logs.info("message")`
|
|
752
618
|
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
project=self.project,
|
|
757
|
-
experiment=self.name,
|
|
758
|
-
file_id=file_id,
|
|
759
|
-
dest_path=dest_path
|
|
760
|
-
)
|
|
619
|
+
Recommended usage:
|
|
620
|
+
exp.logs.info("Training started", epoch=1)
|
|
621
|
+
exp.logs.error("Failed", error_code=500)
|
|
761
622
|
|
|
762
|
-
|
|
623
|
+
Traditional style (still supported):
|
|
624
|
+
experiment.log("Training started", level="info", epoch=1)
|
|
625
|
+
experiment.log("Training started") # Defaults to "info"
|
|
763
626
|
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
627
|
+
Args:
|
|
628
|
+
message: Log message (required for recommended usage)
|
|
629
|
+
level: Log level (defaults to "info")
|
|
630
|
+
metadata: Optional metadata dict
|
|
631
|
+
**extra_metadata: Additional metadata as keyword arguments
|
|
767
632
|
|
|
768
|
-
|
|
769
|
-
|
|
633
|
+
Returns:
|
|
634
|
+
None when used in traditional style (message provided)
|
|
635
|
+
LogBuilder when used in deprecated fluent style (message=None)
|
|
770
636
|
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
637
|
+
Raises:
|
|
638
|
+
RuntimeError: If experiment is not open
|
|
639
|
+
ValueError: If log level is invalid
|
|
640
|
+
"""
|
|
641
|
+
if not self._is_open:
|
|
642
|
+
raise RuntimeError(
|
|
643
|
+
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
644
|
+
"Example:\n"
|
|
645
|
+
" with dxp.run:\n"
|
|
646
|
+
" dxp.logs.info('Training started')"
|
|
647
|
+
)
|
|
648
|
+
|
|
649
|
+
# Fluent mode: return LogBuilder (deprecated)
|
|
650
|
+
if message is None:
|
|
651
|
+
import warnings
|
|
652
|
+
warnings.warn(
|
|
653
|
+
"Using exp.log() without a message is deprecated. "
|
|
654
|
+
"Use exp.logs.info('message') instead.",
|
|
655
|
+
DeprecationWarning,
|
|
656
|
+
stacklevel=2
|
|
657
|
+
)
|
|
658
|
+
combined_metadata = {**(metadata or {}), **extra_metadata}
|
|
659
|
+
return LogBuilder(self, combined_metadata if combined_metadata else None)
|
|
660
|
+
|
|
661
|
+
# Traditional mode: write immediately
|
|
662
|
+
level = level or LogLevel.INFO.value # Default to "info"
|
|
663
|
+
level = LogLevel.validate(level) # Validate level
|
|
664
|
+
|
|
665
|
+
combined_metadata = {**(metadata or {}), **extra_metadata}
|
|
666
|
+
self._write_log(
|
|
667
|
+
message=message,
|
|
668
|
+
level=level,
|
|
669
|
+
metadata=combined_metadata if combined_metadata else None,
|
|
670
|
+
timestamp=None,
|
|
671
|
+
)
|
|
672
|
+
return None
|
|
673
|
+
|
|
674
|
+
def _write_log(
|
|
675
|
+
self,
|
|
676
|
+
message: str,
|
|
677
|
+
level: str,
|
|
678
|
+
metadata: Optional[Dict[str, Any]],
|
|
679
|
+
timestamp: Optional[datetime],
|
|
680
|
+
) -> None:
|
|
681
|
+
"""
|
|
682
|
+
Internal method to write a log entry immediately.
|
|
683
|
+
No buffering - writes directly to storage/remote AND stdout/stderr.
|
|
684
|
+
|
|
685
|
+
Args:
|
|
686
|
+
message: Log message
|
|
687
|
+
level: Log level (already validated)
|
|
688
|
+
metadata: Optional metadata dict
|
|
689
|
+
timestamp: Optional custom timestamp (defaults to now)
|
|
690
|
+
"""
|
|
691
|
+
log_entry = {
|
|
692
|
+
"timestamp": (timestamp or datetime.utcnow()).isoformat() + "Z",
|
|
693
|
+
"level": level,
|
|
694
|
+
"message": message,
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
if metadata:
|
|
698
|
+
log_entry["metadata"] = metadata
|
|
699
|
+
|
|
700
|
+
# Mirror to stdout/stderr before writing to storage
|
|
701
|
+
self._print_log(message, level, metadata)
|
|
702
|
+
|
|
703
|
+
# Write immediately (no buffering)
|
|
704
|
+
if self._client:
|
|
705
|
+
# Remote mode: send to API (wrapped in array for batch API)
|
|
706
|
+
try:
|
|
707
|
+
self._client.create_log_entries(
|
|
708
|
+
experiment_id=self._experiment_id,
|
|
709
|
+
logs=[log_entry], # Single log in array
|
|
710
|
+
)
|
|
711
|
+
except Exception as e:
|
|
712
|
+
# Log warning but don't crash training
|
|
713
|
+
import warnings
|
|
714
|
+
warnings.warn(
|
|
715
|
+
f"Failed to write log to remote server: {e}. Training will continue.",
|
|
716
|
+
RuntimeWarning,
|
|
717
|
+
stacklevel=4
|
|
718
|
+
)
|
|
719
|
+
# Fall through to local storage if available
|
|
720
|
+
|
|
721
|
+
if self._storage:
|
|
722
|
+
# Local mode: write to file immediately
|
|
723
|
+
try:
|
|
724
|
+
self._storage.write_log(
|
|
725
|
+
owner=self.owner,
|
|
726
|
+
project=self.project,
|
|
727
|
+
prefix=self._folder_path,
|
|
728
|
+
message=log_entry["message"],
|
|
729
|
+
level=log_entry["level"],
|
|
730
|
+
metadata=log_entry.get("metadata"),
|
|
731
|
+
timestamp=log_entry["timestamp"],
|
|
732
|
+
)
|
|
733
|
+
except Exception as e:
|
|
734
|
+
import warnings
|
|
735
|
+
warnings.warn(
|
|
736
|
+
f"Failed to write log to local storage: {e}",
|
|
737
|
+
RuntimeWarning,
|
|
738
|
+
stacklevel=4
|
|
739
|
+
)
|
|
775
740
|
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
)
|
|
741
|
+
def _print_log(
|
|
742
|
+
self, message: str, level: str, metadata: Optional[Dict[str, Any]]
|
|
743
|
+
) -> None:
|
|
744
|
+
"""
|
|
745
|
+
Print log to stdout or stderr based on level.
|
|
782
746
|
|
|
783
|
-
|
|
784
|
-
# Local mode: soft delete in metadata
|
|
785
|
-
result = self._storage.delete_file(
|
|
786
|
-
project=self.project,
|
|
787
|
-
experiment=self.name,
|
|
788
|
-
file_id=file_id
|
|
789
|
-
)
|
|
747
|
+
ERROR and FATAL go to stderr, all others go to stdout.
|
|
790
748
|
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
tags: Optional[List[str]],
|
|
798
|
-
metadata: Optional[Dict[str, Any]]
|
|
799
|
-
) -> Dict[str, Any]:
|
|
800
|
-
"""
|
|
801
|
-
Internal method to update file metadata.
|
|
802
|
-
|
|
803
|
-
Args:
|
|
804
|
-
file_id: File ID
|
|
805
|
-
description: Optional description
|
|
806
|
-
tags: Optional tags
|
|
807
|
-
metadata: Optional metadata
|
|
808
|
-
|
|
809
|
-
Returns:
|
|
810
|
-
Updated file metadata dict
|
|
811
|
-
"""
|
|
812
|
-
result = None
|
|
813
|
-
|
|
814
|
-
if self._client:
|
|
815
|
-
# Remote mode: update via API
|
|
816
|
-
result = self._client.update_file(
|
|
817
|
-
experiment_id=self._experiment_id,
|
|
818
|
-
file_id=file_id,
|
|
819
|
-
description=description,
|
|
820
|
-
tags=tags,
|
|
821
|
-
metadata=metadata
|
|
822
|
-
)
|
|
749
|
+
Args:
|
|
750
|
+
message: Log message
|
|
751
|
+
level: Log level
|
|
752
|
+
metadata: Optional metadata dict
|
|
753
|
+
"""
|
|
754
|
+
import sys
|
|
823
755
|
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
result = self._storage.update_file_metadata(
|
|
827
|
-
project=self.project,
|
|
828
|
-
experiment=self.name,
|
|
829
|
-
file_id=file_id,
|
|
830
|
-
description=description,
|
|
831
|
-
tags=tags,
|
|
832
|
-
metadata=metadata
|
|
833
|
-
)
|
|
756
|
+
# Format the log message
|
|
757
|
+
level_upper = level.upper()
|
|
834
758
|
|
|
835
|
-
|
|
759
|
+
# Build metadata string if present
|
|
760
|
+
metadata_str = ""
|
|
761
|
+
if metadata:
|
|
762
|
+
# Format metadata as key=value pairs
|
|
763
|
+
pairs = [f"{k}={v}" for k, v in metadata.items()]
|
|
764
|
+
metadata_str = f" [{', '.join(pairs)}]"
|
|
836
765
|
|
|
766
|
+
# Format: [LEVEL] message [key=value, ...]
|
|
767
|
+
formatted_message = f"[{level_upper}] {message}{metadata_str}"
|
|
837
768
|
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
769
|
+
# Route to stdout or stderr based on level
|
|
770
|
+
if level in ("error", "fatal"):
|
|
771
|
+
print(formatted_message, file=sys.stderr)
|
|
772
|
+
else:
|
|
773
|
+
print(formatted_message, file=sys.stdout)
|
|
841
774
|
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
# Remote mode: send to API
|
|
847
|
-
self._client.set_parameters(
|
|
848
|
-
experiment_id=self._experiment_id,
|
|
849
|
-
data=flattened_params
|
|
850
|
-
)
|
|
775
|
+
@property
|
|
776
|
+
def files(self) -> FilesAccessor:
|
|
777
|
+
"""
|
|
778
|
+
Get a FilesAccessor for fluent file operations.
|
|
851
779
|
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
self._storage.write_parameters(
|
|
855
|
-
project=self.project,
|
|
856
|
-
experiment=self.name,
|
|
857
|
-
folder=self.folder,
|
|
858
|
-
data=flattened_params
|
|
859
|
-
)
|
|
780
|
+
Returns:
|
|
781
|
+
FilesAccessor instance for chaining
|
|
860
782
|
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
Internal method to read parameters.
|
|
783
|
+
Raises:
|
|
784
|
+
RuntimeError: If experiment is not open
|
|
864
785
|
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
"""
|
|
868
|
-
|
|
786
|
+
Examples:
|
|
787
|
+
# Upload file - supports flexible syntax
|
|
788
|
+
experiment.files("checkpoints").upload("./model.pt", to="checkpoint.pt")
|
|
789
|
+
experiment.files(prefix="checkpoints").upload("./model.pt")
|
|
790
|
+
experiment.files().upload("./model.pt", to="models/model.pt") # root
|
|
869
791
|
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
params = self._client.get_parameters(experiment_id=self._experiment_id)
|
|
874
|
-
except Exception:
|
|
875
|
-
# Parameters don't exist yet
|
|
876
|
-
params = None
|
|
877
|
-
|
|
878
|
-
if self._storage:
|
|
879
|
-
# Local mode: read from file
|
|
880
|
-
params = self._storage.read_parameters(
|
|
881
|
-
project=self.project,
|
|
882
|
-
experiment=self.name
|
|
883
|
-
)
|
|
792
|
+
# List files
|
|
793
|
+
files = experiment.files("/some/location").list()
|
|
794
|
+
files = experiment.files("/models").list()
|
|
884
795
|
|
|
885
|
-
|
|
796
|
+
# Download file
|
|
797
|
+
experiment.files("some.text").download()
|
|
798
|
+
experiment.files("some.text").download(to="./model.pt")
|
|
886
799
|
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
"""
|
|
890
|
-
Get a MetricsManager for metric operations.
|
|
800
|
+
# Download files via glob pattern
|
|
801
|
+
file_paths = experiment.files("images").list("*.png")
|
|
802
|
+
experiment.files("images").download("*.png")
|
|
891
803
|
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
2. Unnamed: experiment.metrics.append(name="loss", value=0.5, step=1)
|
|
804
|
+
# This is equivalent to downloading to a directory
|
|
805
|
+
experiment.files.download("images/*.png", to="local_images")
|
|
895
806
|
|
|
896
|
-
|
|
897
|
-
|
|
807
|
+
# Delete files
|
|
808
|
+
experiment.files("some.text").delete()
|
|
809
|
+
experiment.files.delete("some.text")
|
|
898
810
|
|
|
899
|
-
|
|
900
|
-
|
|
811
|
+
# Specific file types
|
|
812
|
+
dxp.files.save_text("content", to="view.yaml")
|
|
813
|
+
dxp.files.save_json(dict(hey="yo"), to="config.json")
|
|
814
|
+
dxp.files.save_blob(b"xxx", to="data.bin")
|
|
815
|
+
"""
|
|
816
|
+
if not self._is_open:
|
|
817
|
+
raise RuntimeError(
|
|
818
|
+
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
819
|
+
"Example:\n"
|
|
820
|
+
" with dxp.run:\n"
|
|
821
|
+
" dxp.files('path').upload()"
|
|
822
|
+
)
|
|
901
823
|
|
|
902
|
-
|
|
903
|
-
# Named metric
|
|
904
|
-
experiment.metrics("train_loss").append(value=0.5, step=100)
|
|
824
|
+
return FilesAccessor(self)
|
|
905
825
|
|
|
906
|
-
|
|
907
|
-
|
|
+    def bindrs(self, bindr_name: str) -> BindrsBuilder:
+        """
+        Get a BindrsBuilder for working with file collections (bindrs).

-
-        experiment.metrics("metrics").append_batch([
-            {"loss": 0.5, "acc": 0.8, "step": 1},
-            {"loss": 0.4, "acc": 0.85, "step": 2}
-        ])
+        Bindrs are collections of files that can span multiple prefixes.

-
-
+        Args:
+            bindr_name: Name of the bindr (collection)

-
-
-        """
-        from .metric import MetricsManager
+        Returns:
+            BindrsBuilder instance for chaining

-
-
-                "Cannot use metrics on closed experiment. "
-                "Use 'with Experiment(...).run as experiment:' or call experiment.run.start() first."
-            )
+        Raises:
+            RuntimeError: If experiment is not open

-
-
-
-        return self._metrics_manager
-
-    def _append_to_metric(
-        self,
-        name: Optional[str],
-        data: Dict[str, Any],
-        description: Optional[str],
-        tags: Optional[List[str]],
-        metadata: Optional[Dict[str, Any]]
-    ) -> Dict[str, Any]:
-        """
-        Internal method to append a single data point to a metric.
-
-        Args:
-            name: Metric name (can be None for unnamed metrics)
-            data: Data point (flexible schema)
-            description: Optional metric description
-            tags: Optional tags
-            metadata: Optional metadata
-
-        Returns:
-            Dict with metricId, index, bufferedDataPoints, chunkSize
-        """
-        result = None
-
-        if self._client:
-            # Remote mode: append via API
-            result = self._client.append_to_metric(
-                experiment_id=self._experiment_id,
-                metric_name=name,
-                data=data,
-                description=description,
-                tags=tags,
-                metadata=metadata
-            )
+        Examples:
+            # List files in a bindr
+            file_paths = experiment.bindrs("some-bindr").list()

-
-
-
-
-
-
-
-
-
-
-
-
+        Note:
+            This is a placeholder for future bindr functionality.
+        """
+        if not self._is_open:
+            raise RuntimeError(
+                "Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
+                "Example:\n"
+                " with dxp.run:\n"
+                " files = dxp.bindrs('my-bindr').list()"
+            )
+
+        return BindrsBuilder(self, bindr_name)
+
+    def _upload_file(
+        self,
+        file_path: str,
+        prefix: str,
+        filename: str,
+        description: Optional[str],
+        tags: Optional[List[str]],
+        metadata: Optional[Dict[str, Any]],
+        checksum: str,
+        content_type: str,
+        size_bytes: int,
+    ) -> Dict[str, Any]:
+        """
+        Internal method to upload a file.
+
+        Args:
+            file_path: Local file path
+            prefix: Logical path prefix
+            filename: Original filename
+            description: Optional description
+            tags: Optional tags
+            metadata: Optional metadata
+            checksum: SHA256 checksum
+            content_type: MIME type
+            size_bytes: File size in bytes
+
+        Returns:
+            File metadata dict
+        """
+        result = None
+
+        if self._client:
+            # Remote mode: upload to API
+            result = self._client.upload_file(
+                experiment_id=self._experiment_id,
+                file_path=file_path,
+                prefix=prefix,
+                filename=filename,
+                description=description,
+                tags=tags,
+                metadata=metadata,
+                checksum=checksum,
+                content_type=content_type,
+                size_bytes=size_bytes,
+            )
+
+        if self._storage:
+            # Local mode: copy to local storage
+            result = self._storage.write_file(
+                owner=self.owner,
+                project=self.project,
+                prefix=self._folder_path,
+                file_path=file_path,
+                path=prefix,
+                filename=filename,
+                description=description,
+                tags=tags,
+                metadata=metadata,
+                checksum=checksum,
+                content_type=content_type,
+                size_bytes=size_bytes,
+            )
+
+        return result
+
+    def _list_files(
+        self, prefix: Optional[str] = None, tags: Optional[List[str]] = None
+    ) -> List[Dict[str, Any]]:
+        """
+        Internal method to list files.

-
-
-
-        self,
-        name: Optional[str],
-        data_points: List[Dict[str, Any]],
-        description: Optional[str],
-        tags: Optional[List[str]],
-        metadata: Optional[Dict[str, Any]]
-    ) -> Dict[str, Any]:
-        """
-        Internal method to append multiple data points to a metric.
-
-        Args:
-            name: Metric name (can be None for unnamed metrics)
-            data_points: List of data points
-            description: Optional metric description
-            tags: Optional tags
-            metadata: Optional metadata
-
-        Returns:
-            Dict with metricId, startIndex, endIndex, count
-        """
-        result = None
-
-        if self._client:
-            # Remote mode: append batch via API
-            result = self._client.append_batch_to_metric(
-                experiment_id=self._experiment_id,
-                metric_name=name,
-                data_points=data_points,
-                description=description,
-                tags=tags,
-                metadata=metadata
-            )
+        Args:
+            prefix: Optional prefix filter
+            tags: Optional tags filter

-
-
-
-
-
-
-
-
-
-
-
+        Returns:
+            List of file metadata dicts
+        """
+        files = []
+
+        if self._client:
+            # Remote mode: fetch from API
+            files = self._client.list_files(
+                experiment_id=self._experiment_id, prefix=prefix, tags=tags
+            )
+
+        if self._storage:
+            # Local mode: read from metadata file
+            files = self._storage.list_files(
+                owner=self.owner,
+                project=self.project,
+                prefix=self._folder_path,
+                path_prefix=prefix,
+                tags=tags,
+            )
+
+        return files
+
+    def _download_file(self, file_id: str, dest_path: Optional[str] = None) -> str:
+        """
+        Internal method to download a file.

-
-
-
-        self,
-        name: str,
-        start_index: int,
-        limit: int
-    ) -> Dict[str, Any]:
-        """
-        Internal method to read data points from a metric.
-
-        Args:
-            name: Metric name
-            start_index: Starting index
-            limit: Max points to read
-
-        Returns:
-            Dict with data, startIndex, endIndex, total, hasMore
-        """
-        result = None
-
-        if self._client:
-            # Remote mode: read via API
-            result = self._client.read_metric_data(
-                experiment_id=self._experiment_id,
-                metric_name=name,
-                start_index=start_index,
-                limit=limit
-            )
+        Args:
+            file_id: File ID
+            dest_path: Optional destination path (defaults to original filename)

-
-
-
-
-
-
-
-
-
+        Returns:
+            Path to downloaded file
+        """
+        if self._client:
+            # Remote mode: download from API
+            return self._client.download_file(
+                experiment_id=self._experiment_id, file_id=file_id, dest_path=dest_path
+            )
+
+        if self._storage:
+            # Local mode: copy from local storage
+            return self._storage.read_file(
+                owner=self.owner,
+                project=self.project,
+                prefix=self._folder_path,
+                file_id=file_id,
+                dest_path=dest_path,
+            )
+
+        raise RuntimeError("No client or storage configured")
+
+    def _delete_file(self, file_id: str) -> Dict[str, Any]:
+        """
+        Internal method to delete a file.

-
+        Args:
+            file_id: File ID

-
-
-
+        Returns:
+            Dict with id and deletedAt
+        """
+        result = None
+
+        if self._client:
+            # Remote mode: delete via API
+            result = self._client.delete_file(
+                experiment_id=self._experiment_id, file_id=file_id
+            )
+
+        if self._storage:
+            # Local mode: soft delete in metadata
+            result = self._storage.delete_file(
+                owner=self.owner,
+                project=self.project,
+                prefix=self._folder_path,
+                file_id=file_id,
+            )
+
+        return result
+
+    def _update_file(
+        self,
+        file_id: str,
+        description: Optional[str],
+        tags: Optional[List[str]],
+        metadata: Optional[Dict[str, Any]],
+    ) -> Dict[str, Any]:
+        """
+        Internal method to update file metadata.

-
-
+        Args:
+            file_id: File ID
+            description: Optional description
+            tags: Optional tags
+            metadata: Optional metadata

-
-
-
-
+        Returns:
+            Updated file metadata dict
+        """
+        result = None
+
+        if self._client:
+            # Remote mode: update via API
+            result = self._client.update_file(
+                experiment_id=self._experiment_id,
+                file_id=file_id,
+                description=description,
+                tags=tags,
+                metadata=metadata,
+            )
+
+        if self._storage:
+            # Local mode: update in metadata file
+            result = self._storage.update_file_metadata(
+                owner=self.owner,
+                project=self.project,
+                prefix=self._folder_path,
+                file_id=file_id,
+                description=description,
+                tags=tags,
+                metadata=metadata,
+            )
+
+        return result
+
+    def _write_params(self, flattened_params: Dict[str, Any]) -> None:
+        """
+        Internal method to write/merge parameters.

-
-
-
-
-
-
+        Args:
+            flattened_params: Already-flattened parameter dict with dot notation
+        """
+        if self._client:
+            # Remote mode: send to API
+            self._client.set_parameters(
+                experiment_id=self._experiment_id, data=flattened_params
+            )
+
+        if self._storage:
+            # Local mode: write to file
+            self._storage.write_parameters(
+                owner=self.owner,
+                project=self.project,
+                prefix=self._folder_path,
+                data=flattened_params,
+            )
+
+    def _read_params(self) -> Optional[Dict[str, Any]]:
+        """
+        Internal method to read parameters.

-
-
-
-
-
-
-
+        Returns:
+            Flattened parameters dict, or None if no parameters exist
+        """
+        params = None
+
+        if self._client:
+            # Remote mode: fetch from API
+            try:
+                params = self._client.get_parameters(experiment_id=self._experiment_id)
+            except Exception:
+                # Parameters don't exist yet
+                params = None

-
+        if self._storage:
+            # Local mode: read from file
+            params = self._storage.read_parameters(
+                owner=self.owner, project=self.project, prefix=self._folder_path
+            )

-
-        """
-        Internal method to list all metrics in experiment.
+        return params

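The parameter helpers above exchange an already-flattened dict keyed with dot notation. A minimal flattening sketch, for illustration only: the SDK presumably does this in its own params layer, and the helper below is hypothetical rather than the package's code.

from typing import Any, Dict

def flatten(params: Dict[str, Any], parent: str = "") -> Dict[str, Any]:
    # Illustrative helper: nested dicts become dot-separated keys,
    # e.g. {"optim": {"lr": 3e-4}} -> {"optim.lr": 3e-4}.
    flat: Dict[str, Any] = {}
    for key, value in params.items():
        full_key = f"{parent}.{key}" if parent else key
        if isinstance(value, dict):
            flat.update(flatten(value, full_key))
        else:
            flat[full_key] = value
    return flat

print(flatten({"optim": {"lr": 3e-4, "momentum": 0.9}, "seed": 42}))
# -> {'optim.lr': 0.0003, 'optim.momentum': 0.9, 'seed': 42}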
-
-
-
-
+    @property
+    def metrics(self) -> "MetricsManager":
+        """
+        Get a MetricsManager for metric operations.

-
-
-
+        Supports two usage patterns:
+        1. Named: experiment.metrics("train").log(loss=0.5, accuracy=0.9)
+        2. Unnamed: experiment.metrics.log(epoch=epoch).flush()

-
-
-            result = self._storage.list_metrics(
-                project=self.project,
-                experiment=self.name
-            )
+        Returns:
+            MetricsManager instance

-
+        Raises:
+            RuntimeError: If experiment is not open

-
-
-        """
-
+        Examples:
+            # Named metric with multi-field logging
+            experiment.metrics("train").log(loss=0.5, accuracy=0.9)
+            experiment.metrics("eval").log(loss=0.6, accuracy=0.85)
+            experiment.metrics.log(epoch=epoch).flush()

-
-
-
-
+            # Nested dict pattern (single call for all metrics)
+            experiment.metrics.log(
+                epoch=100,
+                train=dict(loss=0.142, accuracy=0.80),
+                eval=dict(loss=0.201, accuracy=0.76)
+            )

+            # Read data
+            data = experiment.metrics("train").read(start_index=0, limit=100)

-
-
-
-
-
+            # Get statistics
+            stats = experiment.metrics("train").stats()
+        """
+        from .metric import MetricsManager
+
+        if not self._is_open:
+            raise RuntimeError(
+                "Cannot use metrics on closed experiment. "
+                "Use 'with Experiment(...).run as experiment:' or call experiment.run.start() first."
+            )
+
+        # Cache the MetricsManager instance to preserve MetricBuilder cache across calls
+        if self._metrics_manager is None:
+            self._metrics_manager = MetricsManager(self)
+        return self._metrics_manager
+
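A compact usage sketch of the metrics API introduced above, mirroring the docstring examples; the experiment prefix and the logged values are placeholders.

from ml_dash.experiment import Experiment

with Experiment(prefix="owner/my-project/experiments/demo").run as exp:
    for epoch in range(3):
        # Named metrics take several fields per call.
        exp.metrics("train").log(loss=0.5 / (epoch + 1), accuracy=0.80 + 0.05 * epoch)
        exp.metrics("eval").log(loss=0.6 / (epoch + 1), accuracy=0.76 + 0.05 * epoch)
        # Unnamed logging with an explicit flush, as in the docstring.
        exp.metrics.log(epoch=epoch).flush()

    history = exp.metrics("train").read(start_index=0, limit=100)
    summary = exp.metrics("train").stats()

Because the manager is cached on the experiment (see the new _metrics_manager check above), repeated exp.metrics("train") calls reuse the same builder.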
+    def _append_to_metric(
+        self,
+        name: Optional[str],
+        data: Dict[str, Any],
+        description: Optional[str],
+        tags: Optional[List[str]],
+        metadata: Optional[Dict[str, Any]],
+    ) -> Optional[Dict[str, Any]]:
         """
-
+        Internal method to append a single data point to a metric.

-
-
-
-
-
-
+        Args:
+            name: Metric name (can be None for unnamed metrics)
+            data: Data point (flexible schema)
+            description: Optional metric description
+            tags: Optional tags
+            metadata: Optional metadata
+
+        Returns:
+            Dict with metricId, index, bufferedDataPoints, chunkSize or None if all backends fail
+        """
+        result = None
+
+        if self._client:
+            # Remote mode: append via API
+            try:
+                result = self._client.append_to_metric(
+                    experiment_id=self._experiment_id,
+                    metric_name=name,
+                    data=data,
+                    description=description,
+                    tags=tags,
+                    metadata=metadata,
+                )
+            except Exception as e:
+                # Log warning but don't crash training
+                import warnings
+                metric_display = f"'{name}'" if name else "unnamed metric"
+                warnings.warn(
+                    f"Failed to log {metric_display} to remote server: {e}. "
+                    f"Training will continue.",
+                    RuntimeWarning,
+                    stacklevel=3
+                )
+                # Fall through to local storage if available
+
+        if self._storage:
+            # Local mode: append to local storage
+            try:
+                result = self._storage.append_to_metric(
+                    owner=self.owner,
+                    project=self.project,
+                    prefix=self._folder_path,
+                    metric_name=name,
+                    data=data,
+                    description=description,
+                    tags=tags,
+                    metadata=metadata,
+                )
+            except Exception as e:
+                import warnings
+                metric_display = f"'{name}'" if name else "unnamed metric"
+                warnings.warn(
+                    f"Failed to log {metric_display} to local storage: {e}",
+                    RuntimeWarning,
+                    stacklevel=3
+                )
+
+        return result
+
+    def _append_batch_to_metric(
+        self,
+        name: Optional[str],
+        data_points: List[Dict[str, Any]],
+        description: Optional[str],
+        tags: Optional[List[str]],
+        metadata: Optional[Dict[str, Any]],
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Internal method to append multiple data points to a metric.
+
+        Args:
+            name: Metric name (can be None for unnamed metrics)
+            data_points: List of data points
+            description: Optional metric description
+            tags: Optional tags
+            metadata: Optional metadata
+
+        Returns:
+            Dict with metricId, startIndex, endIndex, count or None if all backends fail
+        """
+        result = None
+
+        if self._client:
+            # Remote mode: append batch via API
+            try:
+                result = self._client.append_batch_to_metric(
+                    experiment_id=self._experiment_id,
+                    metric_name=name,
+                    data_points=data_points,
+                    description=description,
+                    tags=tags,
+                    metadata=metadata,
+                )
+            except Exception as e:
+                # Log warning but don't crash training
+                import warnings
+                metric_display = f"'{name}'" if name else "unnamed metric"
+                warnings.warn(
+                    f"Failed to log batch to {metric_display} on remote server: {e}. "
+                    f"Training will continue.",
+                    RuntimeWarning,
+                    stacklevel=3
+                )
+                # Fall through to local storage if available
+
+        if self._storage:
+            # Local mode: append batch to local storage
+            try:
+                result = self._storage.append_batch_to_metric(
+                    owner=self.owner,
+                    project=self.project,
+                    prefix=self._folder_path,
+                    metric_name=name,
+                    data_points=data_points,
+                    description=description,
+                    tags=tags,
+                    metadata=metadata,
                 )
-
-
-
+            except Exception as e:
+                import warnings
+                metric_display = f"'{name}'" if name else "unnamed metric"
+                warnings.warn(
+                    f"Failed to log batch to {metric_display} in local storage: {e}",
+                    RuntimeWarning,
+                    stacklevel=3
+                )
+
+        return result
+
+    def _read_metric_data(
+        self, name: str, start_index: int, limit: int
+    ) -> Dict[str, Any]:
+        """
+        Internal method to read data points from a metric.
+
+        Args:
+            name: Metric name
+            start_index: Starting index
+            limit: Max points to read
+
+        Returns:
+            Dict with data, startIndex, endIndex, total, hasMore
+        """
+        result = None
+
+        if self._client:
+            # Remote mode: read via API
+            result = self._client.read_metric_data(
+                experiment_id=self._experiment_id,
+                metric_name=name,
+                start_index=start_index,
+                limit=limit,
+            )
+
+        if self._storage:
+            # Local mode: read from local storage
+            result = self._storage.read_metric_data(
+                owner=self.owner,
+                project=self.project,
+                prefix=self._folder_path,
+                metric_name=name,
+                start_index=start_index,
+                limit=limit,
+            )
+
+        return result
+
+    def _get_metric_stats(self, name: str) -> Dict[str, Any]:
+        """
+        Internal method to get metric statistics.
+
+        Args:
+            name: Metric name
+
+        Returns:
+            Dict with metric stats
+        """
+        result = None
+
+        if self._client:
+            # Remote mode: get stats via API
+            result = self._client.get_metric_stats(
+                experiment_id=self._experiment_id, metric_name=name
+            )
+
+        if self._storage:
+            # Local mode: get stats from local storage
+            result = self._storage.get_metric_stats(
+                owner=self.owner,
+                project=self.project,
+                prefix=self._folder_path,
+                metric_name=name,
+            )
+
+        return result
+
+    def _list_metrics(self) -> List[Dict[str, Any]]:
+        """
+        Internal method to list all metrics in experiment.

-
-
+        Returns:
+            List of metric summaries
         """
-
-
-
-
-
-
-
-
-
+        result = None
+
+        if self._client:
+            # Remote mode: list via API
+            result = self._client.list_metrics(experiment_id=self._experiment_id)
+
+        if self._storage:
+            # Local mode: list from local storage
+            result = self._storage.list_metrics(
+                owner=self.owner, project=self.project, prefix=self._folder_path
+            )
+
+        return result or []
+
+    @property
+    def id(self) -> Optional[str]:
+        """Get the experiment ID (only available after open in remote mode)."""
+        return self._experiment_id
+
+    @property
+    def data(self) -> Optional[Dict[str, Any]]:
+        """Get the full experiment data (only available after open in remote mode)."""
+        return self._experiment_data
+
+
+def ml_dash_experiment(prefix: str, **kwargs) -> Callable:
+    """
+    Decorator for wrapping functions with an ML-Dash experiment.
+
+    Args:
+        prefix: Full experiment path like "owner/project/folder.../name"
+        **kwargs: Additional arguments passed to Experiment constructor
+
+    Usage:
+        @ml_dash_experiment(
+            prefix="ge/my-project/experiments/my-experiment",
+            dash_url="https://api.dash.ml"
+        )
+        def train_model():
+            # Function code here
+            pass
+
+    The decorated function will receive an 'experiment' keyword argument
+    with the active Experiment instance.
+    """
+
+    def decorator(func: Callable) -> Callable:
+        @functools.wraps(func)
+        def wrapper(*args, **func_kwargs):
+            with Experiment(prefix=prefix, **kwargs).run as experiment:
+                # Inject experiment into function kwargs
+                func_kwargs["experiment"] = experiment
+                return func(*args, **func_kwargs)
+
+        return wrapper
+
+    return decorator
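To round out the new decorator at the end of this file, a minimal sketch of how it would be used; the owner/project values and metric fields are placeholders, and dash_url is taken from the docstring's own example.

from ml_dash.experiment import ml_dash_experiment

@ml_dash_experiment(
    prefix="ge/my-project/experiments/my-experiment",
    dash_url="https://api.dash.ml",
)
def train_model(experiment=None):
    # The wrapper injects the active Experiment as the `experiment` keyword argument.
    experiment.metrics("train").log(loss=0.42, accuracy=0.9)
    experiment.files.save_json(dict(status="done"), to="summary.json")

train_model()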