ml-dash 0.6.2rc1__py3-none-any.whl → 0.6.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml_dash/__init__.py +36 -64
- ml_dash/auth/token_storage.py +267 -226
- ml_dash/auto_start.py +28 -15
- ml_dash/cli.py +16 -2
- ml_dash/cli_commands/api.py +165 -0
- ml_dash/cli_commands/download.py +757 -667
- ml_dash/cli_commands/list.py +146 -13
- ml_dash/cli_commands/login.py +190 -183
- ml_dash/cli_commands/profile.py +92 -0
- ml_dash/cli_commands/upload.py +1291 -1141
- ml_dash/client.py +79 -6
- ml_dash/config.py +119 -119
- ml_dash/experiment.py +1234 -1034
- ml_dash/files.py +339 -224
- ml_dash/log.py +7 -7
- ml_dash/metric.py +359 -100
- ml_dash/params.py +6 -6
- ml_dash/remote_auto_start.py +20 -17
- ml_dash/run.py +211 -65
- ml_dash/snowflake.py +173 -0
- ml_dash/storage.py +1051 -1081
- {ml_dash-0.6.2rc1.dist-info → ml_dash-0.6.3.dist-info}/METADATA +12 -14
- ml_dash-0.6.3.dist-info/RECORD +33 -0
- {ml_dash-0.6.2rc1.dist-info → ml_dash-0.6.3.dist-info}/WHEEL +1 -1
- ml_dash-0.6.2rc1.dist-info/RECORD +0 -30
- {ml_dash-0.6.2rc1.dist-info → ml_dash-0.6.3.dist-info}/entry_points.txt +0 -0
ml_dash/experiment.py
CHANGED
|
@@ -3,1161 +3,1361 @@ Experiment class for ML-Dash SDK.
|
|
|
3
3
|
|
|
4
4
|
Supports three usage styles:
|
|
5
5
|
1. Decorator: @ml_dash_experiment(...)
|
|
6
|
-
2. Context manager: with Experiment(...) as exp:
|
|
6
|
+
2. Context manager: with Experiment(...).run as exp:
|
|
7
7
|
3. Direct instantiation: exp = Experiment(...)
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
-
from typing import Optional, Dict, Any, List, Callable
|
|
11
|
-
from enum import Enum
|
|
12
10
|
import functools
|
|
13
|
-
from pathlib import Path
|
|
14
11
|
from datetime import datetime
|
|
12
|
+
from enum import Enum
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, Callable, Dict, List, Optional, Union, Unpack
|
|
15
15
|
|
|
16
16
|
from .client import RemoteClient
|
|
17
|
-
from .
|
|
18
|
-
from .log import
|
|
17
|
+
from .files import BindrsBuilder, FilesAccessor
|
|
18
|
+
from .log import LogBuilder, LogLevel
|
|
19
19
|
from .params import ParametersBuilder
|
|
20
|
-
from .files import FilesAccessor, BindrsBuilder
|
|
21
20
|
from .run import RUN
|
|
21
|
+
from .storage import LocalStorage
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
REMOTE = "remote"
|
|
28
|
-
HYBRID = "hybrid" # Future: sync local to remote
|
|
24
|
+
def _expand_exp_template(template: str) -> str:
|
|
25
|
+
"""
|
|
26
|
+
Expand {EXP.attr} placeholders in template string.
|
|
29
27
|
|
|
28
|
+
Handles both regular attributes and property descriptors on the EXP class.
|
|
30
29
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
Lifecycle manager for experiments.
|
|
30
|
+
Args:
|
|
31
|
+
template: String containing {EXP.attr} placeholders
|
|
34
32
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
"""
|
|
33
|
+
Returns:
|
|
34
|
+
String with placeholders expanded to actual values
|
|
35
|
+
"""
|
|
36
|
+
import re
|
|
40
37
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
38
|
+
def replace_match(match):
|
|
39
|
+
attr_name = match.group(1)
|
|
40
|
+
# Get the attribute from the class __dict__, handling properties correctly
|
|
41
|
+
# EXP is a params_proto class where properties are stored in EXP.__dict__
|
|
42
|
+
attr = RUN.__dict__.get(attr_name)
|
|
43
|
+
if isinstance(attr, property):
|
|
44
|
+
# For properties, call the getter with EXP as self
|
|
45
|
+
return str(attr.fget(RUN))
|
|
46
|
+
else:
|
|
47
|
+
# For regular attributes, access via getattr
|
|
48
|
+
return str(getattr(RUN, attr_name))
|
|
44
49
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
self._experiment = experiment
|
|
50
|
+
# Match {EXP.attr_name} pattern
|
|
51
|
+
pattern = r"\{EXP\.(\w+)\}"
|
|
52
|
+
return re.sub(pattern, replace_match, template)
|
|
49
53
|
|
|
50
|
-
def start(self) -> "Experiment":
|
|
51
|
-
"""
|
|
52
|
-
Start the experiment (sets status to RUNNING).
|
|
53
54
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
"""
|
|
57
|
-
return self._experiment._open()
|
|
55
|
+
class OperationMode(Enum):
|
|
56
|
+
"""Operation mode for the experiment."""
|
|
58
57
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
58
|
+
LOCAL = "local"
|
|
59
|
+
REMOTE = "remote"
|
|
60
|
+
HYBRID = "hybrid" # Future: sync local to remote
|
|
62
61
|
|
|
63
|
-
def fail(self) -> None:
|
|
64
|
-
"""Mark experiment as failed (status: FAILED)."""
|
|
65
|
-
self._experiment._close(status="FAILED")
|
|
66
62
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
63
|
+
class RunManager:
|
|
64
|
+
"""
|
|
65
|
+
Lifecycle manager for experiments.
|
|
70
66
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
67
|
+
Supports three usage patterns:
|
|
68
|
+
1. Method calls: experiment.run.start(), experiment.run.complete()
|
|
69
|
+
2. Context manager: with Experiment(...).run as exp:
|
|
70
|
+
3. Decorator: @exp.run or @Experiment(...).run
|
|
71
|
+
"""
|
|
75
72
|
|
|
76
|
-
|
|
77
|
-
|
|
73
|
+
def __init__(self, experiment: "Experiment"):
|
|
74
|
+
"""
|
|
75
|
+
Initialize RunManager.
|
|
78
76
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
77
|
+
Args:
|
|
78
|
+
experiment: Parent Experiment instance
|
|
79
|
+
"""
|
|
80
|
+
self._experiment = experiment
|
|
83
81
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
Set the folder for this experiment before initialization.
|
|
82
|
+
def start(self) -> "Experiment":
|
|
83
|
+
"""
|
|
84
|
+
Start the experiment (sets status to RUNNING).
|
|
88
85
|
|
|
89
|
-
|
|
90
|
-
|
|
86
|
+
Returns:
|
|
87
|
+
The experiment instance for chaining
|
|
88
|
+
"""
|
|
89
|
+
return self._experiment._open()
|
|
91
90
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
91
|
+
def complete(self) -> None:
|
|
92
|
+
"""Mark experiment as completed (status: COMPLETED)."""
|
|
93
|
+
self._experiment._close(status="COMPLETED")
|
|
95
94
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
95
|
+
def fail(self) -> None:
|
|
96
|
+
"""Mark experiment as failed (status: FAILED)."""
|
|
97
|
+
self._experiment._close(status="FAILED")
|
|
99
98
|
|
|
100
|
-
|
|
101
|
-
|
|
99
|
+
def cancel(self) -> None:
|
|
100
|
+
"""Mark experiment as cancelled (status: CANCELLED)."""
|
|
101
|
+
self._experiment._close(status="CANCELLED")
|
|
102
102
|
|
|
103
|
-
|
|
104
|
-
|
|
103
|
+
@property
|
|
104
|
+
def prefix(self) -> Optional[str]:
|
|
105
|
+
"""
|
|
106
|
+
Get the current folder prefix for this experiment.
|
|
105
107
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
+
Returns:
|
|
109
|
+
Current folder prefix path or None
|
|
108
110
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
+
Example:
|
|
112
|
+
current_prefix = exp.run.prefix
|
|
113
|
+
"""
|
|
114
|
+
return self._experiment._folder_path
|
|
111
115
|
|
|
112
|
-
|
|
113
|
-
|
|
116
|
+
@prefix.setter
|
|
117
|
+
def prefix(self, value: Optional[str]) -> None:
|
|
118
|
+
"""
|
|
119
|
+
Set the folder prefix for this experiment before initialization.
|
|
114
120
|
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
dxp.params.set(lr=0.001)
|
|
118
|
-
"""
|
|
119
|
-
if self._experiment._is_open:
|
|
120
|
-
raise RuntimeError(
|
|
121
|
-
"Cannot change folder after experiment is initialized. "
|
|
122
|
-
"Set folder before calling start() or entering 'with' block."
|
|
123
|
-
)
|
|
121
|
+
This can ONLY be set before the experiment is started (initialized).
|
|
122
|
+
Once the experiment is opened, the prefix cannot be changed.
|
|
124
123
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
self._experiment._folder_template = value
|
|
129
|
-
else:
|
|
130
|
-
# Static folder - set directly
|
|
131
|
-
self._experiment.folder = value
|
|
124
|
+
Supports template variables:
|
|
125
|
+
- {EXP.name} - Experiment name
|
|
126
|
+
- {EXP.id} - Experiment ID
|
|
132
127
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
128
|
+
Args:
|
|
129
|
+
value: Folder prefix path with optional template variables
|
|
130
|
+
(e.g., "ge/myproject/{EXP.name}" or None)
|
|
136
131
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
if exc_type is not None:
|
|
140
|
-
self.fail()
|
|
141
|
-
else:
|
|
142
|
-
self.complete()
|
|
143
|
-
return False
|
|
132
|
+
Raises:
|
|
133
|
+
RuntimeError: If experiment is already initialized/open
|
|
144
134
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
Decorator support for wrapping functions with experiment lifecycle.
|
|
135
|
+
Examples:
|
|
136
|
+
from ml_dash import dxp
|
|
148
137
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
def train(exp):
|
|
152
|
-
exp.log("Training...")
|
|
153
|
-
"""
|
|
154
|
-
@functools.wraps(func)
|
|
155
|
-
def wrapper(*args, **kwargs):
|
|
156
|
-
with self as exp:
|
|
157
|
-
return func(exp, *args, **kwargs)
|
|
158
|
-
return wrapper
|
|
138
|
+
# Static folder
|
|
139
|
+
dxp.run.prefix = "ge/myproject/experiments/resnet"
|
|
159
140
|
|
|
141
|
+
# Template with experiment name
|
|
142
|
+
dxp.run.prefix = "ge/iclr_2024/{EXP.name}"
|
|
160
143
|
|
|
161
|
-
|
|
144
|
+
# Now start the experiment
|
|
145
|
+
with dxp.run:
|
|
146
|
+
dxp.params.set(lr=0.001)
|
|
147
|
+
"""
|
|
148
|
+
if self._experiment._is_open:
|
|
149
|
+
raise RuntimeError(
|
|
150
|
+
"Cannot change prefix after experiment is initialized. "
|
|
151
|
+
"Set prefix before calling start() or entering 'with' block."
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
if value:
|
|
155
|
+
# Sync EXP with this experiment's values
|
|
156
|
+
RUN.name = self._experiment.name
|
|
157
|
+
RUN.description = self._experiment.description
|
|
158
|
+
# Generate id/timestamp if not already set
|
|
159
|
+
if RUN.id is None:
|
|
160
|
+
RUN._init_run()
|
|
161
|
+
# Format with EXP - use helper to expand properties correctly
|
|
162
|
+
value = _expand_exp_template(value)
|
|
163
|
+
|
|
164
|
+
# Update the folder on the experiment
|
|
165
|
+
self._experiment._folder_path = value
|
|
166
|
+
|
|
167
|
+
def __enter__(self) -> "Experiment":
|
|
168
|
+
"""Context manager entry - starts the experiment."""
|
|
169
|
+
return self.start()
|
|
170
|
+
|
|
171
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
172
|
+
"""Context manager exit - completes or fails the experiment."""
|
|
173
|
+
if exc_type is not None:
|
|
174
|
+
self.fail()
|
|
175
|
+
else:
|
|
176
|
+
self.complete()
|
|
177
|
+
return False
|
|
178
|
+
|
|
179
|
+
def __call__(self, func: Callable) -> Callable:
|
|
162
180
|
"""
|
|
163
|
-
|
|
181
|
+
Decorator support for wrapping functions with experiment lifecycle.
|
|
164
182
|
|
|
165
|
-
Usage
|
|
183
|
+
Usage:
|
|
184
|
+
@exp.run
|
|
185
|
+
def train(exp):
|
|
186
|
+
exp.log("Training...")
|
|
187
|
+
"""
|
|
166
188
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
remote="https://api.dash.ml",
|
|
172
|
-
api_key="your-jwt-token"
|
|
173
|
-
)
|
|
189
|
+
@functools.wraps(func)
|
|
190
|
+
def wrapper(*args, **kwargs):
|
|
191
|
+
with self as exp:
|
|
192
|
+
return func(exp, *args, **kwargs)
|
|
174
193
|
|
|
175
|
-
|
|
176
|
-
experiment = Experiment(
|
|
177
|
-
name="my-experiment",
|
|
178
|
-
project="my-project",
|
|
179
|
-
local_path=".ml-dash"
|
|
180
|
-
)
|
|
194
|
+
return wrapper
|
|
181
195
|
|
|
182
|
-
# Context manager
|
|
183
|
-
with Experiment(...) as exp:
|
|
184
|
-
exp.log(...)
|
|
185
196
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
197
|
+
class Experiment:
|
|
198
|
+
"""
|
|
199
|
+
ML-Dash experiment for metricing experiments.
|
|
200
|
+
|
|
201
|
+
Prefix format: {owner}/{project}/path.../[name]
|
|
202
|
+
- owner: First segment (e.g., your username)
|
|
203
|
+
- project: Second segment (e.g., project name)
|
|
204
|
+
- path: Remaining segments form the folder structure
|
|
205
|
+
- name: Derived from last segment (may be a seed/id)
|
|
206
|
+
|
|
207
|
+
Usage examples:
|
|
208
|
+
|
|
209
|
+
# Local mode (default)
|
|
210
|
+
experiment = Experiment(prefix="ge/my-project/experiments/exp1")
|
|
211
|
+
|
|
212
|
+
# Custom local storage directory
|
|
213
|
+
experiment = Experiment(
|
|
214
|
+
prefix="ge/my-project/experiments/exp1",
|
|
215
|
+
dash_root=".dash"
|
|
216
|
+
)
|
|
217
|
+
|
|
218
|
+
# Remote mode with custom server
|
|
219
|
+
experiment = Experiment(
|
|
220
|
+
prefix="ge/my-project/experiments/exp1",
|
|
221
|
+
dash_url="https://custom-server.com"
|
|
222
|
+
)
|
|
223
|
+
|
|
224
|
+
# Context manager
|
|
225
|
+
with Experiment(prefix="ge/my-project/exp1").run as exp:
|
|
226
|
+
exp.logs.info("Training started")
|
|
227
|
+
|
|
228
|
+
# Decorator
|
|
229
|
+
@ml_dash_experiment(prefix="ge/ws/experiments/exp", dash_url="https://api.dash.ml")
|
|
230
|
+
def train():
|
|
231
|
+
...
|
|
232
|
+
"""
|
|
233
|
+
|
|
234
|
+
def __init__(
|
|
235
|
+
self,
|
|
236
|
+
prefix: Optional[str] = None,
|
|
237
|
+
*,
|
|
238
|
+
readme: Optional[str] = None,
|
|
239
|
+
# Ge: this is an instance only property
|
|
240
|
+
tags: Optional[List[str]] = None,
|
|
241
|
+
# Ge: Bindrs is an instance-only property, it is not set inside the RUN namespace.
|
|
242
|
+
bindrs: Optional[List[str]] = None,
|
|
243
|
+
# Ge: This is also instance-only
|
|
244
|
+
metadata: Optional[Dict[str, Any]] = None,
|
|
245
|
+
# Mode configuration
|
|
246
|
+
dash_url: Optional[Union[str, bool]] = None,
|
|
247
|
+
dash_root: Optional[str] = ".dash",
|
|
248
|
+
# Deprecated parameters (for backward compatibility)
|
|
249
|
+
remote: Optional[Union[str, bool]] = None,
|
|
250
|
+
local_path: Optional[str] = None,
|
|
251
|
+
# Internal parameters
|
|
252
|
+
_write_protected: bool = False,
|
|
253
|
+
# The rest of the params go directly to populate the RUN object.
|
|
254
|
+
**run_params: Unpack[RUN],
|
|
255
|
+
):
|
|
190
256
|
"""
|
|
257
|
+
Initialize an ML-Dash experiment.
|
|
258
|
+
|
|
259
|
+
Args:
|
|
260
|
+
prefix: Full experiment path like "owner/project/folder.../name" (defaults to DASH_PREFIX env var).
|
|
261
|
+
Format: {owner}/{project}/path.../[name]
|
|
262
|
+
- owner: First segment (e.g., username)
|
|
263
|
+
- project: Second segment (e.g., project name)
|
|
264
|
+
- path: Remaining segments form the folder path
|
|
265
|
+
- name: Derived from last segment (may be a seed/id, not always meaningful)
|
|
266
|
+
readme: Optional experiment readme/description
|
|
267
|
+
tags: Optional list of tags
|
|
268
|
+
bindrs: Optional list of bindrs
|
|
269
|
+
metadata: Optional metadata dict
|
|
270
|
+
dash_url: Remote API URL. True=use EXP.API_URL, str=custom URL, None=no remote. Token auto-loaded from ~/.dash/token.enc
|
|
271
|
+
dash_root: Local storage root path (defaults to ".dash"). Set to None for remote-only mode.
|
|
272
|
+
remote: (Deprecated) Use dash_url instead
|
|
273
|
+
local_path: (Deprecated) Use dash_root instead
|
|
274
|
+
_write_protected: Internal parameter - if True, experiment becomes immutable after creation
|
|
275
|
+
|
|
276
|
+
Mode Selection:
|
|
277
|
+
- Default (no dash_url): Local-only mode (writes to ".dash/")
|
|
278
|
+
- dash_url + dash_root: Hybrid mode (local + remote)
|
|
279
|
+
- dash_url + dash_root=None: Remote-only mode
|
|
280
|
+
"""
|
|
281
|
+
import os
|
|
282
|
+
import warnings
|
|
283
|
+
|
|
284
|
+
# Handle backward compatibility
|
|
285
|
+
if remote is not None:
|
|
286
|
+
warnings.warn(
|
|
287
|
+
"Parameter 'remote' is deprecated. Use 'dash_url' instead.",
|
|
288
|
+
DeprecationWarning,
|
|
289
|
+
stacklevel=2
|
|
290
|
+
)
|
|
291
|
+
if dash_url is None:
|
|
292
|
+
dash_url = remote
|
|
293
|
+
|
|
294
|
+
if local_path is not None:
|
|
295
|
+
warnings.warn(
|
|
296
|
+
"Parameter 'local_path' is deprecated. Use 'dash_root' instead.",
|
|
297
|
+
DeprecationWarning,
|
|
298
|
+
stacklevel=2
|
|
299
|
+
)
|
|
300
|
+
if dash_root == ".dash": # Only override if dash_root is default
|
|
301
|
+
dash_root = local_path
|
|
302
|
+
|
|
303
|
+
# Resolve prefix from environment variable if not provided
|
|
304
|
+
self._folder_path = prefix or os.getenv("DASH_PREFIX")
|
|
305
|
+
|
|
306
|
+
if not self._folder_path:
|
|
307
|
+
raise ValueError("prefix (or DASH_PREFIX env var) must be provided")
|
|
308
|
+
|
|
309
|
+
# Parse prefix: {owner}/{project}/path.../[name]
|
|
310
|
+
parts = self._folder_path.strip("/").split("/")
|
|
311
|
+
if len(parts) < 2:
|
|
312
|
+
raise ValueError(
|
|
313
|
+
f"prefix must have at least owner/project: got '{self._folder_path}'"
|
|
314
|
+
)
|
|
315
|
+
|
|
316
|
+
self.owner = parts[0]
|
|
317
|
+
self.project = parts[1]
|
|
318
|
+
# Name is the last segment (may be a seed/id, not always a meaningful name)
|
|
319
|
+
self.name = parts[-1] if len(parts) > 2 else parts[1]
|
|
320
|
+
|
|
321
|
+
self.readme = readme
|
|
322
|
+
self.tags = tags
|
|
323
|
+
self._bindrs_list = bindrs
|
|
324
|
+
self._write_protected = _write_protected
|
|
325
|
+
self.metadata = metadata
|
|
326
|
+
|
|
327
|
+
# Initialize RUN with experiment values
|
|
328
|
+
RUN.name = self.name
|
|
329
|
+
if readme:
|
|
330
|
+
RUN.readme = readme
|
|
331
|
+
|
|
332
|
+
# Determine operation mode
|
|
333
|
+
# dash_root defaults to ".dash", dash_url defaults to None
|
|
334
|
+
if dash_url and dash_root:
|
|
335
|
+
self.mode = OperationMode.HYBRID
|
|
336
|
+
elif dash_url:
|
|
337
|
+
self.mode = OperationMode.REMOTE
|
|
338
|
+
else:
|
|
339
|
+
self.mode = OperationMode.LOCAL
|
|
340
|
+
|
|
341
|
+
# Initialize backend
|
|
342
|
+
self._client: Optional[RemoteClient] = None
|
|
343
|
+
self._storage: Optional[LocalStorage] = None
|
|
344
|
+
self._experiment_id: Optional[str] = None
|
|
345
|
+
self._experiment_data: Optional[Dict[str, Any]] = None
|
|
346
|
+
self._is_open = False
|
|
347
|
+
self._metrics_manager: Optional["MetricsManager"] = None # Cached metrics manager
|
|
348
|
+
|
|
349
|
+
if self.mode in (OperationMode.REMOTE, OperationMode.HYBRID):
|
|
350
|
+
# RemoteClient will auto-load token from ~/.dash/token.enc
|
|
351
|
+
# Use RUN.api_url if dash_url=True (boolean), otherwise use the provided URL
|
|
352
|
+
api_url = RUN.api_url if dash_url is True else dash_url
|
|
353
|
+
self._client = RemoteClient(base_url=api_url)
|
|
354
|
+
|
|
355
|
+
if self.mode in (OperationMode.LOCAL, OperationMode.HYBRID):
|
|
356
|
+
self._storage = LocalStorage(root_path=Path(dash_root))
|
|
357
|
+
|
|
358
|
+
def _open(self) -> "Experiment":
|
|
359
|
+
"""
|
|
360
|
+
Internal method to open the experiment (create or update on server/filesystem).
|
|
191
361
|
|
|
192
|
-
|
|
193
|
-
self
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
self._experiment_id: Optional[str] = None
|
|
250
|
-
self._experiment_data: Optional[Dict[str, Any]] = None
|
|
251
|
-
self._is_open = False
|
|
252
|
-
self._metrics_manager: Optional['MetricsManager'] = None # Cached metrics manager
|
|
253
|
-
self._folder_template: Optional[str] = None # Template for folder path
|
|
254
|
-
|
|
255
|
-
if self.mode in (OperationMode.REMOTE, OperationMode.HYBRID):
|
|
256
|
-
# api_key can be None - RemoteClient will auto-load from storage
|
|
257
|
-
self._client = RemoteClient(base_url=remote, api_key=api_key)
|
|
258
|
-
|
|
259
|
-
if self.mode in (OperationMode.LOCAL, OperationMode.HYBRID):
|
|
260
|
-
if not local_path:
|
|
261
|
-
raise ValueError("local_path is required for local mode")
|
|
262
|
-
self._storage = LocalStorage(root_path=Path(local_path))
|
|
263
|
-
|
|
264
|
-
def _open(self) -> "Experiment":
|
|
265
|
-
"""
|
|
266
|
-
Internal method to open the experiment (create or update on server/filesystem).
|
|
267
|
-
|
|
268
|
-
Returns:
|
|
269
|
-
self for chaining
|
|
270
|
-
"""
|
|
271
|
-
if self._is_open:
|
|
272
|
-
return self
|
|
273
|
-
|
|
274
|
-
# Initialize RUN with experiment values
|
|
275
|
-
RUN.name = self.name
|
|
276
|
-
RUN.project = self.project
|
|
277
|
-
RUN.description = self.description
|
|
278
|
-
RUN._init_run() # Generate id and timestamp
|
|
279
|
-
|
|
280
|
-
# Format folder template if present
|
|
281
|
-
if self._folder_template:
|
|
282
|
-
self.folder = RUN._format(self._folder_template)
|
|
283
|
-
|
|
284
|
-
if self._client:
|
|
285
|
-
# Remote mode: create/update experiment via API
|
|
286
|
-
response = self._client.create_or_update_experiment(
|
|
287
|
-
project=self.project,
|
|
288
|
-
name=self.name,
|
|
289
|
-
description=self.description,
|
|
290
|
-
tags=self.tags,
|
|
291
|
-
bindrs=self._bindrs_list,
|
|
292
|
-
folder=self.folder,
|
|
293
|
-
write_protected=self._write_protected,
|
|
294
|
-
metadata=self.metadata,
|
|
295
|
-
)
|
|
296
|
-
self._experiment_data = response
|
|
297
|
-
self._experiment_id = response["experiment"]["id"]
|
|
298
|
-
|
|
299
|
-
if self._storage:
|
|
300
|
-
# Local mode: create experiment directory structure
|
|
301
|
-
self._storage.create_experiment(
|
|
302
|
-
project=self.project,
|
|
303
|
-
name=self.name,
|
|
304
|
-
description=self.description,
|
|
305
|
-
tags=self.tags,
|
|
306
|
-
bindrs=self._bindrs_list,
|
|
307
|
-
folder=self.folder,
|
|
308
|
-
metadata=self.metadata,
|
|
362
|
+
Returns:
|
|
363
|
+
self for chaining
|
|
364
|
+
"""
|
|
365
|
+
if self._is_open:
|
|
366
|
+
return self
|
|
367
|
+
|
|
368
|
+
if self._client:
|
|
369
|
+
# Remote mode: create/update experiment via API
|
|
370
|
+
try:
|
|
371
|
+
response = self._client.create_or_update_experiment(
|
|
372
|
+
project=self.project,
|
|
373
|
+
name=self.name,
|
|
374
|
+
description=self.readme,
|
|
375
|
+
tags=self.tags,
|
|
376
|
+
bindrs=self._bindrs_list,
|
|
377
|
+
prefix=self._folder_path,
|
|
378
|
+
write_protected=self._write_protected,
|
|
379
|
+
metadata=self.metadata,
|
|
380
|
+
)
|
|
381
|
+
self._experiment_data = response
|
|
382
|
+
self._experiment_id = response["experiment"]["id"]
|
|
383
|
+
|
|
384
|
+
# Display message about viewing data online
|
|
385
|
+
try:
|
|
386
|
+
from rich.console import Console
|
|
387
|
+
|
|
388
|
+
console = Console()
|
|
389
|
+
console.print(
|
|
390
|
+
f"[dim]✓ Experiment started: [bold]{self.name}[/bold] (project: {self.project})[/dim]\n"
|
|
391
|
+
f"[dim]View your data, statistics, and plots online at:[/dim] "
|
|
392
|
+
f"[link=https://dash.ml]https://dash.ml[/link]"
|
|
393
|
+
)
|
|
394
|
+
except ImportError:
|
|
395
|
+
# Fallback if rich is not available
|
|
396
|
+
print(f"✓ Experiment started: {self.name} (project: {self.project})")
|
|
397
|
+
print("View your data at: https://dash.ml")
|
|
398
|
+
|
|
399
|
+
except Exception as e:
|
|
400
|
+
# Check if it's an authentication error
|
|
401
|
+
from .auth.exceptions import AuthenticationError
|
|
402
|
+
|
|
403
|
+
if isinstance(e, AuthenticationError):
|
|
404
|
+
try:
|
|
405
|
+
from rich.console import Console
|
|
406
|
+
from rich.panel import Panel
|
|
407
|
+
|
|
408
|
+
console = Console()
|
|
409
|
+
|
|
410
|
+
message = (
|
|
411
|
+
"[bold red]Authentication Required[/bold red]\n\n"
|
|
412
|
+
"You need to authenticate before using remote experiments.\n\n"
|
|
413
|
+
"[bold]To authenticate:[/bold]\n"
|
|
414
|
+
" [cyan]ml-dash login[/cyan]\n\n"
|
|
415
|
+
"[dim]This will open your browser for secure OAuth2 authentication.\n"
|
|
416
|
+
"Your token will be stored securely in your system keychain.[/dim]\n\n"
|
|
417
|
+
"[bold]Alternative:[/bold]\n"
|
|
418
|
+
" Use [cyan]local_path[/cyan] instead of [cyan]remote[/cyan] for offline experiments"
|
|
309
419
|
)
|
|
310
420
|
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
Internal method to close the experiment and update status.
|
|
317
|
-
|
|
318
|
-
Args:
|
|
319
|
-
status: Status to set - "COMPLETED" (default), "FAILED", or "CANCELLED"
|
|
320
|
-
"""
|
|
321
|
-
if not self._is_open:
|
|
322
|
-
return
|
|
323
|
-
|
|
324
|
-
# Flush any pending writes
|
|
325
|
-
if self._storage:
|
|
326
|
-
self._storage.flush()
|
|
327
|
-
|
|
328
|
-
# Update experiment status in remote mode
|
|
329
|
-
if self._client and self._experiment_id:
|
|
330
|
-
try:
|
|
331
|
-
self._client.update_experiment_status(
|
|
332
|
-
experiment_id=self._experiment_id,
|
|
333
|
-
status=status
|
|
334
|
-
)
|
|
335
|
-
except Exception as e:
|
|
336
|
-
# Log error but don't fail the close operation
|
|
337
|
-
print(f"Warning: Failed to update experiment status: {e}")
|
|
338
|
-
|
|
339
|
-
self._is_open = False
|
|
340
|
-
|
|
341
|
-
# Reset RUN for next experiment
|
|
342
|
-
RUN._reset()
|
|
343
|
-
|
|
344
|
-
@property
|
|
345
|
-
def run(self) -> RunManager:
|
|
346
|
-
"""
|
|
347
|
-
Get the RunManager for lifecycle operations.
|
|
348
|
-
|
|
349
|
-
Usage:
|
|
350
|
-
# Method calls
|
|
351
|
-
experiment.run.start()
|
|
352
|
-
experiment.run.complete()
|
|
353
|
-
|
|
354
|
-
# Context manager
|
|
355
|
-
with Experiment(...).run as exp:
|
|
356
|
-
exp.log("Training...")
|
|
357
|
-
|
|
358
|
-
# Decorator
|
|
359
|
-
@experiment.run
|
|
360
|
-
def train(exp):
|
|
361
|
-
exp.log("Training...")
|
|
362
|
-
|
|
363
|
-
Returns:
|
|
364
|
-
RunManager instance
|
|
365
|
-
"""
|
|
366
|
-
return RunManager(self)
|
|
367
|
-
|
|
368
|
-
@property
|
|
369
|
-
def params(self) -> ParametersBuilder:
|
|
370
|
-
"""
|
|
371
|
-
Get a ParametersBuilder for parameter operations.
|
|
372
|
-
|
|
373
|
-
Usage:
|
|
374
|
-
# Set parameters
|
|
375
|
-
experiment.params.set(lr=0.001, batch_size=32)
|
|
376
|
-
|
|
377
|
-
# Get parameters
|
|
378
|
-
params = experiment.params.get()
|
|
379
|
-
|
|
380
|
-
Returns:
|
|
381
|
-
ParametersBuilder instance
|
|
382
|
-
|
|
383
|
-
Raises:
|
|
384
|
-
RuntimeError: If experiment is not open
|
|
385
|
-
"""
|
|
386
|
-
if not self._is_open:
|
|
387
|
-
raise RuntimeError(
|
|
388
|
-
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
389
|
-
"Example:\n"
|
|
390
|
-
" with dxp.run:\n"
|
|
391
|
-
" dxp.params.set(lr=0.001)"
|
|
421
|
+
panel = Panel(
|
|
422
|
+
message,
|
|
423
|
+
title="[bold yellow]⚠ Not Authenticated[/bold yellow]",
|
|
424
|
+
border_style="yellow",
|
|
425
|
+
expand=False,
|
|
392
426
|
)
|
|
427
|
+
console.print("\n")
|
|
428
|
+
console.print(panel)
|
|
429
|
+
console.print("\n")
|
|
430
|
+
except ImportError:
|
|
431
|
+
# Fallback if rich is not available
|
|
432
|
+
print("\n" + "=" * 60)
|
|
433
|
+
print("⚠ Authentication Required")
|
|
434
|
+
print("=" * 60)
|
|
435
|
+
print("\nYou need to authenticate before using remote experiments.\n")
|
|
436
|
+
print("To authenticate:")
|
|
437
|
+
print(" ml-dash login\n")
|
|
438
|
+
print("Alternative:")
|
|
439
|
+
print(" Use local_path instead of remote for offline experiments\n")
|
|
440
|
+
print("=" * 60 + "\n")
|
|
441
|
+
|
|
442
|
+
import sys
|
|
443
|
+
|
|
444
|
+
sys.exit(1)
|
|
445
|
+
else:
|
|
446
|
+
# Re-raise other exceptions
|
|
447
|
+
raise
|
|
448
|
+
|
|
449
|
+
if self._storage:
|
|
450
|
+
# Local mode: create experiment directory structure
|
|
451
|
+
self._storage.create_experiment(
|
|
452
|
+
owner=self.owner,
|
|
453
|
+
project=self.project,
|
|
454
|
+
prefix=self._folder_path,
|
|
455
|
+
description=self.readme,
|
|
456
|
+
tags=self.tags,
|
|
457
|
+
bindrs=self._bindrs_list,
|
|
458
|
+
metadata=self.metadata,
|
|
459
|
+
)
|
|
460
|
+
|
|
461
|
+
self._is_open = True
|
|
462
|
+
return self
|
|
463
|
+
|
|
464
|
+
def _close(self, status: str = "COMPLETED"):
|
|
465
|
+
"""
|
|
466
|
+
Internal method to close the experiment and update status.
|
|
393
467
|
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
Examples:
|
|
412
|
-
experiment.log(metadata={"epoch": 1}).info("Training started")
|
|
413
|
-
experiment.log().error("Failed", error_code=500)
|
|
414
|
-
|
|
415
|
-
2. Traditional style (message provided):
|
|
416
|
-
Writes the log immediately and returns None.
|
|
417
|
-
|
|
418
|
-
Examples:
|
|
419
|
-
experiment.log("Training started", level="info", epoch=1)
|
|
420
|
-
experiment.log("Training started") # Defaults to "info"
|
|
421
|
-
|
|
422
|
-
Args:
|
|
423
|
-
message: Optional log message (for traditional style)
|
|
424
|
-
level: Optional log level (for traditional style, defaults to "info")
|
|
425
|
-
metadata: Optional metadata dict
|
|
426
|
-
**extra_metadata: Additional metadata as keyword arguments
|
|
427
|
-
|
|
428
|
-
Returns:
|
|
429
|
-
LogBuilder if no message provided (fluent mode)
|
|
430
|
-
None if log was written directly (traditional mode)
|
|
431
|
-
|
|
432
|
-
Raises:
|
|
433
|
-
RuntimeError: If experiment is not open
|
|
434
|
-
ValueError: If log level is invalid
|
|
435
|
-
"""
|
|
436
|
-
if not self._is_open:
|
|
437
|
-
raise RuntimeError(
|
|
438
|
-
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
439
|
-
"Example:\n"
|
|
440
|
-
" with dxp.run:\n"
|
|
441
|
-
" dxp.log().info('Training started')"
|
|
442
|
-
)
|
|
468
|
+
Args:
|
|
469
|
+
status: Status to set - "COMPLETED" (default), "FAILED", or "CANCELLED"
|
|
470
|
+
"""
|
|
471
|
+
if not self._is_open:
|
|
472
|
+
return
|
|
473
|
+
|
|
474
|
+
# Flush any pending writes
|
|
475
|
+
if self._storage:
|
|
476
|
+
self._storage.flush()
|
|
477
|
+
|
|
478
|
+
# Update experiment status in remote mode
|
|
479
|
+
if self._client and self._experiment_id:
|
|
480
|
+
try:
|
|
481
|
+
self._client.update_experiment_status(
|
|
482
|
+
experiment_id=self._experiment_id, status=status
|
|
483
|
+
)
|
|
443
484
|
|
|
444
|
-
#
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
return LogBuilder(self, combined_metadata if combined_metadata else None)
|
|
448
|
-
|
|
449
|
-
# Traditional mode: write immediately
|
|
450
|
-
level = level or LogLevel.INFO.value # Default to "info"
|
|
451
|
-
level = LogLevel.validate(level) # Validate level
|
|
452
|
-
|
|
453
|
-
combined_metadata = {**(metadata or {}), **extra_metadata}
|
|
454
|
-
self._write_log(
|
|
455
|
-
message=message,
|
|
456
|
-
level=level,
|
|
457
|
-
metadata=combined_metadata if combined_metadata else None,
|
|
458
|
-
timestamp=None
|
|
485
|
+
# Display completion message with link to view results
|
|
486
|
+
status_emoji = {"COMPLETED": "✓", "FAILED": "✗", "CANCELLED": "⊘"}.get(
|
|
487
|
+
status, "•"
|
|
459
488
|
)
|
|
460
|
-
return None
|
|
461
|
-
|
|
462
|
-
def _write_log(
|
|
463
|
-
self,
|
|
464
|
-
message: str,
|
|
465
|
-
level: str,
|
|
466
|
-
metadata: Optional[Dict[str, Any]],
|
|
467
|
-
timestamp: Optional[datetime]
|
|
468
|
-
) -> None:
|
|
469
|
-
"""
|
|
470
|
-
Internal method to write a log entry immediately.
|
|
471
|
-
No buffering - writes directly to storage/remote AND stdout/stderr.
|
|
472
|
-
|
|
473
|
-
Args:
|
|
474
|
-
message: Log message
|
|
475
|
-
level: Log level (already validated)
|
|
476
|
-
metadata: Optional metadata dict
|
|
477
|
-
timestamp: Optional custom timestamp (defaults to now)
|
|
478
|
-
"""
|
|
479
|
-
log_entry = {
|
|
480
|
-
"timestamp": (timestamp or datetime.utcnow()).isoformat() + "Z",
|
|
481
|
-
"level": level,
|
|
482
|
-
"message": message,
|
|
483
|
-
}
|
|
484
|
-
|
|
485
|
-
if metadata:
|
|
486
|
-
log_entry["metadata"] = metadata
|
|
487
|
-
|
|
488
|
-
# Mirror to stdout/stderr before writing to storage
|
|
489
|
-
self._print_log(message, level, metadata)
|
|
490
|
-
|
|
491
|
-
# Write immediately (no buffering)
|
|
492
|
-
if self._client:
|
|
493
|
-
# Remote mode: send to API (wrapped in array for batch API)
|
|
494
|
-
self._client.create_log_entries(
|
|
495
|
-
experiment_id=self._experiment_id,
|
|
496
|
-
logs=[log_entry] # Single log in array
|
|
497
|
-
)
|
|
498
489
|
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
490
|
+
status_color = {
|
|
491
|
+
"COMPLETED": "green",
|
|
492
|
+
"FAILED": "red",
|
|
493
|
+
"CANCELLED": "yellow",
|
|
494
|
+
}.get(status, "white")
|
|
495
|
+
|
|
496
|
+
try:
|
|
497
|
+
from rich.console import Console
|
|
498
|
+
|
|
499
|
+
console = Console()
|
|
500
|
+
console.print(
|
|
501
|
+
f"[{status_color}]{status_emoji} Experiment {status.lower()}: "
|
|
502
|
+
f"[bold]{self.name}[/bold] (project: {self.project})[/{status_color}]\n"
|
|
503
|
+
f"[dim]View results, statistics, and plots online at:[/dim] "
|
|
504
|
+
f"[link=https://dash.ml]https://dash.ml[/link]"
|
|
505
|
+
)
|
|
506
|
+
except ImportError:
|
|
507
|
+
# Fallback if rich is not available
|
|
508
|
+
print(
|
|
509
|
+
f"{status_emoji} Experiment {status.lower()}: {self.name} (project: {self.project})"
|
|
510
|
+
)
|
|
511
|
+
print("View results at: https://dash.ml")
|
|
512
|
+
|
|
513
|
+
except Exception as e:
|
|
514
|
+
# Log error but don't fail the close operation
|
|
515
|
+
print(f"Warning: Failed to update experiment status: {e}")
|
|
516
|
+
|
|
517
|
+
self._is_open = False
|
|
518
|
+
|
|
519
|
+
# Reset RUN for next experiment
|
|
520
|
+
# TODO: RUN._reset() - method doesn't exist
|
|
521
|
+
# RUN._reset()
|
|
522
|
+
|
|
523
|
+
@property
|
|
524
|
+
def run(self) -> RunManager:
|
|
525
|
+
"""
|
|
526
|
+
Get the RunManager for lifecycle operations.
|
|
510
527
|
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
metadata: Optional[Dict[str, Any]]
|
|
516
|
-
) -> None:
|
|
517
|
-
"""
|
|
518
|
-
Print log to stdout or stderr based on level.
|
|
519
|
-
|
|
520
|
-
ERROR and FATAL go to stderr, all others go to stdout.
|
|
521
|
-
|
|
522
|
-
Args:
|
|
523
|
-
message: Log message
|
|
524
|
-
level: Log level
|
|
525
|
-
metadata: Optional metadata dict
|
|
526
|
-
"""
|
|
527
|
-
import sys
|
|
528
|
-
|
|
529
|
-
# Format the log message
|
|
530
|
-
level_upper = level.upper()
|
|
531
|
-
|
|
532
|
-
# Build metadata string if present
|
|
533
|
-
metadata_str = ""
|
|
534
|
-
if metadata:
|
|
535
|
-
# Format metadata as key=value pairs
|
|
536
|
-
pairs = [f"{k}={v}" for k, v in metadata.items()]
|
|
537
|
-
metadata_str = f" [{', '.join(pairs)}]"
|
|
538
|
-
|
|
539
|
-
# Format: [LEVEL] message [key=value, ...]
|
|
540
|
-
formatted_message = f"[{level_upper}] {message}{metadata_str}"
|
|
541
|
-
|
|
542
|
-
# Route to stdout or stderr based on level
|
|
543
|
-
if level in ("error", "fatal"):
|
|
544
|
-
print(formatted_message, file=sys.stderr)
|
|
545
|
-
else:
|
|
546
|
-
print(formatted_message, file=sys.stdout)
|
|
547
|
-
|
|
548
|
-
@property
|
|
549
|
-
def files(self) -> FilesAccessor:
|
|
550
|
-
"""
|
|
551
|
-
Get a FilesAccessor for fluent file operations.
|
|
552
|
-
|
|
553
|
-
Returns:
|
|
554
|
-
FilesAccessor instance for chaining
|
|
555
|
-
|
|
556
|
-
Raises:
|
|
557
|
-
RuntimeError: If experiment is not open
|
|
558
|
-
|
|
559
|
-
Examples:
|
|
560
|
-
# Upload file
|
|
561
|
-
experiment.files("checkpoints").save(net, to="checkpoint.pt")
|
|
562
|
-
|
|
563
|
-
# List files
|
|
564
|
-
files = experiment.files("/some/location").list()
|
|
565
|
-
files = experiment.files("/models").list()
|
|
566
|
-
|
|
567
|
-
# Download file
|
|
568
|
-
experiment.files("some.text").download()
|
|
569
|
-
experiment.files("some.text").download(to="./model.pt")
|
|
570
|
-
|
|
571
|
-
# Download Files via Glob Pattern
|
|
572
|
-
file_paths = experiment.files("images").list("*.png")
|
|
573
|
-
experiment.files("images").download("*.png")
|
|
574
|
-
|
|
575
|
-
# This is equivalent to downloading to a directory
|
|
576
|
-
experiment.files.download("images/*.png", to="local_images")
|
|
577
|
-
|
|
578
|
-
# Delete files
|
|
579
|
-
experiment.files("some.text").delete()
|
|
580
|
-
experiment.files.delete("some.text")
|
|
581
|
-
|
|
582
|
-
# Specific File Types
|
|
583
|
-
dxp.files.save_text("content", to="view.yaml")
|
|
584
|
-
dxp.files.save_json(dict(hey="yo"), to="config.json")
|
|
585
|
-
dxp.files.save_blob(b"xxx", to="data.bin")
|
|
586
|
-
"""
|
|
587
|
-
if not self._is_open:
|
|
588
|
-
raise RuntimeError(
|
|
589
|
-
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
590
|
-
"Example:\n"
|
|
591
|
-
" with dxp.run:\n"
|
|
592
|
-
" dxp.files('path').save()"
|
|
593
|
-
)
|
|
528
|
+
Usage:
|
|
529
|
+
# Method calls
|
|
530
|
+
experiment.run.start()
|
|
531
|
+
experiment.run.complete()
|
|
594
532
|
|
|
595
|
-
|
|
533
|
+
# Context manager
|
|
534
|
+
with Experiment(...).run as exp:
|
|
535
|
+
exp.log("Training...")
|
|
596
536
|
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
537
|
+
# Decorator
|
|
538
|
+
@experiment.run
|
|
539
|
+
def train(exp):
|
|
540
|
+
exp.log("Training...")
|
|
600
541
|
|
|
601
|
-
|
|
542
|
+
Returns:
|
|
543
|
+
RunManager instance
|
|
544
|
+
"""
|
|
545
|
+
return RunManager(self)
|
|
602
546
|
|
|
603
|
-
|
|
604
|
-
|
|
547
|
+
@property
|
|
548
|
+
def params(self) -> ParametersBuilder:
|
|
549
|
+
"""
|
|
550
|
+
Get a ParametersBuilder for parameter operations.
|
|
605
551
|
|
|
606
|
-
|
|
607
|
-
|
|
552
|
+
Usage:
|
|
553
|
+
# Set parameters
|
|
554
|
+
experiment.params.set(lr=0.001, batch_size=32)
|
|
608
555
|
|
|
609
|
-
|
|
610
|
-
|
|
556
|
+
# Get parameters
|
|
557
|
+
params = experiment.params.get()
|
|
611
558
|
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
file_paths = experiment.bindrs("some-bindr").list()
|
|
559
|
+
Returns:
|
|
560
|
+
ParametersBuilder instance
|
|
615
561
|
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
562
|
+
Raises:
|
|
563
|
+
RuntimeError: If experiment is not open
|
|
564
|
+
"""
|
|
565
|
+
if not self._is_open:
|
|
566
|
+
raise RuntimeError(
|
|
567
|
+
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
568
|
+
"Example:\n"
|
|
569
|
+
" with dxp.run:\n"
|
|
570
|
+
" dxp.params.set(lr=0.001)"
|
|
571
|
+
)
|
|
572
|
+
|
|
573
|
+
return ParametersBuilder(self)
|
|
574
|
+
|
|
575
|
+
@property
|
|
576
|
+
def logs(self) -> LogBuilder:
|
|
577
|
+
"""
|
|
578
|
+
Get a LogBuilder for fluent-style logging.
|
|
626
579
|
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
def _upload_file(
|
|
630
|
-
self,
|
|
631
|
-
file_path: str,
|
|
632
|
-
prefix: str,
|
|
633
|
-
filename: str,
|
|
634
|
-
description: Optional[str],
|
|
635
|
-
tags: Optional[List[str]],
|
|
636
|
-
metadata: Optional[Dict[str, Any]],
|
|
637
|
-
checksum: str,
|
|
638
|
-
content_type: str,
|
|
639
|
-
size_bytes: int
|
|
640
|
-
) -> Dict[str, Any]:
|
|
641
|
-
"""
|
|
642
|
-
Internal method to upload a file.
|
|
643
|
-
|
|
644
|
-
Args:
|
|
645
|
-
file_path: Local file path
|
|
646
|
-
prefix: Logical path prefix
|
|
647
|
-
filename: Original filename
|
|
648
|
-
description: Optional description
|
|
649
|
-
tags: Optional tags
|
|
650
|
-
metadata: Optional metadata
|
|
651
|
-
checksum: SHA256 checksum
|
|
652
|
-
content_type: MIME type
|
|
653
|
-
size_bytes: File size in bytes
|
|
654
|
-
|
|
655
|
-
Returns:
|
|
656
|
-
File metadata dict
|
|
657
|
-
"""
|
|
658
|
-
result = None
|
|
659
|
-
|
|
660
|
-
if self._client:
|
|
661
|
-
# Remote mode: upload to API
|
|
662
|
-
result = self._client.upload_file(
|
|
663
|
-
experiment_id=self._experiment_id,
|
|
664
|
-
file_path=file_path,
|
|
665
|
-
prefix=prefix,
|
|
666
|
-
filename=filename,
|
|
667
|
-
description=description,
|
|
668
|
-
tags=tags,
|
|
669
|
-
metadata=metadata,
|
|
670
|
-
checksum=checksum,
|
|
671
|
-
content_type=content_type,
|
|
672
|
-
size_bytes=size_bytes
|
|
673
|
-
)
|
|
580
|
+
Returns a LogBuilder that allows chaining with level methods like
|
|
581
|
+
.info(), .warn(), .error(), .debug(), .fatal().
|
|
674
582
|
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
result = self._storage.write_file(
|
|
678
|
-
project=self.project,
|
|
679
|
-
experiment=self.name,
|
|
680
|
-
folder=self.folder,
|
|
681
|
-
file_path=file_path,
|
|
682
|
-
prefix=prefix,
|
|
683
|
-
filename=filename,
|
|
684
|
-
description=description,
|
|
685
|
-
tags=tags,
|
|
686
|
-
metadata=metadata,
|
|
687
|
-
checksum=checksum,
|
|
688
|
-
content_type=content_type,
|
|
689
|
-
size_bytes=size_bytes
|
|
690
|
-
)
|
|
583
|
+
Returns:
|
|
584
|
+
LogBuilder instance for fluent logging
|
|
691
585
|
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
def _list_files(
|
|
695
|
-
self,
|
|
696
|
-
prefix: Optional[str] = None,
|
|
697
|
-
tags: Optional[List[str]] = None
|
|
698
|
-
) -> List[Dict[str, Any]]:
|
|
699
|
-
"""
|
|
700
|
-
Internal method to list files.
|
|
701
|
-
|
|
702
|
-
Args:
|
|
703
|
-
prefix: Optional prefix filter
|
|
704
|
-
tags: Optional tags filter
|
|
705
|
-
|
|
706
|
-
Returns:
|
|
707
|
-
List of file metadata dicts
|
|
708
|
-
"""
|
|
709
|
-
files = []
|
|
710
|
-
|
|
711
|
-
if self._client:
|
|
712
|
-
# Remote mode: fetch from API
|
|
713
|
-
files = self._client.list_files(
|
|
714
|
-
experiment_id=self._experiment_id,
|
|
715
|
-
prefix=prefix,
|
|
716
|
-
tags=tags
|
|
717
|
-
)
|
|
586
|
+
Raises:
|
|
587
|
+
RuntimeError: If experiment is not open
|
|
718
588
|
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
589
|
+
Examples:
|
|
590
|
+
exp.logs.info("Training started", epoch=1)
|
|
591
|
+
exp.logs.error("Failed to load data", error_code=500)
|
|
592
|
+
exp.logs.warn("GPU memory low", memory_available="1GB")
|
|
593
|
+
exp.logs.debug("Debug info", step=100)
|
|
594
|
+
"""
|
|
595
|
+
if not self._is_open:
|
|
596
|
+
raise RuntimeError(
|
|
597
|
+
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
598
|
+
"Example:\n"
|
|
599
|
+
" with dxp.run:\n"
|
|
600
|
+
" dxp.logs.info('Training started')"
|
|
601
|
+
)
|
|
602
|
+
|
|
603
|
+
return LogBuilder(self, metadata=None)
|
|
604
|
+
|
|
605
|
+
def log(
|
|
606
|
+
self,
|
|
607
|
+
message: Optional[str] = None,
|
|
608
|
+
level: Optional[str] = None,
|
|
609
|
+
metadata: Optional[Dict[str, Any]] = None,
|
|
610
|
+
**extra_metadata,
|
|
611
|
+
) -> Optional[LogBuilder]:
|
|
612
|
+
"""
|
|
613
|
+
Create a log entry (traditional style).
|
|
727
614
|
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
self,
|
|
732
|
-
file_id: str,
|
|
733
|
-
dest_path: Optional[str] = None
|
|
734
|
-
) -> str:
|
|
735
|
-
"""
|
|
736
|
-
Internal method to download a file.
|
|
737
|
-
|
|
738
|
-
Args:
|
|
739
|
-
file_id: File ID
|
|
740
|
-
dest_path: Optional destination path (defaults to original filename)
|
|
741
|
-
|
|
742
|
-
Returns:
|
|
743
|
-
Path to downloaded file
|
|
744
|
-
"""
|
|
745
|
-
if self._client:
|
|
746
|
-
# Remote mode: download from API
|
|
747
|
-
return self._client.download_file(
|
|
748
|
-
experiment_id=self._experiment_id,
|
|
749
|
-
file_id=file_id,
|
|
750
|
-
dest_path=dest_path
|
|
751
|
-
)
|
|
615
|
+
.. deprecated::
|
|
616
|
+
The fluent style (calling without message) is deprecated.
|
|
617
|
+
Use the `logs` property instead: `exp.logs.info("message")`
|
|
752
618
|
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
project=self.project,
|
|
757
|
-
experiment=self.name,
|
|
758
|
-
file_id=file_id,
|
|
759
|
-
dest_path=dest_path
|
|
760
|
-
)
|
|
619
|
+
Recommended usage:
|
|
620
|
+
exp.logs.info("Training started", epoch=1)
|
|
621
|
+
exp.logs.error("Failed", error_code=500)
|
|
761
622
|
|
|
762
|
-
|
|
623
|
+
Traditional style (still supported):
|
|
624
|
+
experiment.log("Training started", level="info", epoch=1)
|
|
625
|
+
experiment.log("Training started") # Defaults to "info"
|
|
763
626
|
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
627
|
+
Args:
|
|
628
|
+
message: Log message (required for recommended usage)
|
|
629
|
+
level: Log level (defaults to "info")
|
|
630
|
+
metadata: Optional metadata dict
|
|
631
|
+
**extra_metadata: Additional metadata as keyword arguments
|
|
767
632
|
|
|
768
|
-
|
|
769
|
-
|
|
633
|
+
Returns:
|
|
634
|
+
None when used in traditional style (message provided)
|
|
635
|
+
LogBuilder when used in deprecated fluent style (message=None)
|
|
770
636
|
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
637
|
+
Raises:
|
|
638
|
+
RuntimeError: If experiment is not open
|
|
639
|
+
ValueError: If log level is invalid
|
|
640
|
+
"""
|
|
641
|
+
if not self._is_open:
|
|
642
|
+
raise RuntimeError(
|
|
643
|
+
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
644
|
+
"Example:\n"
|
|
645
|
+
" with dxp.run:\n"
|
|
646
|
+
" dxp.logs.info('Training started')"
|
|
647
|
+
)
|
|
648
|
+
|
|
649
|
+
# Fluent mode: return LogBuilder (deprecated)
|
|
650
|
+
if message is None:
|
|
651
|
+
import warnings
|
|
652
|
+
warnings.warn(
|
|
653
|
+
"Using exp.log() without a message is deprecated. "
|
|
654
|
+
"Use exp.logs.info('message') instead.",
|
|
655
|
+
DeprecationWarning,
|
|
656
|
+
stacklevel=2
|
|
657
|
+
)
|
|
658
|
+
combined_metadata = {**(metadata or {}), **extra_metadata}
|
|
659
|
+
return LogBuilder(self, combined_metadata if combined_metadata else None)
|
|
660
|
+
|
|
661
|
+
# Traditional mode: write immediately
|
|
662
|
+
level = level or LogLevel.INFO.value # Default to "info"
|
|
663
|
+
level = LogLevel.validate(level) # Validate level
|
|
664
|
+
|
|
665
|
+
combined_metadata = {**(metadata or {}), **extra_metadata}
|
|
666
|
+
self._write_log(
|
|
667
|
+
message=message,
|
|
668
|
+
level=level,
|
|
669
|
+
metadata=combined_metadata if combined_metadata else None,
|
|
670
|
+
timestamp=None,
|
|
671
|
+
)
|
|
672
|
+
return None
|
|
673
|
+
|
|
674
|
+
def _write_log(
|
|
675
|
+
self,
|
|
676
|
+
message: str,
|
|
677
|
+
level: str,
|
|
678
|
+
metadata: Optional[Dict[str, Any]],
|
|
679
|
+
timestamp: Optional[datetime],
|
|
680
|
+
) -> None:
|
|
681
|
+
"""
|
|
682
|
+
Internal method to write a log entry immediately.
|
|
683
|
+
No buffering - writes directly to storage/remote AND stdout/stderr.
|
|
684
|
+
|
|
685
|
+
Args:
|
|
686
|
+
message: Log message
|
|
687
|
+
level: Log level (already validated)
|
|
688
|
+
metadata: Optional metadata dict
|
|
689
|
+
timestamp: Optional custom timestamp (defaults to now)
|
|
690
|
+
"""
|
|
691
|
+
log_entry = {
|
|
692
|
+
"timestamp": (timestamp or datetime.utcnow()).isoformat() + "Z",
|
|
693
|
+
"level": level,
|
|
694
|
+
"message": message,
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
if metadata:
|
|
698
|
+
log_entry["metadata"] = metadata
|
|
699
|
+
|
|
700
|
+
# Mirror to stdout/stderr before writing to storage
|
|
701
|
+
self._print_log(message, level, metadata)
|
|
702
|
+
|
|
703
|
+
# Write immediately (no buffering)
|
|
704
|
+
if self._client:
|
|
705
|
+
# Remote mode: send to API (wrapped in array for batch API)
|
|
706
|
+
self._client.create_log_entries(
|
|
707
|
+
experiment_id=self._experiment_id,
|
|
708
|
+
logs=[log_entry], # Single log in array
|
|
709
|
+
)
|
|
710
|
+
|
|
711
|
+
if self._storage:
|
|
712
|
+
# Local mode: write to file immediately
|
|
713
|
+
self._storage.write_log(
|
|
714
|
+
owner=self.owner,
|
|
715
|
+
project=self.project,
|
|
716
|
+
prefix=self._folder_path,
|
|
717
|
+
message=log_entry["message"],
|
|
718
|
+
level=log_entry["level"],
|
|
719
|
+
metadata=log_entry.get("metadata"),
|
|
720
|
+
timestamp=log_entry["timestamp"],
|
|
721
|
+
)
|
|
722
|
+
|
|
723
|
+
def _print_log(
|
|
724
|
+
self, message: str, level: str, metadata: Optional[Dict[str, Any]]
|
|
725
|
+
) -> None:
|
|
726
|
+
"""
|
|
727
|
+
Print log to stdout or stderr based on level.
|
|
775
728
|
|
|
776
|
-
|
|
777
|
-
# Remote mode: delete via API
|
|
778
|
-
result = self._client.delete_file(
|
|
779
|
-
experiment_id=self._experiment_id,
|
|
780
|
-
file_id=file_id
|
|
781
|
-
)
|
|
729
|
+
ERROR and FATAL go to stderr, all others go to stdout.
|
|
782
730
|
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
)
|
|
731
|
+
Args:
|
|
732
|
+
message: Log message
|
|
733
|
+
level: Log level
|
|
734
|
+
metadata: Optional metadata dict
|
|
735
|
+
"""
|
|
736
|
+
import sys
|
|
790
737
|
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
def _update_file(
|
|
794
|
-
self,
|
|
795
|
-
file_id: str,
|
|
796
|
-
description: Optional[str],
|
|
797
|
-
tags: Optional[List[str]],
|
|
798
|
-
metadata: Optional[Dict[str, Any]]
|
|
799
|
-
) -> Dict[str, Any]:
|
|
800
|
-
"""
|
|
801
|
-
Internal method to update file metadata.
|
|
802
|
-
|
|
803
|
-
Args:
|
|
804
|
-
file_id: File ID
|
|
805
|
-
description: Optional description
|
|
806
|
-
tags: Optional tags
|
|
807
|
-
metadata: Optional metadata
|
|
808
|
-
|
|
809
|
-
Returns:
|
|
810
|
-
Updated file metadata dict
|
|
811
|
-
"""
|
|
812
|
-
result = None
|
|
813
|
-
|
|
814
|
-
if self._client:
|
|
815
|
-
# Remote mode: update via API
|
|
816
|
-
result = self._client.update_file(
|
|
817
|
-
experiment_id=self._experiment_id,
|
|
818
|
-
file_id=file_id,
|
|
819
|
-
description=description,
|
|
820
|
-
tags=tags,
|
|
821
|
-
metadata=metadata
|
|
822
|
-
)
|
|
738
|
+
# Format the log message
|
|
739
|
+
level_upper = level.upper()
|
|
823
740
|
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
description=description,
|
|
831
|
-
tags=tags,
|
|
832
|
-
metadata=metadata
|
|
833
|
-
)
|
|
741
|
+
# Build metadata string if present
|
|
742
|
+
metadata_str = ""
|
|
743
|
+
if metadata:
|
|
744
|
+
# Format metadata as key=value pairs
|
|
745
|
+
pairs = [f"{k}={v}" for k, v in metadata.items()]
|
|
746
|
+
metadata_str = f" [{', '.join(pairs)}]"
|
|
834
747
|
|
|
835
|
-
|
|
748
|
+
# Format: [LEVEL] message [key=value, ...]
|
|
749
|
+
formatted_message = f"[{level_upper}] {message}{metadata_str}"
|
|
836
750
|
|
|
751
|
+
# Route to stdout or stderr based on level
|
|
752
|
+
if level in ("error", "fatal"):
|
|
753
|
+
print(formatted_message, file=sys.stderr)
|
|
754
|
+
else:
|
|
755
|
+
print(formatted_message, file=sys.stdout)
|
|
837
756
|
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
757
|
+
@property
|
|
758
|
+
def files(self) -> FilesAccessor:
|
|
759
|
+
"""
|
|
760
|
+
Get a FilesAccessor for fluent file operations.
|
|
841
761
|
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
"""
|
|
845
|
-
if self._client:
|
|
846
|
-
# Remote mode: send to API
|
|
847
|
-
self._client.set_parameters(
|
|
848
|
-
experiment_id=self._experiment_id,
|
|
849
|
-
data=flattened_params
|
|
850
|
-
)
|
|
762
|
+
Returns:
|
|
763
|
+
FilesAccessor instance for chaining
|
|
851
764
|
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
self._storage.write_parameters(
|
|
855
|
-
project=self.project,
|
|
856
|
-
experiment=self.name,
|
|
857
|
-
folder=self.folder,
|
|
858
|
-
data=flattened_params
|
|
859
|
-
)
|
|
765
|
+
Raises:
|
|
766
|
+
RuntimeError: If experiment is not open
|
|
860
767
|
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
768
|
+
Examples:
|
|
769
|
+
# Upload file - supports flexible syntax
|
|
770
|
+
experiment.files("checkpoints").upload("./model.pt", to="checkpoint.pt")
|
|
771
|
+
experiment.files(prefix="checkpoints").upload("./model.pt")
|
|
772
|
+
experiment.files().upload("./model.pt", to="models/model.pt") # root
|
|
864
773
|
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
""
|
|
868
|
-
params = None
|
|
774
|
+
# List files
|
|
775
|
+
files = experiment.files("/some/location").list()
|
|
776
|
+
files = experiment.files("/models").list()
|
|
869
777
|
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
params = self._client.get_parameters(experiment_id=self._experiment_id)
|
|
874
|
-
except Exception:
|
|
875
|
-
# Parameters don't exist yet
|
|
876
|
-
params = None
|
|
877
|
-
|
|
878
|
-
if self._storage:
|
|
879
|
-
# Local mode: read from file
|
|
880
|
-
params = self._storage.read_parameters(
|
|
881
|
-
project=self.project,
|
|
882
|
-
experiment=self.name
|
|
883
|
-
)
|
|
778
|
+
# Download file
|
|
779
|
+
experiment.files("some.text").download()
|
|
780
|
+
experiment.files("some.text").download(to="./model.pt")
|
|
884
781
|
|
|
885
|
-
|
|
782
|
+
# Download files via glob pattern
|
|
783
|
+
file_paths = experiment.files("images").list("*.png")
|
|
784
|
+
experiment.files("images").download("*.png")
|
|
886
785
|
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
"""
|
|
890
|
-
Get a MetricsManager for metric operations.
|
|
786
|
+
# This is equivalent to downloading to a directory
|
|
787
|
+
experiment.files.download("images/*.png", to="local_images")
|
|
891
788
|
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
789
|
+
# Delete files
|
|
790
|
+
experiment.files("some.text").delete()
|
|
791
|
+
experiment.files.delete("some.text")
|
|
895
792
|
|
|
896
|
-
|
|
897
|
-
|
|
793
|
+
# Specific file types
|
|
794
|
+
dxp.files.save_text("content", to="view.yaml")
|
|
795
|
+
dxp.files.save_json(dict(hey="yo"), to="config.json")
|
|
796
|
+
dxp.files.save_blob(b"xxx", to="data.bin")
|
|
797
|
+
"""
|
|
798
|
+
if not self._is_open:
|
|
799
|
+
raise RuntimeError(
|
|
800
|
+
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
801
|
+
"Example:\n"
|
|
802
|
+
" with dxp.run:\n"
|
|
803
|
+
" dxp.files('path').upload()"
|
|
804
|
+
)
|
|
898
805
|
|
|
899
|
-
|
|
900
|
-
RuntimeError: If experiment is not open
|
|
806
|
+
return FilesAccessor(self)
|
|
901
807
|
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
808
|
+
def bindrs(self, bindr_name: str) -> BindrsBuilder:
|
|
809
|
+
"""
|
|
810
|
+
Get a BindrsBuilder for working with file collections (bindrs).
|
|
905
811
|
|
|
906
|
-
|
|
907
|
-
experiment.metrics.append(name="train_loss", value=0.5, step=100)
|
|
812
|
+
Bindrs are collections of files that can span multiple prefixes.
|
|
908
813
|
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
{"loss": 0.5, "acc": 0.8, "step": 1},
|
|
912
|
-
{"loss": 0.4, "acc": 0.85, "step": 2}
|
|
913
|
-
])
|
|
814
|
+
Args:
|
|
815
|
+
bindr_name: Name of the bindr (collection)
|
|
914
816
|
|
|
915
|
-
|
|
916
|
-
|
|
817
|
+
Returns:
|
|
818
|
+
BindrsBuilder instance for chaining
|
|
917
819
|
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
"""
|
|
921
|
-
from .metric import MetricsManager
|
|
820
|
+
Raises:
|
|
821
|
+
RuntimeError: If experiment is not open
|
|
922
822
|
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
"Use 'with Experiment(...).run as experiment:' or call experiment.run.start() first."
|
|
927
|
-
)
|
|
823
|
+
Examples:
|
|
824
|
+
# List files in a bindr
|
|
825
|
+
file_paths = experiment.bindrs("some-bindr").list()
|
|
928
826
|
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
827
|
+
Note:
|
|
828
|
+
This is a placeholder for future bindr functionality.
|
|
829
|
+
"""
|
|
830
|
+
if not self._is_open:
|
|
831
|
+
raise RuntimeError(
|
|
832
|
+
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
833
|
+
"Example:\n"
|
|
834
|
+
" with dxp.run:\n"
|
|
835
|
+
" files = dxp.bindrs('my-bindr').list()"
|
|
836
|
+
)
|
|
837
|
+
|
|
838
|
+
return BindrsBuilder(self, bindr_name)
|
|
839
|
+
|
|
840
|
+
def _upload_file(
|
|
841
|
+
self,
|
|
842
|
+
file_path: str,
|
|
843
|
+
prefix: str,
|
|
844
|
+
filename: str,
|
|
845
|
+
description: Optional[str],
|
|
846
|
+
tags: Optional[List[str]],
|
|
847
|
+
metadata: Optional[Dict[str, Any]],
|
|
848
|
+
checksum: str,
|
|
849
|
+
content_type: str,
|
|
850
|
+
size_bytes: int,
|
|
851
|
+
) -> Dict[str, Any]:
|
|
852
|
+
"""
|
|
853
|
+
Internal method to upload a file.
|
|
854
|
+
|
|
855
|
+
Args:
|
|
856
|
+
file_path: Local file path
|
|
857
|
+
prefix: Logical path prefix
|
|
858
|
+
filename: Original filename
|
|
859
|
+
description: Optional description
|
|
860
|
+
tags: Optional tags
|
|
861
|
+
metadata: Optional metadata
|
|
862
|
+
checksum: SHA256 checksum
|
|
863
|
+
content_type: MIME type
|
|
864
|
+
size_bytes: File size in bytes
|
|
865
|
+
|
|
866
|
+
Returns:
|
|
867
|
+
File metadata dict
|
|
868
|
+
"""
|
|
869
|
+
result = None
|
|
870
|
+
|
|
871
|
+
if self._client:
|
|
872
|
+
# Remote mode: upload to API
|
|
873
|
+
result = self._client.upload_file(
|
|
874
|
+
experiment_id=self._experiment_id,
|
|
875
|
+
file_path=file_path,
|
|
876
|
+
prefix=prefix,
|
|
877
|
+
filename=filename,
|
|
878
|
+
description=description,
|
|
879
|
+
tags=tags,
|
|
880
|
+
metadata=metadata,
|
|
881
|
+
checksum=checksum,
|
|
882
|
+
content_type=content_type,
|
|
883
|
+
size_bytes=size_bytes,
|
|
884
|
+
)
|
|
885
|
+
|
|
886
|
+
if self._storage:
|
|
887
|
+
# Local mode: copy to local storage
|
|
888
|
+
result = self._storage.write_file(
|
|
889
|
+
owner=self.owner,
|
|
890
|
+
project=self.project,
|
|
891
|
+
prefix=self._folder_path,
|
|
892
|
+
file_path=file_path,
|
|
893
|
+
path=prefix,
|
|
894
|
+
filename=filename,
|
|
895
|
+
description=description,
|
|
896
|
+
tags=tags,
|
|
897
|
+
metadata=metadata,
|
|
898
|
+
checksum=checksum,
|
|
899
|
+
content_type=content_type,
|
|
900
|
+
size_bytes=size_bytes,
|
|
901
|
+
)
|
|
902
|
+
|
|
903
|
+
return result
|
|
904
|
+
|
|
905
|
+
def _list_files(
|
|
906
|
+
self, prefix: Optional[str] = None, tags: Optional[List[str]] = None
|
|
907
|
+
) -> List[Dict[str, Any]]:
|
|
908
|
+
"""
|
|
909
|
+
Internal method to list files.
|
|
967
910
|
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
project=self.project,
|
|
972
|
-
experiment=self.name,
|
|
973
|
-
folder=self.folder,
|
|
974
|
-
metric_name=name,
|
|
975
|
-
data=data,
|
|
976
|
-
description=description,
|
|
977
|
-
tags=tags,
|
|
978
|
-
metadata=metadata
|
|
979
|
-
)
|
|
911
|
+
Args:
|
|
912
|
+
prefix: Optional prefix filter
|
|
913
|
+
tags: Optional tags filter
|
|
980
914
|
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
# Remote mode: append batch via API
|
|
1008
|
-
result = self._client.append_batch_to_metric(
|
|
1009
|
-
experiment_id=self._experiment_id,
|
|
1010
|
-
metric_name=name,
|
|
1011
|
-
data_points=data_points,
|
|
1012
|
-
description=description,
|
|
1013
|
-
tags=tags,
|
|
1014
|
-
metadata=metadata
|
|
1015
|
-
)
|
|
915
|
+
Returns:
|
|
916
|
+
List of file metadata dicts
|
|
917
|
+
"""
|
|
918
|
+
files = []
|
|
919
|
+
|
|
920
|
+
if self._client:
|
|
921
|
+
# Remote mode: fetch from API
|
|
922
|
+
files = self._client.list_files(
|
|
923
|
+
experiment_id=self._experiment_id, prefix=prefix, tags=tags
|
|
924
|
+
)
|
|
925
|
+
|
|
926
|
+
if self._storage:
|
|
927
|
+
# Local mode: read from metadata file
|
|
928
|
+
files = self._storage.list_files(
|
|
929
|
+
owner=self.owner,
|
|
930
|
+
project=self.project,
|
|
931
|
+
prefix=self._folder_path,
|
|
932
|
+
path_prefix=prefix,
|
|
933
|
+
tags=tags,
|
|
934
|
+
)
|
|
935
|
+
|
|
936
|
+
return files
|
|
937
|
+
|
|
938
|
+
def _download_file(self, file_id: str, dest_path: Optional[str] = None) -> str:
|
|
939
|
+
"""
|
|
940
|
+
Internal method to download a file.
|
|
1016
941
|
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
project=self.project,
|
|
1021
|
-
experiment=self.name,
|
|
1022
|
-
metric_name=name,
|
|
1023
|
-
data_points=data_points,
|
|
1024
|
-
description=description,
|
|
1025
|
-
tags=tags,
|
|
1026
|
-
metadata=metadata
|
|
1027
|
-
)
|
|
942
|
+
Args:
|
|
943
|
+
file_id: File ID
|
|
944
|
+
dest_path: Optional destination path (defaults to original filename)
|
|
1028
945
|
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
experiment_id=self._experiment_id,
|
|
1054
|
-
metric_name=name,
|
|
1055
|
-
start_index=start_index,
|
|
1056
|
-
limit=limit
|
|
1057
|
-
)
|
|
946
|
+
Returns:
|
|
947
|
+
Path to downloaded file
|
|
948
|
+
"""
|
|
949
|
+
if self._client:
|
|
950
|
+
# Remote mode: download from API
|
|
951
|
+
return self._client.download_file(
|
|
952
|
+
experiment_id=self._experiment_id, file_id=file_id, dest_path=dest_path
|
|
953
|
+
)
|
|
954
|
+
|
|
955
|
+
if self._storage:
|
|
956
|
+
# Local mode: copy from local storage
|
|
957
|
+
return self._storage.read_file(
|
|
958
|
+
owner=self.owner,
|
|
959
|
+
project=self.project,
|
|
960
|
+
prefix=self._folder_path,
|
|
961
|
+
file_id=file_id,
|
|
962
|
+
dest_path=dest_path,
|
|
963
|
+
)
|
|
964
|
+
|
|
965
|
+
raise RuntimeError("No client or storage configured")
|
|
966
|
+
|
|
967
|
+
def _delete_file(self, file_id: str) -> Dict[str, Any]:
|
|
968
|
+
"""
|
|
969
|
+
Internal method to delete a file.
|
|
1058
970
|
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
result = self._storage.read_metric_data(
|
|
1062
|
-
project=self.project,
|
|
1063
|
-
experiment=self.name,
|
|
1064
|
-
metric_name=name,
|
|
1065
|
-
start_index=start_index,
|
|
1066
|
-
limit=limit
|
|
1067
|
-
)
|
|
971
|
+
Args:
|
|
972
|
+
file_id: File ID
|
|
1068
973
|
|
|
1069
|
-
|
|
974
|
+
Returns:
|
|
975
|
+
Dict with id and deletedAt
|
|
976
|
+
"""
|
|
977
|
+
result = None
|
|
978
|
+
|
|
979
|
+
if self._client:
|
|
980
|
+
# Remote mode: delete via API
|
|
981
|
+
result = self._client.delete_file(
|
|
982
|
+
experiment_id=self._experiment_id, file_id=file_id
|
|
983
|
+
)
|
|
984
|
+
|
|
985
|
+
if self._storage:
|
|
986
|
+
# Local mode: soft delete in metadata
|
|
987
|
+
result = self._storage.delete_file(
|
|
988
|
+
owner=self.owner,
|
|
989
|
+
project=self.project,
|
|
990
|
+
prefix=self._folder_path,
|
|
991
|
+
file_id=file_id,
|
|
992
|
+
)
|
|
993
|
+
|
|
994
|
+
return result
|
|
995
|
+
|
|
996
|
+
def _update_file(
|
|
997
|
+
self,
|
|
998
|
+
file_id: str,
|
|
999
|
+
description: Optional[str],
|
|
1000
|
+
tags: Optional[List[str]],
|
|
1001
|
+
metadata: Optional[Dict[str, Any]],
|
|
1002
|
+
) -> Dict[str, Any]:
|
|
1003
|
+
"""
|
|
1004
|
+
Internal method to update file metadata.
|
|
1070
1005
|
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1006
|
+
Args:
|
|
1007
|
+
file_id: File ID
|
|
1008
|
+
description: Optional description
|
|
1009
|
+
tags: Optional tags
|
|
1010
|
+
metadata: Optional metadata
|
|
1074
1011
|
|
|
1075
|
-
|
|
1076
|
-
|
|
1012
|
+
Returns:
|
|
1013
|
+
Updated file metadata dict
|
|
1014
|
+
"""
|
|
1015
|
+
result = None
|
|
1016
|
+
|
|
1017
|
+
if self._client:
|
|
1018
|
+
# Remote mode: update via API
|
|
1019
|
+
result = self._client.update_file(
|
|
1020
|
+
experiment_id=self._experiment_id,
|
|
1021
|
+
file_id=file_id,
|
|
1022
|
+
description=description,
|
|
1023
|
+
tags=tags,
|
|
1024
|
+
metadata=metadata,
|
|
1025
|
+
)
|
|
1026
|
+
|
|
1027
|
+
if self._storage:
|
|
1028
|
+
# Local mode: update in metadata file
|
|
1029
|
+
result = self._storage.update_file_metadata(
|
|
1030
|
+
owner=self.owner,
|
|
1031
|
+
project=self.project,
|
|
1032
|
+
prefix=self._folder_path,
|
|
1033
|
+
file_id=file_id,
|
|
1034
|
+
description=description,
|
|
1035
|
+
tags=tags,
|
|
1036
|
+
metadata=metadata,
|
|
1037
|
+
)
|
|
1038
|
+
|
|
1039
|
+
return result
|
|
1040
|
+
|
|
1041
|
+
def _write_params(self, flattened_params: Dict[str, Any]) -> None:
|
|
1042
|
+
"""
|
|
1043
|
+
Internal method to write/merge parameters.
|
|
1077
1044
|
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1045
|
+
Args:
|
|
1046
|
+
flattened_params: Already-flattened parameter dict with dot notation
|
|
1047
|
+
"""
|
|
1048
|
+
if self._client:
|
|
1049
|
+
# Remote mode: send to API
|
|
1050
|
+
self._client.set_parameters(
|
|
1051
|
+
experiment_id=self._experiment_id, data=flattened_params
|
|
1052
|
+
)
|
|
1053
|
+
|
|
1054
|
+
if self._storage:
|
|
1055
|
+
# Local mode: write to file
|
|
1056
|
+
self._storage.write_parameters(
|
|
1057
|
+
owner=self.owner,
|
|
1058
|
+
project=self.project,
|
|
1059
|
+
prefix=self._folder_path,
|
|
1060
|
+
data=flattened_params,
|
|
1061
|
+
)
|
|
1062
|
+
|
|
1063
|
+
def _read_params(self) -> Optional[Dict[str, Any]]:
|
|
1064
|
+
"""
|
|
1065
|
+
Internal method to read parameters.
|
|
1082
1066
|
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1067
|
+
Returns:
|
|
1068
|
+
Flattened parameters dict, or None if no parameters exist
|
|
1069
|
+
"""
|
|
1070
|
+
params = None
|
|
1071
|
+
|
|
1072
|
+
if self._client:
|
|
1073
|
+
# Remote mode: fetch from API
|
|
1074
|
+
try:
|
|
1075
|
+
params = self._client.get_parameters(experiment_id=self._experiment_id)
|
|
1076
|
+
except Exception:
|
|
1077
|
+
# Parameters don't exist yet
|
|
1078
|
+
params = None
|
|
1089
1079
|
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
metric_name=name
|
|
1096
|
-
)
|
|
1080
|
+
if self._storage:
|
|
1081
|
+
# Local mode: read from file
|
|
1082
|
+
params = self._storage.read_parameters(
|
|
1083
|
+
owner=self.owner, project=self.project, prefix=self._folder_path
|
|
1084
|
+
)
|
|
1097
1085
|
|
|
1098
|
-
|
|
1086
|
+
return params
|
|
1099
1087
|
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1088
|
+
@property
|
|
1089
|
+
def metrics(self) -> "MetricsManager":
|
|
1090
|
+
"""
|
|
1091
|
+
Get a MetricsManager for metric operations.
|
|
1103
1092
|
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
result = None
|
|
1093
|
+
Supports two usage patterns:
|
|
1094
|
+
1. Named: experiment.metrics("train").log(loss=0.5, accuracy=0.9)
|
|
1095
|
+
2. Unnamed: experiment.metrics.log(epoch=epoch).flush()
|
|
1108
1096
|
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
result = self._client.list_metrics(experiment_id=self._experiment_id)
|
|
1097
|
+
Returns:
|
|
1098
|
+
MetricsManager instance
|
|
1112
1099
|
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
result = self._storage.list_metrics(
|
|
1116
|
-
project=self.project,
|
|
1117
|
-
experiment=self.name
|
|
1118
|
-
)
|
|
1100
|
+
Raises:
|
|
1101
|
+
RuntimeError: If experiment is not open
|
|
1119
1102
|
|
|
1120
|
-
|
|
1103
|
+
Examples:
|
|
1104
|
+
# Named metric with multi-field logging
|
|
1105
|
+
experiment.metrics("train").log(loss=0.5, accuracy=0.9)
|
|
1106
|
+
experiment.metrics("eval").log(loss=0.6, accuracy=0.85)
|
|
1107
|
+
experiment.metrics.log(epoch=epoch).flush()
|
|
1121
1108
|
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1109
|
+
# Nested dict pattern (single call for all metrics)
|
|
1110
|
+
experiment.metrics.log(
|
|
1111
|
+
epoch=100,
|
|
1112
|
+
train=dict(loss=0.142, accuracy=0.80),
|
|
1113
|
+
eval=dict(loss=0.201, accuracy=0.76)
|
|
1114
|
+
)
|
|
1126
1115
|
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
"""Get the full experiment data (only available after open in remote mode)."""
|
|
1130
|
-
return self._experiment_data
|
|
1116
|
+
# Read data
|
|
1117
|
+
data = experiment.metrics("train").read(start_index=0, limit=100)
|
|
1131
1118
|
|
|
1119
|
+
# Get statistics
|
|
1120
|
+
stats = experiment.metrics("train").stats()
|
|
1121
|
+
"""
|
|
1122
|
+
from .metric import MetricsManager
|
|
1123
|
+
|
|
1124
|
+
if not self._is_open:
|
|
1125
|
+
raise RuntimeError(
|
|
1126
|
+
"Cannot use metrics on closed experiment. "
|
|
1127
|
+
"Use 'with Experiment(...).run as experiment:' or call experiment.run.start() first."
|
|
1128
|
+
)
|
|
1129
|
+
|
|
1130
|
+
# Cache the MetricsManager instance to preserve MetricBuilder cache across calls
|
|
1131
|
+
if self._metrics_manager is None:
|
|
1132
|
+
self._metrics_manager = MetricsManager(self)
|
|
1133
|
+
return self._metrics_manager
|
|
1134
|
+
|
|
1135
|
+
def _append_to_metric(
|
|
1136
|
+
self,
|
|
1137
|
+
name: Optional[str],
|
|
1138
|
+
data: Dict[str, Any],
|
|
1139
|
+
description: Optional[str],
|
|
1140
|
+
tags: Optional[List[str]],
|
|
1141
|
+
metadata: Optional[Dict[str, Any]],
|
|
1142
|
+
) -> Dict[str, Any]:
|
|
1143
|
+
"""
|
|
1144
|
+
Internal method to append a single data point to a metric.
|
|
1132
1145
|
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1146
|
+
Args:
|
|
1147
|
+
name: Metric name (can be None for unnamed metrics)
|
|
1148
|
+
data: Data point (flexible schema)
|
|
1149
|
+
description: Optional metric description
|
|
1150
|
+
tags: Optional tags
|
|
1151
|
+
metadata: Optional metadata
|
|
1152
|
+
|
|
1153
|
+
Returns:
|
|
1154
|
+
Dict with metricId, index, bufferedDataPoints, chunkSize
|
|
1155
|
+
"""
|
|
1156
|
+
result = None
|
|
1157
|
+
|
|
1158
|
+
if self._client:
|
|
1159
|
+
# Remote mode: append via API
|
|
1160
|
+
result = self._client.append_to_metric(
|
|
1161
|
+
experiment_id=self._experiment_id,
|
|
1162
|
+
metric_name=name,
|
|
1163
|
+
data=data,
|
|
1164
|
+
description=description,
|
|
1165
|
+
tags=tags,
|
|
1166
|
+
metadata=metadata,
|
|
1167
|
+
)
|
|
1168
|
+
|
|
1169
|
+
if self._storage:
|
|
1170
|
+
# Local mode: append to local storage
|
|
1171
|
+
result = self._storage.append_to_metric(
|
|
1172
|
+
owner=self.owner,
|
|
1173
|
+
project=self.project,
|
|
1174
|
+
prefix=self._folder_path,
|
|
1175
|
+
metric_name=name,
|
|
1176
|
+
data=data,
|
|
1177
|
+
description=description,
|
|
1178
|
+
tags=tags,
|
|
1179
|
+
metadata=metadata,
|
|
1180
|
+
)
|
|
1181
|
+
|
|
1182
|
+
return result
|
|
1183
|
+
|
|
1184
|
+
def _append_batch_to_metric(
|
|
1185
|
+
self,
|
|
1186
|
+
name: Optional[str],
|
|
1187
|
+
data_points: List[Dict[str, Any]],
|
|
1188
|
+
description: Optional[str],
|
|
1189
|
+
tags: Optional[List[str]],
|
|
1190
|
+
metadata: Optional[Dict[str, Any]],
|
|
1191
|
+
) -> Dict[str, Any]:
|
|
1138
1192
|
"""
|
|
1139
|
-
|
|
1193
|
+
Internal method to append multiple data points to a metric.
|
|
1140
1194
|
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1195
|
+
Args:
|
|
1196
|
+
name: Metric name (can be None for unnamed metrics)
|
|
1197
|
+
data_points: List of data points
|
|
1198
|
+
description: Optional metric description
|
|
1199
|
+
tags: Optional tags
|
|
1200
|
+
metadata: Optional metadata
|
|
1201
|
+
|
|
1202
|
+
Returns:
|
|
1203
|
+
Dict with metricId, startIndex, endIndex, count
|
|
1204
|
+
"""
|
|
1205
|
+
result = None
|
|
1206
|
+
|
|
1207
|
+
if self._client:
|
|
1208
|
+
# Remote mode: append batch via API
|
|
1209
|
+
result = self._client.append_batch_to_metric(
|
|
1210
|
+
experiment_id=self._experiment_id,
|
|
1211
|
+
metric_name=name,
|
|
1212
|
+
data_points=data_points,
|
|
1213
|
+
description=description,
|
|
1214
|
+
tags=tags,
|
|
1215
|
+
metadata=metadata,
|
|
1216
|
+
)
|
|
1217
|
+
|
|
1218
|
+
if self._storage:
|
|
1219
|
+
# Local mode: append batch to local storage
|
|
1220
|
+
result = self._storage.append_batch_to_metric(
|
|
1221
|
+
owner=self.owner,
|
|
1222
|
+
project=self.project,
|
|
1223
|
+
prefix=self._folder_path,
|
|
1224
|
+
metric_name=name,
|
|
1225
|
+
data_points=data_points,
|
|
1226
|
+
description=description,
|
|
1227
|
+
tags=tags,
|
|
1228
|
+
metadata=metadata,
|
|
1229
|
+
)
|
|
1230
|
+
|
|
1231
|
+
return result
|
|
1232
|
+
|
|
1233
|
+
def _read_metric_data(
|
|
1234
|
+
self, name: str, start_index: int, limit: int
|
|
1235
|
+
) -> Dict[str, Any]:
|
|
1236
|
+
"""
|
|
1237
|
+
Internal method to read data points from a metric.
|
|
1238
|
+
|
|
1239
|
+
Args:
|
|
1240
|
+
name: Metric name
|
|
1241
|
+
start_index: Starting index
|
|
1242
|
+
limit: Max points to read
|
|
1243
|
+
|
|
1244
|
+
Returns:
|
|
1245
|
+
Dict with data, startIndex, endIndex, total, hasMore
|
|
1246
|
+
"""
|
|
1247
|
+
result = None
|
|
1248
|
+
|
|
1249
|
+
if self._client:
|
|
1250
|
+
# Remote mode: read via API
|
|
1251
|
+
result = self._client.read_metric_data(
|
|
1252
|
+
experiment_id=self._experiment_id,
|
|
1253
|
+
metric_name=name,
|
|
1254
|
+
start_index=start_index,
|
|
1255
|
+
limit=limit,
|
|
1256
|
+
)
|
|
1257
|
+
|
|
1258
|
+
if self._storage:
|
|
1259
|
+
# Local mode: read from local storage
|
|
1260
|
+
result = self._storage.read_metric_data(
|
|
1261
|
+
owner=self.owner,
|
|
1262
|
+
project=self.project,
|
|
1263
|
+
prefix=self._folder_path,
|
|
1264
|
+
metric_name=name,
|
|
1265
|
+
start_index=start_index,
|
|
1266
|
+
limit=limit,
|
|
1267
|
+
)
|
|
1268
|
+
|
|
1269
|
+
return result
|
|
1270
|
+
|
|
1271
|
+
def _get_metric_stats(self, name: str) -> Dict[str, Any]:
|
|
1272
|
+
"""
|
|
1273
|
+
Internal method to get metric statistics.
|
|
1274
|
+
|
|
1275
|
+
Args:
|
|
1276
|
+
name: Metric name
|
|
1277
|
+
|
|
1278
|
+
Returns:
|
|
1279
|
+
Dict with metric stats
|
|
1280
|
+
"""
|
|
1281
|
+
result = None
|
|
1282
|
+
|
|
1283
|
+
if self._client:
|
|
1284
|
+
# Remote mode: get stats via API
|
|
1285
|
+
result = self._client.get_metric_stats(
|
|
1286
|
+
experiment_id=self._experiment_id, metric_name=name
|
|
1287
|
+
)
|
|
1288
|
+
|
|
1289
|
+
if self._storage:
|
|
1290
|
+
# Local mode: get stats from local storage
|
|
1291
|
+
result = self._storage.get_metric_stats(
|
|
1292
|
+
owner=self.owner,
|
|
1293
|
+
project=self.project,
|
|
1294
|
+
prefix=self._folder_path,
|
|
1295
|
+
metric_name=name,
|
|
1296
|
+
)
|
|
1297
|
+
|
|
1298
|
+
return result
|
|
1299
|
+
|
|
1300
|
+
def _list_metrics(self) -> List[Dict[str, Any]]:
|
|
1301
|
+
"""
|
|
1302
|
+
Internal method to list all metrics in experiment.
|
|
1151
1303
|
|
|
1152
|
-
|
|
1153
|
-
|
|
1304
|
+
Returns:
|
|
1305
|
+
List of metric summaries
|
|
1154
1306
|
"""
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1307
|
+
result = None
|
|
1308
|
+
|
|
1309
|
+
if self._client:
|
|
1310
|
+
# Remote mode: list via API
|
|
1311
|
+
result = self._client.list_metrics(experiment_id=self._experiment_id)
|
|
1312
|
+
|
|
1313
|
+
if self._storage:
|
|
1314
|
+
# Local mode: list from local storage
|
|
1315
|
+
result = self._storage.list_metrics(
|
|
1316
|
+
owner=self.owner, project=self.project, prefix=self._folder_path
|
|
1317
|
+
)
|
|
1318
|
+
|
|
1319
|
+
return result or []
|
|
1320
|
+
|
|
1321
|
+
@property
|
|
1322
|
+
def id(self) -> Optional[str]:
|
|
1323
|
+
"""Get the experiment ID (only available after open in remote mode)."""
|
|
1324
|
+
return self._experiment_id
|
|
1325
|
+
|
|
1326
|
+
@property
|
|
1327
|
+
def data(self) -> Optional[Dict[str, Any]]:
|
|
1328
|
+
"""Get the full experiment data (only available after open in remote mode)."""
|
|
1329
|
+
return self._experiment_data
|
|
1330
|
+
|
|
1331
|
+
|
|
1332
|
+
def ml_dash_experiment(prefix: str, **kwargs) -> Callable:
|
|
1333
|
+
"""
|
|
1334
|
+
Decorator for wrapping functions with an ML-Dash experiment.
|
|
1335
|
+
|
|
1336
|
+
Args:
|
|
1337
|
+
prefix: Full experiment path like "owner/project/folder.../name"
|
|
1338
|
+
**kwargs: Additional arguments passed to Experiment constructor
|
|
1339
|
+
|
|
1340
|
+
Usage:
|
|
1341
|
+
@ml_dash_experiment(
|
|
1342
|
+
prefix="ge/my-project/experiments/my-experiment",
|
|
1343
|
+
dash_url="https://api.dash.ml"
|
|
1344
|
+
)
|
|
1345
|
+
def train_model():
|
|
1346
|
+
# Function code here
|
|
1347
|
+
pass
|
|
1348
|
+
|
|
1349
|
+
The decorated function will receive an 'experiment' keyword argument
|
|
1350
|
+
with the active Experiment instance.
|
|
1351
|
+
"""
|
|
1352
|
+
|
|
1353
|
+
def decorator(func: Callable) -> Callable:
|
|
1354
|
+
@functools.wraps(func)
|
|
1355
|
+
def wrapper(*args, **func_kwargs):
|
|
1356
|
+
with Experiment(prefix=prefix, **kwargs).run as experiment:
|
|
1357
|
+
# Inject experiment into function kwargs
|
|
1358
|
+
func_kwargs["experiment"] = experiment
|
|
1359
|
+
return func(*args, **func_kwargs)
|
|
1360
|
+
|
|
1361
|
+
return wrapper
|
|
1362
|
+
|
|
1363
|
+
return decorator
|