ml-dash 0.6.2__py3-none-any.whl → 0.6.2rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml_dash/__init__.py +64 -36
- ml_dash/auth/token_storage.py +226 -267
- ml_dash/auto_start.py +15 -28
- ml_dash/cli.py +2 -16
- ml_dash/cli_commands/download.py +667 -757
- ml_dash/cli_commands/list.py +13 -146
- ml_dash/cli_commands/login.py +183 -190
- ml_dash/cli_commands/upload.py +1141 -1291
- ml_dash/client.py +6 -79
- ml_dash/config.py +119 -119
- ml_dash/experiment.py +1034 -1234
- ml_dash/files.py +224 -339
- ml_dash/log.py +7 -7
- ml_dash/metric.py +100 -359
- ml_dash/params.py +6 -6
- ml_dash/remote_auto_start.py +17 -20
- ml_dash/run.py +65 -211
- ml_dash/storage.py +1081 -1051
- {ml_dash-0.6.2.dist-info → ml_dash-0.6.2rc1.dist-info}/METADATA +14 -12
- ml_dash-0.6.2rc1.dist-info/RECORD +30 -0
- {ml_dash-0.6.2.dist-info → ml_dash-0.6.2rc1.dist-info}/WHEEL +1 -1
- ml_dash/cli_commands/api.py +0 -165
- ml_dash/cli_commands/profile.py +0 -92
- ml_dash/snowflake.py +0 -173
- ml_dash-0.6.2.dist-info/RECORD +0 -33
- {ml_dash-0.6.2.dist-info → ml_dash-0.6.2rc1.dist-info}/entry_points.txt +0 -0
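The largest change is in ml_dash/experiment.py: the 0.6.2 constructor takes a single prefix path ("owner/project/folder.../name", stored under a ".dash" root by default), while 0.6.2rc1 takes explicit name/project arguments plus either remote= or local_path=. Below is a minimal usage sketch of the 0.6.2rc1 style, assuming the docstring examples in the diff that follows reflect the public API and that Experiment is importable from the ml_dash package top level (both are assumptions inferred from the diff, not verified against the published wheel):

    from ml_dash import Experiment  # assumed import path, inferred from the diff below

    # 0.6.2 style (removed): Experiment(prefix="ge/my-project/experiments/exp1", dash_root=".dash")
    experiment = Experiment(
        name="my-experiment",
        project="my-project",
        local_path=".ml-dash",  # or remote="https://api.dash.ml" with api_key=...
    )

    # RunManager handles the lifecycle (start/complete/fail) as a context manager
    with experiment.run as exp:
        exp.params.set(lr=0.001)
        exp.log("Training started", level="info")
        exp.metrics("train_loss").append(value=0.5, step=1)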
ml_dash/experiment.py
CHANGED
@@ -3,1361 +3,1161 @@ Experiment class for ML-Dash SDK.

 Supports three usage styles:
 1. Decorator: @ml_dash_experiment(...)
-2. Context manager: with Experiment(...)
+2. Context manager: with Experiment(...) as exp:
 3. Direct instantiation: exp = Experiment(...)
 """

-import
-from datetime import datetime
+from typing import Optional, Dict, Any, List, Callable
 from enum import Enum
+import functools
 from pathlib import Path
-from
+from datetime import datetime

 from .client import RemoteClient
-from .
-from .log import
+from .storage import LocalStorage
+from .log import LogLevel, LogBuilder
 from .params import ParametersBuilder
+from .files import FilesAccessor, BindrsBuilder
 from .run import RUN
-from .storage import LocalStorage

-    Handles both regular attributes and property descriptors on the EXP class.
-    # EXP is a params_proto class where properties are stored in EXP.__dict__
-    attr = RUN.__dict__.get(attr_name)
-    if isinstance(attr, property):
-        # For properties, call the getter with EXP as self
-        return str(attr.fget(RUN))
-    else:
-        # For regular attributes, access via getattr
-        return str(getattr(RUN, attr_name))
-        Get the current folder prefix for this experiment.
-        return self._experiment._folder_path
-        Set the folder prefix for this experiment before initialization.
-            # Template with experiment name
-            dxp.run.prefix = "ge/iclr_2024/{EXP.name}"
-            with dxp.run:
-                dxp.params.set(lr=0.001)
-        """
-        if self._experiment._is_open:
-            raise RuntimeError(
-                "Cannot change prefix after experiment is initialized. "
-                "Set prefix before calling start() or entering 'with' block."
-            )
-        if value:
-            # Sync EXP with this experiment's values
-            RUN.name = self._experiment.name
-            RUN.description = self._experiment.description
-            # Generate id/timestamp if not already set
-            if RUN.id is None:
-                RUN._init_run()
-            # Format with EXP - use helper to expand properties correctly
-            value = _expand_exp_template(value)
-        # Update the folder on the experiment
-        self._experiment._folder_path = value
+class OperationMode(Enum):
+    """Operation mode for the experiment."""
+    LOCAL = "local"
+    REMOTE = "remote"
+    HYBRID = "hybrid"  # Future: sync local to remote
+
+
+class RunManager:
+    """
+    Lifecycle manager for experiments.
+
+    Supports three usage patterns:
+    1. Method calls: experiment.run.start(), experiment.run.complete()
+    2. Context manager: with Experiment(...).run as exp:
+    3. Decorator: @exp.run or @Experiment(...).run
+    """
+
+    def __init__(self, experiment: "Experiment"):
+        """
+        Initialize RunManager.
+
+        Args:
+            experiment: Parent Experiment instance
+        """
+        self._experiment = experiment
+
+    def start(self) -> "Experiment":
+        """
+        Start the experiment (sets status to RUNNING).
+
+        Returns:
+            The experiment instance for chaining
+        """
+        return self._experiment._open()
+
+    def complete(self) -> None:
+        """Mark experiment as completed (status: COMPLETED)."""
+        self._experiment._close(status="COMPLETED")
+
+    def fail(self) -> None:
+        """Mark experiment as failed (status: FAILED)."""
+        self._experiment._close(status="FAILED")
+
+    def cancel(self) -> None:
+        """Mark experiment as cancelled (status: CANCELLED)."""
+        self._experiment._close(status="CANCELLED")
+
+    @property
+    def folder(self) -> Optional[str]:
+        """
+        Get the current folder for this experiment.
+
+        Returns:
+            Current folder path or None
+
+        Example:
+            current_folder = exp.run.folder
+        """
+        return self._experiment.folder
+
+    @folder.setter
+    def folder(self, value: Optional[str]) -> None:
+        """
+        Set the folder for this experiment before initialization.
+
+        This can ONLY be set before the experiment is started (initialized).
+        Once the experiment is opened, the folder cannot be changed.
+
+        Supports template variables:
+        - {RUN.name} - Experiment name
+        - {RUN.project} - Project name
+
+        Args:
+            value: Folder path with optional template variables
+                   (e.g., "experiments/{RUN.name}" or None)
+
+        Raises:
+            RuntimeError: If experiment is already initialized/open
+
+        Examples:
+            from ml_dash import dxp
+
+            # Static folder
+            dxp.run.folder = "experiments/vision/resnet"
+
+            # Template with experiment name
+            dxp.run.folder = "/iclr_2024/{RUN.name}"
+
+            # Template with multiple variables
+            dxp.run.folder = "{RUN.project}/experiments/{RUN.name}"
+
+            # Now start the experiment
+            with dxp.run:
+                dxp.params.set(lr=0.001)
+        """
+        if self._experiment._is_open:
+            raise RuntimeError(
+                "Cannot change folder after experiment is initialized. "
+                "Set folder before calling start() or entering 'with' block."
+            )
+
+        # Check if this is a template (contains {RUN.) or static folder
+        if value and '{RUN.' in value:
+            # Store the template - it will be formatted when the run starts
+            self._experiment._folder_template = value
+        else:
+            # Static folder - set directly
+            self._experiment.folder = value
+
+    def __enter__(self) -> "Experiment":
+        """Context manager entry - starts the experiment."""
+        return self.start()
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Context manager exit - completes or fails the experiment."""
+        if exc_type is not None:
+            self.fail()
+        else:
+            self.complete()
+        return False
+
+    def __call__(self, func: Callable) -> Callable:
+        """
+        Decorator support for wrapping functions with experiment lifecycle.
+
+        Usage:
+            @exp.run
+            def train(exp):
+                exp.log("Training...")
+        """
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            with self as exp:
+                return func(exp, *args, **kwargs)
+        return wrapper
-    Prefix format: {owner}/{project}/path.../[name]
-    - owner: First segment (e.g., your username)
-    - project: Second segment (e.g., project name)
-    - path: Remaining segments form the folder structure
-    - name: Derived from last segment (may be a seed/id)
-    Usage examples:
-        # Local mode (default)
-        experiment = Experiment(prefix="ge/my-project/experiments/exp1")
-        # Custom local storage directory
-        experiment = Experiment(
-            prefix="ge/my-project/experiments/exp1",
-            dash_root=".dash"
-        )
-        # Remote mode with custom server
-        experiment = Experiment(
-            prefix="ge/my-project/experiments/exp1",
-            dash_url="https://custom-server.com"
-        )
-        # Context manager
-        with Experiment(prefix="ge/my-project/exp1").run as exp:
-            exp.logs.info("Training started")
-        # Decorator
-        @ml_dash_experiment(prefix="ge/ws/experiments/exp", dash_url="https://api.dash.ml")
-        def train():
-            ...
-    """
-    def __init__(
-        self,
-        prefix: Optional[str] = None,
-        *,
-        readme: Optional[str] = None,
-        # Ge: this is an instance only property
-        tags: Optional[List[str]] = None,
-        # Ge: Bindrs is an instance-only property, it is not set inside the RUN namespace.
-        bindrs: Optional[List[str]] = None,
-        # Ge: This is also instance-only
-        metadata: Optional[Dict[str, Any]] = None,
-        # Mode configuration
-        dash_url: Optional[Union[str, bool]] = None,
-        dash_root: Optional[str] = ".dash",
-        # Deprecated parameters (for backward compatibility)
-        remote: Optional[Union[str, bool]] = None,
-        local_path: Optional[str] = None,
-        # Internal parameters
-        _write_protected: bool = False,
-        # The rest of the params go directly to populate the RUN object.
-        **run_params: Unpack[RUN],
-    ):
-        """
-        Initialize an ML-Dash experiment.
-        Args:
-            prefix: Full experiment path like "owner/project/folder.../name" (defaults to DASH_PREFIX env var).
-            readme: Optional experiment readme/description
-            tags: Optional list of tags
-            bindrs: Optional list of bindrs
-            metadata: Optional metadata dict
-            dash_url: Remote API URL. True=use EXP.API_URL, str=custom URL, None=no remote. Token auto-loaded from ~/.dash/token.enc
-            dash_root: Local storage root path (defaults to ".dash"). Set to None for remote-only mode.
-            remote: (Deprecated) Use dash_url instead
-            local_path: (Deprecated) Use dash_root instead
-            _write_protected: Internal parameter - if True, experiment becomes immutable after creation
-        Mode Selection:
-            - Default (no dash_url): Local-only mode (writes to ".dash/")
-            - dash_url + dash_root: Hybrid mode (local + remote)
-            - dash_url + dash_root=None: Remote-only mode
-        """
-        # Resolve prefix from environment variable if not provided
-        self._folder_path = prefix or os.getenv("DASH_PREFIX")
-        if not self._folder_path:
-            raise ValueError("prefix (or DASH_PREFIX env var) must be provided")
-        # Parse prefix: {owner}/{project}/path.../[name]
-        parts = self._folder_path.strip("/").split("/")
-        self.owner = parts[0]
-        self.project = parts[1]
-        # Name is the last segment (may be a seed/id, not always a meaningful name)
-        self.name = parts[-1] if len(parts) > 2 else parts[1]
-        # Determine operation mode
-        # dash_root defaults to ".dash", dash_url defaults to None
-        if dash_url and dash_root:
-            self.mode = OperationMode.HYBRID
-        elif dash_url:
-            self.mode = OperationMode.REMOTE
-        else:
-            self.mode = OperationMode.LOCAL
-        if self.mode in (OperationMode.REMOTE, OperationMode.HYBRID):
-            # RemoteClient will auto-load token from ~/.dash/token.enc
-            # Use RUN.api_url if dash_url=True (boolean), otherwise use the provided URL
-            api_url = RUN.api_url if dash_url is True else dash_url
-            self._client = RemoteClient(base_url=api_url)
-        if self.mode in (OperationMode.LOCAL, OperationMode.HYBRID):
-            self._storage = LocalStorage(root_path=Path(dash_root))
-        if self._storage:
-            # Local mode: create experiment directory structure
-            self._storage.create_experiment(
-                owner=self.owner,
-                project=self.project,
-                prefix=self._folder_path,
-                description=self.readme,
-                tags=self.tags,
-                bindrs=self._bindrs_list,
-                metadata=self.metadata,
-            )
-        # Reset RUN for next experiment
-        # TODO: RUN._reset() - method doesn't exist
-        # RUN._reset()
+class Experiment:
+    """
+    ML-Dash experiment for metricing experiments.
+
+    Usage examples:
+
+        # Remote mode
+        experiment = Experiment(
+            name="my-experiment",
+            project="my-project",
+            remote="https://api.dash.ml",
+            api_key="your-jwt-token"
+        )
+
+        # Local mode
+        experiment = Experiment(
+            name="my-experiment",
+            project="my-project",
+            local_path=".ml-dash"
+        )
+
+        # Context manager
+        with Experiment(...) as exp:
+            exp.log(...)
+
+        # Decorator
+        @ml_dash_experiment(name="exp", project="ws", remote="...")
+        def train():
+            ...
+    """
+
+    def __init__(
+        self,
+        name: str,
+        project: str,
+        *,
+        description: Optional[str] = None,
+        tags: Optional[List[str]] = None,
+        bindrs: Optional[List[str]] = None,
+        folder: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+        # Mode configuration
+        remote: Optional[str] = None,
+        api_key: Optional[str] = None,
+        local_path: Optional[str] = None,
+        # Internal parameters
+        _write_protected: bool = False,
+    ):
+        """
+        Initialize an ML-Dash experiment.
+
+        Args:
+            name: Experiment name (unique within project)
+            project: Project name
+            description: Optional experiment description
+            tags: Optional list of tags
+            bindrs: Optional list of bindrs
+            folder: Optional folder path (e.g., "/experiments/baseline")
+            metadata: Optional metadata dict
+            remote: Remote API URL (e.g., "https://api.dash.ml")
+            api_key: JWT token for authentication (auto-loaded from storage if not provided)
+            local_path: Local storage root path (for local mode)
+            _write_protected: Internal parameter - if True, experiment becomes immutable after creation
+        """
+        self.name = name
+        self.project = project
+        self.description = description
+        self.tags = tags
+        self._bindrs_list = bindrs
+        self.folder = folder
+        self._write_protected = _write_protected
+        self.metadata = metadata
+
+        # Determine operation mode
+        if remote and local_path:
+            self.mode = OperationMode.HYBRID
+        elif remote:
+            self.mode = OperationMode.REMOTE
+        elif local_path:
+            self.mode = OperationMode.LOCAL
+        else:
+            raise ValueError(
+                "Must specify either 'remote' (with api_key) or 'local_path'"
+            )
+
+        # Initialize backend
+        self._client: Optional[RemoteClient] = None
+        self._storage: Optional[LocalStorage] = None
+        self._experiment_id: Optional[str] = None
+        self._experiment_data: Optional[Dict[str, Any]] = None
+        self._is_open = False
+        self._metrics_manager: Optional['MetricsManager'] = None  # Cached metrics manager
+        self._folder_template: Optional[str] = None  # Template for folder path
+
+        if self.mode in (OperationMode.REMOTE, OperationMode.HYBRID):
+            # api_key can be None - RemoteClient will auto-load from storage
+            self._client = RemoteClient(base_url=remote, api_key=api_key)
+
+        if self.mode in (OperationMode.LOCAL, OperationMode.HYBRID):
+            if not local_path:
+                raise ValueError("local_path is required for local mode")
+            self._storage = LocalStorage(root_path=Path(local_path))
+
+    def _open(self) -> "Experiment":
+        """
+        Internal method to open the experiment (create or update on server/filesystem).
+
+        Returns:
+            self for chaining
+        """
+        if self._is_open:
+            return self
+
+        # Initialize RUN with experiment values
+        RUN.name = self.name
+        RUN.project = self.project
+        RUN.description = self.description
+        RUN._init_run()  # Generate id and timestamp
+
+        # Format folder template if present
+        if self._folder_template:
+            self.folder = RUN._format(self._folder_template)
+
+        if self._client:
+            # Remote mode: create/update experiment via API
+            response = self._client.create_or_update_experiment(
+                project=self.project,
+                name=self.name,
+                description=self.description,
+                tags=self.tags,
+                bindrs=self._bindrs_list,
+                folder=self.folder,
+                write_protected=self._write_protected,
+                metadata=self.metadata,
+            )
+            self._experiment_data = response
+            self._experiment_id = response["experiment"]["id"]
+
+        if self._storage:
+            # Local mode: create experiment directory structure
+            self._storage.create_experiment(
+                project=self.project,
+                name=self.name,
+                description=self.description,
+                tags=self.tags,
+                bindrs=self._bindrs_list,
+                folder=self.folder,
+                metadata=self.metadata,
+            )
+
+        self._is_open = True
+        return self
+
+    def _close(self, status: str = "COMPLETED"):
+        """
+        Internal method to close the experiment and update status.
+
+        Args:
+            status: Status to set - "COMPLETED" (default), "FAILED", or "CANCELLED"
+        """
+        if not self._is_open:
+            return
+
+        # Flush any pending writes
+        if self._storage:
+            self._storage.flush()
+
+        # Update experiment status in remote mode
+        if self._client and self._experiment_id:
+            try:
+                self._client.update_experiment_status(
+                    experiment_id=self._experiment_id,
+                    status=status
+                )
+            except Exception as e:
+                # Log error but don't fail the close operation
+                print(f"Warning: Failed to update experiment status: {e}")
+
+        self._is_open = False
+
+        # Reset RUN for next experiment
+        RUN._reset()
+
+    @property
+    def run(self) -> RunManager:
+        """
+        Get the RunManager for lifecycle operations.
+
+        Usage:
+            # Method calls
+            experiment.run.start()
+            experiment.run.complete()
+
+            # Context manager
+            with Experiment(...).run as exp:
+                exp.log("Training...")
+
+            # Decorator
+            @experiment.run
+            def train(exp):
+                exp.log("Training...")
+
+        Returns:
+            RunManager instance
+        """
+        return RunManager(self)
+
+    @property
+    def params(self) -> ParametersBuilder:
+        """
+        Get a ParametersBuilder for parameter operations.
+
+        Usage:
+            # Set parameters
+            experiment.params.set(lr=0.001, batch_size=32)
+
+            # Get parameters
+            params = experiment.params.get()
+
+        Returns:
+            ParametersBuilder instance
+
+        Raises:
+            RuntimeError: If experiment is not open
+        """
+        if not self._is_open:
+            raise RuntimeError(
+                "Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
+                "Example:\n"
+                "  with dxp.run:\n"
+                "    dxp.params.set(lr=0.001)"
+            )
+
+        return ParametersBuilder(self)
-    @property
-    def logs(self) -> LogBuilder:
-        """
-        Get a LogBuilder for fluent-style logging.
-            exp.logs.warn("GPU memory low", memory_available="1GB")
-            exp.logs.debug("Debug info", step=100)
-        """
-        return LogBuilder(self, metadata=None)
-        # Upload file - supports flexible syntax
-        experiment.files("checkpoints").upload("./model.pt", to="checkpoint.pt")
-        experiment.files(prefix="checkpoints").upload("./model.pt")
-        experiment.files().upload("./model.pt", to="models/model.pt")  # root
+    def log(
+        self,
+        message: Optional[str] = None,
+        level: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+        **extra_metadata
+    ) -> Optional[LogBuilder]:
+        """
+        Create a log entry or return a LogBuilder for fluent API.
+
+        This method supports two styles:
+
+        1. Fluent style (no message provided):
+           Returns a LogBuilder that allows chaining with level methods.
+
+           Examples:
+               experiment.log(metadata={"epoch": 1}).info("Training started")
+               experiment.log().error("Failed", error_code=500)
+
+        2. Traditional style (message provided):
+           Writes the log immediately and returns None.
+
+           Examples:
+               experiment.log("Training started", level="info", epoch=1)
+               experiment.log("Training started")  # Defaults to "info"
+
+        Args:
+            message: Optional log message (for traditional style)
+            level: Optional log level (for traditional style, defaults to "info")
+            metadata: Optional metadata dict
+            **extra_metadata: Additional metadata as keyword arguments
+
+        Returns:
+            LogBuilder if no message provided (fluent mode)
+            None if log was written directly (traditional mode)
+
+        Raises:
+            RuntimeError: If experiment is not open
+            ValueError: If log level is invalid
+        """
+        if not self._is_open:
+            raise RuntimeError(
+                "Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
+                "Example:\n"
+                "  with dxp.run:\n"
+                "    dxp.log().info('Training started')"
+            )
+
+        # Fluent mode: return LogBuilder
+        if message is None:
+            combined_metadata = {**(metadata or {}), **extra_metadata}
+            return LogBuilder(self, combined_metadata if combined_metadata else None)
+
+        # Traditional mode: write immediately
+        level = level or LogLevel.INFO.value  # Default to "info"
+        level = LogLevel.validate(level)  # Validate level
+
+        combined_metadata = {**(metadata or {}), **extra_metadata}
+        self._write_log(
+            message=message,
+            level=level,
+            metadata=combined_metadata if combined_metadata else None,
+            timestamp=None
+        )
+        return None
+
+    def _write_log(
+        self,
+        message: str,
+        level: str,
+        metadata: Optional[Dict[str, Any]],
+        timestamp: Optional[datetime]
+    ) -> None:
+        """
+        Internal method to write a log entry immediately.
+        No buffering - writes directly to storage/remote AND stdout/stderr.
+
+        Args:
+            message: Log message
+            level: Log level (already validated)
+            metadata: Optional metadata dict
+            timestamp: Optional custom timestamp (defaults to now)
+        """
+        log_entry = {
+            "timestamp": (timestamp or datetime.utcnow()).isoformat() + "Z",
+            "level": level,
+            "message": message,
+        }
+
+        if metadata:
+            log_entry["metadata"] = metadata
+
+        # Mirror to stdout/stderr before writing to storage
+        self._print_log(message, level, metadata)
+
+        # Write immediately (no buffering)
+        if self._client:
+            # Remote mode: send to API (wrapped in array for batch API)
+            self._client.create_log_entries(
+                experiment_id=self._experiment_id,
+                logs=[log_entry]  # Single log in array
+            )
+
+        if self._storage:
+            # Local mode: write to file immediately
+            self._storage.write_log(
+                project=self.project,
+                experiment=self.name,
+                folder=self.folder,
+                message=log_entry["message"],
+                level=log_entry["level"],
+                metadata=log_entry.get("metadata"),
+                timestamp=log_entry["timestamp"]
+            )
+
+    def _print_log(
+        self,
+        message: str,
+        level: str,
+        metadata: Optional[Dict[str, Any]]
+    ) -> None:
+        """
+        Print log to stdout or stderr based on level.
+
+        ERROR and FATAL go to stderr, all others go to stdout.
+
+        Args:
+            message: Log message
+            level: Log level
+            metadata: Optional metadata dict
+        """
+        import sys
+
+        # Format the log message
+        level_upper = level.upper()
+
+        # Build metadata string if present
+        metadata_str = ""
+        if metadata:
+            # Format metadata as key=value pairs
+            pairs = [f"{k}={v}" for k, v in metadata.items()]
+            metadata_str = f" [{', '.join(pairs)}]"
+
+        # Format: [LEVEL] message [key=value, ...]
+        formatted_message = f"[{level_upper}] {message}{metadata_str}"
+
+        # Route to stdout or stderr based on level
+        if level in ("error", "fatal"):
+            print(formatted_message, file=sys.stderr)
+        else:
+            print(formatted_message, file=sys.stdout)
+
+    @property
+    def files(self) -> FilesAccessor:
+        """
+        Get a FilesAccessor for fluent file operations.
+
+        Returns:
+            FilesAccessor instance for chaining
+
+        Raises:
+            RuntimeError: If experiment is not open
+
+        Examples:
+            # Upload file
+            experiment.files("checkpoints").save(net, to="checkpoint.pt")
+
+            # List files
+            files = experiment.files("/some/location").list()
+            files = experiment.files("/models").list()
+
+            # Download file
+            experiment.files("some.text").download()
+            experiment.files("some.text").download(to="./model.pt")
+
+            # Download Files via Glob Pattern
+            file_paths = experiment.files("images").list("*.png")
+            experiment.files("images").download("*.png")
+
+            # This is equivalent to downloading to a directory
+            experiment.files.download("images/*.png", to="local_images")
+
+            # Delete files
+            experiment.files("some.text").delete()
+            experiment.files.delete("some.text")
+
+            # Specific File Types
+            dxp.files.save_text("content", to="view.yaml")
+            dxp.files.save_json(dict(hey="yo"), to="config.json")
+            dxp.files.save_blob(b"xxx", to="data.bin")
+        """
+        if not self._is_open:
+            raise RuntimeError(
+                "Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
+                "Example:\n"
+                "  with dxp.run:\n"
+                "    dxp.files('path').save()"
+            )
+
+        return FilesAccessor(self)
+
+    def bindrs(self, bindr_name: str) -> BindrsBuilder:
+        """
+        Get a BindrsBuilder for working with file collections (bindrs).
+
+        Bindrs are collections of files that can span multiple prefixes.
+
+        Args:
+            bindr_name: Name of the bindr (collection)
+
+        Returns:
+            BindrsBuilder instance for chaining
+
+        Raises:
+            RuntimeError: If experiment is not open
+
+        Examples:
+            # List files in a bindr
+            file_paths = experiment.bindrs("some-bindr").list()
+
+        Note:
+            This is a placeholder for future bindr functionality.
+        """
+        if not self._is_open:
+            raise RuntimeError(
+                "Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
+                "Example:\n"
+                "  with dxp.run:\n"
+                "    files = dxp.bindrs('my-bindr').list()"
+            )
+
+        return BindrsBuilder(self, bindr_name)
+
+    def _upload_file(
+        self,
+        file_path: str,
+        prefix: str,
+        filename: str,
+        description: Optional[str],
+        tags: Optional[List[str]],
+        metadata: Optional[Dict[str, Any]],
+        checksum: str,
+        content_type: str,
+        size_bytes: int
+    ) -> Dict[str, Any]:
+        """
+        Internal method to upload a file.
+
+        Args:
+            file_path: Local file path
+            prefix: Logical path prefix
+            filename: Original filename
+            description: Optional description
+            tags: Optional tags
+            metadata: Optional metadata
+            checksum: SHA256 checksum
+            content_type: MIME type
+            size_bytes: File size in bytes
+
+        Returns:
+            File metadata dict
+        """
+        result = None
+
+        if self._client:
+            # Remote mode: upload to API
+            result = self._client.upload_file(
+                experiment_id=self._experiment_id,
+                file_path=file_path,
+                prefix=prefix,
+                filename=filename,
+                description=description,
+                tags=tags,
+                metadata=metadata,
+                checksum=checksum,
+                content_type=content_type,
+                size_bytes=size_bytes
+            )
+
+        if self._storage:
+            # Local mode: copy to local storage
+            result = self._storage.write_file(
+                project=self.project,
+                experiment=self.name,
+                folder=self.folder,
+                file_path=file_path,
+                prefix=prefix,
+                filename=filename,
+                description=description,
+                tags=tags,
+                metadata=metadata,
+                checksum=checksum,
+                content_type=content_type,
+                size_bytes=size_bytes
+            )
+
+        return result
+
+    def _list_files(
+        self,
+        prefix: Optional[str] = None,
+        tags: Optional[List[str]] = None
+    ) -> List[Dict[str, Any]]:
+        """
+        Internal method to list files.
+
+        Args:
+            prefix: Optional prefix filter
+            tags: Optional tags filter
+
+        Returns:
+            List of file metadata dicts
+        """
+        files = []
+
+        if self._client:
+            # Remote mode: fetch from API
+            files = self._client.list_files(
+                experiment_id=self._experiment_id,
+                prefix=prefix,
+                tags=tags
+            )
+
+        if self._storage:
+            # Local mode: read from metadata file
+            files = self._storage.list_files(
+                project=self.project,
+                experiment=self.name,
+                prefix=prefix,
+                tags=tags
+            )
+
+        return files
+
|
|
731
|
+
self,
|
|
732
|
+
file_id: str,
|
|
733
|
+
dest_path: Optional[str] = None
|
|
734
|
+
) -> str:
|
|
735
|
+
"""
|
|
736
|
+
Internal method to download a file.
|
|
737
|
+
|
|
738
|
+
Args:
|
|
739
|
+
file_id: File ID
|
|
740
|
+
dest_path: Optional destination path (defaults to original filename)
|
|
741
|
+
|
|
742
|
+
Returns:
|
|
743
|
+
Path to downloaded file
|
|
744
|
+
"""
|
|
745
|
+
if self._client:
|
|
746
|
+
# Remote mode: download from API
|
|
747
|
+
return self._client.download_file(
|
|
748
|
+
experiment_id=self._experiment_id,
|
|
749
|
+
file_id=file_id,
|
|
750
|
+
dest_path=dest_path
|
|
751
|
+
)
|
|
728
752
|
|
|
729
|
-
|
|
753
|
+
if self._storage:
|
|
754
|
+
# Local mode: copy from local storage
|
|
755
|
+
return self._storage.read_file(
|
|
756
|
+
project=self.project,
|
|
757
|
+
experiment=self.name,
|
|
758
|
+
file_id=file_id,
|
|
759
|
+
dest_path=dest_path
|
|
760
|
+
)
|
|
730
761
|
|
|
731
|
-
|
|
732
|
-
message: Log message
|
|
733
|
-
level: Log level
|
|
734
|
-
metadata: Optional metadata dict
|
|
735
|
-
"""
|
|
736
|
-
import sys
|
|
762
|
+
raise RuntimeError("No client or storage configured")
|
|
737
763
|
|
|
738
|
-
|
|
739
|
-
|
|
764
|
+
def _delete_file(self, file_id: str) -> Dict[str, Any]:
|
|
765
|
+
"""
|
|
766
|
+
Internal method to delete a file.
|
|
740
767
|
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
if metadata:
|
|
744
|
-
# Format metadata as key=value pairs
|
|
745
|
-
pairs = [f"{k}={v}" for k, v in metadata.items()]
|
|
746
|
-
metadata_str = f" [{', '.join(pairs)}]"
|
|
768
|
+
Args:
|
|
769
|
+
file_id: File ID
|
|
747
770
|
|
|
748
|
-
|
|
749
|
-
|
|
771
|
+
Returns:
|
|
772
|
+
Dict with id and deletedAt
|
|
773
|
+
"""
|
|
774
|
+
result = None
|
|
750
775
|
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
776
|
+
if self._client:
|
|
777
|
+
# Remote mode: delete via API
|
|
778
|
+
result = self._client.delete_file(
|
|
779
|
+
experiment_id=self._experiment_id,
|
|
780
|
+
file_id=file_id
|
|
781
|
+
)
|
|
756
782
|
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
783
|
+
if self._storage:
|
|
784
|
+
# Local mode: soft delete in metadata
|
|
785
|
+
result = self._storage.delete_file(
|
|
786
|
+
project=self.project,
|
|
787
|
+
experiment=self.name,
|
|
788
|
+
file_id=file_id
|
|
789
|
+
)
|
|
761
790
|
|
|
762
|
-
|
|
763
|
-
|
|
791
|
+
return result
|
|
792
|
+
|
|
793
|
+
def _update_file(
|
|
794
|
+
self,
|
|
795
|
+
file_id: str,
|
|
796
|
+
description: Optional[str],
|
|
797
|
+
tags: Optional[List[str]],
|
|
798
|
+
metadata: Optional[Dict[str, Any]]
|
|
799
|
+
) -> Dict[str, Any]:
|
|
800
|
+
"""
|
|
801
|
+
Internal method to update file metadata.
|
|
802
|
+
|
|
803
|
+
Args:
|
|
804
|
+
file_id: File ID
|
|
805
|
+
description: Optional description
|
|
806
|
+
tags: Optional tags
|
|
807
|
+
metadata: Optional metadata
|
|
808
|
+
|
|
809
|
+
Returns:
|
|
810
|
+
Updated file metadata dict
|
|
811
|
+
"""
|
|
812
|
+
result = None
|
|
813
|
+
|
|
814
|
+
if self._client:
|
|
815
|
+
# Remote mode: update via API
|
|
816
|
+
result = self._client.update_file(
|
|
817
|
+
experiment_id=self._experiment_id,
|
|
818
|
+
file_id=file_id,
|
|
819
|
+
description=description,
|
|
820
|
+
tags=tags,
|
|
821
|
+
metadata=metadata
|
|
822
|
+
)
|
|
764
823
|
|
|
765
|
-
|
|
766
|
-
|
|
824
|
+
if self._storage:
|
|
825
|
+
# Local mode: update in metadata file
|
|
826
|
+
result = self._storage.update_file_metadata(
|
|
827
|
+
project=self.project,
|
|
828
|
+
experiment=self.name,
|
|
829
|
+
file_id=file_id,
|
|
830
|
+
description=description,
|
|
831
|
+
tags=tags,
|
|
832
|
+
metadata=metadata
|
|
833
|
+
)
|
|
767
834
|
|
|
768
|
-
|
|
769
|
-
# Upload file - supports flexible syntax
|
|
770
|
-
experiment.files("checkpoints").upload("./model.pt", to="checkpoint.pt")
|
|
771
|
-
experiment.files(prefix="checkpoints").upload("./model.pt")
|
|
772
|
-
experiment.files().upload("./model.pt", to="models/model.pt") # root
|
|
835
|
+
return result
|
|
773
836
|
|
|
774
|
-
# List files
|
|
775
|
-
files = experiment.files("/some/location").list()
|
|
776
|
-
files = experiment.files("/models").list()
|
|
777
837
|
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
838
|
+
def _write_params(self, flattened_params: Dict[str, Any]) -> None:
|
|
839
|
+
"""
|
|
840
|
+
Internal method to write/merge parameters.
|
|
781
841
|
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
842
|
+
Args:
|
|
843
|
+
flattened_params: Already-flattened parameter dict with dot notation
|
|
844
|
+
"""
|
|
845
|
+
if self._client:
|
|
846
|
+
# Remote mode: send to API
|
|
847
|
+
self._client.set_parameters(
|
|
848
|
+
experiment_id=self._experiment_id,
|
|
849
|
+
data=flattened_params
|
|
850
|
+
)
|
|
785
851
|
|
|
786
|
-
|
|
787
|
-
|
|
852
|
+
if self._storage:
|
|
853
|
+
# Local mode: write to file
|
|
854
|
+
self._storage.write_parameters(
|
|
855
|
+
project=self.project,
|
|
856
|
+
experiment=self.name,
|
|
857
|
+
folder=self.folder,
|
|
858
|
+
data=flattened_params
|
|
859
|
+
)
|
|
788
860
|
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
861
|
+
def _read_params(self) -> Optional[Dict[str, Any]]:
|
|
862
|
+
"""
|
|
863
|
+
Internal method to read parameters.
|
|
792
864
|
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
"""
|
|
798
|
-
if not self._is_open:
|
|
799
|
-
raise RuntimeError(
|
|
800
|
-
"Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
|
|
801
|
-
"Example:\n"
|
|
802
|
-
" with dxp.run:\n"
|
|
803
|
-
" dxp.files('path').upload()"
|
|
804
|
-
)
|
|
865
|
+
Returns:
|
|
866
|
+
Flattened parameters dict, or None if no parameters exist
|
|
867
|
+
"""
|
|
868
|
+
params = None
|
|
805
869
|
|
|
806
|
-
|
|
870
|
+
if self._client:
|
|
871
|
+
# Remote mode: fetch from API
|
|
872
|
+
try:
|
|
873
|
+
params = self._client.get_parameters(experiment_id=self._experiment_id)
|
|
874
|
+
except Exception:
|
|
875
|
+
# Parameters don't exist yet
|
|
876
|
+
params = None
|
|
877
|
+
|
|
878
|
+
if self._storage:
|
|
879
|
+
# Local mode: read from file
|
|
880
|
+
params = self._storage.read_parameters(
|
|
881
|
+
project=self.project,
|
|
882
|
+
experiment=self.name
|
|
883
|
+
)
|
|
807
884
|
|
|
808
|
-
|
|
809
|
-
"""
|
|
810
|
-
Get a BindrsBuilder for working with file collections (bindrs).
|
|
885
|
+
return params
|
|
811
886
|
|
|
812
|
-
|
|
887
|
+
@property
|
|
888
|
+
def metrics(self) -> 'MetricsManager':
|
|
889
+        """
+        Get a MetricsManager for metric operations.
+
+        Supports two usage patterns:
+        1. Named: experiment.metrics("loss").append(value=0.5, step=1)
+        2. Unnamed: experiment.metrics.append(name="loss", value=0.5, step=1)
+
+        Returns:
+            MetricsManager instance
+
+        Raises:
+            RuntimeError: If experiment is not open
+
+        Examples:
+            # Named metric
+            experiment.metrics("train_loss").append(value=0.5, step=100)
+
+            # Unnamed (name in append call)
+            experiment.metrics.append(name="train_loss", value=0.5, step=100)
+
+            # Append batch
+            experiment.metrics("metrics").append_batch([
+                {"loss": 0.5, "acc": 0.8, "step": 1},
+                {"loss": 0.4, "acc": 0.85, "step": 2}
+            ])
+
+            # Read data
+            data = experiment.metrics("train_loss").read(start_index=0, limit=100)
+
+            # Get statistics
+            stats = experiment.metrics("train_loss").stats()
+        """
+        from .metric import MetricsManager
+
+        if not self._is_open:
+            raise RuntimeError(
+                "Cannot use metrics on closed experiment. "
+                "Use 'with Experiment(...).run as experiment:' or call experiment.run.start() first."
+            )
+
+        # Cache the MetricsManager instance to preserve MetricBuilder cache across calls
+        if self._metrics_manager is None:
+            self._metrics_manager = MetricsManager(self)
+        return self._metrics_manager
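
Read together with the docstring above, the rewritten accessor is used roughly as follows. This is a sketch, not part of the diff: the experiment and metric names are made up, the import path assumes Experiment is taken straight from ml_dash.experiment, and only the calls shown in the docstring are used.

    from ml_dash.experiment import Experiment  # import path assumed

    with Experiment(name="mnist-baseline", project="demos").run as experiment:
        loss = 1.0
        for step in range(100):
            loss = 1.0 / (step + 1)  # stand-in for a real training loss
            # Named pattern: one builder per metric name
            experiment.metrics("train_loss").append(value=loss, step=step)
        # Unnamed pattern: the metric name travels with the call
        experiment.metrics.append(name="final_loss", value=loss, step=99)
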
-        … (remaining lines of the old bindrs() docstring were not recovered)
-        """
-        if not self._is_open:
-            raise RuntimeError(
-                "Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
-                "Example:\n"
-                " with dxp.run:\n"
-                " files = dxp.bindrs('my-bindr').list()"
-            )
-
-        return BindrsBuilder(self, bindr_name)
-
-    def _upload_file(
-        self,
-        file_path: str,
-        prefix: str,
-        filename: str,
-        description: Optional[str],
-        tags: Optional[List[str]],
-        metadata: Optional[Dict[str, Any]],
-        checksum: str,
-        content_type: str,
-        size_bytes: int,
-    ) -> Dict[str, Any]:
-        """
-        Internal method to upload a file.
-
-        Args:
-            file_path: Local file path
-            prefix: Logical path prefix
-            filename: Original filename
-            description: Optional description
-            tags: Optional tags
-            metadata: Optional metadata
-            checksum: SHA256 checksum
-            content_type: MIME type
-            size_bytes: File size in bytes
-
-        Returns:
-            File metadata dict
-        """
-        result = None
-
-        if self._client:
-            # Remote mode: upload to API
-            result = self._client.upload_file(
-                experiment_id=self._experiment_id,
-                file_path=file_path,
-                prefix=prefix,
-                filename=filename,
-                description=description,
-                tags=tags,
-                metadata=metadata,
-                checksum=checksum,
-                content_type=content_type,
-                size_bytes=size_bytes,
-            )
-
-        if self._storage:
-            # Local mode: copy to local storage
-            result = self._storage.write_file(
-                owner=self.owner,
-                project=self.project,
-                prefix=self._folder_path,
-                file_path=file_path,
-                path=prefix,
-                filename=filename,
-                description=description,
-                tags=tags,
-                metadata=metadata,
-                checksum=checksum,
-                content_type=content_type,
-                size_bytes=size_bytes,
-            )
-
-        return result
-
-    def _list_files(
-        self, prefix: Optional[str] = None, tags: Optional[List[str]] = None
-    ) -> List[Dict[str, Any]]:
-        """
-        Internal method to list files.
-        …
-        """
-        files = []
-
-        if self._client:
-            # Remote mode: fetch from API
-            files = self._client.list_files(
-                experiment_id=self._experiment_id, prefix=prefix, tags=tags
-            )
-
-        if self._storage:
-            # Local mode: read from metadata file
-            files = self._storage.list_files(
-                owner=self.owner,
-                project=self.project,
-                prefix=self._folder_path,
-                path_prefix=prefix,
-                tags=tags,
-            )
-
-        return files
-
-    def _download_file(self, file_id: str, dest_path: Optional[str] = None) -> str:
-        """
-        Internal method to download a file.
-        …
-            return self._client.download_file(
-                experiment_id=self._experiment_id, file_id=file_id, dest_path=dest_path
-            )
-
-        if self._storage:
-            # Local mode: copy from local storage
-            return self._storage.read_file(
-                owner=self.owner,
-                project=self.project,
-                prefix=self._folder_path,
-                file_id=file_id,
-                dest_path=dest_path,
-            )
-
-        raise RuntimeError("No client or storage configured")
-
-    def _delete_file(self, file_id: str) -> Dict[str, Any]:
-        """
-        Internal method to delete a file.
-        …
-            # Local mode: soft delete in metadata
-            result = self._storage.delete_file(
-                owner=self.owner,
-                project=self.project,
-                prefix=self._folder_path,
-                file_id=file_id,
-            )
-
-        return result

+    def _append_to_metric(
+        self,
+        name: Optional[str],
+        data: Dict[str, Any],
+        description: Optional[str],
+        tags: Optional[List[str]],
+        metadata: Optional[Dict[str, Any]]
+    ) -> Dict[str, Any]:
+        """
+        Internal method to append a single data point to a metric.
+
+        Args:
+            name: Metric name (can be None for unnamed metrics)
+            data: Data point (flexible schema)
+            description: Optional metric description
+            tags: Optional tags
+            metadata: Optional metadata
+
+        Returns:
+            Dict with metricId, index, bufferedDataPoints, chunkSize
+        """
+        result = None
+
+        if self._client:
+            # Remote mode: append via API
+            result = self._client.append_to_metric(
+                experiment_id=self._experiment_id,
+                metric_name=name,
+                data=data,
+                description=description,
+                tags=tags,
+                metadata=metadata
+            )
+
+        if self._storage:
+            # Local mode: append to local storage
+            result = self._storage.append_to_metric(
+                project=self.project,
+                experiment=self.name,
+                folder=self.folder,
+                metric_name=name,
+                data=data,
+                description=description,
+                tags=tags,
+                metadata=metadata
+            )
+
+        return result
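
_append_to_metric follows the same shape as every helper in this file: write through the remote client when one is configured, and through local storage when that is configured, returning whichever result was produced last. A minimal standalone sketch of that dispatch pattern; the class and argument names here are illustrative, not the SDK's actual types.

    from typing import Any, Dict, Optional

    class DualBackend:
        """Illustrative stand-in for the client-then-storage dispatch used above."""

        def __init__(self, client: Optional[Any] = None, storage: Optional[Any] = None):
            self._client = client    # remote API wrapper, may be None
            self._storage = storage  # local storage wrapper, may be None

        def append(self, name: str, data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
            result = None
            if self._client:
                # Remote mode: forward the point to the API
                result = self._client.append_to_metric(metric_name=name, data=data)
            if self._storage:
                # Local mode: persist the point on disk (can run in addition to remote)
                result = self._storage.append_to_metric(metric_name=name, data=data)
            return result
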
-    def _update_file(
-        self,
-        file_id: str,
-        description: Optional[str],
-        tags: Optional[List[str]],
-        metadata: Optional[Dict[str, Any]],
-    ) -> Dict[str, Any]:
-        """
-        Internal method to update file metadata.
-        …
-                tags=tags,
-                metadata=metadata,
-            )
-
-        if self._storage:
-            # Local mode: update in metadata file
-            result = self._storage.update_file_metadata(
-                owner=self.owner,
-                project=self.project,
-                prefix=self._folder_path,
-                file_id=file_id,
-                description=description,
-                tags=tags,
-                metadata=metadata,
-            )
-
-        return result
-
-    def _write_params(self, flattened_params: Dict[str, Any]) -> None:
-        """
-        Internal method to write/merge parameters.
-        …
-        except Exception:
-            # Parameters don't exist yet
-            params = None
-
-            # Local mode: read from file
-            params = self._storage.read_parameters(
-                owner=self.owner, project=self.project, prefix=self._folder_path
-            )

+    def _append_batch_to_metric(
+        self,
+        name: Optional[str],
+        data_points: List[Dict[str, Any]],
+        description: Optional[str],
+        tags: Optional[List[str]],
+        metadata: Optional[Dict[str, Any]]
+    ) -> Dict[str, Any]:
+        """
+        Internal method to append multiple data points to a metric.
+
+        Args:
+            name: Metric name (can be None for unnamed metrics)
+            data_points: List of data points
+            description: Optional metric description
+            tags: Optional tags
+            metadata: Optional metadata
+
+        Returns:
+            Dict with metricId, startIndex, endIndex, count
+        """
+        result = None
+
+        if self._client:
+            # Remote mode: append batch via API
+            result = self._client.append_batch_to_metric(
+                experiment_id=self._experiment_id,
+                metric_name=name,
+                data_points=data_points,
+                description=description,
+                tags=tags,
+                metadata=metadata
+            )
+
+        if self._storage:
+            # Local mode: append batch to local storage
+            result = self._storage.append_batch_to_metric(
+                project=self.project,
+                experiment=self.name,
+                metric_name=name,
+                data_points=data_points,
+                description=description,
+                tags=tags,
+                metadata=metadata
+            )
+
+        return result
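
Because append_batch accepts a list of flexible-schema points, callers can buffer locally and flush in chunks instead of appending one point at a time. A sketch using only the documented append_batch call; the buffer size and field values are illustrative, and `experiment` is assumed to come from an open run as above.

    buffer = []
    for step in range(1000):
        point = {"loss": 1.0 / (step + 1), "acc": min(0.99, step / 1000), "step": step}
        buffer.append(point)
        if len(buffer) >= 50:
            experiment.metrics("metrics").append_batch(buffer)
            buffer = []
    if buffer:  # flush the remainder
        experiment.metrics("metrics").append_batch(buffer)
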
-        …
-        """
-        Get a MetricsManager for metric operations.
-        … (most of the old docstring was not recovered; the surviving example lines read)
-            # Named metric with multi-field logging
-            experiment.metrics("train").log(loss=0.5, accuracy=0.9)
-            experiment.metrics("eval").log(loss=0.6, accuracy=0.85)
-            experiment.metrics.log(epoch=epoch).flush()
-            …
-                train=dict(loss=0.142, accuracy=0.80),
-                eval=dict(loss=0.201, accuracy=0.76)
-            )
-        …
-        from .metric import MetricsManager
-
-        if not self._is_open:
-            raise RuntimeError(
-                "Cannot use metrics on closed experiment. "
-                "Use 'with Experiment(...).run as experiment:' or call experiment.run.start() first."
-            )
-
-        # Cache the MetricsManager instance to preserve MetricBuilder cache across calls
-        if self._metrics_manager is None:
-            self._metrics_manager = MetricsManager(self)
-        return self._metrics_manager
-
-    def _append_to_metric(
-        self,
-        name: Optional[str],
-        data: Dict[str, Any],
-        description: Optional[str],
-        tags: Optional[List[str]],
-        metadata: Optional[Dict[str, Any]],
-    ) -> Dict[str, Any]:
-        """
-        Internal method to append a single data point to a metric.
-        …
-            Dict with metricId, index, bufferedDataPoints, chunkSize
-        """
-        result = None
-
-        if self._client:
-            # Remote mode: append via API
-            result = self._client.append_to_metric(
-                experiment_id=self._experiment_id,
-                metric_name=name,
-                data=data,
-                description=description,
-                tags=tags,
-                metadata=metadata,
-            )
-
-        if self._storage:
-            # Local mode: append to local storage
-            result = self._storage.append_to_metric(
-                owner=self.owner,
-                project=self.project,
-                prefix=self._folder_path,
-                metric_name=name,
-                data=data,
-                description=description,
-                tags=tags,
-                metadata=metadata,
-            )
-
-        return result

+    def _read_metric_data(
+        self,
+        name: str,
+        start_index: int,
+        limit: int
+    ) -> Dict[str, Any]:
+        """
+        Internal method to read data points from a metric.
+
+        Args:
+            name: Metric name
+            start_index: Starting index
+            limit: Max points to read
+
+        Returns:
+            Dict with data, startIndex, endIndex, total, hasMore
+        """
+        result = None
+
+        if self._client:
+            # Remote mode: read via API
+            result = self._client.read_metric_data(
+                experiment_id=self._experiment_id,
+                metric_name=name,
+                start_index=start_index,
+                limit=limit
+            )
+
+        if self._storage:
+            # Local mode: read from local storage
+            result = self._storage.read_metric_data(
+                project=self.project,
+                experiment=self.name,
+                metric_name=name,
+                start_index=start_index,
+                limit=limit
+            )
+
+        return result
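
The read path is paged through start_index and limit, and the returned dict reports endIndex and hasMore. A sketch of draining a metric page by page; it assumes read() returns the dict described in the docstring above and that endIndex is inclusive.

    start = 0
    points = []
    while True:
        page = experiment.metrics("train_loss").read(start_index=start, limit=500)
        points.extend(page["data"])
        if not page.get("hasMore"):
            break
        start = page["endIndex"] + 1  # assumes endIndex is inclusive
    print(f"fetched {len(points)} points")
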
-    def _append_batch_to_metric(
-        self,
-        name: Optional[str],
-        data_points: List[Dict[str, Any]],
-        description: Optional[str],
-        tags: Optional[List[str]],
-        metadata: Optional[Dict[str, Any]],
-    ) -> Dict[str, Any]:
-        """
-        Internal method to append multiple data points to a metric.
-        …
-            tags: Optional tags
-            metadata: Optional metadata
-        …
-        if self._client:
-            # Remote mode: append batch via API
-            result = self._client.append_batch_to_metric(
-                experiment_id=self._experiment_id,
-                metric_name=name,
-                data_points=data_points,
-                description=description,
-                tags=tags,
-                metadata=metadata,
-            )
-
-        if self._storage:
-            # Local mode: append batch to local storage
-            result = self._storage.append_batch_to_metric(
-                owner=self.owner,
-                project=self.project,
-                prefix=self._folder_path,
-                metric_name=name,
-                data_points=data_points,
-                description=description,
-                tags=tags,
-                metadata=metadata,
-            )
-
-        return result
-
-    def _read_metric_data(
-        self, name: str, start_index: int, limit: int
-    ) -> Dict[str, Any]:
-        """
-        Internal method to read data points from a metric.
-
-        Args:
-            name: Metric name
-            start_index: Starting index
-            limit: Max points to read
-        …
-        if self._client:
-            # Remote mode: read via API
-            result = self._client.read_metric_data(
-                experiment_id=self._experiment_id,
-                metric_name=name,
-                start_index=start_index,
-                limit=limit,
-            )
-
-        if self._storage:
-            # Local mode: read from local storage
-            result = self._storage.read_metric_data(
-                owner=self.owner,
-                project=self.project,
-                prefix=self._folder_path,
-                metric_name=name,
-                start_index=start_index,
-                limit=limit,
-            )
-
-        return result

+    def _get_metric_stats(self, name: str) -> Dict[str, Any]:
+        """
+        Internal method to get metric statistics.
+
+        Args:
+            name: Metric name
+
+        Returns:
+            Dict with metric stats
+        """
+        result = None
+
+        if self._client:
+            # Remote mode: get stats via API
+            result = self._client.get_metric_stats(
+                experiment_id=self._experiment_id,
+                metric_name=name
+            )
+
+        if self._storage:
+            # Local mode: get stats from local storage
+            result = self._storage.get_metric_stats(
+                project=self.project,
+                experiment=self.name,
+                metric_name=name
+            )
+
+        return result
+
+    def _list_metrics(self) -> List[Dict[str, Any]]:
+        """
+        Internal method to list all metrics in experiment.
+
+        Returns:
+            List of metric summaries
+        """
+        result = None
+
+        if self._client:
+            # Remote mode: list via API
+            result = self._client.list_metrics(experiment_id=self._experiment_id)
+
+        if self._storage:
+            # Local mode: list from local storage
+            result = self._storage.list_metrics(
+                project=self.project,
+                experiment=self.name
+            )
+
+        return result or []
+
+    @property
+    def id(self) -> Optional[str]:
+        """Get the experiment ID (only available after open in remote mode)."""
+        return self._experiment_id
+
+    @property
+    def data(self) -> Optional[Dict[str, Any]]:
+        """Get the full experiment data (only available after open in remote mode)."""
+        return self._experiment_data
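
Once a run is open, experiment.id and experiment.data are populated in remote mode (they stay None locally), and per-metric statistics come back through stats(). A short sketch; the names are illustrative and the exact shape of the stats dict is not spelled out here, so it is only printed.

    from ml_dash.experiment import Experiment  # import path assumed

    with Experiment(name="mnist-baseline", project="demos").run as experiment:
        experiment.metrics("train_loss").append(value=0.42, step=0)
        print("experiment id:", experiment.id)  # set in remote mode, None locally
        print("train_loss stats:", experiment.metrics("train_loss").stats())
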
-    def _get_metric_stats(self, name: str) -> Dict[str, Any]:
-        """
-        …
-            result = self._client.get_metric_stats(
-                experiment_id=self._experiment_id, metric_name=name
-            )
-
-        if self._storage:
-            # Local mode: get stats from local storage
-            result = self._storage.get_metric_stats(
-                owner=self.owner,
-                project=self.project,
-                prefix=self._folder_path,
-                metric_name=name,
-            )
-
-        return result
-
-    def _list_metrics(self) -> List[Dict[str, Any]]:
-        """
-        Internal method to list all metrics in experiment.
-        …
-                owner=self.owner, project=self.project, prefix=self._folder_path
-            )
-
-        return result or []
-
-    @property
-    def id(self) -> Optional[str]:
-        """Get the experiment ID (only available after open in remote mode)."""
-        return self._experiment_id
-
-    @property
-    def data(self) -> Optional[Dict[str, Any]]:
-        """Get the full experiment data (only available after open in remote mode)."""
-        return self._experiment_data
-
-
-def ml_dash_experiment(prefix: str, **kwargs) -> Callable:
-    """
-    Decorator for wrapping functions with an ML-Dash experiment.
-
-    Args:
-        prefix: Full experiment path like "owner/project/folder.../name"
-        **kwargs: Additional arguments passed to Experiment constructor
-
-    Usage:
-        @ml_dash_experiment(
-            prefix="ge/my-project/experiments/my-experiment",
-            dash_url="https://api.dash.ml"
-        )
-        def train_model():
-            # Function code here
-            pass
-
-    The decorated function will receive an 'experiment' keyword argument
-    with the active Experiment instance.
-    """
-    def decorator(func: Callable) -> Callable:
-        @functools.wraps(func)
-        def wrapper(*args, **func_kwargs):
-            with Experiment(prefix=prefix, **kwargs).run as experiment:
-                # Inject experiment into function kwargs
-                func_kwargs["experiment"] = experiment
-                return func(*args, **func_kwargs)
-
-        return wrapper
-
-    return decorator

+def ml_dash_experiment(
+    name: str,
+    project: str,
+    **kwargs
+) -> Callable:
+    """
+    Decorator for wrapping functions with an ML-Dash experiment.
+
+    Usage:
+        @ml_dash_experiment(
+            name="my-experiment",
+            project="my-project",
+            remote="https://api.dash.ml",
+            api_key="your-token"
+        )
+        def train_model():
+            # Function code here
+            pass
+
+    The decorated function will receive an 'experiment' keyword argument
+    with the active Experiment instance.
+    """
+    def decorator(func: Callable) -> Callable:
+        @functools.wraps(func)
+        def wrapper(*args, **func_kwargs):
+            with Experiment(name=name, project=project, **kwargs).run as experiment:
+                # Inject experiment into function kwargs
+                func_kwargs['experiment'] = experiment
+                return func(*args, **func_kwargs)
+        return wrapper
+    return decorator
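
In use, the reworked decorator takes name and project directly and forwards any other keyword (such as the remote and api_key shown in the docstring) to the Experiment constructor, then injects the open experiment as a keyword argument. A sketch; the function body and the import path are illustrative.

    from ml_dash.experiment import ml_dash_experiment  # import path assumed

    @ml_dash_experiment(name="mnist-baseline", project="demos")
    def train_model(epochs: int = 3, *, experiment=None):
        # 'experiment' is injected by the decorator's wrapper
        for epoch in range(epochs):
            experiment.metrics("epoch_loss").append(value=1.0 / (epoch + 1), step=epoch)

    train_model(epochs=3)
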