ml-dash 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml_dash/__init__.py +51 -7
- ml_dash/client.py +562 -0
- ml_dash/experiment.py +916 -0
- ml_dash/files.py +313 -0
- ml_dash/log.py +181 -0
- ml_dash/metric.py +186 -0
- ml_dash/params.py +188 -0
- ml_dash/py.typed +0 -0
- ml_dash/storage.py +922 -0
- ml_dash-0.5.0.dist-info/METADATA +237 -0
- ml_dash-0.5.0.dist-info/RECORD +12 -0
- {ml_dash-0.4.0.dist-info → ml_dash-0.5.0.dist-info}/WHEEL +1 -1
- ml_dash/ARCHITECTURE.md +0 -382
- ml_dash/autolog.py +0 -32
- ml_dash/backends/__init__.py +0 -11
- ml_dash/backends/base.py +0 -124
- ml_dash/backends/dash_backend.py +0 -571
- ml_dash/backends/local_backend.py +0 -90
- ml_dash/components/__init__.py +0 -13
- ml_dash/components/files.py +0 -246
- ml_dash/components/logs.py +0 -104
- ml_dash/components/metrics.py +0 -169
- ml_dash/components/parameters.py +0 -144
- ml_dash/job_logger.py +0 -42
- ml_dash/ml_logger.py +0 -234
- ml_dash/run.py +0 -331
- ml_dash-0.4.0.dist-info/METADATA +0 -1424
- ml_dash-0.4.0.dist-info/RECORD +0 -19
- ml_dash-0.4.0.dist-info/entry_points.txt +0 -3
ml_dash/run.py
DELETED
|
@@ -1,331 +0,0 @@
|
|
|
1
|
-
"""Experiment class - main API for ML-Logger."""
|
|
2
|
-
|
|
3
|
-
import json
|
|
4
|
-
import os
|
|
5
|
-
import socket
|
|
6
|
-
import time
|
|
7
|
-
import uuid
|
|
8
|
-
from contextlib import contextmanager
|
|
9
|
-
from datetime import datetime
|
|
10
|
-
from functools import wraps
|
|
11
|
-
from pathlib import Path
|
|
12
|
-
from typing import Any, Callable, Dict, Optional
|
|
13
|
-
|
|
14
|
-
from .backends.base import StorageBackend
|
|
15
|
-
from .backends.local_backend import LocalBackend
|
|
16
|
-
from .backends.dash_backend import DashBackend
|
|
17
|
-
from .components.parameters import ParameterManager
|
|
18
|
-
from .components.metrics import MetricsLogger
|
|
19
|
-
from .components.files import FileManager
|
|
20
|
-
from .components.logs import LogManager
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class Experiment:
    """Main experiment tracking class.

    Represents a single training execution with parameters, metrics, files,
    and logs. Storage goes through a ``DashBackend`` when ``remote`` is given
    and the server-side initialization succeeds, otherwise through a
    ``LocalBackend`` rooted at ``local_root``.

    Args:
        namespace: User/team namespace (required)
        workspace: Project workspace (required)
        prefix: Experiment path (required)
        remote: Remote server URL (optional)
        local_root: Local storage directory (default: ".ml-logger")
        directory: Directory path for organizing experiments (optional)
        readme: Searchable description (optional)
        experiment_id: Server-side experiment ID (optional)
        tags: Experiment tags (optional)
    """

    def __init__(
        self,
        namespace: str,
        workspace: str,
        prefix: str,
        remote: Optional[str] = None,
        local_root: str = ".ml-logger",
        directory: Optional[str] = None,
        readme: Optional[str] = None,
        experiment_id: Optional[str] = None,
        tags: Optional[list] = None,
    ):
        """Initialize experiment.

        Args:
            namespace: User/team namespace
            workspace: Project workspace
            prefix: Experiment path (used as experiment name)
            remote: Remote server URL (optional)
            local_root: Local storage directory
            directory: Directory path for organizing experiments (e.g., "dir1/dir2")
            readme: Searchable description
            experiment_id: Server-side experiment ID
            tags: Experiment tags
        """
        self.namespace = namespace
        self.workspace = workspace
        self.prefix = prefix
        self.remote = remote
        self.local_root = local_root
        self.directory = directory
        self.readme = readme or ""
        self.experiment_id = experiment_id
        self.run_id: Optional[str] = None
        self.charts: Dict[str, Any] = {}
        self.tags = tags or []

        # Path relative to the backend root:
        # {namespace}/{workspace}/{directory}/{prefix}, with the directory
        # segment omitted when no directory is given.
        if directory:
            self.local_path = f"{namespace}/{workspace}/{directory}/{prefix}"
        else:
            self.local_path = f"{namespace}/{workspace}/{prefix}"

        # Initialize backend
        if remote:
            self.backend: StorageBackend = DashBackend(
                server_url=remote,
                namespace=namespace,
                workspace=workspace,
                experiment_name=prefix,
                experiment_id=experiment_id,
                directory=directory,
            )
            try:
                exp_data = self.backend.initialize_experiment(description=readme, tags=tags)
                self.experiment_id = exp_data.get("id")
                print(f"✓ Initialized experiment on remote server: {self.experiment_id}")
            except Exception as e:
                # Best effort: a remote failure must not prevent tracking,
                # so fall back to local storage.
                print(f"Warning: Failed to initialize experiment on remote server: {e}")
                self.backend = LocalBackend(local_root)
        else:
            self.backend = LocalBackend(local_root)

        # Initialize components
        self.params = ParameterManager(self.backend, self.local_path)
        self.metrics = MetricsLogger(self.backend, self.local_path)
        self.files = FileManager(self.backend, self.local_path)
        self.logs = LogManager(self.backend, self.local_path)

        # Metadata
        self._meta_file = f"{self.local_path}/.ml-logger.meta.json"
        self._status = "created"
        self._started_at: Optional[float] = None
        self._completed_at: Optional[float] = None
        self._hostname = socket.gethostname()

        # Load existing metadata whenever the *effective* backend is local.
        # Checking the backend (not the `remote` argument) keeps this in step
        # with the save path after a fallback to LocalBackend.
        if not self._uses_remote_backend():
            self._load_metadata()

    def _uses_remote_backend(self) -> bool:
        """Return True when the active backend is a DashBackend."""
        return isinstance(self.backend, DashBackend)

    def _save_local_metadata(self) -> None:
        """Persist metadata, but only when the active backend is not remote."""
        if not self._uses_remote_backend():
            self._save_metadata()

    def _load_metadata(self) -> None:
        """Load experiment metadata from the metadata file, if it exists.

        Loading is best effort: an unreadable or malformed file is reported
        with a warning and the in-memory defaults are kept.
        """
        if not self.backend.exists(self._meta_file):
            return

        try:
            meta = json.loads(self.backend.read_text(self._meta_file))
        except Exception as e:
            print(f"Warning: Failed to load experiment metadata from {self._meta_file}: {e}")
            return

        if not isinstance(meta, dict):
            print(f"Warning: Ignoring malformed experiment metadata in {self._meta_file}")
            return

        self._status = meta.get("status", "created")
        self._started_at = meta.get("started_at")
        self._completed_at = meta.get("completed_at")
        self.readme = meta.get("readme", self.readme)
        # A JSON null must not replace the charts dict with None.
        self.charts = meta.get("charts") or {}

    def _save_metadata(self) -> None:
        """Serialize experiment metadata as JSON and write it via the backend."""
        meta = {
            "namespace": self.namespace,
            "workspace": self.workspace,
            "prefix": self.prefix,
            "remote": self.remote,
            "experiment_id": self.experiment_id,
            "readme": self.readme,
            "charts": self.charts,
            "status": self._status,
            "started_at": self._started_at,
            "completed_at": self._completed_at,
            "hostname": self._hostname,
            "updated_at": time.time(),
        }
        content = json.dumps(meta, indent=2)
        self.backend.write_text(self._meta_file, content)

    def run(self, func: Optional[Callable] = None):
        """Mark experiment as started (supports 3 patterns).

        Pattern 1 - Direct call:
            experiment.run()
            # ... training code ...
            experiment.complete()

        Pattern 2 - Context manager:
            with experiment.run():
                # ... training code ...

        Pattern 3 - Decorator:
            @experiment.run
            def train():
                # ... training code ...

        Args:
            func: Function to wrap (for decorator pattern)

        Returns:
            Context manager or decorated function
        """
        if func is not None:
            # Pattern 3 (decorator)
            @wraps(func)
            def wrapper(*args, **kwargs):
                with self.run():
                    return func(*args, **kwargs)

            return wrapper

        # Pattern 1 (direct) or Pattern 2 (context manager)
        self._status = "running"
        self._started_at = time.time()
        self._save_local_metadata()
        # Create the remote run here rather than inside the context manager:
        # with Pattern 1 the returned context manager is never entered, so
        # work placed inside it would never execute.
        self._ensure_remote_run()
        return self._run_context()

    def _ensure_remote_run(self) -> None:
        """Create the run on the remote server once (best effort)."""
        if not self._uses_remote_backend() or self.run_id:
            return
        try:
            run_data = self.backend.create_run(name=self.prefix, tags=self.tags)
            self.run_id = run_data.get("id")
            print(f"✓ Created run on remote server: {self.run_id}")
        except Exception as e:
            print(f"Warning: Failed to create run on remote server: {e}")

    @contextmanager
    def _run_context(self):
        """Context manager for run lifecycle.

        Calls ``complete()`` when the body finishes normally. When the body
        raises, calls ``fail()`` and re-raises the original exception.
        """
        try:
            yield self
        except GeneratorExit:
            # Generator is being closed, not a failure of the body.
            raise
        except BaseException as exc:
            # BaseException so KeyboardInterrupt/SystemExit also mark the
            # run as failed instead of leaving it "running".
            try:
                self.fail(str(exc) or type(exc).__name__)
            except Exception as fail_exc:
                # Never let bookkeeping mask the caller's own exception.
                print(f"Warning: Failed to record experiment failure: {fail_exc}")
            raise
        else:
            # Outside the try body so an error while recording completion is
            # not itself reported as an experiment failure.
            self.complete()

    def complete(self) -> None:
        """Mark experiment as completed."""
        self._status = "completed"
        self._completed_at = time.time()

        # Update run status on remote server
        if self._uses_remote_backend() and self.run_id:
            try:
                self.backend.update_run(status="COMPLETED")
                print("✓ Marked run as COMPLETED on remote server")
            except Exception as e:
                print(f"Warning: Failed to update run status: {e}")

        self._save_local_metadata()

    def fail(self, error: str) -> None:
        """Mark experiment as failed.

        Args:
            error: Error message
        """
        self._status = "failed"
        self._completed_at = time.time()

        # Log error
        self.logs.error("Experiment failed", error=error)

        # Update run status on remote server
        if self._uses_remote_backend() and self.run_id:
            try:
                self.backend.update_run(status="FAILED", metadata={"error": error})
                print("✓ Marked run as FAILED on remote server")
            except Exception as e:
                print(f"Warning: Failed to update run status: {e}")

        self._save_local_metadata()

    # Convenience methods for logging
    def info(self, message: str, **context) -> None:
        """Log info message (convenience method).

        Args:
            message: Log message
            **context: Additional context
        """
        self.logs.info(message, **context)

    def error(self, message: str, **context) -> None:
        """Log error message (convenience method).

        Args:
            message: Log message
            **context: Additional context
        """
        self.logs.error(message, **context)

    def warning(self, message: str, **context) -> None:
        """Log warning message (convenience method).

        Args:
            message: Log message
            **context: Additional context
        """
        self.logs.warning(message, **context)

    def debug(self, message: str, **context) -> None:
        """Log debug message (convenience method).

        Args:
            message: Log message
            **context: Additional context
        """
        self.logs.debug(message, **context)

    @classmethod
    def _auto_configure(cls) -> "Experiment":
        """Create auto-configured experiment from environment.

        Reads configuration from:
        - ML_LOGGER_NAMESPACE (default: "default")
        - ML_LOGGER_WORKSPACE (default: "experiments")
        - ML_LOGGER_PREFIX (default: timestamp+uuid)
        - ML_LOGGER_REMOTE (optional)

        A variable that is set but empty is treated as unset, so an empty
        value never becomes an empty path component.

        Returns:
            Auto-configured Experiment instance
        """
        namespace = os.environ.get("ML_LOGGER_NAMESPACE") or "default"
        workspace = os.environ.get("ML_LOGGER_WORKSPACE") or "experiments"

        # Generate default prefix with timestamp + short UUID
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        short_id = str(uuid.uuid4())[:8]
        default_prefix = f"{timestamp}_{short_id}"

        prefix = os.environ.get("ML_LOGGER_PREFIX") or default_prefix
        remote = os.environ.get("ML_LOGGER_REMOTE") or None

        return cls(
            namespace=namespace,
            workspace=workspace,
            prefix=prefix,
            remote=remote,
        )

    def __repr__(self) -> str:
        """String representation."""
        return (
            f"Experiment(namespace='{self.namespace}', "
            f"workspace='{self.workspace}', "
            f"prefix='{self.prefix}', "
            f"status='{self._status}')"
        )
|