ml-dash 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ml_dash/run.py DELETED
@@ -1,331 +0,0 @@
1
- """Experiment class - main API for ML-Logger."""
2
-
3
- import json
4
- import os
5
- import socket
6
- import time
7
- import uuid
8
- from contextlib import contextmanager
9
- from datetime import datetime
10
- from functools import wraps
11
- from pathlib import Path
12
- from typing import Any, Callable, Dict, Optional
13
-
14
- from .backends.base import StorageBackend
15
- from .backends.local_backend import LocalBackend
16
- from .backends.dash_backend import DashBackend
17
- from .components.parameters import ParameterManager
18
- from .components.metrics import MetricsLogger
19
- from .components.files import FileManager
20
- from .components.logs import LogManager
21
-
22
-
23
class Experiment:
    """Main experiment tracking class.

    Represents a single training execution with parameters, metrics, files,
    and logs. Storage goes through a ``DashBackend`` when ``remote`` is given
    and the server-side initialization succeeds; otherwise through a
    ``LocalBackend`` rooted at ``local_root``.

    Args:
        namespace: User/team namespace (required)
        workspace: Project workspace (required)
        prefix: Experiment path (required)
        remote: Remote server URL (optional)
        local_root: Local storage directory (default: ".ml-logger")
        directory: Directory path for organizing experiments (optional)
        readme: Searchable description (optional)
        experiment_id: Server-side experiment ID (optional)
        tags: Experiment tags (optional)
    """

    def __init__(
        self,
        namespace: str,
        workspace: str,
        prefix: str,
        remote: Optional[str] = None,
        local_root: str = ".ml-logger",
        directory: Optional[str] = None,
        readme: Optional[str] = None,
        experiment_id: Optional[str] = None,
        tags: Optional[list] = None,
    ):
        """Initialize experiment.

        Args:
            namespace: User/team namespace
            workspace: Project workspace
            prefix: Experiment path (used as experiment name)
            remote: Remote server URL (optional)
            local_root: Local storage directory
            directory: Directory path for organizing experiments (e.g., "dir1/dir2")
            readme: Searchable description
            experiment_id: Server-side experiment ID
            tags: Experiment tags
        """
        self.namespace = namespace
        self.workspace = workspace
        self.prefix = prefix
        self.remote = remote
        self.local_root = local_root
        self.directory = directory
        self.readme = readme or ""
        self.experiment_id = experiment_id
        self.run_id: Optional[str] = None
        self.charts: Dict[str, Any] = {}
        self.tags = tags or []

        # Storage path relative to the backend root:
        # {namespace}/{workspace}/[{directory}/]{prefix}
        if directory:
            self.local_path = f"{namespace}/{workspace}/{directory}/{prefix}"
        else:
            self.local_path = f"{namespace}/{workspace}/{prefix}"

        # Initialize backend
        if remote:
            self.backend: StorageBackend = DashBackend(
                server_url=remote,
                namespace=namespace,
                workspace=workspace,
                experiment_name=prefix,
                experiment_id=experiment_id,
                directory=directory,
            )
            # Register the experiment on the server; any failure downgrades
            # this instance to local storage instead of aborting.
            try:
                exp_data = self.backend.initialize_experiment(description=readme, tags=tags)
                self.experiment_id = exp_data.get("id")
                print(f"✓ Initialized experiment on remote server: {self.experiment_id}")
            except Exception as e:
                print(f"Warning: Failed to initialize experiment on remote server: {e}")
                self.backend = LocalBackend(local_root)
        else:
            self.backend = LocalBackend(local_root)

        # Initialize components
        self.params = ParameterManager(self.backend, self.local_path)
        self.metrics = MetricsLogger(self.backend, self.local_path)
        self.files = FileManager(self.backend, self.local_path)
        self.logs = LogManager(self.backend, self.local_path)

        # Metadata
        self._meta_file = f"{self.local_path}/.ml-logger.meta.json"
        self._status = "created"
        self._started_at: Optional[float] = None
        self._completed_at: Optional[float] = None
        self._hostname = socket.gethostname()

        # Decide on the backend actually in use rather than on the `remote`
        # argument: after a failed remote initialization the backend is local,
        # and run()/complete()/fail() will write the metadata file, so the
        # existing one must be loaded first or it would be overwritten.
        if not isinstance(self.backend, DashBackend):
            self._load_metadata()

    def _load_metadata(self) -> None:
        """Load experiment metadata from file.

        Best effort: a missing file is a no-op, and an unreadable or malformed
        file is reported with a warning and leaves the current state untouched.
        """
        if not self.backend.exists(self._meta_file):
            return

        try:
            meta = json.loads(self.backend.read_text(self._meta_file))
            if not isinstance(meta, dict):
                raise ValueError("metadata root is not a JSON object")
        except Exception as e:
            # Keep going with defaults, but do not hide the problem.
            print(f"Warning: Failed to load experiment metadata from {self._meta_file}: {e}")
            return

        self._status = meta.get("status", "created")
        self._started_at = meta.get("started_at")
        self._completed_at = meta.get("completed_at")
        self.readme = meta.get("readme", self.readme)
        # A stored null must not replace the dict that callers expect.
        self.charts = meta.get("charts") or {}

    def _save_metadata(self) -> None:
        """Save experiment metadata to file."""
        meta = {
            "namespace": self.namespace,
            "workspace": self.workspace,
            "prefix": self.prefix,
            "remote": self.remote,
            "experiment_id": self.experiment_id,
            "readme": self.readme,
            "charts": self.charts,
            "status": self._status,
            "started_at": self._started_at,
            "completed_at": self._completed_at,
            "hostname": self._hostname,
            "updated_at": time.time(),
        }
        content = json.dumps(meta, indent=2)
        self.backend.write_text(self._meta_file, content)

    def _ensure_remote_run(self) -> None:
        """Create the run on the remote server once; no-op for local backends.

        Failures are reported with a warning and leave ``run_id`` unset.
        """
        if not isinstance(self.backend, DashBackend) or self.run_id:
            return
        try:
            run_data = self.backend.create_run(name=self.prefix, tags=self.tags)
            self.run_id = run_data.get("id")
            print(f"✓ Created run on remote server: {self.run_id}")
        except Exception as e:
            print(f"Warning: Failed to create run on remote server: {e}")

    def run(self, func: Optional[Callable] = None):
        """Mark experiment as started (supports 3 patterns).

        Pattern 1 - Direct call:
            experiment.run()
            # ... training code ...
            experiment.complete()

        Pattern 2 - Context manager:
            with experiment.run():
                # ... training code ...

        Pattern 3 - Decorator:
            @experiment.run
            def train():
                # ... training code ...

        Args:
            func: Function to wrap (for decorator pattern)

        Returns:
            Context manager or decorated function
        """
        if func is not None:
            # Pattern 3 (decorator)
            @wraps(func)
            def wrapper(*args, **kwargs):
                with self.run():
                    return func(*args, **kwargs)
            return wrapper

        # Pattern 1 (direct) or Pattern 2 (context manager)
        self._status = "running"
        self._started_at = time.time()
        if isinstance(self.backend, DashBackend):
            # Create the remote run here, not inside the context manager: in
            # the direct-call pattern the returned context manager is never
            # entered, so its body would never execute and complete()/fail()
            # would have no run to update.
            self._ensure_remote_run()
        else:
            # Only save metadata for local backends
            self._save_metadata()
        return self._run_context()

    @contextmanager
    def _run_context(self):
        """Context manager for run lifecycle.

        Marks the experiment completed when the body finishes, or failed
        (re-raising the exception) when it raises. The remote run itself is
        created by ``run()`` before this context manager is returned.
        """
        try:
            yield self
            self.complete()
        except Exception as e:
            self.fail(str(e))
            raise

    def complete(self) -> None:
        """Mark experiment as completed."""
        self._status = "completed"
        self._completed_at = time.time()

        # Update run status on remote server
        if isinstance(self.backend, DashBackend) and self.run_id:
            try:
                self.backend.update_run(status="COMPLETED")
                print("✓ Marked run as COMPLETED on remote server")
            except Exception as e:
                print(f"Warning: Failed to update run status: {e}")

        # Save metadata locally
        if not isinstance(self.backend, DashBackend):
            self._save_metadata()

    def fail(self, error: str) -> None:
        """Mark experiment as failed.

        Args:
            error: Error message
        """
        self._status = "failed"
        self._completed_at = time.time()

        # Log error
        self.logs.error("Experiment failed", error=error)

        # Update run status on remote server
        if isinstance(self.backend, DashBackend) and self.run_id:
            try:
                self.backend.update_run(status="FAILED", metadata={"error": error})
                print("✓ Marked run as FAILED on remote server")
            except Exception as e:
                print(f"Warning: Failed to update run status: {e}")

        # Save metadata locally
        if not isinstance(self.backend, DashBackend):
            self._save_metadata()

    # Convenience methods for logging
    def info(self, message: str, **context) -> None:
        """Log info message (convenience method).

        Args:
            message: Log message
            **context: Additional context
        """
        self.logs.info(message, **context)

    def error(self, message: str, **context) -> None:
        """Log error message (convenience method).

        Args:
            message: Log message
            **context: Additional context
        """
        self.logs.error(message, **context)

    def warning(self, message: str, **context) -> None:
        """Log warning message (convenience method).

        Args:
            message: Log message
            **context: Additional context
        """
        self.logs.warning(message, **context)

    def debug(self, message: str, **context) -> None:
        """Log debug message (convenience method).

        Args:
            message: Log message
            **context: Additional context
        """
        self.logs.debug(message, **context)

    @classmethod
    def _auto_configure(cls) -> "Experiment":
        """Create auto-configured experiment from environment.

        Reads configuration from:
            - ML_LOGGER_NAMESPACE (default: "default")
            - ML_LOGGER_WORKSPACE (default: "experiments")
            - ML_LOGGER_PREFIX (default: timestamp+uuid)
            - ML_LOGGER_REMOTE (optional)

        Returns:
            Auto-configured Experiment instance
        """
        namespace = os.environ.get("ML_LOGGER_NAMESPACE", "default")
        workspace = os.environ.get("ML_LOGGER_WORKSPACE", "experiments")

        # Generate default prefix with timestamp + short UUID
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        short_id = str(uuid.uuid4())[:8]
        default_prefix = f"{timestamp}_{short_id}"

        prefix = os.environ.get("ML_LOGGER_PREFIX", default_prefix)
        remote = os.environ.get("ML_LOGGER_REMOTE")

        return cls(
            namespace=namespace,
            workspace=workspace,
            prefix=prefix,
            remote=remote,
        )

    def __repr__(self) -> str:
        """String representation."""
        return (
            f"Experiment(namespace='{self.namespace}', "
            f"workspace='{self.workspace}', "
            f"prefix='{self.prefix}', "
            f"status='{self._status}')"
        )