ml-dash 0.0.17__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml_dash/ARCHITECTURE.md +382 -0
- ml_dash/__init__.py +14 -1
- ml_dash/autolog.py +32 -0
- ml_dash/backends/__init__.py +11 -0
- ml_dash/backends/base.py +124 -0
- ml_dash/backends/dash_backend.py +571 -0
- ml_dash/backends/local_backend.py +90 -0
- ml_dash/components/__init__.py +13 -0
- ml_dash/components/files.py +246 -0
- ml_dash/components/logs.py +104 -0
- ml_dash/components/metrics.py +169 -0
- ml_dash/components/parameters.py +144 -0
- ml_dash/job_logger.py +42 -0
- ml_dash/ml_logger.py +234 -0
- ml_dash/run.py +331 -0
- ml_dash-0.4.0.dist-info/METADATA +1424 -0
- ml_dash-0.4.0.dist-info/RECORD +19 -0
- ml_dash-0.4.0.dist-info/WHEEL +4 -0
- ml_dash-0.4.0.dist-info/entry_points.txt +3 -0
- app-build/asset-manifest.json +0 -15
- app-build/favicon.ico +0 -0
- app-build/github-markdown.css +0 -957
- app-build/index.html +0 -1
- app-build/manifest.json +0 -15
- app-build/monaco-editor-worker-loader-proxy.js +0 -6
- app-build/precache-manifest.ffc09f8a591c529a1bd5c6f21f49815f.js +0 -26
- app-build/service-worker.js +0 -34
- ml_dash/app.py +0 -60
- ml_dash/config.py +0 -16
- ml_dash/example.py +0 -0
- ml_dash/file_events.py +0 -71
- ml_dash/file_handlers.py +0 -141
- ml_dash/file_utils.py +0 -5
- ml_dash/file_watcher.py +0 -30
- ml_dash/main.py +0 -60
- ml_dash/mime_types.py +0 -20
- ml_dash/schema/__init__.py +0 -110
- ml_dash/schema/archive.py +0 -165
- ml_dash/schema/directories.py +0 -59
- ml_dash/schema/experiments.py +0 -65
- ml_dash/schema/files/__init__.py +0 -204
- ml_dash/schema/files/file_helpers.py +0 -79
- ml_dash/schema/files/images.py +0 -27
- ml_dash/schema/files/metrics.py +0 -64
- ml_dash/schema/files/parameters.py +0 -50
- ml_dash/schema/files/series.py +0 -235
- ml_dash/schema/files/videos.py +0 -27
- ml_dash/schema/helpers.py +0 -66
- ml_dash/schema/projects.py +0 -65
- ml_dash/schema/schema_helpers.py +0 -19
- ml_dash/schema/users.py +0 -33
- ml_dash/sse.py +0 -18
- ml_dash-0.0.17.dist-info/METADATA +0 -67
- ml_dash-0.0.17.dist-info/RECORD +0 -38
- ml_dash-0.0.17.dist-info/WHEEL +0 -5
- ml_dash-0.0.17.dist-info/top_level.txt +0 -2
ml_dash/ml_logger.py
ADDED
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
"""ML_Logger - Legacy logging class for backward compatibility.
|
|
2
|
+
|
|
3
|
+
This class provides a simpler interface for basic logging with filtering capabilities.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import re
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from typing import Any, Callable, Dict, List, Optional, Pattern
|
|
9
|
+
|
|
10
|
+
from .backends.local_backend import LocalBackend
|
|
11
|
+
from .backends.base import StorageBackend
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class LogLevel(Enum):
    """Log level enumeration, ordered by increasing severity."""
    DEBUG = 0
    INFO = 1
    WARNING = 2
    ERROR = 3


class ML_Logger:
    """Legacy logger class with filtering capabilities.

    This class provides a simpler interface for logging with built-in filtering
    by log level, patterns, and custom filter functions.

    Args:
        prefix: Directory prefix for logging (e.g., "../data")
        backend: Storage backend (optional, defaults to LocalBackend)
    """

    def __init__(
        self,
        prefix: str,
        # Quoted forward reference: avoids resolving StorageBackend at class
        # definition time, so the class stays importable in isolation.
        backend: Optional["StorageBackend"] = None,
    ):
        """Initialize ML_Logger.

        Args:
            prefix: Directory prefix for logging
            backend: Storage backend (optional)
        """
        self.prefix = prefix
        self.backend = backend or LocalBackend(prefix)

        # In-memory log storage; each entry is a plain dict with at least
        # "message" and "level" keys, plus any caller-supplied context.
        self.buffer: List[Dict[str, Any]] = []

        # Filtering configuration, applied at log() time by _should_log().
        self._min_level = LogLevel.DEBUG
        self._include_patterns: List[Pattern] = []
        self._exclude_patterns: List[Pattern] = []
        self._custom_filters: List[Callable] = []

    def log(self, message: str, level: str = "INFO", **context) -> None:
        """Log a message with optional context.

        Args:
            message: Log message
            level: Log level (DEBUG, INFO, WARNING, ERROR)
            **context: Additional context fields
        """
        entry = {
            "message": message,
            "level": level.upper(),
            **context
        }

        # Drop the entry unless it passes every configured filter.
        if self._should_log(entry):
            self.buffer.append(entry)

    def info(self, message: str, **context) -> None:
        """Log an info message.

        Args:
            message: Log message
            **context: Additional context
        """
        self.log(message, level="INFO", **context)

    def warning(self, message: str, **context) -> None:
        """Log a warning message.

        Args:
            message: Log message
            **context: Additional context
        """
        self.log(message, level="WARNING", **context)

    def error(self, message: str, **context) -> None:
        """Log an error message.

        Args:
            message: Log message
            **context: Additional context
        """
        self.log(message, level="ERROR", **context)

    def debug(self, message: str, **context) -> None:
        """Log a debug message.

        Args:
            message: Log message
            **context: Additional context
        """
        self.log(message, level="DEBUG", **context)

    def set_level(self, level: str) -> None:
        """Set minimum log level.

        Args:
            level: Minimum level (DEBUG, INFO, WARNING, ERROR)

        Raises:
            KeyError: If ``level`` is not a recognized level name.
        """
        self._min_level = LogLevel[level.upper()]

    def add_include_pattern(self, pattern: str) -> None:
        """Add a pattern to include in logs.

        When any include patterns are set, a message must match at least
        one of them to be logged.

        Args:
            pattern: Regex pattern to match
        """
        self._include_patterns.append(re.compile(pattern))

    def add_exclude_pattern(self, pattern: str) -> None:
        """Add a pattern to exclude from logs.

        Args:
            pattern: Regex pattern to match
        """
        self._exclude_patterns.append(re.compile(pattern))

    def add_filter(self, filter_func: Callable[[Dict[str, Any]], bool]) -> None:
        """Add a custom filter function.

        Args:
            filter_func: Function that takes log entry and returns True to keep it
        """
        self._custom_filters.append(filter_func)

    def clear_filters(self) -> None:
        """Clear all filters and reset the minimum level to DEBUG."""
        self._min_level = LogLevel.DEBUG
        self._include_patterns.clear()
        self._exclude_patterns.clear()
        self._custom_filters.clear()

    def get_filtered_logs(
        self,
        level: Optional[str] = None,
        pattern: Optional[str] = None,
        start_step: Optional[int] = None,
        end_step: Optional[int] = None,
    ) -> List[Dict[str, Any]]:
        """Get filtered logs from buffer.

        Args:
            level: Filter by log level
            pattern: Filter by regex pattern
            start_step: Filter by minimum step
            end_step: Filter by maximum step

        Returns:
            List of filtered log entries. When step filtering is requested,
            entries lacking a "step" key are returned as copies with their
            buffer index filled in as the step.
        """
        # Pair each entry with its position in the buffer so the fallback
        # step is stable regardless of other filters. (Previously the
        # fallback used the index within the already-filtered list, which
        # shifted whenever a level/pattern filter was active.)
        indexed = list(enumerate(self.buffer))

        if level:
            indexed = [(i, e) for i, e in indexed if e.get("level") == level.upper()]

        if pattern:
            regex = re.compile(pattern)
            indexed = [(i, e) for i, e in indexed if regex.search(e.get("message", ""))]

        if start_step is not None or end_step is not None:
            result = []
            for i, entry in indexed:
                # Default step is the entry's index in the buffer.
                step = entry.get("step", i)
                in_range = (start_step is None or step >= start_step) and \
                           (end_step is None or step <= end_step)
                if in_range:
                    # Return a copy so callers see an explicit step without
                    # mutating the buffered entry.
                    entry_with_step = entry.copy()
                    entry_with_step.setdefault("step", i)
                    result.append(entry_with_step)
            return result

        return [entry for _, entry in indexed]

    def _should_log(self, entry: Dict[str, Any]) -> bool:
        """Check if an entry should be logged based on filters.

        Args:
            entry: Log entry to check

        Returns:
            True if entry should be logged
        """
        # Check log level. Unknown level names are treated as INFO rather
        # than raising KeyError on a malformed entry (previously
        # log("msg", level="trace") crashed here).
        try:
            entry_level = LogLevel[entry.get("level", "INFO")]
        except KeyError:
            entry_level = LogLevel.INFO
        if entry_level.value < self._min_level.value:
            return False

        message = entry.get("message", "")

        # Include patterns: if any are configured, at least one must match.
        if self._include_patterns:
            if not any(pattern.search(message) for pattern in self._include_patterns):
                return False

        # Exclude patterns: any match rejects the entry.
        if self._exclude_patterns:
            if any(pattern.search(message) for pattern in self._exclude_patterns):
                return False

        # Custom filters: every one must accept the entry.
        for filter_func in self._custom_filters:
            if not filter_func(entry):
                return False

        return True

    def clear_buffer(self) -> None:
        """Clear the log buffer."""
        self.buffer.clear()

    def __repr__(self) -> str:
        """String representation."""
        return f"ML_Logger(prefix='{self.prefix}', entries={len(self.buffer)})"
|
ml_dash/run.py
ADDED
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
"""Experiment class - main API for ML-Logger."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import socket
|
|
6
|
+
import time
|
|
7
|
+
import uuid
|
|
8
|
+
from contextlib import contextmanager
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from functools import wraps
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any, Callable, Dict, Optional
|
|
13
|
+
|
|
14
|
+
from .backends.base import StorageBackend
|
|
15
|
+
from .backends.local_backend import LocalBackend
|
|
16
|
+
from .backends.dash_backend import DashBackend
|
|
17
|
+
from .components.parameters import ParameterManager
|
|
18
|
+
from .components.metrics import MetricsLogger
|
|
19
|
+
from .components.files import FileManager
|
|
20
|
+
from .components.logs import LogManager
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Experiment:
    """Main experiment tracking class.

    Represents a single training execution with parameters, metrics, files, and logs.

    Args:
        namespace: User/team namespace (required)
        workspace: Project workspace (required)
        prefix: Experiment path (required)
        remote: Remote server URL (optional)
        local_root: Local storage directory (default: ".ml-logger")
        directory: Directory path for organizing experiments (optional)
        readme: Searchable description (optional)
        experiment_id: Server-side experiment ID (optional)
        tags: Experiment tags (optional)
    """

    def __init__(
        self,
        namespace: str,
        workspace: str,
        prefix: str,
        remote: Optional[str] = None,
        local_root: str = ".ml-logger",
        directory: Optional[str] = None,
        readme: Optional[str] = None,
        experiment_id: Optional[str] = None,
        tags: Optional[list] = None,
    ):
        """Initialize experiment.

        Args:
            namespace: User/team namespace
            workspace: Project workspace
            prefix: Experiment path (used as experiment name)
            remote: Remote server URL (optional)
            local_root: Local storage directory
            directory: Directory path for organizing experiments (e.g., "dir1/dir2")
            readme: Searchable description
            experiment_id: Server-side experiment ID
            tags: Experiment tags
        """
        self.namespace = namespace
        self.workspace = workspace
        self.prefix = prefix
        self.remote = remote
        self.local_root = local_root
        self.directory = directory
        self.readme = readme or ""
        self.experiment_id = experiment_id
        self.run_id: Optional[str] = None
        self.charts: Dict[str, Any] = {}
        self.tags = tags or []

        # Full path: {local_root}/{namespace}/{workspace}/{directory}/{prefix}
        # If directory is provided, insert it before prefix.
        if directory:
            self.local_path = f"{namespace}/{workspace}/{directory}/{prefix}"
        else:
            self.local_path = f"{namespace}/{workspace}/{prefix}"

        # Initialize backend: remote DashBackend when a server URL is given,
        # otherwise (or on remote failure) a local one.
        if remote:
            self.backend: StorageBackend = DashBackend(
                server_url=remote,
                namespace=namespace,
                workspace=workspace,
                experiment_name=prefix,
                experiment_id=experiment_id,
                directory=directory,
            )
            # Register the experiment on the server; fall back to local
            # storage if the server is unreachable or rejects the request.
            try:
                exp_data = self.backend.initialize_experiment(description=readme, tags=tags)
                self.experiment_id = exp_data.get("id")
                print(f"✓ Initialized experiment on remote server: {self.experiment_id}")
            except Exception as e:
                print(f"Warning: Failed to initialize experiment on remote server: {e}")
                # Fall back to local backend
                self.backend = LocalBackend(local_root)
        else:
            # Use local backend
            self.backend = LocalBackend(local_root)

        # Initialize components
        self.params = ParameterManager(self.backend, self.local_path)
        self.metrics = MetricsLogger(self.backend, self.local_path)
        self.files = FileManager(self.backend, self.local_path)
        self.logs = LogManager(self.backend, self.local_path)

        # Lifecycle metadata
        self._meta_file = f"{self.local_path}/.ml-logger.meta.json"
        self._status = "created"
        self._started_at: Optional[float] = None
        self._completed_at: Optional[float] = None
        self._hostname = socket.gethostname()

        # Load or create metadata whenever we ended up on a local backend.
        # Checked by backend type (not the `remote` flag) so the local
        # fallback after a failed remote initialization also gets its
        # metadata loaded -- previously `if not remote:` skipped that path,
        # inconsistent with run()/complete()/fail().
        if not isinstance(self.backend, DashBackend):
            self._load_metadata()

    def _load_metadata(self) -> None:
        """Load experiment metadata from file, if it exists.

        Best-effort: a missing, corrupt, or unreadable metadata file must
        not prevent the experiment from starting.
        """
        if self.backend.exists(self._meta_file):
            try:
                content = self.backend.read_text(self._meta_file)
                meta = json.loads(content)
                self._status = meta.get("status", "created")
                self._started_at = meta.get("started_at")
                self._completed_at = meta.get("completed_at")
                self.readme = meta.get("readme", self.readme)
                self.charts = meta.get("charts", {})
            except Exception as e:
                # Warn instead of silently swallowing, so corrupt metadata
                # files are discoverable (was a bare `pass`).
                print(f"Warning: Failed to load experiment metadata: {e}")

    def _save_metadata(self) -> None:
        """Save experiment metadata to file."""
        meta = {
            "namespace": self.namespace,
            "workspace": self.workspace,
            "prefix": self.prefix,
            "remote": self.remote,
            "experiment_id": self.experiment_id,
            "readme": self.readme,
            "charts": self.charts,
            "status": self._status,
            "started_at": self._started_at,
            "completed_at": self._completed_at,
            "hostname": self._hostname,
            "updated_at": time.time(),
        }
        content = json.dumps(meta, indent=2)
        self.backend.write_text(self._meta_file, content)

    def run(self, func: Optional[Callable] = None):
        """Mark experiment as started (supports 3 patterns).

        Pattern 1 - Direct call:
            experiment.run()
            # ... training code ...
            experiment.complete()

        Pattern 2 - Context manager:
            with experiment.run():
                # ... training code ...

        Pattern 3 - Decorator:
            @experiment.run
            def train():
                # ... training code ...

        Args:
            func: Function to wrap (for decorator pattern)

        Returns:
            Context manager or decorated function
        """
        if func is None:
            # Pattern 1 (direct) or Pattern 2 (context manager)
            self._status = "running"
            self._started_at = time.time()
            # Only save metadata for local backends
            if not isinstance(self.backend, DashBackend):
                self._save_metadata()
            return self._run_context()
        else:
            # Pattern 3 (decorator): wrap func so the run lifecycle brackets
            # the call, preserving the wrapped function's metadata.
            @wraps(func)
            def wrapper(*args, **kwargs):
                with self.run():
                    return func(*args, **kwargs)
            return wrapper

    @contextmanager
    def _run_context(self):
        """Context manager for run lifecycle.

        Marks the experiment completed on normal exit, failed (with the
        exception message) on error; the exception is re-raised.
        """
        try:
            # Create run on remote server if using DashBackend
            if isinstance(self.backend, DashBackend) and not self.run_id:
                try:
                    run_data = self.backend.create_run(name=self.prefix, tags=self.tags)
                    self.run_id = run_data.get("id")
                    print(f"✓ Created run on remote server: {self.run_id}")
                except Exception as e:
                    print(f"Warning: Failed to create run on remote server: {e}")

            yield self
            self.complete()
        except Exception as e:
            self.fail(str(e))
            raise

    def complete(self) -> None:
        """Mark experiment as completed."""
        self._status = "completed"
        self._completed_at = time.time()

        # Update run status on remote server
        if isinstance(self.backend, DashBackend) and self.run_id:
            try:
                self.backend.update_run(status="COMPLETED")
                print("✓ Marked run as COMPLETED on remote server")
            except Exception as e:
                print(f"Warning: Failed to update run status: {e}")

        # Save metadata locally
        if not isinstance(self.backend, DashBackend):
            self._save_metadata()

    def fail(self, error: str) -> None:
        """Mark experiment as failed.

        Args:
            error: Error message
        """
        self._status = "failed"
        self._completed_at = time.time()

        # Log error
        self.logs.error("Experiment failed", error=error)

        # Update run status on remote server
        if isinstance(self.backend, DashBackend) and self.run_id:
            try:
                self.backend.update_run(status="FAILED", metadata={"error": error})
                print("✓ Marked run as FAILED on remote server")
            except Exception as e:
                print(f"Warning: Failed to update run status: {e}")

        # Save metadata locally
        if not isinstance(self.backend, DashBackend):
            self._save_metadata()

    # Convenience methods for logging
    def info(self, message: str, **context) -> None:
        """Log info message (convenience method).

        Args:
            message: Log message
            **context: Additional context
        """
        self.logs.info(message, **context)

    def error(self, message: str, **context) -> None:
        """Log error message (convenience method).

        Args:
            message: Log message
            **context: Additional context
        """
        self.logs.error(message, **context)

    def warning(self, message: str, **context) -> None:
        """Log warning message (convenience method).

        Args:
            message: Log message
            **context: Additional context
        """
        self.logs.warning(message, **context)

    def debug(self, message: str, **context) -> None:
        """Log debug message (convenience method).

        Args:
            message: Log message
            **context: Additional context
        """
        self.logs.debug(message, **context)

    @classmethod
    def _auto_configure(cls) -> "Experiment":
        """Create auto-configured experiment from environment.

        Reads configuration from:
        - ML_LOGGER_NAMESPACE (default: "default")
        - ML_LOGGER_WORKSPACE (default: "experiments")
        - ML_LOGGER_PREFIX (default: timestamp+uuid)
        - ML_LOGGER_REMOTE (optional)

        Returns:
            Auto-configured Experiment instance
        """
        namespace = os.environ.get("ML_LOGGER_NAMESPACE", "default")
        workspace = os.environ.get("ML_LOGGER_WORKSPACE", "experiments")

        # Generate default prefix with timestamp + short UUID so concurrent
        # unnamed runs do not collide.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        short_id = str(uuid.uuid4())[:8]
        default_prefix = f"{timestamp}_{short_id}"

        prefix = os.environ.get("ML_LOGGER_PREFIX", default_prefix)
        remote = os.environ.get("ML_LOGGER_REMOTE")

        return cls(
            namespace=namespace,
            workspace=workspace,
            prefix=prefix,
            remote=remote,
        )

    def __repr__(self) -> str:
        """String representation."""
        return (
            f"Experiment(namespace='{self.namespace}', "
            f"workspace='{self.workspace}', "
            f"prefix='{self.prefix}', "
            f"status='{self._status}')"
        )
|