ml-dash 0.0.17__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml_dash/ARCHITECTURE.md +382 -0
- ml_dash/__init__.py +14 -1
- ml_dash/autolog.py +32 -0
- ml_dash/backends/__init__.py +11 -0
- ml_dash/backends/base.py +124 -0
- ml_dash/backends/dash_backend.py +571 -0
- ml_dash/backends/local_backend.py +90 -0
- ml_dash/components/__init__.py +13 -0
- ml_dash/components/files.py +246 -0
- ml_dash/components/logs.py +104 -0
- ml_dash/components/metrics.py +169 -0
- ml_dash/components/parameters.py +144 -0
- ml_dash/job_logger.py +42 -0
- ml_dash/ml_logger.py +234 -0
- ml_dash/run.py +331 -0
- ml_dash-0.4.0.dist-info/METADATA +1424 -0
- ml_dash-0.4.0.dist-info/RECORD +19 -0
- ml_dash-0.4.0.dist-info/WHEEL +4 -0
- ml_dash-0.4.0.dist-info/entry_points.txt +3 -0
- app-build/asset-manifest.json +0 -15
- app-build/favicon.ico +0 -0
- app-build/github-markdown.css +0 -957
- app-build/index.html +0 -1
- app-build/manifest.json +0 -15
- app-build/monaco-editor-worker-loader-proxy.js +0 -6
- app-build/precache-manifest.ffc09f8a591c529a1bd5c6f21f49815f.js +0 -26
- app-build/service-worker.js +0 -34
- ml_dash/app.py +0 -60
- ml_dash/config.py +0 -16
- ml_dash/example.py +0 -0
- ml_dash/file_events.py +0 -71
- ml_dash/file_handlers.py +0 -141
- ml_dash/file_utils.py +0 -5
- ml_dash/file_watcher.py +0 -30
- ml_dash/main.py +0 -60
- ml_dash/mime_types.py +0 -20
- ml_dash/schema/__init__.py +0 -110
- ml_dash/schema/archive.py +0 -165
- ml_dash/schema/directories.py +0 -59
- ml_dash/schema/experiments.py +0 -65
- ml_dash/schema/files/__init__.py +0 -204
- ml_dash/schema/files/file_helpers.py +0 -79
- ml_dash/schema/files/images.py +0 -27
- ml_dash/schema/files/metrics.py +0 -64
- ml_dash/schema/files/parameters.py +0 -50
- ml_dash/schema/files/series.py +0 -235
- ml_dash/schema/files/videos.py +0 -27
- ml_dash/schema/helpers.py +0 -66
- ml_dash/schema/projects.py +0 -65
- ml_dash/schema/schema_helpers.py +0 -19
- ml_dash/schema/users.py +0 -33
- ml_dash/sse.py +0 -18
- ml_dash-0.0.17.dist-info/METADATA +0 -67
- ml_dash-0.0.17.dist-info/RECORD +0 -38
- ml_dash-0.0.17.dist-info/WHEEL +0 -5
- ml_dash-0.0.17.dist-info/top_level.txt +0 -2
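
The three hunks below reconstruct the new backend and component modules in full. For orientation, here is a minimal sketch of how the two new storage backends appear to be constructed, using only the `__init__` signatures visible in the diff; the server URL is the example from the class docstring, and the namespace/workspace/experiment values are placeholders:

```python
from ml_dash.backends.dash_backend import DashBackend
from ml_dash.backends.local_backend import LocalBackend

# Remote backend: syncs experiments, runs, metrics, logs, and files
# to an ML-Dash server over its HTTP API.
remote = DashBackend(
    server_url="http://localhost:4000",  # example URL from the docstring
    namespace="my-team",                 # placeholder
    workspace="my-project",              # placeholder
    experiment_name="baseline",          # placeholder
)

# Local backend: stores everything under a root directory (default ".ml-logger").
local = LocalBackend(root_dir=".ml-logger")
```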
ml_dash/backends/dash_backend.py (+571):

```diff
@@ -0,0 +1,571 @@
+"""ML-Dash server storage backend for ML-Logger.
+
+This backend syncs data to an ML-Dash server via HTTP API.
+"""
+
+import json
+import time
+from typing import Optional, List, Dict, Any
+import requests
+
+from .base import StorageBackend
+
+
+class DashBackend(StorageBackend):
+    """ML-Dash server storage backend.
+
+    Syncs data to a remote ML-Dash server via HTTP API.
+
+    Args:
+        server_url: URL of the ML-Dash server (e.g., "http://localhost:4000")
+        namespace: User/team namespace
+        workspace: Project workspace
+        experiment_name: Name of the experiment
+        experiment_id: Server-side experiment ID (optional, will be created if not provided)
+        run_id: Server-side run ID (optional, will be created when needed)
+        directory: Directory path for organizing experiments (optional)
+    """
+
+    def __init__(
+        self,
+        server_url: str,
+        namespace: str,
+        workspace: str,
+        experiment_name: str,
+        experiment_id: Optional[str] = None,
+        run_id: Optional[str] = None,
+        directory: Optional[str] = None,
+    ):
+        """Initialize ML-Dash backend.
+
+        Args:
+            server_url: URL of the ML-Dash server
+            namespace: User/team namespace
+            workspace: Project workspace
+            experiment_name: Name of the experiment
+            experiment_id: Server-side experiment ID (optional)
+            run_id: Server-side run ID (optional)
+            directory: Directory path for organizing experiments (e.g., "dir1/dir2")
+        """
+        self.server_url = server_url.rstrip("/")
+        self.namespace = namespace
+        self.workspace = workspace
+        self.experiment_name = experiment_name
+        self.experiment_id = experiment_id
+        self.namespace_id: Optional[str] = None
+        self.run_id = run_id
+        self.directory = directory
+        self._session = requests.Session()
+
+    def initialize_experiment(self, description: Optional[str] = None, tags: Optional[List[str]] = None) -> Dict[str, Any]:
+        """Create or get the experiment on the server.
+
+        Args:
+            description: Experiment description
+            tags: Experiment tags
+
+        Returns:
+            Experiment data from server
+        """
+        url = f"{self.server_url}/api/v1/experiments"
+        data = {
+            "namespace": self.namespace,
+            "workspace": self.workspace,
+            "experimentName": self.experiment_name,
+        }
+
+        if description:
+            data["description"] = description
+        if tags:
+            data["tags"] = tags
+        if self.directory:
+            data["directory"] = self.directory
+
+        response = self._session.post(url, json=data)
+        response.raise_for_status()
+        result = response.json()
+
+        if result.get("success"):
+            experiment = result.get("experiment", {})
+            self.experiment_id = experiment.get("id")
+            self.namespace_id = experiment.get("namespaceId")
+            return experiment
+        else:
+            raise Exception(f"Failed to create experiment: {result}")
+
+    def create_run(self, name: Optional[str] = None, tags: Optional[List[str]] = None, metadata: Optional[Dict] = None) -> Dict[str, Any]:
+        """Create a new run on the server.
+
+        Args:
+            name: Run name
+            tags: Run tags
+            metadata: Run metadata
+
+        Returns:
+            Run data from server
+        """
+        if not self.experiment_id or not self.namespace_id:
+            raise Exception("Must call initialize_experiment() before create_run()")
+
+        url = f"{self.server_url}/api/v1/runs"
+        data = {
+            "experimentId": self.experiment_id,
+            "namespaceId": self.namespace_id,
+        }
+
+        if name:
+            data["name"] = name
+        if tags:
+            data["tags"] = tags
+        if metadata:
+            data["metadata"] = metadata
+
+        response = self._session.post(url, json=data)
+        response.raise_for_status()
+        result = response.json()
+
+        if result.get("success"):
+            run = result.get("run", {})
+            self.run_id = run.get("id")
+            return run
+        else:
+            raise Exception(f"Failed to create run: {result}")
+
+    def update_run(self, status: Optional[str] = None, metadata: Optional[Dict] = None) -> Dict[str, Any]:
+        """Update the run status.
+
+        Args:
+            status: Run status (RUNNING, COMPLETED, FAILED, STOPPED)
+            metadata: Additional metadata
+
+        Returns:
+            Updated run data from server
+        """
+        if not self.run_id:
+            raise Exception("No run_id available. Call create_run() first")
+
+        url = f"{self.server_url}/api/v1/runs/{self.run_id}"
+        data = {}
+
+        if status:
+            data["status"] = status
+        if metadata:
+            data["metadata"] = metadata
+        if status in ["COMPLETED", "FAILED", "STOPPED"]:
+            data["endedAt"] = time.time() * 1000  # Convert to milliseconds
+
+        response = self._session.put(url, json=data)
+        response.raise_for_status()
+        result = response.json()
+
+        if result.get("success"):
+            return result.get("run", {})
+        else:
+            raise Exception(f"Failed to update run: {result}")
+
+    def log_metrics(self, metrics: Dict[str, List[Dict[str, Any]]]) -> Dict[str, Any]:
+        """Log metrics to the server.
+
+        Args:
+            metrics: Dictionary mapping metric names to lists of {step, timestamp, value} dicts
+
+        Returns:
+            Server response
+        """
+        if not self.experiment_id or not self.run_id:
+            raise Exception("Must initialize experiment and create run before logging metrics")
+
+        url = f"{self.server_url}/api/v1/metrics"
+        data = {
+            "experimentId": self.experiment_id,
+            "runId": self.run_id,
+            "metrics": metrics,
+        }
+
+        response = self._session.post(url, json=data)
+        response.raise_for_status()
+        return response.json()
+
+    def log_parameters(self, parameters: Dict[str, Any], operation: str = "set") -> Dict[str, Any]:
+        """Log parameters to the server.
+
+        Args:
+            parameters: Parameter dictionary
+            operation: Operation type (set, extend, update)
+
+        Returns:
+            Server response
+        """
+        if not self.namespace_id:
+            raise Exception("Must initialize experiment before logging parameters")
+
+        url = f"{self.server_url}/api/v1/parameters"
+        data = {
+            "namespaceId": self.namespace_id,
+            "parameters": parameters,
+            "operation": operation,
+        }
+
+        if self.experiment_id:
+            data["experimentId"] = self.experiment_id
+        if self.run_id:
+            data["runId"] = self.run_id
+
+        response = self._session.post(url, json=data)
+        response.raise_for_status()
+        return response.json()
+
+    def log_entry(self, level: str, message: str, metadata: Optional[Dict] = None, timestamp: Optional[float] = None) -> Dict[str, Any]:
+        """Log a text entry to the server.
+
+        Args:
+            level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
+            message: Log message
+            metadata: Additional metadata
+            timestamp: Timestamp (defaults to current time)
+
+        Returns:
+            Server response
+        """
+        if not self.run_id:
+            raise Exception("Must create run before logging entries")
+
+        url = f"{self.server_url}/api/v1/logs"
+        data = {
+            "runId": self.run_id,
+            "level": level.upper(),
+            "message": message,
+        }
+
+        if timestamp:
+            data["timestamp"] = timestamp * 1000  # Convert to milliseconds
+        if metadata:
+            data["metadata"] = metadata
+
+        response = self._session.post(url, json=data)
+        response.raise_for_status()
+        return response.json()
+
+    def upload_file(self, name: str, file_data: bytes, artifact_type: str = "OTHER", mime_type: Optional[str] = None, metadata: Optional[Dict] = None) -> Dict[str, Any]:
+        """Upload a file to the server.
+
+        Args:
+            name: File name
+            file_data: File content as bytes
+            artifact_type: Type of artifact (IMAGE, VIDEO, MODEL, etc.)
+            mime_type: MIME type
+            metadata: Additional metadata
+
+        Returns:
+            Server response with artifact info
+        """
+        if not self.run_id:
+            raise Exception("Must create run before uploading files")
+
+        url = f"{self.server_url}/api/v1/files"
+
+        # Prepare form data
+        files = {"file": (name, file_data, mime_type or "application/octet-stream")}
+        data = {
+            "runId": self.run_id,
+            "name": name,
+            "type": artifact_type,
+        }
+
+        if metadata:
+            data["metadata"] = json.dumps(metadata)
+
+        response = self._session.post(url, files=files, data=data)
+        response.raise_for_status()
+        return response.json()
+
+    # StorageBackend interface methods
+
+    def exists(self, path: str) -> bool:
+        """Check if a file exists (not fully supported in remote backend)."""
+        # For logs.jsonl, check if we have a run_id (meaning logs can be fetched)
+        if "logs.jsonl" in path:
+            return self.run_id is not None
+
+        # For metrics.jsonl and parameters.jsonl, assume they can be read
+        if "metrics.jsonl" in path or "parameters.jsonl" in path:
+            return self.run_id is not None or self.experiment_id is not None
+
+        # For other files, assume they don't exist locally
+        return False
+
+    def write_bytes(self, path: str, data: bytes) -> None:
+        """Write binary data to the server as a file."""
+        # Extract filename from path
+        filename = path.split("/")[-1]
+
+        # Determine artifact type from extension
+        artifact_type = "OTHER"
+        if any(path.endswith(ext) for ext in [".png", ".jpg", ".jpeg", ".gif", ".bmp"]):
+            artifact_type = "IMAGE"
+        elif any(path.endswith(ext) for ext in [".mp4", ".avi", ".mov"]):
+            artifact_type = "VIDEO"
+        elif any(path.endswith(ext) for ext in [".pt", ".pth", ".h5", ".pkl"]):
+            artifact_type = "MODEL"
+        elif any(path.endswith(ext) for ext in [".json"]):
+            artifact_type = "JSON"
+        elif any(path.endswith(ext) for ext in [".yaml", ".yml"]):
+            artifact_type = "YAML"
+        elif any(path.endswith(ext) for ext in [".md"]):
+            artifact_type = "MARKDOWN"
+        elif any(path.endswith(ext) for ext in [".csv"]):
+            artifact_type = "CSV"
+
+        self.upload_file(filename, data, artifact_type=artifact_type)
+
+    def read_bytes(self, path: str) -> bytes:
+        """Read binary data from the server."""
+        # Not implemented - files are stored in S3, would need presigned URLs
+        raise NotImplementedError("DashBackend.read_bytes() not implemented - files are stored in S3")
+
+    def write_text(self, path: str, text: str) -> None:
+        """Write text to the server."""
+        self.write_bytes(path, text.encode("utf-8"))
+
+    def read_text(self, path: str) -> str:
+        """Read text from the server.
+
+        For logs.jsonl, metrics.jsonl, and parameters.jsonl, fetches data from server
+        and formats as JSONL.
+        """
+        # Handle logs.jsonl
+        if "logs.jsonl" in path:
+            if not self.run_id:
+                return ""
+
+            url = f"{self.server_url}/api/v1/runs/{self.run_id}/logs"
+            response = self._session.get(url)
+            response.raise_for_status()
+            result = response.json()
+
+            if not result.get("success"):
+                raise Exception(f"Failed to fetch logs: {result}")
+
+            # Convert logs to JSONL format
+            logs = result.get("logs", [])
+            lines = []
+            for log in logs:
+                # Convert to the format used by local backend
+                entry = {
+                    "timestamp": log.get("timestamp"),
+                    "level": log.get("level"),
+                    "message": log.get("message"),
+                }
+                if log.get("metadata"):
+                    entry["context"] = log["metadata"]
+
+                lines.append(json.dumps(entry))
+
+            return "\n".join(lines)
+
+        # Handle parameters.jsonl
+        if "parameters.jsonl" in path:
+            if not self.run_id and not self.experiment_id:
+                return ""
+
+            url = f"{self.server_url}/api/v1/parameters"
+            params = {}
+            if self.run_id:
+                params["runId"] = self.run_id
+            elif self.experiment_id:
+                params["experimentId"] = self.experiment_id
+
+            response = self._session.get(url, params=params)
+            response.raise_for_status()
+            result = response.json()
+
+            if not result.get("success"):
+                raise Exception(f"Failed to fetch parameters: {result}")
+
+            # The server stores only the final state, not operation history
+            # We return a single 'set' operation with the merged data
+            parameters_list = result.get("parameters", [])
+
+            if not parameters_list:
+                return ""
+
+            # Merge all parameter data (in case there are multiple records)
+            merged_data = {}
+            for param in parameters_list:
+                param_data = param.get("data", {})
+                merged_data.update(param_data)
+
+            # Return as a single set operation
+            entry = {
+                "timestamp": time.time(),
+                "operation": "set",
+                "data": merged_data,
+            }
+
+            return json.dumps(entry)
+
+        # Handle metrics.jsonl
+        if "metrics.jsonl" in path:
+            if not self.experiment_id:
+                return ""
+
+            url = f"{self.server_url}/api/v1/experiments/{self.experiment_id}/metrics"
+            response = self._session.get(url)
+            response.raise_for_status()
+            result = response.json()
+
+            if not result.get("success"):
+                raise Exception(f"Failed to fetch metrics: {result}")
+
+            # Convert metrics to JSONL format, grouped by step/timestamp
+            metrics_list = result.get("metrics", [])
+
+            # Group metrics by (step, timestamp)
+            grouped = {}
+
+            for metric in metrics_list:
+                metric_name = metric.get("name")
+                data = metric.get("data", {})
+
+                # If we have a specific run_id, filter to that run
+                if self.run_id and self.run_id in data:
+                    run_data = data[self.run_id]
+                    for point in run_data:
+                        step = point.get("step")
+                        timestamp = point.get("timestamp", time.time()) / 1000
+                        value = point.get("value")
+
+                        # Create key for grouping
+                        key = (step, int(timestamp * 1000))  # Group by step and timestamp (ms precision)
+
+                        if key not in grouped:
+                            grouped[key] = {
+                                "timestamp": timestamp,
+                                "metrics": {},
+                            }
+                            if step is not None:
+                                grouped[key]["step"] = step
+
+                        grouped[key]["metrics"][metric_name] = value
+
+                elif not self.run_id:
+                    # If no specific run, include all runs
+                    for run_id, run_data in data.items():
+                        for point in run_data:
+                            step = point.get("step")
+                            timestamp = point.get("timestamp", time.time()) / 1000
+                            value = point.get("value")
+
+                            key = (step, int(timestamp * 1000))
+
+                            if key not in grouped:
+                                grouped[key] = {
+                                    "timestamp": timestamp,
+                                    "metrics": {},
+                                }
+                                if step is not None:
+                                    grouped[key]["step"] = step
+
+                            grouped[key]["metrics"][metric_name] = value
+
+            # Convert to JSONL, sorted by step
+            lines = []
+            for key in sorted(grouped.keys()):
+                lines.append(json.dumps(grouped[key]))
+
+            return "\n".join(lines) if lines else ""
+
+        # For other files, use read_bytes
+        return self.read_bytes(path).decode("utf-8")
+
+    def append_text(self, path: str, text: str) -> None:
+        """Append text to a file on the server.
+
+        For JSONL files (metrics, logs, parameters), we parse and send to appropriate endpoints.
+        """
+        # Determine what type of data this is based on the path
+        if "metrics.jsonl" in path:
+            self._append_metrics(text)
+        elif "logs.jsonl" in path:
+            self._append_log(text)
+        elif "parameters.jsonl" in path:
+            self._append_parameters(text)
+        else:
+            # For other files, we can't really append in S3, so we skip
+            pass
+
+    def _append_metrics(self, line: str) -> None:
+        """Parse and send metrics from a JSONL line."""
+        try:
+            entry = json.loads(line.strip())
+            metrics_data = entry.get("metrics", {})
+            step = entry.get("step")
+            timestamp = entry.get("timestamp", time.time())
+
+            # Convert to the format expected by the API
+            formatted_metrics = {}
+            for name, value in metrics_data.items():
+                formatted_metrics[name] = [{
+                    "step": step,
+                    "timestamp": timestamp * 1000,  # Convert to milliseconds
+                    "value": float(value),
+                }]
+
+            if formatted_metrics:
+                self.log_metrics(formatted_metrics)
+        except Exception as e:
+            print(f"Warning: Failed to send metrics: {e}")
+
+    def _append_log(self, line: str) -> None:
+        """Parse and send log entry from a JSONL line."""
+        try:
+            entry = json.loads(line.strip())
+            level = entry.get("level", "INFO")
+            message = entry.get("message", "")
+            timestamp = entry.get("timestamp", time.time())
+            context = entry.get("context")
+
+            self.log_entry(level, message, metadata=context, timestamp=timestamp)
+        except Exception as e:
+            print(f"Warning: Failed to send log entry: {e}")
+
+    def _append_parameters(self, line: str) -> None:
+        """Parse and send parameters from a JSONL line."""
+        try:
+            entry = json.loads(line.strip())
+            operation = entry.get("operation", "set")
+
+            if operation == "set":
+                data = entry.get("data", {})
+                self.log_parameters(data, operation="set")
+            elif operation == "extend":
+                data = entry.get("data", {})
+                self.log_parameters(data, operation="extend")
+            elif operation == "update":
+                # For single key updates, we can send as a set operation
+                key = entry.get("key")
+                value = entry.get("value")
+                if key:
+                    self.log_parameters({key: value}, operation="update")
+        except Exception as e:
+            print(f"Warning: Failed to send parameters: {e}")
+
+    def list_dir(self, path: str = "") -> List[str]:
+        """List contents of a directory on the server."""
+        # Not implemented - would need a separate API endpoint
+        raise NotImplementedError("DashBackend.list_dir() not implemented")
+
+    def get_url(self, path: str) -> Optional[str]:
+        """Get a URL for accessing a file on the server."""
+        # Files are stored in S3, URL would come from the artifact record
+        return f"{self.server_url}/files/{self.namespace}/{self.workspace}/{path}"
+
+    def makedirs(self, path: str, exist_ok: bool = True) -> None:
+        """Create directories on the server."""
+        # Server-side directories are created automatically
+        pass
+
+    def delete(self, path: str) -> None:
+        """Delete a file on the server."""
+        # Not implemented - would need a separate API endpoint
+        raise NotImplementedError("DashBackend.delete() not implemented")
```
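
`DashBackend` enforces an ordering: `initialize_experiment()` populates `experiment_id`/`namespace_id`, `create_run()` populates `run_id`, and the logging methods raise before those IDs exist. A minimal end-to-end sketch against a hypothetical local server, using only the methods defined above (all values are illustrative):

```python
import time

backend = DashBackend(
    server_url="http://localhost:4000",  # hypothetical server
    namespace="my-team",
    workspace="my-project",
    experiment_name="baseline",
)

backend.initialize_experiment(description="demo run", tags=["sketch"])
backend.create_run(name="run-1")

# log_metrics expects metric names mapped to lists of {step, timestamp, value}
# points, with timestamps in milliseconds (matching what _append_metrics sends).
backend.log_metrics({"loss": [{"step": 0, "timestamp": time.time() * 1000, "value": 1.23}]})

backend.log_parameters({"lr": 3e-4, "batch_size": 32}, operation="set")
backend.log_entry("INFO", "training started")
backend.update_run(status="COMPLETED")  # terminal status also sets endedAt
```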
ml_dash/backends/local_backend.py (+90):

```diff
@@ -0,0 +1,90 @@
+"""Local file system storage backend for ML-Logger."""
+
+from pathlib import Path
+from typing import Optional, List
+import os
+
+from .base import StorageBackend
+
+
+class LocalBackend(StorageBackend):
+    """Local file system storage backend.
+
+    Stores all data in the local file system.
+
+    Args:
+        root_dir: Root directory for storage (default: ".ml-logger")
+    """
+
+    def __init__(self, root_dir: str = ".ml-logger"):
+        """Initialize local backend.
+
+        Args:
+            root_dir: Root directory for storage
+        """
+        self.root_dir = Path(root_dir).resolve()
+        self.root_dir.mkdir(parents=True, exist_ok=True)
+
+    def _resolve_path(self, path: str) -> Path:
+        """Resolve a path relative to root_dir.
+
+        Args:
+            path: Relative path
+
+        Returns:
+            Absolute Path object
+        """
+        return self.root_dir / path
+
+    def exists(self, path: str) -> bool:
+        """Check if a file or directory exists."""
+        return self._resolve_path(path).exists()
+
+    def write_bytes(self, path: str, data: bytes) -> None:
+        """Write binary data to a file."""
+        full_path = self._resolve_path(path)
+        full_path.parent.mkdir(parents=True, exist_ok=True)
+        full_path.write_bytes(data)
+
+    def read_bytes(self, path: str) -> bytes:
+        """Read binary data from a file."""
+        return self._resolve_path(path).read_bytes()
+
+    def write_text(self, path: str, text: str) -> None:
+        """Write text to a file."""
+        full_path = self._resolve_path(path)
+        full_path.parent.mkdir(parents=True, exist_ok=True)
+        full_path.write_text(text, encoding="utf-8")
+
+    def read_text(self, path: str) -> str:
+        """Read text from a file."""
+        return self._resolve_path(path).read_text(encoding="utf-8")
+
+    def append_text(self, path: str, text: str) -> None:
+        """Append text to a file."""
+        full_path = self._resolve_path(path)
+        full_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(full_path, "a", encoding="utf-8") as f:
+            f.write(text)
+
+    def list_dir(self, path: str = "") -> List[str]:
+        """List contents of a directory."""
+        full_path = self._resolve_path(path) if path else self.root_dir
+        if not full_path.exists():
+            return []
+        return [item.name for item in full_path.iterdir()]
+
+    def get_url(self, path: str) -> Optional[str]:
+        """Get a file:// URL for accessing a file."""
+        full_path = self._resolve_path(path)
+        return f"file://{full_path}" if full_path.exists() else None
+
+    def makedirs(self, path: str, exist_ok: bool = True) -> None:
+        """Create directories recursively."""
+        self._resolve_path(path).mkdir(parents=True, exist_ok=exist_ok)
+
+    def delete(self, path: str) -> None:
+        """Delete a file."""
+        full_path = self._resolve_path(path)
+        if full_path.exists():
+            full_path.unlink()
```
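
`LocalBackend` maps the same `StorageBackend` interface onto `pathlib` operations, creating parent directories on every write. A short sketch under the default root (the file names are illustrative):

```python
local = LocalBackend(root_dir=".ml-logger")

# Paths resolve relative to root_dir; parent directories are created automatically.
local.write_text("exp1/parameters.jsonl", '{"operation": "set", "data": {"lr": 0.001}}\n')
local.append_text("exp1/metrics.jsonl", '{"step": 0, "metrics": {"loss": 1.23}}\n')

assert local.exists("exp1/metrics.jsonl")
print(local.list_dir("exp1"))               # e.g. ['parameters.jsonl', 'metrics.jsonl']
print(local.get_url("exp1/metrics.jsonl"))  # file:///absolute/path/.ml-logger/exp1/metrics.jsonl
```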
ml_dash/components/__init__.py (+13):

```diff
@@ -0,0 +1,13 @@
+"""Logger components for managing different data types."""
+
+from .parameters import ParameterManager
+from .metrics import MetricsLogger
+from .files import FileManager
+from .logs import LogManager
+
+__all__ = [
+    "ParameterManager",
+    "MetricsLogger",
+    "FileManager",
+    "LogManager",
+]
```
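
The package re-exports all four managers, so downstream code can import them from `ml_dash.components` directly; their constructor signatures are not shown in this diff, so only the import is illustrated:

```python
from ml_dash.components import ParameterManager, MetricsLogger, FileManager, LogManager
```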