ml-dash 0.0.18__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
- ml_dash/__init__.py +58 -1
- ml_dash/client.py +562 -0
- ml_dash/experiment.py +916 -0
- ml_dash/files.py +313 -0
- ml_dash/log.py +181 -0
- ml_dash/metric.py +186 -0
- ml_dash/params.py +188 -0
- ml_dash/storage.py +922 -0
- ml_dash-0.2.1.dist-info/METADATA +237 -0
- ml_dash-0.2.1.dist-info/RECORD +12 -0
- ml_dash-0.2.1.dist-info/WHEEL +4 -0
- app-build/asset-manifest.json +0 -15
- app-build/favicon.ico +0 -0
- app-build/github-markdown.css +0 -957
- app-build/index.html +0 -1
- app-build/manifest.json +0 -15
- app-build/monaco-editor-worker-loader-proxy.js +0 -6
- app-build/precache-manifest.ffc09f8a591c529a1bd5c6f21f49815f.js +0 -26
- app-build/service-worker.js +0 -34
- ml_dash/app.py +0 -60
- ml_dash/config.py +0 -16
- ml_dash/file_events.py +0 -71
- ml_dash/file_handlers.py +0 -141
- ml_dash/file_utils.py +0 -5
- ml_dash/file_watcher.py +0 -30
- ml_dash/main.py +0 -60
- ml_dash/mime_types.py +0 -20
- ml_dash/schema/__init__.py +0 -110
- ml_dash/schema/archive.py +0 -165
- ml_dash/schema/directories.py +0 -59
- ml_dash/schema/experiments.py +0 -65
- ml_dash/schema/files/__init__.py +0 -204
- ml_dash/schema/files/file_helpers.py +0 -79
- ml_dash/schema/files/images.py +0 -27
- ml_dash/schema/files/metrics.py +0 -64
- ml_dash/schema/files/parameters.py +0 -50
- ml_dash/schema/files/series.py +0 -235
- ml_dash/schema/files/videos.py +0 -27
- ml_dash/schema/helpers.py +0 -66
- ml_dash/schema/projects.py +0 -65
- ml_dash/schema/schema_helpers.py +0 -19
- ml_dash/schema/users.py +0 -33
- ml_dash/sse.py +0 -18
- ml_dash-0.0.18.dist-info/METADATA +0 -67
- ml_dash-0.0.18.dist-info/RECORD +0 -38
- ml_dash-0.0.18.dist-info/WHEEL +0 -5
- ml_dash-0.0.18.dist-info/top_level.txt +0 -2
- /ml_dash/{example.py → py.typed} +0 -0
ml_dash/storage.py
ADDED
@@ -0,0 +1,922 @@
"""
Local filesystem storage for ML-Dash.
"""

from typing import Optional, Dict, Any, List
from pathlib import Path
import json
from datetime import datetime


class LocalStorage:
    """
    Local filesystem storage backend.

    Directory structure:
    <root>/
      <project>/
        <experiment_name>/
          experiment.json      # Experiment metadata
          logs/
            logs.jsonl         # Log entries
            .log_sequence      # Sequence counter
          metrics/
            <metric_name>.jsonl
          files/
            <uploaded_files>
          parameters.json      # Flattened parameters
    """

    def __init__(self, root_path: Path):
        """
        Initialize local storage.

        Args:
            root_path: Root directory for local storage
        """
        self.root_path = Path(root_path)
        self.root_path.mkdir(parents=True, exist_ok=True)

    def create_experiment(
        self,
        project: str,
        name: str,
        description: Optional[str] = None,
        tags: Optional[List[str]] = None,
        folder: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> Path:
        """
        Create an experiment directory structure.

        Args:
            project: Project name
            name: Experiment name
            description: Optional description
            tags: Optional tags
            folder: Optional folder path (used for organization)
            metadata: Optional metadata

        Returns:
            Path to experiment directory
        """
        # Create project directory
        project_dir = self.root_path / project
        project_dir.mkdir(parents=True, exist_ok=True)

        # Create experiment directory
        experiment_dir = project_dir / name
        experiment_dir.mkdir(parents=True, exist_ok=True)

        # Create subdirectories
        (experiment_dir / "logs").mkdir(exist_ok=True)
        (experiment_dir / "metrics").mkdir(exist_ok=True)
        (experiment_dir / "files").mkdir(exist_ok=True)

        # Write experiment metadata
        experiment_metadata = {
            "name": name,
            "project": project,
            "description": description,
            "tags": tags or [],
            "folder": folder,
            "metadata": metadata,
            "created_at": datetime.utcnow().isoformat() + "Z",
            "write_protected": False,
        }

        experiment_file = experiment_dir / "experiment.json"
        if not experiment_file.exists():
            # Only create if it doesn't exist (don't overwrite)
            with open(experiment_file, "w") as f:
                json.dump(experiment_metadata, f, indent=2)
        else:
            # Update existing experiment
            with open(experiment_file, "r") as f:
                existing = json.load(f)
            # Merge updates
            if description is not None:
                existing["description"] = description
            if tags is not None:
                existing["tags"] = tags
            if folder is not None:
                existing["folder"] = folder
            if metadata is not None:
                existing["metadata"] = metadata
            existing["updated_at"] = datetime.utcnow().isoformat() + "Z"
            with open(experiment_file, "w") as f:
                json.dump(existing, f, indent=2)

        return experiment_dir

    def flush(self):
        """Flush any pending writes (no-op for now)."""
        pass

    def write_log(
        self,
        project: str,
        experiment: str,
        message: str,
        level: str,
        timestamp: str,
        metadata: Optional[Dict[str, Any]] = None,
    ):
        """
        Write a single log entry immediately to the JSONL file.

        Args:
            project: Project name
            experiment: Experiment name
            message: Log message
            level: Log level
            timestamp: ISO timestamp string
            metadata: Optional metadata
        """
        experiment_dir = self.root_path / project / experiment
        logs_dir = experiment_dir / "logs"
        logs_file = logs_dir / "logs.jsonl"
        seq_file = logs_dir / ".log_sequence"

        # Read and increment sequence counter
        sequence_number = 0
        if seq_file.exists():
            try:
                sequence_number = int(seq_file.read_text().strip())
            except (ValueError, IOError):
                sequence_number = 0

        log_entry = {
            "sequenceNumber": sequence_number,
            "timestamp": timestamp,
            "level": level,
            "message": message,
        }

        if metadata:
            log_entry["metadata"] = metadata

        # Write log immediately
        with open(logs_file, "a") as f:
            f.write(json.dumps(log_entry) + "\n")

        # Update sequence counter
        seq_file.write_text(str(sequence_number + 1))

    def write_metric_data(
        self,
        project: str,
        experiment: str,
        metric_name: str,
        data: Any,
    ):
        """
        Write a metric data point.

        Args:
            project: Project name
            experiment: Experiment name
            metric_name: Metric name
            data: Data point
        """
        experiment_dir = self.root_path / project / experiment
        metric_file = experiment_dir / "metrics" / f"{metric_name}.jsonl"

        data_point = {
            "timestamp": datetime.utcnow().isoformat() + "Z",
            "data": data,
        }

        with open(metric_file, "a") as f:
            f.write(json.dumps(data_point) + "\n")

    def write_parameters(
        self,
        project: str,
        experiment: str,
        data: Dict[str, Any],
    ):
        """
        Write/merge parameters. Always merges with existing parameters.

        File format:
        {
          "version": 2,
          "data": {"model.lr": 0.001, "model.batch_size": 32},
          "updatedAt": "2024-01-15T10:30:00Z"
        }

        Args:
            project: Project name
            experiment: Experiment name
            data: Flattened parameter dict with dot notation (already flattened)
        """
        experiment_dir = self.root_path / project / experiment
        params_file = experiment_dir / "parameters.json"

        # Read existing if present
        if params_file.exists():
            with open(params_file, "r") as f:
                existing_doc = json.load(f)

            # Merge with existing data
            existing_data = existing_doc.get("data", {})
            existing_data.update(data)

            # Increment version
            version = existing_doc.get("version", 1) + 1

            params_doc = {
                "version": version,
                "data": existing_data,
                "updatedAt": datetime.utcnow().isoformat() + "Z"
            }
        else:
            # Create new parameters document
            params_doc = {
                "version": 1,
                "data": data,
                "createdAt": datetime.utcnow().isoformat() + "Z",
                "updatedAt": datetime.utcnow().isoformat() + "Z"
            }

        with open(params_file, "w") as f:
            json.dump(params_doc, f, indent=2)

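    # Illustrative example (editor's note, not part of the released file):
    # two successive calls merge their dot-notation keys and bump "version".
    #
    #   storage.write_parameters("mnist", "run-1", {"model.lr": 0.001})
    #   storage.write_parameters("mnist", "run-1", {"model.batch_size": 32})
    #
    # parameters.json afterwards:
    #   {"version": 2, "data": {"model.lr": 0.001, "model.batch_size": 32}, "updatedAt": "..."}
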
    def read_parameters(
        self,
        project: str,
        experiment: str,
    ) -> Optional[Dict[str, Any]]:
        """
        Read parameters from local file.

        Args:
            project: Project name
            experiment: Experiment name

        Returns:
            Flattened parameter dict, or None if file doesn't exist
        """
        experiment_dir = self.root_path / project / experiment
        params_file = experiment_dir / "parameters.json"

        if not params_file.exists():
            return None

        try:
            with open(params_file, "r") as f:
                params_doc = json.load(f)
            return params_doc.get("data", {})
        except (json.JSONDecodeError, IOError):
            return None

    def write_file(
        self,
        project: str,
        experiment: str,
        file_path: str,
        prefix: str,
        filename: str,
        description: Optional[str],
        tags: Optional[List[str]],
        metadata: Optional[Dict[str, Any]],
        checksum: str,
        content_type: str,
        size_bytes: int
    ) -> Dict[str, Any]:
        """
        Write file to local storage.

        Copies file to: files/<file_id>/<filename>
        Updates .files_metadata.json with file metadata

        Args:
            project: Project name
            experiment: Experiment name
            file_path: Source file path
            prefix: Logical path prefix
            filename: Original filename
            description: Optional description
            tags: Optional tags
            metadata: Optional metadata
            checksum: SHA256 checksum
            content_type: MIME type
            size_bytes: File size in bytes

        Returns:
            File metadata dict
        """
        import shutil
        from .files import generate_snowflake_id

        experiment_dir = self.root_path / project / experiment
        files_dir = experiment_dir / "files"
        metadata_file = files_dir / ".files_metadata.json"

        # Generate Snowflake ID for file
        file_id = generate_snowflake_id()

        # Create file directory
        file_dir = files_dir / file_id
        file_dir.mkdir(parents=True, exist_ok=True)

        # Copy file
        dest_file = file_dir / filename
        shutil.copy2(file_path, dest_file)

        # Create file metadata
        file_metadata = {
            "id": file_id,
            "experimentId": f"{project}/{experiment}",  # Local mode doesn't have real experiment ID
            "path": prefix,
            "filename": filename,
            "description": description,
            "tags": tags or [],
            "contentType": content_type,
            "sizeBytes": size_bytes,
            "checksum": checksum,
            "metadata": metadata,
            "uploadedAt": datetime.utcnow().isoformat() + "Z",
            "updatedAt": datetime.utcnow().isoformat() + "Z",
            "deletedAt": None
        }

        # Read existing metadata
        files_metadata = {"files": []}
        if metadata_file.exists():
            try:
                with open(metadata_file, "r") as f:
                    files_metadata = json.load(f)
            except (json.JSONDecodeError, IOError):
                files_metadata = {"files": []}

        # Check if file with same prefix+filename exists (overwrite behavior)
        existing_index = None
        for i, existing_file in enumerate(files_metadata["files"]):
            if (existing_file["path"] == prefix and
                    existing_file["filename"] == filename and
                    existing_file["deletedAt"] is None):
                existing_index = i
                break

        if existing_index is not None:
            # Overwrite: remove old file and update metadata
            old_file = files_metadata["files"][existing_index]
            old_file_dir = files_dir / old_file["id"]
            if old_file_dir.exists():
                shutil.rmtree(old_file_dir)
            files_metadata["files"][existing_index] = file_metadata
        else:
            # New file: append to list
            files_metadata["files"].append(file_metadata)

        # Write updated metadata
        with open(metadata_file, "w") as f:
            json.dump(files_metadata, f, indent=2)

        return file_metadata

    def list_files(
        self,
        project: str,
        experiment: str,
        prefix: Optional[str] = None,
        tags: Optional[List[str]] = None
    ) -> List[Dict[str, Any]]:
        """
        List files from local storage.

        Args:
            project: Project name
            experiment: Experiment name
            prefix: Optional prefix filter
            tags: Optional tags filter

        Returns:
            List of file metadata dicts (only non-deleted files)
        """
        experiment_dir = self.root_path / project / experiment
        metadata_file = experiment_dir / "files" / ".files_metadata.json"

        if not metadata_file.exists():
            return []

        try:
            with open(metadata_file, "r") as f:
                files_metadata = json.load(f)
        except (json.JSONDecodeError, IOError):
            return []

        files = files_metadata.get("files", [])

        # Filter out deleted files
        files = [f for f in files if f.get("deletedAt") is None]

        # Apply prefix filter
        if prefix:
            files = [f for f in files if f["path"].startswith(prefix)]

        # Apply tags filter
        if tags:
            files = [f for f in files if any(tag in f.get("tags", []) for tag in tags)]

        return files

    def read_file(
        self,
        project: str,
        experiment: str,
        file_id: str,
        dest_path: Optional[str] = None
    ) -> str:
        """
        Read/copy file from local storage.

        Args:
            project: Project name
            experiment: Experiment name
            file_id: File ID
            dest_path: Optional destination path (defaults to original filename)

        Returns:
            Path to copied file

        Raises:
            FileNotFoundError: If file not found
            ValueError: If checksum verification fails
        """
        import shutil
        from .files import verify_checksum

        experiment_dir = self.root_path / project / experiment
        files_dir = experiment_dir / "files"
        metadata_file = files_dir / ".files_metadata.json"

        if not metadata_file.exists():
            raise FileNotFoundError(f"File {file_id} not found")

        # Find file metadata
        with open(metadata_file, "r") as f:
            files_metadata = json.load(f)

        file_metadata = None
        for f in files_metadata.get("files", []):
            if f["id"] == file_id and f.get("deletedAt") is None:
                file_metadata = f
                break

        if not file_metadata:
            raise FileNotFoundError(f"File {file_id} not found")

        # Get source file
        source_file = files_dir / file_id / file_metadata["filename"]
        if not source_file.exists():
            raise FileNotFoundError(f"File {file_id} not found on disk")

        # Determine destination
        if dest_path is None:
            dest_path = file_metadata["filename"]

        # Copy file
        shutil.copy2(source_file, dest_path)

        # Verify checksum
        expected_checksum = file_metadata["checksum"]
        if not verify_checksum(dest_path, expected_checksum):
            import os
            os.remove(dest_path)
            raise ValueError(f"Checksum verification failed for file {file_id}")

        return dest_path

    def delete_file(
        self,
        project: str,
        experiment: str,
        file_id: str
    ) -> Dict[str, Any]:
        """
        Delete file from local storage (soft delete in metadata).

        Args:
            project: Project name
            experiment: Experiment name
            file_id: File ID

        Returns:
            Dict with id and deletedAt

        Raises:
            FileNotFoundError: If file not found
        """
        experiment_dir = self.root_path / project / experiment
        metadata_file = experiment_dir / "files" / ".files_metadata.json"

        if not metadata_file.exists():
            raise FileNotFoundError(f"File {file_id} not found")

        # Read metadata
        with open(metadata_file, "r") as f:
            files_metadata = json.load(f)

        # Find and soft delete file
        file_found = False
        for file_meta in files_metadata.get("files", []):
            if file_meta["id"] == file_id:
                if file_meta.get("deletedAt") is not None:
                    raise FileNotFoundError(f"File {file_id} already deleted")
                file_meta["deletedAt"] = datetime.utcnow().isoformat() + "Z"
                file_meta["updatedAt"] = file_meta["deletedAt"]
                file_found = True
                break

        if not file_found:
            raise FileNotFoundError(f"File {file_id} not found")

        # Write updated metadata
        with open(metadata_file, "w") as f:
            json.dump(files_metadata, f, indent=2)

        return {
            "id": file_id,
            "deletedAt": file_meta["deletedAt"]
        }

    def update_file_metadata(
        self,
        project: str,
        experiment: str,
        file_id: str,
        description: Optional[str] = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Update file metadata in local storage.

        Args:
            project: Project name
            experiment: Experiment name
            file_id: File ID
            description: Optional description
            tags: Optional tags
            metadata: Optional metadata

        Returns:
            Updated file metadata dict

        Raises:
            FileNotFoundError: If file not found
        """
        experiment_dir = self.root_path / project / experiment
        metadata_file = experiment_dir / "files" / ".files_metadata.json"

        if not metadata_file.exists():
            raise FileNotFoundError(f"File {file_id} not found")

        # Read metadata
        with open(metadata_file, "r") as f:
            files_metadata = json.load(f)

        # Find and update file
        file_found = False
        updated_file = None
        for file_meta in files_metadata.get("files", []):
            if file_meta["id"] == file_id:
                if file_meta.get("deletedAt") is not None:
                    raise FileNotFoundError(f"File {file_id} has been deleted")

                # Update fields
                if description is not None:
                    file_meta["description"] = description
                if tags is not None:
                    file_meta["tags"] = tags
                if metadata is not None:
                    file_meta["metadata"] = metadata

                file_meta["updatedAt"] = datetime.utcnow().isoformat() + "Z"
                file_found = True
                updated_file = file_meta
                break

        if not file_found:
            raise FileNotFoundError(f"File {file_id} not found")

        # Write updated metadata
        with open(metadata_file, "w") as f:
            json.dump(files_metadata, f, indent=2)

        return updated_file

    def _get_experiment_dir(self, project: str, experiment: str) -> Path:
        """Get experiment directory path."""
        return self.root_path / project / experiment

    def append_to_metric(
        self,
        project: str,
        experiment: str,
        metric_name: str,
        data: Dict[str, Any],
        description: Optional[str] = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Append a single data point to a metric in local storage.

        Storage format:
        .ml-dash/{project}/{experiment}/metrics/{metric_name}/
          data.jsonl     # Data points (one JSON object per line)
          metadata.json  # Metric metadata (name, description, tags, stats)

        Args:
            project: Project name
            experiment: Experiment name
            metric_name: Metric name
            data: Data point (flexible schema)
            description: Optional metric description
            tags: Optional tags
            metadata: Optional metric metadata

        Returns:
            Dict with metricId, index, bufferedDataPoints, chunkSize
        """
        experiment_dir = self._get_experiment_dir(project, experiment)
        metrics_dir = experiment_dir / "metrics"
        metrics_dir.mkdir(parents=True, exist_ok=True)

        metric_dir = metrics_dir / metric_name
        metric_dir.mkdir(exist_ok=True)

        data_file = metric_dir / "data.jsonl"
        metadata_file = metric_dir / "metadata.json"

        # Load or initialize metadata
        if metadata_file.exists():
            with open(metadata_file, "r") as f:
                metric_meta = json.load(f)
        else:
            metric_meta = {
                "metricId": f"local-metric-{metric_name}",
                "name": metric_name,
                "description": description,
                "tags": tags or [],
                "metadata": metadata,
                "totalDataPoints": 0,
                "nextIndex": 0,
                "createdAt": datetime.utcnow().isoformat() + "Z"
            }

        # Get next index
        index = metric_meta["nextIndex"]

        # Append data point to JSONL file
        data_entry = {
            "index": index,
            "data": data,
            "createdAt": datetime.utcnow().isoformat() + "Z"
        }

        with open(data_file, "a") as f:
            f.write(json.dumps(data_entry) + "\n")

        # Update metadata
        metric_meta["nextIndex"] = index + 1
        metric_meta["totalDataPoints"] = metric_meta["totalDataPoints"] + 1
        metric_meta["updatedAt"] = datetime.utcnow().isoformat() + "Z"

        with open(metadata_file, "w") as f:
            json.dump(metric_meta, f, indent=2)

        return {
            "metricId": metric_meta["metricId"],
            "index": str(index),
            "bufferedDataPoints": str(metric_meta["totalDataPoints"]),
            "chunkSize": 10000  # Default chunk size for local mode
        }

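    # Illustrative example (editor's note, not part of the released file):
    # each append is assigned the next monotonically increasing index,
    # tracked via "nextIndex" in metadata.json.
    #
    #   storage.append_to_metric("mnist", "run-1", "loss", {"step": 1, "value": 0.9})
    #   storage.append_to_metric("mnist", "run-1", "loss", {"step": 2, "value": 0.7})
    #
    # data.jsonl then holds one JSON object per line with indices 0 and 1,
    # and metadata.json records totalDataPoints = 2 and nextIndex = 2.
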
    def append_batch_to_metric(
        self,
        project: str,
        experiment: str,
        metric_name: str,
        data_points: List[Dict[str, Any]],
        description: Optional[str] = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Append multiple data points to a metric in local storage (batch).

        Args:
            project: Project name
            experiment: Experiment name
            metric_name: Metric name
            data_points: List of data points
            description: Optional metric description
            tags: Optional tags
            metadata: Optional metric metadata

        Returns:
            Dict with metricId, startIndex, endIndex, count
        """
        experiment_dir = self._get_experiment_dir(project, experiment)
        metrics_dir = experiment_dir / "metrics"
        metrics_dir.mkdir(parents=True, exist_ok=True)

        metric_dir = metrics_dir / metric_name
        metric_dir.mkdir(exist_ok=True)

        data_file = metric_dir / "data.jsonl"
        metadata_file = metric_dir / "metadata.json"

        # Load or initialize metadata
        if metadata_file.exists():
            with open(metadata_file, "r") as f:
                metric_meta = json.load(f)
        else:
            metric_meta = {
                "metricId": f"local-metric-{metric_name}",
                "name": metric_name,
                "description": description,
                "tags": tags or [],
                "metadata": metadata,
                "totalDataPoints": 0,
                "nextIndex": 0,
                "createdAt": datetime.utcnow().isoformat() + "Z"
            }

        start_index = metric_meta["nextIndex"]
        end_index = start_index + len(data_points) - 1

        # Append data points to JSONL file
        with open(data_file, "a") as f:
            for i, data in enumerate(data_points):
                data_entry = {
                    "index": start_index + i,
                    "data": data,
                    "createdAt": datetime.utcnow().isoformat() + "Z"
                }
                f.write(json.dumps(data_entry) + "\n")

        # Update metadata
        metric_meta["nextIndex"] = end_index + 1
        metric_meta["totalDataPoints"] = metric_meta["totalDataPoints"] + len(data_points)
        metric_meta["updatedAt"] = datetime.utcnow().isoformat() + "Z"

        with open(metadata_file, "w") as f:
            json.dump(metric_meta, f, indent=2)

        return {
            "metricId": metric_meta["metricId"],
            "startIndex": str(start_index),
            "endIndex": str(end_index),
            "count": len(data_points),
            "bufferedDataPoints": str(metric_meta["totalDataPoints"]),
            "chunkSize": 10000
        }

    def read_metric_data(
        self,
        project: str,
        experiment: str,
        metric_name: str,
        start_index: int = 0,
        limit: int = 1000
    ) -> Dict[str, Any]:
        """
        Read data points from a metric in local storage.

        Args:
            project: Project name
            experiment: Experiment name
            metric_name: Metric name
            start_index: Starting index
            limit: Max points to read

        Returns:
            Dict with data, startIndex, endIndex, total, hasMore
        """
        experiment_dir = self._get_experiment_dir(project, experiment)
        metric_dir = experiment_dir / "metrics" / metric_name
        data_file = metric_dir / "data.jsonl"

        if not data_file.exists():
            return {
                "data": [],
                "startIndex": start_index,
                "endIndex": start_index - 1,
                "total": 0,
                "hasMore": False
            }

        # Read all data points from JSONL file
        data_points = []
        with open(data_file, "r") as f:
            for line in f:
                if line.strip():
                    entry = json.loads(line)
                    # Filter by index range
                    if start_index <= entry["index"] < start_index + limit:
                        data_points.append(entry)

        # Get total count
        metadata_file = metric_dir / "metadata.json"
        total_count = 0
        if metadata_file.exists():
            with open(metadata_file, "r") as f:
                metric_meta = json.load(f)
                total_count = metric_meta["totalDataPoints"]

        return {
            "data": data_points,
            "startIndex": start_index,
            "endIndex": start_index + len(data_points) - 1 if data_points else start_index - 1,
            "total": len(data_points),
            "hasMore": start_index + len(data_points) < total_count
        }

    def get_metric_stats(
        self,
        project: str,
        experiment: str,
        metric_name: str
    ) -> Dict[str, Any]:
        """
        Get metric statistics from local storage.

        Args:
            project: Project name
            experiment: Experiment name
            metric_name: Metric name

        Returns:
            Dict with metric stats
        """
        experiment_dir = self._get_experiment_dir(project, experiment)
        metric_dir = experiment_dir / "metrics" / metric_name
        metadata_file = metric_dir / "metadata.json"

        if not metadata_file.exists():
            raise FileNotFoundError(f"Metric {metric_name} not found")

        with open(metadata_file, "r") as f:
            metric_meta = json.load(f)

        return {
            "metricId": metric_meta["metricId"],
            "name": metric_meta["name"],
            "description": metric_meta.get("description"),
            "tags": metric_meta.get("tags", []),
            "metadata": metric_meta.get("metadata"),
            "totalDataPoints": str(metric_meta["totalDataPoints"]),
            "bufferedDataPoints": str(metric_meta["totalDataPoints"]),  # All buffered in local mode
            "chunkedDataPoints": "0",  # No chunking in local mode
            "totalChunks": 0,
            "chunkSize": 10000,
            "firstDataAt": metric_meta.get("createdAt"),
            "lastDataAt": metric_meta.get("updatedAt"),
            "createdAt": metric_meta.get("createdAt"),
            "updatedAt": metric_meta.get("updatedAt", metric_meta.get("createdAt"))
        }

    def list_metrics(
        self,
        project: str,
        experiment: str
    ) -> List[Dict[str, Any]]:
        """
        List all metrics in an experiment from local storage.

        Args:
            project: Project name
            experiment: Experiment name

        Returns:
            List of metric summaries
        """
        experiment_dir = self._get_experiment_dir(project, experiment)
        metrics_dir = experiment_dir / "metrics"

        if not metrics_dir.exists():
            return []

        metrics = []
        for metric_dir in metrics_dir.iterdir():
            if metric_dir.is_dir():
                metadata_file = metric_dir / "metadata.json"
                if metadata_file.exists():
                    with open(metadata_file, "r") as f:
                        metric_meta = json.load(f)
                    metrics.append({
                        "metricId": metric_meta["metricId"],
                        "name": metric_meta["name"],
                        "description": metric_meta.get("description"),
                        "tags": metric_meta.get("tags", []),
                        "totalDataPoints": str(metric_meta["totalDataPoints"]),
                        "createdAt": metric_meta.get("createdAt")
                    })

        return metrics
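Taken together, a minimal local-mode round trip against this backend might look like the sketch below (illustrative only: the project name, experiment name, and root directory are placeholders, not values from the package):

    from pathlib import Path
    from ml_dash.storage import LocalStorage

    # Root directory is created on first use (parents included).
    storage = LocalStorage(Path("./ml-dash-data"))

    # Creates <root>/mnist/run-1/ with logs/, metrics/, files/ and experiment.json.
    storage.create_experiment("mnist", "run-1", description="baseline", tags=["v1"])

    # Writes parameters.json with version 1; later calls merge and re-version.
    storage.write_parameters("mnist", "run-1", {"model.lr": 0.001})

    # Appends an indexed data point under metrics/loss/data.jsonl.
    storage.append_to_metric("mnist", "run-1", "loss", {"step": 1, "value": 0.9})

    print(storage.read_parameters("mnist", "run-1"))  # {'model.lr': 0.001}
    print(storage.get_metric_stats("mnist", "run-1", "loss")["totalDataPoints"])  # '1'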