ml-dash 0.0.11__py3-none-any.whl → 0.5.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. ml_dash/__init__.py +59 -1
  2. ml_dash/auto_start.py +42 -0
  3. ml_dash/cli.py +67 -0
  4. ml_dash/cli_commands/__init__.py +1 -0
  5. ml_dash/cli_commands/download.py +797 -0
  6. ml_dash/cli_commands/list.py +343 -0
  7. ml_dash/cli_commands/upload.py +1298 -0
  8. ml_dash/client.py +955 -0
  9. ml_dash/config.py +114 -11
  10. ml_dash/experiment.py +1020 -0
  11. ml_dash/files.py +688 -0
  12. ml_dash/log.py +181 -0
  13. ml_dash/metric.py +292 -0
  14. ml_dash/params.py +188 -0
  15. ml_dash/storage.py +1115 -0
  16. ml_dash-0.5.9.dist-info/METADATA +244 -0
  17. ml_dash-0.5.9.dist-info/RECORD +20 -0
  18. ml_dash-0.5.9.dist-info/WHEEL +4 -0
  19. ml_dash-0.5.9.dist-info/entry_points.txt +3 -0
  20. ml_dash/app.py +0 -33
  21. ml_dash/file_events.py +0 -71
  22. ml_dash/file_handlers.py +0 -141
  23. ml_dash/file_utils.py +0 -5
  24. ml_dash/file_watcher.py +0 -30
  25. ml_dash/main.py +0 -60
  26. ml_dash/mime_types.py +0 -20
  27. ml_dash/schema/__init__.py +0 -110
  28. ml_dash/schema/archive.py +0 -165
  29. ml_dash/schema/directories.py +0 -59
  30. ml_dash/schema/experiments.py +0 -65
  31. ml_dash/schema/files/__init__.py +0 -204
  32. ml_dash/schema/files/file_helpers.py +0 -79
  33. ml_dash/schema/files/images.py +0 -27
  34. ml_dash/schema/files/metrics.py +0 -64
  35. ml_dash/schema/files/parameters.py +0 -50
  36. ml_dash/schema/files/series.py +0 -235
  37. ml_dash/schema/files/videos.py +0 -27
  38. ml_dash/schema/helpers.py +0 -66
  39. ml_dash/schema/projects.py +0 -65
  40. ml_dash/schema/schema_helpers.py +0 -19
  41. ml_dash/schema/users.py +0 -33
  42. ml_dash/sse.py +0 -18
  43. ml_dash-0.0.11.dist-info/METADATA +0 -67
  44. ml_dash-0.0.11.dist-info/RECORD +0 -30
  45. ml_dash-0.0.11.dist-info/WHEEL +0 -5
  46. ml_dash-0.0.11.dist-info/top_level.txt +0 -1
  47. ml_dash/{example.py → py.typed} +0 -0
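Reading the list, the 0.5.x line drops the old server-side modules (app.py, main.py, sse.py, the file watcher, and the schema/ tree) in favor of a client SDK: client.py, experiment.py, metric.py, params.py, files.py, a CLI, and a local storage backend in storage.py, which is reproduced in full below. As a rough sketch of how that backend is driven (the `.ml-dash` root and all project/experiment names here are illustrative; `LocalStorage` and its method signatures are taken from the code below):

    from pathlib import Path

    from ml_dash.storage import LocalStorage

    # Hypothetical root; ".ml-dash" matches the layout mentioned in
    # append_to_metric's docstring below.
    storage = LocalStorage(Path(".ml-dash"))

    # Creates <root>/<project>/<experiment>/ with logs/, metrics/, files/.
    exp_dir = storage.create_experiment(
        project="demo-project",
        name="run-001",
        description="smoke test",
        tags=["demo"],
    )

    # Appends one JSON line to logs/logs.jsonl; the sequence number is
    # tracked in the .log_sequence file under a file lock.
    storage.write_log(
        project="demo-project",
        experiment="run-001",
        message="training started",
        level="INFO",
        timestamp="2024-01-15T10:30:00Z",
    )

    # Merges into parameters.json and bumps its version counter.
    storage.write_parameters("demo-project", "run-001", {"model.lr": 0.001})
    print(storage.read_parameters("demo-project", "run-001"))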
ml_dash/storage.py ADDED
@@ -0,0 +1,1115 @@
+ """
+ Local filesystem storage for ML-Dash.
+ """
+
+ from typing import Optional, Dict, Any, List
+ from pathlib import Path
+ import json
+ from datetime import datetime
+ try:
+     import fcntl
+ except ImportError:  # fcntl is unavailable on Windows
+     fcntl = None
+ from contextlib import contextmanager
+
+
+ class LocalStorage:
+     """
+     Local filesystem storage backend.
+
+     Directory structure:
+         <root>/
+             <project>/
+                 <experiment_name>/
+                     experiment.json      # Experiment metadata
+                     logs/
+                         logs.jsonl       # Log entries
+                         .log_sequence    # Sequence counter
+                     metrics/
+                         <metric_name>.jsonl
+                     files/
+                         <uploaded_files>
+                     parameters.json      # Flattened parameters
+     """
+
+     def __init__(self, root_path: Path):
+         """
+         Initialize local storage.
+
+         Args:
+             root_path: Root directory for local storage
+         """
+         self.root_path = Path(root_path)
+         self.root_path.mkdir(parents=True, exist_ok=True)
+
+     @contextmanager
+     def _file_lock(self, lock_file: Path):
+         """
+         Context manager for file-based locking (works across processes and threads).
+
+         Args:
+             lock_file: Path to the lock file
+
+         Yields:
+             File handle with exclusive lock
+         """
+         lock_file.parent.mkdir(parents=True, exist_ok=True)
+         lock_fd = None
+
+         try:
+             # Open lock file
+             lock_fd = open(lock_file, 'a')
+
+             # Acquire exclusive lock (blocking)
+             # Use fcntl on Unix-like systems
+             if fcntl is not None and hasattr(fcntl, 'flock'):
+                 fcntl.flock(lock_fd.fileno(), fcntl.LOCK_EX)
+             elif fcntl is not None and hasattr(fcntl, 'lockf'):
+                 fcntl.lockf(lock_fd.fileno(), fcntl.LOCK_EX)
+             else:
+                 # Fallback for systems without fcntl (like Windows):
+                 # no portable lock available here, so proceed unlocked (best effort)
+                 pass
+
+             yield lock_fd
+
+         finally:
+             # Release lock and close file
+             if lock_fd:
+                 try:
+                     if fcntl is not None and hasattr(fcntl, 'flock'):
+                         fcntl.flock(lock_fd.fileno(), fcntl.LOCK_UN)
+                     elif fcntl is not None and hasattr(fcntl, 'lockf'):
+                         fcntl.lockf(lock_fd.fileno(), fcntl.LOCK_UN)
+                 except Exception:
+                     pass
+                 lock_fd.close()
+
+     def create_experiment(
+         self,
+         project: str,
+         name: str,
+         description: Optional[str] = None,
+         tags: Optional[List[str]] = None,
+         bindrs: Optional[List[str]] = None,
+         folder: Optional[str] = None,
+         metadata: Optional[Dict[str, Any]] = None,
+     ) -> Path:
+         """
+         Create an experiment directory structure.
+
+         Args:
+             project: Project name
+             name: Experiment name
+             description: Optional description
+             tags: Optional tags
+             bindrs: Optional bindrs
+             folder: Optional folder path (used for organization)
+             metadata: Optional metadata
+
+         Returns:
+             Path to experiment directory
+         """
+         # Determine base path - include folder in hierarchy if specified
+         if folder is not None:
+             # Strip leading / to make it relative, then use as base path
+             folder_path = folder.lstrip('/')
+             base_path = self.root_path / folder_path
+         else:
+             base_path = self.root_path
+
+         # Create project directory
+         project_dir = base_path / project
+         project_dir.mkdir(parents=True, exist_ok=True)
+
+         # Create experiment directory
+         experiment_dir = project_dir / name
+         experiment_dir.mkdir(parents=True, exist_ok=True)
+
+         # Create subdirectories
+         (experiment_dir / "logs").mkdir(exist_ok=True)
+         (experiment_dir / "metrics").mkdir(exist_ok=True)
+         (experiment_dir / "files").mkdir(exist_ok=True)
+
+         # Write experiment metadata
+         experiment_metadata = {
+             "name": name,
+             "project": project,
+             "description": description,
+             "tags": tags or [],
+             "bindrs": bindrs or [],
+             "folder": folder,
+             "metadata": metadata,
+             "created_at": datetime.utcnow().isoformat() + "Z",
+             "write_protected": False,
+         }
+
+         experiment_file = experiment_dir / "experiment.json"
+
+         # File-based lock for concurrent experiment creation/update
+         lock_file = experiment_dir / ".experiment.lock"
+         with self._file_lock(lock_file):
+             if not experiment_file.exists():
+                 # Only create if doesn't exist (don't overwrite)
+                 with open(experiment_file, "w") as f:
+                     json.dump(experiment_metadata, f, indent=2)
+             else:
+                 # Update existing experiment
+                 try:
+                     with open(experiment_file, "r") as f:
+                         existing = json.load(f)
+                 except (json.JSONDecodeError, IOError):
+                     # File might be corrupted or empty, recreate it
+                     with open(experiment_file, "w") as f:
+                         json.dump(experiment_metadata, f, indent=2)
+                     return experiment_dir
+
+                 # Merge updates
+                 if description is not None:
+                     existing["description"] = description
+                 if tags is not None:
+                     existing["tags"] = tags
+                 if bindrs is not None:
+                     existing["bindrs"] = bindrs
+                 if folder is not None:
+                     existing["folder"] = folder
+                 if metadata is not None:
+                     existing["metadata"] = metadata
+                 existing["updated_at"] = datetime.utcnow().isoformat() + "Z"
+                 with open(experiment_file, "w") as f:
+                     json.dump(existing, f, indent=2)
+
+         return experiment_dir
+
+     def flush(self):
+         """Flush any pending writes (no-op for now)."""
+         pass
+
+     def write_log(
+         self,
+         project: str,
+         experiment: str,
+         message: str,
+         level: str,
+         timestamp: str,
+         metadata: Optional[Dict[str, Any]] = None,
+     ):
+         """
+         Write a single log entry immediately to JSONL file.
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             message: Log message
+             level: Log level
+             timestamp: ISO timestamp string
+             metadata: Optional metadata
+         """
+         experiment_dir = self._get_experiment_dir(project, experiment)
+         logs_dir = experiment_dir / "logs"
+         logs_file = logs_dir / "logs.jsonl"
+         seq_file = logs_dir / ".log_sequence"
+
+         # File-based lock for concurrent log writes (prevents sequence collision)
+         lock_file = logs_dir / ".log_sequence.lock"
+         with self._file_lock(lock_file):
+             # Read and increment sequence counter
+             sequence_number = 0
+             if seq_file.exists():
+                 try:
+                     sequence_number = int(seq_file.read_text().strip())
+                 except (ValueError, IOError):
+                     sequence_number = 0
+
+             log_entry = {
+                 "sequenceNumber": sequence_number,
+                 "timestamp": timestamp,
+                 "level": level,
+                 "message": message,
+             }
+
+             if metadata:
+                 log_entry["metadata"] = metadata
+
+             # Write log immediately
+             with open(logs_file, "a") as f:
+                 f.write(json.dumps(log_entry) + "\n")
+
+             # Update sequence counter
+             seq_file.write_text(str(sequence_number + 1))
+
+     def write_metric_data(
+         self,
+         project: str,
+         experiment: str,
+         metric_name: str,
+         data: Any,
+     ):
+         """
+         Write metric data point.
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             metric_name: Metric name
+             data: Data point
+         """
+         experiment_dir = self._get_experiment_dir(project, experiment)
+         metric_file = experiment_dir / "metrics" / f"{metric_name}.jsonl"
+
+         data_point = {
+             "timestamp": datetime.utcnow().isoformat() + "Z",
+             "data": data,
+         }
+
+         with open(metric_file, "a") as f:
+             f.write(json.dumps(data_point) + "\n")
+
+     def write_parameters(
+         self,
+         project: str,
+         experiment: str,
+         data: Dict[str, Any],
+     ):
+         """
+         Write/merge parameters. Always merges with existing parameters.
+
+         File format:
+             {
+                 "version": 2,
+                 "data": {"model.lr": 0.001, "model.batch_size": 32},
+                 "updatedAt": "2024-01-15T10:30:00Z"
+             }
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             data: Flattened parameter dict with dot notation (already flattened)
+         """
+         experiment_dir = self._get_experiment_dir(project, experiment)
+         params_file = experiment_dir / "parameters.json"
+
+         # File-based lock for concurrent parameter writes (prevents data loss and version conflicts)
+         lock_file = experiment_dir / ".parameters.lock"
+         with self._file_lock(lock_file):
+             # Read existing if present
+             if params_file.exists():
+                 try:
+                     with open(params_file, "r") as f:
+                         existing_doc = json.load(f)
+                 except (json.JSONDecodeError, IOError):
+                     # Corrupted file, recreate
+                     existing_doc = None
+
+                 if existing_doc:
+                     # Merge with existing data
+                     existing_data = existing_doc.get("data", {})
+                     existing_data.update(data)
+
+                     # Increment version
+                     version = existing_doc.get("version", 1) + 1
+
+                     params_doc = {
+                         "version": version,
+                         "data": existing_data,
+                         "updatedAt": datetime.utcnow().isoformat() + "Z"
+                     }
+                 else:
+                     # Create new if corrupted
+                     params_doc = {
+                         "version": 1,
+                         "data": data,
+                         "createdAt": datetime.utcnow().isoformat() + "Z",
+                         "updatedAt": datetime.utcnow().isoformat() + "Z"
+                     }
+             else:
+                 # Create new parameters document
+                 params_doc = {
+                     "version": 1,
+                     "data": data,
+                     "createdAt": datetime.utcnow().isoformat() + "Z",
+                     "updatedAt": datetime.utcnow().isoformat() + "Z"
+                 }
+
+             with open(params_file, "w") as f:
+                 json.dump(params_doc, f, indent=2)
+
+     def read_parameters(
+         self,
+         project: str,
+         experiment: str,
+     ) -> Optional[Dict[str, Any]]:
+         """
+         Read parameters from local file.
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+
+         Returns:
+             Flattened parameter dict, or None if file doesn't exist
+         """
+         experiment_dir = self._get_experiment_dir(project, experiment)
+         params_file = experiment_dir / "parameters.json"
+
+         if not params_file.exists():
+             return None
+
+         try:
+             with open(params_file, "r") as f:
+                 params_doc = json.load(f)
+                 return params_doc.get("data", {})
+         except (json.JSONDecodeError, IOError):
+             return None
+
+     def write_file(
+         self,
+         project: str,
+         experiment: str,
+         file_path: str,
+         prefix: str,
+         filename: str,
+         description: Optional[str],
+         tags: Optional[List[str]],
+         metadata: Optional[Dict[str, Any]],
+         checksum: str,
+         content_type: str,
+         size_bytes: int
+     ) -> Dict[str, Any]:
+         """
+         Write file to local storage.
+
+         Copies file to: files/<prefix>/<file_id>/<filename>
+         Updates .files_metadata.json with file metadata
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             file_path: Source file path
+             prefix: Logical path prefix
+             filename: Original filename
+             description: Optional description
+             tags: Optional tags
+             metadata: Optional metadata
+             checksum: SHA256 checksum
+             content_type: MIME type
+             size_bytes: File size in bytes
+
+         Returns:
+             File metadata dict
+         """
+         import shutil
+         from .files import generate_snowflake_id
+
+         experiment_dir = self._get_experiment_dir(project, experiment)
+         files_dir = experiment_dir / "files"
+         metadata_file = files_dir / ".files_metadata.json"
+
+         # Generate Snowflake ID for file
+         file_id = generate_snowflake_id()
+
+         # Normalize prefix (remove leading slashes to avoid absolute paths)
+         normalized_prefix = prefix.lstrip("/") if prefix else ""
+
+         # Create prefix directory, then file directory
+         prefix_dir = files_dir / normalized_prefix if normalized_prefix else files_dir
+         prefix_dir.mkdir(parents=True, exist_ok=True)
+
+         file_dir = prefix_dir / file_id
+         file_dir.mkdir(parents=True, exist_ok=True)
+
+         # Copy file
+         dest_file = file_dir / filename
+         shutil.copy2(file_path, dest_file)
+
+         # Create file metadata
+         file_metadata = {
+             "id": file_id,
+             "experimentId": f"{project}/{experiment}",  # Local mode doesn't have a real experiment ID
+             "path": prefix,
+             "filename": filename,
+             "description": description,
+             "tags": tags or [],
+             "contentType": content_type,
+             "sizeBytes": size_bytes,
+             "checksum": checksum,
+             "metadata": metadata,
+             "uploadedAt": datetime.utcnow().isoformat() + "Z",
+             "updatedAt": datetime.utcnow().isoformat() + "Z",
+             "deletedAt": None
+         }
+
+         # File-based lock for concurrent safety (works across processes/threads/instances)
+         lock_file = files_dir / ".files_metadata.lock"
+         with self._file_lock(lock_file):
+             # Read existing metadata
+             files_metadata = {"files": []}
+             if metadata_file.exists():
+                 try:
+                     with open(metadata_file, "r") as f:
+                         files_metadata = json.load(f)
+                 except (json.JSONDecodeError, IOError):
+                     files_metadata = {"files": []}
+
+             # Check if file with same prefix+filename exists (overwrite behavior)
+             existing_index = None
+             for i, existing_file in enumerate(files_metadata["files"]):
+                 if (existing_file["path"] == prefix and
+                         existing_file["filename"] == filename and
+                         existing_file["deletedAt"] is None):
+                     existing_index = i
+                     break
+
+             if existing_index is not None:
+                 # Overwrite: remove old file and update metadata
+                 old_file = files_metadata["files"][existing_index]
+                 old_prefix = old_file["path"].lstrip("/") if old_file["path"] else ""
+                 if old_prefix:
+                     old_file_dir = files_dir / old_prefix / old_file["id"]
+                 else:
+                     old_file_dir = files_dir / old_file["id"]
+                 if old_file_dir.exists():
+                     shutil.rmtree(old_file_dir)
+                 files_metadata["files"][existing_index] = file_metadata
+             else:
+                 # New file: append to list
+                 files_metadata["files"].append(file_metadata)
+
+             # Write updated metadata
+             with open(metadata_file, "w") as f:
+                 json.dump(files_metadata, f, indent=2)
+
+         return file_metadata
+
+     def list_files(
+         self,
+         project: str,
+         experiment: str,
+         prefix: Optional[str] = None,
+         tags: Optional[List[str]] = None
+     ) -> List[Dict[str, Any]]:
+         """
+         List files from local storage.
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             prefix: Optional prefix filter
+             tags: Optional tags filter
+
+         Returns:
+             List of file metadata dicts (only non-deleted files)
+         """
+         experiment_dir = self._get_experiment_dir(project, experiment)
+         metadata_file = experiment_dir / "files" / ".files_metadata.json"
+
+         if not metadata_file.exists():
+             return []
+
+         try:
+             with open(metadata_file, "r") as f:
+                 files_metadata = json.load(f)
+         except (json.JSONDecodeError, IOError):
+             return []
+
+         files = files_metadata.get("files", [])
+
+         # Filter out deleted files
+         files = [f for f in files if f.get("deletedAt") is None]
+
+         # Apply prefix filter
+         if prefix:
+             files = [f for f in files if f["path"].startswith(prefix)]
+
+         # Apply tags filter
+         if tags:
+             files = [f for f in files if any(tag in f.get("tags", []) for tag in tags)]
+
+         return files
+
+     def read_file(
+         self,
+         project: str,
+         experiment: str,
+         file_id: str,
+         dest_path: Optional[str] = None
+     ) -> str:
+         """
+         Read/copy file from local storage.
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             file_id: File ID
+             dest_path: Optional destination path (defaults to original filename)
+
+         Returns:
+             Path to copied file
+
+         Raises:
+             FileNotFoundError: If file not found
+             ValueError: If checksum verification fails
+         """
+         import shutil
+         from .files import verify_checksum
+
+         experiment_dir = self._get_experiment_dir(project, experiment)
+         files_dir = experiment_dir / "files"
+         metadata_file = files_dir / ".files_metadata.json"
+
+         if not metadata_file.exists():
+             raise FileNotFoundError(f"File {file_id} not found")
+
+         # Find file metadata
+         with open(metadata_file, "r") as f:
+             files_metadata = json.load(f)
+
+         file_metadata = None
+         for meta in files_metadata.get("files", []):
+             if meta["id"] == file_id and meta.get("deletedAt") is None:
+                 file_metadata = meta
+                 break
+
+         if not file_metadata:
+             raise FileNotFoundError(f"File {file_id} not found")
+
+         # Get source file
+         file_prefix = file_metadata["path"].lstrip("/") if file_metadata["path"] else ""
+         if file_prefix:
+             source_file = files_dir / file_prefix / file_id / file_metadata["filename"]
+         else:
+             source_file = files_dir / file_id / file_metadata["filename"]
+         if not source_file.exists():
+             raise FileNotFoundError(f"File {file_id} not found on disk")
+
+         # Determine destination
+         if dest_path is None:
+             dest_path = file_metadata["filename"]
+
+         # Copy file
+         shutil.copy2(source_file, dest_path)
+
+         # Verify checksum
+         expected_checksum = file_metadata["checksum"]
+         if not verify_checksum(dest_path, expected_checksum):
+             import os
+             os.remove(dest_path)
+             raise ValueError(f"Checksum verification failed for file {file_id}")
+
+         return dest_path
+
+     def delete_file(
+         self,
+         project: str,
+         experiment: str,
+         file_id: str
+     ) -> Dict[str, Any]:
+         """
+         Delete file from local storage (soft delete in metadata).
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             file_id: File ID
+
+         Returns:
+             Dict with id and deletedAt
+
+         Raises:
+             FileNotFoundError: If file not found
+         """
+         experiment_dir = self._get_experiment_dir(project, experiment)
+         metadata_file = experiment_dir / "files" / ".files_metadata.json"
+
+         if not metadata_file.exists():
+             raise FileNotFoundError(f"File {file_id} not found")
+
+         # File-based lock for concurrent safety (works across processes/threads/instances)
+         lock_file = metadata_file.parent / ".files_metadata.lock"
+         with self._file_lock(lock_file):
+             # Read metadata
+             with open(metadata_file, "r") as f:
+                 files_metadata = json.load(f)
+
+             # Find and soft delete file
+             file_found = False
+             for file_meta in files_metadata.get("files", []):
+                 if file_meta["id"] == file_id:
+                     if file_meta.get("deletedAt") is not None:
+                         raise FileNotFoundError(f"File {file_id} already deleted")
+                     file_meta["deletedAt"] = datetime.utcnow().isoformat() + "Z"
+                     file_meta["updatedAt"] = file_meta["deletedAt"]
+                     file_found = True
+                     break
+
+             if not file_found:
+                 raise FileNotFoundError(f"File {file_id} not found")
+
+             # Write updated metadata
+             with open(metadata_file, "w") as f:
+                 json.dump(files_metadata, f, indent=2)
+
+         return {
+             "id": file_id,
+             "deletedAt": file_meta["deletedAt"]
+         }
+
+     def update_file_metadata(
+         self,
+         project: str,
+         experiment: str,
+         file_id: str,
+         description: Optional[str] = None,
+         tags: Optional[List[str]] = None,
+         metadata: Optional[Dict[str, Any]] = None
+     ) -> Dict[str, Any]:
+         """
+         Update file metadata in local storage.
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             file_id: File ID
+             description: Optional description
+             tags: Optional tags
+             metadata: Optional metadata
+
+         Returns:
+             Updated file metadata dict
+
+         Raises:
+             FileNotFoundError: If file not found
+         """
+         experiment_dir = self._get_experiment_dir(project, experiment)
+         metadata_file = experiment_dir / "files" / ".files_metadata.json"
+
+         if not metadata_file.exists():
+             raise FileNotFoundError(f"File {file_id} not found")
+
+         # File-based lock for concurrent safety (works across processes/threads/instances)
+         lock_file = metadata_file.parent / ".files_metadata.lock"
+         with self._file_lock(lock_file):
+             # Read metadata
+             with open(metadata_file, "r") as f:
+                 files_metadata = json.load(f)
+
+             # Find and update file
+             file_found = False
+             updated_file = None
+             for file_meta in files_metadata.get("files", []):
+                 if file_meta["id"] == file_id:
+                     if file_meta.get("deletedAt") is not None:
+                         raise FileNotFoundError(f"File {file_id} has been deleted")
+
+                     # Update fields
+                     if description is not None:
+                         file_meta["description"] = description
+                     if tags is not None:
+                         file_meta["tags"] = tags
+                     if metadata is not None:
+                         file_meta["metadata"] = metadata
+
+                     file_meta["updatedAt"] = datetime.utcnow().isoformat() + "Z"
+                     file_found = True
+                     updated_file = file_meta
+                     break
+
+             if not file_found:
+                 raise FileNotFoundError(f"File {file_id} not found")
+
+             # Write updated metadata
+             with open(metadata_file, "w") as f:
+                 json.dump(files_metadata, f, indent=2)
+
+         return updated_file
+
+     def _get_experiment_dir(self, project: str, experiment: str, folder: Optional[str] = None) -> Path:
+         """
+         Get experiment directory path.
+
+         If folder is not provided, tries to read it from experiment.json metadata.
+         Falls back to root_path/project/experiment if not found.
+         """
+         # If folder explicitly provided, use it
+         if folder is not None:
+             folder_path = folder.lstrip('/')
+             return self.root_path / folder_path / project / experiment
+
+         # Try to read folder from experiment metadata
+         # Check common locations where experiment might exist
+         possible_paths = []
+
+         # First, try without folder (most common case)
+         default_path = self.root_path / project / experiment
+         possible_paths.append(default_path)
+
+         # Then scan for experiment.json in subdirectories (for folder-based experiments)
+         try:
+             for item in self.root_path.rglob(f"*/{project}/{experiment}/experiment.json"):
+                 exp_dir = item.parent
+                 if exp_dir not in possible_paths:
+                     possible_paths.insert(0, exp_dir)  # Prioritize found paths
+         except OSError:
+             pass
+
+         # Check each possible path for experiment.json with folder metadata
+         for path in possible_paths:
+             exp_json = path / "experiment.json"
+             if exp_json.exists():
+                 try:
+                     with open(exp_json, 'r') as f:
+                         metadata = json.load(f)
+                         if metadata.get('folder'):
+                             folder_path = metadata['folder'].lstrip('/')
+                             return self.root_path / folder_path / project / experiment
+                 except (json.JSONDecodeError, IOError):
+                     pass
+                 # Found experiment.json, use this path even if no folder metadata
+                 return path
+
+         # Fallback to default path
+         return default_path
+
+     def append_to_metric(
+         self,
+         project: str,
+         experiment: str,
+         metric_name: Optional[str],
+         data: Dict[str, Any],
+         description: Optional[str] = None,
+         tags: Optional[List[str]] = None,
+         metadata: Optional[Dict[str, Any]] = None
+     ) -> Dict[str, Any]:
+         """
+         Append a single data point to a metric in local storage.
+
+         Storage format:
+             .ml-dash/{project}/{experiment}/metrics/{metric_name}/
+                 data.jsonl       # Data points (one JSON object per line)
+                 metadata.json    # Metric metadata (name, description, tags, stats)
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             metric_name: Metric name (None for unnamed metrics)
+             data: Data point (flexible schema)
+             description: Optional metric description
+             tags: Optional tags
+             metadata: Optional metric metadata
+
+         Returns:
+             Dict with metricId, index, bufferedDataPoints, chunkSize
+         """
+         experiment_dir = self._get_experiment_dir(project, experiment)
+         metrics_dir = experiment_dir / "metrics"
+         metrics_dir.mkdir(parents=True, exist_ok=True)
+
+         # Convert None to string for directory name
+         dir_name = str(metric_name) if metric_name is not None else "None"
+         metric_dir = metrics_dir / dir_name
+         metric_dir.mkdir(exist_ok=True)
+
+         data_file = metric_dir / "data.jsonl"
+         metadata_file = metric_dir / "metadata.json"
+
+         # File-based lock for concurrent metric appends (prevents index collision and count errors)
+         lock_file = metric_dir / ".metadata.lock"
+         with self._file_lock(lock_file):
+             # Load or initialize metadata
+             if metadata_file.exists():
+                 try:
+                     with open(metadata_file, "r") as f:
+                         metric_meta = json.load(f)
+                 except (json.JSONDecodeError, IOError):
+                     # Corrupted metadata, reinitialize
+                     metric_meta = {
+                         "metricId": f"local-metric-{metric_name}",
+                         "name": metric_name,
+                         "description": description,
+                         "tags": tags or [],
+                         "metadata": metadata,
+                         "totalDataPoints": 0,
+                         "nextIndex": 0,
+                         "createdAt": datetime.utcnow().isoformat() + "Z"
+                     }
+             else:
+                 metric_meta = {
+                     "metricId": f"local-metric-{metric_name}",
+                     "name": metric_name,
+                     "description": description,
+                     "tags": tags or [],
+                     "metadata": metadata,
+                     "totalDataPoints": 0,
+                     "nextIndex": 0,
+                     "createdAt": datetime.utcnow().isoformat() + "Z"
+                 }
+
+             # Get next index
+             index = metric_meta["nextIndex"]
+
+             # Append data point to JSONL file
+             data_entry = {
+                 "index": index,
+                 "data": data,
+                 "createdAt": datetime.utcnow().isoformat() + "Z"
+             }
+
+             with open(data_file, "a") as f:
+                 f.write(json.dumps(data_entry) + "\n")
+
+             # Update metadata
+             metric_meta["nextIndex"] = index + 1
+             metric_meta["totalDataPoints"] = metric_meta["totalDataPoints"] + 1
+             metric_meta["updatedAt"] = datetime.utcnow().isoformat() + "Z"
+
+             with open(metadata_file, "w") as f:
+                 json.dump(metric_meta, f, indent=2)
+
+         return {
+             "metricId": metric_meta["metricId"],
+             "index": str(index),
+             "bufferedDataPoints": str(metric_meta["totalDataPoints"]),
+             "chunkSize": 10000  # Default chunk size for local mode
+         }
+
+     def append_batch_to_metric(
+         self,
+         project: str,
+         experiment: str,
+         metric_name: Optional[str],
+         data_points: List[Dict[str, Any]],
+         description: Optional[str] = None,
+         tags: Optional[List[str]] = None,
+         metadata: Optional[Dict[str, Any]] = None
+     ) -> Dict[str, Any]:
+         """
+         Append multiple data points to a metric in local storage (batch).
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             metric_name: Metric name (None for unnamed metrics)
+             data_points: List of data points
+             description: Optional metric description
+             tags: Optional tags
+             metadata: Optional metric metadata
+
+         Returns:
+             Dict with metricId, startIndex, endIndex, count
+         """
+         experiment_dir = self._get_experiment_dir(project, experiment)
+         metrics_dir = experiment_dir / "metrics"
+         metrics_dir.mkdir(parents=True, exist_ok=True)
+
+         # Convert None to string for directory name
+         dir_name = str(metric_name) if metric_name is not None else "None"
+         metric_dir = metrics_dir / dir_name
+         metric_dir.mkdir(exist_ok=True)
+
+         data_file = metric_dir / "data.jsonl"
+         metadata_file = metric_dir / "metadata.json"
+
+         # File-based lock for concurrent batch appends (prevents index collision and count errors)
+         lock_file = metric_dir / ".metadata.lock"
+         with self._file_lock(lock_file):
+             # Load or initialize metadata
+             if metadata_file.exists():
+                 try:
+                     with open(metadata_file, "r") as f:
+                         metric_meta = json.load(f)
+                 except (json.JSONDecodeError, IOError):
+                     # Corrupted metadata, reinitialize
+                     metric_meta = {
+                         "metricId": f"local-metric-{metric_name}",
+                         "name": metric_name,
+                         "description": description,
+                         "tags": tags or [],
+                         "metadata": metadata,
+                         "totalDataPoints": 0,
+                         "nextIndex": 0,
+                         "createdAt": datetime.utcnow().isoformat() + "Z"
+                     }
+             else:
+                 metric_meta = {
+                     "metricId": f"local-metric-{metric_name}",
+                     "name": metric_name,
+                     "description": description,
+                     "tags": tags or [],
+                     "metadata": metadata,
+                     "totalDataPoints": 0,
+                     "nextIndex": 0,
+                     "createdAt": datetime.utcnow().isoformat() + "Z"
+                 }
+
+             start_index = metric_meta["nextIndex"]
+             end_index = start_index + len(data_points) - 1
+
+             # Append data points to JSONL file
+             with open(data_file, "a") as f:
+                 for i, data in enumerate(data_points):
+                     data_entry = {
+                         "index": start_index + i,
+                         "data": data,
+                         "createdAt": datetime.utcnow().isoformat() + "Z"
+                     }
+                     f.write(json.dumps(data_entry) + "\n")
+
+             # Update metadata
+             metric_meta["nextIndex"] = end_index + 1
+             metric_meta["totalDataPoints"] = metric_meta["totalDataPoints"] + len(data_points)
+             metric_meta["updatedAt"] = datetime.utcnow().isoformat() + "Z"
+
+             with open(metadata_file, "w") as f:
+                 json.dump(metric_meta, f, indent=2)
+
+         return {
+             "metricId": metric_meta["metricId"],
+             "startIndex": str(start_index),
+             "endIndex": str(end_index),
+             "count": len(data_points),
+             "bufferedDataPoints": str(metric_meta["totalDataPoints"]),
+             "chunkSize": 10000
+         }
+
+     def read_metric_data(
+         self,
+         project: str,
+         experiment: str,
+         metric_name: str,
+         start_index: int = 0,
+         limit: int = 1000
+     ) -> Dict[str, Any]:
+         """
+         Read data points from a metric in local storage.
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             metric_name: Metric name
+             start_index: Starting index
+             limit: Max points to read
+
+         Returns:
+             Dict with data, startIndex, endIndex, total, hasMore
+         """
+         experiment_dir = self._get_experiment_dir(project, experiment)
+         metric_dir = experiment_dir / "metrics" / metric_name
+         data_file = metric_dir / "data.jsonl"
+
+         if not data_file.exists():
+             return {
+                 "data": [],
+                 "startIndex": start_index,
+                 "endIndex": start_index - 1,
+                 "total": 0,
+                 "hasMore": False
+             }
+
+         # Read all data points from JSONL file
+         data_points = []
+         with open(data_file, "r") as f:
+             for line in f:
+                 if line.strip():
+                     entry = json.loads(line)
+                     # Filter by index range
+                     if start_index <= entry["index"] < start_index + limit:
+                         data_points.append(entry)
+
+         # Get total count
+         metadata_file = metric_dir / "metadata.json"
+         total_count = 0
+         if metadata_file.exists():
+             with open(metadata_file, "r") as f:
+                 metric_meta = json.load(f)
+             total_count = metric_meta["totalDataPoints"]
+
+         return {
+             "data": data_points,
+             "startIndex": start_index,
+             "endIndex": start_index + len(data_points) - 1 if data_points else start_index - 1,
+             "total": len(data_points),
+             "hasMore": start_index + len(data_points) < total_count
+         }
+
+     def get_metric_stats(
+         self,
+         project: str,
+         experiment: str,
+         metric_name: str
+     ) -> Dict[str, Any]:
+         """
+         Get metric statistics from local storage.
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             metric_name: Metric name
+
+         Returns:
+             Dict with metric stats
+         """
+         experiment_dir = self._get_experiment_dir(project, experiment)
+         metric_dir = experiment_dir / "metrics" / metric_name
+         metadata_file = metric_dir / "metadata.json"
+
+         if not metadata_file.exists():
+             raise FileNotFoundError(f"Metric {metric_name} not found")
+
+         with open(metadata_file, "r") as f:
+             metric_meta = json.load(f)
+
+         return {
+             "metricId": metric_meta["metricId"],
+             "name": metric_meta["name"],
+             "description": metric_meta.get("description"),
+             "tags": metric_meta.get("tags", []),
+             "metadata": metric_meta.get("metadata"),
+             "totalDataPoints": str(metric_meta["totalDataPoints"]),
+             "bufferedDataPoints": str(metric_meta["totalDataPoints"]),  # All buffered in local mode
+             "chunkedDataPoints": "0",  # No chunking in local mode
+             "totalChunks": 0,
+             "chunkSize": 10000,
+             "firstDataAt": metric_meta.get("createdAt"),
+             "lastDataAt": metric_meta.get("updatedAt"),
+             "createdAt": metric_meta.get("createdAt"),
+             "updatedAt": metric_meta.get("updatedAt", metric_meta.get("createdAt"))
+         }
+
+     def list_metrics(
+         self,
+         project: str,
+         experiment: str
+     ) -> List[Dict[str, Any]]:
+         """
+         List all metrics in an experiment from local storage.
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+
+         Returns:
+             List of metric summaries
+         """
+         experiment_dir = self._get_experiment_dir(project, experiment)
+         metrics_dir = experiment_dir / "metrics"
+
+         if not metrics_dir.exists():
+             return []
+
+         metrics = []
+         for metric_dir in metrics_dir.iterdir():
+             if metric_dir.is_dir():
+                 metadata_file = metric_dir / "metadata.json"
+                 if metadata_file.exists():
+                     with open(metadata_file, "r") as f:
+                         metric_meta = json.load(f)
+                     metrics.append({
+                         "metricId": metric_meta["metricId"],
+                         "name": metric_meta["name"],
+                         "description": metric_meta.get("description"),
+                         "tags": metric_meta.get("tags", []),
+                         "totalDataPoints": str(metric_meta["totalDataPoints"]),
+                         "createdAt": metric_meta.get("createdAt")
+                     })
+
+         return metrics
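To round out the sketch above, a companion example of the metric API, under the same assumptions (the `.ml-dash` root and project/experiment names are illustrative; the signatures and return shapes come from `append_to_metric`, `append_batch_to_metric`, and `read_metric_data` as shown in the diff):

    from pathlib import Path

    from ml_dash.storage import LocalStorage

    storage = LocalStorage(Path(".ml-dash"))
    storage.create_experiment(project="demo-project", name="run-001")

    # Each append writes one line to metrics/loss/data.jsonl and bumps
    # nextIndex/totalDataPoints in metadata.json under a file lock.
    for step in range(3):
        storage.append_to_metric(
            project="demo-project",
            experiment="run-001",
            metric_name="loss",
            data={"step": step, "value": 1.0 / (step + 1)},
        )

    # Batch appends share one lock acquisition and return the index range written.
    result = storage.append_batch_to_metric(
        project="demo-project",
        experiment="run-001",
        metric_name="loss",
        data_points=[{"step": 3, "value": 0.2}, {"step": 4, "value": 0.18}],
    )
    print(result["startIndex"], result["endIndex"])  # "3" "4" (indices are returned as strings)

    # Reads back entries whose index falls in [start_index, start_index + limit).
    page = storage.read_metric_data("demo-project", "run-001", "loss", start_index=0, limit=100)
    print(page["total"], page["hasMore"])  # 5 False

Note one quirk visible in the code: counts and indices come back as strings (mirroring what is presumably the server API's JSON encoding), while `count`, `chunkSize`, and `totalChunks` stay integers.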