ml-dash 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ml_dash/storage.py ADDED
@@ -0,0 +1,941 @@
+"""
+Local filesystem storage for ML-Dash.
+"""
+
+from typing import Optional, Dict, Any, List
+from pathlib import Path
+import json
+from datetime import datetime
+
+
+class LocalStorage:
+    """
+    Local filesystem storage backend.
+
+    Directory structure:
+        <root>/
+            <project>/
+                <experiment_name>/
+                    experiment.json      # Experiment metadata
+                    logs/
+                        logs.jsonl       # Log entries
+                        .log_sequence    # Sequence counter
+                    metrics/
+                        <metric_name>.jsonl
+                    files/
+                        <uploaded_files>
+                    parameters.json      # Flattened parameters
+    """
+
+    def __init__(self, root_path: Path):
+        """
+        Initialize local storage.
+
+        Args:
+            root_path: Root directory for local storage
+        """
+        self.root_path = Path(root_path)
+        self.root_path.mkdir(parents=True, exist_ok=True)
+
+    def create_experiment(
+        self,
+        project: str,
+        name: str,
+        description: Optional[str] = None,
+        tags: Optional[List[str]] = None,
+        bindrs: Optional[List[str]] = None,
+        folder: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> Path:
+        """
+        Create an experiment directory structure.
+
+        Args:
+            project: Project name
+            name: Experiment name
+            description: Optional description
+            tags: Optional tags
+            bindrs: Optional bindrs
+            folder: Optional folder path (used for organization)
+            metadata: Optional metadata
+
+        Returns:
+            Path to experiment directory
+        """
+        # Create project directory
+        project_dir = self.root_path / project
+        project_dir.mkdir(parents=True, exist_ok=True)
+
+        # Create experiment directory
+        experiment_dir = project_dir / name
+        experiment_dir.mkdir(parents=True, exist_ok=True)
+
+        # Create subdirectories
+        (experiment_dir / "logs").mkdir(exist_ok=True)
+        (experiment_dir / "metrics").mkdir(exist_ok=True)
+        (experiment_dir / "files").mkdir(exist_ok=True)
+
+        # Write experiment metadata
+        experiment_metadata = {
+            "name": name,
+            "project": project,
+            "description": description,
+            "tags": tags or [],
+            "bindrs": bindrs or [],
+            "folder": folder,
+            "metadata": metadata,
+            "created_at": datetime.utcnow().isoformat() + "Z",
+            "write_protected": False,
+        }
+
+        experiment_file = experiment_dir / "experiment.json"
+        if not experiment_file.exists():
+            # Only create if it doesn't exist (don't overwrite)
+            with open(experiment_file, "w") as f:
+                json.dump(experiment_metadata, f, indent=2)
+        else:
+            # Update existing experiment
+            with open(experiment_file, "r") as f:
+                existing = json.load(f)
+            # Merge updates
+            if description is not None:
+                existing["description"] = description
+            if tags is not None:
+                existing["tags"] = tags
+            if bindrs is not None:
+                existing["bindrs"] = bindrs
+            if folder is not None:
+                existing["folder"] = folder
+            if metadata is not None:
+                existing["metadata"] = metadata
+            existing["updated_at"] = datetime.utcnow().isoformat() + "Z"
+            with open(experiment_file, "w") as f:
+                json.dump(existing, f, indent=2)
+
+        return experiment_dir
+
+    def flush(self):
+        """Flush any pending writes (no-op for now)."""
+        pass
+
+    def write_log(
+        self,
+        project: str,
+        experiment: str,
+        message: str,
+        level: str,
+        timestamp: str,
+        metadata: Optional[Dict[str, Any]] = None,
+    ):
+        """
+        Write a single log entry immediately to the JSONL file.
+
+        Args:
+            project: Project name
+            experiment: Experiment name
+            message: Log message
+            level: Log level
+            timestamp: ISO timestamp string
+            metadata: Optional metadata
+        """
+        experiment_dir = self.root_path / project / experiment
+        logs_dir = experiment_dir / "logs"
+        logs_file = logs_dir / "logs.jsonl"
+        seq_file = logs_dir / ".log_sequence"
+
+        # Read and increment sequence counter
+        sequence_number = 0
+        if seq_file.exists():
+            try:
+                sequence_number = int(seq_file.read_text().strip())
+            except (ValueError, IOError):
+                sequence_number = 0
+
+        log_entry = {
+            "sequenceNumber": sequence_number,
+            "timestamp": timestamp,
+            "level": level,
+            "message": message,
+        }
+
+        if metadata:
+            log_entry["metadata"] = metadata
+
+        # Write log immediately
+        with open(logs_file, "a") as f:
+            f.write(json.dumps(log_entry) + "\n")
+
+        # Update sequence counter
+        seq_file.write_text(str(sequence_number + 1))
+
+    def write_metric_data(
+        self,
+        project: str,
+        experiment: str,
+        metric_name: str,
+        data: Any,
+    ):
+        """
+        Write metric data point.
+
+        Args:
+            project: Project name
+            experiment: Experiment name
+            metric_name: Metric name
+            data: Data point
+        """
+        experiment_dir = self.root_path / project / experiment
+        metric_file = experiment_dir / "metrics" / f"{metric_name}.jsonl"
+
+        data_point = {
+            "timestamp": datetime.utcnow().isoformat() + "Z",
+            "data": data,
+        }
+
+        with open(metric_file, "a") as f:
+            f.write(json.dumps(data_point) + "\n")
+
+    def write_parameters(
+        self,
+        project: str,
+        experiment: str,
+        data: Dict[str, Any],
+    ):
+        """
+        Write/merge parameters. Always merges with existing parameters.
+
+        File format:
+            {
+                "version": 2,
+                "data": {"model.lr": 0.001, "model.batch_size": 32},
+                "updatedAt": "2024-01-15T10:30:00Z"
+            }
+
+        Args:
+            project: Project name
+            experiment: Experiment name
+            data: Flattened parameter dict with dot notation (already flattened)
+        """
+        experiment_dir = self.root_path / project / experiment
+        params_file = experiment_dir / "parameters.json"
+
+        # Read existing if present
+        if params_file.exists():
+            with open(params_file, "r") as f:
+                existing_doc = json.load(f)
+
+            # Merge with existing data
+            existing_data = existing_doc.get("data", {})
+            existing_data.update(data)
+
+            # Increment version
+            version = existing_doc.get("version", 1) + 1
+
+            params_doc = {
+                "version": version,
+                "data": existing_data,
+                "updatedAt": datetime.utcnow().isoformat() + "Z"
+            }
+        else:
+            # Create new parameters document
+            params_doc = {
+                "version": 1,
+                "data": data,
+                "createdAt": datetime.utcnow().isoformat() + "Z",
+                "updatedAt": datetime.utcnow().isoformat() + "Z"
+            }
+
+        with open(params_file, "w") as f:
+            json.dump(params_doc, f, indent=2)
+
+    def read_parameters(
+        self,
+        project: str,
+        experiment: str,
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Read parameters from local file.
+
+        Args:
+            project: Project name
+            experiment: Experiment name
+
+        Returns:
+            Flattened parameter dict, or None if file doesn't exist
+        """
+        experiment_dir = self.root_path / project / experiment
+        params_file = experiment_dir / "parameters.json"
+
+        if not params_file.exists():
+            return None
+
+        try:
+            with open(params_file, "r") as f:
+                params_doc = json.load(f)
+            return params_doc.get("data", {})
+        except (json.JSONDecodeError, IOError):
+            return None
+
+    def write_file(
+        self,
+        project: str,
+        experiment: str,
+        file_path: str,
+        prefix: str,
+        filename: str,
+        description: Optional[str],
+        tags: Optional[List[str]],
+        metadata: Optional[Dict[str, Any]],
+        checksum: str,
+        content_type: str,
+        size_bytes: int
+    ) -> Dict[str, Any]:
+        """
+        Write file to local storage.
+
+        Copies file to: files/<prefix>/<file_id>/<filename>
+        Updates .files_metadata.json with file metadata
+
+        Args:
+            project: Project name
+            experiment: Experiment name
+            file_path: Source file path
+            prefix: Logical path prefix
+            filename: Original filename
+            description: Optional description
+            tags: Optional tags
+            metadata: Optional metadata
+            checksum: SHA256 checksum
+            content_type: MIME type
+            size_bytes: File size in bytes
+
+        Returns:
+            File metadata dict
+        """
+        import shutil
+        from .files import generate_snowflake_id
+
+        experiment_dir = self.root_path / project / experiment
+        files_dir = experiment_dir / "files"
+        metadata_file = files_dir / ".files_metadata.json"
+
+        # Generate Snowflake ID for file
+        file_id = generate_snowflake_id()
+
+        # Normalize prefix (remove leading slashes to avoid absolute paths)
+        normalized_prefix = prefix.lstrip("/") if prefix else ""
+
+        # Create prefix directory, then file directory
+        prefix_dir = files_dir / normalized_prefix if normalized_prefix else files_dir
+        prefix_dir.mkdir(parents=True, exist_ok=True)
+
+        file_dir = prefix_dir / file_id
+        file_dir.mkdir(parents=True, exist_ok=True)
+
+        # Copy file
+        dest_file = file_dir / filename
+        shutil.copy2(file_path, dest_file)
+
+        # Create file metadata
+        file_metadata = {
+            "id": file_id,
+            "experimentId": f"{project}/{experiment}",  # Local mode doesn't have real experiment ID
+            "path": prefix,
+            "filename": filename,
+            "description": description,
+            "tags": tags or [],
+            "contentType": content_type,
+            "sizeBytes": size_bytes,
+            "checksum": checksum,
+            "metadata": metadata,
+            "uploadedAt": datetime.utcnow().isoformat() + "Z",
+            "updatedAt": datetime.utcnow().isoformat() + "Z",
+            "deletedAt": None
+        }
+
+        # Read existing metadata
+        files_metadata = {"files": []}
+        if metadata_file.exists():
+            try:
+                with open(metadata_file, "r") as f:
+                    files_metadata = json.load(f)
+            except (json.JSONDecodeError, IOError):
+                files_metadata = {"files": []}
+
+        # Check if file with same prefix+filename exists (overwrite behavior)
+        existing_index = None
+        for i, existing_file in enumerate(files_metadata["files"]):
+            if (existing_file["path"] == prefix and
+                existing_file["filename"] == filename and
+                existing_file["deletedAt"] is None):
+                existing_index = i
+                break
+
+        if existing_index is not None:
+            # Overwrite: remove old file and update metadata
+            old_file = files_metadata["files"][existing_index]
+            old_prefix = old_file["path"].lstrip("/") if old_file["path"] else ""
+            if old_prefix:
+                old_file_dir = files_dir / old_prefix / old_file["id"]
+            else:
+                old_file_dir = files_dir / old_file["id"]
+            if old_file_dir.exists():
+                shutil.rmtree(old_file_dir)
+            files_metadata["files"][existing_index] = file_metadata
+        else:
+            # New file: append to list
+            files_metadata["files"].append(file_metadata)
+
+        # Write updated metadata
+        with open(metadata_file, "w") as f:
+            json.dump(files_metadata, f, indent=2)
+
+        return file_metadata
+
+    def list_files(
+        self,
+        project: str,
+        experiment: str,
+        prefix: Optional[str] = None,
+        tags: Optional[List[str]] = None
+    ) -> List[Dict[str, Any]]:
+        """
+        List files from local storage.
+
+        Args:
+            project: Project name
+            experiment: Experiment name
+            prefix: Optional prefix filter
+            tags: Optional tags filter
+
+        Returns:
+            List of file metadata dicts (only non-deleted files)
+        """
+        experiment_dir = self.root_path / project / experiment
+        metadata_file = experiment_dir / "files" / ".files_metadata.json"
+
+        if not metadata_file.exists():
+            return []
+
+        try:
+            with open(metadata_file, "r") as f:
+                files_metadata = json.load(f)
+        except (json.JSONDecodeError, IOError):
+            return []
+
+        files = files_metadata.get("files", [])
+
+        # Filter out deleted files
+        files = [f for f in files if f.get("deletedAt") is None]
+
+        # Apply prefix filter
+        if prefix:
+            files = [f for f in files if f["path"].startswith(prefix)]
+
+        # Apply tags filter
+        if tags:
+            files = [f for f in files if any(tag in f.get("tags", []) for tag in tags)]
+
+        return files
+
+    def read_file(
+        self,
+        project: str,
+        experiment: str,
+        file_id: str,
+        dest_path: Optional[str] = None
+    ) -> str:
+        """
+        Read/copy file from local storage.
+
+        Args:
+            project: Project name
+            experiment: Experiment name
+            file_id: File ID
+            dest_path: Optional destination path (defaults to original filename)
+
+        Returns:
+            Path to copied file
+
+        Raises:
+            FileNotFoundError: If file not found
+            ValueError: If checksum verification fails
+        """
+        import shutil
+        from .files import verify_checksum
+
+        experiment_dir = self.root_path / project / experiment
+        files_dir = experiment_dir / "files"
+        metadata_file = files_dir / ".files_metadata.json"
+
+        if not metadata_file.exists():
+            raise FileNotFoundError(f"File {file_id} not found")
+
+        # Find file metadata
+        with open(metadata_file, "r") as f:
+            files_metadata = json.load(f)
+
+        file_metadata = None
+        for f in files_metadata.get("files", []):
+            if f["id"] == file_id and f.get("deletedAt") is None:
+                file_metadata = f
+                break
+
+        if not file_metadata:
+            raise FileNotFoundError(f"File {file_id} not found")
+
+        # Get source file
+        file_prefix = file_metadata["path"].lstrip("/") if file_metadata["path"] else ""
+        if file_prefix:
+            source_file = files_dir / file_prefix / file_id / file_metadata["filename"]
+        else:
+            source_file = files_dir / file_id / file_metadata["filename"]
+        if not source_file.exists():
+            raise FileNotFoundError(f"File {file_id} not found on disk")
+
+        # Determine destination
+        if dest_path is None:
+            dest_path = file_metadata["filename"]
+
+        # Copy file
+        shutil.copy2(source_file, dest_path)
+
+        # Verify checksum
+        expected_checksum = file_metadata["checksum"]
+        if not verify_checksum(dest_path, expected_checksum):
+            import os
+            os.remove(dest_path)
+            raise ValueError(f"Checksum verification failed for file {file_id}")
+
+        return dest_path
+
+    def delete_file(
+        self,
+        project: str,
+        experiment: str,
+        file_id: str
+    ) -> Dict[str, Any]:
+        """
+        Delete file from local storage (soft delete in metadata).
+
+        Args:
+            project: Project name
+            experiment: Experiment name
+            file_id: File ID
+
+        Returns:
+            Dict with id and deletedAt
+
+        Raises:
+            FileNotFoundError: If file not found
+        """
+        experiment_dir = self.root_path / project / experiment
+        metadata_file = experiment_dir / "files" / ".files_metadata.json"
+
+        if not metadata_file.exists():
+            raise FileNotFoundError(f"File {file_id} not found")
+
+        # Read metadata
+        with open(metadata_file, "r") as f:
+            files_metadata = json.load(f)
+
+        # Find and soft delete file
+        file_found = False
+        for file_meta in files_metadata.get("files", []):
+            if file_meta["id"] == file_id:
+                if file_meta.get("deletedAt") is not None:
+                    raise FileNotFoundError(f"File {file_id} already deleted")
+                file_meta["deletedAt"] = datetime.utcnow().isoformat() + "Z"
+                file_meta["updatedAt"] = file_meta["deletedAt"]
+                file_found = True
+                break
+
+        if not file_found:
+            raise FileNotFoundError(f"File {file_id} not found")
+
+        # Write updated metadata
+        with open(metadata_file, "w") as f:
+            json.dump(files_metadata, f, indent=2)
+
+        return {
+            "id": file_id,
+            "deletedAt": file_meta["deletedAt"]
+        }
+
+    def update_file_metadata(
+        self,
+        project: str,
+        experiment: str,
+        file_id: str,
+        description: Optional[str] = None,
+        tags: Optional[List[str]] = None,
+        metadata: Optional[Dict[str, Any]] = None
+    ) -> Dict[str, Any]:
+        """
+        Update file metadata in local storage.
+
+        Args:
+            project: Project name
+            experiment: Experiment name
+            file_id: File ID
+            description: Optional description
+            tags: Optional tags
+            metadata: Optional metadata
+
+        Returns:
+            Updated file metadata dict
+
+        Raises:
+            FileNotFoundError: If file not found
+        """
+        experiment_dir = self.root_path / project / experiment
+        metadata_file = experiment_dir / "files" / ".files_metadata.json"
+
+        if not metadata_file.exists():
+            raise FileNotFoundError(f"File {file_id} not found")
+
+        # Read metadata
+        with open(metadata_file, "r") as f:
+            files_metadata = json.load(f)
+
+        # Find and update file
+        file_found = False
+        updated_file = None
+        for file_meta in files_metadata.get("files", []):
+            if file_meta["id"] == file_id:
+                if file_meta.get("deletedAt") is not None:
+                    raise FileNotFoundError(f"File {file_id} has been deleted")
+
+                # Update fields
+                if description is not None:
+                    file_meta["description"] = description
+                if tags is not None:
+                    file_meta["tags"] = tags
+                if metadata is not None:
+                    file_meta["metadata"] = metadata
+
+                file_meta["updatedAt"] = datetime.utcnow().isoformat() + "Z"
+                file_found = True
+                updated_file = file_meta
+                break
+
+        if not file_found:
+            raise FileNotFoundError(f"File {file_id} not found")
+
+        # Write updated metadata
+        with open(metadata_file, "w") as f:
+            json.dump(files_metadata, f, indent=2)
+
+        return updated_file
+
+    def _get_experiment_dir(self, project: str, experiment: str) -> Path:
+        """Get experiment directory path."""
+        return self.root_path / project / experiment
+
+    def append_to_metric(
+        self,
+        project: str,
+        experiment: str,
+        metric_name: str,
+        data: Dict[str, Any],
+        description: Optional[str] = None,
+        tags: Optional[List[str]] = None,
+        metadata: Optional[Dict[str, Any]] = None
+    ) -> Dict[str, Any]:
+        """
+        Append a single data point to a metric in local storage.
+
+        Storage format:
+            .ml-dash/{project}/{experiment}/metrics/{metric_name}/
+                data.jsonl       # Data points (one JSON object per line)
+                metadata.json    # Metric metadata (name, description, tags, stats)
+
+        Args:
+            project: Project name
+            experiment: Experiment name
+            metric_name: Metric name
+            data: Data point (flexible schema)
+            description: Optional metric description
+            tags: Optional tags
+            metadata: Optional metric metadata
+
+        Returns:
+            Dict with metricId, index, bufferedDataPoints, chunkSize
+        """
+        experiment_dir = self._get_experiment_dir(project, experiment)
+        metrics_dir = experiment_dir / "metrics"
+        metrics_dir.mkdir(parents=True, exist_ok=True)
+
+        metric_dir = metrics_dir / metric_name
+        metric_dir.mkdir(exist_ok=True)
+
+        data_file = metric_dir / "data.jsonl"
+        metadata_file = metric_dir / "metadata.json"
+
+        # Load or initialize metadata
+        if metadata_file.exists():
+            with open(metadata_file, "r") as f:
+                metric_meta = json.load(f)
+        else:
+            metric_meta = {
+                "metricId": f"local-metric-{metric_name}",
+                "name": metric_name,
+                "description": description,
+                "tags": tags or [],
+                "metadata": metadata,
+                "totalDataPoints": 0,
+                "nextIndex": 0,
+                "createdAt": datetime.utcnow().isoformat() + "Z"
+            }
+
+        # Get next index
+        index = metric_meta["nextIndex"]
+
+        # Append data point to JSONL file
+        data_entry = {
+            "index": index,
+            "data": data,
+            "createdAt": datetime.utcnow().isoformat() + "Z"
+        }
+
+        with open(data_file, "a") as f:
+            f.write(json.dumps(data_entry) + "\n")
+
+        # Update metadata
+        metric_meta["nextIndex"] = index + 1
+        metric_meta["totalDataPoints"] = metric_meta["totalDataPoints"] + 1
+        metric_meta["updatedAt"] = datetime.utcnow().isoformat() + "Z"
+
+        with open(metadata_file, "w") as f:
+            json.dump(metric_meta, f, indent=2)
+
+        return {
+            "metricId": metric_meta["metricId"],
+            "index": str(index),
+            "bufferedDataPoints": str(metric_meta["totalDataPoints"]),
+            "chunkSize": 10000  # Default chunk size for local mode
+        }
+
+    def append_batch_to_metric(
+        self,
+        project: str,
+        experiment: str,
+        metric_name: str,
+        data_points: List[Dict[str, Any]],
+        description: Optional[str] = None,
+        tags: Optional[List[str]] = None,
+        metadata: Optional[Dict[str, Any]] = None
+    ) -> Dict[str, Any]:
+        """
+        Append multiple data points to a metric in local storage (batch).
+
+        Args:
+            project: Project name
+            experiment: Experiment name
+            metric_name: Metric name
+            data_points: List of data points
+            description: Optional metric description
+            tags: Optional tags
+            metadata: Optional metric metadata
+
+        Returns:
+            Dict with metricId, startIndex, endIndex, count
+        """
+        experiment_dir = self._get_experiment_dir(project, experiment)
+        metrics_dir = experiment_dir / "metrics"
+        metrics_dir.mkdir(parents=True, exist_ok=True)
+
+        metric_dir = metrics_dir / metric_name
+        metric_dir.mkdir(exist_ok=True)
+
+        data_file = metric_dir / "data.jsonl"
+        metadata_file = metric_dir / "metadata.json"
+
+        # Load or initialize metadata
+        if metadata_file.exists():
+            with open(metadata_file, "r") as f:
+                metric_meta = json.load(f)
+        else:
+            metric_meta = {
+                "metricId": f"local-metric-{metric_name}",
+                "name": metric_name,
+                "description": description,
+                "tags": tags or [],
+                "metadata": metadata,
+                "totalDataPoints": 0,
+                "nextIndex": 0,
+                "createdAt": datetime.utcnow().isoformat() + "Z"
+            }
+
+        start_index = metric_meta["nextIndex"]
+        end_index = start_index + len(data_points) - 1
+
+        # Append data points to JSONL file
+        with open(data_file, "a") as f:
+            for i, data in enumerate(data_points):
+                data_entry = {
+                    "index": start_index + i,
+                    "data": data,
+                    "createdAt": datetime.utcnow().isoformat() + "Z"
+                }
+                f.write(json.dumps(data_entry) + "\n")
+
+        # Update metadata
+        metric_meta["nextIndex"] = end_index + 1
+        metric_meta["totalDataPoints"] = metric_meta["totalDataPoints"] + len(data_points)
+        metric_meta["updatedAt"] = datetime.utcnow().isoformat() + "Z"
+
+        with open(metadata_file, "w") as f:
+            json.dump(metric_meta, f, indent=2)
+
+        return {
+            "metricId": metric_meta["metricId"],
+            "startIndex": str(start_index),
+            "endIndex": str(end_index),
+            "count": len(data_points),
+            "bufferedDataPoints": str(metric_meta["totalDataPoints"]),
+            "chunkSize": 10000
+        }
+
+    def read_metric_data(
+        self,
+        project: str,
+        experiment: str,
+        metric_name: str,
+        start_index: int = 0,
+        limit: int = 1000
+    ) -> Dict[str, Any]:
+        """
+        Read data points from a metric in local storage.
+
+        Args:
+            project: Project name
+            experiment: Experiment name
+            metric_name: Metric name
+            start_index: Starting index
+            limit: Max points to read
+
+        Returns:
+            Dict with data, startIndex, endIndex, total, hasMore
+        """
+        experiment_dir = self._get_experiment_dir(project, experiment)
+        metric_dir = experiment_dir / "metrics" / metric_name
+        data_file = metric_dir / "data.jsonl"
+
+        if not data_file.exists():
+            return {
+                "data": [],
+                "startIndex": start_index,
+                "endIndex": start_index - 1,
+                "total": 0,
+                "hasMore": False
+            }
+
+        # Read all data points from JSONL file
+        data_points = []
+        with open(data_file, "r") as f:
+            for line in f:
+                if line.strip():
+                    entry = json.loads(line)
+                    # Filter by index range
+                    if start_index <= entry["index"] < start_index + limit:
+                        data_points.append(entry)
+
+        # Get total count
+        metadata_file = metric_dir / "metadata.json"
+        total_count = 0
+        if metadata_file.exists():
+            with open(metadata_file, "r") as f:
+                metric_meta = json.load(f)
+            total_count = metric_meta["totalDataPoints"]
+
+        return {
+            "data": data_points,
+            "startIndex": start_index,
+            "endIndex": start_index + len(data_points) - 1 if data_points else start_index - 1,
+            "total": len(data_points),
+            "hasMore": start_index + len(data_points) < total_count
+        }
+
+    def get_metric_stats(
+        self,
+        project: str,
+        experiment: str,
+        metric_name: str
+    ) -> Dict[str, Any]:
+        """
+        Get metric statistics from local storage.
+
+        Args:
+            project: Project name
+            experiment: Experiment name
+            metric_name: Metric name
+
+        Returns:
+            Dict with metric stats
+        """
+        experiment_dir = self._get_experiment_dir(project, experiment)
+        metric_dir = experiment_dir / "metrics" / metric_name
+        metadata_file = metric_dir / "metadata.json"
+
+        if not metadata_file.exists():
+            raise FileNotFoundError(f"Metric {metric_name} not found")
+
+        with open(metadata_file, "r") as f:
+            metric_meta = json.load(f)
+
+        return {
+            "metricId": metric_meta["metricId"],
+            "name": metric_meta["name"],
+            "description": metric_meta.get("description"),
+            "tags": metric_meta.get("tags", []),
+            "metadata": metric_meta.get("metadata"),
+            "totalDataPoints": str(metric_meta["totalDataPoints"]),
+            "bufferedDataPoints": str(metric_meta["totalDataPoints"]),  # All buffered in local mode
+            "chunkedDataPoints": "0",  # No chunking in local mode
+            "totalChunks": 0,
+            "chunkSize": 10000,
+            "firstDataAt": metric_meta.get("createdAt"),
+            "lastDataAt": metric_meta.get("updatedAt"),
+            "createdAt": metric_meta.get("createdAt"),
+            "updatedAt": metric_meta.get("updatedAt", metric_meta.get("createdAt"))
+        }
+
+    def list_metrics(
+        self,
+        project: str,
+        experiment: str
+    ) -> List[Dict[str, Any]]:
+        """
+        List all metrics in an experiment from local storage.
+
+        Args:
+            project: Project name
+            experiment: Experiment name
+
+        Returns:
+            List of metric summaries
+        """
+        experiment_dir = self._get_experiment_dir(project, experiment)
+        metrics_dir = experiment_dir / "metrics"
+
+        if not metrics_dir.exists():
+            return []
+
+        metrics = []
+        for metric_dir in metrics_dir.iterdir():
+            if metric_dir.is_dir():
+                metadata_file = metric_dir / "metadata.json"
+                if metadata_file.exists():
+                    with open(metadata_file, "r") as f:
+                        metric_meta = json.load(f)
+                    metrics.append({
+                        "metricId": metric_meta["metricId"],
+                        "name": metric_meta["name"],
+                        "description": metric_meta.get("description"),
+                        "tags": metric_meta.get("tags", []),
+                        "totalDataPoints": str(metric_meta["totalDataPoints"]),
+                        "createdAt": metric_meta.get("createdAt")
+                    })
+
+        return metrics
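
For orientation, here is a minimal usage sketch (an editor's illustration, not part of the package) exercising the experiment, parameter, and log paths of the new LocalStorage backend. It assumes the module is importable as ml_dash.storage; the project and experiment names are hypothetical.

from datetime import datetime
from pathlib import Path

from ml_dash.storage import LocalStorage

storage = LocalStorage(Path(".ml-dash"))

# Creates .ml-dash/demo/run-1/ with logs/, metrics/, files/ and experiment.json
storage.create_experiment(project="demo", name="run-1", tags=["baseline"])

# The first call writes parameters.json at version 1; later calls merge into the
# existing "data" map and bump the version.
storage.write_parameters("demo", "run-1", {"model.lr": 0.001, "model.batch_size": 32})
storage.write_parameters("demo", "run-1", {"model.lr": 0.0005})
print(storage.read_parameters("demo", "run-1"))
# -> {'model.lr': 0.0005, 'model.batch_size': 32}

# Each entry is appended to logs/logs.jsonl with a sequenceNumber taken from
# the .log_sequence counter file.
storage.write_log(
    "demo", "run-1",
    message="training started",
    level="INFO",
    timestamp=datetime.utcnow().isoformat() + "Z",
)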
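The metric methods keep their own bookkeeping: append_to_metric and append_batch_to_metric assign indices from the nextIndex counter persisted in metadata.json, and read_metric_data pages by index range against totalDataPoints. A short sketch under the same assumptions as above:

points = [{"step": i, "loss": 1.0 / (i + 1)} for i in range(5)]
result = storage.append_batch_to_metric("demo", "run-1", "loss", points)
print(result["startIndex"], result["endIndex"], result["count"])  # 0 4 5

# Reading a page of 2 points leaves 3 unread, so hasMore is True.
page = storage.read_metric_data("demo", "run-1", "loss", start_index=0, limit=2)
print(len(page["data"]), page["hasMore"])  # 2 True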