ml-dash 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ml_dash/storage.py ADDED
@@ -0,0 +1,922 @@
+ """
+ Local filesystem storage for ML-Dash.
+ """
+
+ import json
+ import shutil
+ from datetime import datetime
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional
+
+
+ class LocalStorage:
+     """
+     Local filesystem storage backend.
+
+     Directory structure:
+         <root>/
+             <project>/
+                 <experiment_name>/
+                     experiment.json             # Experiment metadata
+                     logs/
+                         logs.jsonl              # Log entries
+                         .log_sequence           # Sequence counter
+                     metrics/
+                         <metric_name>.jsonl     # Simple data points (write_metric_data)
+                         <metric_name>/          # Indexed metrics (append_to_metric)
+                             data.jsonl
+                             metadata.json
+                     files/
+                         .files_metadata.json    # File metadata index
+                         <file_id>/<uploaded_file>
+                     parameters.json             # Flattened parameters
+     """
+
+     def __init__(self, root_path: Path):
+         """
+         Initialize local storage.
+
+         Args:
+             root_path: Root directory for local storage
+         """
+         self.root_path = Path(root_path)
+         self.root_path.mkdir(parents=True, exist_ok=True)
+
+     def create_experiment(
+         self,
+         project: str,
+         name: str,
+         description: Optional[str] = None,
+         tags: Optional[List[str]] = None,
+         folder: Optional[str] = None,
+         metadata: Optional[Dict[str, Any]] = None,
+     ) -> Path:
+         """
+         Create an experiment directory structure.
+
+         Args:
+             project: Project name
+             name: Experiment name
+             description: Optional description
+             tags: Optional tags
+             folder: Optional folder path (used for organization)
+             metadata: Optional metadata
+
+         Returns:
+             Path to experiment directory
+         """
+         # Create project directory
+         project_dir = self.root_path / project
+         project_dir.mkdir(parents=True, exist_ok=True)
+
+         # Create experiment directory
+         experiment_dir = project_dir / name
+         experiment_dir.mkdir(parents=True, exist_ok=True)
+
+         # Create subdirectories
+         (experiment_dir / "logs").mkdir(exist_ok=True)
+         (experiment_dir / "metrics").mkdir(exist_ok=True)
+         (experiment_dir / "files").mkdir(exist_ok=True)
+
+         # Write experiment metadata
+         experiment_metadata = {
+             "name": name,
+             "project": project,
+             "description": description,
+             "tags": tags or [],
+             "folder": folder,
+             "metadata": metadata,
+             "created_at": datetime.utcnow().isoformat() + "Z",
+             "write_protected": False,
+         }
+
+         experiment_file = experiment_dir / "experiment.json"
+         if not experiment_file.exists():
+             # Only create if it doesn't exist (don't overwrite)
+             with open(experiment_file, "w") as f:
+                 json.dump(experiment_metadata, f, indent=2)
+         else:
+             # Update existing experiment
+             with open(experiment_file, "r") as f:
+                 existing = json.load(f)
+             # Merge updates
+             if description is not None:
+                 existing["description"] = description
+             if tags is not None:
+                 existing["tags"] = tags
+             if folder is not None:
+                 existing["folder"] = folder
+             if metadata is not None:
+                 existing["metadata"] = metadata
+             existing["updated_at"] = datetime.utcnow().isoformat() + "Z"
+             with open(experiment_file, "w") as f:
+                 json.dump(existing, f, indent=2)
+
+         return experiment_dir
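A minimal usage sketch of the experiment lifecycle. The root path and all names here are illustrative, not part of the package:

from pathlib import Path
from ml_dash.storage import LocalStorage

storage = LocalStorage(Path("./ml-dash-data"))  # hypothetical root directory
exp_dir = storage.create_experiment(
    project="mnist",
    name="baseline-run",
    tags=["cnn", "baseline"],
)
# Calling create_experiment again with the same project/name merges the new
# fields into experiment.json rather than overwriting it.
print(exp_dir)  # ml-dash-data/mnist/baseline-run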
+
+     def flush(self):
+         """Flush any pending writes (no-op for now)."""
+         pass
+
+     def write_log(
+         self,
+         project: str,
+         experiment: str,
+         message: str,
+         level: str,
+         timestamp: str,
+         metadata: Optional[Dict[str, Any]] = None,
+     ):
+         """
+         Write a single log entry immediately to the JSONL file.
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             message: Log message
+             level: Log level
+             timestamp: ISO timestamp string
+             metadata: Optional metadata
+         """
+         experiment_dir = self.root_path / project / experiment
+         logs_dir = experiment_dir / "logs"
+         logs_file = logs_dir / "logs.jsonl"
+         seq_file = logs_dir / ".log_sequence"
+
+         # Read and increment sequence counter
+         sequence_number = 0
+         if seq_file.exists():
+             try:
+                 sequence_number = int(seq_file.read_text().strip())
+             except (ValueError, IOError):
+                 sequence_number = 0
+
+         log_entry = {
+             "sequenceNumber": sequence_number,
+             "timestamp": timestamp,
+             "level": level,
+             "message": message,
+         }
+
+         if metadata:
+             log_entry["metadata"] = metadata
+
+         # Write log immediately
+         with open(logs_file, "a") as f:
+             f.write(json.dumps(log_entry) + "\n")
+
+         # Update sequence counter
+         seq_file.write_text(str(sequence_number + 1))
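Continuing the sketch above (storage and the experiment are illustrative), a write_log call and the line it appends; the caller supplies the timestamp:

from datetime import datetime, timezone

storage.write_log(
    project="mnist",
    experiment="baseline-run",
    message="epoch 1 complete",
    level="INFO",
    timestamp=datetime.now(timezone.utc).isoformat(),
)
# logs/logs.jsonl gains one line, e.g.:
# {"sequenceNumber": 0, "timestamp": "...", "level": "INFO", "message": "epoch 1 complete"}
# and .log_sequence now contains "1".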
+
+     def write_metric_data(
+         self,
+         project: str,
+         experiment: str,
+         metric_name: str,
+         data: Any,
+     ):
+         """
+         Write a metric data point.
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             metric_name: Metric name
+             data: Data point
+         """
+         experiment_dir = self.root_path / project / experiment
+         metric_file = experiment_dir / "metrics" / f"{metric_name}.jsonl"
+
+         data_point = {
+             "timestamp": datetime.utcnow().isoformat() + "Z",
+             "data": data,
+         }
+
+         with open(metric_file, "a") as f:
+             f.write(json.dumps(data_point) + "\n")
+
+     def write_parameters(
+         self,
+         project: str,
+         experiment: str,
+         data: Dict[str, Any],
+     ):
+         """
+         Write/merge parameters. Always merges with existing parameters.
+
+         File format:
+             {
+                 "version": 2,
+                 "data": {"model.lr": 0.001, "model.batch_size": 32},
+                 "updatedAt": "2024-01-15T10:30:00Z"
+             }
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             data: Flattened parameter dict with dot notation (already flattened)
+         """
+         experiment_dir = self.root_path / project / experiment
+         params_file = experiment_dir / "parameters.json"
+
+         # Read existing if present
+         if params_file.exists():
+             with open(params_file, "r") as f:
+                 existing_doc = json.load(f)
+
+             # Merge with existing data
+             existing_data = existing_doc.get("data", {})
+             existing_data.update(data)
+
+             # Increment version
+             version = existing_doc.get("version", 1) + 1
+
+             params_doc = {
+                 "version": version,
+                 "data": existing_data,
+                 "updatedAt": datetime.utcnow().isoformat() + "Z"
+             }
+         else:
+             # Create new parameters document
+             params_doc = {
+                 "version": 1,
+                 "data": data,
+                 "createdAt": datetime.utcnow().isoformat() + "Z",
+                 "updatedAt": datetime.utcnow().isoformat() + "Z"
+             }
+
+         with open(params_file, "w") as f:
+             json.dump(params_doc, f, indent=2)
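The merge semantics in a sketch: two successive calls, the second overriding one key and adding another (values illustrative):

storage.write_parameters("mnist", "baseline-run",
                         {"model.lr": 0.001, "model.batch_size": 32})
storage.write_parameters("mnist", "baseline-run",
                         {"model.lr": 0.0005, "optim.name": "adam"})

print(storage.read_parameters("mnist", "baseline-run"))
# {"model.lr": 0.0005, "model.batch_size": 32, "optim.name": "adam"}
# parameters.json is now at "version": 2.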
+
+     def read_parameters(
+         self,
+         project: str,
+         experiment: str,
+     ) -> Optional[Dict[str, Any]]:
+         """
+         Read parameters from the local file.
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+
+         Returns:
+             Flattened parameter dict, or None if the file doesn't exist
+         """
+         experiment_dir = self.root_path / project / experiment
+         params_file = experiment_dir / "parameters.json"
+
+         if not params_file.exists():
+             return None
+
+         try:
+             with open(params_file, "r") as f:
+                 params_doc = json.load(f)
+             return params_doc.get("data", {})
+         except (json.JSONDecodeError, IOError):
+             return None
+
+     def write_file(
+         self,
+         project: str,
+         experiment: str,
+         file_path: str,
+         prefix: str,
+         filename: str,
+         description: Optional[str],
+         tags: Optional[List[str]],
+         metadata: Optional[Dict[str, Any]],
+         checksum: str,
+         content_type: str,
+         size_bytes: int
+     ) -> Dict[str, Any]:
+         """
+         Write a file to local storage.
+
+         Copies the file to: files/<file_id>/<filename>
+         Updates .files_metadata.json with the file metadata.
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             file_path: Source file path
+             prefix: Logical path prefix
+             filename: Original filename
+             description: Optional description
+             tags: Optional tags
+             metadata: Optional metadata
+             checksum: SHA256 checksum
+             content_type: MIME type
+             size_bytes: File size in bytes
+
+         Returns:
+             File metadata dict
+         """
+         from .files import generate_snowflake_id
+
+         experiment_dir = self.root_path / project / experiment
+         files_dir = experiment_dir / "files"
+         metadata_file = files_dir / ".files_metadata.json"
+
+         # Generate Snowflake ID for the file
+         file_id = generate_snowflake_id()
+
+         # Create file directory
+         file_dir = files_dir / file_id
+         file_dir.mkdir(parents=True, exist_ok=True)
+
+         # Copy file
+         dest_file = file_dir / filename
+         shutil.copy2(file_path, dest_file)
+
+         # Create file metadata
+         file_metadata = {
+             "id": file_id,
+             "experimentId": f"{project}/{experiment}",  # Local mode doesn't have a real experiment ID
+             "path": prefix,
+             "filename": filename,
+             "description": description,
+             "tags": tags or [],
+             "contentType": content_type,
+             "sizeBytes": size_bytes,
+             "checksum": checksum,
+             "metadata": metadata,
+             "uploadedAt": datetime.utcnow().isoformat() + "Z",
+             "updatedAt": datetime.utcnow().isoformat() + "Z",
+             "deletedAt": None
+         }
+
+         # Read existing metadata
+         files_metadata = {"files": []}
+         if metadata_file.exists():
+             try:
+                 with open(metadata_file, "r") as f:
+                     files_metadata = json.load(f)
+             except (json.JSONDecodeError, IOError):
+                 files_metadata = {"files": []}
+
+         # Check whether a file with the same prefix+filename exists (overwrite behavior)
+         existing_index = None
+         for i, existing_file in enumerate(files_metadata["files"]):
+             if (existing_file["path"] == prefix and
+                     existing_file["filename"] == filename and
+                     existing_file["deletedAt"] is None):
+                 existing_index = i
+                 break
+
+         if existing_index is not None:
+             # Overwrite: remove the old file and replace its metadata entry
+             old_file = files_metadata["files"][existing_index]
+             old_file_dir = files_dir / old_file["id"]
+             if old_file_dir.exists():
+                 shutil.rmtree(old_file_dir)
+             files_metadata["files"][existing_index] = file_metadata
+         else:
+             # New file: append to the list
+             files_metadata["files"].append(file_metadata)
+
+         # Write updated metadata
+         with open(metadata_file, "w") as f:
+             json.dump(files_metadata, f, indent=2)
+
+         return file_metadata
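A hedged upload sketch, continuing the running example. It assumes the checksum is the hex SHA-256 of the file contents (read_file verifies it via ml_dash.files.verify_checksum, so the exact convention is whatever that helper expects); the file name is illustrative:

import hashlib
import mimetypes
from pathlib import Path

src = Path("confusion_matrix.png")  # illustrative local file
meta = storage.write_file(
    project="mnist",
    experiment="baseline-run",
    file_path=str(src),
    prefix="plots",
    filename=src.name,
    description="Confusion matrix after training",
    tags=["plot"],
    metadata=None,
    checksum=hashlib.sha256(src.read_bytes()).hexdigest(),  # assumed hex SHA-256
    content_type=mimetypes.guess_type(src.name)[0] or "application/octet-stream",
    size_bytes=src.stat().st_size,
)
print(meta["id"])  # Snowflake ID assigned to this upload

Writing the same prefix+filename again replaces the stored copy and its metadata entry rather than adding a duplicate.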
+
+     def list_files(
+         self,
+         project: str,
+         experiment: str,
+         prefix: Optional[str] = None,
+         tags: Optional[List[str]] = None
+     ) -> List[Dict[str, Any]]:
+         """
+         List files from local storage.
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             prefix: Optional prefix filter
+             tags: Optional tags filter
+
+         Returns:
+             List of file metadata dicts (only non-deleted files)
+         """
+         experiment_dir = self.root_path / project / experiment
+         metadata_file = experiment_dir / "files" / ".files_metadata.json"
+
+         if not metadata_file.exists():
+             return []
+
+         try:
+             with open(metadata_file, "r") as f:
+                 files_metadata = json.load(f)
+         except (json.JSONDecodeError, IOError):
+             return []
+
+         files = files_metadata.get("files", [])
+
+         # Filter out deleted files
+         files = [f for f in files if f.get("deletedAt") is None]
+
+         # Apply prefix filter
+         if prefix:
+             files = [f for f in files if f["path"].startswith(prefix)]
+
+         # Apply tags filter (any matching tag)
+         if tags:
+             files = [f for f in files if any(tag in f.get("tags", []) for tag in tags)]
+
+         return files
+
+     def read_file(
+         self,
+         project: str,
+         experiment: str,
+         file_id: str,
+         dest_path: Optional[str] = None
+     ) -> str:
+         """
+         Read/copy a file from local storage.
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             file_id: File ID
+             dest_path: Optional destination path (defaults to the original filename)
+
+         Returns:
+             Path to the copied file
+
+         Raises:
+             FileNotFoundError: If the file is not found
+             ValueError: If checksum verification fails
+         """
+         import os
+
+         from .files import verify_checksum
+
+         experiment_dir = self.root_path / project / experiment
+         files_dir = experiment_dir / "files"
+         metadata_file = files_dir / ".files_metadata.json"
+
+         if not metadata_file.exists():
+             raise FileNotFoundError(f"File {file_id} not found")
+
+         # Find file metadata
+         with open(metadata_file, "r") as f:
+             files_metadata = json.load(f)
+
+         file_metadata = None
+         for entry in files_metadata.get("files", []):
+             if entry["id"] == file_id and entry.get("deletedAt") is None:
+                 file_metadata = entry
+                 break
+
+         if not file_metadata:
+             raise FileNotFoundError(f"File {file_id} not found")
+
+         # Get source file
+         source_file = files_dir / file_id / file_metadata["filename"]
+         if not source_file.exists():
+             raise FileNotFoundError(f"File {file_id} not found on disk")
+
+         # Determine destination
+         if dest_path is None:
+             dest_path = file_metadata["filename"]
+
+         # Copy file
+         shutil.copy2(source_file, dest_path)
+
+         # Verify checksum; discard the copy if it doesn't match
+         expected_checksum = file_metadata["checksum"]
+         if not verify_checksum(dest_path, expected_checksum):
+             os.remove(dest_path)
+             raise ValueError(f"Checksum verification failed for file {file_id}")
+
+         return dest_path
+
+     def delete_file(
+         self,
+         project: str,
+         experiment: str,
+         file_id: str
+     ) -> Dict[str, Any]:
+         """
+         Delete a file from local storage (soft delete in metadata).
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             file_id: File ID
+
+         Returns:
+             Dict with id and deletedAt
+
+         Raises:
+             FileNotFoundError: If the file is not found
+         """
+         experiment_dir = self.root_path / project / experiment
+         metadata_file = experiment_dir / "files" / ".files_metadata.json"
+
+         if not metadata_file.exists():
+             raise FileNotFoundError(f"File {file_id} not found")
+
+         # Read metadata
+         with open(metadata_file, "r") as f:
+             files_metadata = json.load(f)
+
+         # Find and soft-delete the file
+         deleted_at = None
+         for file_meta in files_metadata.get("files", []):
+             if file_meta["id"] == file_id:
+                 if file_meta.get("deletedAt") is not None:
+                     raise FileNotFoundError(f"File {file_id} already deleted")
+                 deleted_at = datetime.utcnow().isoformat() + "Z"
+                 file_meta["deletedAt"] = deleted_at
+                 file_meta["updatedAt"] = deleted_at
+                 break
+
+         if deleted_at is None:
+             raise FileNotFoundError(f"File {file_id} not found")
+
+         # Write updated metadata
+         with open(metadata_file, "w") as f:
+             json.dump(files_metadata, f, indent=2)
+
+         return {
+             "id": file_id,
+             "deletedAt": deleted_at
+         }
+
+     def update_file_metadata(
+         self,
+         project: str,
+         experiment: str,
+         file_id: str,
+         description: Optional[str] = None,
+         tags: Optional[List[str]] = None,
+         metadata: Optional[Dict[str, Any]] = None
+     ) -> Dict[str, Any]:
+         """
+         Update file metadata in local storage.
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             file_id: File ID
+             description: Optional description
+             tags: Optional tags
+             metadata: Optional metadata
+
+         Returns:
+             Updated file metadata dict
+
+         Raises:
+             FileNotFoundError: If the file is not found
+         """
+         experiment_dir = self.root_path / project / experiment
+         metadata_file = experiment_dir / "files" / ".files_metadata.json"
+
+         if not metadata_file.exists():
+             raise FileNotFoundError(f"File {file_id} not found")
+
+         # Read metadata
+         with open(metadata_file, "r") as f:
+             files_metadata = json.load(f)
+
+         # Find and update the file
+         updated_file = None
+         for file_meta in files_metadata.get("files", []):
+             if file_meta["id"] == file_id:
+                 if file_meta.get("deletedAt") is not None:
+                     raise FileNotFoundError(f"File {file_id} has been deleted")
+
+                 # Update fields
+                 if description is not None:
+                     file_meta["description"] = description
+                 if tags is not None:
+                     file_meta["tags"] = tags
+                 if metadata is not None:
+                     file_meta["metadata"] = metadata
+
+                 file_meta["updatedAt"] = datetime.utcnow().isoformat() + "Z"
+                 updated_file = file_meta
+                 break
+
+         if updated_file is None:
+             raise FileNotFoundError(f"File {file_id} not found")
+
+         # Write updated metadata
+         with open(metadata_file, "w") as f:
+             json.dump(files_metadata, f, indent=2)
+
+         return updated_file
+
+     def _get_experiment_dir(self, project: str, experiment: str) -> Path:
+         """Get the experiment directory path."""
+         return self.root_path / project / experiment
+
+     def append_to_metric(
+         self,
+         project: str,
+         experiment: str,
+         metric_name: str,
+         data: Dict[str, Any],
+         description: Optional[str] = None,
+         tags: Optional[List[str]] = None,
+         metadata: Optional[Dict[str, Any]] = None
+     ) -> Dict[str, Any]:
+         """
+         Append a single data point to a metric in local storage.
+
+         Storage format:
+             <root>/{project}/{experiment}/metrics/{metric_name}/
+                 data.jsonl     # Data points (one JSON object per line)
+                 metadata.json  # Metric metadata (name, description, tags, stats)
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             metric_name: Metric name
+             data: Data point (flexible schema)
+             description: Optional metric description
+             tags: Optional tags
+             metadata: Optional metric metadata
+
+         Returns:
+             Dict with metricId, index, bufferedDataPoints, chunkSize
+         """
+         experiment_dir = self._get_experiment_dir(project, experiment)
+         metrics_dir = experiment_dir / "metrics"
+         metrics_dir.mkdir(parents=True, exist_ok=True)
+
+         metric_dir = metrics_dir / metric_name
+         metric_dir.mkdir(exist_ok=True)
+
+         data_file = metric_dir / "data.jsonl"
+         metadata_file = metric_dir / "metadata.json"
+
+         # Load or initialize metadata
+         if metadata_file.exists():
+             with open(metadata_file, "r") as f:
+                 metric_meta = json.load(f)
+         else:
+             metric_meta = {
+                 "metricId": f"local-metric-{metric_name}",
+                 "name": metric_name,
+                 "description": description,
+                 "tags": tags or [],
+                 "metadata": metadata,
+                 "totalDataPoints": 0,
+                 "nextIndex": 0,
+                 "createdAt": datetime.utcnow().isoformat() + "Z"
+             }
+
+         # Get the next index
+         index = metric_meta["nextIndex"]
+
+         # Append the data point to the JSONL file
+         data_entry = {
+             "index": index,
+             "data": data,
+             "createdAt": datetime.utcnow().isoformat() + "Z"
+         }
+
+         with open(data_file, "a") as f:
+             f.write(json.dumps(data_entry) + "\n")
+
+         # Update metadata
+         metric_meta["nextIndex"] = index + 1
+         metric_meta["totalDataPoints"] = metric_meta["totalDataPoints"] + 1
+         metric_meta["updatedAt"] = datetime.utcnow().isoformat() + "Z"
+
+         with open(metadata_file, "w") as f:
+             json.dump(metric_meta, f, indent=2)
+
+         return {
+             "metricId": metric_meta["metricId"],
+             "index": str(index),
+             "bufferedDataPoints": str(metric_meta["totalDataPoints"]),
+             "chunkSize": 10000  # Default chunk size for local mode
+         }
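A sketch of logging a scalar metric with the running example; the data schema is free-form, so the keys here are illustrative:

for step, loss in enumerate([0.92, 0.71, 0.55]):
    ack = storage.append_to_metric(
        project="mnist",
        experiment="baseline-run",
        metric_name="train_loss",
        data={"step": step, "value": loss},
    )
# Each call appends one line to metrics/train_loss/data.jsonl and bumps
# nextIndex/totalDataPoints in metadata.json; ack["index"] is the assigned index.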
+
+     def append_batch_to_metric(
+         self,
+         project: str,
+         experiment: str,
+         metric_name: str,
+         data_points: List[Dict[str, Any]],
+         description: Optional[str] = None,
+         tags: Optional[List[str]] = None,
+         metadata: Optional[Dict[str, Any]] = None
+     ) -> Dict[str, Any]:
+         """
+         Append multiple data points to a metric in local storage (batch).
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             metric_name: Metric name
+             data_points: List of data points
+             description: Optional metric description
+             tags: Optional tags
+             metadata: Optional metric metadata
+
+         Returns:
+             Dict with metricId, startIndex, endIndex, count
+         """
+         experiment_dir = self._get_experiment_dir(project, experiment)
+         metrics_dir = experiment_dir / "metrics"
+         metrics_dir.mkdir(parents=True, exist_ok=True)
+
+         metric_dir = metrics_dir / metric_name
+         metric_dir.mkdir(exist_ok=True)
+
+         data_file = metric_dir / "data.jsonl"
+         metadata_file = metric_dir / "metadata.json"
+
+         # Load or initialize metadata
+         if metadata_file.exists():
+             with open(metadata_file, "r") as f:
+                 metric_meta = json.load(f)
+         else:
+             metric_meta = {
+                 "metricId": f"local-metric-{metric_name}",
+                 "name": metric_name,
+                 "description": description,
+                 "tags": tags or [],
+                 "metadata": metadata,
+                 "totalDataPoints": 0,
+                 "nextIndex": 0,
+                 "createdAt": datetime.utcnow().isoformat() + "Z"
+             }
+
+         start_index = metric_meta["nextIndex"]
+         end_index = start_index + len(data_points) - 1
+
+         # Append data points to the JSONL file
+         with open(data_file, "a") as f:
+             for i, data in enumerate(data_points):
+                 data_entry = {
+                     "index": start_index + i,
+                     "data": data,
+                     "createdAt": datetime.utcnow().isoformat() + "Z"
+                 }
+                 f.write(json.dumps(data_entry) + "\n")
+
+         # Update metadata
+         metric_meta["nextIndex"] = end_index + 1
+         metric_meta["totalDataPoints"] = metric_meta["totalDataPoints"] + len(data_points)
+         metric_meta["updatedAt"] = datetime.utcnow().isoformat() + "Z"
+
+         with open(metadata_file, "w") as f:
+             json.dump(metric_meta, f, indent=2)
+
+         return {
+             "metricId": metric_meta["metricId"],
+             "startIndex": str(start_index),
+             "endIndex": str(end_index),
+             "count": len(data_points),
+             "bufferedDataPoints": str(metric_meta["totalDataPoints"]),
+             "chunkSize": 10000
+         }
+
+     def read_metric_data(
+         self,
+         project: str,
+         experiment: str,
+         metric_name: str,
+         start_index: int = 0,
+         limit: int = 1000
+     ) -> Dict[str, Any]:
+         """
+         Read data points from a metric in local storage.
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             metric_name: Metric name
+             start_index: Starting index
+             limit: Max points to read
+
+         Returns:
+             Dict with data, startIndex, endIndex, total, hasMore
+         """
+         experiment_dir = self._get_experiment_dir(project, experiment)
+         metric_dir = experiment_dir / "metrics" / metric_name
+         data_file = metric_dir / "data.jsonl"
+
+         if not data_file.exists():
+             return {
+                 "data": [],
+                 "startIndex": start_index,
+                 "endIndex": start_index - 1,
+                 "total": 0,
+                 "hasMore": False
+             }
+
+         # Read matching data points from the JSONL file
+         data_points = []
+         with open(data_file, "r") as f:
+             for line in f:
+                 if line.strip():
+                     entry = json.loads(line)
+                     # Filter by index range
+                     if start_index <= entry["index"] < start_index + limit:
+                         data_points.append(entry)
+
+         # Get the total count from the metric metadata
+         metadata_file = metric_dir / "metadata.json"
+         total_count = 0
+         if metadata_file.exists():
+             with open(metadata_file, "r") as f:
+                 metric_meta = json.load(f)
+             total_count = metric_meta["totalDataPoints"]
+
+         return {
+             "data": data_points,
+             "startIndex": start_index,
+             "endIndex": start_index + len(data_points) - 1 if data_points else start_index - 1,
+             "total": total_count,  # all points stored for this metric, not just this page
+             "hasMore": start_index + len(data_points) < total_count
+         }
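Paging through a metric with read_metric_data, sketched under the same illustrative names:

page_start = 0
while True:
    page = storage.read_metric_data("mnist", "baseline-run", "train_loss",
                                    start_index=page_start, limit=1000)
    for entry in page["data"]:
        print(entry["index"], entry["data"])
    if not page["hasMore"]:
        break
    page_start = page["endIndex"] + 1  # resume after the last index returned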
+
+     def get_metric_stats(
+         self,
+         project: str,
+         experiment: str,
+         metric_name: str
+     ) -> Dict[str, Any]:
+         """
+         Get metric statistics from local storage.
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+             metric_name: Metric name
+
+         Returns:
+             Dict with metric stats
+
+         Raises:
+             FileNotFoundError: If the metric is not found
+         """
+         experiment_dir = self._get_experiment_dir(project, experiment)
+         metric_dir = experiment_dir / "metrics" / metric_name
+         metadata_file = metric_dir / "metadata.json"
+
+         if not metadata_file.exists():
+             raise FileNotFoundError(f"Metric {metric_name} not found")
+
+         with open(metadata_file, "r") as f:
+             metric_meta = json.load(f)
+
+         return {
+             "metricId": metric_meta["metricId"],
+             "name": metric_meta["name"],
+             "description": metric_meta.get("description"),
+             "tags": metric_meta.get("tags", []),
+             "metadata": metric_meta.get("metadata"),
+             "totalDataPoints": str(metric_meta["totalDataPoints"]),
+             "bufferedDataPoints": str(metric_meta["totalDataPoints"]),  # All points stay buffered in local mode
+             "chunkedDataPoints": "0",  # No chunking in local mode
+             "totalChunks": 0,
+             "chunkSize": 10000,
+             "firstDataAt": metric_meta.get("createdAt"),
+             "lastDataAt": metric_meta.get("updatedAt"),
+             "createdAt": metric_meta.get("createdAt"),
+             "updatedAt": metric_meta.get("updatedAt", metric_meta.get("createdAt"))
+         }
+
+     def list_metrics(
+         self,
+         project: str,
+         experiment: str
+     ) -> List[Dict[str, Any]]:
+         """
+         List all metrics in an experiment from local storage.
+
+         Args:
+             project: Project name
+             experiment: Experiment name
+
+         Returns:
+             List of metric summaries
+         """
+         experiment_dir = self._get_experiment_dir(project, experiment)
+         metrics_dir = experiment_dir / "metrics"
+
+         if not metrics_dir.exists():
+             return []
+
+         metrics = []
+         for metric_dir in metrics_dir.iterdir():
+             if metric_dir.is_dir():
+                 metadata_file = metric_dir / "metadata.json"
+                 if metadata_file.exists():
+                     with open(metadata_file, "r") as f:
+                         metric_meta = json.load(f)
+                     metrics.append({
+                         "metricId": metric_meta["metricId"],
+                         "name": metric_meta["name"],
+                         "description": metric_meta.get("description"),
+                         "tags": metric_meta.get("tags", []),
+                         "totalDataPoints": str(metric_meta["totalDataPoints"]),
+                         "createdAt": metric_meta.get("createdAt")
+                     })
+
+         return metrics