ml-dash 0.6.6__py3-none-any.whl → 0.6.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ml_dash/storage.py CHANGED
@@ -1095,3 +1095,406 @@ class LocalStorage:
         )
 
         return metrics
+
+    # ============================================================================
+    # Track Storage Methods
+    # ============================================================================
+
+    def _serialize_value(self, value: Any) -> Any:
+        """
+        Convert value to JSON-serializable format.
+
+        Handles numpy arrays, nested dicts/lists, etc.
+
+        Args:
+            value: Value to serialize
+
+        Returns:
+            JSON-serializable value
+        """
+        # Check for numpy array
+        if hasattr(value, '__array__') or (hasattr(value, 'tolist') and hasattr(value, 'dtype')):
+            # It's a numpy array
+            try:
+                return value.tolist()
+            except AttributeError:
+                pass
+
+        # Check for numpy scalar types
+        if hasattr(value, 'item'):
+            try:
+                return value.item()
+            except (AttributeError, ValueError):
+                pass
+
+        # Recursively handle dicts
+        if isinstance(value, dict):
+            return {k: self._serialize_value(v) for k, v in value.items()}
+
+        # Recursively handle lists
+        if isinstance(value, (list, tuple)):
+            return [self._serialize_value(v) for v in value]
+
+        # Return as-is for other types (int, float, str, bool, None)
+        return value
+
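In practice, `_serialize_value` makes mixed numpy/builtin payloads safe for `json.dumps`. A minimal sketch (hypothetical values; assumes numpy is installed and `storage` is a `LocalStorage` instance):

    import json
    import numpy as np

    payload = {"pose": np.array([0.1, 0.2]), "step": np.int64(3), "tag": "demo"}
    safe = storage._serialize_value(payload)
    # -> {'pose': [0.1, 0.2], 'step': 3, 'tag': 'demo'}
    json.dumps(safe)  # no TypeError
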
+    def _flatten_dict(self, obj: Any, prefix: str = '') -> Dict[str, Any]:
+        """
+        Flatten nested dict with dot notation (e.g., camera.pos).
+
+        Args:
+            obj: Object to flatten
+            prefix: Current key prefix
+
+        Returns:
+            Flattened dict
+        """
+        result = {}
+
+        if not isinstance(obj, dict):
+            # Serialize the value before returning
+            serialized = self._serialize_value(obj)
+            return {prefix: serialized} if prefix else serialized
+
+        for key, value in obj.items():
+            new_key = f"{prefix}.{key}" if prefix else key
+
+            if isinstance(value, dict):
+                result.update(self._flatten_dict(value, new_key))
+            else:
+                # Serialize the value
+                result[new_key] = self._serialize_value(value)
+
+        return result
+
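Nested entries are flattened into dot-separated column names before they are written. A quick sketch (same hypothetical `storage` instance):

    storage._flatten_dict({"camera": {"pos": [1.0, 2.0], "fov": 60}})
    # -> {'camera.pos': [1.0, 2.0], 'camera.fov': 60}
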
+    def append_batch_to_track(
+        self,
+        owner: str,
+        project: str,
+        prefix: str,
+        topic: str,
+        entries: List[Dict[str, Any]],
+    ) -> Dict[str, Any]:
+        """
+        Append batch of timestamped entries to a track in local storage.
+
+        Storage format:
+            .dash/{owner}/{project}/{prefix}/tracks/{topic_safe}/
+                data.jsonl       # Timestamped entries (one JSON object per line)
+                metadata.json    # Track metadata (topic, columns, stats)
+
+        Args:
+            owner: Owner/user
+            project: Project name
+            prefix: Experiment prefix
+            topic: Track topic (e.g., "robot/position")
+            entries: List of entries with timestamp and data fields
+
+        Returns:
+            Dict with trackId, count
+        """
+        experiment_dir = self._get_experiment_dir(owner, project, prefix)
+        tracks_dir = experiment_dir / "tracks"
+        tracks_dir.mkdir(parents=True, exist_ok=True)
+
+        # Sanitize topic for directory name (replace / with _)
+        topic_safe = topic.replace("/", "_")
+        track_dir = tracks_dir / topic_safe
+        track_dir.mkdir(exist_ok=True)
+
+        data_file = track_dir / "data.jsonl"
+        metadata_file = track_dir / "metadata.json"
+
+        # File-based lock for concurrent writes
+        lock_file = track_dir / ".metadata.lock"
+        with self._file_lock(lock_file):
+            # Load or initialize metadata
+            if metadata_file.exists():
+                try:
+                    with open(metadata_file, "r") as f:
+                        track_meta = json.load(f)
+                except (json.JSONDecodeError, IOError):
+                    # Corrupted metadata, reinitialize
+                    track_meta = {
+                        "trackId": f"local-track-{topic_safe}",
+                        "topic": topic,
+                        "columns": [],
+                        "totalEntries": 0,
+                        "firstTimestamp": None,
+                        "lastTimestamp": None,
+                        "createdAt": datetime.utcnow().isoformat() + "Z",
+                    }
+            else:
+                track_meta = {
+                    "trackId": f"local-track-{topic_safe}",
+                    "topic": topic,
+                    "columns": [],
+                    "totalEntries": 0,
+                    "firstTimestamp": None,
+                    "lastTimestamp": None,
+                    "createdAt": datetime.utcnow().isoformat() + "Z",
+                }
+
+            # Process entries and update metadata
+            all_columns = set(track_meta["columns"])
+            min_ts = track_meta["firstTimestamp"]
+            max_ts = track_meta["lastTimestamp"]
+
+            processed_entries = []
+            for entry in entries:
+                timestamp = entry.get("timestamp")
+                if timestamp is None:
+                    continue
+
+                # Extract data fields (everything except timestamp)
+                data_fields = {k: v for k, v in entry.items() if k != "timestamp"}
+
+                # Flatten nested structures
+                flattened = self._flatten_dict(data_fields)
+
+                # Update column set
+                all_columns.update(flattened.keys())
+
+                # Update timestamp range
+                if min_ts is None or timestamp < min_ts:
+                    min_ts = timestamp
+                if max_ts is None or timestamp > max_ts:
+                    max_ts = timestamp
+
+                processed_entries.append({
+                    "timestamp": timestamp,
+                    **flattened
+                })
+
+            # Append entries to JSONL file (sorted by timestamp for consistency)
+            processed_entries.sort(key=lambda x: x["timestamp"])
+            with open(data_file, "a") as f:
+                for entry in processed_entries:
+                    f.write(json.dumps(entry) + "\n")
+
+            # Update metadata
+            track_meta["columns"] = sorted(list(all_columns))
+            track_meta["totalEntries"] += len(processed_entries)
+            track_meta["firstTimestamp"] = min_ts
+            track_meta["lastTimestamp"] = max_ts
+
+            # Write metadata
+            with open(metadata_file, "w") as f:
+                json.dump(track_meta, f, indent=2)
+
+        return {
+            "trackId": track_meta["trackId"],
+            "count": len(processed_entries),
+        }
+
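Under these semantics a batch append writes one JSONL line per entry and keeps the metadata file in sync. A sketch with hypothetical owner/project/prefix values:

    result = storage.append_batch_to_track(
        owner="alice", project="demo", prefix="exp-01",
        topic="robot/position",
        entries=[
            {"timestamp": 0.0, "pos": {"x": 1.0, "y": 2.0}},
            {"timestamp": 0.5, "pos": {"x": 1.1, "y": 2.1}},
        ],
    )
    # result -> {'trackId': 'local-track-robot_position', 'count': 2}
    # data.jsonl gains lines like {"timestamp": 0.0, "pos.x": 1.0, "pos.y": 2.0}
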
+    def read_track_data(
+        self,
+        owner: str,
+        project: str,
+        prefix: str,
+        topic: str,
+        start_timestamp: Optional[float] = None,
+        end_timestamp: Optional[float] = None,
+        columns: Optional[List[str]] = None,
+        format: str = "json",
+    ) -> Any:
+        """
+        Read track data from local storage with optional filtering.
+
+        Args:
+            owner: Owner/user
+            project: Project name
+            prefix: Experiment prefix
+            topic: Track topic
+            start_timestamp: Optional start timestamp filter
+            end_timestamp: Optional end timestamp filter
+            columns: Optional list of columns to retrieve
+            format: Export format ('json', 'jsonl', 'parquet', 'mocap')
+
+        Returns:
+            Track data in requested format
+        """
+        experiment_dir = self._get_experiment_dir(owner, project, prefix)
+        topic_safe = topic.replace("/", "_")
+        track_dir = experiment_dir / "tracks" / topic_safe
+        data_file = track_dir / "data.jsonl"
+
+        if not data_file.exists():
+            if format == "json":
+                return {"entries": [], "count": 0}
+            elif format == "jsonl":
+                return b""
+            elif format == "parquet":
+                # Return empty parquet file
+                import pyarrow as pa
+                import pyarrow.parquet as pq
+                import io
+                table = pa.table({"timestamp": []})
+                buf = io.BytesIO()
+                pq.write_table(table, buf)
+                return buf.getvalue()
+            elif format == "mocap":
+                return {
+                    "version": "1.0",
+                    "metadata": {"topic": topic, "frameCount": 0, "duration": 0},
+                    "channels": [],
+                    "frames": []
+                }
+
+        # Read all entries from JSONL file
+        entries = []
+        with open(data_file, "r") as f:
+            for line in f:
+                if line.strip():
+                    entry = json.loads(line)
+
+                    # Filter by timestamp range
+                    timestamp = entry.get("timestamp")
+                    if start_timestamp is not None and timestamp < start_timestamp:
+                        continue
+                    if end_timestamp is not None and timestamp > end_timestamp:
+                        continue
+
+                    # Filter by columns
+                    if columns:
+                        filtered_entry = {"timestamp": timestamp}
+                        for col in columns:
+                            if col in entry:
+                                filtered_entry[col] = entry[col]
+                        entries.append(filtered_entry)
+                    else:
+                        entries.append(entry)
+
+        # Return in requested format
+        if format == "json":
+            return {"entries": entries, "count": len(entries)}
+
+        elif format == "jsonl":
+            lines = [json.dumps(entry) for entry in entries]
+            return "\n".join(lines).encode('utf-8')
+
+        elif format == "parquet":
+            # Convert to Apache Parquet
+            import pyarrow as pa
+            import pyarrow.parquet as pq
+            import io
+
+            if not entries:
+                table = pa.table({"timestamp": []})
+            else:
+                # Build schema from entries
+                table = pa.Table.from_pylist(entries)
+
+            buf = io.BytesIO()
+            pq.write_table(table, buf, compression='zstd')
+            return buf.getvalue()
+
+        elif format == "mocap":
+            # Read metadata
+            metadata_file = track_dir / "metadata.json"
+            track_meta = {}
+            if metadata_file.exists():
+                with open(metadata_file, "r") as f:
+                    track_meta = json.load(f)
+
+            # Build mocap format
+            if not entries:
+                return {
+                    "version": "1.0",
+                    "metadata": {
+                        "topic": topic,
+                        "frameCount": 0,
+                        "duration": 0,
+                        "startTime": 0,
+                        "endTime": 0,
+                    },
+                    "channels": [],
+                    "frames": []
+                }
+
+            first_ts = entries[0]["timestamp"]
+            last_ts = entries[-1]["timestamp"]
+            duration = last_ts - first_ts
+            fps = track_meta.get("metadata", {}).get("fps", 30) if isinstance(track_meta.get("metadata"), dict) else 30
+
+            # Get all channels (columns)
+            all_channels = set()
+            for entry in entries:
+                all_channels.update(k for k in entry.keys() if k != "timestamp")
+
+            return {
+                "version": "1.0",
+                "metadata": {
+                    "topic": topic,
+                    "description": track_meta.get("description"),
+                    "tags": track_meta.get("tags", []),
+                    "fps": fps,
+                    "duration": duration,
+                    "frameCount": len(entries),
+                    "startTime": first_ts,
+                    "endTime": last_ts,
+                },
+                "channels": sorted(list(all_channels)),
+                "frames": [{"time": e["timestamp"], **{k: v for k, v in e.items() if k != "timestamp"}} for e in entries]
+            }
+
+        else:
+            raise ValueError(f"Unsupported format: {format}")
+
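Reads support range and column filters plus the four export formats. A sketch against the same hypothetical `storage` instance:

    data = storage.read_track_data(
        owner="alice", project="demo", prefix="exp-01",
        topic="robot/position",
        start_timestamp=0.0, end_timestamp=10.0,
        columns=["pos.x"],
    )
    # -> {'entries': [{'timestamp': 0.0, 'pos.x': 1.0}, ...], 'count': ...}

    raw = storage.read_track_data(
        owner="alice", project="demo", prefix="exp-01",
        topic="robot/position", format="jsonl",
    )  # bytes, one JSON object per line; 'parquet' requires pyarrow
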
+    def list_tracks(
+        self,
+        owner: str,
+        project: str,
+        prefix: str,
+        topic_filter: Optional[str] = None,
+    ) -> List[Dict[str, Any]]:
+        """
+        List all tracks in an experiment.
+
+        Args:
+            owner: Owner/user
+            project: Project name
+            prefix: Experiment prefix
+            topic_filter: Optional topic filter (e.g., "robot/*")
+
+        Returns:
+            List of track summaries
+        """
+        experiment_dir = self._get_experiment_dir(owner, project, prefix)
+        tracks_dir = experiment_dir / "tracks"
+
+        if not tracks_dir.exists():
+            return []
+
+        tracks = []
+        for track_dir in tracks_dir.iterdir():
+            if track_dir.is_dir():
+                metadata_file = track_dir / "metadata.json"
+                if metadata_file.exists():
+                    with open(metadata_file, "r") as f:
+                        track_meta = json.load(f)
+
+                    topic = track_meta["topic"]
+
+                    # Apply topic filter
+                    if topic_filter:
+                        if topic_filter.endswith("/*"):
+                            # Prefix match
+                            prefix_match = topic_filter[:-2]
+                            if not topic.startswith(prefix_match):
+                                continue
+                        elif topic != topic_filter:
+                            # Exact match
+                            continue
+
+                    tracks.append({
+                        "id": track_meta["trackId"],
+                        "topic": topic,
+                        "totalEntries": track_meta["totalEntries"],
+                        "firstTimestamp": track_meta.get("firstTimestamp"),
+                        "lastTimestamp": track_meta.get("lastTimestamp"),
+                        "columns": track_meta.get("columns", []),
+                        "createdAt": track_meta.get("createdAt"),
+                    })
+
+        return tracks
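The "/*" suffix gives simple prefix matching over topics, e.g. (sketch):

    storage.list_tracks(owner="alice", project="demo", prefix="exp-01",
                        topic_filter="robot/*")
    # matches "robot/position" and "robot/velocity", but not "camera/rgb"
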
ml_dash/track.py ADDED
@@ -0,0 +1,263 @@
+"""
+Track API - Timestamped multi-modal data logging for ML experiments.
+
+Tracks are used for storing sparse timestamped data like robot trajectories,
+camera poses, sensor readings, etc. Each track has a topic (e.g., "robot/position")
+and stores entries with timestamps and arbitrary data fields.
+"""
+
+from typing import Dict, Any, List, Optional, TYPE_CHECKING
+from collections import defaultdict
+
+if TYPE_CHECKING:
+    from .experiment import Experiment
+
+
+class TracksManager:
+    """
+    Manager for track operations with support for global and per-topic flush.
+
+    Usage:
+        # Append to specific topic
+        experiment.tracks("robot/position").append(q=[0.1, 0.2], _ts=1.0)
+
+        # Flush all topics
+        experiment.tracks.flush()
+
+        # Flush specific topic
+        experiment.tracks("robot/position").flush()
+    """
+
+    def __init__(self, experiment: 'Experiment'):
+        """
+        Initialize TracksManager.
+
+        Args:
+            experiment: Parent Experiment instance
+        """
+        self._experiment = experiment
+        self._track_builders: Dict[str, 'TrackBuilder'] = {}  # Cache for TrackBuilder instances
+
+    def __call__(self, topic: str) -> 'TrackBuilder':
+        """
+        Get TrackBuilder for a specific topic.
+
+        Args:
+            topic: Track topic (e.g., "robot/position", "camera/rgb")
+
+        Returns:
+            TrackBuilder instance for the topic
+
+        Example:
+            experiment.tracks("robot/position").append(x=1.0, y=2.0, _ts=0.5)
+        """
+        if topic not in self._track_builders:
+            self._track_builders[topic] = TrackBuilder(self._experiment, topic, tracks_manager=self)
+
+        return self._track_builders[topic]
+
+    def flush(self) -> None:
+        """
+        Flush all topics to storage (remote or local).
+
+        This will write all buffered track entries to the server/filesystem.
+
+        Example:
+            experiment.tracks.flush()
+        """
+        # Flush all topics via background buffer manager
+        if self._experiment._buffer_manager:
+            self._experiment._buffer_manager.flush_tracks()
+
+
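Because builders are cached per topic, repeated calls with the same topic return the same object (sketch, assuming an opened `experiment`):

    b1 = experiment.tracks("robot/position")
    b2 = experiment.tracks("robot/position")
    assert b1 is b2  # same cached TrackBuilder
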
+class TrackBuilder:
+    """
+    Builder for track operations.
+
+    Provides fluent API for appending timestamped data to tracks.
+
+    Usage:
+        # Append single entry
+        experiment.tracks("robot/position").append(q=[0.1, 0.2], e=[0.5, 0.6], _ts=1.0)
+
+        # Flush specific topic
+        experiment.tracks("robot/position").flush()
+    """
+
+    def __init__(
+        self,
+        experiment: 'Experiment',
+        topic: str,
+        description: Optional[str] = None,
+        tags: Optional[List[str]] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+        tracks_manager: Optional['TracksManager'] = None
+    ):
+        """
+        Initialize TrackBuilder.
+
+        Args:
+            experiment: Parent Experiment instance
+            topic: Track topic (e.g., "robot/position")
+            description: Optional track description
+            tags: Optional tags for categorization
+            metadata: Optional structured metadata (fps, units, etc.)
+            tracks_manager: Parent TracksManager (for global flush)
+        """
+        self._experiment = experiment
+        self._topic = topic
+        self._description = description
+        self._tags = tags
+        self._metadata = metadata
+        self._tracks_manager = tracks_manager
+
+    def append(self, **kwargs) -> 'TrackBuilder':
+        """
+        Append a single timestamped entry to the track.
+
+        The _ts parameter is required for the timestamp. All other kwargs are data fields.
+
+        Entries with the same _ts will be merged when flushed.
+
+        Args:
+            _ts: Timestamp (required)
+            **kwargs: Data fields (e.g., q=[0.1, 0.2], e=[0.5, 0.6])
+
+        Returns:
+            Self for method chaining
+
+        Raises:
+            ValueError: If _ts is not provided
+
+        Example:
+            experiment.tracks("robot/position").append(
+                q=[0.1, -0.22, 0.45],
+                e=[0.5, 0.0, 0.6],
+                a=[1.0, 0.0],
+                v=[0.01, 0.02],
+                _ts=2.0
+            )
+        """
+        # Extract timestamp
+        if '_ts' not in kwargs:
+            raise ValueError("Timestamp '_ts' is required for track.append()")
+
+        timestamp = kwargs.pop('_ts')
+
+        # Validate timestamp
+        try:
+            timestamp = float(timestamp)
+        except (TypeError, ValueError):
+            raise ValueError(f"Timestamp '_ts' must be numeric, got: {type(timestamp)}")
+
+        # Remaining kwargs are data fields
+        data = kwargs
+
+        # Write to experiment (will be buffered)
+        self._experiment._write_track(self._topic, timestamp, data)
+
+        return self
+
+    def flush(self) -> 'TrackBuilder':
+        """
+        Flush this topic's buffered entries to storage.
+
+        Example:
+            experiment.tracks("robot/position").flush()
+
+        Returns:
+            Self for method chaining
+        """
+        if self._experiment._buffer_manager:
+            self._experiment._buffer_manager.flush_track(self._topic)
+
+        return self
+
+    def read(
+        self,
+        start_timestamp: Optional[float] = None,
+        end_timestamp: Optional[float] = None,
+        columns: Optional[List[str]] = None,
+        format: str = "json"
+    ) -> Any:
+        """
+        Read track data with optional filtering.
+
+        Args:
+            start_timestamp: Optional start timestamp filter
+            end_timestamp: Optional end timestamp filter
+            columns: Optional list of columns to retrieve
+            format: Export format ('json', 'jsonl', 'parquet', 'mocap')
+
+        Returns:
+            Track data in requested format
+
+        Raises:
+            ValueError: If experiment not opened or no client configured
+
+        Example:
+            # Get all data as JSON
+            data = experiment.tracks("robot/position").read()
+
+            # Get data in time range
+            data = experiment.tracks("robot/position").read(
+                start_timestamp=0.0,
+                end_timestamp=10.0
+            )
+
+            # Export as JSONL
+            jsonl_bytes = experiment.tracks("robot/position").read(format="jsonl")
+
+            # Export as Parquet
+            parquet_bytes = experiment.tracks("robot/position").read(format="parquet")
+
+            # Export as Mocap JSON
+            mocap_data = experiment.tracks("robot/position").read(format="mocap")
+        """
+        # Remote mode
+        if self._experiment.run._client:
+            # Need experiment ID for remote mode
+            if not self._experiment._experiment_id:
+                raise ValueError("Experiment must be opened before reading tracks. Use 'with experiment.run:'")
+
+            return self._experiment.run._client.get_track_data(
+                experiment_id=self._experiment._experiment_id,
+                topic=self._topic,
+                start_timestamp=start_timestamp,
+                end_timestamp=end_timestamp,
+                columns=columns,
+                format=format
+            )
+
+        # Local mode
+        if self._experiment.run._storage:
+            return self._experiment.run._storage.read_track_data(
+                owner=self._experiment.run.owner,
+                project=self._experiment.run.project,
+                prefix=self._experiment.run._folder_path,
+                topic=self._topic,
+                start_timestamp=start_timestamp,
+                end_timestamp=end_timestamp,
+                columns=columns,
+                format=format
+            )
+
+        raise ValueError("No client or storage configured for experiment")
+
+    def list_entries(self) -> List[Dict[str, Any]]:
+        """
+        List all entries in this track (for remote mode).
+
+        Returns:
+            List of entry dicts
+
+        Example:
+            entries = experiment.tracks("robot/position").list_entries()
+        """
+        # Just read with default JSON format
+        result = self.read(format="json")
+
+        if isinstance(result, dict) and "entries" in result:
+            return result["entries"]
+
+        return []
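Putting the new API together, a typical logging session might look like this (a sketch; the Experiment setup is assumed, as its constructor is not part of this diff):

    # Hypothetical setup: an ml_dash Experiment bound to local storage or a remote client
    experiment = ...

    track = experiment.tracks("robot/position")
    for i in range(100):
        track.append(q=[0.1 * i, 0.2 * i], _ts=i / 30.0)  # _ts is required

    experiment.tracks.flush()  # flush every topic
    data = track.read(start_timestamp=0.0, end_timestamp=1.0)
    print(data["count"], "entries in the first second")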