ml-dash 0.6.5__py3-none-any.whl → 0.6.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -148,6 +148,18 @@ def add_parser(subparsers) -> argparse.ArgumentParser:
148
148
  help="JWT token for authentication (optional - auto-loads from 'ml-dash login' if not provided)",
149
149
  )
150
150
 
151
+ # Track upload mode
152
+ parser.add_argument(
153
+ "--tracks",
154
+ type=str,
155
+ help="Upload track data file (e.g., robot_position.jsonl). Requires --remote-path.",
156
+ )
157
+ parser.add_argument(
158
+ "--remote-path",
159
+ type=str,
160
+ help="Remote path for track (e.g., 'namespace/project/exp/robot/position')",
161
+ )
162
+
151
163
  """
152
164
 
153
165
  cd .dash/geyang
@@ -265,7 +277,11 @@ def discover_experiments(
265
277
 
266
278
  Args:
267
279
  local_path: Root path of local storage
268
- project_filter: Glob pattern to filter experiments by prefix (e.g., "tom/*/exp*")
280
+ project_filter: Either a simple project name (e.g., "proj1") or a glob
281
+ pattern for the full path (e.g., "tom/*/exp*"). If the
282
+ filter contains '/', '*', or '?', it's treated as a glob
283
+ pattern matched against the full relative path. Otherwise,
284
+ it's matched exactly against the project name.
269
285
  experiment_filter: Only discover this experiment (requires project_filter)
270
286
 
271
287
  Returns:
@@ -319,9 +335,18 @@ def discover_experiments(
319
335
 
320
336
  # Apply filters with glob pattern support
321
337
  if project_filter:
322
- # Support glob pattern matching on the full relative path
323
- if not fnmatch.fnmatch(full_relative_path, project_filter):
324
- continue
338
+ # Check if project_filter is a glob pattern or simple project name
339
+ is_glob_pattern = any(c in project_filter for c in ['*', '?', '/'])
340
+
341
+ if is_glob_pattern:
342
+ # Treat as glob pattern - match against full relative path
343
+ if not fnmatch.fnmatch(full_relative_path, project_filter):
344
+ continue
345
+ else:
346
+ # Treat as simple project name - match against parsed project
347
+ if project_name != project_filter:
348
+ continue
349
+
325
350
  if experiment_filter and exp_name != experiment_filter:
326
351
  continue
327
352
 
@@ -1066,6 +1091,121 @@ class ExperimentUploader:
1066
1091
  return total_uploaded
1067
1092
 
1068
1093
 
1094
def _split_track_remote_path(remote_path):
    """Split 'namespace/project/experiment/topic[/more/topic]' into its parts.

    Returns a ``(namespace, project, experiment_name, topic)`` tuple, or
    ``None`` when the path has fewer than four segments or an empty segment.
    Everything after the third segment is the topic, so multi-level topics
    such as ``robot/position`` survive intact.
    """
    parts = remote_path.strip("/").split("/")
    if len(parts) < 4 or not all(parts):
        return None
    namespace, project, experiment_name = parts[:3]
    return namespace, project, experiment_name, "/".join(parts[3:])


def _read_track_entries(local_file):
    """Read JSONL track entries from ``local_file``, warning about bad lines.

    Blank lines are ignored. Lines that are not valid JSON, are not JSON
    objects, or lack a ``timestamp`` key are reported and skipped.
    """
    entries = []
    # Explicit encoding so parsing does not depend on the platform locale.
    with open(local_file, "r", encoding="utf-8") as f:
        for line_num, line in enumerate(f, 1):
            if not line.strip():
                continue
            try:
                entry = json.loads(line)
            except json.JSONDecodeError as e:
                console.print(f"[yellow]Warning:[/yellow] Line {line_num} invalid JSON: {e}")
                continue
            # A bare number/list/string is valid JSON but not a track entry;
            # without this check `"timestamp" not in entry` could raise
            # TypeError (numbers) or wrongly succeed (strings).
            if not isinstance(entry, dict):
                console.print(f"[yellow]Warning:[/yellow] Line {line_num} is not a JSON object, skipping")
                continue
            if "timestamp" not in entry:
                console.print(f"[yellow]Warning:[/yellow] Line {line_num} missing timestamp, skipping")
                continue
            entries.append(entry)
    return entries


def cmd_upload_track(args: argparse.Namespace) -> int:
    """Upload a JSONL track data file to the remote server.

    Args:
        args: Parsed CLI arguments. Reads ``dash_url``, ``api_key``,
            ``tracks`` (local JSONL file), ``remote_path``
            ('namespace/project/experiment/topic', where the topic may
            itself contain '/') and, if present, ``verbose``.

    Returns:
        Exit code (0 for success, 1 for error)
    """
    # Load config; CLI flags take precedence over configured values.
    config = Config()
    remote_url = args.dash_url or config.remote_url
    api_key = args.api_key or config.api_key

    if not remote_url:
        console.print("[red]Error:[/red] --dash-url is required (or set in config)")
        return 1

    if not args.tracks or not args.remote_path:
        console.print("[red]Error:[/red] Both --tracks and --remote-path are required for track upload")
        console.print("Usage: ml-dash upload --tracks <local-file> --remote-path namespace/project/exp/topic")
        return 1

    local_file = Path(args.tracks)
    if not local_file.exists():
        console.print(f"[red]Error:[/red] File not found: {local_file}")
        return 1

    target = _split_track_remote_path(args.remote_path)
    if target is None:
        console.print(
            "[red]Error:[/red] Remote path must be: 'namespace/project/experiment/topic'"
        )
        console.print("Example: geyang/project/exp1/robot/position")
        return 1
    namespace, project, experiment_name, topic = target

    console.print("[bold]Uploading track data...[/bold]")
    console.print(f"  Local file: {local_file}")
    console.print(f"  Namespace: {namespace}")
    console.print(f"  Project: {project}")
    console.print(f"  Experiment: {experiment_name}")
    console.print(f"  Topic: {topic}")

    try:
        remote_client = RemoteClient(base_url=remote_url, namespace=namespace, api_key=api_key)

        # Resolve the experiment ID that the track endpoints are keyed on.
        exp_data = remote_client.get_experiment_graphql(project, experiment_name)
        if not exp_data:
            console.print(
                f"[red]Error:[/red] Experiment '{experiment_name}' not found in project '{project}'"
            )
            return 1

        experiment_id = exp_data["id"]

        console.print("\n[cyan]Reading local file...[/cyan]")
        entries = _read_track_entries(local_file)

        if not entries:
            console.print("[red]Error:[/red] No valid entries found in file")
            return 1

        console.print(f"  Found {len(entries)} entries")

        # Upload in fixed-size batches, reporting progress after each one.
        console.print("\n[cyan]Uploading to server...[/cyan]")
        batch_size = 1000
        total_uploaded = 0

        for start in range(0, len(entries), batch_size):
            batch = entries[start:start + batch_size]
            remote_client.append_batch_to_track(
                experiment_id=experiment_id,
                topic=topic,
                entries=batch,
            )
            total_uploaded += len(batch)
            console.print(f"  Uploaded {total_uploaded}/{len(entries)} entries")

        console.print("\n[green]✓ Track data uploaded successfully[/green]")
        console.print(f"  Total entries: {total_uploaded}")
        console.print(f"  Topic: {topic}")
        console.print(f"  Experiment: {namespace}/{project}/{experiment_name}")

        return 0

    except Exception as e:
        # Top-level CLI boundary: report the failure and return an exit code.
        console.print(f"[red]Error uploading track data:[/red] {e}")
        if getattr(args, "verbose", False):
            import traceback
            console.print(traceback.format_exc())
        return 1
1207
+
1208
+
1069
1209
  def cmd_upload(args: argparse.Namespace) -> int:
1070
1210
  """
1071
1211
  Execute upload command.
@@ -1076,6 +1216,10 @@ def cmd_upload(args: argparse.Namespace) -> int:
1076
1216
  Returns:
1077
1217
  Exit code (0 for success, 1 for error)
1078
1218
  """
1219
+ # Handle track upload if --tracks is specified
1220
+ if args.tracks:
1221
+ return cmd_upload_track(args)
1222
+
1079
1223
  # Load config
1080
1224
  config = Config()
1081
1225
 
ml_dash/client.py CHANGED
@@ -6,21 +6,61 @@ from typing import Optional, Dict, Any, List
6
6
  import httpx
7
7
 
8
8
 
9
def _serialize_key(key: Any) -> Any:
    """Convert a scalar-like dict key (e.g. a numpy integer) to a plain value.

    Native str/int/float/bool/None keys are returned untouched. Any other key
    exposing a zero-argument ``item()`` is unwrapped through it; if that fails
    or yields something unhashable, the original key is kept.
    """
    if key is None or isinstance(key, (str, int, float, bool)):
        return key
    item = getattr(key, "item", None)
    if not callable(item):
        return key
    try:
        converted = item()
        hash(converted)  # the result must still be usable as a dict key
    except (AttributeError, TypeError, ValueError):
        return key
    return converted


def _serialize_value(value: Any) -> Any:
    """
    Convert value to JSON-serializable format.

    Handles array-like objects (anything exposing ``__array__``, or both
    ``tolist`` and ``dtype``), scalar wrappers exposing ``item()``, and
    nested dicts/lists/tuples. Scalar-like dict keys are unwrapped as well,
    so the result can be passed to a JSON encoder.

    Args:
        value: Value to serialize

    Returns:
        JSON-serializable value; tuples come back as lists and values of
        unrecognised types are returned unchanged.
    """
    # Array-likes (numpy arrays and scalars, and objects mimicking them)
    if hasattr(value, "__array__") or (hasattr(value, "tolist") and hasattr(value, "dtype")):
        try:
            return value.tolist()
        except AttributeError:
            pass

    # Scalar wrappers. TypeError is caught too so that an unrelated object
    # whose `item` method needs arguments falls through instead of crashing.
    if hasattr(value, "item"):
        try:
            return value.item()
        except (AttributeError, TypeError, ValueError):
            pass

    # Recursively handle dicts (keys and values)
    if isinstance(value, dict):
        return {_serialize_key(k): _serialize_value(v) for k, v in value.items()}

    # Recursively handle lists and tuples
    if isinstance(value, (list, tuple)):
        return [_serialize_value(v) for v in value]

    # Return as-is for other types (int, float, str, bool, None)
    return value
46
+
47
+
9
48
  class RemoteClient:
10
49
  """Client for communicating with ML-Dash server."""
11
50
 
12
- def __init__(self, base_url: str, namespace: str, api_key: Optional[str] = None):
51
+ def __init__(self, base_url: str, namespace: Optional[str] = None, api_key: Optional[str] = None):
13
52
  """
14
53
  Initialize remote client.
15
54
 
16
55
  Args:
17
56
  base_url: Base URL of ML-Dash server (e.g., "http://localhost:3000")
18
- namespace: Namespace slug (e.g., "my-namespace")
57
+ namespace: Namespace slug (e.g., "my-namespace"). If not provided, will be queried from server.
19
58
  api_key: JWT token for authentication (optional - auto-loads from storage if not provided)
20
59
 
21
60
  Note:
22
61
  If no api_key is provided, token will be loaded from storage on first API call.
23
62
  If still not found, AuthenticationError will be raised at that time.
63
+ If no namespace is provided, it will be fetched from the server on first API call.
24
64
  """
25
65
  # Store original base URL for GraphQL (no /api prefix)
26
66
  self.graphql_base_url = base_url.rstrip("/")
@@ -28,9 +68,6 @@ class RemoteClient:
28
68
  # Add /api prefix to base URL for REST API calls
29
69
  self.base_url = base_url.rstrip("/") + "/api"
30
70
 
31
- # Store namespace
32
- self.namespace = namespace
33
-
34
71
  # If no api_key provided, try to load from storage
35
72
  if not api_key:
36
73
  from .auth.token_storage import get_token_storage
@@ -39,10 +76,70 @@ class RemoteClient:
39
76
  api_key = storage.load("ml-dash-token")
40
77
 
41
78
  self.api_key = api_key
79
+
80
+ # Store namespace (can be None, will be fetched on first API call if needed)
81
+ self._namespace = namespace
82
+ self._namespace_fetched = False
83
+
42
84
  self._rest_client = None
43
85
  self._gql_client = None
44
86
  self._id_cache: Dict[str, str] = {} # Cache for slug -> ID mappings
45
87
 
88
@property
def namespace(self) -> str:
    """
    Namespace slug, resolved lazily.

    Returns the stored namespace when one is set. Otherwise, on the first
    access only, asks ``_fetch_namespace_from_server()`` for it and remembers
    that the lookup was attempted.

    Returns:
        Namespace slug

    Raises:
        ValueError: If no namespace is set and the lookup produced none
    """
    resolved = self._namespace
    if not resolved and not self._namespace_fetched:
        # One-shot lookup: the flag is set even when nothing comes back.
        resolved = self._namespace = self._fetch_namespace_from_server()
        self._namespace_fetched = True

    if resolved:
        return resolved
    raise ValueError("Could not determine namespace. Please provide --namespace explicitly.")


@namespace.setter
def namespace(self, value: str):
    """Store an explicit namespace and mark it as resolved (no server lookup)."""
    self._namespace_fetched = True
    self._namespace = value
118
+
119
def _fetch_namespace_from_server(self) -> Optional[str]:
    """
    Fetch current user's namespace (their username) from server.

    Returns:
        The ``me.username`` value from the GraphQL response, or None if the
        query fails or the response carries no user.

    Raises:
        Whatever ``_ensure_authenticated()`` raises when no credentials are
        available. This is deliberately not swallowed, so the caller sees
        the authentication problem rather than a generic "could not
        determine namespace" error.
    """
    # Outside the try block on purpose: a missing token is not a lookup
    # failure and must surface as an authentication error.
    self._ensure_authenticated()

    query = """
        query GetMyNamespace {
          me {
            username
          }
        }
    """
    try:
        result = self.graphql_query(query)
    except Exception:
        # Best-effort lookup: any transport/server failure means "unknown".
        return None

    # `me` can be null/absent; treat anything but a dict as "no user".
    me = result.get("me") if isinstance(result, dict) else None
    if not isinstance(me, dict):
        return None
    return me.get("username")
142
+
46
143
  def _ensure_authenticated(self):
47
144
  """Check if authenticated, raise error if not."""
48
145
  if not self.api_key:
@@ -1224,14 +1321,18 @@ class RemoteClient:
1224
1321
  }
1225
1322
  files {
1226
1323
  id
1227
- filename
1228
- path
1229
- contentType
1230
- sizeBytes
1231
- checksum
1324
+ name
1325
+ pPath
1232
1326
  description
1233
1327
  tags
1234
1328
  metadata
1329
+ physicalFile {
1330
+ filename
1331
+ contentType
1332
+ sizeBytes
1333
+ checksum
1334
+ s3Url
1335
+ }
1235
1336
  }
1236
1337
  parameters {
1237
1338
  id
@@ -1248,16 +1349,15 @@ class RemoteClient:
1248
1349
  return result.get("experiments", [])
1249
1350
 
1250
1351
  def get_experiment_graphql(
1251
- self, project_slug: str, experiment_name: str
1352
+ self, project_slug: str, experiment_name: str, namespace_slug: Optional[str] = None
1252
1353
  ) -> Optional[Dict[str, Any]]:
1253
1354
  """
1254
1355
  Get a single experiment via GraphQL.
1255
1356
 
1256
- Namespace is automatically inferred from JWT token on the server.
1257
-
1258
1357
  Args:
1259
1358
  project_slug: Project slug
1260
1359
  experiment_name: Experiment name
1360
+ namespace_slug: Namespace slug (optional - defaults to client's namespace)
1261
1361
 
1262
1362
  Returns:
1263
1363
  Experiment dict with metadata, or None if not found
@@ -1266,8 +1366,8 @@ class RemoteClient:
1266
1366
  httpx.HTTPStatusError: If request fails
1267
1367
  """
1268
1368
  query = """
1269
- query Experiment($projectSlug: String!, $experimentName: String!) {
1270
- experiment(projectSlug: $projectSlug, experimentName: $experimentName) {
1369
+ query Experiment($namespaceSlug: String, $projectSlug: String!, $experimentName: String!) {
1370
+ experiment(namespaceSlug: $namespaceSlug, projectSlug: $projectSlug, experimentName: $experimentName) {
1271
1371
  id
1272
1372
  name
1273
1373
  description
@@ -1291,14 +1391,18 @@ class RemoteClient:
1291
1391
  }
1292
1392
  files {
1293
1393
  id
1294
- filename
1295
- path
1296
- contentType
1297
- sizeBytes
1298
- checksum
1394
+ name
1395
+ pPath
1299
1396
  description
1300
1397
  tags
1301
1398
  metadata
1399
+ physicalFile {
1400
+ filename
1401
+ contentType
1402
+ sizeBytes
1403
+ checksum
1404
+ s3Url
1405
+ }
1302
1406
  }
1303
1407
  parameters {
1304
1408
  id
@@ -1307,7 +1411,11 @@ class RemoteClient:
1307
1411
  }
1308
1412
  }
1309
1413
  """
1414
+ # Use provided namespace or fall back to client's namespace
1415
+ ns = namespace_slug or self.namespace
1416
+
1310
1417
  variables = {
1418
+ "namespaceSlug": ns,
1311
1419
  "projectSlug": project_slug,
1312
1420
  "experimentName": experiment_name
1313
1421
  }
@@ -1366,14 +1474,18 @@ class RemoteClient:
1366
1474
  }
1367
1475
  files {
1368
1476
  id
1369
- filename
1370
- path
1371
- contentType
1372
- sizeBytes
1373
- checksum
1477
+ name
1478
+ pPath
1374
1479
  description
1375
1480
  tags
1376
1481
  metadata
1482
+ physicalFile {
1483
+ filename
1484
+ contentType
1485
+ sizeBytes
1486
+ checksum
1487
+ s3Url
1488
+ }
1377
1489
  }
1378
1490
  }
1379
1491
  }
@@ -1543,6 +1655,197 @@ class RemoteClient:
1543
1655
  response.raise_for_status()
1544
1656
  return response.json()
1545
1657
 
1658
+ # =============================================================================
1659
+ # Track Methods
1660
+ # =============================================================================
1661
+
1662
def create_track(
    self,
    experiment_id: str,
    topic: str,
    description: Optional[str] = None,
    tags: Optional[List[str]] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Register a new track under an experiment.

    Args:
        experiment_id: Experiment ID, interpolated into the request path
        topic: Track topic (e.g., "robot/position", "camera/rgb")
        description: Optional track description; omitted from the request when empty
        tags: Optional tags; omitted from the request when empty
        metadata: Optional metadata; omitted from the request when empty

    Returns:
        The decoded JSON body of the server response

    Raises:
        httpx.HTTPStatusError: If request fails (via ``raise_for_status()``)
    """
    # Only truthy optional fields are sent, in this fixed order.
    optional_fields = (
        ("description", description),
        ("tags", tags),
        ("metadata", metadata),
    )
    body: Dict[str, Any] = {"topic": topic}
    body.update({field: given for field, given in optional_fields if given})

    resp = self._client.post(f"/experiments/{experiment_id}/tracks", json=body)
    resp.raise_for_status()
    return resp.json()
1702
+
1703
def append_to_track(
    self,
    experiment_id: str,
    topic: str,
    timestamp: float,
    data: Dict[str, Any],
) -> Dict[str, Any]:
    """
    Append a single entry to a track.

    The payload is passed through ``_serialize_value`` first, exactly as
    ``append_batch_to_track`` does for its entries, so array-like and
    scalar-wrapper values (e.g. numpy) do not break JSON encoding.

    Args:
        experiment_id: Experiment ID, interpolated into the request path
        topic: Track topic (e.g., "robot/position"); percent-encoded
            in full, including '/', before being placed in the URL
        timestamp: Numeric timestamp
        data: Data fields as dict

    Returns:
        The decoded JSON body of the server response

    Raises:
        httpx.HTTPStatusError: If request fails (via ``raise_for_status()``)
    """
    import urllib.parse

    # safe='' so that '/' inside the topic does not split the URL path.
    topic_encoded = urllib.parse.quote(topic, safe='')

    # Keep single-entry and batch appends consistent in what they accept.
    payload = _serialize_value({"timestamp": timestamp, "data": data})

    response = self._client.post(
        f"/experiments/{experiment_id}/tracks/{topic_encoded}/append",
        json=payload,
    )
    response.raise_for_status()
    return response.json()
1735
+
1736
def append_batch_to_track(
    self,
    experiment_id: str,
    topic: str,
    entries: List[Dict[str, Any]],
) -> Dict[str, Any]:
    """
    Send several track entries to the server in one request.

    Args:
        experiment_id: Experiment ID, interpolated into the request path
        topic: Track topic (e.g., "robot/position"); percent-encoded
            in full, including '/', before being placed in the URL
        entries: List of entries, each with 'timestamp' and other data fields

    Returns:
        The decoded JSON body of the server response

    Raises:
        httpx.HTTPStatusError: If request fails (via ``raise_for_status()``)
    """
    from urllib.parse import quote

    # Each entry goes through _serialize_value so array-like values are
    # converted before JSON encoding.
    body = {"entries": list(map(_serialize_value, entries))}
    url = "/experiments/{}/tracks/{}/append_batch".format(
        experiment_id, quote(topic, safe="")
    )

    resp = self._client.post(url, json=body)
    resp.raise_for_status()
    return resp.json()
1769
+
1770
def get_track_data(
    self,
    experiment_id: str,
    topic: str,
    start_timestamp: Optional[float] = None,
    end_timestamp: Optional[float] = None,
    columns: Optional[List[str]] = None,
    format: str = "json",
) -> Any:
    """
    Download a track's data, optionally restricted by time range and columns.

    Args:
        experiment_id: Experiment ID, interpolated into the request path
        topic: Track topic; percent-encoded in full, including '/'
        start_timestamp: Optional start timestamp filter (sent as a string)
        end_timestamp: Optional end timestamp filter (sent as a string)
        columns: Optional list of columns, sent comma-joined
        format: Export format ('json', 'jsonl', 'parquet', 'mcap')

    Returns:
        Raw response bytes for 'jsonl', 'parquet' and 'mcap'; the decoded
        JSON body for any other format value.

    Raises:
        httpx.HTTPStatusError: If request fails (via ``raise_for_status()``)
    """
    from urllib.parse import quote

    query: Dict[str, str] = {"format": format}
    # A bound of 0 is a valid filter, hence the explicit None checks.
    for key, bound in (
        ("start_timestamp", start_timestamp),
        ("end_timestamp", end_timestamp),
    ):
        if bound is not None:
            query[key] = str(bound)
    if columns:
        query["columns"] = ",".join(columns)

    encoded_topic = quote(topic, safe="")
    resp = self._client.get(
        f"/experiments/{experiment_id}/tracks/{encoded_topic}/data",
        params=query,
    )
    resp.raise_for_status()

    wants_bytes = format in ("jsonl", "parquet", "mcap")
    return resp.content if wants_bytes else resp.json()
1818
+
1819
def list_tracks(
    self,
    experiment_id: str,
    topic_filter: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """
    Fetch the tracks recorded under an experiment.

    Args:
        experiment_id: Experiment ID, interpolated into the request path
        topic_filter: Optional topic filter (e.g., "robot/*"), sent as the
            ``topic`` query parameter; omitted when empty

    Returns:
        The ``tracks`` list from the response body, or an empty list when
        the key is absent

    Raises:
        httpx.HTTPStatusError: If request fails (via ``raise_for_status()``)
    """
    query: Dict[str, str] = {"topic": topic_filter} if topic_filter else {}

    resp = self._client.get(f"/experiments/{experiment_id}/tracks", params=query)
    resp.raise_for_status()
    return resp.json().get("tracks", [])
1848
+
1546
1849
  def close(self):
1547
1850
  """Close the HTTP clients."""
1548
1851
  self._client.close()