ml-dash 0.6.3__py3-none-any.whl → 0.6.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ml_dash/__init__.py CHANGED
@@ -43,7 +43,7 @@ from .params import ParametersBuilder
 from .run import RUN
 from .storage import LocalStorage
 
-__version__ = "0.6.3"
+__version__ = "0.6.4"
 
 __all__ = [
     "Experiment",
ml_dash/cli_commands/api.py CHANGED
@@ -56,6 +56,12 @@ Notes:
         type=str,
         help="ML-Dash server URL (default: https://api.dash.ml)",
     )
+    parser.add_argument(
+        "--namespace",
+        type=str,
+        required=True,
+        help="Namespace to use for queries (required)",
+    )
 
 
 def extract_path(data, path: str):
@@ -131,9 +137,12 @@ def cmd_api(args) -> int:
     # Get remote URL
     remote_url = args.dash_url or config.remote_url or "https://api.dash.ml"
 
+    # Get namespace
+    namespace = args.namespace
+
     try:
         # Initialize client
-        client = RemoteClient(base_url=remote_url)
+        client = RemoteClient(base_url=remote_url, namespace=namespace)
 
         # Determine query type and build query
         if args.mutation:
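Taken together, the two hunks above change the client contract for this command: `--namespace` is now mandatory and is threaded straight into `RemoteClient`. A minimal sketch of the new call shape (the URL and namespace values are placeholders, not part of the package):

    from ml_dash.client import RemoteClient

    # As of 0.6.4, namespace is a required argument; the 0.6.3 form
    # RemoteClient(base_url=...) now fails with a TypeError.
    client = RemoteClient(base_url="https://api.dash.ml", namespace="alice")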
ml_dash/cli_commands/download.py CHANGED
@@ -240,7 +240,9 @@ class ExperimentDownloader:
         """Get thread-local remote client for safe concurrent access."""
         if not hasattr(self._thread_local, "client"):
             self._thread_local.client = RemoteClient(
-                base_url=self.remote.base_url, api_key=self.remote.api_key
+                base_url=self.remote.base_url,
+                namespace=self.remote.namespace,
+                api_key=self.remote.api_key
             )
         return self._thread_local.client
 
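The downloader keeps one `RemoteClient` per worker thread via `threading.local`, so concurrent downloads never share an HTTP client. The same pattern in isolation (the `make_client` factory here is illustrative, not part of ml-dash):

    import threading

    class PerThreadClient:
        """Lazily create one client object per thread."""

        def __init__(self, make_client):
            self._make_client = make_client
            self._local = threading.local()

        @property
        def client(self):
            # Each thread sees its own attribute namespace on threading.local
            if not hasattr(self._local, "client"):
                self._local.client = self._make_client()
            return self._local.client

    # Usage sketch:
    # clients = PerThreadClient(lambda: RemoteClient(base_url=..., namespace=..., api_key=...))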
@@ -630,8 +632,23 @@ def cmd_download(args: argparse.Namespace) -> int:
         console.print("[red]Error:[/red] --dash-url is required (or set in config)")
         return 1
 
+    # Extract namespace from project argument
+    namespace = None
+    if args.project:
+        # Parse namespace from project filter (format: "owner/project" or "owner/project/exp")
+        project_parts = args.project.strip("/").split("/")
+        if len(project_parts) >= 2:  # Has at least "owner/project"
+            namespace = project_parts[0]
+
+    if not namespace:
+        console.print(
+            "[red]Error:[/red] --project must be in format 'namespace/project' or 'namespace/project/exp'"
+        )
+        console.print("Example: ml-dash download --project alice/my-project")
+        return 1
+
     # Initialize clients (RemoteClient will auto-load token if api_key is None)
-    remote_client = RemoteClient(base_url=remote_url, api_key=api_key)
+    remote_client = RemoteClient(base_url=remote_url, namespace=namespace, api_key=api_key)
     local_storage = LocalStorage(root_path=Path(args.path))
 
     # Load or create state
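The namespace is simply the first path segment of the `--project` filter. A quick illustration of the parsing rule this hunk adds (values are examples):

    def parse_namespace(project_filter: str):
        # "alice/my-project/exp-1" -> "alice"; a bare "my-project" -> None
        parts = project_filter.strip("/").split("/")
        return parts[0] if len(parts) >= 2 else None

    assert parse_namespace("alice/my-project") == "alice"
    assert parse_namespace("alice/my-project/exp-1") == "alice"
    assert parse_namespace("my-project") is None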
ml_dash/cli_commands/list.py CHANGED
@@ -260,9 +260,26 @@ def cmd_list(args: argparse.Namespace) -> int:
     # Get API key (command line > config > auto-loaded from storage)
     api_key = args.api_key or config.api_key
 
+    # Extract namespace from project argument
+    namespace = None
+    if args.project:
+        # Parse namespace from project filter (format: "namespace/project")
+        project_parts = args.project.strip("/").split("/")
+        # For simple patterns without '/', treat as project-only pattern
+        if '/' in args.project and len(project_parts) >= 2:
+            namespace = project_parts[0]
+
+    if not namespace:
+        console.print(
+            "[red]Error:[/red] --project must be in format 'namespace/project'"
+        )
+        console.print("Example: ml-dash list --project alice/my-project")
+        console.print("Or use glob patterns: ml-dash list --project alice/proj-*")
+        return 1
+
     # Create remote client
     try:
-        remote_client = RemoteClient(base_url=remote_url, api_key=api_key)
+        remote_client = RemoteClient(base_url=remote_url, namespace=namespace, api_key=api_key)
     except Exception as e:
         console.print(f"[red]Error connecting to remote:[/red] {e}")
         return 1
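`cmd_list` is slightly stricter than `cmd_download` here: a `--project` value with no slash is treated as a project-only pattern and rejected, while a glob such as `alice/proj-*` still resolves, because only the first segment is read as the namespace. Illustrating that guard (values are examples):

    pattern = "alice/proj-*"
    parts = pattern.strip("/").split("/")
    # The '/' check rejects bare patterns such as "proj-*"
    namespace = parts[0] if "/" in pattern and len(parts) >= 2 else None
    assert namespace == "alice"  # the glob applies to the project segment only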
ml_dash/cli_commands/upload.py CHANGED
@@ -632,7 +632,9 @@ class ExperimentUploader:
         # Create a new client for this thread
         # Use graphql_base_url (without /api) since RemoteClient.__init__ will add /api
         self._thread_local.client = RemoteClient(
-            base_url=self.remote.graphql_base_url, api_key=self.remote.api_key
+            base_url=self.remote.graphql_base_url,
+            namespace=self.remote.namespace,
+            api_key=self.remote.api_key
        )
        return self._thread_local.client
 
@@ -1231,8 +1233,27 @@ def cmd_upload(args: argparse.Namespace) -> int:
         f"[green]{len(valid_experiments)} experiment(s) ready to upload[/green]"
     )
 
+    # Extract namespace from target or first experiment
+    namespace = None
+    if args.target:
+        # Parse namespace from target prefix (format: "owner/project/...")
+        target_parts = args.target.strip("/").split("/")
+        if len(target_parts) >= 1:
+            namespace = target_parts[0]
+    if not namespace and valid_experiments:
+        # Parse namespace from first experiment's prefix
+        first_prefix = valid_experiments[0].prefix
+        if first_prefix:
+            prefix_parts = first_prefix.strip("/").split("/")
+            if len(prefix_parts) >= 1:
+                namespace = prefix_parts[0]
+
+    if not namespace:
+        console.print("[red]Error:[/red] Could not determine namespace from experiments or target")
+        return 1
+
     # Initialize remote client and local storage
-    remote_client = RemoteClient(base_url=remote_url, api_key=api_key)
+    remote_client = RemoteClient(base_url=remote_url, namespace=namespace, api_key=api_key)
     local_storage = LocalStorage(root_path=local_path)
 
     # Upload experiments with progress tracking
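Upload resolves the namespace with a two-step precedence: an explicit `--target` wins, otherwise the first path segment of the first experiment's prefix is used. A simplified mirror of that logic (taking prefixes as plain strings rather than experiment objects):

    def resolve_namespace(target, experiment_prefixes):
        # --target wins; otherwise fall back to the first experiment's prefix.
        # In both cases the namespace is the first path segment.
        if target:
            return target.strip("/").split("/")[0]
        for prefix in experiment_prefixes:
            if prefix:
                return prefix.strip("/").split("/")[0]
        return None

    assert resolve_namespace("alice/proj", []) == "alice"
    assert resolve_namespace(None, ["bob/proj/exp-1"]) == "bob"
    assert resolve_namespace(None, []) is None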
ml_dash/client.py CHANGED
@@ -9,12 +9,13 @@ import httpx
 class RemoteClient:
     """Client for communicating with ML-Dash server."""
 
-    def __init__(self, base_url: str, api_key: Optional[str] = None):
+    def __init__(self, base_url: str, namespace: str, api_key: Optional[str] = None):
         """
         Initialize remote client.
 
         Args:
             base_url: Base URL of ML-Dash server (e.g., "http://localhost:3000")
+            namespace: Namespace slug (e.g., "my-namespace")
             api_key: JWT token for authentication (optional - auto-loads from storage if not provided)
 
         Note:
@@ -27,6 +28,9 @@ class RemoteClient:
         # Add /api prefix to base URL for REST API calls
         self.base_url = base_url.rstrip("/") + "/api"
 
+        # Store namespace
+        self.namespace = namespace
+
         # If no api_key provided, try to load from storage
         if not api_key:
             from .auth.token_storage import get_token_storage
@@ -37,6 +41,7 @@
         self.api_key = api_key
         self._rest_client = None
         self._gql_client = None
+        self._id_cache: Dict[str, str] = {}  # Cache for slug -> ID mappings
 
     def _ensure_authenticated(self):
         """Check if authenticated, raise error if not."""
@@ -77,6 +82,80 @@
         )
         return self._gql_client
 
+    def _get_project_id(self, project_slug: str) -> str:
+        """
+        Resolve project ID from slug using GraphQL.
+
+        Args:
+            project_slug: Project slug
+
+        Returns:
+            Project ID (Snowflake ID)
+
+        Raises:
+            ValueError: If project not found
+        """
+        cache_key = f"project:{self.namespace}:{project_slug}"
+        if cache_key in self._id_cache:
+            return self._id_cache[cache_key]
+
+        query = """
+            query GetProject($namespace: String!) {
+                namespace(slug: $namespace) {
+                    projects {
+                        id
+                        slug
+                    }
+                }
+            }
+        """
+        result = self.graphql_query(query, {
+            "namespace": self.namespace
+        })
+
+        projects = result.get("namespace", {}).get("projects", [])
+        for project in projects:
+            if project["slug"] == project_slug:
+                project_id = project["id"]
+                self._id_cache[cache_key] = project_id
+                return project_id
+
+        raise ValueError(f"Project '{project_slug}' not found in namespace '{self.namespace}'")
+
+    def _get_experiment_node_id(self, experiment_id: str) -> str:
+        """
+        Resolve node ID from experiment ID using GraphQL.
+
+        Args:
+            experiment_id: Experiment ID
+
+        Returns:
+            Node ID
+
+        Raises:
+            ValueError: If experiment node not found
+        """
+        cache_key = f"exp_node:{experiment_id}"
+        if cache_key in self._id_cache:
+            return self._id_cache[cache_key]
+
+        query = """
+            query GetExperimentNode($experimentId: ID!) {
+                experimentNode(experimentId: $experimentId) {
+                    id
+                }
+            }
+        """
+        result = self.graphql_query(query, {"experimentId": experiment_id})
+
+        node = result.get("experimentNode")
+        if not node:
+            raise ValueError(f"No node found for experiment ID '{experiment_id}'")
+
+        node_id = node["id"]
+        self._id_cache[cache_key] = node_id
+        return node_id
+
     def create_or_update_experiment(
         self,
         project: str,
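Both new resolvers follow the same memoize-then-query shape: check `self._id_cache`, issue one GraphQL query, store the result so repeated calls cost nothing. A stripped-down version of that pattern with a stubbed query function (all data values here are invented for illustration):

    class SlugResolver:
        def __init__(self, graphql_query):
            self._graphql_query = graphql_query
            self._id_cache = {}

        def project_id(self, namespace: str, slug: str) -> str:
            key = f"project:{namespace}:{slug}"
            if key in self._id_cache:  # cache hit: no network round-trip
                return self._id_cache[key]
            result = self._graphql_query(namespace)
            for project in result["namespace"]["projects"]:
                if project["slug"] == slug:
                    self._id_cache[key] = project["id"]
                    return project["id"]
            raise ValueError(f"Project '{slug}' not found in namespace '{namespace}'")

    # Stubbed backend: one project, Snowflake-style string ID (example values)
    resolver = SlugResolver(lambda ns: {"namespace": {"projects": [{"id": "7341", "slug": "demo"}]}})
    assert resolver.project_id("alice", "demo") == "7341"
    assert resolver.project_id("alice", "demo") == "7341"  # second call served from cache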
@@ -89,26 +168,33 @@
         metadata: Optional[Dict[str, Any]] = None,
     ) -> Dict[str, Any]:
         """
-        Create or update an experiment.
+        Create or update an experiment using unified node API.
 
         Args:
-            project: Project name
-            name: Experiment name (last segment of prefix)
+            project: Project slug
+            name: Experiment name
             description: Optional description
             tags: Optional list of tags
             bindrs: Optional list of bindrs
-            prefix: Full prefix path sent to backend for folder hierarchy creation
+            prefix: Full prefix path (ignored in new API - use folders instead)
             write_protected: If True, experiment becomes immutable
             metadata: Optional metadata dict
 
         Returns:
-            Response dict with experiment, project, and namespace data
+            Response dict with experiment, node, and project data
 
         Raises:
             httpx.HTTPStatusError: If request fails
+            ValueError: If project not found
         """
+        # Resolve project ID from slug
+        project_id = self._get_project_id(project)
+
+        # Build payload for unified node API
         payload = {
+            "type": "EXPERIMENT",
            "name": name,
+            "projectId": project_id,
        }
 
        if description is not None:
@@ -121,15 +207,22 @@
             payload["writeProtected"] = write_protected
         if metadata is not None:
             payload["metadata"] = metadata
-        if prefix is not None:
-            payload["prefix"] = prefix
 
+        # Call unified node creation API
         response = self._client.post(
-            f"/projects/{project}/experiments",
+            f"/namespaces/{self.namespace}/nodes",
             json=payload,
         )
         response.raise_for_status()
-        return response.json()
+        result = response.json()
+
+        # Cache the experiment node ID mapping
+        if "experiment" in result and "node" in result:
+            exp_id = result["experiment"]["id"]
+            node_id = result["node"]["id"]
+            self._id_cache[f"exp_node:{exp_id}"] = node_id
+
+        return result
 
     def update_experiment_status(
         self,
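Per the hunks above, experiment creation no longer POSTs to /projects/{project}/experiments; the project slug is resolved to an ID and a typed node payload goes to /namespaces/{namespace}/nodes. Roughly, the request now looks like the sketch below. The real client sends it through its authenticated httpx client; the bare httpx call, IDs, and slugs here are placeholders:

    import httpx

    base_url = "https://api.dash.ml/api"      # RemoteClient appends /api itself
    payload = {
        "type": "EXPERIMENT",                 # unified node API discriminator
        "name": "run-1",
        "projectId": "7341",                  # resolved via _get_project_id("demo")
        "tags": ["baseline"],
    }
    # Auth headers omitted for brevity
    response = httpx.post(f"{base_url}/namespaces/alice/nodes", json=payload)
    response.raise_for_status()
    result = response.json()                  # expected shape: {"experiment": {...}, "node": {...}, ...}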
@@ -137,24 +230,27 @@
         status: str,
     ) -> Dict[str, Any]:
         """
-        Update experiment status.
+        Update experiment status using unified node API.
 
         Args:
             experiment_id: Experiment ID
             status: Status value - "RUNNING" | "COMPLETED" | "FAILED" | "CANCELLED"
 
         Returns:
-            Response dict with updated experiment data
+            Response dict with updated node data
 
         Raises:
             httpx.HTTPStatusError: If request fails
+            ValueError: If experiment node not found
         """
-        payload = {
-            "status": status,
-        }
+        # Resolve node ID from experiment ID
+        node_id = self._get_experiment_node_id(experiment_id)
+
+        # Update node with new status
+        payload = {"status": status}
 
         response = self._client.patch(
-            f"/experiments/{experiment_id}/status",
+            f"/nodes/{node_id}",
             json=payload,
         )
         response.raise_for_status()
@@ -263,15 +359,17 @@
         metadata: Optional[Dict[str, Any]],
         checksum: str,
         content_type: str,
-        size_bytes: int
+        size_bytes: int,
+        project_id: Optional[str] = None,
+        parent_id: str = "ROOT"
     ) -> Dict[str, Any]:
         """
-        Upload a file to an experiment.
+        Upload a file to an experiment using unified node API.
 
         Args:
             experiment_id: Experiment ID (Snowflake ID)
             file_path: Local file path
-            prefix: Logical path prefix
+            prefix: Logical path prefix (DEPRECATED - use parent_id for folder structure)
             filename: Original filename
             description: Optional description
             tags: Optional tags
@@ -279,23 +377,43 @@
             checksum: SHA256 checksum
             content_type: MIME type
             size_bytes: File size in bytes
+            project_id: Project ID (optional - will be resolved from experiment if not provided)
+            parent_id: Parent node ID (folder) or "ROOT" for root level
 
         Returns:
-            File metadata dict
+            Response dict with node and physicalFile data
 
         Raises:
             httpx.HTTPStatusError: If request fails
         """
+        # If project_id not provided, need to resolve it from experiment
+        # For now, assuming we have it or it will be queried separately
+        if project_id is None:
+            # Query experiment to get project ID
+            query = """
+                query GetExperimentProject($experimentId: ID!) {
+                    experimentById(id: $experimentId) {
+                        projectId
+                    }
+                }
+            """
+            result = self.graphql_query(query, {"experimentId": experiment_id})
+            project_id = result.get("experimentById", {}).get("projectId")
+            if not project_id:
+                raise ValueError(f"Could not resolve project ID for experiment {experiment_id}")
+
         # Prepare multipart form data
-        # Read file content first (httpx needs content, not file handle)
         with open(file_path, "rb") as f:
             file_content = f.read()
 
         files = {"file": (filename, file_content, content_type)}
         data = {
-            "prefix": prefix,
+            "type": "FILE",
+            "projectId": project_id,
+            "experimentId": experiment_id,
+            "parentId": parent_id,
+            "name": filename,
             "checksum": checksum,
-            "sizeBytes": str(size_bytes),
         }
         if description:
             data["description"] = description
@@ -305,15 +423,53 @@
             import json
             data["metadata"] = json.dumps(metadata)
 
-        # httpx will automatically set multipart/form-data content-type
+        # Call unified node creation API
         response = self._client.post(
-            f"/experiments/{experiment_id}/files",
+            f"/namespaces/{self.namespace}/nodes",
            files=files,
            data=data
        )
 
        response.raise_for_status()
-        return response.json()
+        result = response.json()
+
+        # Transform unified node response to expected file metadata format
+        # The server returns {node: {...}, physicalFile: {...}}
+        # We need to flatten it to match the expected format
+        node = result.get("node", {})
+        physical_file = result.get("physicalFile", {})
+
+        # Convert BigInt IDs and sizeBytes from string back to appropriate types
+        # Node ID should remain as string for consistency
+        node_id = node.get("id")
+        if isinstance(node_id, (int, float)):
+            # If it was deserialized as a number, convert to string to preserve full precision
+            node_id = str(int(node_id))
+
+        size_bytes = physical_file.get("sizeBytes")
+        if isinstance(size_bytes, str):
+            size_bytes = int(size_bytes)
+
+        # Use experimentId from node, not the parameter (which might be a path string)
+        experiment_id_from_node = node.get("experimentId")
+        if isinstance(experiment_id_from_node, (int, float)):
+            experiment_id_from_node = str(int(experiment_id_from_node))
+
+        return {
+            "id": node_id,
+            "experimentId": experiment_id_from_node or experiment_id,
+            "path": prefix,  # Use prefix as path for backward compatibility
+            "filename": filename,
+            "description": node.get("description"),
+            "tags": node.get("tags", []),
+            "contentType": physical_file.get("contentType"),
+            "sizeBytes": size_bytes,
+            "checksum": physical_file.get("checksum"),
+            "metadata": node.get("metadata"),
+            "uploadedAt": node.get("createdAt"),
+            "updatedAt": node.get("updatedAt"),
+            "deletedAt": node.get("deletedAt"),
+        }
 
     def list_files(
         self,
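The flattening step above guards against JSON round-trip issues with 64-bit values: Snowflake IDs may come back deserialized as numbers and `sizeBytes` as a string, so both are normalized before the legacy-shaped dict is returned. The normalization in isolation (note that a float that already lost precision past 2**53 cannot be restored; coercing to str at least keeps the types consistent):

    def normalize_id(value):
        # Snowflake IDs exceed 2**53; if JSON decoding produced a float,
        # coerce back to a decimal string for consistent handling.
        return str(int(value)) if isinstance(value, (int, float)) else value

    def normalize_size(value):
        return int(value) if isinstance(value, str) else value

    assert normalize_id(7341) == "7341"
    assert normalize_id("7341") == "7341"
    assert normalize_size("1024") == 1024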
@@ -322,48 +478,72 @@
         tags: Optional[List[str]] = None
     ) -> List[Dict[str, Any]]:
         """
-        List files in an experiment.
+        List files in an experiment using GraphQL.
 
         Args:
             experiment_id: Experiment ID (Snowflake ID)
-            prefix: Optional prefix filter
+            prefix: Optional prefix filter (DEPRECATED - filtering not supported in new API)
             tags: Optional tags filter
 
         Returns:
-            List of file metadata dicts
+            List of file node dicts
 
         Raises:
             httpx.HTTPStatusError: If request fails
         """
-        params = {}
-        if prefix:
-            params["prefix"] = prefix
+        query = """
+            query ListExperimentFiles($experimentId: ID!) {
+                experimentById(id: $experimentId) {
+                    files {
+                        id
+                        name
+                        description
+                        tags
+                        metadata
+                        createdAt
+                        pPath
+                        physicalFile {
+                            id
+                            filename
+                            contentType
+                            sizeBytes
+                            checksum
+                            s3Url
+                        }
+                    }
+                }
+            }
+        """
+        result = self.graphql_query(query, {"experimentId": experiment_id})
+        files = result.get("experimentById", {}).get("files", [])
+
+        # Apply client-side filtering if tags specified
         if tags:
-            params["tags"] = ",".join(tags)
+            filtered_files = []
+            for file in files:
+                file_tags = file.get("tags", [])
+                if any(tag in file_tags for tag in tags):
+                    filtered_files.append(file)
+            return filtered_files
 
-        response = self._client.get(
-            f"/experiments/{experiment_id}/files",
-            params=params
-        )
-        response.raise_for_status()
-        result = response.json()
-        return result.get("files", [])
+        return files
 
     def get_file(self, experiment_id: str, file_id: str) -> Dict[str, Any]:
         """
-        Get file metadata.
+        Get file metadata using unified node API.
 
         Args:
-            experiment_id: Experiment ID (Snowflake ID)
-            file_id: File ID (Snowflake ID)
+            experiment_id: Experiment ID (DEPRECATED - not used in new API)
+            file_id: File node ID (Snowflake ID)
 
         Returns:
-            File metadata dict
+            Node metadata dict
 
         Raises:
             httpx.HTTPStatusError: If request fails
         """
-        response = self._client.get(f"/experiments/{experiment_id}/files/{file_id}")
+        # file_id is actually the node ID in the new system
+        response = self._client.get(f"/nodes/{file_id}")
         response.raise_for_status()
         return response.json()
 
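Note that tag filtering moves client-side in this version: the GraphQL query fetches every file node and tags are matched in Python with `any(...)`, giving OR semantics. An equivalent standalone filter (the sample records are invented):

    files = [
        {"name": "model.pt", "tags": ["checkpoint", "best"]},
        {"name": "log.txt", "tags": ["debug"]},
    ]
    wanted = ["best", "final"]

    # OR semantics: keep a file if it carries at least one requested tag
    matches = [f for f in files if any(t in f.get("tags", []) for t in wanted)]
    assert [f["name"] for f in matches] == ["model.pt"]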
@@ -374,11 +554,11 @@
         dest_path: Optional[str] = None
     ) -> str:
         """
-        Download a file from a experiment.
+        Download a file using unified node API.
 
         Args:
-            experiment_id: Experiment ID (Snowflake ID)
-            file_id: File ID (Snowflake ID)
+            experiment_id: Experiment ID (DEPRECATED - not used in new API)
+            file_id: File node ID (Snowflake ID)
             dest_path: Optional destination path (defaults to original filename)
 
         Returns:
@@ -390,40 +570,39 @@
         """
         # Get file metadata first to get filename and checksum
         file_metadata = self.get_file(experiment_id, file_id)
-        filename = file_metadata["filename"]
-        expected_checksum = file_metadata["checksum"]
+        filename = file_metadata.get("name") or file_metadata.get("physicalFile", {}).get("filename")
+        expected_checksum = file_metadata.get("physicalFile", {}).get("checksum")
 
         # Determine destination path
         if dest_path is None:
             dest_path = filename
 
-        # Download file
-        response = self._client.get(
-            f"/experiments/{experiment_id}/files/{file_id}/download"
-        )
+        # Download file using node API
+        response = self._client.get(f"/nodes/{file_id}/download")
         response.raise_for_status()
 
         # Write to file
         with open(dest_path, "wb") as f:
             f.write(response.content)
 
-        # Verify checksum
-        from .files import verify_checksum
-        if not verify_checksum(dest_path, expected_checksum):
-            # Delete corrupted file
-            import os
-            os.remove(dest_path)
-            raise ValueError(f"Checksum verification failed for file {file_id}")
+        # Verify checksum if available
+        if expected_checksum:
+            from .files import verify_checksum
+            if not verify_checksum(dest_path, expected_checksum):
+                # Delete corrupted file
+                import os
+                os.remove(dest_path)
+                raise ValueError(f"Checksum verification failed for file {file_id}")
 
         return dest_path
 
     def delete_file(self, experiment_id: str, file_id: str) -> Dict[str, Any]:
         """
-        Delete a file (soft delete).
+        Delete a file using unified node API (soft delete).
 
         Args:
-            experiment_id: Experiment ID (Snowflake ID)
-            file_id: File ID (Snowflake ID)
+            experiment_id: Experiment ID (DEPRECATED - not used in new API)
+            file_id: File node ID (Snowflake ID)
 
         Returns:
             Dict with id and deletedAt
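Checksum verification becomes conditional here because node metadata may omit `physicalFile.checksum`. The check itself delegates to `ml_dash.files.verify_checksum`; a plausible standard-library equivalent, with the helper's exact behavior assumed from the call site:

    import hashlib
    import os
    from typing import Optional

    def verify_checksum(path: str, expected: str) -> bool:
        """Assumed behavior: compare the file's SHA256 hex digest to `expected`."""
        sha256 = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(8192), b""):
                sha256.update(chunk)
        return sha256.hexdigest() == expected

    def download_guard(dest_path: str, expected: Optional[str]) -> None:
        # Mirrors the hunk: only verify when the server supplied a checksum
        if expected and not verify_checksum(dest_path, expected):
            os.remove(dest_path)  # drop the corrupted download
            raise ValueError(f"Checksum verification failed for {dest_path}")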
@@ -431,7 +610,7 @@
         Raises:
             httpx.HTTPStatusError: If request fails
         """
-        response = self._client.delete(f"/experiments/{experiment_id}/files/{file_id}")
+        response = self._client.delete(f"/nodes/{file_id}")
         response.raise_for_status()
         return response.json()
 
@@ -444,17 +623,17 @@
         metadata: Optional[Dict[str, Any]] = None
     ) -> Dict[str, Any]:
         """
-        Update file metadata.
+        Update file metadata using unified node API.
 
         Args:
-            experiment_id: Experiment ID (Snowflake ID)
-            file_id: File ID (Snowflake ID)
+            experiment_id: Experiment ID (DEPRECATED - not used in new API)
+            file_id: File node ID (Snowflake ID)
             description: Optional description
             tags: Optional tags
             metadata: Optional metadata
 
         Returns:
-            Updated file metadata dict
+            Updated node metadata dict
 
         Raises:
             httpx.HTTPStatusError: If request fails
@@ -468,7 +647,7 @@
             payload["metadata"] = metadata
 
         response = self._client.patch(
-            f"/experiments/{experiment_id}/files/{file_id}",
+            f"/nodes/{file_id}",
             json=payload
         )
         response.raise_for_status()
@@ -905,11 +1084,11 @@
         self, experiment_id: str, file_id: str, dest_path: str
     ) -> str:
         """
-        Download a file with streaming for large files.
+        Download a file with streaming for large files using unified node API.
 
         Args:
-            experiment_id: Experiment ID (Snowflake ID)
-            file_id: File ID (Snowflake ID)
+            experiment_id: Experiment ID (DEPRECATED - not used in new API)
+            file_id: File node ID (Snowflake ID)
             dest_path: Destination path to save file
 
         Returns:
@@ -921,22 +1100,23 @@
         """
         # Get metadata first for checksum
         file_metadata = self.get_file(experiment_id, file_id)
-        expected_checksum = file_metadata["checksum"]
+        expected_checksum = file_metadata.get("physicalFile", {}).get("checksum")
 
-        # Stream download
-        with self._client.stream("GET", f"/experiments/{experiment_id}/files/{file_id}/download") as response:
+        # Stream download using node API
+        with self._client.stream("GET", f"/nodes/{file_id}/download") as response:
             response.raise_for_status()
 
             with open(dest_path, "wb") as f:
                 for chunk in response.iter_bytes(chunk_size=8192):
                     f.write(chunk)
 
-        # Verify checksum
-        from .files import verify_checksum
-        if not verify_checksum(dest_path, expected_checksum):
-            import os
-            os.remove(dest_path)
-            raise ValueError(f"Checksum verification failed for file {file_id}")
+        # Verify checksum if available
+        if expected_checksum:
+            from .files import verify_checksum
+            if not verify_checksum(dest_path, expected_checksum):
+                import os
+                os.remove(dest_path)
+                raise ValueError(f"Checksum verification failed for file {file_id}")
 
         return dest_path
 
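The streaming path avoids holding large files in memory: `httpx.Client.stream` yields the body in 8 KiB chunks that are written as they arrive. A self-contained sketch of the same pattern against a plain httpx client (the URL and paths are placeholders):

    import httpx

    def stream_to_file(client: httpx.Client, url: str, dest_path: str) -> str:
        with client.stream("GET", url) as response:
            response.raise_for_status()
            with open(dest_path, "wb") as f:
                # iter_bytes yields the response body without buffering it whole
                for chunk in response.iter_bytes(chunk_size=8192):
                    f.write(chunk)
        return dest_path

    # Usage sketch (placeholder values):
    # with httpx.Client(base_url="https://api.dash.ml/api") as c:
    #     stream_to_file(c, "/nodes/7341/download", "model.pt")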
ml_dash/experiment.py CHANGED
@@ -350,7 +350,7 @@ class Experiment:
             # RemoteClient will auto-load token from ~/.dash/token.enc
             # Use RUN.api_url if dash_url=True (boolean), otherwise use the provided URL
             api_url = RUN.api_url if dash_url is True else dash_url
-            self._client = RemoteClient(base_url=api_url)
+            self._client = RemoteClient(base_url=api_url, namespace=self.owner)
 
         if self.mode in (OperationMode.LOCAL, OperationMode.HYBRID):
             self._storage = LocalStorage(root_path=Path(dash_root))
@@ -703,22 +703,40 @@
         # Write immediately (no buffering)
         if self._client:
             # Remote mode: send to API (wrapped in array for batch API)
-            self._client.create_log_entries(
-                experiment_id=self._experiment_id,
-                logs=[log_entry],  # Single log in array
-            )
+            try:
+                self._client.create_log_entries(
+                    experiment_id=self._experiment_id,
+                    logs=[log_entry],  # Single log in array
+                )
+            except Exception as e:
+                # Log warning but don't crash training
+                import warnings
+                warnings.warn(
+                    f"Failed to write log to remote server: {e}. Training will continue.",
+                    RuntimeWarning,
+                    stacklevel=4
+                )
+                # Fall through to local storage if available
 
         if self._storage:
             # Local mode: write to file immediately
-            self._storage.write_log(
-                owner=self.owner,
-                project=self.project,
-                prefix=self._folder_path,
-                message=log_entry["message"],
-                level=log_entry["level"],
-                metadata=log_entry.get("metadata"),
-                timestamp=log_entry["timestamp"],
-            )
+            try:
+                self._storage.write_log(
+                    owner=self.owner,
+                    project=self.project,
+                    prefix=self._folder_path,
+                    message=log_entry["message"],
+                    level=log_entry["level"],
+                    metadata=log_entry.get("metadata"),
+                    timestamp=log_entry["timestamp"],
+                )
+            except Exception as e:
+                import warnings
+                warnings.warn(
+                    f"Failed to write log to local storage: {e}",
+                    RuntimeWarning,
+                    stacklevel=4
+                )
 
     def _print_log(
         self, message: str, level: str, metadata: Optional[Dict[str, Any]]
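The guiding rule in this hunk is that telemetry must never kill a training run: a remote failure degrades to a `RuntimeWarning` and execution falls through to the local backend. The same pattern in miniature (the backends here are stand-ins, not ml-dash APIs):

    import warnings

    def write_everywhere(backends, entry):
        """Try each backend; warn on failure instead of raising."""
        for name, write in backends:
            try:
                write(entry)
            except Exception as e:
                warnings.warn(
                    f"Failed to write log to {name}: {e}. Training will continue.",
                    RuntimeWarning,
                    stacklevel=2,
                )

    # Stand-in backends: remote raises, local succeeds
    def flaky_remote(entry):
        raise ConnectionError("server unreachable")

    log_sink = []
    write_everywhere([("remote", flaky_remote), ("local", log_sink.append)], {"msg": "step 1"})
    assert log_sink == [{"msg": "step 1"}]  # local write survived the remote failure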
@@ -1139,7 +1157,7 @@
         description: Optional[str],
         tags: Optional[List[str]],
         metadata: Optional[Dict[str, Any]],
-    ) -> Dict[str, Any]:
+    ) -> Optional[Dict[str, Any]]:
         """
         Internal method to append a single data point to a metric.
 
@@ -1151,33 +1169,54 @@
             metadata: Optional metadata
 
         Returns:
-            Dict with metricId, index, bufferedDataPoints, chunkSize
+            Dict with metricId, index, bufferedDataPoints, chunkSize or None if all backends fail
         """
         result = None
 
         if self._client:
             # Remote mode: append via API
-            result = self._client.append_to_metric(
-                experiment_id=self._experiment_id,
-                metric_name=name,
-                data=data,
-                description=description,
-                tags=tags,
-                metadata=metadata,
-            )
+            try:
+                result = self._client.append_to_metric(
+                    experiment_id=self._experiment_id,
+                    metric_name=name,
+                    data=data,
+                    description=description,
+                    tags=tags,
+                    metadata=metadata,
+                )
+            except Exception as e:
+                # Log warning but don't crash training
+                import warnings
+                metric_display = f"'{name}'" if name else "unnamed metric"
+                warnings.warn(
+                    f"Failed to log {metric_display} to remote server: {e}. "
+                    f"Training will continue.",
+                    RuntimeWarning,
+                    stacklevel=3
+                )
+                # Fall through to local storage if available
 
         if self._storage:
             # Local mode: append to local storage
-            result = self._storage.append_to_metric(
-                owner=self.owner,
-                project=self.project,
-                prefix=self._folder_path,
-                metric_name=name,
-                data=data,
-                description=description,
-                tags=tags,
-                metadata=metadata,
-            )
+            try:
+                result = self._storage.append_to_metric(
+                    owner=self.owner,
+                    project=self.project,
+                    prefix=self._folder_path,
+                    metric_name=name,
+                    data=data,
+                    description=description,
+                    tags=tags,
+                    metadata=metadata,
+                )
+            except Exception as e:
+                import warnings
+                metric_display = f"'{name}'" if name else "unnamed metric"
+                warnings.warn(
+                    f"Failed to log {metric_display} to local storage: {e}",
+                    RuntimeWarning,
+                    stacklevel=3
+                )
 
         return result
 
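Because both backends are now wrapped in try/except, the append helpers can return None instead of raising; callers that previously indexed into the result dict should guard for that. A self-contained sketch of such a guard (the result shape follows the docstring above):

    from typing import Any, Dict, Optional

    def handle_append_result(result: Optional[Dict[str, Any]]) -> None:
        # As of 0.6.4 a metric append may return None instead of raising
        if result is None:
            print("metric append failed on all backends; point dropped with a warning")
        else:
            print(f"appended at index {result.get('index')}")

    handle_append_result(None)
    handle_append_result({"metricId": "7341", "index": 12})  # example values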
@@ -1188,7 +1227,7 @@
         description: Optional[str],
         tags: Optional[List[str]],
         metadata: Optional[Dict[str, Any]],
-    ) -> Dict[str, Any]:
+    ) -> Optional[Dict[str, Any]]:
         """
         Internal method to append multiple data points to a metric.
 
@@ -1200,33 +1239,54 @@
             metadata: Optional metadata
 
         Returns:
-            Dict with metricId, startIndex, endIndex, count
+            Dict with metricId, startIndex, endIndex, count or None if all backends fail
         """
         result = None
 
         if self._client:
             # Remote mode: append batch via API
-            result = self._client.append_batch_to_metric(
-                experiment_id=self._experiment_id,
-                metric_name=name,
-                data_points=data_points,
-                description=description,
-                tags=tags,
-                metadata=metadata,
-            )
+            try:
+                result = self._client.append_batch_to_metric(
+                    experiment_id=self._experiment_id,
+                    metric_name=name,
+                    data_points=data_points,
+                    description=description,
+                    tags=tags,
+                    metadata=metadata,
+                )
+            except Exception as e:
+                # Log warning but don't crash training
+                import warnings
+                metric_display = f"'{name}'" if name else "unnamed metric"
+                warnings.warn(
+                    f"Failed to log batch to {metric_display} on remote server: {e}. "
+                    f"Training will continue.",
+                    RuntimeWarning,
+                    stacklevel=3
+                )
+                # Fall through to local storage if available
 
         if self._storage:
             # Local mode: append batch to local storage
-            result = self._storage.append_batch_to_metric(
-                owner=self.owner,
-                project=self.project,
-                prefix=self._folder_path,
-                metric_name=name,
-                data_points=data_points,
-                description=description,
-                tags=tags,
-                metadata=metadata,
-            )
+            try:
+                result = self._storage.append_batch_to_metric(
+                    owner=self.owner,
+                    project=self.project,
+                    prefix=self._folder_path,
+                    metric_name=name,
+                    data_points=data_points,
+                    description=description,
+                    tags=tags,
+                    metadata=metadata,
+                )
+            except Exception as e:
+                import warnings
+                metric_display = f"'{name}'" if name else "unnamed metric"
+                warnings.warn(
+                    f"Failed to log batch to {metric_display} in local storage: {e}",
+                    RuntimeWarning,
+                    stacklevel=3
+                )
 
         return result
 
ml_dash-0.6.4.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: ml-dash
-Version: 0.6.3
+Version: 0.6.4
 Summary: ML experiment tracking and data storage
 Keywords: machine-learning,experiment-tracking,mlops,data-storage
 Author: Ge Yang, Tom Tao
ml_dash-0.6.4.dist-info/RECORD CHANGED
@@ -1,4 +1,4 @@
-ml_dash/__init__.py,sha256=D5p0zXPS1M1dfD3_jT_NSYI_XwbB_7Q9ZOKN80BUY78,1583
+ml_dash/__init__.py,sha256=XJym_-vgqFVwyAD-VsPZF9WWepTZ4w-Lwui5ns1gmJI,1583
 ml_dash/auth/__init__.py,sha256=3lwM-Y8UBHPU1gFW2JNpmXlPVTnkGudWLKNFFKulQfo,1200
 ml_dash/auth/constants.py,sha256=ku4QzQUMNjvyJwjy7AUdywMAZd59jXSxNHZxDiagUWU,280
 ml_dash/auth/device_flow.py,sha256=DQOdPNlZCuU1umZOA_A6WXdRM3zWphnyo9IntToBl_A,7921
@@ -8,16 +8,16 @@ ml_dash/auth/token_storage.py,sha256=L18W8J7D1LlCDlY3Q32l0RXeNh0o7YVDQeeGYm64Dgw
 ml_dash/auto_start.py,sha256=62_eZG1qBNAwu6AXduTSo4niCVZ27X52ZK0WEr3yS1o,1812
 ml_dash/cli.py,sha256=BoaBulcqnM88XuV5BQEx_-AQAXJAYSJqpvnHggEII_I,2559
 ml_dash/cli_commands/__init__.py,sha256=bjAmV7MsW-bhtW_4SnLJ0Cfkt9h82vMDC8ebW1Ke8KE,38
-ml_dash/cli_commands/api.py,sha256=tgHB3pvSYv36_RbxsAtiEfjtivnIn7NjdHq0AL2QQGo,4335
-ml_dash/cli_commands/download.py,sha256=ZnRhaDLIM28Dri4-YHLU1fBwC9AAvNoiuut3pkdBhJU,27422
-ml_dash/cli_commands/list.py,sha256=9dK0UbNTvysGM5c8Mkb5XfFNkhMIhtjIP1v9BFo-5ew,15400
+ml_dash/cli_commands/api.py,sha256=NekZEJGWNpIfB6YrsrOw7kw7rZKjVudwgJWPZIy6ANQ,4535
+ml_dash/cli_commands/download.py,sha256=LeZXjQSEPIxZALuo90fj8RHjFWIbtGPE0F625sD3cU8,28054
+ml_dash/cli_commands/list.py,sha256=oc_yJXFhsvGgr3JedG2j7747yX69Qc546geIi4DQ54k,16129
 ml_dash/cli_commands/login.py,sha256=zX-urtUrfzg2qOGtKNYQgj6UloN9kzj4zEO6h_xwuNs,6782
 ml_dash/cli_commands/logout.py,sha256=lTUUNyRXqvo61qNkCd4KBrPUujDAHnNqsHkU6bHie0U,1332
 ml_dash/cli_commands/profile.py,sha256=BaSM6BAN3YM4tw95iKV_nypKZxwsB3PoAAejQcYip5E,2351
-ml_dash/cli_commands/upload.py,sha256=Ch1pWC4rU3M9P52Ne_gAlkE7yz4WZKgZlRBG3hpy9_4,44059
-ml_dash/client.py,sha256=TEk-Vt323wBpDPPwX-fFFS7IVF7hS3aBDxn9lewbpls,31455
+ml_dash/cli_commands/upload.py,sha256=_607CcGjvjnwTgGzyxHaDG0qDAlSLlpZDoq6Sy-3paQ,44828
+ml_dash/client.py,sha256=kuLOJcBifgyBKQwQpI3jTvPlaUinJu_fCXROfGE3zGk,38328
 ml_dash/config.py,sha256=oz2xvoBh2X_xUXWr92cPD5nFxXMT5LxVNypv5B5O0fA,3116
-ml_dash/experiment.py,sha256=DsEl4q7EksfBApOjd1q4ncX6COSC7Hv2bCeFPbeELC8,39218
+ml_dash/experiment.py,sha256=1uDCKNDlgGkKoogao3sEFz1sUhmiRvX3ZPGoQ7H3ozE,41361
 ml_dash/files.py,sha256=bihUHKpdknytLGuGgkcvhh585nziZrvYjiHl6rHnoD0,49227
 ml_dash/log.py,sha256=E-DLg0vejVLLEyShJ_r0LneDMI0XU7XTH5iKWYJe9jI,5298
 ml_dash/metric.py,sha256=ghD1jnuv6dbjV1Jlo7q0mx9UEzpdto2Y1-oDWrSfg04,25809
@@ -27,7 +27,7 @@ ml_dash/remote_auto_start.py,sha256=5fvQDHv1CWEKFb6WAa5_uyEInwV_SvotXjOO_6i6ZKE,
 ml_dash/run.py,sha256=C0quTLZXKDAlwstzEiJ75CWCX1pwYrmtMZH3z-ia6Pw,6310
 ml_dash/snowflake.py,sha256=14rEpRU5YltsmmmZW0EMUy_hdv5S5ME9gWVtmdmwfiU,4917
 ml_dash/storage.py,sha256=9mG42pvvWkkracbjCr9Xdp890Nm4XSxL7_JeFbBe28g,33020
-ml_dash-0.6.3.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
-ml_dash-0.6.3.dist-info/entry_points.txt,sha256=dYs2EHX1uRNO7AQGNnVaJJpgiy0Z9q7tiy4fHSyaf3Q,46
-ml_dash-0.6.3.dist-info/METADATA,sha256=CUfPUXV3i0CUsxrq80yvbXUe0s8xJUPfLzD6jLozEQ8,7203
-ml_dash-0.6.3.dist-info/RECORD,,
+ml_dash-0.6.4.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
+ml_dash-0.6.4.dist-info/entry_points.txt,sha256=dYs2EHX1uRNO7AQGNnVaJJpgiy0Z9q7tiy4fHSyaf3Q,46
+ml_dash-0.6.4.dist-info/METADATA,sha256=KB2IyFCHFl4pMG9CR7k4UWQb6EzSRAsN3ZZ-OPTg2hA,7203
+ml_dash-0.6.4.dist-info/RECORD,,