ml-dash 0.6.4__tar.gz → 0.6.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {ml_dash-0.6.4 → ml_dash-0.6.5}/PKG-INFO +1 -1
  2. {ml_dash-0.6.4 → ml_dash-0.6.5}/pyproject.toml +4 -1
  3. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/client.py +315 -13
  4. {ml_dash-0.6.4 → ml_dash-0.6.5}/LICENSE +0 -0
  5. {ml_dash-0.6.4 → ml_dash-0.6.5}/README.md +0 -0
  6. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/__init__.py +0 -0
  7. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/auth/__init__.py +0 -0
  8. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/auth/constants.py +0 -0
  9. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/auth/device_flow.py +0 -0
  10. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/auth/device_secret.py +0 -0
  11. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/auth/exceptions.py +0 -0
  12. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/auth/token_storage.py +0 -0
  13. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/auto_start.py +0 -0
  14. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/cli.py +0 -0
  15. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/cli_commands/__init__.py +0 -0
  16. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/cli_commands/api.py +0 -0
  17. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/cli_commands/download.py +0 -0
  18. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/cli_commands/list.py +0 -0
  19. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/cli_commands/login.py +0 -0
  20. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/cli_commands/logout.py +0 -0
  21. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/cli_commands/profile.py +0 -0
  22. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/cli_commands/upload.py +0 -0
  23. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/config.py +0 -0
  24. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/experiment.py +0 -0
  25. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/files.py +0 -0
  26. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/log.py +0 -0
  27. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/metric.py +0 -0
  28. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/params.py +0 -0
  29. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/py.typed +0 -0
  30. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/remote_auto_start.py +0 -0
  31. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/run.py +0 -0
  32. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/snowflake.py +0 -0
  33. {ml_dash-0.6.4 → ml_dash-0.6.5}/src/ml_dash/storage.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ml-dash
3
- Version: 0.6.4
3
+ Version: 0.6.5
4
4
  Summary: ML experiment tracking and data storage
5
5
  Keywords: machine-learning,experiment-tracking,mlops,data-storage
6
6
  Author: Ge Yang, Tom Tao
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ml-dash"
3
- version = "0.6.4"
3
+ version = "0.6.5"
4
4
  description = "ML experiment tracking and data storage"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.9"
@@ -60,6 +60,9 @@ dev = [
60
60
  "mypy>=1.9.0",
61
61
  ]
62
62
 
63
+ [tool.ruff]
64
+ indent-width = 2
65
+
63
66
  [tool.uv]
64
67
 
65
68
  [build-system]
@@ -82,7 +82,7 @@ class RemoteClient:
82
82
  )
83
83
  return self._gql_client
84
84
 
85
- def _get_project_id(self, project_slug: str) -> str:
85
+ def _get_project_id(self, project_slug: str) -> Optional[str]:
86
86
  """
87
87
  Resolve project ID from slug using GraphQL.
88
88
 
@@ -90,10 +90,8 @@ class RemoteClient:
90
90
  project_slug: Project slug
91
91
 
92
92
  Returns:
93
- Project ID (Snowflake ID)
94
-
95
- Raises:
96
- ValueError: If project not found
93
+ Project ID (Snowflake ID) if found, None if not found
94
+ When None is returned, the server will auto-create the project
97
95
  """
98
96
  cache_key = f"project:{self.namespace}:{project_slug}"
99
97
  if cache_key in self._id_cache:
@@ -113,14 +111,19 @@ class RemoteClient:
113
111
  "namespace": self.namespace
114
112
  })
115
113
 
116
- projects = result.get("namespace", {}).get("projects", [])
114
+ namespace_data = result.get("namespace")
115
+ if namespace_data is None:
116
+ raise ValueError(f"Namespace '{self.namespace}' not found. Please check the namespace exists on the server.")
117
+
118
+ projects = namespace_data.get("projects", [])
117
119
  for project in projects:
118
120
  if project["slug"] == project_slug:
119
121
  project_id = project["id"]
120
122
  self._id_cache[cache_key] = project_id
121
123
  return project_id
122
124
 
123
- raise ValueError(f"Project '{project_slug}' not found in namespace '{self.namespace}'")
125
+ # Project not found - return None to let server auto-create it
126
+ return None
124
127
 
125
128
  def _get_experiment_node_id(self, experiment_id: str) -> str:
126
129
  """
@@ -182,21 +185,85 @@ class RemoteClient:
182
185
 
183
186
  Returns:
184
187
  Response dict with experiment, node, and project data
188
+ Note: Project will be auto-created if it doesn't exist
185
189
 
186
190
  Raises:
187
191
  httpx.HTTPStatusError: If request fails
188
- ValueError: If project not found
189
192
  """
190
- # Resolve project ID from slug
193
+ # Resolve project ID from slug (returns None if not found)
191
194
  project_id = self._get_project_id(project)
192
195
 
196
+ # Parse prefix to create folder hierarchy for experiment
197
+ # prefix format: "namespace/project/folder1/folder2/experiment_name"
198
+ # We need to create folders: folder1 -> folder2 and place experiment under folder2
199
+ parent_id = "ROOT"
200
+
201
+ if prefix:
202
+ # Parse prefix to extract folder path
203
+ parts = prefix.strip('/').split('/')
204
+ # parts: [namespace, project, folder1, folder2, ..., experiment_name]
205
+
206
+ if len(parts) >= 3:
207
+ # We have at least namespace/project/something
208
+ # Extract folder parts (everything between project and experiment name)
209
+ # Skip namespace (parts[0]) and project (parts[1])
210
+ # Skip experiment name (parts[-1])
211
+ folder_parts = parts[2:-1] if len(parts) > 3 else []
212
+
213
+ if folder_parts:
214
+ # Ensure we have a project_id for folder creation
215
+ if not project_id:
216
+ # Create the project first since we need its ID for folders
217
+ project_response = self._client.post(
218
+ f"/namespaces/{self.namespace}/nodes",
219
+ json={
220
+ "type": "PROJECT",
221
+ "name": project,
222
+ "slug": project,
223
+ }
224
+ )
225
+ project_response.raise_for_status()
226
+ project_data = project_response.json()
227
+ project_id = project_data.get("project", {}).get("id")
228
+
229
+ if project_id:
230
+ # Create folder hierarchy
231
+ current_parent_id = "ROOT"
232
+ for folder_name in folder_parts:
233
+ if not folder_name:
234
+ continue
235
+ # Create folder (server handles upsert)
236
+ # NOTE: Do NOT pass experimentId for project-level folders
237
+ folder_response = self._client.post(
238
+ f"/namespaces/{self.namespace}/nodes",
239
+ json={
240
+ "type": "FOLDER",
241
+ "projectId": project_id,
242
+ "parentId": current_parent_id,
243
+ "name": folder_name
244
+ # experimentId intentionally omitted - these are project-level folders
245
+ }
246
+ )
247
+ folder_response.raise_for_status()
248
+ folder_data = folder_response.json()
249
+ current_parent_id = folder_data.get("node", {}).get("id")
250
+
251
+ # Update parent_id for experiment
252
+ parent_id = current_parent_id
253
+
193
254
  # Build payload for unified node API
194
255
  payload = {
195
256
  "type": "EXPERIMENT",
196
257
  "name": name,
197
- "projectId": project_id,
258
+ "parentId": parent_id,
198
259
  }
199
260
 
261
+ # Send projectId if available, otherwise projectSlug (server will auto-create)
262
+ if project_id:
263
+ payload["projectId"] = project_id
264
+ else:
265
+ payload["projectSlug"] = project
266
+
200
267
  if description is not None:
201
268
  payload["description"] = description
202
269
  if tags is not None:
@@ -369,7 +436,10 @@ class RemoteClient:
369
436
  Args:
370
437
  experiment_id: Experiment ID (Snowflake ID)
371
438
  file_path: Local file path
372
- prefix: Logical path prefix (DEPRECATED - use parent_id for folder structure)
439
+ prefix: Logical path prefix for folder structure (e.g., "models/checkpoints")
440
+ Will create nested folders automatically. May include namespace/project
441
+ parts which will be stripped automatically (e.g., "ns/proj/folder1/folder2"
442
+ will create folders: folder1 -> folder2)
373
443
  filename: Original filename
374
444
  description: Optional description
375
445
  tags: Optional tags
@@ -378,7 +448,8 @@ class RemoteClient:
378
448
  content_type: MIME type
379
449
  size_bytes: File size in bytes
380
450
  project_id: Project ID (optional - will be resolved from experiment if not provided)
381
- parent_id: Parent node ID (folder) or "ROOT" for root level
451
+ parent_id: Parent node ID (folder) or "ROOT" for root level.
452
+ If prefix is provided, folders will be created under this parent.
382
453
 
383
454
  Returns:
384
455
  Response dict with node and physicalFile data
@@ -402,6 +473,236 @@ class RemoteClient:
402
473
  if not project_id:
403
474
  raise ValueError(f"Could not resolve project ID for experiment {experiment_id}")
404
475
 
476
+ # Resolve experiment node ID (files should be children of the experiment node, not ROOT)
477
+ # Check cache first, otherwise query
478
+ experiment_node_id = self._id_cache.get(f"exp_node:{experiment_id}")
479
+ if not experiment_node_id:
480
+ # Query to get the experiment node ID
481
+ query = """
482
+ query GetExperimentNode($experimentId: ID!) {
483
+ experimentById(id: $experimentId) {
484
+ id
485
+ }
486
+ }
487
+ """
488
+ # Note: experimentById returns the Experiment record, not the Node
489
+ # We need to find the Node with type=EXPERIMENT and experimentId=experiment_id
490
+ # Use the project nodes query instead
491
+ query = """
492
+ query GetExperimentNode($projectId: ID!, $experimentId: ID!) {
493
+ project(id: $projectId) {
494
+ nodes(parentId: null, maxDepth: 10) {
495
+ id
496
+ type
497
+ experimentId
498
+ children {
499
+ id
500
+ type
501
+ experimentId
502
+ children {
503
+ id
504
+ type
505
+ experimentId
506
+ }
507
+ }
508
+ }
509
+ }
510
+ }
511
+ """
512
+ result = self.graphql_query(query, {"projectId": project_id, "experimentId": experiment_id})
513
+
514
+ # Find the experiment node
515
+ def find_experiment_node(nodes, exp_id):
516
+ for node in nodes:
517
+ if node.get("type") == "EXPERIMENT" and node.get("experimentId") == exp_id:
518
+ return node.get("id")
519
+ if node.get("children"):
520
+ found = find_experiment_node(node["children"], exp_id)
521
+ if found:
522
+ return found
523
+ return None
524
+
525
+ project_nodes = result.get("project", {}).get("nodes", [])
526
+ experiment_node_id = find_experiment_node(project_nodes, experiment_id)
527
+
528
+ if experiment_node_id:
529
+ # Cache it for future uploads
530
+ self._id_cache[f"exp_node:{experiment_id}"] = experiment_node_id
531
+ else:
532
+ # Fallback to ROOT if we can't find the experiment node
533
+ # This might happen for old experiments or legacy data
534
+ experiment_node_id = "ROOT"
535
+
536
+ # Get experiment node path to strip from prefix
537
+ # When we use experiment_node_id as parent, we need to strip the experiment's
538
+ # folder path from the prefix to avoid creating duplicate folders
539
+ # We'll cache this in the id_cache to avoid repeated queries
540
+ cache_key = f"exp_folder_path:{experiment_id}"
541
+ experiment_folder_path = self._id_cache.get(cache_key)
542
+
543
+ if experiment_folder_path is None and experiment_node_id != "ROOT":
544
+ # Query experiment to get its project info for the GraphQL query
545
+ exp_query = """
546
+ query GetExpInfo($experimentId: ID!) {
547
+ experimentById(id: $experimentId) {
548
+ project {
549
+ slug
550
+ namespace {
551
+ slug
552
+ }
553
+ }
554
+ }
555
+ }
556
+ """
557
+ exp_result = self.graphql_query(exp_query, {"experimentId": experiment_id})
558
+ project_slug = exp_result.get("experimentById", {}).get("project", {}).get("slug")
559
+ namespace_slug = exp_result.get("experimentById", {}).get("project", {}).get("namespace", {}).get("slug")
560
+
561
+ if project_slug and namespace_slug:
562
+ # Query to get the experiment node's path
563
+ # This includes all ancestor folders up to the experiment
564
+ query = """
565
+ query GetExperimentPath($namespaceSlug: String!, $projectSlug: String!) {
566
+ project(namespaceSlug: $namespaceSlug, projectSlug: $projectSlug) {
567
+ nodes(parentId: null, maxDepth: 10) {
568
+ id
569
+ name
570
+ type
571
+ experimentId
572
+ parentId
573
+ children {
574
+ id
575
+ name
576
+ type
577
+ experimentId
578
+ parentId
579
+ children {
580
+ id
581
+ name
582
+ type
583
+ experimentId
584
+ parentId
585
+ }
586
+ }
587
+ }
588
+ }
589
+ }
590
+ """
591
+ result = self.graphql_query(query, {"namespaceSlug": namespace_slug, "projectSlug": project_slug})
592
+
593
+ # Build path to experiment node
594
+ def find_node_path(nodes, target_id, current_path=None):
595
+ if current_path is None:
596
+ current_path = []
597
+ for node in nodes:
598
+ new_path = current_path + [node.get("name")]
599
+ if node.get("id") == target_id:
600
+ return new_path
601
+ if node.get("children"):
602
+ found = find_node_path(node["children"], target_id, new_path)
603
+ if found:
604
+ return found
605
+ return None
606
+
607
+ project_nodes = result.get("project", {}).get("nodes", [])
608
+ path_parts = find_node_path(project_nodes, experiment_node_id)
609
+ if path_parts:
610
+ # IMPORTANT: Don't include the experiment node's name itself
611
+ # We want the path TO the experiment's parent folder, not the experiment
612
+ # E.g., if path is ["examples", "exp-name"], we want "examples"
613
+ if len(path_parts) > 1:
614
+ experiment_folder_path = "/".join(path_parts[:-1])
615
+ else:
616
+ # Experiment is at root level, no parent folders
617
+ experiment_folder_path = ""
618
+ # Cache it
619
+ self._id_cache[cache_key] = experiment_folder_path
620
+ else:
621
+ # Couldn't find path, set empty string to avoid re-querying
622
+ experiment_folder_path = ""
623
+ self._id_cache[cache_key] = experiment_folder_path
624
+
625
+ # Use experiment node ID as the parent for file uploads
626
+ # Files and folders should be children of the experiment node
627
+ if parent_id == "ROOT" and experiment_node_id != "ROOT":
628
+ parent_id = experiment_node_id
629
+
630
+ # Parse prefix to create folder hierarchy
631
+ # prefix like "models/checkpoints" should create folders: models -> checkpoints
632
+ # NOTE: The prefix may contain namespace/project parts (e.g., "ns/proj/folder1/folder2")
633
+ # We need to strip the namespace and project parts since we're already in an experiment context
634
+ if prefix and prefix != '/' and prefix.strip():
635
+ # Clean and normalize prefix
636
+ prefix = prefix.strip('/')
637
+
638
+ # Try to detect and strip namespace/project from prefix
639
+ # Common patterns: "namespace/project/folders..." or just "folders..."
640
+ # Since we're in experiment context, we already know the namespace and project
641
+ # Check if prefix starts with namespace
642
+ if prefix.startswith(self.namespace + '/'):
643
+ # Strip namespace
644
+ prefix = prefix[len(self.namespace) + 1:]
645
+
646
+ # Now check if it starts with project slug/name
647
+ # We need to query the experiment to get the project info
648
+ query = """
649
+ query GetExperimentProject($experimentId: ID!) {
650
+ experimentById(id: $experimentId) {
651
+ project {
652
+ slug
653
+ name
654
+ }
655
+ }
656
+ }
657
+ """
658
+ exp_result = self.graphql_query(query, {"experimentId": experiment_id})
659
+ project_info = exp_result.get("experimentById", {}).get("project", {})
660
+ project_slug = project_info.get("slug", "")
661
+ project_name = project_info.get("name", "")
662
+
663
+ # Try to strip project slug or name
664
+ if project_slug and prefix.startswith(project_slug + '/'):
665
+ prefix = prefix[len(project_slug) + 1:]
666
+ elif project_name and prefix.startswith(project_name + '/'):
667
+ prefix = prefix[len(project_name) + 1:]
668
+
669
+ # Strip experiment folder path from prefix since we're using experiment node as parent
670
+ # For example: if prefix is "examples/exp1/models" and experiment is at "examples/exp1",
671
+ # strip "examples/exp1/" to get "models"
672
+ if experiment_folder_path and prefix.startswith(experiment_folder_path + '/'):
673
+ prefix = prefix[len(experiment_folder_path) + 1:]
674
+ elif experiment_folder_path and prefix == experiment_folder_path:
675
+ # Prefix is exactly the experiment path, no subfolders
676
+ prefix = ""
677
+
678
+ if prefix:
679
+ folder_parts = prefix.split('/')
680
+ current_parent_id = parent_id
681
+
682
+ # Create or find each folder in the hierarchy
683
+ # Server handles upsert - will return existing folder if it exists
684
+ for folder_name in folder_parts:
685
+ if not folder_name: # Skip empty parts
686
+ continue
687
+
688
+ # Create folder (server will return existing if duplicate)
689
+ folder_response = self._client.post(
690
+ f"/namespaces/{self.namespace}/nodes",
691
+ json={
692
+ "type": "FOLDER",
693
+ "projectId": project_id,
694
+ "experimentId": experiment_id,
695
+ "parentId": current_parent_id,
696
+ "name": folder_name
697
+ }
698
+ )
699
+ folder_response.raise_for_status()
700
+ folder_data = folder_response.json()
701
+ current_parent_id = folder_data.get("node", {}).get("id")
702
+
703
+ # Update parent_id to the final folder in the hierarchy
704
+ parent_id = current_parent_id
705
+
405
706
  # Prepare multipart form data
406
707
  with open(file_path, "rb") as f:
407
708
  file_content = f.read()
@@ -833,7 +1134,8 @@ class RemoteClient:
833
1134
  if "errors" in result:
834
1135
  raise Exception(f"GraphQL errors: {result['errors']}")
835
1136
 
836
- return result.get("data", {})
1137
+ # Handle case where data is explicitly null in response
1138
+ return result.get("data") or {}
837
1139
 
838
1140
  def list_projects_graphql(self) -> List[Dict[str, Any]]:
839
1141
  """
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes