ml-dash 0.6.3__py3-none-any.whl → 0.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml_dash/__init__.py +1 -1
- ml_dash/cli_commands/api.py +10 -1
- ml_dash/cli_commands/download.py +19 -2
- ml_dash/cli_commands/list.py +18 -1
- ml_dash/cli_commands/upload.py +23 -2
- ml_dash/client.py +565 -83
- ml_dash/experiment.py +114 -54
- {ml_dash-0.6.3.dist-info → ml_dash-0.6.5.dist-info}/METADATA +1 -1
- {ml_dash-0.6.3.dist-info → ml_dash-0.6.5.dist-info}/RECORD +11 -11
- {ml_dash-0.6.3.dist-info → ml_dash-0.6.5.dist-info}/WHEEL +0 -0
- {ml_dash-0.6.3.dist-info → ml_dash-0.6.5.dist-info}/entry_points.txt +0 -0
ml_dash/__init__.py
CHANGED
ml_dash/cli_commands/api.py
CHANGED
@@ -56,6 +56,12 @@ Notes:
         type=str,
         help="ML-Dash server URL (default: https://api.dash.ml)",
     )
+    parser.add_argument(
+        "--namespace",
+        type=str,
+        required=True,
+        help="Namespace to use for queries (required)",
+    )
 
 
 def extract_path(data, path: str):
@@ -131,9 +137,12 @@ def cmd_api(args) -> int:
     # Get remote URL
     remote_url = args.dash_url or config.remote_url or "https://api.dash.ml"
 
+    # Get namespace
+    namespace = args.namespace
+
     try:
         # Initialize client
-        client = RemoteClient(base_url=remote_url)
+        client = RemoteClient(base_url=remote_url, namespace=namespace)
 
         # Determine query type and build query
         if args.mutation:
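The upshot of these two hunks: `ml-dash api` now refuses to run without a namespace, and that namespace is threaded straight into `RemoteClient`. A minimal sketch of the new flag's behavior using plain argparse (surrounding options elided, values invented):

```python
import argparse

# Sketch of the parser after this change; the flag definition matches the
# diff above, everything else is omitted for brevity.
parser = argparse.ArgumentParser(prog="ml-dash api")
parser.add_argument("--namespace", type=str, required=True,
                    help="Namespace to use for queries (required)")

# Omitting --namespace now exits with an argparse error:
#   ml-dash api: error: the following arguments are required: --namespace
args = parser.parse_args(["--namespace", "alice"])
print(args.namespace)  # -> alice
```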
ml_dash/cli_commands/download.py
CHANGED
@@ -240,7 +240,9 @@ class ExperimentDownloader:
         """Get thread-local remote client for safe concurrent access."""
         if not hasattr(self._thread_local, "client"):
             self._thread_local.client = RemoteClient(
-                base_url=self.remote.base_url,
+                base_url=self.remote.base_url,
+                namespace=self.remote.namespace,
+                api_key=self.remote.api_key
             )
         return self._thread_local.client
 
@@ -630,8 +632,23 @@ def cmd_download(args: argparse.Namespace) -> int:
         console.print("[red]Error:[/red] --dash-url is required (or set in config)")
         return 1
 
+    # Extract namespace from project argument
+    namespace = None
+    if args.project:
+        # Parse namespace from project filter (format: "owner/project" or "owner/project/exp")
+        project_parts = args.project.strip("/").split("/")
+        if len(project_parts) >= 2:  # Has at least "owner/project"
+            namespace = project_parts[0]
+
+    if not namespace:
+        console.print(
+            "[red]Error:[/red] --project must be in format 'namespace/project' or 'namespace/project/exp'"
+        )
+        console.print("Example: ml-dash download --project alice/my-project")
+        return 1
+
     # Initialize clients (RemoteClient will auto-load token if api_key is None)
-    remote_client = RemoteClient(base_url=remote_url, api_key=api_key)
+    remote_client = RemoteClient(base_url=remote_url, namespace=namespace, api_key=api_key)
    local_storage = LocalStorage(root_path=Path(args.path))
 
     # Load or create state
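The namespace extraction above is pure string slicing. A standalone sketch of the rule (no ml-dash imports; the helper name `extract_namespace` is invented for illustration):

```python
from typing import Optional

def extract_namespace(project_filter: Optional[str]) -> Optional[str]:
    # Mirrors the cmd_download logic: first path segment is the namespace,
    # but only when at least "owner/project" is present.
    if not project_filter:
        return None
    parts = project_filter.strip("/").split("/")
    return parts[0] if len(parts) >= 2 else None

assert extract_namespace("alice/my-project") == "alice"
assert extract_namespace("alice/my-project/exp-1") == "alice"
assert extract_namespace("/alice/my-project/") == "alice"  # slashes stripped first
assert extract_namespace("just-a-project") is None          # CLI exits with an error
```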
ml_dash/cli_commands/list.py
CHANGED
@@ -260,9 +260,26 @@ def cmd_list(args: argparse.Namespace) -> int:
     # Get API key (command line > config > auto-loaded from storage)
     api_key = args.api_key or config.api_key
 
+    # Extract namespace from project argument
+    namespace = None
+    if args.project:
+        # Parse namespace from project filter (format: "namespace/project")
+        project_parts = args.project.strip("/").split("/")
+        # For simple patterns without '/', treat as project-only pattern
+        if '/' in args.project and len(project_parts) >= 2:
+            namespace = project_parts[0]
+
+    if not namespace:
+        console.print(
+            "[red]Error:[/red] --project must be in format 'namespace/project'"
+        )
+        console.print("Example: ml-dash list --project alice/my-project")
+        console.print("Or use glob patterns: ml-dash list --project alice/proj-*")
+        return 1
+
     # Create remote client
     try:
-        remote_client = RemoteClient(base_url=remote_url, api_key=api_key)
+        remote_client = RemoteClient(base_url=remote_url, namespace=namespace, api_key=api_key)
     except Exception as e:
         console.print(f"[red]Error connecting to remote:[/red] {e}")
         return 1
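`list` is slightly different from `download` here: a pattern without any '/' is treated as project-only, but it still fails the namespace check, so even glob patterns must carry a namespace segment. A quick demonstration of the rule:

```python
# Same slicing as cmd_list; patterns are invented examples.
patterns = ["alice/proj-*", "alice/my-project", "proj-*"]
for p in patterns:
    parts = p.strip("/").split("/")
    namespace = parts[0] if "/" in p and len(parts) >= 2 else None
    print(f"{p!r:20} -> namespace={namespace!r}")
# 'alice/proj-*'       -> namespace='alice'
# 'alice/my-project'   -> namespace='alice'
# 'proj-*'             -> namespace=None   (CLI exits with an error)
```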
ml_dash/cli_commands/upload.py
CHANGED
@@ -632,7 +632,9 @@ class ExperimentUploader:
         # Create a new client for this thread
         # Use graphql_base_url (without /api) since RemoteClient.__init__ will add /api
         self._thread_local.client = RemoteClient(
-            base_url=self.remote.graphql_base_url,
+            base_url=self.remote.graphql_base_url,
+            namespace=self.remote.namespace,
+            api_key=self.remote.api_key
         )
         return self._thread_local.client
 
@@ -1231,8 +1233,27 @@ def cmd_upload(args: argparse.Namespace) -> int:
         f"[green]{len(valid_experiments)} experiment(s) ready to upload[/green]"
     )
 
+    # Extract namespace from target or first experiment
+    namespace = None
+    if args.target:
+        # Parse namespace from target prefix (format: "owner/project/...")
+        target_parts = args.target.strip("/").split("/")
+        if len(target_parts) >= 1:
+            namespace = target_parts[0]
+    if not namespace and valid_experiments:
+        # Parse namespace from first experiment's prefix
+        first_prefix = valid_experiments[0].prefix
+        if first_prefix:
+            prefix_parts = first_prefix.strip("/").split("/")
+            if len(prefix_parts) >= 1:
+                namespace = prefix_parts[0]
+
+    if not namespace:
+        console.print("[red]Error:[/red] Could not determine namespace from experiments or target")
+        return 1
+
     # Initialize remote client and local storage
-    remote_client = RemoteClient(base_url=remote_url, api_key=api_key)
+    remote_client = RemoteClient(base_url=remote_url, namespace=namespace, api_key=api_key)
     local_storage = LocalStorage(root_path=local_path)
 
     # Upload experiments with progress tracking
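Upload resolves the namespace through a fallback chain — explicit `--target` first, then the first experiment's prefix — and aborts if neither yields one. The chain, reduced to a sketch (`resolve_namespace` is an invented helper name; `target` and `first_prefix` stand in for `args.target` and `valid_experiments[0].prefix`):

```python
from typing import Optional

def resolve_namespace(target: Optional[str], first_prefix: Optional[str]) -> Optional[str]:
    # First non-empty candidate wins; its first path segment is the namespace.
    for candidate in (target, first_prefix):
        if candidate:
            parts = candidate.strip("/").split("/")
            if parts and parts[0]:
                return parts[0]
    return None

assert resolve_namespace("alice/proj", None) == "alice"
assert resolve_namespace(None, "bob/proj/exp-1") == "bob"
assert resolve_namespace(None, None) is None  # upload aborts with an error
```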
ml_dash/client.py
CHANGED
@@ -9,12 +9,13 @@ import httpx
 class RemoteClient:
     """Client for communicating with ML-Dash server."""
 
-    def __init__(self, base_url: str, api_key: Optional[str] = None):
+    def __init__(self, base_url: str, namespace: str, api_key: Optional[str] = None):
         """
         Initialize remote client.
 
         Args:
             base_url: Base URL of ML-Dash server (e.g., "http://localhost:3000")
+            namespace: Namespace slug (e.g., "my-namespace")
             api_key: JWT token for authentication (optional - auto-loads from storage if not provided)
 
         Note:
@@ -27,6 +28,9 @@ class RemoteClient:
         # Add /api prefix to base URL for REST API calls
         self.base_url = base_url.rstrip("/") + "/api"
 
+        # Store namespace
+        self.namespace = namespace
+
         # If no api_key provided, try to load from storage
         if not api_key:
             from .auth.token_storage import get_token_storage
@@ -37,6 +41,7 @@ class RemoteClient:
         self.api_key = api_key
         self._rest_client = None
         self._gql_client = None
+        self._id_cache: Dict[str, str] = {}  # Cache for slug -> ID mappings
 
     def _ensure_authenticated(self):
         """Check if authenticated, raise error if not."""
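This is the breaking change the rest of the diff flows from: `namespace` is now a required positional parameter, so any 0.6.3-era call site that passes only `base_url` raises a `TypeError`. A sketch of old vs. new (note that constructing the client may also auto-load a token from local storage, per the docstring's Note):

```python
from ml_dash.client import RemoteClient

# 0.6.3 call sites like this now fail:
#   TypeError: __init__() missing 1 required positional argument: 'namespace'
# client = RemoteClient(base_url="https://api.dash.ml")

# 0.6.5 requires the namespace slug up front:
client = RemoteClient(base_url="https://api.dash.ml", namespace="alice")
print(client.base_url)   # https://api.dash.ml/api  (/api suffix is appended)
print(client.namespace)  # alice
```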
@@ -77,6 +82,83 @@ class RemoteClient:
         )
         return self._gql_client
 
+    def _get_project_id(self, project_slug: str) -> Optional[str]:
+        """
+        Resolve project ID from slug using GraphQL.
+
+        Args:
+            project_slug: Project slug
+
+        Returns:
+            Project ID (Snowflake ID) if found, None if not found
+            When None is returned, the server will auto-create the project
+        """
+        cache_key = f"project:{self.namespace}:{project_slug}"
+        if cache_key in self._id_cache:
+            return self._id_cache[cache_key]
+
+        query = """
+            query GetProject($namespace: String!) {
+                namespace(slug: $namespace) {
+                    projects {
+                        id
+                        slug
+                    }
+                }
+            }
+        """
+        result = self.graphql_query(query, {
+            "namespace": self.namespace
+        })
+
+        namespace_data = result.get("namespace")
+        if namespace_data is None:
+            raise ValueError(f"Namespace '{self.namespace}' not found. Please check the namespace exists on the server.")
+
+        projects = namespace_data.get("projects", [])
+        for project in projects:
+            if project["slug"] == project_slug:
+                project_id = project["id"]
+                self._id_cache[cache_key] = project_id
+                return project_id
+
+        # Project not found - return None to let server auto-create it
+        return None
+
+    def _get_experiment_node_id(self, experiment_id: str) -> str:
+        """
+        Resolve node ID from experiment ID using GraphQL.
+
+        Args:
+            experiment_id: Experiment ID
+
+        Returns:
+            Node ID
+
+        Raises:
+            ValueError: If experiment node not found
+        """
+        cache_key = f"exp_node:{experiment_id}"
+        if cache_key in self._id_cache:
+            return self._id_cache[cache_key]
+
+        query = """
+            query GetExperimentNode($experimentId: ID!) {
+                experimentNode(experimentId: $experimentId) {
+                    id
+                }
+            }
+        """
+        result = self.graphql_query(query, {"experimentId": experiment_id})
+
+        node = result.get("experimentNode")
+        if not node:
+            raise ValueError(f"No node found for experiment ID '{experiment_id}'")
+
+        node_id = node["id"]
+        self._id_cache[cache_key] = node_id
+        return node_id
+
     def create_or_update_experiment(
         self,
         project: str,
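Both helpers memoize into `self._id_cache`, so each slug or experiment ID costs at most one GraphQL round-trip per client instance. The cache-key scheme, shown with invented values:

```python
# Cache-key scheme used by the two helpers above; all values are invented.
_id_cache = {}
namespace, project_slug, experiment_id = "alice", "my-project", "7310293847"

_id_cache[f"project:{namespace}:{project_slug}"] = "7310200001"  # slug -> project ID
_id_cache[f"exp_node:{experiment_id}"] = "7310200777"            # experiment ID -> node ID

# A second lookup for the same slug hits the dict and skips GraphQL entirely.
assert f"project:{namespace}:{project_slug}" in _id_cache
```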
@@ -89,28 +171,99 @@ class RemoteClient:
         metadata: Optional[Dict[str, Any]] = None,
     ) -> Dict[str, Any]:
         """
-        Create or update an experiment.
+        Create or update an experiment using unified node API.
 
         Args:
-            project: Project
-            name: Experiment name
+            project: Project slug
+            name: Experiment name
             description: Optional description
             tags: Optional list of tags
             bindrs: Optional list of bindrs
-            prefix: Full prefix path
+            prefix: Full prefix path (ignored in new API - use folders instead)
             write_protected: If True, experiment becomes immutable
             metadata: Optional metadata dict
 
         Returns:
-            Response dict with experiment,
+            Response dict with experiment, node, and project data
+            Note: Project will be auto-created if it doesn't exist
 
         Raises:
             httpx.HTTPStatusError: If request fails
         """
+        # Resolve project ID from slug (returns None if not found)
+        project_id = self._get_project_id(project)
+
+        # Parse prefix to create folder hierarchy for experiment
+        # prefix format: "namespace/project/folder1/folder2/experiment_name"
+        # We need to create folders: folder1 -> folder2 and place experiment under folder2
+        parent_id = "ROOT"
+
+        if prefix:
+            # Parse prefix to extract folder path
+            parts = prefix.strip('/').split('/')
+            # parts: [namespace, project, folder1, folder2, ..., experiment_name]
+
+            if len(parts) >= 3:
+                # We have at least namespace/project/something
+                # Extract folder parts (everything between project and experiment name)
+                # Skip namespace (parts[0]) and project (parts[1])
+                # Skip experiment name (parts[-1])
+                folder_parts = parts[2:-1] if len(parts) > 3 else []
+
+                if folder_parts:
+                    # Ensure we have a project_id for folder creation
+                    if not project_id:
+                        # Create the project first since we need its ID for folders
+                        project_response = self._client.post(
+                            f"/namespaces/{self.namespace}/nodes",
+                            json={
+                                "type": "PROJECT",
+                                "name": project,
+                                "slug": project,
+                            }
+                        )
+                        project_response.raise_for_status()
+                        project_data = project_response.json()
+                        project_id = project_data.get("project", {}).get("id")
+
+                    if project_id:
+                        # Create folder hierarchy
+                        current_parent_id = "ROOT"
+                        for folder_name in folder_parts:
+                            if not folder_name:
+                                continue
+                            # Create folder (server handles upsert)
+                            # NOTE: Do NOT pass experimentId for project-level folders
+                            folder_response = self._client.post(
+                                f"/namespaces/{self.namespace}/nodes",
+                                json={
+                                    "type": "FOLDER",
+                                    "projectId": project_id,
+                                    "parentId": current_parent_id,
+                                    "name": folder_name
+                                    # experimentId intentionally omitted - these are project-level folders
+                                }
+                            )
+                            folder_response.raise_for_status()
+                            folder_data = folder_response.json()
+                            current_parent_id = folder_data.get("node", {}).get("id")
+
+                        # Update parent_id for experiment
+                        parent_id = current_parent_id
+
+        # Build payload for unified node API
         payload = {
+            "type": "EXPERIMENT",
             "name": name,
+            "parentId": parent_id,
         }
 
+        # Send projectId if available, otherwise projectSlug (server will auto-create)
+        if project_id:
+            payload["projectId"] = project_id
+        else:
+            payload["projectSlug"] = project
+
         if description is not None:
             payload["description"] = description
         if tags is not None:
@@ -121,15 +274,22 @@ class RemoteClient:
             payload["writeProtected"] = write_protected
         if metadata is not None:
             payload["metadata"] = metadata
-        if prefix is not None:
-            payload["prefix"] = prefix
 
+        # Call unified node creation API
         response = self._client.post(
-            f"/
+            f"/namespaces/{self.namespace}/nodes",
             json=payload,
         )
         response.raise_for_status()
-
+        result = response.json()
+
+        # Cache the experiment node ID mapping
+        if "experiment" in result and "node" in result:
+            exp_id = result["experiment"]["id"]
+            node_id = result["node"]["id"]
+            self._id_cache[f"exp_node:{exp_id}"] = node_id
+
+        return result
 
     def update_experiment_status(
         self,
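The folder-hierarchy rule buried in this hunk is easy to misread, so here it is as bare string logic (values invented): everything between the project segment and the final segment becomes a chain of FOLDER nodes, and the experiment is parented under the last one.

```python
# How create_or_update_experiment splits a prefix into folders, shape-for-shape
# with the diff above; the prefix is an invented example.
prefix = "alice/my-project/sweeps/lr-0.01/run-3"
parts = prefix.strip("/").split("/")
# parts[0] = namespace, parts[1] = project, parts[-1] = experiment name
folder_parts = parts[2:-1] if len(parts) > 3 else []
print(folder_parts)  # ['sweeps', 'lr-0.01'] -> FOLDER nodes created in order;
                     # the EXPERIMENT node is parented under 'lr-0.01'.
# For a bare "ns/proj/exp" prefix, folder_parts is [] and parent stays "ROOT".
```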
@@ -137,24 +297,27 @@ class RemoteClient:
         status: str,
     ) -> Dict[str, Any]:
         """
-        Update experiment status.
+        Update experiment status using unified node API.
 
         Args:
             experiment_id: Experiment ID
             status: Status value - "RUNNING" | "COMPLETED" | "FAILED" | "CANCELLED"
 
         Returns:
-            Response dict with updated
+            Response dict with updated node data
 
         Raises:
             httpx.HTTPStatusError: If request fails
+            ValueError: If experiment node not found
         """
-
-
-
+        # Resolve node ID from experiment ID
+        node_id = self._get_experiment_node_id(experiment_id)
+
+        # Update node with new status
+        payload = {"status": status}
 
         response = self._client.patch(
-            f"/
+            f"/nodes/{node_id}",
             json=payload,
         )
         response.raise_for_status()
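Status updates are now two round-trips: resolve the experiment's node ID (cached after the first call), then PATCH the node. A standalone httpx sketch of the second step, assuming a local server and an already-resolved node ID (values invented):

```python
import httpx

# Endpoint shape matches the diff above; node_id would come from
# _get_experiment_node_id in the real client.
node_id = "7310200777"  # invented
with httpx.Client(base_url="http://localhost:3000/api") as client:
    response = client.patch(f"/nodes/{node_id}", json={"status": "COMPLETED"})
    response.raise_for_status()
```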
@@ -263,15 +426,20 @@ class RemoteClient:
         metadata: Optional[Dict[str, Any]],
         checksum: str,
         content_type: str,
-        size_bytes: int
+        size_bytes: int,
+        project_id: Optional[str] = None,
+        parent_id: str = "ROOT"
     ) -> Dict[str, Any]:
         """
-        Upload a file to an experiment.
+        Upload a file to an experiment using unified node API.
 
         Args:
             experiment_id: Experiment ID (Snowflake ID)
             file_path: Local file path
-            prefix: Logical path prefix
+            prefix: Logical path prefix for folder structure (e.g., "models/checkpoints")
+                Will create nested folders automatically. May include namespace/project
+                parts which will be stripped automatically (e.g., "ns/proj/folder1/folder2"
+                will create folders: folder1 -> folder2)
             filename: Original filename
             description: Optional description
             tags: Optional tags
@@ -279,23 +447,274 @@ class RemoteClient:
             checksum: SHA256 checksum
             content_type: MIME type
             size_bytes: File size in bytes
+            project_id: Project ID (optional - will be resolved from experiment if not provided)
+            parent_id: Parent node ID (folder) or "ROOT" for root level.
+                If prefix is provided, folders will be created under this parent.
 
         Returns:
-
+            Response dict with node and physicalFile data
 
         Raises:
             httpx.HTTPStatusError: If request fails
         """
+        # If project_id not provided, need to resolve it from experiment
+        # For now, assuming we have it or it will be queried separately
+        if project_id is None:
+            # Query experiment to get project ID
+            query = """
+                query GetExperimentProject($experimentId: ID!) {
+                    experimentById(id: $experimentId) {
+                        projectId
+                    }
+                }
+            """
+            result = self.graphql_query(query, {"experimentId": experiment_id})
+            project_id = result.get("experimentById", {}).get("projectId")
+            if not project_id:
+                raise ValueError(f"Could not resolve project ID for experiment {experiment_id}")
+
+        # Resolve experiment node ID (files should be children of the experiment node, not ROOT)
+        # Check cache first, otherwise query
+        experiment_node_id = self._id_cache.get(f"exp_node:{experiment_id}")
+        if not experiment_node_id:
+            # Query to get the experiment node ID
+            query = """
+                query GetExperimentNode($experimentId: ID!) {
+                    experimentById(id: $experimentId) {
+                        id
+                    }
+                }
+            """
+            # Note: experimentById returns the Experiment record, not the Node
+            # We need to find the Node with type=EXPERIMENT and experimentId=experiment_id
+            # Use the project nodes query instead
+            query = """
+                query GetExperimentNode($projectId: ID!, $experimentId: ID!) {
+                    project(id: $projectId) {
+                        nodes(parentId: null, maxDepth: 10) {
+                            id
+                            type
+                            experimentId
+                            children {
+                                id
+                                type
+                                experimentId
+                                children {
+                                    id
+                                    type
+                                    experimentId
+                                }
+                            }
+                        }
+                    }
+                }
+            """
+            result = self.graphql_query(query, {"projectId": project_id, "experimentId": experiment_id})
+
+            # Find the experiment node
+            def find_experiment_node(nodes, exp_id):
+                for node in nodes:
+                    if node.get("type") == "EXPERIMENT" and node.get("experimentId") == exp_id:
+                        return node.get("id")
+                    if node.get("children"):
+                        found = find_experiment_node(node["children"], exp_id)
+                        if found:
+                            return found
+                return None
+
+            project_nodes = result.get("project", {}).get("nodes", [])
+            experiment_node_id = find_experiment_node(project_nodes, experiment_id)
+
+            if experiment_node_id:
+                # Cache it for future uploads
+                self._id_cache[f"exp_node:{experiment_id}"] = experiment_node_id
+            else:
+                # Fallback to ROOT if we can't find the experiment node
+                # This might happen for old experiments or legacy data
+                experiment_node_id = "ROOT"
+
+        # Get experiment node path to strip from prefix
+        # When we use experiment_node_id as parent, we need to strip the experiment's
+        # folder path from the prefix to avoid creating duplicate folders
+        # We'll cache this in the id_cache to avoid repeated queries
+        cache_key = f"exp_folder_path:{experiment_id}"
+        experiment_folder_path = self._id_cache.get(cache_key)
+
+        if experiment_folder_path is None and experiment_node_id != "ROOT":
+            # Query experiment to get its project info for the GraphQL query
+            exp_query = """
+                query GetExpInfo($experimentId: ID!) {
+                    experimentById(id: $experimentId) {
+                        project {
+                            slug
+                            namespace {
+                                slug
+                            }
+                        }
+                    }
+                }
+            """
+            exp_result = self.graphql_query(exp_query, {"experimentId": experiment_id})
+            project_slug = exp_result.get("experimentById", {}).get("project", {}).get("slug")
+            namespace_slug = exp_result.get("experimentById", {}).get("project", {}).get("namespace", {}).get("slug")
+
+            if project_slug and namespace_slug:
+                # Query to get the experiment node's path
+                # This includes all ancestor folders up to the experiment
+                query = """
+                    query GetExperimentPath($namespaceSlug: String!, $projectSlug: String!) {
+                        project(namespaceSlug: $namespaceSlug, projectSlug: $projectSlug) {
+                            nodes(parentId: null, maxDepth: 10) {
+                                id
+                                name
+                                type
+                                experimentId
+                                parentId
+                                children {
+                                    id
+                                    name
+                                    type
+                                    experimentId
+                                    parentId
+                                    children {
+                                        id
+                                        name
+                                        type
+                                        experimentId
+                                        parentId
+                                    }
+                                }
+                            }
+                        }
+                    }
+                """
+                result = self.graphql_query(query, {"namespaceSlug": namespace_slug, "projectSlug": project_slug})
+
+                # Build path to experiment node
+                def find_node_path(nodes, target_id, current_path=None):
+                    if current_path is None:
+                        current_path = []
+                    for node in nodes:
+                        new_path = current_path + [node.get("name")]
+                        if node.get("id") == target_id:
+                            return new_path
+                        if node.get("children"):
+                            found = find_node_path(node["children"], target_id, new_path)
+                            if found:
+                                return found
+                    return None
+
+                project_nodes = result.get("project", {}).get("nodes", [])
+                path_parts = find_node_path(project_nodes, experiment_node_id)
+                if path_parts:
+                    # IMPORTANT: Don't include the experiment node's name itself
+                    # We want the path TO the experiment's parent folder, not the experiment
+                    # E.g., if path is ["examples", "exp-name"], we want "examples"
+                    if len(path_parts) > 1:
+                        experiment_folder_path = "/".join(path_parts[:-1])
+                    else:
+                        # Experiment is at root level, no parent folders
+                        experiment_folder_path = ""
+                    # Cache it
+                    self._id_cache[cache_key] = experiment_folder_path
+                else:
+                    # Couldn't find path, set empty string to avoid re-querying
+                    experiment_folder_path = ""
+                    self._id_cache[cache_key] = experiment_folder_path
+
+        # Use experiment node ID as the parent for file uploads
+        # Files and folders should be children of the experiment node
+        if parent_id == "ROOT" and experiment_node_id != "ROOT":
+            parent_id = experiment_node_id
+
+        # Parse prefix to create folder hierarchy
+        # prefix like "models/checkpoints" should create folders: models -> checkpoints
+        # NOTE: The prefix may contain namespace/project parts (e.g., "ns/proj/folder1/folder2")
+        # We need to strip the namespace and project parts since we're already in an experiment context
+        if prefix and prefix != '/' and prefix.strip():
+            # Clean and normalize prefix
+            prefix = prefix.strip('/')
+
+            # Try to detect and strip namespace/project from prefix
+            # Common patterns: "namespace/project/folders..." or just "folders..."
+            # Since we're in experiment context, we already know the namespace and project
+            # Check if prefix starts with namespace
+            if prefix.startswith(self.namespace + '/'):
+                # Strip namespace
+                prefix = prefix[len(self.namespace) + 1:]
+
+            # Now check if it starts with project slug/name
+            # We need to query the experiment to get the project info
+            query = """
+                query GetExperimentProject($experimentId: ID!) {
+                    experimentById(id: $experimentId) {
+                        project {
+                            slug
+                            name
+                        }
+                    }
+                }
+            """
+            exp_result = self.graphql_query(query, {"experimentId": experiment_id})
+            project_info = exp_result.get("experimentById", {}).get("project", {})
+            project_slug = project_info.get("slug", "")
+            project_name = project_info.get("name", "")
+
+            # Try to strip project slug or name
+            if project_slug and prefix.startswith(project_slug + '/'):
+                prefix = prefix[len(project_slug) + 1:]
+            elif project_name and prefix.startswith(project_name + '/'):
+                prefix = prefix[len(project_name) + 1:]
+
+            # Strip experiment folder path from prefix since we're using experiment node as parent
+            # For example: if prefix is "examples/exp1/models" and experiment is at "examples/exp1",
+            # strip "examples/exp1/" to get "models"
+            if experiment_folder_path and prefix.startswith(experiment_folder_path + '/'):
+                prefix = prefix[len(experiment_folder_path) + 1:]
+            elif experiment_folder_path and prefix == experiment_folder_path:
+                # Prefix is exactly the experiment path, no subfolders
+                prefix = ""
+
+            if prefix:
+                folder_parts = prefix.split('/')
+                current_parent_id = parent_id
+
+                # Create or find each folder in the hierarchy
+                # Server handles upsert - will return existing folder if it exists
+                for folder_name in folder_parts:
+                    if not folder_name:  # Skip empty parts
+                        continue
+
+                    # Create folder (server will return existing if duplicate)
+                    folder_response = self._client.post(
+                        f"/namespaces/{self.namespace}/nodes",
+                        json={
+                            "type": "FOLDER",
+                            "projectId": project_id,
+                            "experimentId": experiment_id,
+                            "parentId": current_parent_id,
+                            "name": folder_name
+                        }
+                    )
+                    folder_response.raise_for_status()
+                    folder_data = folder_response.json()
+                    current_parent_id = folder_data.get("node", {}).get("id")
+
+                # Update parent_id to the final folder in the hierarchy
+                parent_id = current_parent_id
+
         # Prepare multipart form data
-        # Read file content first (httpx needs content, not file handle)
         with open(file_path, "rb") as f:
             file_content = f.read()
 
         files = {"file": (filename, file_content, content_type)}
         data = {
-            "
+            "type": "FILE",
+            "projectId": project_id,
+            "experimentId": experiment_id,
+            "parentId": parent_id,
+            "name": filename,
             "checksum": checksum,
-            "sizeBytes": str(size_bytes),
         }
         if description:
             data["description"] = description
@@ -305,15 +724,53 @@ class RemoteClient:
             import json
             data["metadata"] = json.dumps(metadata)
 
-        #
+        # Call unified node creation API
         response = self._client.post(
-            f"/
+            f"/namespaces/{self.namespace}/nodes",
             files=files,
             data=data
         )
 
         response.raise_for_status()
-
+        result = response.json()
+
+        # Transform unified node response to expected file metadata format
+        # The server returns {node: {...}, physicalFile: {...}}
+        # We need to flatten it to match the expected format
+        node = result.get("node", {})
+        physical_file = result.get("physicalFile", {})
+
+        # Convert BigInt IDs and sizeBytes from string back to appropriate types
+        # Node ID should remain as string for consistency
+        node_id = node.get("id")
+        if isinstance(node_id, (int, float)):
+            # If it was deserialized as a number, convert to string to preserve full precision
+            node_id = str(int(node_id))
+
+        size_bytes = physical_file.get("sizeBytes")
+        if isinstance(size_bytes, str):
+            size_bytes = int(size_bytes)
+
+        # Use experimentId from node, not the parameter (which might be a path string)
+        experiment_id_from_node = node.get("experimentId")
+        if isinstance(experiment_id_from_node, (int, float)):
+            experiment_id_from_node = str(int(experiment_id_from_node))
+
+        return {
+            "id": node_id,
+            "experimentId": experiment_id_from_node or experiment_id,
+            "path": prefix,  # Use prefix as path for backward compatibility
+            "filename": filename,
+            "description": node.get("description"),
+            "tags": node.get("tags", []),
+            "contentType": physical_file.get("contentType"),
+            "sizeBytes": size_bytes,
+            "checksum": physical_file.get("checksum"),
+            "metadata": node.get("metadata"),
+            "uploadedAt": node.get("createdAt"),
+            "updatedAt": node.get("updatedAt"),
+            "deletedAt": node.get("deletedAt"),
+        }
 
     def list_files(
         self,
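The three strip passes in the prefix handling run in a fixed order: namespace, then project slug/name, then the experiment's own folder path. Walking one invented prefix through them:

```python
# Order of the strip passes in upload_file, as plain string ops.
# All values are invented; the logic mirrors the diff above.
namespace, project_slug = "alice", "my-project"
experiment_folder_path = "examples/exp1"   # as cached for the experiment
prefix = "alice/my-project/examples/exp1/models"

prefix = prefix.strip("/")
if prefix.startswith(namespace + "/"):
    prefix = prefix[len(namespace) + 1:]               # my-project/examples/exp1/models
if prefix.startswith(project_slug + "/"):
    prefix = prefix[len(project_slug) + 1:]            # examples/exp1/models
if experiment_folder_path and prefix.startswith(experiment_folder_path + "/"):
    prefix = prefix[len(experiment_folder_path) + 1:]  # models
print(prefix.split("/"))  # ['models'] -> FOLDER node(s) created under the experiment
```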
@@ -322,48 +779,72 @@ class RemoteClient:
         tags: Optional[List[str]] = None
     ) -> List[Dict[str, Any]]:
         """
-        List files in an experiment.
+        List files in an experiment using GraphQL.
 
         Args:
             experiment_id: Experiment ID (Snowflake ID)
-            prefix: Optional prefix filter
+            prefix: Optional prefix filter (DEPRECATED - filtering not supported in new API)
             tags: Optional tags filter
 
         Returns:
-            List of file
+            List of file node dicts
 
         Raises:
             httpx.HTTPStatusError: If request fails
         """
-
-
-
+        query = """
+            query ListExperimentFiles($experimentId: ID!) {
+                experimentById(id: $experimentId) {
+                    files {
+                        id
+                        name
+                        description
+                        tags
+                        metadata
+                        createdAt
+                        pPath
+                        physicalFile {
+                            id
+                            filename
+                            contentType
+                            sizeBytes
+                            checksum
+                            s3Url
+                        }
+                    }
+                }
+            }
+        """
+        result = self.graphql_query(query, {"experimentId": experiment_id})
+        files = result.get("experimentById", {}).get("files", [])
+
+        # Apply client-side filtering if tags specified
         if tags:
-
+            filtered_files = []
+            for file in files:
+                file_tags = file.get("tags", [])
+                if any(tag in file_tags for tag in tags):
+                    filtered_files.append(file)
+            return filtered_files
 
-
-            f"/experiments/{experiment_id}/files",
-            params=params
-        )
-        response.raise_for_status()
-        result = response.json()
-        return result.get("files", [])
+        return files
 
     def get_file(self, experiment_id: str, file_id: str) -> Dict[str, Any]:
         """
-        Get file metadata.
+        Get file metadata using unified node API.
 
         Args:
-            experiment_id: Experiment ID (
-            file_id: File ID (Snowflake ID)
+            experiment_id: Experiment ID (DEPRECATED - not used in new API)
+            file_id: File node ID (Snowflake ID)
 
         Returns:
-
+            Node metadata dict
 
         Raises:
             httpx.HTTPStatusError: If request fails
         """
-
+        # file_id is actually the node ID in the new system
+        response = self._client.get(f"/nodes/{file_id}")
         response.raise_for_status()
         return response.json()
 
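Since the GraphQL files query carries no tag argument, filtering moved client-side; a file survives if *any* requested tag matches (`any`, not `all`). An equivalent one-liner over invented data:

```python
# Client-side tag filter equivalent to the loop in list_files above.
files = [
    {"name": "model.pt", "tags": ["checkpoint", "best"]},
    {"name": "log.txt", "tags": []},
]
tags = ["best", "final"]
filtered = [f for f in files if any(t in f.get("tags", []) for t in tags)]
print([f["name"] for f in filtered])  # ['model.pt']
```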
@@ -374,11 +855,11 @@ class RemoteClient:
         dest_path: Optional[str] = None
     ) -> str:
         """
-        Download a file
+        Download a file using unified node API.
 
         Args:
-            experiment_id: Experiment ID (
-            file_id: File ID (Snowflake ID)
+            experiment_id: Experiment ID (DEPRECATED - not used in new API)
+            file_id: File node ID (Snowflake ID)
             dest_path: Optional destination path (defaults to original filename)
 
         Returns:
@@ -390,40 +871,39 @@ class RemoteClient:
         """
         # Get file metadata first to get filename and checksum
         file_metadata = self.get_file(experiment_id, file_id)
-        filename = file_metadata
-        expected_checksum = file_metadata
+        filename = file_metadata.get("name") or file_metadata.get("physicalFile", {}).get("filename")
+        expected_checksum = file_metadata.get("physicalFile", {}).get("checksum")
 
         # Determine destination path
         if dest_path is None:
             dest_path = filename
 
-        # Download file
-        response = self._client.get(
-            f"/experiments/{experiment_id}/files/{file_id}/download"
-        )
+        # Download file using node API
+        response = self._client.get(f"/nodes/{file_id}/download")
         response.raise_for_status()
 
         # Write to file
         with open(dest_path, "wb") as f:
             f.write(response.content)
 
-        # Verify checksum
-
-
-
-
-
-
+        # Verify checksum if available
+        if expected_checksum:
+            from .files import verify_checksum
+            if not verify_checksum(dest_path, expected_checksum):
+                # Delete corrupted file
+                import os
+                os.remove(dest_path)
+                raise ValueError(f"Checksum verification failed for file {file_id}")
 
         return dest_path
 
     def delete_file(self, experiment_id: str, file_id: str) -> Dict[str, Any]:
         """
-        Delete a file (soft delete).
+        Delete a file using unified node API (soft delete).
 
         Args:
-            experiment_id: Experiment ID (
-            file_id: File ID (Snowflake ID)
+            experiment_id: Experiment ID (DEPRECATED - not used in new API)
+            file_id: File node ID (Snowflake ID)
 
         Returns:
             Dict with id and deletedAt
@@ -431,7 +911,7 @@ class RemoteClient:
         Raises:
             httpx.HTTPStatusError: If request fails
         """
-        response = self._client.delete(f"/
+        response = self._client.delete(f"/nodes/{file_id}")
         response.raise_for_status()
         return response.json()
 
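`verify_checksum` is imported from `ml_dash.files`, which is not part of this diff. Given that the docstrings call the field a SHA256 checksum, a plausible shape for it is the sketch below — an assumption, not the package's actual implementation:

```python
import hashlib

# Assumed shape of ml_dash.files.verify_checksum; the real helper may differ.
def verify_checksum(path: str, expected: str, chunk_size: int = 8192) -> bool:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # Hash in chunks so large artifacts don't need to fit in memory.
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected
```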
@@ -444,17 +924,17 @@ class RemoteClient:
         metadata: Optional[Dict[str, Any]] = None
     ) -> Dict[str, Any]:
         """
-        Update file metadata.
+        Update file metadata using unified node API.
 
         Args:
-            experiment_id: Experiment ID (
-            file_id: File ID (Snowflake ID)
+            experiment_id: Experiment ID (DEPRECATED - not used in new API)
+            file_id: File node ID (Snowflake ID)
             description: Optional description
             tags: Optional tags
             metadata: Optional metadata
 
         Returns:
-            Updated
+            Updated node metadata dict
 
         Raises:
             httpx.HTTPStatusError: If request fails
@@ -468,7 +948,7 @@ class RemoteClient:
             payload["metadata"] = metadata
 
         response = self._client.patch(
-            f"/
+            f"/nodes/{file_id}",
             json=payload
         )
         response.raise_for_status()
@@ -654,7 +1134,8 @@ class RemoteClient:
         if "errors" in result:
             raise Exception(f"GraphQL errors: {result['errors']}")
 
-
+        # Handle case where data is explicitly null in response
+        return result.get("data") or {}
 
     def list_projects_graphql(self) -> List[Dict[str, Any]]:
         """
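The `or {}` here is deliberate: a GraphQL response can contain an explicit `"data": null`, and `dict.get`'s default only applies when the key is *missing*. A two-line demonstration:

```python
# Why `or {}` rather than `.get("data", {})`:
result = {"data": None}
print(result.get("data", {}))    # None -- key exists, default unused
print(result.get("data") or {})  # {}   -- explicit null coerced to empty dict
```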
@@ -905,11 +1386,11 @@ class RemoteClient:
         self, experiment_id: str, file_id: str, dest_path: str
     ) -> str:
         """
-        Download a file with streaming for large files.
+        Download a file with streaming for large files using unified node API.
 
         Args:
-            experiment_id: Experiment ID (
-            file_id: File ID (Snowflake ID)
+            experiment_id: Experiment ID (DEPRECATED - not used in new API)
+            file_id: File node ID (Snowflake ID)
             dest_path: Destination path to save file
 
         Returns:
@@ -921,22 +1402,23 @@ class RemoteClient:
         """
         # Get metadata first for checksum
         file_metadata = self.get_file(experiment_id, file_id)
-        expected_checksum = file_metadata
+        expected_checksum = file_metadata.get("physicalFile", {}).get("checksum")
 
-        # Stream download
-        with self._client.stream("GET", f"/
+        # Stream download using node API
+        with self._client.stream("GET", f"/nodes/{file_id}/download") as response:
             response.raise_for_status()
 
             with open(dest_path, "wb") as f:
                 for chunk in response.iter_bytes(chunk_size=8192):
                     f.write(chunk)
 
-        # Verify checksum
-
-
-
-
-
+        # Verify checksum if available
+        if expected_checksum:
+            from .files import verify_checksum
+            if not verify_checksum(dest_path, expected_checksum):
+                import os
+                os.remove(dest_path)
+                raise ValueError(f"Checksum verification failed for file {file_id}")
 
         return dest_path
 
ml_dash/experiment.py
CHANGED
@@ -350,7 +350,7 @@ class Experiment:
         # RemoteClient will auto-load token from ~/.dash/token.enc
         # Use RUN.api_url if dash_url=True (boolean), otherwise use the provided URL
         api_url = RUN.api_url if dash_url is True else dash_url
-        self._client = RemoteClient(base_url=api_url)
+        self._client = RemoteClient(base_url=api_url, namespace=self.owner)
 
         if self.mode in (OperationMode.LOCAL, OperationMode.HYBRID):
             self._storage = LocalStorage(root_path=Path(dash_root))
@@ -703,22 +703,40 @@ class Experiment:
         # Write immediately (no buffering)
         if self._client:
             # Remote mode: send to API (wrapped in array for batch API)
-
-
-
-
+            try:
+                self._client.create_log_entries(
+                    experiment_id=self._experiment_id,
+                    logs=[log_entry],  # Single log in array
+                )
+            except Exception as e:
+                # Log warning but don't crash training
+                import warnings
+                warnings.warn(
+                    f"Failed to write log to remote server: {e}. Training will continue.",
+                    RuntimeWarning,
+                    stacklevel=4
+                )
+                # Fall through to local storage if available
 
         if self._storage:
             # Local mode: write to file immediately
-
-
-
-
-
-
-
-
-
+            try:
+                self._storage.write_log(
+                    owner=self.owner,
+                    project=self.project,
+                    prefix=self._folder_path,
+                    message=log_entry["message"],
+                    level=log_entry["level"],
+                    metadata=log_entry.get("metadata"),
+                    timestamp=log_entry["timestamp"],
+                )
+            except Exception as e:
+                import warnings
+                warnings.warn(
+                    f"Failed to write log to local storage: {e}",
+                    RuntimeWarning,
+                    stacklevel=4
+                )
 
     def _print_log(
         self, message: str, level: str, metadata: Optional[Dict[str, Any]]
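The pattern in this hunk trades exceptions for `RuntimeWarning`s so a long training run survives a flaky server, falling through to local storage when both backends are configured. Its skeleton, with stand-in functions (`send_remote` / `write_local` are invented):

```python
import warnings

def send_remote(entry): raise ConnectionError("server unreachable")  # simulated failure
def write_local(entry): print("wrote locally:", entry["message"])

entry = {"message": "epoch 3 done", "level": "INFO"}
try:
    send_remote(entry)
except Exception as e:
    # Warn instead of raising; the training loop keeps going.
    warnings.warn(f"Failed to write log to remote server: {e}. Training will continue.",
                  RuntimeWarning, stacklevel=2)
write_local(entry)  # local write still happens; a local failure would warn too
```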
@@ -1139,7 +1157,7 @@ class Experiment:
         description: Optional[str],
         tags: Optional[List[str]],
         metadata: Optional[Dict[str, Any]],
-    ) -> Dict[str, Any]:
+    ) -> Optional[Dict[str, Any]]:
         """
         Internal method to append a single data point to a metric.
 
@@ -1151,33 +1169,54 @@ class Experiment:
             metadata: Optional metadata
 
         Returns:
-            Dict with metricId, index, bufferedDataPoints, chunkSize
+            Dict with metricId, index, bufferedDataPoints, chunkSize or None if all backends fail
         """
         result = None
 
         if self._client:
             # Remote mode: append via API
-
-
-
-
-
-
-
-
+            try:
+                result = self._client.append_to_metric(
+                    experiment_id=self._experiment_id,
+                    metric_name=name,
+                    data=data,
+                    description=description,
+                    tags=tags,
+                    metadata=metadata,
+                )
+            except Exception as e:
+                # Log warning but don't crash training
+                import warnings
+                metric_display = f"'{name}'" if name else "unnamed metric"
+                warnings.warn(
+                    f"Failed to log {metric_display} to remote server: {e}. "
+                    f"Training will continue.",
+                    RuntimeWarning,
+                    stacklevel=3
+                )
+                # Fall through to local storage if available
 
         if self._storage:
             # Local mode: append to local storage
-
-
-
-
-
-
-
-
-
-
+            try:
+                result = self._storage.append_to_metric(
+                    owner=self.owner,
+                    project=self.project,
+                    prefix=self._folder_path,
+                    metric_name=name,
+                    data=data,
+                    description=description,
+                    tags=tags,
+                    metadata=metadata,
+                )
+            except Exception as e:
+                import warnings
+                metric_display = f"'{name}'" if name else "unnamed metric"
+                warnings.warn(
+                    f"Failed to log {metric_display} to local storage: {e}",
+                    RuntimeWarning,
+                    stacklevel=3
+                )
 
         return result
 
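Because both the remote and local writes can now fail softly, the return type widens to `Optional[Dict]` and callers must tolerate `None`. A stub showing the widened contract (stub name and values invented):

```python
from typing import Any, Dict, Optional

# Illustrates the widened return contract of _append_to_metric /
# _append_batch_to_metric: Optional[Dict] instead of Dict.
def append_to_metric_stub(fail: bool) -> Optional[Dict[str, Any]]:
    return None if fail else {"metricId": "731", "index": 0}

result = append_to_metric_stub(fail=True)
if result is None:
    # Both backends failed; warnings were already emitted upstream,
    # and the training loop simply moves on instead of crashing.
    print("metric point dropped")
```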
@@ -1188,7 +1227,7 @@ class Experiment:
         description: Optional[str],
         tags: Optional[List[str]],
         metadata: Optional[Dict[str, Any]],
-    ) -> Dict[str, Any]:
+    ) -> Optional[Dict[str, Any]]:
         """
         Internal method to append multiple data points to a metric.
 
@@ -1200,33 +1239,54 @@ class Experiment:
             metadata: Optional metadata
 
         Returns:
-            Dict with metricId, startIndex, endIndex, count
+            Dict with metricId, startIndex, endIndex, count or None if all backends fail
         """
         result = None
 
         if self._client:
             # Remote mode: append batch via API
-
-
-
-
-
-
-
-
+            try:
+                result = self._client.append_batch_to_metric(
+                    experiment_id=self._experiment_id,
+                    metric_name=name,
+                    data_points=data_points,
+                    description=description,
+                    tags=tags,
+                    metadata=metadata,
+                )
+            except Exception as e:
+                # Log warning but don't crash training
+                import warnings
+                metric_display = f"'{name}'" if name else "unnamed metric"
+                warnings.warn(
+                    f"Failed to log batch to {metric_display} on remote server: {e}. "
+                    f"Training will continue.",
+                    RuntimeWarning,
+                    stacklevel=3
+                )
+                # Fall through to local storage if available
 
         if self._storage:
             # Local mode: append batch to local storage
-
-
-
-
-
-
-
-
-
-
+            try:
+                result = self._storage.append_batch_to_metric(
+                    owner=self.owner,
+                    project=self.project,
+                    prefix=self._folder_path,
+                    metric_name=name,
+                    data_points=data_points,
+                    description=description,
+                    tags=tags,
+                    metadata=metadata,
+                )
+            except Exception as e:
+                import warnings
+                metric_display = f"'{name}'" if name else "unnamed metric"
+                warnings.warn(
+                    f"Failed to log batch to {metric_display} in local storage: {e}",
+                    RuntimeWarning,
+                    stacklevel=3
+                )
 
         return result
 
{ml_dash-0.6.3.dist-info → ml_dash-0.6.5.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-ml_dash/__init__.py,sha256=
+ml_dash/__init__.py,sha256=XJym_-vgqFVwyAD-VsPZF9WWepTZ4w-Lwui5ns1gmJI,1583
 ml_dash/auth/__init__.py,sha256=3lwM-Y8UBHPU1gFW2JNpmXlPVTnkGudWLKNFFKulQfo,1200
 ml_dash/auth/constants.py,sha256=ku4QzQUMNjvyJwjy7AUdywMAZd59jXSxNHZxDiagUWU,280
 ml_dash/auth/device_flow.py,sha256=DQOdPNlZCuU1umZOA_A6WXdRM3zWphnyo9IntToBl_A,7921
@@ -8,16 +8,16 @@ ml_dash/auth/token_storage.py,sha256=L18W8J7D1LlCDlY3Q32l0RXeNh0o7YVDQeeGYm64Dgw
 ml_dash/auto_start.py,sha256=62_eZG1qBNAwu6AXduTSo4niCVZ27X52ZK0WEr3yS1o,1812
 ml_dash/cli.py,sha256=BoaBulcqnM88XuV5BQEx_-AQAXJAYSJqpvnHggEII_I,2559
 ml_dash/cli_commands/__init__.py,sha256=bjAmV7MsW-bhtW_4SnLJ0Cfkt9h82vMDC8ebW1Ke8KE,38
-ml_dash/cli_commands/api.py,sha256=
-ml_dash/cli_commands/download.py,sha256=
-ml_dash/cli_commands/list.py,sha256=
+ml_dash/cli_commands/api.py,sha256=NekZEJGWNpIfB6YrsrOw7kw7rZKjVudwgJWPZIy6ANQ,4535
+ml_dash/cli_commands/download.py,sha256=LeZXjQSEPIxZALuo90fj8RHjFWIbtGPE0F625sD3cU8,28054
+ml_dash/cli_commands/list.py,sha256=oc_yJXFhsvGgr3JedG2j7747yX69Qc546geIi4DQ54k,16129
 ml_dash/cli_commands/login.py,sha256=zX-urtUrfzg2qOGtKNYQgj6UloN9kzj4zEO6h_xwuNs,6782
 ml_dash/cli_commands/logout.py,sha256=lTUUNyRXqvo61qNkCd4KBrPUujDAHnNqsHkU6bHie0U,1332
 ml_dash/cli_commands/profile.py,sha256=BaSM6BAN3YM4tw95iKV_nypKZxwsB3PoAAejQcYip5E,2351
-ml_dash/cli_commands/upload.py,sha256=
-ml_dash/client.py,sha256=
+ml_dash/cli_commands/upload.py,sha256=_607CcGjvjnwTgGzyxHaDG0qDAlSLlpZDoq6Sy-3paQ,44828
+ml_dash/client.py,sha256=0j4mgr7u9MPIkBL7LR4EqxYYGkFDOSMY6KizF2aNSGA,52848
 ml_dash/config.py,sha256=oz2xvoBh2X_xUXWr92cPD5nFxXMT5LxVNypv5B5O0fA,3116
-ml_dash/experiment.py,sha256=
+ml_dash/experiment.py,sha256=1uDCKNDlgGkKoogao3sEFz1sUhmiRvX3ZPGoQ7H3ozE,41361
 ml_dash/files.py,sha256=bihUHKpdknytLGuGgkcvhh585nziZrvYjiHl6rHnoD0,49227
 ml_dash/log.py,sha256=E-DLg0vejVLLEyShJ_r0LneDMI0XU7XTH5iKWYJe9jI,5298
 ml_dash/metric.py,sha256=ghD1jnuv6dbjV1Jlo7q0mx9UEzpdto2Y1-oDWrSfg04,25809
@@ -27,7 +27,7 @@ ml_dash/remote_auto_start.py,sha256=5fvQDHv1CWEKFb6WAa5_uyEInwV_SvotXjOO_6i6ZKE,
 ml_dash/run.py,sha256=C0quTLZXKDAlwstzEiJ75CWCX1pwYrmtMZH3z-ia6Pw,6310
 ml_dash/snowflake.py,sha256=14rEpRU5YltsmmmZW0EMUy_hdv5S5ME9gWVtmdmwfiU,4917
 ml_dash/storage.py,sha256=9mG42pvvWkkracbjCr9Xdp890Nm4XSxL7_JeFbBe28g,33020
-ml_dash-0.6.
-ml_dash-0.6.
-ml_dash-0.6.
-ml_dash-0.6.
+ml_dash-0.6.5.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
+ml_dash-0.6.5.dist-info/entry_points.txt,sha256=dYs2EHX1uRNO7AQGNnVaJJpgiy0Z9q7tiy4fHSyaf3Q,46
+ml_dash-0.6.5.dist-info/METADATA,sha256=a9v8BibbvXdWfXDTTGWVsLWWTA6iE2DMMwUOZjYH-J8,7203
+ml_dash-0.6.5.dist-info/RECORD,,
{ml_dash-0.6.3.dist-info → ml_dash-0.6.5.dist-info}/WHEEL
File without changes
{ml_dash-0.6.3.dist-info → ml_dash-0.6.5.dist-info}/entry_points.txt
File without changes