mlops-python-sdk 0.0.1__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlops/__init__.py +3 -3
- mlops/api/client/api/storage/__init__.py +1 -0
- mlops/api/client/api/storage/get_storage_presign_download.py +175 -0
- mlops/api/client/api/storage/get_storage_presign_upload.py +175 -0
- mlops/api/client/api/tasks/cancel_task.py +14 -14
- mlops/api/client/api/tasks/delete_task.py +14 -14
- mlops/api/client/api/tasks/get_task.py +15 -15
- mlops/api/client/api/tasks/get_task_by_task_id.py +204 -0
- mlops/api/client/api/tasks/get_task_logs.py +300 -0
- mlops/api/client/api/tasks/list_tasks.py +14 -14
- mlops/api/client/models/__init__.py +22 -0
- mlops/api/client/models/get_storage_presign_download_response_200.py +60 -0
- mlops/api/client/models/get_storage_presign_upload_response_200.py +79 -0
- mlops/api/client/models/get_task_logs_direction.py +9 -0
- mlops/api/client/models/get_task_logs_log_type.py +10 -0
- mlops/api/client/models/job_spec.py +273 -0
- mlops/api/client/models/job_spec_env.py +44 -0
- mlops/api/client/models/job_spec_master_strategy.py +8 -0
- mlops/api/client/models/log_pagination.py +90 -0
- mlops/api/client/models/task_log_entry.py +105 -0
- mlops/api/client/models/task_log_entry_log_type.py +9 -0
- mlops/api/client/models/task_logs_response.py +112 -0
- mlops/api/client/models/task_submit_request.py +24 -6
- mlops/connection_config.py +4 -11
- mlops/exceptions.py +10 -10
- mlops/task/__init__.py +1 -1
- mlops/task/client.py +11 -35
- mlops/task/task.py +186 -40
- {mlops_python_sdk-0.0.1.dist-info → mlops_python_sdk-1.0.1.dist-info}/METADATA +21 -30
- mlops_python_sdk-1.0.1.dist-info/RECORD +52 -0
- mlops_python_sdk-0.0.1.dist-info/RECORD +0 -36
- {mlops_python_sdk-0.0.1.dist-info → mlops_python_sdk-1.0.1.dist-info}/WHEEL +0 -0
mlops/task/task.py
CHANGED
|
@@ -1,12 +1,17 @@
|
|
|
1
1
|
"""
|
|
2
|
-
High-level Task SDK interface for
|
|
2
|
+
High-level Task SDK interface for MLOps.
|
|
3
3
|
|
|
4
|
-
This module provides a convenient interface for managing tasks through the
|
|
5
|
-
"""
|
|
4
|
+
This module provides a convenient interface for managing tasks through the MLOps API.
|
|
5
|
+
"""
|
|
6
6
|
|
|
7
7
|
import json
|
|
8
|
+
import os
|
|
8
9
|
from http import HTTPStatus
|
|
10
|
+
from pathlib import Path
|
|
9
11
|
from typing import Optional
|
|
12
|
+
|
|
13
|
+
import httpx
|
|
14
|
+
|
|
10
15
|
from ..api.client.api.tasks import (
|
|
11
16
|
submit_task,
|
|
12
17
|
get_task,
|
|
@@ -14,8 +19,15 @@ from ..api.client.api.tasks import (
|
|
|
14
19
|
cancel_task,
|
|
15
20
|
delete_task,
|
|
16
21
|
)
|
|
22
|
+
from ..api.client.api.storage import (
|
|
23
|
+
get_storage_presign_upload,
|
|
24
|
+
get_storage_presign_download,
|
|
25
|
+
)
|
|
17
26
|
from ..api.client.models.task import Task as TaskModel
|
|
18
27
|
from ..api.client.models.task_submit_request import TaskSubmitRequest
|
|
28
|
+
from ..api.client.models.task_submit_request_environment_type_0 import (
|
|
29
|
+
TaskSubmitRequestEnvironmentType0,
|
|
30
|
+
)
|
|
19
31
|
from ..api.client.models.task_submit_response import TaskSubmitResponse
|
|
20
32
|
from ..api.client.models.task_list_response import TaskListResponse
|
|
21
33
|
from ..api.client.models.task_status import TaskStatus
|
|
@@ -29,13 +41,46 @@ from ..exceptions import (
|
|
|
29
41
|
from .client import TaskClient, handle_api_exception
|
|
30
42
|
|
|
31
43
|
|
|
44
|
+
def _validate_archive_file_path(file_path: str) -> Path:
    """
    Resolve a user-supplied path and verify it points to a supported archive.

    The path has ``~`` expanded and is resolved to an absolute path before
    any check is made, so error messages always show the resolved location.

    Args:
        file_path: Path to a local archive; may start with ``~``.

    Returns:
        The resolved path of the archive file.

    Raises:
        APIException: If the path does not exist, is not a regular file, or
            its name does not end with .zip, .tar.gz or .tgz
            (compared case-insensitively).
    """
    allowed_suffixes = (".zip", ".tar.gz", ".tgz")

    p = Path(os.path.expanduser(file_path)).resolve()
    if not p.exists():
        raise APIException(f"File not found: {p}")
    if not p.is_file():
        raise APIException(f"file_path must be a file: {p}")

    # str.endswith accepts a tuple, so a single call covers every allowed
    # suffix; the name is lower-cased first to accept e.g. "DATA.ZIP".
    if not p.name.lower().endswith(allowed_suffixes):
        raise APIException(f"file_path must be one of .zip, .tar.gz, .tgz: {p}")
    return p
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _upload_file_to_presigned_url(url: str, file_path: Path, timeout: Optional[float]) -> None:
    """
    PUT the contents of a local file to a presigned URL.

    Args:
        url: Presigned URL that accepts the upload.
        file_path: Local file whose bytes are sent as the request body.
        timeout: Timeout passed to the HTTP client.

    Raises:
        APIException: If the response status is outside the 2xx range. The
            message carries the status code and up to the first 2048
            characters of the response body.
    """
    size = file_path.stat().st_size
    upload_headers = {
        "Content-Length": str(size),
        "Content-Type": "application/octet-stream",
    }

    # Use a dedicated client for S3 presigned upload (avoid leaking API auth headers).
    with httpx.Client(timeout=timeout) as client, file_path.open("rb") as f:
        resp = client.put(url, content=f, headers=upload_headers)

    if not (200 <= resp.status_code < 300):
        body = (resp.text or "")[:2048]
        raise APIException(
            f"Failed to upload file to presigned url: HTTP {resp.status_code}: {body}"
        )
|
|
75
|
+
|
|
76
|
+
|
|
32
77
|
class Task:
|
|
33
78
|
"""
|
|
34
79
|
High-level interface for managing tasks.
|
|
35
80
|
|
|
36
81
|
Example:
|
|
37
82
|
```python
|
|
38
|
-
from
|
|
83
|
+
from mlops import Task, ConnectionConfig
|
|
39
84
|
|
|
40
85
|
config = ConnectionConfig(api_key="your_api_key")
|
|
41
86
|
task = Task(config=config)
|
|
@@ -43,28 +88,28 @@ class Task:
|
|
|
43
88
|
# Submit a task with script
|
|
44
89
|
result = task.submit(
|
|
45
90
|
name="my-task",
|
|
46
|
-
|
|
91
|
+
cluster_name="slurm-cn",
|
|
47
92
|
script="#!/bin/bash\\necho 'Hello World'"
|
|
48
93
|
)
|
|
49
94
|
|
|
50
95
|
# Or submit with command
|
|
51
96
|
result = task.submit(
|
|
52
97
|
name="my-task",
|
|
53
|
-
|
|
98
|
+
cluster_name="slurm-cn",
|
|
54
99
|
command="echo 'Hello World'"
|
|
55
100
|
)
|
|
56
101
|
|
|
57
102
|
# Get task details
|
|
58
|
-
task_info = task.get(task_id=result.job_id,
|
|
103
|
+
task_info = task.get(task_id=result.job_id, cluster_name="slurm-cn")
|
|
59
104
|
|
|
60
105
|
# List tasks
|
|
61
106
|
tasks = task.list(status=TaskStatus.RUNNING)
|
|
62
107
|
|
|
63
108
|
# Cancel a task
|
|
64
|
-
task.cancel(task_id=result.job_id,
|
|
109
|
+
task.cancel(task_id=result.job_id, cluster_name="slurm-cn")
|
|
65
110
|
|
|
66
111
|
# Delete a task
|
|
67
|
-
task.delete(task_id=result.job_id,
|
|
112
|
+
task.delete(task_id=result.job_id, cluster_name="slurm-cn")
|
|
68
113
|
```
|
|
69
114
|
"""
|
|
70
115
|
|
|
@@ -72,7 +117,6 @@ class Task:
|
|
|
72
117
|
self,
|
|
73
118
|
config: Optional["ConnectionConfig"] = None,
|
|
74
119
|
api_key: Optional[str] = None,
|
|
75
|
-
access_token: Optional[str] = None,
|
|
76
120
|
domain: Optional[str] = None,
|
|
77
121
|
debug: Optional[bool] = None,
|
|
78
122
|
request_timeout: Optional[float] = None,
|
|
@@ -83,7 +127,6 @@ class Task:
|
|
|
83
127
|
Args:
|
|
84
128
|
config: ConnectionConfig instance. If not provided, a new one will be created.
|
|
85
129
|
api_key: API key for authentication. Overrides config.api_key.
|
|
86
|
-
access_token: Access token for authentication. Overrides config.access_token.
|
|
87
130
|
domain: API domain. Overrides config.domain.
|
|
88
131
|
debug: Enable debug mode. Overrides config.debug.
|
|
89
132
|
request_timeout: Request timeout in seconds. Overrides config.request_timeout.
|
|
@@ -95,8 +138,6 @@ class Task:
|
|
|
95
138
|
# Override config values if provided
|
|
96
139
|
if api_key is not None:
|
|
97
140
|
config.api_key = api_key
|
|
98
|
-
if access_token is not None:
|
|
99
|
-
config.access_token = access_token
|
|
100
141
|
if domain is not None:
|
|
101
142
|
config.domain = domain
|
|
102
143
|
if debug is not None:
|
|
@@ -106,22 +147,22 @@ class Task:
|
|
|
106
147
|
|
|
107
148
|
self._config = config
|
|
108
149
|
self._client = TaskClient(config=config)
|
|
109
|
-
|
|
110
150
|
def submit(
|
|
111
151
|
self,
|
|
112
152
|
name: str,
|
|
113
|
-
|
|
153
|
+
cluster_name: str,
|
|
114
154
|
script: Optional[str] = None,
|
|
115
155
|
command: Optional[str] = None,
|
|
116
156
|
resources: Optional[dict] = None,
|
|
117
157
|
team_id: Optional[int] = None,
|
|
158
|
+
file_path: Optional[str] = None,
|
|
118
159
|
) -> TaskSubmitResponse:
|
|
119
160
|
"""
|
|
120
161
|
Submit a new task.
|
|
121
162
|
|
|
122
163
|
Args:
|
|
123
164
|
name: Task name
|
|
124
|
-
|
|
165
|
+
cluster_name: Cluster name to submit the task to
|
|
125
166
|
script: Task script content (optional, but at least one of script or command is required)
|
|
126
167
|
command: Command to execute (optional, but at least one of script or command is required)
|
|
127
168
|
resources: Resource requirements dict (optional)
|
|
@@ -134,19 +175,16 @@ class Task:
|
|
|
134
175
|
APIException: If the API returns an error
|
|
135
176
|
AuthenticationException: If authentication fails
|
|
136
177
|
"""
|
|
137
|
-
# Validate required fields
|
|
138
|
-
if cluster_id is None:
|
|
139
|
-
raise APIException("cluster_id is required")
|
|
140
|
-
|
|
141
178
|
# At least one of script or command must be provided
|
|
142
179
|
if not script and not command:
|
|
143
180
|
raise APIException("At least one of 'script' or 'command' must be provided")
|
|
144
181
|
|
|
145
182
|
# Map resources dict to individual fields
|
|
146
183
|
# resources dict can contain: cpu, cpus_per_task, memory, nodes, gres, time, partition, etc.
|
|
184
|
+
|
|
147
185
|
request_kwargs = {
|
|
148
186
|
"name": name,
|
|
149
|
-
"
|
|
187
|
+
"cluster_name": cluster_name,
|
|
150
188
|
}
|
|
151
189
|
|
|
152
190
|
# Handle script and command (at least one is required)
|
|
@@ -177,7 +215,87 @@ class Task:
|
|
|
177
215
|
request_kwargs["partition"] = resources.get("partition")
|
|
178
216
|
if "tres" in resources:
|
|
179
217
|
request_kwargs["tres"] = resources.get("tres")
|
|
180
|
-
|
|
218
|
+
|
|
219
|
+
if file_path:
|
|
220
|
+
local_path = _validate_archive_file_path(file_path)
|
|
221
|
+
timeout = self._config.get_request_timeout()
|
|
222
|
+
|
|
223
|
+
# 1) Get presigned upload URL
|
|
224
|
+
presign_upload_obj = get_storage_presign_upload.sync_detailed(
|
|
225
|
+
client=self._client,
|
|
226
|
+
filename=local_path.name,
|
|
227
|
+
)
|
|
228
|
+
presign_upload = presign_upload_obj.parsed
|
|
229
|
+
if isinstance(presign_upload, ErrorResponse):
|
|
230
|
+
status_code = (
|
|
231
|
+
presign_upload.code
|
|
232
|
+
if presign_upload.code != UNSET and presign_upload.code != 0
|
|
233
|
+
else presign_upload_obj.status_code.value
|
|
234
|
+
)
|
|
235
|
+
exception = handle_api_exception(
|
|
236
|
+
Response(
|
|
237
|
+
status_code=HTTPStatus(status_code),
|
|
238
|
+
content=presign_upload_obj.content,
|
|
239
|
+
headers=presign_upload_obj.headers,
|
|
240
|
+
parsed=None,
|
|
241
|
+
)
|
|
242
|
+
)
|
|
243
|
+
raise exception
|
|
244
|
+
|
|
245
|
+
if (
|
|
246
|
+
presign_upload is None
|
|
247
|
+
or presign_upload.url in (UNSET, None)
|
|
248
|
+
or presign_upload.key in (UNSET, None)
|
|
249
|
+
):
|
|
250
|
+
raise APIException("Failed to get presigned upload url: empty response")
|
|
251
|
+
|
|
252
|
+
# 2) Upload file to S3 (presigned URL)
|
|
253
|
+
_upload_file_to_presigned_url(
|
|
254
|
+
url=str(presign_upload.url),
|
|
255
|
+
file_path=local_path,
|
|
256
|
+
timeout=timeout,
|
|
257
|
+
)
|
|
258
|
+
|
|
259
|
+
# 3) Get presigned download URL
|
|
260
|
+
presign_download_obj = get_storage_presign_download.sync_detailed(
|
|
261
|
+
client=self._client,
|
|
262
|
+
key=str(presign_upload.key),
|
|
263
|
+
)
|
|
264
|
+
presign_download = presign_download_obj.parsed
|
|
265
|
+
if isinstance(presign_download, ErrorResponse):
|
|
266
|
+
status_code = (
|
|
267
|
+
presign_download.code
|
|
268
|
+
if presign_download.code != UNSET and presign_download.code != 0
|
|
269
|
+
else presign_download_obj.status_code.value
|
|
270
|
+
)
|
|
271
|
+
exception = handle_api_exception(
|
|
272
|
+
Response(
|
|
273
|
+
status_code=HTTPStatus(status_code),
|
|
274
|
+
content=presign_download_obj.content,
|
|
275
|
+
headers=presign_download_obj.headers,
|
|
276
|
+
parsed=None,
|
|
277
|
+
)
|
|
278
|
+
)
|
|
279
|
+
raise exception
|
|
280
|
+
|
|
281
|
+
if presign_download is None or presign_download.url in (UNSET, None):
|
|
282
|
+
raise APIException(
|
|
283
|
+
"Failed to get presigned download url: empty response"
|
|
284
|
+
)
|
|
285
|
+
|
|
286
|
+
# 4) Set env var (merge if user already provided environment)
|
|
287
|
+
env: dict[str, str] = {}
|
|
288
|
+
existing_env = request_kwargs.get("environment")
|
|
289
|
+
if isinstance(existing_env, TaskSubmitRequestEnvironmentType0):
|
|
290
|
+
env.update(existing_env.additional_properties)
|
|
291
|
+
elif isinstance(existing_env, dict):
|
|
292
|
+
env.update(existing_env)
|
|
293
|
+
|
|
294
|
+
env["SYSTEM_DOWNLOAD_ARCHIVE_URL"] = str(presign_download.url)
|
|
295
|
+
request_kwargs["environment"] = TaskSubmitRequestEnvironmentType0.from_dict(
|
|
296
|
+
env
|
|
297
|
+
)
|
|
298
|
+
|
|
181
299
|
request = TaskSubmitRequest(**request_kwargs)
|
|
182
300
|
|
|
183
301
|
# Use sync_detailed to get full response information
|
|
@@ -230,14 +348,14 @@ class Task:
|
|
|
230
348
|
def get(
|
|
231
349
|
self,
|
|
232
350
|
task_id: int,
|
|
233
|
-
|
|
351
|
+
cluster_name: str,
|
|
234
352
|
) -> TaskModel:
|
|
235
353
|
"""
|
|
236
354
|
Get task details by task ID.
|
|
237
355
|
|
|
238
356
|
Args:
|
|
239
357
|
task_id: Task ID
|
|
240
|
-
|
|
358
|
+
cluster_name: Cluster name
|
|
241
359
|
|
|
242
360
|
Returns:
|
|
243
361
|
Task model with task details
|
|
@@ -250,7 +368,7 @@ class Task:
|
|
|
250
368
|
response_obj = get_task.sync_detailed(
|
|
251
369
|
id=task_id,
|
|
252
370
|
client=self._client,
|
|
253
|
-
|
|
371
|
+
cluster_name=cluster_name,
|
|
254
372
|
)
|
|
255
373
|
response = response_obj.parsed
|
|
256
374
|
|
|
@@ -302,7 +420,7 @@ class Task:
|
|
|
302
420
|
status: Optional[TaskStatus] = None,
|
|
303
421
|
user_id: Optional[int] = None,
|
|
304
422
|
team_id: Optional[int] = None,
|
|
305
|
-
|
|
423
|
+
cluster_name: Optional[str] = None,
|
|
306
424
|
) -> TaskListResponse:
|
|
307
425
|
"""
|
|
308
426
|
List tasks with optional filtering.
|
|
@@ -313,7 +431,7 @@ class Task:
|
|
|
313
431
|
status: Filter by task status (optional)
|
|
314
432
|
user_id: Filter by user ID (optional)
|
|
315
433
|
team_id: Filter by team ID (optional)
|
|
316
|
-
|
|
434
|
+
cluster_name: Filter by cluster name (optional)
|
|
317
435
|
|
|
318
436
|
Returns:
|
|
319
437
|
TaskListResponse containing the list of tasks
|
|
@@ -321,42 +439,70 @@ class Task:
|
|
|
321
439
|
Raises:
|
|
322
440
|
APIException: If the API returns an error
|
|
323
441
|
"""
|
|
324
|
-
response
|
|
442
|
+
# Use sync_detailed to get full response information
|
|
443
|
+
response_obj = list_tasks.sync_detailed(
|
|
325
444
|
client=self._client,
|
|
326
445
|
page=page,
|
|
327
446
|
page_size=page_size,
|
|
328
447
|
status=status if status is not None else UNSET,
|
|
329
448
|
user_id=user_id if user_id is not None else UNSET,
|
|
330
449
|
team_id=team_id if team_id is not None else UNSET,
|
|
331
|
-
|
|
450
|
+
cluster_name=cluster_name if cluster_name is not None else UNSET,
|
|
332
451
|
)
|
|
452
|
+
response = response_obj.parsed
|
|
333
453
|
|
|
334
454
|
if isinstance(response, ErrorResponse):
|
|
335
|
-
|
|
455
|
+
# Extract error message from ErrorResponse
|
|
456
|
+
error_msg = "Unknown error"
|
|
457
|
+
if response.error and response.error != UNSET:
|
|
458
|
+
error_msg = response.error
|
|
459
|
+
elif response_obj.content:
|
|
460
|
+
try:
|
|
461
|
+
error_data = json.loads(response_obj.content.decode())
|
|
462
|
+
error_msg = error_data.get("error", "Unknown error")
|
|
463
|
+
except (json.JSONDecodeError, UnicodeDecodeError):
|
|
464
|
+
error_msg = response_obj.content.decode(errors="replace")
|
|
465
|
+
|
|
466
|
+
# Check status code to determine exception type
|
|
467
|
+
status_code = response.code if response.code != UNSET and response.code != 0 else response_obj.status_code.value
|
|
468
|
+
if status_code == 404:
|
|
469
|
+
raise NotFoundException(error_msg)
|
|
470
|
+
|
|
471
|
+
# Use handle_api_exception which returns an exception object
|
|
472
|
+
exception = handle_api_exception(
|
|
336
473
|
Response(
|
|
337
|
-
status_code=HTTPStatus(
|
|
338
|
-
content=
|
|
339
|
-
headers=
|
|
474
|
+
status_code=HTTPStatus(status_code),
|
|
475
|
+
content=response_obj.content,
|
|
476
|
+
headers=response_obj.headers,
|
|
340
477
|
parsed=None,
|
|
341
478
|
)
|
|
342
479
|
)
|
|
480
|
+
raise exception
|
|
343
481
|
|
|
344
482
|
if response is None:
|
|
345
|
-
|
|
483
|
+
# If response is None, try to extract error from raw response
|
|
484
|
+
error_msg = "No response from server"
|
|
485
|
+
if response_obj.content:
|
|
486
|
+
try:
|
|
487
|
+
error_data = json.loads(response_obj.content.decode())
|
|
488
|
+
error_msg = error_data.get("error", f"HTTP {response_obj.status_code.value}: {response_obj.content.decode()}")
|
|
489
|
+
except (json.JSONDecodeError, UnicodeDecodeError):
|
|
490
|
+
error_msg = f"HTTP {response_obj.status_code.value}: {response_obj.content.decode(errors='replace')}"
|
|
491
|
+
raise APIException(f"Failed to list tasks: {error_msg}")
|
|
346
492
|
|
|
347
493
|
return response
|
|
348
494
|
|
|
349
495
|
def cancel(
|
|
350
496
|
self,
|
|
351
497
|
task_id: int,
|
|
352
|
-
|
|
498
|
+
cluster_name: str,
|
|
353
499
|
) -> bool:
|
|
354
500
|
"""
|
|
355
501
|
Cancel a task.
|
|
356
502
|
|
|
357
503
|
Args:
|
|
358
504
|
task_id: Task ID to cancel
|
|
359
|
-
|
|
505
|
+
cluster_name: Cluster name where the task is running
|
|
360
506
|
|
|
361
507
|
Returns:
|
|
362
508
|
True if the task was cancelled successfully
|
|
@@ -369,7 +515,7 @@ class Task:
|
|
|
369
515
|
response_obj = cancel_task.sync_detailed(
|
|
370
516
|
id=task_id,
|
|
371
517
|
client=self._client,
|
|
372
|
-
|
|
518
|
+
cluster_name=cluster_name,
|
|
373
519
|
)
|
|
374
520
|
response = response_obj.parsed
|
|
375
521
|
|
|
@@ -406,14 +552,14 @@ class Task:
|
|
|
406
552
|
def delete(
|
|
407
553
|
self,
|
|
408
554
|
task_id: int,
|
|
409
|
-
|
|
555
|
+
cluster_name: str,
|
|
410
556
|
) -> bool:
|
|
411
557
|
"""
|
|
412
558
|
Delete a task.
|
|
413
559
|
|
|
414
560
|
Args:
|
|
415
561
|
task_id: Task ID to delete
|
|
416
|
-
|
|
562
|
+
cluster_name: Cluster name where the task is running
|
|
417
563
|
|
|
418
564
|
Returns:
|
|
419
565
|
True if the task was deleted successfully
|
|
@@ -426,7 +572,7 @@ class Task:
|
|
|
426
572
|
response_obj = delete_task.sync_detailed(
|
|
427
573
|
id=task_id,
|
|
428
574
|
client=self._client,
|
|
429
|
-
|
|
575
|
+
cluster_name=cluster_name,
|
|
430
576
|
)
|
|
431
577
|
response = response_obj.parsed
|
|
432
578
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: mlops-python-sdk
|
|
3
|
-
Version:
|
|
3
|
+
Version: 1.0.1
|
|
4
4
|
Summary: MLOps Python SDK for XCloud Service API
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: mlops
|
|
@@ -25,7 +25,7 @@ Description-Content-Type: text/markdown
|
|
|
25
25
|
|
|
26
26
|
# MLOps Python SDK
|
|
27
27
|
|
|
28
|
-
[MLOps](https://
|
|
28
|
+
[MLOps](https://xcloud-service.com) Python SDK for XCloud Service API. Manage and execute tasks with confidence.
|
|
29
29
|
|
|
30
30
|
## Installation
|
|
31
31
|
|
|
@@ -39,9 +39,9 @@ pip install mlops-python-sdk
|
|
|
39
39
|
|
|
40
40
|
### 1. Setup Authentication
|
|
41
41
|
|
|
42
|
-
You can authenticate using either an API Key
|
|
42
|
+
You can authenticate using an API key.
|
|
43
43
|
|
|
44
|
-
####
|
|
44
|
+
#### API Key (Recommended for programmatic access)
|
|
45
45
|
|
|
46
46
|
1. Sign up at [MLOps](https://xcloud-service.com)
|
|
47
47
|
2. Create an API key from [API Keys](https://xcloud-service.com/home/api-keys)
|
|
@@ -52,18 +52,11 @@ export MLOPS_API_KEY=xck_******
|
|
|
52
52
|
export MLOPS_DOMAIN=localhost:8090 # optional, default is localhost:8090
|
|
53
53
|
```
|
|
54
54
|
|
|
55
|
-
#### Option 2: Access Token (For user authentication)
|
|
56
|
-
|
|
57
|
-
```bash
|
|
58
|
-
export MLOPS_ACCESS_TOKEN=your_access_token
|
|
59
|
-
export MLOPS_DOMAIN=localhost:8090 # optional
|
|
60
|
-
```
|
|
61
|
-
|
|
62
55
|
### 2. Basic Usage
|
|
63
56
|
|
|
64
57
|
```python
|
|
65
|
-
from
|
|
66
|
-
from
|
|
58
|
+
from mlops import Task, ConnectionConfig
|
|
59
|
+
from mlops.api.client.models.task_status import TaskStatus
|
|
67
60
|
|
|
68
61
|
# Initialize Task client (uses environment variables by default)
|
|
69
62
|
task = Task()
|
|
@@ -119,7 +112,7 @@ The `Task` class provides a high-level interface for managing tasks.
|
|
|
119
112
|
#### Initialization
|
|
120
113
|
|
|
121
114
|
```python
|
|
122
|
-
from
|
|
115
|
+
from mlops import Task, ConnectionConfig
|
|
123
116
|
|
|
124
117
|
# Using environment variables
|
|
125
118
|
task = Task()
|
|
@@ -127,7 +120,6 @@ task = Task()
|
|
|
127
120
|
# With explicit configuration
|
|
128
121
|
config = ConnectionConfig(
|
|
129
122
|
api_key="xck_******", # API key for authentication
|
|
130
|
-
access_token="token_******", # Access token (alternative to API key)
|
|
131
123
|
domain="localhost:8090", # API domain
|
|
132
124
|
debug=False, # Enable debug mode
|
|
133
125
|
request_timeout=30.0 # Request timeout in seconds
|
|
@@ -222,7 +214,7 @@ tasks = task.list(
|
|
|
222
214
|
**Example:**
|
|
223
215
|
|
|
224
216
|
```python
|
|
225
|
-
from
|
|
217
|
+
from mlops.api.client.models.task_status import TaskStatus
|
|
226
218
|
|
|
227
219
|
# List all running tasks
|
|
228
220
|
running_tasks = task.list(status=TaskStatus.RUNNING)
|
|
@@ -261,7 +253,7 @@ task.cancel(task_id=12345, cluster_id=1)
|
|
|
261
253
|
Task status values for filtering:
|
|
262
254
|
|
|
263
255
|
```python
|
|
264
|
-
from
|
|
256
|
+
from mlops.api.client.models.task_status import TaskStatus
|
|
265
257
|
|
|
266
258
|
TaskStatus.PENDING # Task is pending
|
|
267
259
|
TaskStatus.QUEUED # Task is queued
|
|
@@ -280,7 +272,6 @@ TaskStatus.CREATED # Task was created
|
|
|
280
272
|
The SDK reads configuration from environment variables:
|
|
281
273
|
|
|
282
274
|
- `MLOPS_API_KEY`: API key for authentication
|
|
283
|
-
- `MLOPS_ACCESS_TOKEN`: Access token for authentication (alternative to API key)
|
|
284
275
|
- `MLOPS_DOMAIN`: API domain (default: `localhost:8090`)
|
|
285
276
|
- `MLOPS_DEBUG`: Enable debug mode (`true`/`false`, default: `false`)
|
|
286
277
|
- `MLOPS_API_PATH`: API path prefix (default: `/api/v1`)
|
|
@@ -290,7 +281,7 @@ The SDK reads configuration from environment variables:
|
|
|
290
281
|
You can also configure the connection programmatically:
|
|
291
282
|
|
|
292
283
|
```python
|
|
293
|
-
from
|
|
284
|
+
from mlops import ConnectionConfig
|
|
294
285
|
|
|
295
286
|
config = ConnectionConfig(
|
|
296
287
|
domain="api.example.com",
|
|
@@ -306,7 +297,7 @@ config = ConnectionConfig(
|
|
|
306
297
|
The SDK provides specific exception types:
|
|
307
298
|
|
|
308
299
|
```python
|
|
309
|
-
from
|
|
300
|
+
from mlops.exceptions import (
|
|
310
301
|
APIException, # General API errors
|
|
311
302
|
AuthenticationException, # Authentication failures
|
|
312
303
|
NotFoundException, # Resource not found
|
|
@@ -330,7 +321,7 @@ except APIException as e:
|
|
|
330
321
|
### Submit a Machine Learning Training Job
|
|
331
322
|
|
|
332
323
|
```python
|
|
333
|
-
from
|
|
324
|
+
from mlops import Task
|
|
334
325
|
|
|
335
326
|
task = Task()
|
|
336
327
|
|
|
@@ -339,16 +330,16 @@ result = task.submit(
|
|
|
339
330
|
cluster_id=1,
|
|
340
331
|
script="""#!/bin/bash
|
|
341
332
|
#SBATCH --gres=gpu:1
|
|
342
|
-
#SBATCH --cpus-per-task=
|
|
343
|
-
#SBATCH --mem=
|
|
333
|
+
#SBATCH --cpus-per-task=2
|
|
334
|
+
#SBATCH --mem=4GB
|
|
344
335
|
|
|
345
336
|
python train.py --config config.yaml
|
|
346
337
|
""",
|
|
347
338
|
resources={
|
|
348
|
-
"cpus_per_task":
|
|
349
|
-
"memory": "
|
|
339
|
+
"cpus_per_task": 2,
|
|
340
|
+
"memory": "4GB",
|
|
350
341
|
"gres": "gpu:1",
|
|
351
|
-
"time": "
|
|
342
|
+
"time": "1-00:00:00", # 1 day
|
|
352
343
|
"partition": "gpu"
|
|
353
344
|
}
|
|
354
345
|
)
|
|
@@ -359,8 +350,8 @@ print(f"Training job submitted: {result.job_id}")
|
|
|
359
350
|
### Monitor Task Status
|
|
360
351
|
|
|
361
352
|
```python
|
|
362
|
-
from
|
|
363
|
-
from
|
|
353
|
+
from mlops import Task
|
|
354
|
+
from mlops.api.client.models.task_status import TaskStatus
|
|
364
355
|
import time
|
|
365
356
|
|
|
366
357
|
task = Task()
|
|
@@ -380,8 +371,8 @@ while True:
|
|
|
380
371
|
### List and Filter Tasks
|
|
381
372
|
|
|
382
373
|
```python
|
|
383
|
-
from
|
|
384
|
-
from
|
|
374
|
+
from mlops import Task
|
|
375
|
+
from mlops.api.client.models.task_status import TaskStatus
|
|
385
376
|
|
|
386
377
|
task = Task()
|
|
387
378
|
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
mlops/__init__.py,sha256=8K3ZQ-g9GI0r35SbEiHhQZmdGajc0bJY7435F7qeGR4,948
|
|
2
|
+
mlops/api/client/__init__.py,sha256=ebLsr_eRzRT06jQ2k4bxbX3NwD4A3gcxdVdcUBUMmIM,161
|
|
3
|
+
mlops/api/client/api/__init__.py,sha256=zTSiG_ujSjAqWPyc435YXaX9XTlpMjiJWBbV-f-YtdA,45
|
|
4
|
+
mlops/api/client/api/storage/__init__.py,sha256=5vd9uJWAjRqa9xzxzYkLD1yoZ12Ld_bAaNB5WX4fbE8,56
|
|
5
|
+
mlops/api/client/api/storage/get_storage_presign_download.py,sha256=xeSCCR-C4J9IY3FJyQv9KU1qHjvBul_dV6gjTkUB-pA,4812
|
|
6
|
+
mlops/api/client/api/storage/get_storage_presign_upload.py,sha256=V6x_E5_ldu3Hzwg1IqH8kb_wDGkncaZaswqq7f67_Vg,4855
|
|
7
|
+
mlops/api/client/api/tasks/__init__.py,sha256=5vd9uJWAjRqa9xzxzYkLD1yoZ12Ld_bAaNB5WX4fbE8,56
|
|
8
|
+
mlops/api/client/api/tasks/cancel_task.py,sha256=BwgzLXB7ahV1w05sfHhMSvLTH4jehqQFasUFuTBXLTI,4931
|
|
9
|
+
mlops/api/client/api/tasks/delete_task.py,sha256=r2LBF_YFncsBAZt_JCMp8MA1fFw5OQwfEewLtBJZOYo,5667
|
|
10
|
+
mlops/api/client/api/tasks/get_task.py,sha256=268QTjBbi2LV1t4dBmbCBjQO6Cgpa-QS959fIXW9Nsg,4984
|
|
11
|
+
mlops/api/client/api/tasks/get_task_by_task_id.py,sha256=X8tDWDGnGyVlkBKby_tz0BW9QlweTn8OP4APLZRIPSU,5470
|
|
12
|
+
mlops/api/client/api/tasks/get_task_logs.py,sha256=lmN14NvYddwQ7FewrQ-kzxKItzP8vilTpVbKBNOaFIQ,9406
|
|
13
|
+
mlops/api/client/api/tasks/list_tasks.py,sha256=5HLoNuTw1QcFGOwcD2x26w78K_crU0UoAC4jl0QBfzY,7572
|
|
14
|
+
mlops/api/client/api/tasks/submit_task.py,sha256=fXV8QSM3J_vT4aCdYFBnqCjrNFj-pz-wBmOZWSEhttI,5015
|
|
15
|
+
mlops/api/client/client.py,sha256=o_mdLqyBCQstu5tS1WZFwqIEbGwkvWQ7eQjuCJw_5VY,12419
|
|
16
|
+
mlops/api/client/errors.py,sha256=gO8GBmKqmSNgAg-E5oT-oOyxztvp7V_6XG7OUTT15q0,546
|
|
17
|
+
mlops/api/client/models/__init__.py,sha256=lD_D-IJF4YZu5T8poDaINUnflgp24V6BVzSWDqt3bd8,2044
|
|
18
|
+
mlops/api/client/models/error_response.py,sha256=gmFOtAcZZTBPGrR3MXJe-_viEpfJOR9r_ffXNotaPlQ,1809
|
|
19
|
+
mlops/api/client/models/get_storage_presign_download_response_200.py,sha256=b3O833zpx7b2qaZIEPqi-qzxNqPgsnW3inzPWOJH9bw,1797
|
|
20
|
+
mlops/api/client/models/get_storage_presign_upload_response_200.py,sha256=YacqPNaARhvdbXIceIBsg1PCuYKQQ_22wUyoYNwKpaw,2410
|
|
21
|
+
mlops/api/client/models/get_task_logs_direction.py,sha256=6N9YzVZDkMHC1RwKWAwC2hwFt9rX4WD9dwyI85L9rg8,175
|
|
22
|
+
mlops/api/client/models/get_task_logs_log_type.py,sha256=8uJwMeHhWYCloF_tr95wtzXwcNEnB4xwtzQbwfhZtIs,183
|
|
23
|
+
mlops/api/client/models/job_spec.py,sha256=rBOb7NsJhWel8kb6gEclGQAkFu93OJddxhemmcdxyyw,9372
|
|
24
|
+
mlops/api/client/models/job_spec_env.py,sha256=EtFZzKvfCEFyiriz4EKzmyVPgJmTVpBxpXer6ULZ94M,1218
|
|
25
|
+
mlops/api/client/models/job_spec_master_strategy.py,sha256=UmzktsAfMhyQYYKDKrjEQm6gcEC8iEr0h892jhoGNd4,156
|
|
26
|
+
mlops/api/client/models/log_pagination.py,sha256=gwJUB5ByqWst6_9bPOzoib-_cSNCTSCujw0y_St18JQ,2879
|
|
27
|
+
mlops/api/client/models/message_response.py,sha256=rV3BMdP_fnmyxJW3mYLGECuUQ7VNVUKdzJwnY8Jdvu4,1609
|
|
28
|
+
mlops/api/client/models/task.py,sha256=XAioNH5gHvP5mDlDJ5cJlq10jHSKJuk82YNxuAV9rZo,61827
|
|
29
|
+
mlops/api/client/models/task_alloc_tres_type_0.py,sha256=U3lUyNmFDVyEZABMXJ997zn6vnPOJ_gjBJsczoPd41A,1330
|
|
30
|
+
mlops/api/client/models/task_gres_detail_type_0_item.py,sha256=_DVf3bNzqfYxZvcYnhOEbXW-CuN2BhPwWQZQT9MZt-A,1272
|
|
31
|
+
mlops/api/client/models/task_job_resources_type_0.py,sha256=A63CcQn2at3q9Vq137BBBi3LVbp5xVPUzFtGaE92Czw,1373
|
|
32
|
+
mlops/api/client/models/task_list_response.py,sha256=nVhi5LFiTUlyvmHz9Sc9-2A-awJ7s6lNKQQfwG2OfL4,2909
|
|
33
|
+
mlops/api/client/models/task_log_entry.py,sha256=_zRsb6upH2Bsc_TSnV1HvMc9N-dpUgDw-EdO6hY0In8,3370
|
|
34
|
+
mlops/api/client/models/task_log_entry_log_type.py,sha256=uVqbF8RewyFkezY6sy28HeuItE5KpTc1KiHWuHNpt0c,168
|
|
35
|
+
mlops/api/client/models/task_logs_response.py,sha256=QEGRy51qB7t0K-EGusxzDmkDlAjdKkwHF92em3dLb1c,3557
|
|
36
|
+
mlops/api/client/models/task_resources_type_0.py,sha256=36nxeOqAJS4ksfQtzoXigWVMhEV1Tnq5Z_64sHa3gGQ,1341
|
|
37
|
+
mlops/api/client/models/task_status.py,sha256=Tht4F2UeBp-QBLhh-z0fEw45r5cBCfkFUro-la42BPY,315
|
|
38
|
+
mlops/api/client/models/task_submit_request.py,sha256=8zbEK2Y_dT4S6Wflm6WTNn9f_f-SLQ7Sl92bS2H_T0c,22941
|
|
39
|
+
mlops/api/client/models/task_submit_request_environment_type_0.py,sha256=Wx6ye6vVHytSex186AeUm27-XMWMmZe6lbL2Ons2mkw,1454
|
|
40
|
+
mlops/api/client/models/task_submit_response.py,sha256=EK3ZXxo_XO5Yn2zdOrR-VMPKg9om49qQ1ywS2Smgink,2200
|
|
41
|
+
mlops/api/client/models/task_tres_type_0.py,sha256=rEaiQG7A19mlTIHDppzxuWa4oPfh9qsKjPhhVOlBf4g,1292
|
|
42
|
+
mlops/api/client/models/task_tres_used_type_0.py,sha256=4w6An7-ZCqa8cc3SPi7mcwGK-ekT6AYq_dEdf8KzoYA,1320
|
|
43
|
+
mlops/api/client/py.typed,sha256=8ZJUsxZiuOy1oJeVhsTWQhTG_6pTVHVXk5hJL79ebTk,25
|
|
44
|
+
mlops/api/client/types.py,sha256=AX4orxQZQJat3vZrgjJ-TYb2sNBL8kNo9yqYDT-n8y8,1391
|
|
45
|
+
mlops/connection_config.py,sha256=aU_8WwkMcomjt4dDyRk1Oyr92ywwuIhFLmv0oQ29KkM,2953
|
|
46
|
+
mlops/exceptions.py,sha256=3kfda-Rz0km9kV-gvnPCw7ueemWkXIGGdT0NXx6z9Xk,1680
|
|
47
|
+
mlops/task/__init__.py,sha256=M983vMPLj3tZQNFXQyTP5I2RsRorFElezLeppr3WLsw,133
|
|
48
|
+
mlops/task/client.py,sha256=V131WLVJl1raGAVixUhJCX8s1neN15mxAjQwO01qlIg,3552
|
|
49
|
+
mlops/task/task.py,sha256=7QBSNpmI4jacWep2FaSZyA86wgsx-BiNAlBrKi-Razg,23450
|
|
50
|
+
mlops_python_sdk-1.0.1.dist-info/METADATA,sha256=afpVJjsJ-TN-lmCym3ScsMJhrOdFoId06MBfLEfFbY4,9525
|
|
51
|
+
mlops_python_sdk-1.0.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
52
|
+
mlops_python_sdk-1.0.1.dist-info/RECORD,,
|