mlops-python-sdk 1.0.1__tar.gz → 1.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlops_python_sdk-1.0.3/PKG-INFO +235 -0
- mlops_python_sdk-1.0.3/README.md +209 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_submit_request.py +44 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/connection_config.py +2 -2
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/task/task.py +144 -33
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/pyproject.toml +1 -1
- mlops_python_sdk-1.0.1/PKG-INFO +0 -407
- mlops_python_sdk-1.0.1/README.md +0 -381
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/__init__.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/__init__.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/api/__init__.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/api/storage/__init__.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/api/storage/get_storage_presign_download.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/api/storage/get_storage_presign_upload.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/api/tasks/__init__.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/api/tasks/cancel_task.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/api/tasks/delete_task.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/api/tasks/get_task.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/api/tasks/get_task_by_task_id.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/api/tasks/get_task_logs.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/api/tasks/list_tasks.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/api/tasks/submit_task.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/client.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/errors.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/__init__.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/error_response.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/get_storage_presign_download_response_200.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/get_storage_presign_upload_response_200.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/get_task_logs_direction.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/get_task_logs_log_type.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/job_spec.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/job_spec_env.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/job_spec_master_strategy.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/log_pagination.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/message_response.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_alloc_tres_type_0.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_gres_detail_type_0_item.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_job_resources_type_0.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_list_response.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_log_entry.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_log_entry_log_type.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_logs_response.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_resources_type_0.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_status.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_submit_request_environment_type_0.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_submit_response.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_tres_type_0.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_tres_used_type_0.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/py.typed +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/types.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/exceptions.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/task/__init__.py +0 -0
- {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/task/client.py +0 -0
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: mlops-python-sdk
|
|
3
|
+
Version: 1.0.3
|
|
4
|
+
Summary: MLOps Python SDK for XCloud Service API
|
|
5
|
+
License: MIT
|
|
6
|
+
Author: mlops
|
|
7
|
+
Author-email: mlops@example.com
|
|
8
|
+
Requires-Python: >=3.9,<4.0
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Requires-Dist: attrs (>=23.2.0)
|
|
17
|
+
Requires-Dist: httpx (>=0.27.0,<1.0.0)
|
|
18
|
+
Requires-Dist: packaging (>=24.1)
|
|
19
|
+
Requires-Dist: python-dateutil (>=2.8.2)
|
|
20
|
+
Requires-Dist: typing-extensions (>=4.1.0)
|
|
21
|
+
Project-URL: Bug Tracker, https://github.com/xcloud-service/xservice/issues
|
|
22
|
+
Project-URL: Homepage, https://mlops.cloud/
|
|
23
|
+
Project-URL: Repository, https://github.com/xcloud-service/xservice
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
|
|
26
|
+
# SDK
|
|
27
|
+
|
|
28
|
+
Software Development Kits for integrating with the XCloud Service API.
|
|
29
|
+
|
|
30
|
+
> [!NOTE] SDK Support
|
|
31
|
+
> SDKs provide type-safe, high-level interfaces for interacting with the platform API. They handle authentication, error handling, and request retries automatically.
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
## Installation
|
|
35
|
+
|
|
36
|
+
Install the Python SDK with pip:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install mlops-python-sdk
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### Configuration
|
|
43
|
+
|
|
44
|
+
The SDK reads configuration from environment variables by default:
|
|
45
|
+
|
|
46
|
+
- `MLOPS_API_KEY`: API key (required)
|
|
47
|
+
- `MLOPS_DOMAIN`: API domain, e.g. `localhost:8090` or `https://example.com`
|
|
48
|
+
- `MLOPS_API_PATH`: API path prefix (default: `/api/v1`)
|
|
49
|
+
- `MLOPS_DEBUG`: `true|false` (default: `false`)
|
|
50
|
+
|
|
51
|
+
Or configure in code:
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
from mlops import ConnectionConfig, Task
|
|
55
|
+
|
|
56
|
+
config = ConnectionConfig(
|
|
57
|
+
api_key="xck_...",
|
|
58
|
+
domain="https://example.com",
|
|
59
|
+
api_path="/api/v1",
|
|
60
|
+
debug=False,
|
|
61
|
+
)
|
|
62
|
+
task = Task(config=config)
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## SDK Usage
|
|
66
|
+
|
|
67
|
+
### Initialize client
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from mlops import Task
|
|
71
|
+
|
|
72
|
+
task = Task() # uses environment variables by default
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Submit a GPU task
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
from mlops import Task
|
|
79
|
+
|
|
80
|
+
task = Task()
|
|
81
|
+
resp = task.submit(
|
|
82
|
+
name="gpu-task-from-sdk",
|
|
83
|
+
cluster_name="slurm-cn",
|
|
84
|
+
team_id=1,
|
|
85
|
+
image="/mnt/minio/images/01ai-registry.cn-shanghai.cr.aliyuncs.com+public+llamafactory+0.9.3.sqsh",
|
|
86
|
+
entry_command="llamafactory-cli train /workspace/config/test_lora.yaml",
|
|
87
|
+
resources={
|
|
88
|
+
"partition": "gpu",
|
|
89
|
+
"nodes": 2,
|
|
90
|
+
"ntasks": 2,
|
|
91
|
+
"cpus_per_task": 2,
|
|
92
|
+
"memory": "4G",
|
|
93
|
+
"time": "01:00:00",
|
|
94
|
+
"gres": "gpu:nvidia_a10:1",
|
|
95
|
+
"qos": "qos_xcloud",
|
|
96
|
+
},
|
|
97
|
+
file_path="/path/to/xservice.zip", # optional: .zip/.tar.gz/.tgz
|
|
98
|
+
)
|
|
99
|
+
print(resp.job_id)
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### Submit a CPU task
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
from mlops import Task
|
|
106
|
+
|
|
107
|
+
task = Task()
|
|
108
|
+
resp = task.submit(
|
|
109
|
+
name="cpu-task-from-sdk",
|
|
110
|
+
cluster_name="slurm-cn",
|
|
111
|
+
team_id=1,
|
|
112
|
+
image="docker://01ai-registry.cn-shanghai.cr.aliyuncs.com/01-ai/xcs/v2/alpine:3.23.0",
|
|
113
|
+
entry_command="echo hello",
|
|
114
|
+
resources={
|
|
115
|
+
"partition": "cpu",
|
|
116
|
+
"nodes": 1,
|
|
117
|
+
"ntasks": 1,
|
|
118
|
+
"cpus_per_task": 1,
|
|
119
|
+
"memory": "1G",
|
|
120
|
+
"time": "01:00:00",
|
|
121
|
+
"qos": "qos_xcloud",
|
|
122
|
+
},
|
|
123
|
+
)
|
|
124
|
+
print(resp.job_id)
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### List tasks
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
from mlops import Task
|
|
131
|
+
from mlops.api.client.models.task_status import TaskStatus
|
|
132
|
+
|
|
133
|
+
task = Task()
|
|
134
|
+
resp = task.list(status=TaskStatus.COMPLETED, cluster_name="slurm-cn", page=1, page_size=20)
|
|
135
|
+
print(len(resp.tasks or []))
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### Get task details
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
from mlops import Task
|
|
142
|
+
|
|
143
|
+
task = Task()
|
|
144
|
+
task_info = task.get(task_id=12345, cluster_name="slurm-cn")
|
|
145
|
+
print(task_info)
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
### Cancel a task
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
from mlops import Task
|
|
152
|
+
|
|
153
|
+
task = Task()
|
|
154
|
+
task.cancel(task_id=12345, cluster_name="slurm-cn")
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Delete a task
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
from mlops import Task
|
|
161
|
+
|
|
162
|
+
task = Task()
|
|
163
|
+
task.delete(task_id=12345, cluster_name="slurm-cn")
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
**Task Management Methods:**
|
|
167
|
+
|
|
168
|
+
- `submit()` - Submit a new task with container image and entry command
|
|
169
|
+
- `get()` - Get task details by task ID
|
|
170
|
+
- `list()` - List tasks with optional filters (status, cluster_name, team_id, user_id)
|
|
171
|
+
- `cancel()` - Cancel a running task
|
|
172
|
+
- `delete()` - Delete a task record
|
|
173
|
+
|
|
174
|
+
**Task Status Values:**
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
from mlops.api.client.models.task_status import TaskStatus
|
|
178
|
+
|
|
179
|
+
TaskStatus.PENDING # Task is pending
|
|
180
|
+
TaskStatus.QUEUED # Task is queued
|
|
181
|
+
TaskStatus.RUNNING # Task is running
|
|
182
|
+
TaskStatus.COMPLETED # Task completed successfully
|
|
183
|
+
TaskStatus.SUCCEEDED # Task succeeded
|
|
184
|
+
TaskStatus.FAILED # Task failed
|
|
185
|
+
TaskStatus.CANCELLED # Task was cancelled
|
|
186
|
+
TaskStatus.CREATED # Task was created
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
**Error Handling:**
|
|
190
|
+
|
|
191
|
+
```python
|
|
192
|
+
from mlops.exceptions import (
|
|
193
|
+
APIException,
|
|
194
|
+
AuthenticationException,
|
|
195
|
+
NotFoundException,
|
|
196
|
+
RateLimitException,
|
|
197
|
+
TimeoutException,
|
|
198
|
+
InvalidArgumentException,
|
|
199
|
+
NotEnoughSpaceException
|
|
200
|
+
)
|
|
201
|
+
from mlops import Task
|
|
202
|
+
|
|
203
|
+
task = Task()
|
|
204
|
+
|
|
205
|
+
try:
|
|
206
|
+
result = task.submit(
|
|
207
|
+
name="test",
|
|
208
|
+
cluster_name="slurm-cn",
|
|
209
|
+
image="docker://alpine:3.23.0",
|
|
210
|
+
entry_command="echo hello",
|
|
211
|
+
)
|
|
212
|
+
except AuthenticationException as e:
|
|
213
|
+
print(f"Authentication failed: {e}")
|
|
214
|
+
except NotFoundException as e:
|
|
215
|
+
print(f"Resource not found: {e}")
|
|
216
|
+
except APIException as e:
|
|
217
|
+
print(f"API error: {e}")
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
> [!TIP] Error Handling
|
|
221
|
+
> SDKs automatically parse typed responses and raise structured exceptions.
|
|
222
|
+
|
|
223
|
+
## Features
|
|
224
|
+
|
|
225
|
+
- Type-safe API clients
|
|
226
|
+
- Automatic authentication
|
|
227
|
+
- Error handling
|
|
228
|
+
- Typed response parsing (generated models)
|
|
229
|
+
- Unexpected-status guard (optional)
|
|
230
|
+
|
|
231
|
+
## Resources
|
|
232
|
+
|
|
233
|
+
- [Python SDK Documentation](https://github.com/xcloud-service/xservice/tree/main/client/python-sdk)
|
|
234
|
+
- [API Reference](https://xcloud-service.com/docs/api)
|
|
235
|
+
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
# SDK
|
|
2
|
+
|
|
3
|
+
Software Development Kits for integrating with the XCloud Service API.
|
|
4
|
+
|
|
5
|
+
> [!NOTE] SDK Support
|
|
6
|
+
> SDKs provide type-safe, high-level interfaces for interacting with the platform API. They handle authentication, error handling, and request retries automatically.
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
Install the Python SDK with pip:
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install mlops-python-sdk
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
### Configuration
|
|
18
|
+
|
|
19
|
+
The SDK reads configuration from environment variables by default:
|
|
20
|
+
|
|
21
|
+
- `MLOPS_API_KEY`: API key (required)
|
|
22
|
+
- `MLOPS_DOMAIN`: API domain, e.g. `localhost:8090` or `https://example.com`
|
|
23
|
+
- `MLOPS_API_PATH`: API path prefix (default: `/api/v1`)
|
|
24
|
+
- `MLOPS_DEBUG`: `true|false` (default: `false`)
|
|
25
|
+
|
|
26
|
+
Or configure in code:
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
from mlops import ConnectionConfig, Task
|
|
30
|
+
|
|
31
|
+
config = ConnectionConfig(
|
|
32
|
+
api_key="xck_...",
|
|
33
|
+
domain="https://example.com",
|
|
34
|
+
api_path="/api/v1",
|
|
35
|
+
debug=False,
|
|
36
|
+
)
|
|
37
|
+
task = Task(config=config)
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## SDK Usage
|
|
41
|
+
|
|
42
|
+
### Initialize client
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from mlops import Task
|
|
46
|
+
|
|
47
|
+
task = Task() # uses environment variables by default
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Submit a GPU task
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
from mlops import Task
|
|
54
|
+
|
|
55
|
+
task = Task()
|
|
56
|
+
resp = task.submit(
|
|
57
|
+
name="gpu-task-from-sdk",
|
|
58
|
+
cluster_name="slurm-cn",
|
|
59
|
+
team_id=1,
|
|
60
|
+
image="/mnt/minio/images/01ai-registry.cn-shanghai.cr.aliyuncs.com+public+llamafactory+0.9.3.sqsh",
|
|
61
|
+
entry_command="llamafactory-cli train /workspace/config/test_lora.yaml",
|
|
62
|
+
resources={
|
|
63
|
+
"partition": "gpu",
|
|
64
|
+
"nodes": 2,
|
|
65
|
+
"ntasks": 2,
|
|
66
|
+
"cpus_per_task": 2,
|
|
67
|
+
"memory": "4G",
|
|
68
|
+
"time": "01:00:00",
|
|
69
|
+
"gres": "gpu:nvidia_a10:1",
|
|
70
|
+
"qos": "qos_xcloud",
|
|
71
|
+
},
|
|
72
|
+
file_path="/path/to/xservice.zip", # optional: .zip/.tar.gz/.tgz
|
|
73
|
+
)
|
|
74
|
+
print(resp.job_id)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### Submit a CPU task
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from mlops import Task
|
|
81
|
+
|
|
82
|
+
task = Task()
|
|
83
|
+
resp = task.submit(
|
|
84
|
+
name="cpu-task-from-sdk",
|
|
85
|
+
cluster_name="slurm-cn",
|
|
86
|
+
team_id=1,
|
|
87
|
+
image="docker://01ai-registry.cn-shanghai.cr.aliyuncs.com/01-ai/xcs/v2/alpine:3.23.0",
|
|
88
|
+
entry_command="echo hello",
|
|
89
|
+
resources={
|
|
90
|
+
"partition": "cpu",
|
|
91
|
+
"nodes": 1,
|
|
92
|
+
"ntasks": 1,
|
|
93
|
+
"cpus_per_task": 1,
|
|
94
|
+
"memory": "1G",
|
|
95
|
+
"time": "01:00:00",
|
|
96
|
+
"qos": "qos_xcloud",
|
|
97
|
+
},
|
|
98
|
+
)
|
|
99
|
+
print(resp.job_id)
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### List tasks
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
from mlops import Task
|
|
106
|
+
from mlops.api.client.models.task_status import TaskStatus
|
|
107
|
+
|
|
108
|
+
task = Task()
|
|
109
|
+
resp = task.list(status=TaskStatus.COMPLETED, cluster_name="slurm-cn", page=1, page_size=20)
|
|
110
|
+
print(len(resp.tasks or []))
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### Get task details
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
from mlops import Task
|
|
117
|
+
|
|
118
|
+
task = Task()
|
|
119
|
+
task_info = task.get(task_id=12345, cluster_name="slurm-cn")
|
|
120
|
+
print(task_info)
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Cancel a task
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
from mlops import Task
|
|
127
|
+
|
|
128
|
+
task = Task()
|
|
129
|
+
task.cancel(task_id=12345, cluster_name="slurm-cn")
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
### Delete a task
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
from mlops import Task
|
|
136
|
+
|
|
137
|
+
task = Task()
|
|
138
|
+
task.delete(task_id=12345, cluster_name="slurm-cn")
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
**Task Management Methods:**
|
|
142
|
+
|
|
143
|
+
- `submit()` - Submit a new task with container image and entry command
|
|
144
|
+
- `get()` - Get task details by task ID
|
|
145
|
+
- `list()` - List tasks with optional filters (status, cluster_name, team_id, user_id)
|
|
146
|
+
- `cancel()` - Cancel a running task
|
|
147
|
+
- `delete()` - Delete a task record
|
|
148
|
+
|
|
149
|
+
**Task Status Values:**
|
|
150
|
+
|
|
151
|
+
```python
|
|
152
|
+
from mlops.api.client.models.task_status import TaskStatus
|
|
153
|
+
|
|
154
|
+
TaskStatus.PENDING # Task is pending
|
|
155
|
+
TaskStatus.QUEUED # Task is queued
|
|
156
|
+
TaskStatus.RUNNING # Task is running
|
|
157
|
+
TaskStatus.COMPLETED # Task completed successfully
|
|
158
|
+
TaskStatus.SUCCEEDED # Task succeeded
|
|
159
|
+
TaskStatus.FAILED # Task failed
|
|
160
|
+
TaskStatus.CANCELLED # Task was cancelled
|
|
161
|
+
TaskStatus.CREATED # Task was created
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
**Error Handling:**
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
from mlops.exceptions import (
|
|
168
|
+
APIException,
|
|
169
|
+
AuthenticationException,
|
|
170
|
+
NotFoundException,
|
|
171
|
+
RateLimitException,
|
|
172
|
+
TimeoutException,
|
|
173
|
+
InvalidArgumentException,
|
|
174
|
+
NotEnoughSpaceException
|
|
175
|
+
)
|
|
176
|
+
from mlops import Task
|
|
177
|
+
|
|
178
|
+
task = Task()
|
|
179
|
+
|
|
180
|
+
try:
|
|
181
|
+
result = task.submit(
|
|
182
|
+
name="test",
|
|
183
|
+
cluster_name="slurm-cn",
|
|
184
|
+
image="docker://alpine:3.23.0",
|
|
185
|
+
entry_command="echo hello",
|
|
186
|
+
)
|
|
187
|
+
except AuthenticationException as e:
|
|
188
|
+
print(f"Authentication failed: {e}")
|
|
189
|
+
except NotFoundException as e:
|
|
190
|
+
print(f"Resource not found: {e}")
|
|
191
|
+
except APIException as e:
|
|
192
|
+
print(f"API error: {e}")
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
> [!TIP] Error Handling
|
|
196
|
+
> SDKs automatically parse typed responses and raise structured exceptions.
|
|
197
|
+
|
|
198
|
+
## Features
|
|
199
|
+
|
|
200
|
+
- Type-safe API clients
|
|
201
|
+
- Automatic authentication
|
|
202
|
+
- Error handling
|
|
203
|
+
- Typed response parsing (generated models)
|
|
204
|
+
- Unexpected-status guard (optional)
|
|
205
|
+
|
|
206
|
+
## Resources
|
|
207
|
+
|
|
208
|
+
- [Python SDK Documentation](https://github.com/xcloud-service/xservice/tree/main/client/python-sdk)
|
|
209
|
+
- [API Reference](https://xcloud-service.com/docs/api)
|
{mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_submit_request.py
RENAMED
|
@@ -29,12 +29,18 @@ class TaskSubmitRequest:
|
|
|
29
29
|
cpus_per_task (Union[None, Unset, int]): CPUs per task Example: 1.
|
|
30
30
|
dependency (Union[None, Unset, str]): Job dependencies Example: afterok:12345.
|
|
31
31
|
distribution (Union[None, Unset, str]): Task distribution Example: block.
|
|
32
|
+
entry_command (Union[None, Unset, str]): Container entry command/script (bash snippet) executed inside the
|
|
33
|
+
container. The platform runs it under /workspace.
|
|
34
|
+
Example: python -V && ls -la.
|
|
32
35
|
environment (Union['TaskSubmitRequestEnvironmentType0', None, Unset]): Environment variables as key-value pairs
|
|
33
36
|
Example: {'CUDA_VISIBLE_DEVICES': '0,1', 'PYTHONPATH': '/opt/python/lib'}.
|
|
34
37
|
error (Union[None, Unset, str]): Standard error file pattern Example: error_%j.log.
|
|
35
38
|
exclude (Union[None, Unset, str]): Nodes to exclude
|
|
36
39
|
export (Union[None, Unset, str]): Environment export Example: ALL.
|
|
37
40
|
gres (Union[None, Unset, str]): Generic resources (e.g., "gpu:1", "gpu:tesla:2") Example: gpu:1.
|
|
41
|
+
image (Union[None, Unset, str]): Container image reference. Can be a Slurm container plugin supported reference
|
|
42
|
+
(e.g. "docker://..."), or a registry reference which will be mapped to a local .sqsh image path by the platform.
|
|
43
|
+
Example: 01ai-registry.cn-shanghai.cr.aliyuncs.com/public/llamafactory:0.9.3.
|
|
38
44
|
input_ (Union[None, Unset, str]): Standard input file
|
|
39
45
|
job_spec (Union[Unset, JobSpec]): Domain-specific job specification (rendered into slurm script)
|
|
40
46
|
mem_bind (Union[None, Unset, str]): Memory binding
|
|
@@ -65,11 +71,13 @@ class TaskSubmitRequest:
|
|
|
65
71
|
cpus_per_task: Union[None, Unset, int] = UNSET
|
|
66
72
|
dependency: Union[None, Unset, str] = UNSET
|
|
67
73
|
distribution: Union[None, Unset, str] = UNSET
|
|
74
|
+
entry_command: Union[None, Unset, str] = UNSET
|
|
68
75
|
environment: Union["TaskSubmitRequestEnvironmentType0", None, Unset] = UNSET
|
|
69
76
|
error: Union[None, Unset, str] = UNSET
|
|
70
77
|
exclude: Union[None, Unset, str] = UNSET
|
|
71
78
|
export: Union[None, Unset, str] = UNSET
|
|
72
79
|
gres: Union[None, Unset, str] = UNSET
|
|
80
|
+
image: Union[None, Unset, str] = UNSET
|
|
73
81
|
input_: Union[None, Unset, str] = UNSET
|
|
74
82
|
job_spec: Union[Unset, "JobSpec"] = UNSET
|
|
75
83
|
mem_bind: Union[None, Unset, str] = UNSET
|
|
@@ -143,6 +151,12 @@ class TaskSubmitRequest:
|
|
|
143
151
|
else:
|
|
144
152
|
distribution = self.distribution
|
|
145
153
|
|
|
154
|
+
entry_command: Union[None, Unset, str]
|
|
155
|
+
if isinstance(self.entry_command, Unset):
|
|
156
|
+
entry_command = UNSET
|
|
157
|
+
else:
|
|
158
|
+
entry_command = self.entry_command
|
|
159
|
+
|
|
146
160
|
environment: Union[None, Unset, dict[str, Any]]
|
|
147
161
|
if isinstance(self.environment, Unset):
|
|
148
162
|
environment = UNSET
|
|
@@ -175,6 +189,12 @@ class TaskSubmitRequest:
|
|
|
175
189
|
else:
|
|
176
190
|
gres = self.gres
|
|
177
191
|
|
|
192
|
+
image: Union[None, Unset, str]
|
|
193
|
+
if isinstance(self.image, Unset):
|
|
194
|
+
image = UNSET
|
|
195
|
+
else:
|
|
196
|
+
image = self.image
|
|
197
|
+
|
|
178
198
|
input_: Union[None, Unset, str]
|
|
179
199
|
if isinstance(self.input_, Unset):
|
|
180
200
|
input_ = UNSET
|
|
@@ -289,6 +309,8 @@ class TaskSubmitRequest:
|
|
|
289
309
|
field_dict["dependency"] = dependency
|
|
290
310
|
if distribution is not UNSET:
|
|
291
311
|
field_dict["distribution"] = distribution
|
|
312
|
+
if entry_command is not UNSET:
|
|
313
|
+
field_dict["entry_command"] = entry_command
|
|
292
314
|
if environment is not UNSET:
|
|
293
315
|
field_dict["environment"] = environment
|
|
294
316
|
if error is not UNSET:
|
|
@@ -299,6 +321,8 @@ class TaskSubmitRequest:
|
|
|
299
321
|
field_dict["export"] = export
|
|
300
322
|
if gres is not UNSET:
|
|
301
323
|
field_dict["gres"] = gres
|
|
324
|
+
if image is not UNSET:
|
|
325
|
+
field_dict["image"] = image
|
|
302
326
|
if input_ is not UNSET:
|
|
303
327
|
field_dict["input"] = input_
|
|
304
328
|
if job_spec is not UNSET:
|
|
@@ -416,6 +440,15 @@ class TaskSubmitRequest:
|
|
|
416
440
|
|
|
417
441
|
distribution = _parse_distribution(d.pop("distribution", UNSET))
|
|
418
442
|
|
|
443
|
+
def _parse_entry_command(data: object) -> Union[None, Unset, str]:
|
|
444
|
+
if data is None:
|
|
445
|
+
return data
|
|
446
|
+
if isinstance(data, Unset):
|
|
447
|
+
return data
|
|
448
|
+
return cast(Union[None, Unset, str], data)
|
|
449
|
+
|
|
450
|
+
entry_command = _parse_entry_command(d.pop("entry_command", UNSET))
|
|
451
|
+
|
|
419
452
|
def _parse_environment(data: object) -> Union["TaskSubmitRequestEnvironmentType0", None, Unset]:
|
|
420
453
|
if data is None:
|
|
421
454
|
return data
|
|
@@ -469,6 +502,15 @@ class TaskSubmitRequest:
|
|
|
469
502
|
|
|
470
503
|
gres = _parse_gres(d.pop("gres", UNSET))
|
|
471
504
|
|
|
505
|
+
def _parse_image(data: object) -> Union[None, Unset, str]:
|
|
506
|
+
if data is None:
|
|
507
|
+
return data
|
|
508
|
+
if isinstance(data, Unset):
|
|
509
|
+
return data
|
|
510
|
+
return cast(Union[None, Unset, str], data)
|
|
511
|
+
|
|
512
|
+
image = _parse_image(d.pop("image", UNSET))
|
|
513
|
+
|
|
472
514
|
def _parse_input_(data: object) -> Union[None, Unset, str]:
|
|
473
515
|
if data is None:
|
|
474
516
|
return data
|
|
@@ -615,11 +657,13 @@ class TaskSubmitRequest:
|
|
|
615
657
|
cpus_per_task=cpus_per_task,
|
|
616
658
|
dependency=dependency,
|
|
617
659
|
distribution=distribution,
|
|
660
|
+
entry_command=entry_command,
|
|
618
661
|
environment=environment,
|
|
619
662
|
error=error,
|
|
620
663
|
exclude=exclude,
|
|
621
664
|
export=export,
|
|
622
665
|
gres=gres,
|
|
666
|
+
image=image,
|
|
623
667
|
input_=input_,
|
|
624
668
|
job_spec=job_spec,
|
|
625
669
|
mem_bind=mem_bind,
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import os
|
|
2
2
|
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import Optional, Dict
|
|
4
4
|
from httpx._types import ProxyTypes
|
|
5
5
|
|
|
6
|
-
REQUEST_TIMEOUT: float =
|
|
6
|
+
REQUEST_TIMEOUT: float = 120.0 # 120 seconds
|
|
7
7
|
|
|
8
8
|
KEEPALIVE_PING_INTERVAL_SEC = 50 # 50 seconds
|
|
9
9
|
KEEPALIVE_PING_HEADER = "Keepalive-Ping-Interval"
|