mlops-python-sdk 1.0.1__tar.gz → 1.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. mlops_python_sdk-1.0.2/PKG-INFO +254 -0
  2. mlops_python_sdk-1.0.2/README.md +228 -0
  3. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/task_submit_request.py +44 -0
  4. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/connection_config.py +2 -2
  5. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/task/task.py +44 -32
  6. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/pyproject.toml +1 -1
  7. mlops_python_sdk-1.0.1/PKG-INFO +0 -407
  8. mlops_python_sdk-1.0.1/README.md +0 -381
  9. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/__init__.py +0 -0
  10. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/__init__.py +0 -0
  11. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/api/__init__.py +0 -0
  12. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/api/storage/__init__.py +0 -0
  13. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/api/storage/get_storage_presign_download.py +0 -0
  14. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/api/storage/get_storage_presign_upload.py +0 -0
  15. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/api/tasks/__init__.py +0 -0
  16. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/api/tasks/cancel_task.py +0 -0
  17. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/api/tasks/delete_task.py +0 -0
  18. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/api/tasks/get_task.py +0 -0
  19. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/api/tasks/get_task_by_task_id.py +0 -0
  20. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/api/tasks/get_task_logs.py +0 -0
  21. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/api/tasks/list_tasks.py +0 -0
  22. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/api/tasks/submit_task.py +0 -0
  23. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/client.py +0 -0
  24. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/errors.py +0 -0
  25. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/__init__.py +0 -0
  26. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/error_response.py +0 -0
  27. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/get_storage_presign_download_response_200.py +0 -0
  28. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/get_storage_presign_upload_response_200.py +0 -0
  29. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/get_task_logs_direction.py +0 -0
  30. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/get_task_logs_log_type.py +0 -0
  31. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/job_spec.py +0 -0
  32. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/job_spec_env.py +0 -0
  33. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/job_spec_master_strategy.py +0 -0
  34. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/log_pagination.py +0 -0
  35. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/message_response.py +0 -0
  36. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/task.py +0 -0
  37. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/task_alloc_tres_type_0.py +0 -0
  38. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/task_gres_detail_type_0_item.py +0 -0
  39. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/task_job_resources_type_0.py +0 -0
  40. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/task_list_response.py +0 -0
  41. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/task_log_entry.py +0 -0
  42. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/task_log_entry_log_type.py +0 -0
  43. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/task_logs_response.py +0 -0
  44. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/task_resources_type_0.py +0 -0
  45. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/task_status.py +0 -0
  46. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/task_submit_request_environment_type_0.py +0 -0
  47. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/task_submit_response.py +0 -0
  48. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/task_tres_type_0.py +0 -0
  49. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/models/task_tres_used_type_0.py +0 -0
  50. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/py.typed +0 -0
  51. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/api/client/types.py +0 -0
  52. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/exceptions.py +0 -0
  53. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/task/__init__.py +0 -0
  54. {mlops_python_sdk-1.0.1 → mlops_python_sdk-1.0.2}/mlops/task/client.py +0 -0
@@ -0,0 +1,254 @@
1
+ Metadata-Version: 2.3
2
+ Name: mlops-python-sdk
3
+ Version: 1.0.2
4
+ Summary: MLOps Python SDK for XCloud Service API
5
+ License: MIT
6
+ Author: mlops
7
+ Author-email: mlops@example.com
8
+ Requires-Python: >=3.9,<4.0
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Requires-Dist: attrs (>=23.2.0)
17
+ Requires-Dist: httpx (>=0.27.0,<1.0.0)
18
+ Requires-Dist: packaging (>=24.1)
19
+ Requires-Dist: python-dateutil (>=2.8.2)
20
+ Requires-Dist: typing-extensions (>=4.1.0)
21
+ Project-URL: Bug Tracker, https://github.com/xcloud-service/xservice/issues
22
+ Project-URL: Homepage, https://mlops.cloud/
23
+ Project-URL: Repository, https://github.com/xcloud-service/xservice
24
+ Description-Content-Type: text/markdown
25
+
26
+ # SDK
27
+
28
+ Software Development Kits for integrating with the XCloud Service API.
29
+
30
+ > [!NOTE] SDK Support
31
+ > SDKs provide type-safe, high-level interfaces for interacting with the platform API. They handle authentication, error handling, and request retries automatically.
32
+
33
+ ## Available SDKs
34
+
35
+ ### Python SDK
36
+
37
+ ### Installation
38
+
39
+ Install the Python SDK with pip:
40
+
41
+ ```bash
42
+ pip install mlops-python-sdk
43
+ ```
44
+
45
+ ### Configuration
46
+
47
+ The SDK reads configuration from environment variables by default:
48
+
49
+ - `MLOPS_API_KEY`: API key (required)
50
+ - `MLOPS_DOMAIN`: API domain, e.g. `localhost:8090` or `https://example.com`
51
+ - `MLOPS_API_PATH`: API path prefix (default: `/api/v1`)
52
+ - `MLOPS_DEBUG`: `true|false` (default: `false`)
53
+
54
+ Or configure in code:
55
+
56
+ ```python
57
+ from mlops import ConnectionConfig, Task
58
+
59
+ config = ConnectionConfig(
60
+ api_key="xck_...",
61
+ domain="https://example.com",
62
+ api_path="/api/v1",
63
+ debug=False,
64
+ )
65
+ task = Task(config=config)
66
+ ```
67
+
68
+ ### Usage
69
+
70
+ ```python
71
+ from mlops import Task
72
+ from mlops.api.client.models.task_status import TaskStatus
73
+ from pathlib import Path
74
+
75
+ # Initialize Task client (uses environment variables by default)
76
+ task = Task()
77
+
78
+ # Submit a task with gpu type
79
+ try:
80
+ result = task.submit(
81
+ name="gpu-task-from-sdk",
82
+ image="/mnt/minio/images/01ai-registry.cn-shanghai.cr.aliyuncs.com+public+llamafactory+0.9.3.sqsh",
83
+ entry_command="llamafactory-cli train /workspace/config/test_lora.yaml",
84
+ resources={
85
+ "partition": "gpu",
86
+ "nodes": 2,
87
+ "ntasks": 2,
88
+ "cpus_per_task": 2,
89
+ "memory": "4G",
90
+ "time": "01:00:00",
91
+ "gres": "gpu:nvidia_a10:1",
92
+ "qos": "qos_xcloud",
93
+ },
94
+ cluster_name="slurm-cn",
95
+ team_id=1,
96
+ file_path="your file path", # optional; supports .zip, .tar.gz, and .tgz archives
97
+ )
98
+
99
+ if result is not None:
100
+ print("==== gpu task submitted successfully ====")
101
+ job_id = result.job_id
102
+ else:
103
+ print("==== gpu task submitted failed ====")
104
+ except Exception as e:
105
+ print("==== gpu task submitted failed error ====", e)
106
+
107
+ # Submit a task with cpu type
108
+ try:
109
+ entry_content = Path("entry.sh").read_text(encoding="utf-8")
110
+ result = task.submit(
111
+ name="cpu-task-from-sdk",
112
+ image="docker://01ai-registry.cn-shanghai.cr.aliyuncs.com/01-ai/xcs/v2/alpine:3.23.0",
113
+ entry_command=entry_content,
114
+ resources={
115
+ "partition": "cpu",
116
+ "nodes": 1,
117
+ "ntasks": 1,
118
+ "cpus_per_task": 1,
119
+ "memory": "1G",
120
+ "time": "01:00:00",
121
+ "qos": "qos_xcloud",
122
+ },
123
+ cluster_name="slurm-cn",
124
+ team_id=1,
125
+ )
126
+
127
+ if result is not None:
128
+ print("==== cpu task submitted successfully ====")
129
+ job_id = result.job_id
130
+ else:
131
+ print("==== cpu task submitted failed ====")
132
+ except Exception as e:
133
+ print("==== cpu task submitted failed error ====", e)
134
+
135
+ # List tasks with filters
136
+ try:
137
+ completed_tasks = task.list(
138
+ status=TaskStatus.COMPLETED,
139
+ cluster_name="slurm-cn",
140
+ page=1,
141
+ page_size=20
142
+ )
143
+
144
+ # Get task details
145
+ if completed_tasks is not None and len(completed_tasks.tasks) > 0:
146
+ print("==== completed_tasks number ====", len(completed_tasks.tasks))
147
+ task_info = task.get(task_id=completed_tasks.tasks[0].job_id, cluster_name="slurm-cn")
148
+ print("==== task_info ====", task_info)
149
+ else:
150
+ print("==== no completed tasks to get details ====")
151
+ except Exception as e:
152
+ print("==== get task details failed error ====", e)
153
+
154
+
155
+ # Cancel a running task
156
+ try:
157
+ running_tasks = task.list(
158
+ status=TaskStatus.RUNNING,
159
+ cluster_name="slurm-cn",
160
+ page=1,
161
+ page_size=20
162
+ )
163
+ if running_tasks is not None and len(running_tasks.tasks) > 0:
164
+ print("==== running_tasks number ====", len(running_tasks.tasks))
165
+ # Cancel a task
166
+ result = task.cancel(task_id=running_tasks.tasks[0].job_id, cluster_name="slurm-cn")
167
+ print("==== task cancelled ====", running_tasks.tasks[0].job_id, result)
168
+ else:
169
+ print("==== no running tasks to cancel ====")
170
+ except Exception as e:
171
+ print("==== cancel running task failed error ====", e)
172
+
173
+
174
+ # Delete a task
175
+ try:
176
+ completed_tasks = task.list(
177
+ status=TaskStatus.COMPLETED,
178
+ cluster_name="slurm-cn",
179
+ page=1,
180
+ page_size=20
181
+ )
182
+ if completed_tasks is not None and len(completed_tasks.tasks) > 0:
183
+ print("==== completed_tasks number ====", len(completed_tasks.tasks))
184
+ # Delete a task
185
+ result = task.delete(task_id=completed_tasks.tasks[0].job_id, cluster_name="slurm-cn")
186
+ print("==== task deleted ====", completed_tasks.tasks[0].job_id, result)
187
+ else:
188
+ print("==== no completed tasks to delete ====")
189
+ except Exception as e:
190
+ print("==== delete completed task failed error ====", e)
191
+ ```
192
+
193
+ **Task Management Methods:**
194
+
195
+ - `submit()` - Submit a new task with container image and entry command
196
+ - `get()` - Get task details by task ID
197
+ - `list()` - List tasks with optional filters (status, cluster_name, team_id, user_id)
198
+ - `cancel()` - Cancel a running task
199
+ - `delete()` - Delete a task record
200
+
201
+ **Task Status Values:**
202
+
203
+ ```python
204
+ from mlops.api.client.models.task_status import TaskStatus
205
+
206
+ TaskStatus.PENDING # Task is pending
207
+ TaskStatus.QUEUED # Task is queued
208
+ TaskStatus.RUNNING # Task is running
209
+ TaskStatus.COMPLETED # Task completed successfully
210
+ TaskStatus.SUCCEEDED # Task succeeded
211
+ TaskStatus.FAILED # Task failed
212
+ TaskStatus.CANCELLED # Task was cancelled
213
+ TaskStatus.CREATED # Task was created
214
+ ```
215
+
216
+ **Error Handling:**
217
+
218
+ ```python
219
+ from mlops.exceptions import (
220
+ APIException,
221
+ AuthenticationException,
222
+ NotFoundException,
223
+ RateLimitException,
224
+ TimeoutException,
225
+ InvalidArgumentException,
226
+ NotEnoughSpaceException
227
+ )
228
+
229
+ try:
230
+ result = task.submit(name="test", cluster_name="slurm-cn", command="echo hello")
231
+ except AuthenticationException as e:
232
+ print(f"Authentication failed: {e}")
233
+ except NotFoundException as e:
234
+ print(f"Resource not found: {e}")
235
+ except APIException as e:
236
+ print(f"API error: {e}")
237
+ ```
238
+
239
+ > [!TIP] Error Handling
240
+ > SDKs automatically handle common errors and retry failed requests. Check the SDK documentation for error-handling best practices.
241
+
242
+ ## Features
243
+
244
+ - Type-safe API clients
245
+ - Automatic authentication
246
+ - Error handling
247
+ - Request retry logic
248
+ - Response validation
249
+
250
+ ## Resources
251
+
252
+ - [Python SDK Documentation](https://github.com/xcloud-service/xservice/tree/main/client/python-sdk)
253
+ - [API Reference](https://xcloud-service.com/docs/api)
254
+
@@ -0,0 +1,228 @@
1
+ # SDK
2
+
3
+ Software Development Kits for integrating with the XCloud Service API.
4
+
5
+ > [!NOTE] SDK Support
6
+ > SDKs provide type-safe, high-level interfaces for interacting with the platform API. They handle authentication, error handling, and request retries automatically.
7
+
8
+ ## Available SDKs
9
+
10
+ ### Python SDK
11
+
12
+ ### Installation
13
+
14
+ Install the Python SDK with pip:
15
+
16
+ ```bash
17
+ pip install mlops-python-sdk
18
+ ```
19
+
20
+ ### Configuration
21
+
22
+ The SDK reads configuration from environment variables by default:
23
+
24
+ - `MLOPS_API_KEY`: API key (required)
25
+ - `MLOPS_DOMAIN`: API domain, e.g. `localhost:8090` or `https://example.com`
26
+ - `MLOPS_API_PATH`: API path prefix (default: `/api/v1`)
27
+ - `MLOPS_DEBUG`: `true|false` (default: `false`)
28
+
29
+ Or configure in code:
30
+
31
+ ```python
32
+ from mlops import ConnectionConfig, Task
33
+
34
+ config = ConnectionConfig(
35
+ api_key="xck_...",
36
+ domain="https://example.com",
37
+ api_path="/api/v1",
38
+ debug=False,
39
+ )
40
+ task = Task(config=config)
41
+ ```
42
+
43
+ ### Usage
44
+
45
+ ```python
46
+ from mlops import Task
47
+ from mlops.api.client.models.task_status import TaskStatus
48
+ from pathlib import Path
49
+
50
+ # Initialize Task client (uses environment variables by default)
51
+ task = Task()
52
+
53
+ # Submit a task with gpu type
54
+ try:
55
+ result = task.submit(
56
+ name="gpu-task-from-sdk",
57
+ image="/mnt/minio/images/01ai-registry.cn-shanghai.cr.aliyuncs.com+public+llamafactory+0.9.3.sqsh",
58
+ entry_command="llamafactory-cli train /workspace/config/test_lora.yaml",
59
+ resources={
60
+ "partition": "gpu",
61
+ "nodes": 2,
62
+ "ntasks": 2,
63
+ "cpus_per_task": 2,
64
+ "memory": "4G",
65
+ "time": "01:00:00",
66
+ "gres": "gpu:nvidia_a10:1",
67
+ "qos": "qos_xcloud",
68
+ },
69
+ cluster_name="slurm-cn",
70
+ team_id=1,
71
+ file_path="your file path", # optional; supports .zip, .tar.gz, and .tgz archives
72
+ )
73
+
74
+ if result is not None:
75
+ print("==== gpu task submitted successfully ====")
76
+ job_id = result.job_id
77
+ else:
78
+ print("==== gpu task submitted failed ====")
79
+ except Exception as e:
80
+ print("==== gpu task submitted failed error ====", e)
81
+
82
+ # Submit a task with cpu type
83
+ try:
84
+ entry_content = Path("entry.sh").read_text(encoding="utf-8")
85
+ result = task.submit(
86
+ name="cpu-task-from-sdk",
87
+ image="docker://01ai-registry.cn-shanghai.cr.aliyuncs.com/01-ai/xcs/v2/alpine:3.23.0",
88
+ entry_command=entry_content,
89
+ resources={
90
+ "partition": "cpu",
91
+ "nodes": 1,
92
+ "ntasks": 1,
93
+ "cpus_per_task": 1,
94
+ "memory": "1G",
95
+ "time": "01:00:00",
96
+ "qos": "qos_xcloud",
97
+ },
98
+ cluster_name="slurm-cn",
99
+ team_id=1,
100
+ )
101
+
102
+ if result is not None:
103
+ print("==== cpu task submitted successfully ====")
104
+ job_id = result.job_id
105
+ else:
106
+ print("==== cpu task submitted failed ====")
107
+ except Exception as e:
108
+ print("==== cpu task submitted failed error ====", e)
109
+
110
+ # List tasks with filters
111
+ try:
112
+ completed_tasks = task.list(
113
+ status=TaskStatus.COMPLETED,
114
+ cluster_name="slurm-cn",
115
+ page=1,
116
+ page_size=20
117
+ )
118
+
119
+ # Get task details
120
+ if completed_tasks is not None and len(completed_tasks.tasks) > 0:
121
+ print("==== completed_tasks number ====", len(completed_tasks.tasks))
122
+ task_info = task.get(task_id=completed_tasks.tasks[0].job_id, cluster_name="slurm-cn")
123
+ print("==== task_info ====", task_info)
124
+ else:
125
+ print("==== no completed tasks to get details ====")
126
+ except Exception as e:
127
+ print("==== get task details failed error ====", e)
128
+
129
+
130
+ # Cancel a running task
131
+ try:
132
+ running_tasks = task.list(
133
+ status=TaskStatus.RUNNING,
134
+ cluster_name="slurm-cn",
135
+ page=1,
136
+ page_size=20
137
+ )
138
+ if running_tasks is not None and len(running_tasks.tasks) > 0:
139
+ print("==== running_tasks number ====", len(running_tasks.tasks))
140
+ # Cancel a task
141
+ result = task.cancel(task_id=running_tasks.tasks[0].job_id, cluster_name="slurm-cn")
142
+ print("==== task cancelled ====", running_tasks.tasks[0].job_id, result)
143
+ else:
144
+ print("==== no running tasks to cancel ====")
145
+ except Exception as e:
146
+ print("==== cancel running task failed error ====", e)
147
+
148
+
149
+ # Delete a task
150
+ try:
151
+ completed_tasks = task.list(
152
+ status=TaskStatus.COMPLETED,
153
+ cluster_name="slurm-cn",
154
+ page=1,
155
+ page_size=20
156
+ )
157
+ if completed_tasks is not None and len(completed_tasks.tasks) > 0:
158
+ print("==== completed_tasks number ====", len(completed_tasks.tasks))
159
+ # Delete a task
160
+ result = task.delete(task_id=completed_tasks.tasks[0].job_id, cluster_name="slurm-cn")
161
+ print("==== task deleted ====", completed_tasks.tasks[0].job_id, result)
162
+ else:
163
+ print("==== no completed tasks to delete ====")
164
+ except Exception as e:
165
+ print("==== delete completed task failed error ====", e)
166
+ ```
167
+
168
+ **Task Management Methods:**
169
+
170
+ - `submit()` - Submit a new task with container image and entry command
171
+ - `get()` - Get task details by task ID
172
+ - `list()` - List tasks with optional filters (status, cluster_name, team_id, user_id)
173
+ - `cancel()` - Cancel a running task
174
+ - `delete()` - Delete a task record
175
+
176
+ **Task Status Values:**
177
+
178
+ ```python
179
+ from mlops.api.client.models.task_status import TaskStatus
180
+
181
+ TaskStatus.PENDING # Task is pending
182
+ TaskStatus.QUEUED # Task is queued
183
+ TaskStatus.RUNNING # Task is running
184
+ TaskStatus.COMPLETED # Task completed successfully
185
+ TaskStatus.SUCCEEDED # Task succeeded
186
+ TaskStatus.FAILED # Task failed
187
+ TaskStatus.CANCELLED # Task was cancelled
188
+ TaskStatus.CREATED # Task was created
189
+ ```
190
+
191
+ **Error Handling:**
192
+
193
+ ```python
194
+ from mlops.exceptions import (
195
+ APIException,
196
+ AuthenticationException,
197
+ NotFoundException,
198
+ RateLimitException,
199
+ TimeoutException,
200
+ InvalidArgumentException,
201
+ NotEnoughSpaceException
202
+ )
203
+
204
+ try:
205
+ result = task.submit(name="test", cluster_name="slurm-cn", command="echo hello")
206
+ except AuthenticationException as e:
207
+ print(f"Authentication failed: {e}")
208
+ except NotFoundException as e:
209
+ print(f"Resource not found: {e}")
210
+ except APIException as e:
211
+ print(f"API error: {e}")
212
+ ```
213
+
214
+ > [!TIP] Error Handling
215
+ > SDKs automatically handle common errors and retry failed requests. Check the SDK documentation for error-handling best practices.
216
+
217
+ ## Features
218
+
219
+ - Type-safe API clients
220
+ - Automatic authentication
221
+ - Error handling
222
+ - Request retry logic
223
+ - Response validation
224
+
225
+ ## Resources
226
+
227
+ - [Python SDK Documentation](https://github.com/xcloud-service/xservice/tree/main/client/python-sdk)
228
+ - [API Reference](https://xcloud-service.com/docs/api)
@@ -29,12 +29,18 @@ class TaskSubmitRequest:
29
29
  cpus_per_task (Union[None, Unset, int]): CPUs per task Example: 1.
30
30
  dependency (Union[None, Unset, str]): Job dependencies Example: afterok:12345.
31
31
  distribution (Union[None, Unset, str]): Task distribution Example: block.
32
+ entry_command (Union[None, Unset, str]): Container entry command/script (bash snippet) executed inside the
33
+ container. The platform runs it under /workspace.
34
+ Example: python -V && ls -la.
32
35
  environment (Union['TaskSubmitRequestEnvironmentType0', None, Unset]): Environment variables as key-value pairs
33
36
  Example: {'CUDA_VISIBLE_DEVICES': '0,1', 'PYTHONPATH': '/opt/python/lib'}.
34
37
  error (Union[None, Unset, str]): Standard error file pattern Example: error_%j.log.
35
38
  exclude (Union[None, Unset, str]): Nodes to exclude
36
39
  export (Union[None, Unset, str]): Environment export Example: ALL.
37
40
  gres (Union[None, Unset, str]): Generic resources (e.g., "gpu:1", "gpu:tesla:2") Example: gpu:1.
41
+ image (Union[None, Unset, str]): Container image reference. Can be a reference supported by the Slurm container plugin
42
+ (e.g. "docker://..."), or a registry reference which will be mapped to a local .sqsh image path by the platform.
43
+ Example: 01ai-registry.cn-shanghai.cr.aliyuncs.com/public/llamafactory:0.9.3.
38
44
  input_ (Union[None, Unset, str]): Standard input file
39
45
  job_spec (Union[Unset, JobSpec]): Domain-specific job specification (rendered into slurm script)
40
46
  mem_bind (Union[None, Unset, str]): Memory binding
@@ -65,11 +71,13 @@ class TaskSubmitRequest:
65
71
  cpus_per_task: Union[None, Unset, int] = UNSET
66
72
  dependency: Union[None, Unset, str] = UNSET
67
73
  distribution: Union[None, Unset, str] = UNSET
74
+ entry_command: Union[None, Unset, str] = UNSET
68
75
  environment: Union["TaskSubmitRequestEnvironmentType0", None, Unset] = UNSET
69
76
  error: Union[None, Unset, str] = UNSET
70
77
  exclude: Union[None, Unset, str] = UNSET
71
78
  export: Union[None, Unset, str] = UNSET
72
79
  gres: Union[None, Unset, str] = UNSET
80
+ image: Union[None, Unset, str] = UNSET
73
81
  input_: Union[None, Unset, str] = UNSET
74
82
  job_spec: Union[Unset, "JobSpec"] = UNSET
75
83
  mem_bind: Union[None, Unset, str] = UNSET
@@ -143,6 +151,12 @@ class TaskSubmitRequest:
143
151
  else:
144
152
  distribution = self.distribution
145
153
 
154
+ entry_command: Union[None, Unset, str]
155
+ if isinstance(self.entry_command, Unset):
156
+ entry_command = UNSET
157
+ else:
158
+ entry_command = self.entry_command
159
+
146
160
  environment: Union[None, Unset, dict[str, Any]]
147
161
  if isinstance(self.environment, Unset):
148
162
  environment = UNSET
@@ -175,6 +189,12 @@ class TaskSubmitRequest:
175
189
  else:
176
190
  gres = self.gres
177
191
 
192
+ image: Union[None, Unset, str]
193
+ if isinstance(self.image, Unset):
194
+ image = UNSET
195
+ else:
196
+ image = self.image
197
+
178
198
  input_: Union[None, Unset, str]
179
199
  if isinstance(self.input_, Unset):
180
200
  input_ = UNSET
@@ -289,6 +309,8 @@ class TaskSubmitRequest:
289
309
  field_dict["dependency"] = dependency
290
310
  if distribution is not UNSET:
291
311
  field_dict["distribution"] = distribution
312
+ if entry_command is not UNSET:
313
+ field_dict["entry_command"] = entry_command
292
314
  if environment is not UNSET:
293
315
  field_dict["environment"] = environment
294
316
  if error is not UNSET:
@@ -299,6 +321,8 @@ class TaskSubmitRequest:
299
321
  field_dict["export"] = export
300
322
  if gres is not UNSET:
301
323
  field_dict["gres"] = gres
324
+ if image is not UNSET:
325
+ field_dict["image"] = image
302
326
  if input_ is not UNSET:
303
327
  field_dict["input"] = input_
304
328
  if job_spec is not UNSET:
@@ -416,6 +440,15 @@ class TaskSubmitRequest:
416
440
 
417
441
  distribution = _parse_distribution(d.pop("distribution", UNSET))
418
442
 
443
+ def _parse_entry_command(data: object) -> Union[None, Unset, str]:
444
+ if data is None:
445
+ return data
446
+ if isinstance(data, Unset):
447
+ return data
448
+ return cast(Union[None, Unset, str], data)
449
+
450
+ entry_command = _parse_entry_command(d.pop("entry_command", UNSET))
451
+
419
452
  def _parse_environment(data: object) -> Union["TaskSubmitRequestEnvironmentType0", None, Unset]:
420
453
  if data is None:
421
454
  return data
@@ -469,6 +502,15 @@ class TaskSubmitRequest:
469
502
 
470
503
  gres = _parse_gres(d.pop("gres", UNSET))
471
504
 
505
+ def _parse_image(data: object) -> Union[None, Unset, str]:
506
+ if data is None:
507
+ return data
508
+ if isinstance(data, Unset):
509
+ return data
510
+ return cast(Union[None, Unset, str], data)
511
+
512
+ image = _parse_image(d.pop("image", UNSET))
513
+
472
514
  def _parse_input_(data: object) -> Union[None, Unset, str]:
473
515
  if data is None:
474
516
  return data
@@ -615,11 +657,13 @@ class TaskSubmitRequest:
615
657
  cpus_per_task=cpus_per_task,
616
658
  dependency=dependency,
617
659
  distribution=distribution,
660
+ entry_command=entry_command,
618
661
  environment=environment,
619
662
  error=error,
620
663
  exclude=exclude,
621
664
  export=export,
622
665
  gres=gres,
666
+ image=image,
623
667
  input_=input_,
624
668
  job_spec=job_spec,
625
669
  mem_bind=mem_bind,
@@ -1,9 +1,9 @@
1
1
  import os
2
2
 
3
- from typing import Literal, Optional, Dict
3
+ from typing import Optional, Dict
4
4
  from httpx._types import ProxyTypes
5
5
 
6
- REQUEST_TIMEOUT: float = 30.0 # 30 seconds
6
+ REQUEST_TIMEOUT: float = 120.0 # 120 seconds
7
7
 
8
8
  KEEPALIVE_PING_INTERVAL_SEC = 50 # 50 seconds
9
9
  KEEPALIVE_PING_HEADER = "Keepalive-Ping-Interval"