mlops-python-sdk 1.0.2__tar.gz → 1.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. mlops_python_sdk-1.0.3/PKG-INFO +235 -0
  2. mlops_python_sdk-1.0.3/README.md +209 -0
  3. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/task/task.py +100 -1
  4. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/pyproject.toml +1 -1
  5. mlops_python_sdk-1.0.2/PKG-INFO +0 -254
  6. mlops_python_sdk-1.0.2/README.md +0 -228
  7. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/__init__.py +0 -0
  8. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/__init__.py +0 -0
  9. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/api/__init__.py +0 -0
  10. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/api/storage/__init__.py +0 -0
  11. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/api/storage/get_storage_presign_download.py +0 -0
  12. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/api/storage/get_storage_presign_upload.py +0 -0
  13. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/api/tasks/__init__.py +0 -0
  14. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/api/tasks/cancel_task.py +0 -0
  15. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/api/tasks/delete_task.py +0 -0
  16. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/api/tasks/get_task.py +0 -0
  17. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/api/tasks/get_task_by_task_id.py +0 -0
  18. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/api/tasks/get_task_logs.py +0 -0
  19. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/api/tasks/list_tasks.py +0 -0
  20. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/api/tasks/submit_task.py +0 -0
  21. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/client.py +0 -0
  22. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/errors.py +0 -0
  23. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/__init__.py +0 -0
  24. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/error_response.py +0 -0
  25. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/get_storage_presign_download_response_200.py +0 -0
  26. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/get_storage_presign_upload_response_200.py +0 -0
  27. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/get_task_logs_direction.py +0 -0
  28. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/get_task_logs_log_type.py +0 -0
  29. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/job_spec.py +0 -0
  30. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/job_spec_env.py +0 -0
  31. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/job_spec_master_strategy.py +0 -0
  32. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/log_pagination.py +0 -0
  33. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/message_response.py +0 -0
  34. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task.py +0 -0
  35. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_alloc_tres_type_0.py +0 -0
  36. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_gres_detail_type_0_item.py +0 -0
  37. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_job_resources_type_0.py +0 -0
  38. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_list_response.py +0 -0
  39. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_log_entry.py +0 -0
  40. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_log_entry_log_type.py +0 -0
  41. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_logs_response.py +0 -0
  42. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_resources_type_0.py +0 -0
  43. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_status.py +0 -0
  44. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_submit_request.py +0 -0
  45. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_submit_request_environment_type_0.py +0 -0
  46. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_submit_response.py +0 -0
  47. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_tres_type_0.py +0 -0
  48. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/models/task_tres_used_type_0.py +0 -0
  49. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/py.typed +0 -0
  50. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/api/client/types.py +0 -0
  51. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/connection_config.py +0 -0
  52. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/exceptions.py +0 -0
  53. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/task/__init__.py +0 -0
  54. {mlops_python_sdk-1.0.2 → mlops_python_sdk-1.0.3}/mlops/task/client.py +0 -0
@@ -0,0 +1,235 @@
1
+ Metadata-Version: 2.3
2
+ Name: mlops-python-sdk
3
+ Version: 1.0.3
4
+ Summary: MLOps Python SDK for XCloud Service API
5
+ License: MIT
6
+ Author: mlops
7
+ Author-email: mlops@example.com
8
+ Requires-Python: >=3.9,<4.0
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Requires-Dist: attrs (>=23.2.0)
17
+ Requires-Dist: httpx (>=0.27.0,<1.0.0)
18
+ Requires-Dist: packaging (>=24.1)
19
+ Requires-Dist: python-dateutil (>=2.8.2)
20
+ Requires-Dist: typing-extensions (>=4.1.0)
21
+ Project-URL: Bug Tracker, https://github.com/xcloud-service/xservice/issues
22
+ Project-URL: Homepage, https://mlops.cloud/
23
+ Project-URL: Repository, https://github.com/xcloud-service/xservice
24
+ Description-Content-Type: text/markdown
25
+
26
+ # SDK
27
+
28
+ Software Development Kits for integrating with the XCloud Service API.
29
+
30
+ > [!NOTE] SDK Support
31
+ > SDKs provide type-safe, high-level interfaces for interacting with the platform API. They handle authentication, typed response parsing, and structured error handling automatically.
32
+
33
+
34
+ ## Installation
35
+
36
+ Install the Python SDK from PyPI with pip:
37
+
38
+ ```bash
39
+ pip install mlops-python-sdk
40
+ ```
41
+
42
+ ### Configuration
43
+
44
+ The SDK reads configuration from environment variables by default:
45
+
46
+ - `MLOPS_API_KEY`: API key (required)
47
+ - `MLOPS_DOMAIN`: API domain, e.g. `localhost:8090` or `https://example.com`
48
+ - `MLOPS_API_PATH`: API path prefix (default: `/api/v1`)
49
+ - `MLOPS_DEBUG`: `true|false` (default: `false`)
50
+
51
+ Or configure in code:
52
+
53
+ ```python
54
+ from mlops import ConnectionConfig, Task
55
+
56
+ config = ConnectionConfig(
57
+ api_key="xck_...",
58
+ domain="https://example.com",
59
+ api_path="/api/v1",
60
+ debug=False,
61
+ )
62
+ task = Task(config=config)
63
+ ```
64
+
65
+ ## SDK Usage
66
+
67
+ ### Initialize client
68
+
69
+ ```python
70
+ from mlops import Task
71
+
72
+ task = Task() # uses environment variables by default
73
+ ```
74
+
75
+ ### Submit a GPU task
76
+
77
+ ```python
78
+ from mlops import Task
79
+
80
+ task = Task()
81
+ resp = task.submit(
82
+ name="gpu-task-from-sdk",
83
+ cluster_name="slurm-cn",
84
+ team_id=1,
85
+ image="/mnt/minio/images/01ai-registry.cn-shanghai.cr.aliyuncs.com+public+llamafactory+0.9.3.sqsh",
86
+ entry_command="llamafactory-cli train /workspace/config/test_lora.yaml",
87
+ resources={
88
+ "partition": "gpu",
89
+ "nodes": 2,
90
+ "ntasks": 2,
91
+ "cpus_per_task": 2,
92
+ "memory": "4G",
93
+ "time": "01:00:00",
94
+ "gres": "gpu:nvidia_a10:1",
95
+ "qos": "qos_xcloud",
96
+ },
97
+ file_path="/path/to/xservice.zip", # optional: .zip/.tar.gz/.tgz
98
+ )
99
+ print(resp.job_id)
100
+ ```
101
+
102
+ ### Submit a CPU task
103
+
104
+ ```python
105
+ from mlops import Task
106
+
107
+ task = Task()
108
+ resp = task.submit(
109
+ name="cpu-task-from-sdk",
110
+ cluster_name="slurm-cn",
111
+ team_id=1,
112
+ image="docker://01ai-registry.cn-shanghai.cr.aliyuncs.com/01-ai/xcs/v2/alpine:3.23.0",
113
+ entry_command="echo hello",
114
+ resources={
115
+ "partition": "cpu",
116
+ "nodes": 1,
117
+ "ntasks": 1,
118
+ "cpus_per_task": 1,
119
+ "memory": "1G",
120
+ "time": "01:00:00",
121
+ "qos": "qos_xcloud",
122
+ },
123
+ )
124
+ print(resp.job_id)
125
+ ```
126
+
127
+ ### List tasks
128
+
129
+ ```python
130
+ from mlops import Task
131
+ from mlops.api.client.models.task_status import TaskStatus
132
+
133
+ task = Task()
134
+ resp = task.list(status=TaskStatus.COMPLETED, cluster_name="slurm-cn", page=1, page_size=20)
135
+ print(len(resp.tasks or []))
136
+ ```
137
+
138
+ ### Get task details
139
+
140
+ ```python
141
+ from mlops import Task
142
+
143
+ task = Task()
144
+ task_info = task.get(task_id=12345, cluster_name="slurm-cn")
145
+ print(task_info)
146
+ ```
147
+
148
+ ### Cancel a task
149
+
150
+ ```python
151
+ from mlops import Task
152
+
153
+ task = Task()
154
+ task.cancel(task_id=12345, cluster_name="slurm-cn")
155
+ ```
156
+
157
+ ### Delete a task
158
+
159
+ ```python
160
+ from mlops import Task
161
+
162
+ task = Task()
163
+ task.delete(task_id=12345, cluster_name="slurm-cn")
164
+ ```
165
+
166
+ **Task Management Methods:**
167
+
168
+ - `submit()` - Submit a new task with container image and entry command
169
+ - `get()` - Get task details by task ID
170
+ - `list()` - List tasks with optional filters (status, cluster_name, team_id, user_id)
171
+ - `cancel()` - Cancel a running task
172
+ - `delete()` - Delete a task record
173
+
174
+ **Task Status Values:**
175
+
176
+ ```python
177
+ from mlops.api.client.models.task_status import TaskStatus
178
+
179
+ TaskStatus.PENDING # Task is pending
180
+ TaskStatus.QUEUED # Task is queued
181
+ TaskStatus.RUNNING # Task is running
182
+ TaskStatus.COMPLETED # Task completed successfully
183
+ TaskStatus.SUCCEEDED # Task succeeded
184
+ TaskStatus.FAILED # Task failed
185
+ TaskStatus.CANCELLED # Task was cancelled
186
+ TaskStatus.CREATED # Task was created
187
+ ```
188
+
189
+ **Error Handling:**
190
+
191
+ ```python
192
+ from mlops.exceptions import (
193
+ APIException,
194
+ AuthenticationException,
195
+ NotFoundException,
196
+ RateLimitException,
197
+ TimeoutException,
198
+ InvalidArgumentException,
199
+ NotEnoughSpaceException
200
+ )
201
+ from mlops import Task
202
+
203
+ task = Task()
204
+
205
+ try:
206
+ result = task.submit(
207
+ name="test",
208
+ cluster_name="slurm-cn",
209
+ image="docker://alpine:3.23.0",
210
+ entry_command="echo hello",
211
+ )
212
+ except AuthenticationException as e:
213
+ print(f"Authentication failed: {e}")
214
+ except NotFoundException as e:
215
+ print(f"Resource not found: {e}")
216
+ except APIException as e:
217
+ print(f"API error: {e}")
218
+ ```
219
+
220
+ > [!TIP] Error Handling
221
+ > SDKs automatically parse typed responses and raise structured exceptions.
222
+
223
+ ## Features
224
+
225
+ - Type-safe API clients
226
+ - Automatic authentication
227
+ - Error handling
228
+ - Typed response parsing (generated models)
229
+ - Unexpected-status guard (optional)
230
+
231
+ ## Resources
232
+
233
+ - [Python SDK Documentation](https://github.com/xcloud-service/xservice/tree/main/client/python-sdk)
234
+ - [API Reference](https://xcloud-service.com/docs/api)
235
+
@@ -0,0 +1,209 @@
1
+ # SDK
2
+
3
+ Software Development Kits for integrating with the XCloud Service API.
4
+
5
+ > [!NOTE] SDK Support
6
+ > SDKs provide type-safe, high-level interfaces for interacting with the platform API. They handle authentication, typed response parsing, and structured error handling automatically.
7
+
8
+
9
+ ## Installation
10
+
11
+ Install the Python SDK from PyPI with pip:
12
+
13
+ ```bash
14
+ pip install mlops-python-sdk
15
+ ```
16
+
17
+ ### Configuration
18
+
19
+ The SDK reads configuration from environment variables by default:
20
+
21
+ - `MLOPS_API_KEY`: API key (required)
22
+ - `MLOPS_DOMAIN`: API domain, e.g. `localhost:8090` or `https://example.com`
23
+ - `MLOPS_API_PATH`: API path prefix (default: `/api/v1`)
24
+ - `MLOPS_DEBUG`: `true|false` (default: `false`)
25
+
26
+ Or configure in code:
27
+
28
+ ```python
29
+ from mlops import ConnectionConfig, Task
30
+
31
+ config = ConnectionConfig(
32
+ api_key="xck_...",
33
+ domain="https://example.com",
34
+ api_path="/api/v1",
35
+ debug=False,
36
+ )
37
+ task = Task(config=config)
38
+ ```
39
+
40
+ ## SDK Usage
41
+
42
+ ### Initialize client
43
+
44
+ ```python
45
+ from mlops import Task
46
+
47
+ task = Task() # uses environment variables by default
48
+ ```
49
+
50
+ ### Submit a GPU task
51
+
52
+ ```python
53
+ from mlops import Task
54
+
55
+ task = Task()
56
+ resp = task.submit(
57
+ name="gpu-task-from-sdk",
58
+ cluster_name="slurm-cn",
59
+ team_id=1,
60
+ image="/mnt/minio/images/01ai-registry.cn-shanghai.cr.aliyuncs.com+public+llamafactory+0.9.3.sqsh",
61
+ entry_command="llamafactory-cli train /workspace/config/test_lora.yaml",
62
+ resources={
63
+ "partition": "gpu",
64
+ "nodes": 2,
65
+ "ntasks": 2,
66
+ "cpus_per_task": 2,
67
+ "memory": "4G",
68
+ "time": "01:00:00",
69
+ "gres": "gpu:nvidia_a10:1",
70
+ "qos": "qos_xcloud",
71
+ },
72
+ file_path="/path/to/xservice.zip", # optional: .zip/.tar.gz/.tgz
73
+ )
74
+ print(resp.job_id)
75
+ ```
76
+
77
+ ### Submit a CPU task
78
+
79
+ ```python
80
+ from mlops import Task
81
+
82
+ task = Task()
83
+ resp = task.submit(
84
+ name="cpu-task-from-sdk",
85
+ cluster_name="slurm-cn",
86
+ team_id=1,
87
+ image="docker://01ai-registry.cn-shanghai.cr.aliyuncs.com/01-ai/xcs/v2/alpine:3.23.0",
88
+ entry_command="echo hello",
89
+ resources={
90
+ "partition": "cpu",
91
+ "nodes": 1,
92
+ "ntasks": 1,
93
+ "cpus_per_task": 1,
94
+ "memory": "1G",
95
+ "time": "01:00:00",
96
+ "qos": "qos_xcloud",
97
+ },
98
+ )
99
+ print(resp.job_id)
100
+ ```
101
+
102
+ ### List tasks
103
+
104
+ ```python
105
+ from mlops import Task
106
+ from mlops.api.client.models.task_status import TaskStatus
107
+
108
+ task = Task()
109
+ resp = task.list(status=TaskStatus.COMPLETED, cluster_name="slurm-cn", page=1, page_size=20)
110
+ print(len(resp.tasks or []))
111
+ ```
112
+
113
+ ### Get task details
114
+
115
+ ```python
116
+ from mlops import Task
117
+
118
+ task = Task()
119
+ task_info = task.get(task_id=12345, cluster_name="slurm-cn")
120
+ print(task_info)
121
+ ```
122
+
123
+ ### Cancel a task
124
+
125
+ ```python
126
+ from mlops import Task
127
+
128
+ task = Task()
129
+ task.cancel(task_id=12345, cluster_name="slurm-cn")
130
+ ```
131
+
132
+ ### Delete a task
133
+
134
+ ```python
135
+ from mlops import Task
136
+
137
+ task = Task()
138
+ task.delete(task_id=12345, cluster_name="slurm-cn")
139
+ ```
140
+
141
+ **Task Management Methods:**
142
+
143
+ - `submit()` - Submit a new task with container image and entry command
144
+ - `get()` - Get task details by task ID
145
+ - `list()` - List tasks with optional filters (status, cluster_name, team_id, user_id)
146
+ - `cancel()` - Cancel a running task
147
+ - `delete()` - Delete a task record
148
+
149
+ **Task Status Values:**
150
+
151
+ ```python
152
+ from mlops.api.client.models.task_status import TaskStatus
153
+
154
+ TaskStatus.PENDING # Task is pending
155
+ TaskStatus.QUEUED # Task is queued
156
+ TaskStatus.RUNNING # Task is running
157
+ TaskStatus.COMPLETED # Task completed successfully
158
+ TaskStatus.SUCCEEDED # Task succeeded
159
+ TaskStatus.FAILED # Task failed
160
+ TaskStatus.CANCELLED # Task was cancelled
161
+ TaskStatus.CREATED # Task was created
162
+ ```
163
+
164
+ **Error Handling:**
165
+
166
+ ```python
167
+ from mlops.exceptions import (
168
+ APIException,
169
+ AuthenticationException,
170
+ NotFoundException,
171
+ RateLimitException,
172
+ TimeoutException,
173
+ InvalidArgumentException,
174
+ NotEnoughSpaceException
175
+ )
176
+ from mlops import Task
177
+
178
+ task = Task()
179
+
180
+ try:
181
+ result = task.submit(
182
+ name="test",
183
+ cluster_name="slurm-cn",
184
+ image="docker://alpine:3.23.0",
185
+ entry_command="echo hello",
186
+ )
187
+ except AuthenticationException as e:
188
+ print(f"Authentication failed: {e}")
189
+ except NotFoundException as e:
190
+ print(f"Resource not found: {e}")
191
+ except APIException as e:
192
+ print(f"API error: {e}")
193
+ ```
194
+
195
+ > [!TIP] Error Handling
196
+ > SDKs automatically parse typed responses and raise structured exceptions.
197
+
198
+ ## Features
199
+
200
+ - Type-safe API clients
201
+ - Automatic authentication
202
+ - Error handling
203
+ - Typed response parsing (generated models)
204
+ - Unexpected-status guard (optional)
205
+
206
+ ## Resources
207
+
208
+ - [Python SDK Documentation](https://github.com/xcloud-service/xservice/tree/main/client/python-sdk)
209
+ - [API Reference](https://xcloud-service.com/docs/api)
@@ -6,6 +6,9 @@ This module provides a convenient interface for managing tasks through the MLOps
6
6
 
7
7
  import json
8
8
  import os
9
+ import sys
10
+ import threading
11
+ import time
9
12
  from http import HTTPStatus
10
13
  from pathlib import Path
11
14
  from typing import Optional
@@ -55,13 +58,109 @@ def _validate_archive_file_path(file_path: str) -> Path:
55
58
 
56
59
 
57
60
  def _upload_file_to_presigned_url(url: str, file_path: Path, timeout: Optional[float]) -> None:
61
def _format_bytes_iec(n: int) -> str:
    """Format a byte count using IEC binary units (KiB, MiB, ... PiB).

    Counts below 1024 are rendered as a whole number of bytes (``"512B"``).
    Larger counts are scaled by powers of 1024 and shown with one decimal
    place (``"1.5KiB"``). Values beyond the largest suffix stay in PiB.

    Args:
        n: Number of bytes.

    Returns:
        The human-readable size string.
    """
    if n < 1024:
        return f"{n}B"
    unit = 1024.0
    suffixes = ("KiB", "MiB", "GiB", "TiB", "PiB")
    value = n / unit
    index = 0
    # Choose the unit from the value as it will be *displayed* (one decimal).
    # Otherwise a count just below a boundary, e.g. 1024 * 1024 - 1, would be
    # rendered as "1024.0KiB" instead of being promoted to "1.0MiB".
    while round(value, 1) >= unit and index < len(suffixes) - 1:
        value /= unit
        index += 1
    return f"{value:.1f}{suffixes[index]}"
72
+
73
def _render_bar(done: int, total: int, width: int = 28) -> str:
    """Draw a fixed-width text progress bar such as ``"=====>    "``.

    Args:
        done: Units completed so far; clamped into ``[0, total]``.
        total: Units expected overall.
        width: Number of characters in the rendered bar.

    Returns:
        A bare ``">"`` when ``total`` is not positive or ``width`` is 1 or
        less. Otherwise a string of exactly ``width`` characters: ``=`` for
        the completed portion, a ``>`` head, and space padding. A finished
        bar is all ``=``.
    """
    if not (total > 0 and width > 1):
        return ">"

    progress = done
    if progress > total:
        progress = total
    if progress < 0:
        progress = 0

    cells = int(width * (progress / total))
    if cells >= width:
        return "=" * width

    # The head marker sits right after the completed cells; pad the rest.
    head = "=" * cells + ">"
    return head.ljust(width)
83
+
84
def _format_elapsed_seconds(start: float) -> str:
    """Return the whole seconds elapsed since ``start`` as e.g. ``"12s"``.

    Args:
        start: A reference timestamp taken from ``time.monotonic()``.

    Returns:
        The elapsed time truncated to whole seconds with an ``s`` suffix;
        a ``start`` that lies in the future yields ``"0s"``.
    """
    delta = time.monotonic() - start
    whole = int(delta) if delta > 0.0 else 0
    return f"{whole}s"
87
+
88
+ class _ProgressIterable:
89
+ def __init__(self, f, total: int, name: str, chunk_size: int = 64 * 1024):
90
+ self._f = f # file-like object
91
+ self._total = max(0, int(total))
92
+ self._name = name
93
+ self._chunk_size = max(1, int(chunk_size))
94
+ self._read = 0
95
+ self._start = time.monotonic()
96
+ self._completed = False
97
+ self._out = sys.stdout
98
+ try:
99
+ self._is_tty = bool(self._out.isatty())
100
+ except Exception:
101
+ self._is_tty = False
102
+
103
+ def _render_line(self, display_read: int) -> str:
104
+ display_read = max(0, min(int(display_read), self._total))
105
+ pct = (display_read / self._total) * 100.0 if self._total > 0 else 0.0
106
+ bar = _render_bar(display_read, self._total, width=28)
107
+ elapsed = _format_elapsed_seconds(self._start)
108
+ return (
109
+ f"uploading {self._name} [{bar}] {pct:6.2f}% "
110
+ f"({_format_bytes_iec(display_read)}/{_format_bytes_iec(self._total)}) "
111
+ f"elapsed {elapsed}"
112
+ )
113
+
114
+ def _print_line(self, line: str, final: bool = False) -> None:
115
+ if self._is_tty:
116
+ # Refresh same line in terminal.
117
+ print("\r" + line, end="" if not final else "\n", file=self._out, flush=True)
118
+ else:
119
+ # Always visible in non-TTY environments.
120
+ print(line, file=self._out, flush=True)
121
+
122
+ def __iter__(self):
123
+ stop_event = threading.Event()
124
+
125
+ def ticker() -> None:
126
+ last_sec = -1
127
+ # Print immediately so users see something right away.
128
+ self._print_line(self._render_line(self._read))
129
+ while not stop_event.is_set():
130
+ sec = int(max(0.0, time.monotonic() - self._start))
131
+ if sec != last_sec:
132
+ last_sec = sec
133
+ self._print_line(self._render_line(self._read))
134
+ # check frequently to avoid skipping seconds
135
+ stop_event.wait(0.05)
136
+
137
+ t = threading.Thread(target=ticker, name="mlops-upload-progress", daemon=True)
138
+ t.start()
139
+ try:
140
+ while True:
141
+ chunk = self._f.read(self._chunk_size)
142
+ if not chunk:
143
+ break
144
+ self._read += len(chunk)
145
+ yield chunk
146
+ finally:
147
+ # Ensure a final 100% line and stop ticker.
148
+ self._read = self._total
149
+ self._completed = True
150
+ stop_event.set()
151
+ t.join(timeout=0.2)
152
+ self._print_line(self._render_line(self._read), final=True)
153
+
58
154
  size = file_path.stat().st_size
59
155
  # Use a dedicated client for S3 presigned upload (avoid leaking API auth headers).
60
156
  with httpx.Client(timeout=timeout) as client:
61
157
  with file_path.open("rb") as f:
158
+ content = f
159
+ if size > 0:
160
+ content = _ProgressIterable(f, total=size, name=file_path.name)
62
161
  resp = client.put(
63
162
  url,
64
- content=f,
163
+ content=content,
65
164
  headers={
66
165
  "Content-Length": str(size),
67
166
  "Content-Type": "application/octet-stream",
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "mlops-python-sdk"
3
- version = "1.0.2"
3
+ version = "1.0.3"
4
4
  description = "MLOps Python SDK for XCloud Service API"
5
5
  authors = ["mlops <mlops@example.com>"]
6
6
  license = "MIT"
@@ -1,254 +0,0 @@
1
- Metadata-Version: 2.3
2
- Name: mlops-python-sdk
3
- Version: 1.0.2
4
- Summary: MLOps Python SDK for XCloud Service API
5
- License: MIT
6
- Author: mlops
7
- Author-email: mlops@example.com
8
- Requires-Python: >=3.9,<4.0
9
- Classifier: License :: OSI Approved :: MIT License
10
- Classifier: Programming Language :: Python :: 3
11
- Classifier: Programming Language :: Python :: 3.9
12
- Classifier: Programming Language :: Python :: 3.10
13
- Classifier: Programming Language :: Python :: 3.11
14
- Classifier: Programming Language :: Python :: 3.12
15
- Classifier: Programming Language :: Python :: 3.13
16
- Requires-Dist: attrs (>=23.2.0)
17
- Requires-Dist: httpx (>=0.27.0,<1.0.0)
18
- Requires-Dist: packaging (>=24.1)
19
- Requires-Dist: python-dateutil (>=2.8.2)
20
- Requires-Dist: typing-extensions (>=4.1.0)
21
- Project-URL: Bug Tracker, https://github.com/xcloud-service/xservice/issues
22
- Project-URL: Homepage, https://mlops.cloud/
23
- Project-URL: Repository, https://github.com/xcloud-service/xservice
24
- Description-Content-Type: text/markdown
25
-
26
- # SDK
27
-
28
- Software Development Kits for integrating with the XCloud Service API.
29
-
30
- > [!NOTE] SDK Support
31
- > SDKs provide type-safe, high-level interfaces for interacting with the platform API. They handle authentication, error handling, and request retries automatically.
32
-
33
- ## Available SDKs
34
-
35
- ### Python SDK
36
-
37
- ### Installation
38
-
39
- The Python SDK installation.
40
-
41
- ```bash
42
- pip install mlops-python-sdk
43
- ```
44
-
45
- ### Configuration
46
-
47
- The SDK reads configuration from environment variables by default:
48
-
49
- - `MLOPS_API_KEY`: API key (required)
50
- - `MLOPS_DOMAIN`: API domain, e.g. `localhost:8090` or `https://example.com`
51
- - `MLOPS_API_PATH`: API path prefix (default: `/api/v1`)
52
- - `MLOPS_DEBUG`: `true|false` (default: `false`)
53
-
54
- Or configure in code:
55
-
56
- ```python
57
- from mlops import ConnectionConfig, Task
58
-
59
- config = ConnectionConfig(
60
- api_key="xck_...",
61
- domain="https://example.com",
62
- api_path="/api/v1",
63
- debug=False,
64
- )
65
- task = Task(config=config)
66
- ```
67
-
68
- ### Usage
69
-
70
- ```python
71
- from mlops import Task
72
- from mlops.api.client.models.task_status import TaskStatus
73
- from pathlib import Path
74
-
75
- # Initialize Task client (uses environment variables by default)
76
- task = Task()
77
-
78
- # Submit a task with gpu type
79
- try:
80
- result = task.submit(
81
- name="gpu-task-from-sdk",
82
- image="/mnt/minio/images/01ai-registry.cn-shanghai.cr.aliyuncs.com+public+llamafactory+0.9.3.sqsh",
83
- entry_command="llamafactory-cli train /workspace/config/test_lora.yaml",
84
- resources={
85
- "partition": "gpu",
86
- "nodes": 2,
87
- "ntasks": 2,
88
- "cpus_per_task": 2,
89
- "memory": "4G",
90
- "time": "01:00:00",
91
- "gres": "gpu:nvidia_a10:1",
92
- "qos": "qos_xcloud",
93
- },
94
- cluster_name="slurm-cn",
95
- team_id=1,
96
- file_path="your file path", # optional, support for .zip, .tar.gz, .tgz
97
- )
98
-
99
- if result is not None:
100
- print("==== gpu task submitted successfully ====")
101
- job_id = result.job_id
102
- else:
103
- print("==== gpu task submitted failed ====")
104
- except Exception as e:
105
- print("==== gpu task submitted failed error ====", e)
106
-
107
- # Submit a task with cpu type
108
- try:
109
- entry_content = Path("entry.sh").read_text(encoding="utf-8")
110
- result = task.submit(
111
- name="cpu-task-from-sdk",
112
- image="docker://01ai-registry.cn-shanghai.cr.aliyuncs.com/01-ai/xcs/v2/alpine:3.23.0",
113
- entry_command=entry_content,
114
- resources={
115
- "partition": "cpu",
116
- "nodes": 1,
117
- "ntasks": 1,
118
- "cpus_per_task": 1,
119
- "memory": "1G",
120
- "time": "01:00:00",
121
- "qos": "qos_xcloud",
122
- },
123
- cluster_name="slurm-cn",
124
- team_id=1,
125
- )
126
-
127
- if result is not None:
128
- print("==== cpu task submitted successfully ====")
129
- job_id = result.job_id
130
- else:
131
- print("==== cpu task submitted failed ====")
132
- except Exception as e:
133
- print("==== cpu task submitted failed error ====", e)
134
-
135
- # List tasks with filters
136
- try:
137
- completed_tasks = task.list(
138
- status=TaskStatus.COMPLETED,
139
- cluster_name="slurm-cn",
140
- page=1,
141
- page_size=20
142
- )
143
-
144
- # Get task details
145
- if completed_tasks is not None and len(completed_tasks.tasks) > 0:
146
- print("==== completed_tasks number ====", len(completed_tasks.tasks))
147
- task_info = task.get(task_id=completed_tasks.tasks[0].job_id, cluster_name="slurm-cn")
148
- print("==== task_info ====", task_info)
149
- else:
150
- print("==== no completed tasks to get details ====")
151
- except Exception as e:
152
- print("==== get task details failed error ====", e)
153
-
154
-
155
- # Cancel a running task
156
- try:
157
- running_tasks = task.list(
158
- status=TaskStatus.RUNNING,
159
- cluster_name="slurm-cn",
160
- page=1,
161
- page_size=20
162
- )
163
- if running_tasks is not None and len(running_tasks.tasks) > 0:
164
- print("==== running_tasks number ====", len(running_tasks.tasks))
165
- # Cancel a task
166
- result = task.cancel(task_id=running_tasks.tasks[0].job_id, cluster_name="slurm-cn")
167
- print("==== task cancelled ====", running_tasks.tasks[0].job_id, result)
168
- else:
169
- print("==== no running tasks to cancel ====")
170
- except Exception as e:
171
- print("==== cancel running task failed error ====", e)
172
-
173
-
174
- # Delete a task
175
- try:
176
- completed_tasks = task.list(
177
- status=TaskStatus.COMPLETED,
178
- cluster_name="slurm-cn",
179
- page=1,
180
- page_size=20
181
- )
182
- if completed_tasks is not None and len(completed_tasks.tasks) > 0:
183
- print("==== completed_tasks number ====", len(completed_tasks.tasks))
184
- # Delete a task
185
- result = task.delete(task_id=completed_tasks.tasks[0].job_id, cluster_name="slurm-cn")
186
- print("==== task deleted ====", completed_tasks.tasks[0].job_id, result)
187
- else:
188
- print("==== no completed tasks to delete ====")
189
- except Exception as e:
190
- print("==== delete completed task failed error ====", e)
191
- ```
192
-
193
- **Task Management Methods:**
194
-
195
- - `submit()` - Submit a new task with container image and entry command
196
- - `get()` - Get task details by task ID
197
- - `list()` - List tasks with optional filters (status, cluster_name, team_id, user_id)
198
- - `cancel()` - Cancel a running task
199
- - `delete()` - Delete a task record
200
-
201
- **Task Status Values:**
202
-
203
- ```python
204
- from mlops.api.client.models.task_status import TaskStatus
205
-
206
- TaskStatus.PENDING # Task is pending
207
- TaskStatus.QUEUED # Task is queued
208
- TaskStatus.RUNNING # Task is running
209
- TaskStatus.COMPLETED # Task completed successfully
210
- TaskStatus.SUCCEEDED # Task succeeded
211
- TaskStatus.FAILED # Task failed
212
- TaskStatus.CANCELLED # Task was cancelled
213
- TaskStatus.CREATED # Task was created
214
- ```
215
-
216
- **Error Handling:**
217
-
218
- ```python
219
- from mlops.exceptions import (
220
- APIException,
221
- AuthenticationException,
222
- NotFoundException,
223
- RateLimitException,
224
- TimeoutException,
225
- InvalidArgumentException,
226
- NotEnoughSpaceException
227
- )
228
-
229
- try:
230
- result = task.submit(name="test", cluster_name="slurm-cn", command="echo hello")
231
- except AuthenticationException as e:
232
- print(f"Authentication failed: {e}")
233
- except NotFoundException as e:
234
- print(f"Resource not found: {e}")
235
- except APIException as e:
236
- print(f"API error: {e}")
237
- ```
238
-
239
- > [!TIP] Error Handling
240
- > SDKs automatically handle common errors and retry failed requests. Check SDK documentation for error handling best practices.
241
-
242
- ## Features
243
-
244
- - Type-safe API clients
245
- - Automatic authentication
246
- - Error handling
247
- - Request retry logic
248
- - Response validation
249
-
250
- ## Resources
251
-
252
- - [Python SDK Documentation](https://github.com/xcloud-service/xservice/tree/main/client/python-sdk)
253
- - [API Reference](https://xcloud-service.com/docs/api)
254
-
@@ -1,228 +0,0 @@
1
- # SDK
2
-
3
- Software Development Kits for integrating with the XCloud Service API.
4
-
5
- > [!NOTE] SDK Support
6
- > SDKs provide type-safe, high-level interfaces for interacting with the platform API. They handle authentication, error handling, and request retries automatically.
7
-
8
- ## Available SDKs
9
-
10
- ### Python SDK
11
-
12
- ### Installation
13
-
14
- Install the Python SDK from PyPI with pip:
15
-
16
- ```bash
17
- pip install mlops-python-sdk
18
- ```
19
-
20
- ### Configuration
21
-
22
- The SDK reads configuration from environment variables by default:
23
-
24
- - `MLOPS_API_KEY`: API key (required)
25
- - `MLOPS_DOMAIN`: API domain, e.g. `localhost:8090` or `https://example.com`
26
- - `MLOPS_API_PATH`: API path prefix (default: `/api/v1`)
27
- - `MLOPS_DEBUG`: `true|false` (default: `false`)
28
-
29
- Or configure in code:
30
-
31
- ```python
32
- from mlops import ConnectionConfig, Task
33
-
34
- config = ConnectionConfig(
35
- api_key="xck_...",
36
- domain="https://example.com",
37
- api_path="/api/v1",
38
- debug=False,
39
- )
40
- task = Task(config=config)
41
- ```
42
-
43
- ### Usage
44
-
45
- ```python
46
- from mlops import Task
47
- from mlops.api.client.models.task_status import TaskStatus
48
- from pathlib import Path
49
-
50
- # Initialize Task client (uses environment variables by default)
51
- task = Task()
52
-
53
- # Submit a task with gpu type
54
- try:
55
- result = task.submit(
56
- name="gpu-task-from-sdk",
57
- image="/mnt/minio/images/01ai-registry.cn-shanghai.cr.aliyuncs.com+public+llamafactory+0.9.3.sqsh",
58
- entry_command="llamafactory-cli train /workspace/config/test_lora.yaml",
59
- resources={
60
- "partition": "gpu",
61
- "nodes": 2,
62
- "ntasks": 2,
63
- "cpus_per_task": 2,
64
- "memory": "4G",
65
- "time": "01:00:00",
66
- "gres": "gpu:nvidia_a10:1",
67
- "qos": "qos_xcloud",
68
- },
69
- cluster_name="slurm-cn",
70
- team_id=1,
71
- file_path="your file path", # optional; supports .zip, .tar.gz, and .tgz archives
72
- )
73
-
74
- if result is not None:
75
- print("==== gpu task submitted successfully ====")
76
- job_id = result.job_id
77
- else:
78
- print("==== gpu task submitted failed ====")
79
- except Exception as e:
80
- print("==== gpu task submitted failed error ====", e)
81
-
82
- # Submit a task with cpu type
83
- try:
84
- entry_content = Path("entry.sh").read_text(encoding="utf-8")
85
- result = task.submit(
86
- name="cpu-task-from-sdk",
87
- image="docker://01ai-registry.cn-shanghai.cr.aliyuncs.com/01-ai/xcs/v2/alpine:3.23.0",
88
- entry_command=entry_content,
89
- resources={
90
- "partition": "cpu",
91
- "nodes": 1,
92
- "ntasks": 1,
93
- "cpus_per_task": 1,
94
- "memory": "1G",
95
- "time": "01:00:00",
96
- "qos": "qos_xcloud",
97
- },
98
- cluster_name="slurm-cn",
99
- team_id=1,
100
- )
101
-
102
- if result is not None:
103
- print("==== cpu task submitted successfully ====")
104
- job_id = result.job_id
105
- else:
106
- print("==== cpu task submitted failed ====")
107
- except Exception as e:
108
- print("==== cpu task submitted failed error ====", e)
109
-
110
- # List tasks with filters
111
- try:
112
- completed_tasks = task.list(
113
- status=TaskStatus.COMPLETED,
114
- cluster_name="slurm-cn",
115
- page=1,
116
- page_size=20
117
- )
118
-
119
- # Get task details
120
- if completed_tasks is not None and len(completed_tasks.tasks) > 0:
121
- print("==== completed_tasks number ====", len(completed_tasks.tasks))
122
- task_info = task.get(task_id=completed_tasks.tasks[0].job_id, cluster_name="slurm-cn")
123
- print("==== task_info ====", task_info)
124
- else:
125
- print("==== no completed tasks to get details ====")
126
- except Exception as e:
127
- print("==== get task details failed error ====", e)
128
-
129
-
130
- # Cancel a running task
131
- try:
132
- running_tasks = task.list(
133
- status=TaskStatus.RUNNING,
134
- cluster_name="slurm-cn",
135
- page=1,
136
- page_size=20
137
- )
138
- if running_tasks is not None and len(running_tasks.tasks) > 0:
139
- print("==== running_tasks number ====", len(running_tasks.tasks))
140
- # Cancel a task
141
- result = task.cancel(task_id=running_tasks.tasks[0].job_id, cluster_name="slurm-cn")
142
- print("==== task cancelled ====", running_tasks.tasks[0].job_id, result)
143
- else:
144
- print("==== no running tasks to cancel ====")
145
- except Exception as e:
146
- print("==== cancel running task failed error ====", e)
147
-
148
-
149
- # Delete a task
150
- try:
151
- completed_tasks = task.list(
152
- status=TaskStatus.COMPLETED,
153
- cluster_name="slurm-cn",
154
- page=1,
155
- page_size=20
156
- )
157
- if completed_tasks is not None and len(completed_tasks.tasks) > 0:
158
- print("==== completed_tasks number ====", len(completed_tasks.tasks))
159
- # Delete a task
160
- result = task.delete(task_id=completed_tasks.tasks[0].job_id, cluster_name="slurm-cn")
161
- print("==== task deleted ====", completed_tasks.tasks[0].job_id, result)
162
- else:
163
- print("==== no completed tasks to delete ====")
164
- except Exception as e:
165
- print("==== delete completed task failed error ====", e)
166
- ```
167
-
168
- **Task Management Methods:**
169
-
170
- - `submit()` - Submit a new task with container image and entry command
171
- - `get()` - Get task details by task ID
172
- - `list()` - List tasks with optional filters (status, cluster_name, team_id, user_id)
173
- - `cancel()` - Cancel a running task
174
- - `delete()` - Delete a task record
175
-
176
- **Task Status Values:**
177
-
178
- ```python
179
- from mlops.api.client.models.task_status import TaskStatus
180
-
181
- TaskStatus.PENDING # Task is pending
182
- TaskStatus.QUEUED # Task is queued
183
- TaskStatus.RUNNING # Task is running
184
- TaskStatus.COMPLETED # Task completed successfully
185
- TaskStatus.SUCCEEDED # Task succeeded
186
- TaskStatus.FAILED # Task failed
187
- TaskStatus.CANCELLED # Task was cancelled
188
- TaskStatus.CREATED # Task was created
189
- ```
190
-
191
- **Error Handling:**
192
-
193
- ```python
194
- from mlops.exceptions import (
195
- APIException,
196
- AuthenticationException,
197
- NotFoundException,
198
- RateLimitException,
199
- TimeoutException,
200
- InvalidArgumentException,
201
- NotEnoughSpaceException
202
- )
203
-
204
- try:
205
- result = task.submit(name="test", cluster_name="slurm-cn", entry_command="echo hello")
206
- except AuthenticationException as e:
207
- print(f"Authentication failed: {e}")
208
- except NotFoundException as e:
209
- print(f"Resource not found: {e}")
210
- except APIException as e:
211
- print(f"API error: {e}")
212
- ```
213
-
214
- > [!TIP] Error Handling
215
- > SDKs automatically handle common errors and retry failed requests. See the SDK documentation for error-handling best practices.
216
-
217
- ## Features
218
-
219
- - Type-safe API clients
220
- - Automatic authentication
221
- - Error handling
222
- - Request retry logic
223
- - Response validation
224
-
225
- ## Resources
226
-
227
- - [Python SDK Documentation](https://github.com/xcloud-service/xservice/tree/main/client/python-sdk)
228
- - [API Reference](https://xcloud-service.com/docs/api)