mlops-python-sdk 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mlops/task/task.py CHANGED
@@ -6,6 +6,9 @@ This module provides a convenient interface for managing tasks through the MLOps
6
6
 
7
7
  import json
8
8
  import os
9
+ import sys
10
+ import threading
11
+ import time
9
12
  from http import HTTPStatus
10
13
  from pathlib import Path
11
14
  from typing import Optional
@@ -55,13 +58,109 @@ def _validate_archive_file_path(file_path: str) -> Path:
55
58
 
56
59
 
57
60
  def _upload_file_to_presigned_url(url: str, file_path: Path, timeout: Optional[float]) -> None:
61
+ def _format_bytes_iec(n: int) -> str:
62
+ if n < 1024:
63
+ return f"{n}B"
64
+ unit = 1024.0
65
+ suffixes = ["KiB", "MiB", "GiB", "TiB", "PiB"]
66
+ v = float(n)
67
+ i = -1
68
+ while v >= unit and i < len(suffixes) - 1:
69
+ v /= unit
70
+ i += 1
71
+ return f"{v:.1f}{suffixes[i]}"
72
+
73
+ def _render_bar(done: int, total: int, width: int = 28) -> str:
74
+ if total <= 0 or width <= 1:
75
+ return ">"
76
+ done = max(0, min(done, total))
77
+ filled = int(width * (done / total))
78
+ if filled >= width:
79
+ return "=" * width
80
+ if filled <= 0:
81
+ return ">" + (" " * (width - 1))
82
+ return ("=" * filled) + ">" + (" " * (width - filled - 1))
83
+
84
+ def _format_elapsed_seconds(start: float) -> str:
85
+ sec = int(max(0.0, time.monotonic() - start))
86
+ return f"{sec}s"
87
+
88
+ class _ProgressIterable:
89
+ def __init__(self, f, total: int, name: str, chunk_size: int = 64 * 1024):
90
+ self._f = f # file-like object
91
+ self._total = max(0, int(total))
92
+ self._name = name
93
+ self._chunk_size = max(1, int(chunk_size))
94
+ self._read = 0
95
+ self._start = time.monotonic()
96
+ self._completed = False
97
+ self._out = sys.stdout
98
+ try:
99
+ self._is_tty = bool(self._out.isatty())
100
+ except Exception:
101
+ self._is_tty = False
102
+
103
+ def _render_line(self, display_read: int) -> str:
104
+ display_read = max(0, min(int(display_read), self._total))
105
+ pct = (display_read / self._total) * 100.0 if self._total > 0 else 0.0
106
+ bar = _render_bar(display_read, self._total, width=28)
107
+ elapsed = _format_elapsed_seconds(self._start)
108
+ return (
109
+ f"uploading {self._name} [{bar}] {pct:6.2f}% "
110
+ f"({_format_bytes_iec(display_read)}/{_format_bytes_iec(self._total)}) "
111
+ f"elapsed {elapsed}"
112
+ )
113
+
114
+ def _print_line(self, line: str, final: bool = False) -> None:
115
+ if self._is_tty:
116
+ # Refresh same line in terminal.
117
+ print("\r" + line, end="" if not final else "\n", file=self._out, flush=True)
118
+ else:
119
+ # Always visible in non-TTY environments.
120
+ print(line, file=self._out, flush=True)
121
+
122
+ def __iter__(self):
123
+ stop_event = threading.Event()
124
+
125
+ def ticker() -> None:
126
+ last_sec = -1
127
+ # Print immediately so users see something right away.
128
+ self._print_line(self._render_line(self._read))
129
+ while not stop_event.is_set():
130
+ sec = int(max(0.0, time.monotonic() - self._start))
131
+ if sec != last_sec:
132
+ last_sec = sec
133
+ self._print_line(self._render_line(self._read))
134
+ # check frequently to avoid skipping seconds
135
+ stop_event.wait(0.05)
136
+
137
+ t = threading.Thread(target=ticker, name="mlops-upload-progress", daemon=True)
138
+ t.start()
139
+ try:
140
+ while True:
141
+ chunk = self._f.read(self._chunk_size)
142
+ if not chunk:
143
+ break
144
+ self._read += len(chunk)
145
+ yield chunk
146
+ finally:
147
+ # Ensure a final 100% line and stop ticker.
148
+ self._read = self._total
149
+ self._completed = True
150
+ stop_event.set()
151
+ t.join(timeout=0.2)
152
+ self._print_line(self._render_line(self._read), final=True)
153
+
58
154
  size = file_path.stat().st_size
59
155
  # Use a dedicated client for S3 presigned upload (avoid leaking API auth headers).
60
156
  with httpx.Client(timeout=timeout) as client:
61
157
  with file_path.open("rb") as f:
158
+ content = f
159
+ if size > 0:
160
+ content = _ProgressIterable(f, total=size, name=file_path.name)
62
161
  resp = client.put(
63
162
  url,
64
- content=f,
163
+ content=content,
65
164
  headers={
66
165
  "Content-Length": str(size),
67
166
  "Content-Type": "application/octet-stream",
@@ -0,0 +1,235 @@
1
+ Metadata-Version: 2.3
2
+ Name: mlops-python-sdk
3
+ Version: 1.0.3
4
+ Summary: MLOps Python SDK for XCloud Service API
5
+ License: MIT
6
+ Author: mlops
7
+ Author-email: mlops@example.com
8
+ Requires-Python: >=3.9,<4.0
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Requires-Dist: attrs (>=23.2.0)
17
+ Requires-Dist: httpx (>=0.27.0,<1.0.0)
18
+ Requires-Dist: packaging (>=24.1)
19
+ Requires-Dist: python-dateutil (>=2.8.2)
20
+ Requires-Dist: typing-extensions (>=4.1.0)
21
+ Project-URL: Bug Tracker, https://github.com/xcloud-service/xservice/issues
22
+ Project-URL: Homepage, https://mlops.cloud/
23
+ Project-URL: Repository, https://github.com/xcloud-service/xservice
24
+ Description-Content-Type: text/markdown
25
+
26
+ # SDK
27
+
28
+ Software Development Kits for integrating with the XCloud Service API.
29
+
30
+ > [!NOTE] SDK Support
31
+ > SDKs provide type-safe, high-level interfaces for interacting with the platform API. They handle authentication, error handling, and request retries automatically.
32
+
33
+
34
+ ## Installation
35
+
36
+ Install the Python SDK with pip:
37
+
38
+ ```bash
39
+ pip install mlops-python-sdk
40
+ ```
41
+
42
+ ### Configuration
43
+
44
+ The SDK reads configuration from environment variables by default:
45
+
46
+ - `MLOPS_API_KEY`: API key (required)
47
+ - `MLOPS_DOMAIN`: API domain, e.g. `localhost:8090` or `https://example.com`
48
+ - `MLOPS_API_PATH`: API path prefix (default: `/api/v1`)
49
+ - `MLOPS_DEBUG`: `true|false` (default: `false`)
50
+
51
+ Or configure in code:
52
+
53
+ ```python
54
+ from mlops import ConnectionConfig, Task
55
+
56
+ config = ConnectionConfig(
57
+ api_key="xck_...",
58
+ domain="https://example.com",
59
+ api_path="/api/v1",
60
+ debug=False,
61
+ )
62
+ task = Task(config=config)
63
+ ```
64
+
65
+ ## SDK Usage
66
+
67
+ ### Initialize client
68
+
69
+ ```python
70
+ from mlops import Task
71
+
72
+ task = Task() # uses environment variables by default
73
+ ```
74
+
75
+ ### Submit a GPU task
76
+
77
+ ```python
78
+ from mlops import Task
79
+
80
+ task = Task()
81
+ resp = task.submit(
82
+ name="gpu-task-from-sdk",
83
+ cluster_name="slurm-cn",
84
+ team_id=1,
85
+ image="/mnt/minio/images/01ai-registry.cn-shanghai.cr.aliyuncs.com+public+llamafactory+0.9.3.sqsh",
86
+ entry_command="llamafactory-cli train /workspace/config/test_lora.yaml",
87
+ resources={
88
+ "partition": "gpu",
89
+ "nodes": 2,
90
+ "ntasks": 2,
91
+ "cpus_per_task": 2,
92
+ "memory": "4G",
93
+ "time": "01:00:00",
94
+ "gres": "gpu:nvidia_a10:1",
95
+ "qos": "qos_xcloud",
96
+ },
97
+ file_path="/path/to/xservice.zip", # optional: .zip/.tar.gz/.tgz
98
+ )
99
+ print(resp.job_id)
100
+ ```
101
+
102
+ ### Submit a CPU task
103
+
104
+ ```python
105
+ from mlops import Task
106
+
107
+ task = Task()
108
+ resp = task.submit(
109
+ name="cpu-task-from-sdk",
110
+ cluster_name="slurm-cn",
111
+ team_id=1,
112
+ image="docker://01ai-registry.cn-shanghai.cr.aliyuncs.com/01-ai/xcs/v2/alpine:3.23.0",
113
+ entry_command="echo hello",
114
+ resources={
115
+ "partition": "cpu",
116
+ "nodes": 1,
117
+ "ntasks": 1,
118
+ "cpus_per_task": 1,
119
+ "memory": "1G",
120
+ "time": "01:00:00",
121
+ "qos": "qos_xcloud",
122
+ },
123
+ )
124
+ print(resp.job_id)
125
+ ```
126
+
127
+ ### List tasks
128
+
129
+ ```python
130
+ from mlops import Task
131
+ from mlops.api.client.models.task_status import TaskStatus
132
+
133
+ task = Task()
134
+ resp = task.list(status=TaskStatus.COMPLETED, cluster_name="slurm-cn", page=1, page_size=20)
135
+ print(len(resp.tasks or []))
136
+ ```
137
+
138
+ ### Get task details
139
+
140
+ ```python
141
+ from mlops import Task
142
+
143
+ task = Task()
144
+ task_info = task.get(task_id=12345, cluster_name="slurm-cn")
145
+ print(task_info)
146
+ ```
147
+
148
+ ### Cancel a task
149
+
150
+ ```python
151
+ from mlops import Task
152
+
153
+ task = Task()
154
+ task.cancel(task_id=12345, cluster_name="slurm-cn")
155
+ ```
156
+
157
+ ### Delete a task
158
+
159
+ ```python
160
+ from mlops import Task
161
+
162
+ task = Task()
163
+ task.delete(task_id=12345, cluster_name="slurm-cn")
164
+ ```
165
+
166
+ **Task Management Methods:**
167
+
168
+ - `submit()` - Submit a new task with container image and entry command
169
+ - `get()` - Get task details by task ID
170
+ - `list()` - List tasks with optional filters (status, cluster_name, team_id, user_id)
171
+ - `cancel()` - Cancel a running task
172
+ - `delete()` - Delete a task record
173
+
174
+ **Task Status Values:**
175
+
176
+ ```python
177
+ from mlops.api.client.models.task_status import TaskStatus
178
+
179
+ TaskStatus.PENDING # Task is pending
180
+ TaskStatus.QUEUED # Task is queued
181
+ TaskStatus.RUNNING # Task is running
182
+ TaskStatus.COMPLETED # Task completed successfully
183
+ TaskStatus.SUCCEEDED # Task succeeded
184
+ TaskStatus.FAILED # Task failed
185
+ TaskStatus.CANCELLED # Task was cancelled
186
+ TaskStatus.CREATED # Task was created
187
+ ```
188
+
189
+ **Error Handling:**
190
+
191
+ ```python
192
+ from mlops.exceptions import (
193
+ APIException,
194
+ AuthenticationException,
195
+ NotFoundException,
196
+ RateLimitException,
197
+ TimeoutException,
198
+ InvalidArgumentException,
199
+ NotEnoughSpaceException
200
+ )
201
+ from mlops import Task
202
+
203
+ task = Task()
204
+
205
+ try:
206
+ result = task.submit(
207
+ name="test",
208
+ cluster_name="slurm-cn",
209
+ image="docker://alpine:3.23.0",
210
+ entry_command="echo hello",
211
+ )
212
+ except AuthenticationException as e:
213
+ print(f"Authentication failed: {e}")
214
+ except NotFoundException as e:
215
+ print(f"Resource not found: {e}")
216
+ except APIException as e:
217
+ print(f"API error: {e}")
218
+ ```
219
+
220
+ > [!TIP] Error Handling
221
+ > SDKs automatically parse typed responses and raise structured exceptions.
222
+
223
+ ## Features
224
+
225
+ - Type-safe API clients
226
+ - Automatic authentication
227
+ - Error handling
228
+ - Typed response parsing (generated models)
229
+ - Unexpected-status guard (optional)
230
+
231
+ ## Resources
232
+
233
+ - [Python SDK Documentation](https://github.com/xcloud-service/xservice/tree/main/client/python-sdk)
234
+ - [API Reference](https://xcloud-service.com/docs/api)
235
+
@@ -46,7 +46,7 @@ mlops/connection_config.py,sha256=_b9sVFGJtf1GynmIB4NtKCzg7kkgE-wSrsG3LwzlOqk,29
46
46
  mlops/exceptions.py,sha256=3kfda-Rz0km9kV-gvnPCw7ueemWkXIGGdT0NXx6z9Xk,1680
47
47
  mlops/task/__init__.py,sha256=M983vMPLj3tZQNFXQyTP5I2RsRorFElezLeppr3WLsw,133
48
48
  mlops/task/client.py,sha256=V131WLVJl1raGAVixUhJCX8s1neN15mxAjQwO01qlIg,3552
49
- mlops/task/task.py,sha256=Y_lWpIVY9Wq-2iuaoZYuskcWHasUzLSpXi9fkwn7S3s,23882
50
- mlops_python_sdk-1.0.2.dist-info/METADATA,sha256=lBkRytOiRISGMHHzk93fijbmF9EC9iKSpHm-6I9QNsM,7637
51
- mlops_python_sdk-1.0.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
52
- mlops_python_sdk-1.0.2.dist-info/RECORD,,
49
+ mlops/task/task.py,sha256=Eqb4XGMlFLjelg3js9Twoulf0Nlyn0pz5isuGl916vs,27756
50
+ mlops_python_sdk-1.0.3.dist-info/METADATA,sha256=KwMwLVAYfXBjKXXiU_p5TibVXGbli5gaxCCa0Wap9h4,5679
51
+ mlops_python_sdk-1.0.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
52
+ mlops_python_sdk-1.0.3.dist-info/RECORD,,
@@ -1,254 +0,0 @@
1
- Metadata-Version: 2.3
2
- Name: mlops-python-sdk
3
- Version: 1.0.2
4
- Summary: MLOps Python SDK for XCloud Service API
5
- License: MIT
6
- Author: mlops
7
- Author-email: mlops@example.com
8
- Requires-Python: >=3.9,<4.0
9
- Classifier: License :: OSI Approved :: MIT License
10
- Classifier: Programming Language :: Python :: 3
11
- Classifier: Programming Language :: Python :: 3.9
12
- Classifier: Programming Language :: Python :: 3.10
13
- Classifier: Programming Language :: Python :: 3.11
14
- Classifier: Programming Language :: Python :: 3.12
15
- Classifier: Programming Language :: Python :: 3.13
16
- Requires-Dist: attrs (>=23.2.0)
17
- Requires-Dist: httpx (>=0.27.0,<1.0.0)
18
- Requires-Dist: packaging (>=24.1)
19
- Requires-Dist: python-dateutil (>=2.8.2)
20
- Requires-Dist: typing-extensions (>=4.1.0)
21
- Project-URL: Bug Tracker, https://github.com/xcloud-service/xservice/issues
22
- Project-URL: Homepage, https://mlops.cloud/
23
- Project-URL: Repository, https://github.com/xcloud-service/xservice
24
- Description-Content-Type: text/markdown
25
-
26
- # SDK
27
-
28
- Software Development Kits for integrating with the XCloud Service API.
29
-
30
- > [!NOTE] SDK Support
31
- > SDKs provide type-safe, high-level interfaces for interacting with the platform API. They handle authentication, error handling, and request retries automatically.
32
-
33
- ## Available SDKs
34
-
35
- ### Python SDK
36
-
37
- ### Installation
38
-
39
- The Python SDK installation.
40
-
41
- ```bash
42
- pip install mlops-python-sdk
43
- ```
44
-
45
- ### Configuration
46
-
47
- The SDK reads configuration from environment variables by default:
48
-
49
- - `MLOPS_API_KEY`: API key (required)
50
- - `MLOPS_DOMAIN`: API domain, e.g. `localhost:8090` or `https://example.com`
51
- - `MLOPS_API_PATH`: API path prefix (default: `/api/v1`)
52
- - `MLOPS_DEBUG`: `true|false` (default: `false`)
53
-
54
- Or configure in code:
55
-
56
- ```python
57
- from mlops import ConnectionConfig, Task
58
-
59
- config = ConnectionConfig(
60
- api_key="xck_...",
61
- domain="https://example.com",
62
- api_path="/api/v1",
63
- debug=False,
64
- )
65
- task = Task(config=config)
66
- ```
67
-
68
- ### Usage
69
-
70
- ```python
71
- from mlops import Task
72
- from mlops.api.client.models.task_status import TaskStatus
73
- from pathlib import Path
74
-
75
- # Initialize Task client (uses environment variables by default)
76
- task = Task()
77
-
78
- # Submit a task with gpu type
79
- try:
80
- result = task.submit(
81
- name="gpu-task-from-sdk",
82
- image="/mnt/minio/images/01ai-registry.cn-shanghai.cr.aliyuncs.com+public+llamafactory+0.9.3.sqsh",
83
- entry_command="llamafactory-cli train /workspace/config/test_lora.yaml",
84
- resources={
85
- "partition": "gpu",
86
- "nodes": 2,
87
- "ntasks": 2,
88
- "cpus_per_task": 2,
89
- "memory": "4G",
90
- "time": "01:00:00",
91
- "gres": "gpu:nvidia_a10:1",
92
- "qos": "qos_xcloud",
93
- },
94
- cluster_name="slurm-cn",
95
- team_id=1,
96
- file_path="your file path", # optional, support for .zip, .tar.gz, .tgz
97
- )
98
-
99
- if result is not None:
100
- print("==== gpu task submitted successfully ====")
101
- job_id = result.job_id
102
- else:
103
- print("==== gpu task submitted failed ====")
104
- except Exception as e:
105
- print("==== gpu task submitted failed error ====", e)
106
-
107
- # Submit a task with cpu type
108
- try:
109
- entry_content = Path("entry.sh").read_text(encoding="utf-8")
110
- result = task.submit(
111
- name="cpu-task-from-sdk",
112
- image="docker://01ai-registry.cn-shanghai.cr.aliyuncs.com/01-ai/xcs/v2/alpine:3.23.0",
113
- entry_command=entry_content,
114
- resources={
115
- "partition": "cpu",
116
- "nodes": 1,
117
- "ntasks": 1,
118
- "cpus_per_task": 1,
119
- "memory": "1G",
120
- "time": "01:00:00",
121
- "qos": "qos_xcloud",
122
- },
123
- cluster_name="slurm-cn",
124
- team_id=1,
125
- )
126
-
127
- if result is not None:
128
- print("==== cpu task submitted successfully ====")
129
- job_id = result.job_id
130
- else:
131
- print("==== cpu task submitted failed ====")
132
- except Exception as e:
133
- print("==== cpu task submitted failed error ====", e)
134
-
135
- # List tasks with filters
136
- try:
137
- completed_tasks = task.list(
138
- status=TaskStatus.COMPLETED,
139
- cluster_name="slurm-cn",
140
- page=1,
141
- page_size=20
142
- )
143
-
144
- # Get task details
145
- if completed_tasks is not None and len(completed_tasks.tasks) > 0:
146
- print("==== completed_tasks number ====", len(completed_tasks.tasks))
147
- task_info = task.get(task_id=completed_tasks.tasks[0].job_id, cluster_name="slurm-cn")
148
- print("==== task_info ====", task_info)
149
- else:
150
- print("==== no completed tasks to get details ====")
151
- except Exception as e:
152
- print("==== get task details failed error ====", e)
153
-
154
-
155
- # Cancel a running task
156
- try:
157
- running_tasks = task.list(
158
- status=TaskStatus.RUNNING,
159
- cluster_name="slurm-cn",
160
- page=1,
161
- page_size=20
162
- )
163
- if running_tasks is not None and len(running_tasks.tasks) > 0:
164
- print("==== running_tasks number ====", len(running_tasks.tasks))
165
- # Cancel a task
166
- result = task.cancel(task_id=running_tasks.tasks[0].job_id, cluster_name="slurm-cn")
167
- print("==== task cancelled ====", running_tasks.tasks[0].job_id, result)
168
- else:
169
- print("==== no running tasks to cancel ====")
170
- except Exception as e:
171
- print("==== cancel running task failed error ====", e)
172
-
173
-
174
- # Delete a task
175
- try:
176
- completed_tasks = task.list(
177
- status=TaskStatus.COMPLETED,
178
- cluster_name="slurm-cn",
179
- page=1,
180
- page_size=20
181
- )
182
- if completed_tasks is not None and len(completed_tasks.tasks) > 0:
183
- print("==== completed_tasks number ====", len(completed_tasks.tasks))
184
- # Delete a task
185
- result = task.delete(task_id=completed_tasks.tasks[0].job_id, cluster_name="slurm-cn")
186
- print("==== task deleted ====", completed_tasks.tasks[0].job_id, result)
187
- else:
188
- print("==== no completed tasks to delete ====")
189
- except Exception as e:
190
- print("==== delete completed task failed error ====", e)
191
- ```
192
-
193
- **Task Management Methods:**
194
-
195
- - `submit()` - Submit a new task with container image and entry command
196
- - `get()` - Get task details by task ID
197
- - `list()` - List tasks with optional filters (status, cluster_name, team_id, user_id)
198
- - `cancel()` - Cancel a running task
199
- - `delete()` - Delete a task record
200
-
201
- **Task Status Values:**
202
-
203
- ```python
204
- from mlops.api.client.models.task_status import TaskStatus
205
-
206
- TaskStatus.PENDING # Task is pending
207
- TaskStatus.QUEUED # Task is queued
208
- TaskStatus.RUNNING # Task is running
209
- TaskStatus.COMPLETED # Task completed successfully
210
- TaskStatus.SUCCEEDED # Task succeeded
211
- TaskStatus.FAILED # Task failed
212
- TaskStatus.CANCELLED # Task was cancelled
213
- TaskStatus.CREATED # Task was created
214
- ```
215
-
216
- **Error Handling:**
217
-
218
- ```python
219
- from mlops.exceptions import (
220
- APIException,
221
- AuthenticationException,
222
- NotFoundException,
223
- RateLimitException,
224
- TimeoutException,
225
- InvalidArgumentException,
226
- NotEnoughSpaceException
227
- )
228
-
229
- try:
230
- result = task.submit(name="test", cluster_name="slurm-cn", command="echo hello")
231
- except AuthenticationException as e:
232
- print(f"Authentication failed: {e}")
233
- except NotFoundException as e:
234
- print(f"Resource not found: {e}")
235
- except APIException as e:
236
- print(f"API error: {e}")
237
- ```
238
-
239
- > [!TIP] Error Handling
240
- > SDKs automatically handle common errors and retry failed requests. Check SDK documentation for error handling best practices.
241
-
242
- ## Features
243
-
244
- - Type-safe API clients
245
- - Automatic authentication
246
- - Error handling
247
- - Request retry logic
248
- - Response validation
249
-
250
- ## Resources
251
-
252
- - [Python SDK Documentation](https://github.com/xcloud-service/xservice/tree/main/client/python-sdk)
253
- - [API Reference](https://xcloud-service.com/docs/api)
254
-