mlops-python-sdk 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlops/connection_config.py +17 -12
- mlops/task/task.py +246 -73
- mlops_python_sdk-1.0.4.dist-info/METADATA +235 -0
- {mlops_python_sdk-1.0.2.dist-info → mlops_python_sdk-1.0.4.dist-info}/RECORD +5 -5
- mlops_python_sdk-1.0.2.dist-info/METADATA +0 -254
- {mlops_python_sdk-1.0.2.dist-info → mlops_python_sdk-1.0.4.dist-info}/WHEEL +0 -0
mlops/connection_config.py
CHANGED
|
@@ -66,17 +66,7 @@ class ConnectionConfig:
|
|
|
66
66
|
self.api_path = "/" + self.api_path
|
|
67
67
|
|
|
68
68
|
# Build API URL
|
|
69
|
-
|
|
70
|
-
base_url = "http://localhost:8090"
|
|
71
|
-
else:
|
|
72
|
-
# If domain already includes protocol, use it as-is
|
|
73
|
-
# Otherwise, default to http:// for backward compatibility
|
|
74
|
-
if self.domain.startswith(("http://", "https://")):
|
|
75
|
-
base_url = self.domain
|
|
76
|
-
else:
|
|
77
|
-
base_url = f"http://{self.domain}"
|
|
78
|
-
|
|
79
|
-
self.api_url = f"{base_url}{self.api_path}"
|
|
69
|
+
self.build_api_url()
|
|
80
70
|
|
|
81
71
|
@staticmethod
|
|
82
72
|
def _get_request_timeout(
|
|
@@ -89,7 +79,22 @@ class ConnectionConfig:
|
|
|
89
79
|
return request_timeout
|
|
90
80
|
else:
|
|
91
81
|
return default_timeout
|
|
92
|
-
|
|
82
|
+
|
|
83
|
+
def build_api_url(self) -> None:
    """Recompute ``self.api_url`` from ``debug``, ``domain`` and ``api_path``.

    Call this again after changing ``domain``, ``debug`` or ``api_path`` so
    that ``api_url`` does not go stale.

    Base URL selection:
    - ``debug`` is truthy: always ``http://localhost:8090`` (domain ignored).
    - domain already starts with ``http://`` or ``https://``: used as-is.
    - domain starts with ``localhost`` or ``127.0.0.1``: prefixed with ``http://``.
    - anything else: prefixed with ``https://``.

    A trailing ``/`` on the domain is dropped so that joining it with
    ``api_path`` does not produce a double slash.
    """
    if self.debug:
        base_url = "http://localhost:8090"
    else:
        domain = self.domain.rstrip("/")
        if domain.startswith(("http://", "https://")):
            # An explicit scheme always wins.
            base_url = domain
        elif domain.startswith(("localhost", "127.0.0.1")):
            # Local endpoints are reached over plain HTTP.
            base_url = f"http://{domain}"
        else:
            # Every other bare host defaults to HTTPS.
            base_url = f"https://{domain}"

    self.api_url = f"{base_url}{self.api_path}"
|
|
97
|
+
|
|
93
98
|
def get_request_timeout(self, request_timeout: Optional[float] = None):
|
|
94
99
|
return self._get_request_timeout(self.request_timeout, request_timeout)
|
|
95
100
|
|
mlops/task/task.py
CHANGED
|
@@ -6,9 +6,15 @@ This module provides a convenient interface for managing tasks through the MLOps
|
|
|
6
6
|
|
|
7
7
|
import json
|
|
8
8
|
import os
|
|
9
|
+
import shutil
|
|
10
|
+
import sys
|
|
11
|
+
import threading
|
|
12
|
+
import tempfile
|
|
13
|
+
import time
|
|
14
|
+
import zipfile
|
|
9
15
|
from http import HTTPStatus
|
|
10
16
|
from pathlib import Path
|
|
11
|
-
from typing import Optional
|
|
17
|
+
from typing import Callable, Optional
|
|
12
18
|
|
|
13
19
|
import httpx
|
|
14
20
|
|
|
@@ -53,15 +59,183 @@ def _validate_archive_file_path(file_path: str) -> Path:
|
|
|
53
59
|
raise APIException(f"file_path must be one of .zip, .tar.gz, .tgz: {p}")
|
|
54
60
|
return p
|
|
55
61
|
|
|
62
|
+
def _is_archive_path(p: Path) -> bool:
    """Return True if the file name of *p* ends with .zip, .tar.gz or .tgz.

    Only the final path component is inspected and the comparison is
    case-insensitive; the file is not opened or checked for existence.
    """
    return p.name.lower().endswith((".zip", ".tar.gz", ".tgz"))
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _zip_directory(src_dir: Path, dst_zip: Path) -> None:
    """Write every file under *src_dir* into a new deflate-compressed zip.

    Entries are stored as ``<src_dir name>/<relative posix path>``, so the
    archive unpacks into a single top-level folder. Directories themselves
    are not stored, which means empty directories are omitted.

    Args:
        src_dir: Directory to compress; resolved to an absolute path first.
        dst_zip: Path of the zip file to create (overwritten if present).
    """
    src_dir = src_dir.resolve()
    dst_resolved = dst_zip.resolve()
    root_name = src_dir.name
    with zipfile.ZipFile(dst_zip, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        # Sorted so the entry order does not depend on filesystem listing order.
        for p in sorted(src_dir.rglob("*")):
            if p.is_dir():
                continue
            if p == dst_resolved:
                # The destination lives inside the source tree; never add the
                # archive that is currently being written to itself.
                continue
            rel = p.relative_to(src_dir).as_posix()
            zf.write(p, arcname=f"{root_name}/{rel}")
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _zip_file(src_file: Path, dst_zip: Path) -> None:
    """Create a deflate-compressed zip at *dst_zip* holding only *src_file*.

    The single entry is stored under the file's own name, with no directory
    prefix. *src_file* is resolved to an absolute path before it is read.
    """
    resolved = src_file.resolve()
    archive = zipfile.ZipFile(dst_zip, "w", compression=zipfile.ZIP_DEFLATED)
    with archive:
        archive.write(resolved, arcname=resolved.name)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _path_to_archive_path(task_name: str, file_path: str) -> tuple[Path, Callable[[], None]]:
    """
    Mirror cli-go `pathToArchivePath` behavior:
    - directory: zip it
    - file: if already an archive (.zip/.tar.gz/.tgz) return as-is; otherwise zip it

    Args:
        task_name: Task name, used to label the temporary working directory.
        file_path: Path to a file or directory; ``~`` is expanded.

    Returns:
        (archive_path, cleanup_callable). ``cleanup_callable`` removes the
        temporary directory created for a freshly built zip and is a no-op
        when the input already was an archive.

    Raises:
        APIException: the path does not exist, is neither a file nor a
            directory, or compression fails.
    """
    p = Path(os.path.expanduser(file_path)).resolve()
    if not p.exists():
        raise APIException(f"File not found: {p}")

    if p.is_dir():
        compress, kind = _zip_directory, "directory"
    elif p.is_file():
        if _is_archive_path(p):
            # Already an archive: upload the user's file directly.
            return _validate_archive_file_path(str(p)), lambda: None
        compress, kind = _zip_file, "file"
    else:
        raise APIException(f"file_path must be a file or directory: {p}")

    tmp_dir = _make_archive_tmp_dir(task_name)

    def cleanup() -> None:
        shutil.rmtree(tmp_dir, ignore_errors=True)

    archive_path = tmp_dir / f"{p.name}.zip"
    try:
        compress(p, archive_path)
    except Exception as e:
        # Do not leave a half-written archive behind.
        cleanup()
        raise APIException(f"failed to compress {kind}: {e}") from e
    return archive_path, cleanup


def _make_archive_tmp_dir(task_name: str) -> Path:
    """Create and return a unique temp directory for one archive of *task_name*."""
    # The task name becomes a path component: neutralise separators and other
    # unusual characters so it cannot escape the temp root ("/abs", "..").
    safe_name = "".join(
        ch if ch.isalnum() or ch in "._-" else "_" for ch in task_name
    )
    if not safe_name.strip("."):
        safe_name = "task"
    base = Path(tempfile.gettempdir()) / "xservice-cli" / safe_name
    base.mkdir(parents=True, exist_ok=True)
    # mkdtemp guarantees a fresh directory, so two submissions in the same
    # second never share (and later delete) each other's archive.
    prefix = time.strftime("%Y%m%d%H%M%S") + "-"
    return Path(tempfile.mkdtemp(prefix=prefix, dir=base))
|
|
133
|
+
|
|
56
134
|
|
|
57
135
|
def _upload_file_to_presigned_url(url: str, file_path: Path, timeout: Optional[float]) -> None:
|
|
136
|
+
def _format_bytes_iec(n: int) -> str:
    """Format a byte count with binary (IEC) units.

    Values below 1024 are shown as whole bytes (``"512B"``); larger values
    use one decimal place with KiB, MiB, GiB, TiB or PiB. PiB is the largest
    unit, so bigger values stay in PiB.
    """
    if n < 1024:
        return f"{n}B"
    units = ("KiB", "MiB", "GiB", "TiB", "PiB")
    step = 1024.0
    value = float(n) / step
    idx = 0
    last = len(units) - 1
    while value >= step and idx < last:
        value /= step
        idx += 1
    return f"{value:.1f}{units[idx]}"
|
|
147
|
+
|
|
148
|
+
def _render_bar(done: int, total: int, width: int = 28) -> str:
    """Draw a text progress bar of *width* characters.

    Completed cells are ``=``, followed by a ``>`` head and space padding; a
    finished bar is all ``=``. When *total* is not positive or *width* is at
    most 1, a lone ``">"`` is returned. *done* is clamped into ``[0, total]``.
    """
    if not (total > 0 and width > 1):
        return ">"
    progress = max(0, min(done, total))
    cells = int(width * (progress / total))
    if cells >= width:
        return "=" * width
    # Head marker after the filled cells, padded with spaces to full width.
    return ("=" * cells + ">").ljust(width)
|
|
158
|
+
|
|
159
|
+
def _format_elapsed_seconds(start: float) -> str:
    """Return the whole seconds elapsed since *start* as e.g. ``"12s"``.

    *start* is a ``time.monotonic()`` reading; a start in the future is
    reported as ``"0s"``.
    """
    now = time.monotonic()
    elapsed = max(0.0, now - start)
    return "{}s".format(int(elapsed))
|
|
162
|
+
|
|
163
|
+
class _ProgressIterable:
    """Iterate a binary file in chunks while printing upload progress.

    Iterating the object yields the file content in ``chunk_size`` pieces.
    While iteration runs, a daemon thread prints a progress line to stdout
    once per elapsed second. On a TTY the same line is redrawn with a
    carriage return; otherwise each update is printed on its own line.
    """

    def __init__(self, f, total: int, name: str, chunk_size: int = 64 * 1024):
        """
        Args:
            f: File-like object opened for binary reading.
            total: Expected number of bytes; negative values are treated as 0.
            name: Label shown in the progress line.
            chunk_size: Bytes per read; values below 1 are raised to 1.
        """
        self._f = f  # file-like object
        self._total = max(0, int(total))
        self._name = name
        self._chunk_size = max(1, int(chunk_size))
        self._read = 0
        self._start = time.monotonic()
        self._completed = False
        self._out = sys.stdout
        try:
            self._is_tty = bool(self._out.isatty())
        except Exception:
            # A replaced stdout may lack a working isatty(); treat as non-TTY.
            self._is_tty = False

    def _render_line(self, display_read: int) -> str:
        """Build the progress line for *display_read* bytes (clamped to the total)."""
        display_read = max(0, min(int(display_read), self._total))
        pct = (display_read / self._total) * 100.0 if self._total > 0 else 0.0
        bar = _render_bar(display_read, self._total, width=28)
        elapsed = _format_elapsed_seconds(self._start)
        return (
            f"uploading {self._name} [{bar}] {pct:6.2f}% "
            f"({_format_bytes_iec(display_read)}/{_format_bytes_iec(self._total)}) "
            f"elapsed {elapsed}"
        )

    def _print_line(self, line: str, final: bool = False) -> None:
        """Print *line*; on a TTY redraw in place, ending with a newline when *final*."""
        if self._is_tty:
            # Refresh same line in terminal.
            print("\r" + line, end="" if not final else "\n", file=self._out, flush=True)
        else:
            # Always visible in non-TTY environments.
            print(line, file=self._out, flush=True)

    def __iter__(self):
        """Yield the file's chunks, reporting progress until iteration ends."""
        stop_event = threading.Event()

        def ticker() -> None:
            last_sec = -1
            # Print immediately so users see something right away.
            self._print_line(self._render_line(self._read))
            while not stop_event.is_set():
                sec = int(max(0.0, time.monotonic() - self._start))
                if sec != last_sec:
                    last_sec = sec
                    self._print_line(self._render_line(self._read))
                # check frequently to avoid skipping seconds
                stop_event.wait(0.05)

        t = threading.Thread(target=ticker, name="mlops-upload-progress", daemon=True)
        t.start()
        reached_eof = False
        try:
            while True:
                chunk = self._f.read(self._chunk_size)
                if not chunk:
                    reached_eof = True
                    break
                self._read += len(chunk)
                yield chunk
        finally:
            # Snap to 100% only when the whole file was consumed. If the
            # consumer stopped early or a read failed, the final line keeps
            # the real byte count instead of claiming a finished upload.
            if reached_eof:
                self._read = self._total
            self._completed = reached_eof
            stop_event.set()
            t.join(timeout=0.2)
            self._print_line(self._render_line(self._read), final=True)
|
|
228
|
+
|
|
58
229
|
size = file_path.stat().st_size
|
|
59
230
|
# Use a dedicated client for S3 presigned upload (avoid leaking API auth headers).
|
|
60
231
|
with httpx.Client(timeout=timeout) as client:
|
|
61
232
|
with file_path.open("rb") as f:
|
|
233
|
+
content = f
|
|
234
|
+
if size > 0:
|
|
235
|
+
content = _ProgressIterable(f, total=size, name=file_path.name)
|
|
62
236
|
resp = client.put(
|
|
63
237
|
url,
|
|
64
|
-
content=
|
|
238
|
+
content=content,
|
|
65
239
|
headers={
|
|
66
240
|
"Content-Length": str(size),
|
|
67
241
|
"Content-Type": "application/octet-stream",
|
|
@@ -147,6 +321,7 @@ class Task:
|
|
|
147
321
|
config.api_key = api_key
|
|
148
322
|
if domain is not None:
|
|
149
323
|
config.domain = domain
|
|
324
|
+
config.build_api_url()
|
|
150
325
|
if debug is not None:
|
|
151
326
|
config.debug = debug
|
|
152
327
|
if request_timeout is not None:
|
|
@@ -229,84 +404,82 @@ class Task:
|
|
|
229
404
|
request_kwargs["ntasks"] = 1
|
|
230
405
|
|
|
231
406
|
if file_path:
|
|
232
|
-
local_path =
|
|
407
|
+
local_path, cleanup = _path_to_archive_path(name, file_path)
|
|
233
408
|
timeout = self._config.get_request_timeout()
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
)
|
|
240
|
-
presign_upload = presign_upload_obj.parsed
|
|
241
|
-
if isinstance(presign_upload, ErrorResponse):
|
|
242
|
-
status_code = (
|
|
243
|
-
presign_upload.code
|
|
244
|
-
if presign_upload.code != UNSET and presign_upload.code != 0
|
|
245
|
-
else presign_upload_obj.status_code.value
|
|
409
|
+
try:
|
|
410
|
+
# 1) Get presigned upload URL
|
|
411
|
+
presign_upload_obj = get_storage_presign_upload.sync_detailed(
|
|
412
|
+
client=self._client,
|
|
413
|
+
filename=local_path.name,
|
|
246
414
|
)
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
415
|
+
presign_upload = presign_upload_obj.parsed
|
|
416
|
+
if isinstance(presign_upload, ErrorResponse):
|
|
417
|
+
status_code = (
|
|
418
|
+
presign_upload.code
|
|
419
|
+
if presign_upload.code != UNSET and presign_upload.code != 0
|
|
420
|
+
else presign_upload_obj.status_code.value
|
|
253
421
|
)
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
):
|
|
262
|
-
raise APIException("Failed to get presigned upload url: empty response")
|
|
263
|
-
|
|
264
|
-
# 2) Upload file to S3 (presigned URL)
|
|
265
|
-
_upload_file_to_presigned_url(
|
|
266
|
-
url=str(presign_upload.url),
|
|
267
|
-
file_path=local_path,
|
|
268
|
-
timeout=timeout,
|
|
269
|
-
)
|
|
270
|
-
|
|
271
|
-
# 3) Get presigned download URL
|
|
272
|
-
presign_download_obj = get_storage_presign_download.sync_detailed(
|
|
273
|
-
client=self._client,
|
|
274
|
-
key=str(presign_upload.key),
|
|
275
|
-
)
|
|
276
|
-
presign_download = presign_download_obj.parsed
|
|
277
|
-
if isinstance(presign_download, ErrorResponse):
|
|
278
|
-
status_code = (
|
|
279
|
-
presign_download.code
|
|
280
|
-
if presign_download.code != UNSET and presign_download.code != 0
|
|
281
|
-
else presign_download_obj.status_code.value
|
|
282
|
-
)
|
|
283
|
-
exception = handle_api_exception(
|
|
284
|
-
Response(
|
|
285
|
-
status_code=HTTPStatus(status_code),
|
|
286
|
-
content=presign_download_obj.content,
|
|
287
|
-
headers=presign_download_obj.headers,
|
|
288
|
-
parsed=None,
|
|
422
|
+
exception = handle_api_exception(
|
|
423
|
+
Response(
|
|
424
|
+
status_code=HTTPStatus(status_code),
|
|
425
|
+
content=presign_upload_obj.content,
|
|
426
|
+
headers=presign_upload_obj.headers,
|
|
427
|
+
parsed=None,
|
|
428
|
+
)
|
|
289
429
|
)
|
|
430
|
+
raise exception
|
|
431
|
+
|
|
432
|
+
if (
|
|
433
|
+
presign_upload is None
|
|
434
|
+
or presign_upload.url in (UNSET, None)
|
|
435
|
+
or presign_upload.key in (UNSET, None)
|
|
436
|
+
):
|
|
437
|
+
raise APIException("Failed to get presigned upload url: empty response")
|
|
438
|
+
|
|
439
|
+
# 2) Upload file to S3 (presigned URL)
|
|
440
|
+
_upload_file_to_presigned_url(
|
|
441
|
+
url=str(presign_upload.url),
|
|
442
|
+
file_path=local_path,
|
|
443
|
+
timeout=timeout,
|
|
290
444
|
)
|
|
291
|
-
raise exception
|
|
292
445
|
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
446
|
+
# 3) Get presigned download URL
|
|
447
|
+
presign_download_obj = get_storage_presign_download.sync_detailed(
|
|
448
|
+
client=self._client,
|
|
449
|
+
key=str(presign_upload.key),
|
|
296
450
|
)
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
451
|
+
presign_download = presign_download_obj.parsed
|
|
452
|
+
if isinstance(presign_download, ErrorResponse):
|
|
453
|
+
status_code = (
|
|
454
|
+
presign_download.code
|
|
455
|
+
if presign_download.code != UNSET and presign_download.code != 0
|
|
456
|
+
else presign_download_obj.status_code.value
|
|
457
|
+
)
|
|
458
|
+
exception = handle_api_exception(
|
|
459
|
+
Response(
|
|
460
|
+
status_code=HTTPStatus(status_code),
|
|
461
|
+
content=presign_download_obj.content,
|
|
462
|
+
headers=presign_download_obj.headers,
|
|
463
|
+
parsed=None,
|
|
464
|
+
)
|
|
465
|
+
)
|
|
466
|
+
raise exception
|
|
467
|
+
|
|
468
|
+
if presign_download is None or presign_download.url in (UNSET, None):
|
|
469
|
+
raise APIException("Failed to get presigned download url: empty response")
|
|
470
|
+
|
|
471
|
+
# 4) Set env var (merge if user already provided environment)
|
|
472
|
+
env: dict[str, str] = {}
|
|
473
|
+
existing_env = request_kwargs.get("environment")
|
|
474
|
+
if isinstance(existing_env, TaskSubmitRequestEnvironmentType0):
|
|
475
|
+
env.update(existing_env.additional_properties)
|
|
476
|
+
elif isinstance(existing_env, dict):
|
|
477
|
+
env.update(existing_env)
|
|
478
|
+
|
|
479
|
+
env["SYSTEM_DOWNLOAD_ARCHIVE_URL"] = str(presign_download.url)
|
|
480
|
+
request_kwargs["environment"] = TaskSubmitRequestEnvironmentType0.from_dict(env)
|
|
481
|
+
finally:
|
|
482
|
+
cleanup()
|
|
310
483
|
|
|
311
484
|
request = TaskSubmitRequest(**request_kwargs)
|
|
312
485
|
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: mlops-python-sdk
|
|
3
|
+
Version: 1.0.4
|
|
4
|
+
Summary: MLOps Python SDK for XCloud Service API
|
|
5
|
+
License: MIT
|
|
6
|
+
Author: mlops
|
|
7
|
+
Author-email: mlops@example.com
|
|
8
|
+
Requires-Python: >=3.9,<4.0
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Requires-Dist: attrs (>=23.2.0)
|
|
17
|
+
Requires-Dist: httpx (>=0.27.0,<1.0.0)
|
|
18
|
+
Requires-Dist: packaging (>=24.1)
|
|
19
|
+
Requires-Dist: python-dateutil (>=2.8.2)
|
|
20
|
+
Requires-Dist: typing-extensions (>=4.1.0)
|
|
21
|
+
Project-URL: Bug Tracker, https://github.com/xcloud-service/xservice/issues
|
|
22
|
+
Project-URL: Homepage, https://mlops.cloud/
|
|
23
|
+
Project-URL: Repository, https://github.com/xcloud-service/xservice
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
|
|
26
|
+
# SDK
|
|
27
|
+
|
|
28
|
+
Software Development Kits for integrating with the XCloud Service API.
|
|
29
|
+
|
|
30
|
+
> [!NOTE] SDK Support
|
|
31
|
+
> SDKs provide type-safe, high-level interfaces for interacting with the platform API. They handle authentication, typed response parsing, and structured error handling automatically.
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
## Installation
|
|
35
|
+
|
|
36
|
+
Install the Python SDK from PyPI:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install mlops-python-sdk
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### Configuration
|
|
43
|
+
|
|
44
|
+
The SDK reads configuration from environment variables by default:
|
|
45
|
+
|
|
46
|
+
- `MLOPS_API_KEY`: API key (required)
|
|
47
|
+
- `MLOPS_DOMAIN`: API domain, e.g. `localhost:8090` or `https://example.com`
|
|
48
|
+
- `MLOPS_API_PATH`: API path prefix (default: `/api/v1`)
|
|
49
|
+
- `MLOPS_DEBUG`: `true|false` (default: `false`)
|
|
50
|
+
|
|
51
|
+
Or configure in code:
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
from mlops import ConnectionConfig, Task
|
|
55
|
+
|
|
56
|
+
config = ConnectionConfig(
|
|
57
|
+
api_key="xck_...",
|
|
58
|
+
domain="https://example.com",
|
|
59
|
+
api_path="/api/v1",
|
|
60
|
+
debug=False,
|
|
61
|
+
)
|
|
62
|
+
task = Task(config=config)
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## SDK Usage
|
|
66
|
+
|
|
67
|
+
### Initialize client
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from mlops import Task
|
|
71
|
+
|
|
72
|
+
task = Task() # uses environment variables by default
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Submit a GPU task
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
from mlops import Task
|
|
79
|
+
|
|
80
|
+
task = Task()
|
|
81
|
+
resp = task.submit(
|
|
82
|
+
name="gpu-task-from-sdk",
|
|
83
|
+
cluster_name="slurm-cn",
|
|
84
|
+
team_id=1,
|
|
85
|
+
image="/mnt/minio/images/01ai-registry.cn-shanghai.cr.aliyuncs.com+public+llamafactory+0.9.3.sqsh",
|
|
86
|
+
entry_command="llamafactory-cli train /workspace/config/test_lora.yaml",
|
|
87
|
+
resources={
|
|
88
|
+
"partition": "gpu",
|
|
89
|
+
"nodes": 2,
|
|
90
|
+
"ntasks": 2,
|
|
91
|
+
"cpus_per_task": 2,
|
|
92
|
+
"memory": "4G",
|
|
93
|
+
"time": "01:00:00",
|
|
94
|
+
"gres": "gpu:nvidia_a10:1",
|
|
95
|
+
"qos": "qos_xcloud",
|
|
96
|
+
},
|
|
97
|
+
file_path="/path/to/xservice.zip", # optional: a .zip/.tar.gz/.tgz archive, or a plain file/directory (zipped automatically before upload)
|
|
98
|
+
)
|
|
99
|
+
print(resp.job_id)
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### Submit a CPU task
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
from mlops import Task
|
|
106
|
+
|
|
107
|
+
task = Task()
|
|
108
|
+
resp = task.submit(
|
|
109
|
+
name="cpu-task-from-sdk",
|
|
110
|
+
cluster_name="slurm-cn",
|
|
111
|
+
team_id=1,
|
|
112
|
+
image="docker://01ai-registry.cn-shanghai.cr.aliyuncs.com/01-ai/xcs/v2/alpine:3.23.0",
|
|
113
|
+
entry_command="echo hello",
|
|
114
|
+
resources={
|
|
115
|
+
"partition": "cpu",
|
|
116
|
+
"nodes": 1,
|
|
117
|
+
"ntasks": 1,
|
|
118
|
+
"cpus_per_task": 1,
|
|
119
|
+
"memory": "1G",
|
|
120
|
+
"time": "01:00:00",
|
|
121
|
+
"qos": "qos_xcloud",
|
|
122
|
+
},
|
|
123
|
+
)
|
|
124
|
+
print(resp.job_id)
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### List tasks
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
from mlops import Task
|
|
131
|
+
from mlops.api.client.models.task_status import TaskStatus
|
|
132
|
+
|
|
133
|
+
task = Task()
|
|
134
|
+
resp = task.list(status=TaskStatus.COMPLETED, cluster_name="slurm-cn", page=1, page_size=20)
|
|
135
|
+
print(len(resp.tasks or []))
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### Get task details
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
from mlops import Task
|
|
142
|
+
|
|
143
|
+
task = Task()
|
|
144
|
+
task_info = task.get(task_id=12345, cluster_name="slurm-cn")
|
|
145
|
+
print(task_info)
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
### Cancel a task
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
from mlops import Task
|
|
152
|
+
|
|
153
|
+
task = Task()
|
|
154
|
+
task.cancel(task_id=12345, cluster_name="slurm-cn")
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Delete a task
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
from mlops import Task
|
|
161
|
+
|
|
162
|
+
task = Task()
|
|
163
|
+
task.delete(task_id=12345, cluster_name="slurm-cn")
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
**Task Management Methods:**
|
|
167
|
+
|
|
168
|
+
- `submit()` - Submit a new task with container image and entry command
|
|
169
|
+
- `get()` - Get task details by task ID
|
|
170
|
+
- `list()` - List tasks with optional filters (status, cluster_name, team_id, user_id)
|
|
171
|
+
- `cancel()` - Cancel a running task
|
|
172
|
+
- `delete()` - Delete a task record
|
|
173
|
+
|
|
174
|
+
**Task Status Values:**
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
from mlops.api.client.models.task_status import TaskStatus
|
|
178
|
+
|
|
179
|
+
TaskStatus.PENDING # Task is pending
|
|
180
|
+
TaskStatus.QUEUED # Task is queued
|
|
181
|
+
TaskStatus.RUNNING # Task is running
|
|
182
|
+
TaskStatus.COMPLETED # Task completed successfully
|
|
183
|
+
TaskStatus.SUCCEEDED # Task succeeded
|
|
184
|
+
TaskStatus.FAILED # Task failed
|
|
185
|
+
TaskStatus.CANCELLED # Task was cancelled
|
|
186
|
+
TaskStatus.CREATED # Task was created
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
**Error Handling:**
|
|
190
|
+
|
|
191
|
+
```python
|
|
192
|
+
from mlops.exceptions import (
|
|
193
|
+
APIException,
|
|
194
|
+
AuthenticationException,
|
|
195
|
+
NotFoundException,
|
|
196
|
+
RateLimitException,
|
|
197
|
+
TimeoutException,
|
|
198
|
+
InvalidArgumentException,
|
|
199
|
+
NotEnoughSpaceException
|
|
200
|
+
)
|
|
201
|
+
from mlops import Task
|
|
202
|
+
|
|
203
|
+
task = Task()
|
|
204
|
+
|
|
205
|
+
try:
|
|
206
|
+
result = task.submit(
|
|
207
|
+
name="test",
|
|
208
|
+
cluster_name="slurm-cn",
|
|
209
|
+
image="docker://alpine:3.23.0",
|
|
210
|
+
entry_command="echo hello",
|
|
211
|
+
)
|
|
212
|
+
except AuthenticationException as e:
|
|
213
|
+
print(f"Authentication failed: {e}")
|
|
214
|
+
except NotFoundException as e:
|
|
215
|
+
print(f"Resource not found: {e}")
|
|
216
|
+
except APIException as e:
|
|
217
|
+
print(f"API error: {e}")
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
> [!TIP] Error Handling
|
|
221
|
+
> SDKs automatically parse typed responses and raise structured exceptions.
|
|
222
|
+
|
|
223
|
+
## Features
|
|
224
|
+
|
|
225
|
+
- Type-safe API clients
|
|
226
|
+
- Automatic authentication
|
|
227
|
+
- Error handling
|
|
228
|
+
- Typed response parsing (generated models)
|
|
229
|
+
- Unexpected-status guard (optional)
|
|
230
|
+
|
|
231
|
+
## Resources
|
|
232
|
+
|
|
233
|
+
- [Python SDK Documentation](https://github.com/xcloud-service/xservice/tree/main/client/python-sdk)
|
|
234
|
+
- [API Reference](https://xcloud-service.com/docs/api)
|
|
235
|
+
|
|
@@ -42,11 +42,11 @@ mlops/api/client/models/task_tres_type_0.py,sha256=rEaiQG7A19mlTIHDppzxuWa4oPfh9
|
|
|
42
42
|
mlops/api/client/models/task_tres_used_type_0.py,sha256=4w6An7-ZCqa8cc3SPi7mcwGK-ekT6AYq_dEdf8KzoYA,1320
|
|
43
43
|
mlops/api/client/py.typed,sha256=8ZJUsxZiuOy1oJeVhsTWQhTG_6pTVHVXk5hJL79ebTk,25
|
|
44
44
|
mlops/api/client/types.py,sha256=AX4orxQZQJat3vZrgjJ-TYb2sNBL8kNo9yqYDT-n8y8,1391
|
|
45
|
-
mlops/connection_config.py,sha256=
|
|
45
|
+
mlops/connection_config.py,sha256=yrY-FKyqtgqXmbAQyhlLIwDy1wDyjnT_mOhAFHAzek0,3170
|
|
46
46
|
mlops/exceptions.py,sha256=3kfda-Rz0km9kV-gvnPCw7ueemWkXIGGdT0NXx6z9Xk,1680
|
|
47
47
|
mlops/task/__init__.py,sha256=M983vMPLj3tZQNFXQyTP5I2RsRorFElezLeppr3WLsw,133
|
|
48
48
|
mlops/task/client.py,sha256=V131WLVJl1raGAVixUhJCX8s1neN15mxAjQwO01qlIg,3552
|
|
49
|
-
mlops/task/task.py,sha256=
|
|
50
|
-
mlops_python_sdk-1.0.
|
|
51
|
-
mlops_python_sdk-1.0.
|
|
52
|
-
mlops_python_sdk-1.0.
|
|
49
|
+
mlops/task/task.py,sha256=HT7TtOqLw4FvF80c-_I-XWK97_9OBR7pC2i2NGZNVO4,30663
|
|
50
|
+
mlops_python_sdk-1.0.4.dist-info/METADATA,sha256=3_g9WfaGdDtmNBnqMONGOTL_Mol8UAdC607tE947kLs,5679
|
|
51
|
+
mlops_python_sdk-1.0.4.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
52
|
+
mlops_python_sdk-1.0.4.dist-info/RECORD,,
|
|
@@ -1,254 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.3
|
|
2
|
-
Name: mlops-python-sdk
|
|
3
|
-
Version: 1.0.2
|
|
4
|
-
Summary: MLOps Python SDK for XCloud Service API
|
|
5
|
-
License: MIT
|
|
6
|
-
Author: mlops
|
|
7
|
-
Author-email: mlops@example.com
|
|
8
|
-
Requires-Python: >=3.9,<4.0
|
|
9
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
-
Classifier: Programming Language :: Python :: 3
|
|
11
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
-
Requires-Dist: attrs (>=23.2.0)
|
|
17
|
-
Requires-Dist: httpx (>=0.27.0,<1.0.0)
|
|
18
|
-
Requires-Dist: packaging (>=24.1)
|
|
19
|
-
Requires-Dist: python-dateutil (>=2.8.2)
|
|
20
|
-
Requires-Dist: typing-extensions (>=4.1.0)
|
|
21
|
-
Project-URL: Bug Tracker, https://github.com/xcloud-service/xservice/issues
|
|
22
|
-
Project-URL: Homepage, https://mlops.cloud/
|
|
23
|
-
Project-URL: Repository, https://github.com/xcloud-service/xservice
|
|
24
|
-
Description-Content-Type: text/markdown
|
|
25
|
-
|
|
26
|
-
# SDK
|
|
27
|
-
|
|
28
|
-
Software Development Kits for integrating with the XCloud Service API.
|
|
29
|
-
|
|
30
|
-
> [!NOTE] SDK Support
|
|
31
|
-
> SDKs provide type-safe, high-level interfaces for interacting with the platform API. They handle authentication, error handling, and request retries automatically.
|
|
32
|
-
|
|
33
|
-
## Available SDKs
|
|
34
|
-
|
|
35
|
-
### Python SDK
|
|
36
|
-
|
|
37
|
-
### Installation
|
|
38
|
-
|
|
39
|
-
The Python SDK installation.
|
|
40
|
-
|
|
41
|
-
```bash
|
|
42
|
-
pip install mlops-python-sdk
|
|
43
|
-
```
|
|
44
|
-
|
|
45
|
-
### Configuration
|
|
46
|
-
|
|
47
|
-
The SDK reads configuration from environment variables by default:
|
|
48
|
-
|
|
49
|
-
- `MLOPS_API_KEY`: API key (required)
|
|
50
|
-
- `MLOPS_DOMAIN`: API domain, e.g. `localhost:8090` or `https://example.com`
|
|
51
|
-
- `MLOPS_API_PATH`: API path prefix (default: `/api/v1`)
|
|
52
|
-
- `MLOPS_DEBUG`: `true|false` (default: `false`)
|
|
53
|
-
|
|
54
|
-
Or configure in code:
|
|
55
|
-
|
|
56
|
-
```python
|
|
57
|
-
from mlops import ConnectionConfig, Task
|
|
58
|
-
|
|
59
|
-
config = ConnectionConfig(
|
|
60
|
-
api_key="xck_...",
|
|
61
|
-
domain="https://example.com",
|
|
62
|
-
api_path="/api/v1",
|
|
63
|
-
debug=False,
|
|
64
|
-
)
|
|
65
|
-
task = Task(config=config)
|
|
66
|
-
```
|
|
67
|
-
|
|
68
|
-
### Usage
|
|
69
|
-
|
|
70
|
-
```python
|
|
71
|
-
from mlops import Task
|
|
72
|
-
from mlops.api.client.models.task_status import TaskStatus
|
|
73
|
-
from pathlib import Path
|
|
74
|
-
|
|
75
|
-
# Initialize Task client (uses environment variables by default)
|
|
76
|
-
task = Task()
|
|
77
|
-
|
|
78
|
-
# Submit a task with gpu type
|
|
79
|
-
try:
|
|
80
|
-
result = task.submit(
|
|
81
|
-
name="gpu-task-from-sdk",
|
|
82
|
-
image="/mnt/minio/images/01ai-registry.cn-shanghai.cr.aliyuncs.com+public+llamafactory+0.9.3.sqsh",
|
|
83
|
-
entry_command="llamafactory-cli train /workspace/config/test_lora.yaml",
|
|
84
|
-
resources={
|
|
85
|
-
"partition": "gpu",
|
|
86
|
-
"nodes": 2,
|
|
87
|
-
"ntasks": 2,
|
|
88
|
-
"cpus_per_task": 2,
|
|
89
|
-
"memory": "4G",
|
|
90
|
-
"time": "01:00:00",
|
|
91
|
-
"gres": "gpu:nvidia_a10:1",
|
|
92
|
-
"qos": "qos_xcloud",
|
|
93
|
-
},
|
|
94
|
-
cluster_name="slurm-cn",
|
|
95
|
-
team_id=1,
|
|
96
|
-
file_path="your file path", # optional, support for .zip, .tar.gz, .tgz
|
|
97
|
-
)
|
|
98
|
-
|
|
99
|
-
if result is not None:
|
|
100
|
-
print("==== gpu task submitted successfully ====")
|
|
101
|
-
job_id = result.job_id
|
|
102
|
-
else:
|
|
103
|
-
print("==== gpu task submitted failed ====")
|
|
104
|
-
except Exception as e:
|
|
105
|
-
print("==== gpu task submitted failed error ====", e)
|
|
106
|
-
|
|
107
|
-
# Submit a task with cpu type
|
|
108
|
-
try:
|
|
109
|
-
entry_content = Path("entry.sh").read_text(encoding="utf-8")
|
|
110
|
-
result = task.submit(
|
|
111
|
-
name="cpu-task-from-sdk",
|
|
112
|
-
image="docker://01ai-registry.cn-shanghai.cr.aliyuncs.com/01-ai/xcs/v2/alpine:3.23.0",
|
|
113
|
-
entry_command=entry_content,
|
|
114
|
-
resources={
|
|
115
|
-
"partition": "cpu",
|
|
116
|
-
"nodes": 1,
|
|
117
|
-
"ntasks": 1,
|
|
118
|
-
"cpus_per_task": 1,
|
|
119
|
-
"memory": "1G",
|
|
120
|
-
"time": "01:00:00",
|
|
121
|
-
"qos": "qos_xcloud",
|
|
122
|
-
},
|
|
123
|
-
cluster_name="slurm-cn",
|
|
124
|
-
team_id=1,
|
|
125
|
-
)
|
|
126
|
-
|
|
127
|
-
if result is not None:
|
|
128
|
-
print("==== cpu task submitted successfully ====")
|
|
129
|
-
job_id = result.job_id
|
|
130
|
-
else:
|
|
131
|
-
print("==== cpu task submitted failed ====")
|
|
132
|
-
except Exception as e:
|
|
133
|
-
print("==== cpu task submitted failed error ====", e)
|
|
134
|
-
|
|
135
|
-
# List tasks with filters
|
|
136
|
-
try:
|
|
137
|
-
completed_tasks = task.list(
|
|
138
|
-
status=TaskStatus.COMPLETED,
|
|
139
|
-
cluster_name="slurm-cn",
|
|
140
|
-
page=1,
|
|
141
|
-
page_size=20
|
|
142
|
-
)
|
|
143
|
-
|
|
144
|
-
# Get task details
|
|
145
|
-
if completed_tasks is not None and len(completed_tasks.tasks) > 0:
|
|
146
|
-
print("==== completed_tasks number ====", len(completed_tasks.tasks))
|
|
147
|
-
task_info = task.get(task_id=completed_tasks.tasks[0].job_id, cluster_name="slurm-cn")
|
|
148
|
-
print("==== task_info ====", task_info)
|
|
149
|
-
else:
|
|
150
|
-
print("==== no completed tasks to get details ====")
|
|
151
|
-
except Exception as e:
|
|
152
|
-
print("==== get task details failed error ====", e)
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
# Cancel a running task
|
|
156
|
-
try:
|
|
157
|
-
running_tasks = task.list(
|
|
158
|
-
status=TaskStatus.RUNNING,
|
|
159
|
-
cluster_name="slurm-cn",
|
|
160
|
-
page=1,
|
|
161
|
-
page_size=20
|
|
162
|
-
)
|
|
163
|
-
if running_tasks is not None and len(running_tasks.tasks) > 0:
|
|
164
|
-
print("==== running_tasks number ====", len(running_tasks.tasks))
|
|
165
|
-
# Cancel a task
|
|
166
|
-
result = task.cancel(task_id=running_tasks.tasks[0].job_id, cluster_name="slurm-cn")
|
|
167
|
-
print("==== task cancelled ====", running_tasks.tasks[0].job_id, result)
|
|
168
|
-
else:
|
|
169
|
-
print("==== no running tasks to cancel ====")
|
|
170
|
-
except Exception as e:
|
|
171
|
-
print("==== cancel running task failed error ====", e)
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
# Delete a task
|
|
175
|
-
try:
|
|
176
|
-
completed_tasks = task.list(
|
|
177
|
-
status=TaskStatus.COMPLETED,
|
|
178
|
-
cluster_name="slurm-cn",
|
|
179
|
-
page=1,
|
|
180
|
-
page_size=20
|
|
181
|
-
)
|
|
182
|
-
if completed_tasks is not None and len(completed_tasks.tasks) > 0:
|
|
183
|
-
print("==== completed_tasks number ====", len(completed_tasks.tasks))
|
|
184
|
-
# Delete a task
|
|
185
|
-
result = task.delete(task_id=completed_tasks.tasks[0].job_id, cluster_name="slurm-cn")
|
|
186
|
-
print("==== task deleted ====", completed_tasks.tasks[0].job_id, result)
|
|
187
|
-
else:
|
|
188
|
-
print("==== no completed tasks to delete ====")
|
|
189
|
-
except Exception as e:
|
|
190
|
-
print("==== delete completed task failed error ====", e)
|
|
191
|
-
```
|
|
192
|
-
|
|
193
|
-
**Task Management Methods:**
|
|
194
|
-
|
|
195
|
-
- `submit()` - Submit a new task with container image and entry command
|
|
196
|
-
- `get()` - Get task details by task ID
|
|
197
|
-
- `list()` - List tasks with optional filters (status, cluster_name, team_id, user_id)
|
|
198
|
-
- `cancel()` - Cancel a running task
|
|
199
|
-
- `delete()` - Delete a task record
|
|
200
|
-
|
|
201
|
-
**Task Status Values:**
|
|
202
|
-
|
|
203
|
-
```python
|
|
204
|
-
from mlops.api.client.models.task_status import TaskStatus
|
|
205
|
-
|
|
206
|
-
TaskStatus.PENDING # Task is pending
|
|
207
|
-
TaskStatus.QUEUED # Task is queued
|
|
208
|
-
TaskStatus.RUNNING # Task is running
|
|
209
|
-
TaskStatus.COMPLETED # Task completed successfully
|
|
210
|
-
TaskStatus.SUCCEEDED # Task succeeded
|
|
211
|
-
TaskStatus.FAILED # Task failed
|
|
212
|
-
TaskStatus.CANCELLED # Task was cancelled
|
|
213
|
-
TaskStatus.CREATED # Task was created
|
|
214
|
-
```
|
|
215
|
-
|
|
216
|
-
**Error Handling:**
|
|
217
|
-
|
|
218
|
-
```python
|
|
219
|
-
from mlops.exceptions import (
|
|
220
|
-
APIException,
|
|
221
|
-
AuthenticationException,
|
|
222
|
-
NotFoundException,
|
|
223
|
-
RateLimitException,
|
|
224
|
-
TimeoutException,
|
|
225
|
-
InvalidArgumentException,
|
|
226
|
-
NotEnoughSpaceException
|
|
227
|
-
)
|
|
228
|
-
|
|
229
|
-
try:
|
|
230
|
-
result = task.submit(name="test", cluster_name="slurm-cn", command="echo hello")
|
|
231
|
-
except AuthenticationException as e:
|
|
232
|
-
print(f"Authentication failed: {e}")
|
|
233
|
-
except NotFoundException as e:
|
|
234
|
-
print(f"Resource not found: {e}")
|
|
235
|
-
except APIException as e:
|
|
236
|
-
print(f"API error: {e}")
|
|
237
|
-
```
|
|
238
|
-
|
|
239
|
-
> [!TIP] Error Handling
|
|
240
|
-
> SDKs automatically handle common errors and retry failed requests. Check SDK documentation for error handling best practices.
|
|
241
|
-
|
|
242
|
-
## Features
|
|
243
|
-
|
|
244
|
-
- Type-safe API clients
|
|
245
|
-
- Automatic authentication
|
|
246
|
-
- Error handling
|
|
247
|
-
- Request retry logic
|
|
248
|
-
- Response validation
|
|
249
|
-
|
|
250
|
-
## Resources
|
|
251
|
-
|
|
252
|
-
- [Python SDK Documentation](https://github.com/xcloud-service/xservice/tree/main/client/python-sdk)
|
|
253
|
-
- [API Reference](https://xcloud-service.com/docs/api)
|
|
254
|
-
|
|
File without changes
|