mlops-python-sdk 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -66,17 +66,7 @@ class ConnectionConfig:
66
66
  self.api_path = "/" + self.api_path
67
67
 
68
68
  # Build API URL
69
- if self.debug:
70
- base_url = "http://localhost:8090"
71
- else:
72
- # If domain already includes protocol, use it as-is
73
- # Otherwise, default to http:// for backward compatibility
74
- if self.domain.startswith(("http://", "https://")):
75
- base_url = self.domain
76
- else:
77
- base_url = f"http://{self.domain}"
78
-
79
- self.api_url = f"{base_url}{self.api_path}"
69
+ self.build_api_url()
80
70
 
81
71
  @staticmethod
82
72
  def _get_request_timeout(
@@ -89,7 +79,22 @@ class ConnectionConfig:
89
79
  return request_timeout
90
80
  else:
91
81
  return default_timeout
92
-
82
+
83
+ def build_api_url(self) -> None:
84
+ if self.debug:
85
+ base_url = "http://localhost:8090"
86
+ else:
87
+ # If domain already includes protocol, use it as-is
88
+ # Otherwise, default to http:// for backward compatibility
89
+ if self.domain.startswith(("http://", "https://")):
90
+ base_url = self.domain
91
+ elif self.domain.startswith("localhost") or self.domain.startswith("127.0.0.1"):
92
+ base_url = f"http://{self.domain}"
93
+ else:
94
+ base_url = f"https://{self.domain}"
95
+
96
+ self.api_url = f"{base_url}{self.api_path}"
97
+
93
98
  def get_request_timeout(self, request_timeout: Optional[float] = None):
94
99
  return self._get_request_timeout(self.request_timeout, request_timeout)
95
100
 
mlops/task/task.py CHANGED
@@ -6,9 +6,15 @@ This module provides a convenient interface for managing tasks through the MLOps
6
6
 
7
7
  import json
8
8
  import os
9
+ import shutil
10
+ import sys
11
+ import threading
12
+ import tempfile
13
+ import time
14
+ import zipfile
9
15
  from http import HTTPStatus
10
16
  from pathlib import Path
11
- from typing import Optional
17
+ from typing import Callable, Optional
12
18
 
13
19
  import httpx
14
20
 
@@ -53,15 +59,183 @@ def _validate_archive_file_path(file_path: str) -> Path:
53
59
  raise APIException(f"file_path must be one of .zip, .tar.gz, .tgz: {p}")
54
60
  return p
55
61
 
62
+ def _is_archive_path(p: Path) -> bool:
63
+ lower = p.name.lower()
64
+ return lower.endswith(".zip") or lower.endswith(".tar.gz") or lower.endswith(".tgz")
65
+
66
+
67
+ def _zip_directory(src_dir: Path, dst_zip: Path) -> None:
68
+ src_dir = src_dir.resolve()
69
+ root_name = src_dir.name
70
+ with zipfile.ZipFile(dst_zip, "w", compression=zipfile.ZIP_DEFLATED) as zf:
71
+ for p in src_dir.rglob("*"):
72
+ if p.is_dir():
73
+ continue
74
+ rel = p.relative_to(src_dir).as_posix()
75
+ if rel in ("", "."):
76
+ continue
77
+ zf.write(p, arcname=f"{root_name}/{rel}")
78
+
79
+
80
+ def _zip_file(src_file: Path, dst_zip: Path) -> None:
81
+ src_file = src_file.resolve()
82
+ with zipfile.ZipFile(dst_zip, "w", compression=zipfile.ZIP_DEFLATED) as zf:
83
+ zf.write(src_file, arcname=src_file.name)
84
+
85
+
86
+ def _path_to_archive_path(task_name: str, file_path: str) -> tuple[Path, Callable[[], None]]:
87
+ """
88
+ Mirror cli-go `pathToArchivePath` behavior:
89
+ - directory: zip it
90
+ - file: if already an archive (.zip/.tar.gz/.tgz) return as-is; otherwise zip it
91
+ Returns (archive_path, cleanup_callable).
92
+ """
93
+ p = Path(os.path.expanduser(file_path)).resolve()
94
+ if not p.exists():
95
+ raise APIException(f"File not found: {p}")
96
+
97
+ if p.is_dir():
98
+ tmp_dir = (
99
+ Path(tempfile.gettempdir())
100
+ / "xservice-cli"
101
+ / task_name
102
+ / time.strftime("%Y%m%d%H%M%S")
103
+ )
104
+ tmp_dir.mkdir(parents=True, exist_ok=True)
105
+ archive_path = tmp_dir / f"{p.name}.zip"
106
+ try:
107
+ _zip_directory(p, archive_path)
108
+ except Exception as e:
109
+ shutil.rmtree(tmp_dir, ignore_errors=True)
110
+ raise APIException(f"failed to compress directory: {e}") from e
111
+ return archive_path, lambda: shutil.rmtree(tmp_dir, ignore_errors=True)
112
+
113
+ if not p.is_file():
114
+ raise APIException(f"file_path must be a file or directory: {p}")
115
+
116
+ if _is_archive_path(p):
117
+ return _validate_archive_file_path(str(p)), lambda: None
118
+
119
+ tmp_dir = (
120
+ Path(tempfile.gettempdir())
121
+ / "xservice-cli"
122
+ / task_name
123
+ / time.strftime("%Y%m%d%H%M%S")
124
+ )
125
+ tmp_dir.mkdir(parents=True, exist_ok=True)
126
+ archive_path = tmp_dir / f"{p.name}.zip"
127
+ try:
128
+ _zip_file(p, archive_path)
129
+ except Exception as e:
130
+ shutil.rmtree(tmp_dir, ignore_errors=True)
131
+ raise APIException(f"failed to compress file: {e}") from e
132
+ return archive_path, lambda: shutil.rmtree(tmp_dir, ignore_errors=True)
133
+
56
134
 
57
135
  def _upload_file_to_presigned_url(url: str, file_path: Path, timeout: Optional[float]) -> None:
136
+ def _format_bytes_iec(n: int) -> str:
137
+ if n < 1024:
138
+ return f"{n}B"
139
+ unit = 1024.0
140
+ suffixes = ["KiB", "MiB", "GiB", "TiB", "PiB"]
141
+ v = float(n)
142
+ i = -1
143
+ while v >= unit and i < len(suffixes) - 1:
144
+ v /= unit
145
+ i += 1
146
+ return f"{v:.1f}{suffixes[i]}"
147
+
148
+ def _render_bar(done: int, total: int, width: int = 28) -> str:
149
+ if total <= 0 or width <= 1:
150
+ return ">"
151
+ done = max(0, min(done, total))
152
+ filled = int(width * (done / total))
153
+ if filled >= width:
154
+ return "=" * width
155
+ if filled <= 0:
156
+ return ">" + (" " * (width - 1))
157
+ return ("=" * filled) + ">" + (" " * (width - filled - 1))
158
+
159
+ def _format_elapsed_seconds(start: float) -> str:
160
+ sec = int(max(0.0, time.monotonic() - start))
161
+ return f"{sec}s"
162
+
163
+ class _ProgressIterable:
164
+ def __init__(self, f, total: int, name: str, chunk_size: int = 64 * 1024):
165
+ self._f = f # file-like object
166
+ self._total = max(0, int(total))
167
+ self._name = name
168
+ self._chunk_size = max(1, int(chunk_size))
169
+ self._read = 0
170
+ self._start = time.monotonic()
171
+ self._completed = False
172
+ self._out = sys.stdout
173
+ try:
174
+ self._is_tty = bool(self._out.isatty())
175
+ except Exception:
176
+ self._is_tty = False
177
+
178
+ def _render_line(self, display_read: int) -> str:
179
+ display_read = max(0, min(int(display_read), self._total))
180
+ pct = (display_read / self._total) * 100.0 if self._total > 0 else 0.0
181
+ bar = _render_bar(display_read, self._total, width=28)
182
+ elapsed = _format_elapsed_seconds(self._start)
183
+ return (
184
+ f"uploading {self._name} [{bar}] {pct:6.2f}% "
185
+ f"({_format_bytes_iec(display_read)}/{_format_bytes_iec(self._total)}) "
186
+ f"elapsed {elapsed}"
187
+ )
188
+
189
+ def _print_line(self, line: str, final: bool = False) -> None:
190
+ if self._is_tty:
191
+ # Refresh same line in terminal.
192
+ print("\r" + line, end="" if not final else "\n", file=self._out, flush=True)
193
+ else:
194
+ # Always visible in non-TTY environments.
195
+ print(line, file=self._out, flush=True)
196
+
197
+ def __iter__(self):
198
+ stop_event = threading.Event()
199
+
200
+ def ticker() -> None:
201
+ last_sec = -1
202
+ # Print immediately so users see something right away.
203
+ self._print_line(self._render_line(self._read))
204
+ while not stop_event.is_set():
205
+ sec = int(max(0.0, time.monotonic() - self._start))
206
+ if sec != last_sec:
207
+ last_sec = sec
208
+ self._print_line(self._render_line(self._read))
209
+ # check frequently to avoid skipping seconds
210
+ stop_event.wait(0.05)
211
+
212
+ t = threading.Thread(target=ticker, name="mlops-upload-progress", daemon=True)
213
+ t.start()
214
+ try:
215
+ while True:
216
+ chunk = self._f.read(self._chunk_size)
217
+ if not chunk:
218
+ break
219
+ self._read += len(chunk)
220
+ yield chunk
221
+ finally:
222
+ # Ensure a final 100% line and stop ticker.
223
+ self._read = self._total
224
+ self._completed = True
225
+ stop_event.set()
226
+ t.join(timeout=0.2)
227
+ self._print_line(self._render_line(self._read), final=True)
228
+
58
229
  size = file_path.stat().st_size
59
230
  # Use a dedicated client for S3 presigned upload (avoid leaking API auth headers).
60
231
  with httpx.Client(timeout=timeout) as client:
61
232
  with file_path.open("rb") as f:
233
+ content = f
234
+ if size > 0:
235
+ content = _ProgressIterable(f, total=size, name=file_path.name)
62
236
  resp = client.put(
63
237
  url,
64
- content=f,
238
+ content=content,
65
239
  headers={
66
240
  "Content-Length": str(size),
67
241
  "Content-Type": "application/octet-stream",
@@ -147,6 +321,7 @@ class Task:
147
321
  config.api_key = api_key
148
322
  if domain is not None:
149
323
  config.domain = domain
324
+ config.build_api_url()
150
325
  if debug is not None:
151
326
  config.debug = debug
152
327
  if request_timeout is not None:
@@ -229,84 +404,82 @@ class Task:
229
404
  request_kwargs["ntasks"] = 1
230
405
 
231
406
  if file_path:
232
- local_path = _validate_archive_file_path(file_path)
407
+ local_path, cleanup = _path_to_archive_path(name, file_path)
233
408
  timeout = self._config.get_request_timeout()
234
-
235
- # 1) Get presigned upload URL
236
- presign_upload_obj = get_storage_presign_upload.sync_detailed(
237
- client=self._client,
238
- filename=local_path.name,
239
- )
240
- presign_upload = presign_upload_obj.parsed
241
- if isinstance(presign_upload, ErrorResponse):
242
- status_code = (
243
- presign_upload.code
244
- if presign_upload.code != UNSET and presign_upload.code != 0
245
- else presign_upload_obj.status_code.value
409
+ try:
410
+ # 1) Get presigned upload URL
411
+ presign_upload_obj = get_storage_presign_upload.sync_detailed(
412
+ client=self._client,
413
+ filename=local_path.name,
246
414
  )
247
- exception = handle_api_exception(
248
- Response(
249
- status_code=HTTPStatus(status_code),
250
- content=presign_upload_obj.content,
251
- headers=presign_upload_obj.headers,
252
- parsed=None,
415
+ presign_upload = presign_upload_obj.parsed
416
+ if isinstance(presign_upload, ErrorResponse):
417
+ status_code = (
418
+ presign_upload.code
419
+ if presign_upload.code != UNSET and presign_upload.code != 0
420
+ else presign_upload_obj.status_code.value
253
421
  )
254
- )
255
- raise exception
256
-
257
- if (
258
- presign_upload is None
259
- or presign_upload.url in (UNSET, None)
260
- or presign_upload.key in (UNSET, None)
261
- ):
262
- raise APIException("Failed to get presigned upload url: empty response")
263
-
264
- # 2) Upload file to S3 (presigned URL)
265
- _upload_file_to_presigned_url(
266
- url=str(presign_upload.url),
267
- file_path=local_path,
268
- timeout=timeout,
269
- )
270
-
271
- # 3) Get presigned download URL
272
- presign_download_obj = get_storage_presign_download.sync_detailed(
273
- client=self._client,
274
- key=str(presign_upload.key),
275
- )
276
- presign_download = presign_download_obj.parsed
277
- if isinstance(presign_download, ErrorResponse):
278
- status_code = (
279
- presign_download.code
280
- if presign_download.code != UNSET and presign_download.code != 0
281
- else presign_download_obj.status_code.value
282
- )
283
- exception = handle_api_exception(
284
- Response(
285
- status_code=HTTPStatus(status_code),
286
- content=presign_download_obj.content,
287
- headers=presign_download_obj.headers,
288
- parsed=None,
422
+ exception = handle_api_exception(
423
+ Response(
424
+ status_code=HTTPStatus(status_code),
425
+ content=presign_upload_obj.content,
426
+ headers=presign_upload_obj.headers,
427
+ parsed=None,
428
+ )
289
429
  )
430
+ raise exception
431
+
432
+ if (
433
+ presign_upload is None
434
+ or presign_upload.url in (UNSET, None)
435
+ or presign_upload.key in (UNSET, None)
436
+ ):
437
+ raise APIException("Failed to get presigned upload url: empty response")
438
+
439
+ # 2) Upload file to S3 (presigned URL)
440
+ _upload_file_to_presigned_url(
441
+ url=str(presign_upload.url),
442
+ file_path=local_path,
443
+ timeout=timeout,
290
444
  )
291
- raise exception
292
445
 
293
- if presign_download is None or presign_download.url in (UNSET, None):
294
- raise APIException(
295
- "Failed to get presigned download url: empty response"
446
+ # 3) Get presigned download URL
447
+ presign_download_obj = get_storage_presign_download.sync_detailed(
448
+ client=self._client,
449
+ key=str(presign_upload.key),
296
450
  )
297
-
298
- # 4) Set env var (merge if user already provided environment)
299
- env: dict[str, str] = {}
300
- existing_env = request_kwargs.get("environment")
301
- if isinstance(existing_env, TaskSubmitRequestEnvironmentType0):
302
- env.update(existing_env.additional_properties)
303
- elif isinstance(existing_env, dict):
304
- env.update(existing_env)
305
-
306
- env["SYSTEM_DOWNLOAD_ARCHIVE_URL"] = str(presign_download.url)
307
- request_kwargs["environment"] = TaskSubmitRequestEnvironmentType0.from_dict(
308
- env
309
- )
451
+ presign_download = presign_download_obj.parsed
452
+ if isinstance(presign_download, ErrorResponse):
453
+ status_code = (
454
+ presign_download.code
455
+ if presign_download.code != UNSET and presign_download.code != 0
456
+ else presign_download_obj.status_code.value
457
+ )
458
+ exception = handle_api_exception(
459
+ Response(
460
+ status_code=HTTPStatus(status_code),
461
+ content=presign_download_obj.content,
462
+ headers=presign_download_obj.headers,
463
+ parsed=None,
464
+ )
465
+ )
466
+ raise exception
467
+
468
+ if presign_download is None or presign_download.url in (UNSET, None):
469
+ raise APIException("Failed to get presigned download url: empty response")
470
+
471
+ # 4) Set env var (merge if user already provided environment)
472
+ env: dict[str, str] = {}
473
+ existing_env = request_kwargs.get("environment")
474
+ if isinstance(existing_env, TaskSubmitRequestEnvironmentType0):
475
+ env.update(existing_env.additional_properties)
476
+ elif isinstance(existing_env, dict):
477
+ env.update(existing_env)
478
+
479
+ env["SYSTEM_DOWNLOAD_ARCHIVE_URL"] = str(presign_download.url)
480
+ request_kwargs["environment"] = TaskSubmitRequestEnvironmentType0.from_dict(env)
481
+ finally:
482
+ cleanup()
310
483
 
311
484
  request = TaskSubmitRequest(**request_kwargs)
312
485
 
@@ -0,0 +1,235 @@
1
+ Metadata-Version: 2.3
2
+ Name: mlops-python-sdk
3
+ Version: 1.0.4
4
+ Summary: MLOps Python SDK for XCloud Service API
5
+ License: MIT
6
+ Author: mlops
7
+ Author-email: mlops@example.com
8
+ Requires-Python: >=3.9,<4.0
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Requires-Dist: attrs (>=23.2.0)
17
+ Requires-Dist: httpx (>=0.27.0,<1.0.0)
18
+ Requires-Dist: packaging (>=24.1)
19
+ Requires-Dist: python-dateutil (>=2.8.2)
20
+ Requires-Dist: typing-extensions (>=4.1.0)
21
+ Project-URL: Bug Tracker, https://github.com/xcloud-service/xservice/issues
22
+ Project-URL: Homepage, https://mlops.cloud/
23
+ Project-URL: Repository, https://github.com/xcloud-service/xservice
24
+ Description-Content-Type: text/markdown
25
+
26
+ # SDK
27
+
28
+ Software Development Kits for integrating with the XCloud Service API.
29
+
30
+ > [!NOTE] SDK Support
31
+ > SDKs provide type-safe, high-level interfaces for interacting with the platform API. They handle authentication and error handling, and parse responses into typed models.
32
+
33
+
34
+ ## Installation
35
+
36
+ Install the Python SDK from PyPI:
37
+
38
+ ```bash
39
+ pip install mlops-python-sdk
40
+ ```
41
+
42
+ ### Configuration
43
+
44
+ The SDK reads configuration from environment variables by default:
45
+
46
+ - `MLOPS_API_KEY`: API key (required)
47
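+ - `MLOPS_DOMAIN`: API domain, e.g. `localhost:8090` or `https://example.com`. A domain without a scheme is reached over `https://`, except `localhost` and `127.0.0.1`, which use `http://`.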
48
+ - `MLOPS_API_PATH`: API path prefix (default: `/api/v1`)
49
+ - `MLOPS_DEBUG`: `true|false` (default: `false`)
50
+
51
+ Or configure in code:
52
+
53
+ ```python
54
+ from mlops import ConnectionConfig, Task
55
+
56
+ config = ConnectionConfig(
57
+ api_key="xck_...",
58
+ domain="https://example.com",
59
+ api_path="/api/v1",
60
+ debug=False,
61
+ )
62
+ task = Task(config=config)
63
+ ```
64
+
65
+ ## SDK Usage
66
+
67
+ ### Initialize client
68
+
69
+ ```python
70
+ from mlops import Task
71
+
72
+ task = Task() # uses environment variables by default
73
+ ```
74
+
75
+ ### Submit a GPU task
76
+
77
+ ```python
78
+ from mlops import Task
79
+
80
+ task = Task()
81
+ resp = task.submit(
82
+ name="gpu-task-from-sdk",
83
+ cluster_name="slurm-cn",
84
+ team_id=1,
85
+ image="/mnt/minio/images/01ai-registry.cn-shanghai.cr.aliyuncs.com+public+llamafactory+0.9.3.sqsh",
86
+ entry_command="llamafactory-cli train /workspace/config/test_lora.yaml",
87
+ resources={
88
+ "partition": "gpu",
89
+ "nodes": 2,
90
+ "ntasks": 2,
91
+ "cpus_per_task": 2,
92
+ "memory": "4G",
93
+ "time": "01:00:00",
94
+ "gres": "gpu:nvidia_a10:1",
95
+ "qos": "qos_xcloud",
96
+ },
97
+ file_path="/path/to/xservice.zip", # optional: an archive (.zip/.tar.gz/.tgz), or a plain file or directory, which is zipped before upload
98
+ )
99
+ print(resp.job_id)
100
+ ```
101
+
102
+ ### Submit a CPU task
103
+
104
+ ```python
105
+ from mlops import Task
106
+
107
+ task = Task()
108
+ resp = task.submit(
109
+ name="cpu-task-from-sdk",
110
+ cluster_name="slurm-cn",
111
+ team_id=1,
112
+ image="docker://01ai-registry.cn-shanghai.cr.aliyuncs.com/01-ai/xcs/v2/alpine:3.23.0",
113
+ entry_command="echo hello",
114
+ resources={
115
+ "partition": "cpu",
116
+ "nodes": 1,
117
+ "ntasks": 1,
118
+ "cpus_per_task": 1,
119
+ "memory": "1G",
120
+ "time": "01:00:00",
121
+ "qos": "qos_xcloud",
122
+ },
123
+ )
124
+ print(resp.job_id)
125
+ ```
126
+
127
+ ### List tasks
128
+
129
+ ```python
130
+ from mlops import Task
131
+ from mlops.api.client.models.task_status import TaskStatus
132
+
133
+ task = Task()
134
+ resp = task.list(status=TaskStatus.COMPLETED, cluster_name="slurm-cn", page=1, page_size=20)
135
+ print(len(resp.tasks or []))
136
+ ```
137
+
138
+ ### Get task details
139
+
140
+ ```python
141
+ from mlops import Task
142
+
143
+ task = Task()
144
+ task_info = task.get(task_id=12345, cluster_name="slurm-cn")
145
+ print(task_info)
146
+ ```
147
+
148
+ ### Cancel a task
149
+
150
+ ```python
151
+ from mlops import Task
152
+
153
+ task = Task()
154
+ task.cancel(task_id=12345, cluster_name="slurm-cn")
155
+ ```
156
+
157
+ ### Delete a task
158
+
159
+ ```python
160
+ from mlops import Task
161
+
162
+ task = Task()
163
+ task.delete(task_id=12345, cluster_name="slurm-cn")
164
+ ```
165
+
166
+ **Task Management Methods:**
167
+
168
+ - `submit()` - Submit a new task with container image and entry command
169
+ - `get()` - Get task details by task ID
170
+ - `list()` - List tasks with optional filters (status, cluster_name, team_id, user_id)
171
+ - `cancel()` - Cancel a running task
172
+ - `delete()` - Delete a task record
173
+
174
+ **Task Status Values:**
175
+
176
+ ```python
177
+ from mlops.api.client.models.task_status import TaskStatus
178
+
179
+ TaskStatus.PENDING # Task is pending
180
+ TaskStatus.QUEUED # Task is queued
181
+ TaskStatus.RUNNING # Task is running
182
+ TaskStatus.COMPLETED # Task completed successfully
183
+ TaskStatus.SUCCEEDED # Task succeeded
184
+ TaskStatus.FAILED # Task failed
185
+ TaskStatus.CANCELLED # Task was cancelled
186
+ TaskStatus.CREATED # Task was created
187
+ ```
188
+
189
+ **Error Handling:**
190
+
191
+ ```python
192
+ from mlops.exceptions import (
193
+ APIException,
194
+ AuthenticationException,
195
+ NotFoundException,
196
+ RateLimitException,
197
+ TimeoutException,
198
+ InvalidArgumentException,
199
+ NotEnoughSpaceException
200
+ )
201
+ from mlops import Task
202
+
203
+ task = Task()
204
+
205
+ try:
206
+ result = task.submit(
207
+ name="test",
208
+ cluster_name="slurm-cn",
209
+ image="docker://alpine:3.23.0",
210
+ entry_command="echo hello",
211
+ )
212
+ except AuthenticationException as e:
213
+ print(f"Authentication failed: {e}")
214
+ except NotFoundException as e:
215
+ print(f"Resource not found: {e}")
216
+ except APIException as e:
217
+ print(f"API error: {e}")
218
+ ```
219
+
220
+ > [!TIP] Error Handling
221
+ > SDKs automatically parse typed responses and raise structured exceptions.
222
+
223
+ ## Features
224
+
225
+ - Type-safe API clients
226
+ - Automatic authentication
227
+ - Error handling
228
+ - Typed response parsing (generated models)
229
+ - Unexpected-status guard (optional)
230
+
231
+ ## Resources
232
+
233
+ - [Python SDK Documentation](https://github.com/xcloud-service/xservice/tree/main/client/python-sdk)
234
+ - [API Reference](https://xcloud-service.com/docs/api)
235
+
@@ -42,11 +42,11 @@ mlops/api/client/models/task_tres_type_0.py,sha256=rEaiQG7A19mlTIHDppzxuWa4oPfh9
42
42
  mlops/api/client/models/task_tres_used_type_0.py,sha256=4w6An7-ZCqa8cc3SPi7mcwGK-ekT6AYq_dEdf8KzoYA,1320
43
43
  mlops/api/client/py.typed,sha256=8ZJUsxZiuOy1oJeVhsTWQhTG_6pTVHVXk5hJL79ebTk,25
44
44
  mlops/api/client/types.py,sha256=AX4orxQZQJat3vZrgjJ-TYb2sNBL8kNo9yqYDT-n8y8,1391
45
- mlops/connection_config.py,sha256=_b9sVFGJtf1GynmIB4NtKCzg7kkgE-wSrsG3LwzlOqk,2946
45
+ mlops/connection_config.py,sha256=yrY-FKyqtgqXmbAQyhlLIwDy1wDyjnT_mOhAFHAzek0,3170
46
46
  mlops/exceptions.py,sha256=3kfda-Rz0km9kV-gvnPCw7ueemWkXIGGdT0NXx6z9Xk,1680
47
47
  mlops/task/__init__.py,sha256=M983vMPLj3tZQNFXQyTP5I2RsRorFElezLeppr3WLsw,133
48
48
  mlops/task/client.py,sha256=V131WLVJl1raGAVixUhJCX8s1neN15mxAjQwO01qlIg,3552
49
- mlops/task/task.py,sha256=Y_lWpIVY9Wq-2iuaoZYuskcWHasUzLSpXi9fkwn7S3s,23882
50
- mlops_python_sdk-1.0.2.dist-info/METADATA,sha256=lBkRytOiRISGMHHzk93fijbmF9EC9iKSpHm-6I9QNsM,7637
51
- mlops_python_sdk-1.0.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
52
- mlops_python_sdk-1.0.2.dist-info/RECORD,,
49
+ mlops/task/task.py,sha256=HT7TtOqLw4FvF80c-_I-XWK97_9OBR7pC2i2NGZNVO4,30663
50
+ mlops_python_sdk-1.0.4.dist-info/METADATA,sha256=3_g9WfaGdDtmNBnqMONGOTL_Mol8UAdC607tE947kLs,5679
51
+ mlops_python_sdk-1.0.4.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
52
+ mlops_python_sdk-1.0.4.dist-info/RECORD,,
@@ -1,254 +0,0 @@
1
- Metadata-Version: 2.3
2
- Name: mlops-python-sdk
3
- Version: 1.0.2
4
- Summary: MLOps Python SDK for XCloud Service API
5
- License: MIT
6
- Author: mlops
7
- Author-email: mlops@example.com
8
- Requires-Python: >=3.9,<4.0
9
- Classifier: License :: OSI Approved :: MIT License
10
- Classifier: Programming Language :: Python :: 3
11
- Classifier: Programming Language :: Python :: 3.9
12
- Classifier: Programming Language :: Python :: 3.10
13
- Classifier: Programming Language :: Python :: 3.11
14
- Classifier: Programming Language :: Python :: 3.12
15
- Classifier: Programming Language :: Python :: 3.13
16
- Requires-Dist: attrs (>=23.2.0)
17
- Requires-Dist: httpx (>=0.27.0,<1.0.0)
18
- Requires-Dist: packaging (>=24.1)
19
- Requires-Dist: python-dateutil (>=2.8.2)
20
- Requires-Dist: typing-extensions (>=4.1.0)
21
- Project-URL: Bug Tracker, https://github.com/xcloud-service/xservice/issues
22
- Project-URL: Homepage, https://mlops.cloud/
23
- Project-URL: Repository, https://github.com/xcloud-service/xservice
24
- Description-Content-Type: text/markdown
25
-
26
- # SDK
27
-
28
- Software Development Kits for integrating with the XCloud Service API.
29
-
30
- > [!NOTE] SDK Support
31
- > SDKs provide type-safe, high-level interfaces for interacting with the platform API. They handle authentication, error handling, and request retries automatically.
32
-
33
- ## Available SDKs
34
-
35
- ### Python SDK
36
-
37
- ### Installation
38
-
39
- The Python SDK installation.
40
-
41
- ```bash
42
- pip install mlops-python-sdk
43
- ```
44
-
45
- ### Configuration
46
-
47
- The SDK reads configuration from environment variables by default:
48
-
49
- - `MLOPS_API_KEY`: API key (required)
50
- - `MLOPS_DOMAIN`: API domain, e.g. `localhost:8090` or `https://example.com`
51
- - `MLOPS_API_PATH`: API path prefix (default: `/api/v1`)
52
- - `MLOPS_DEBUG`: `true|false` (default: `false`)
53
-
54
- Or configure in code:
55
-
56
- ```python
57
- from mlops import ConnectionConfig, Task
58
-
59
- config = ConnectionConfig(
60
- api_key="xck_...",
61
- domain="https://example.com",
62
- api_path="/api/v1",
63
- debug=False,
64
- )
65
- task = Task(config=config)
66
- ```
67
-
68
- ### Usage
69
-
70
- ```python
71
- from mlops import Task
72
- from mlops.api.client.models.task_status import TaskStatus
73
- from pathlib import Path
74
-
75
- # Initialize Task client (uses environment variables by default)
76
- task = Task()
77
-
78
- # Submit a task with gpu type
79
- try:
80
- result = task.submit(
81
- name="gpu-task-from-sdk",
82
- image="/mnt/minio/images/01ai-registry.cn-shanghai.cr.aliyuncs.com+public+llamafactory+0.9.3.sqsh",
83
- entry_command="llamafactory-cli train /workspace/config/test_lora.yaml",
84
- resources={
85
- "partition": "gpu",
86
- "nodes": 2,
87
- "ntasks": 2,
88
- "cpus_per_task": 2,
89
- "memory": "4G",
90
- "time": "01:00:00",
91
- "gres": "gpu:nvidia_a10:1",
92
- "qos": "qos_xcloud",
93
- },
94
- cluster_name="slurm-cn",
95
- team_id=1,
96
- file_path="your file path", # optional, support for .zip, .tar.gz, .tgz
97
- )
98
-
99
- if result is not None:
100
- print("==== gpu task submitted successfully ====")
101
- job_id = result.job_id
102
- else:
103
- print("==== gpu task submitted failed ====")
104
- except Exception as e:
105
- print("==== gpu task submitted failed error ====", e)
106
-
107
- # Submit a task with cpu type
108
- try:
109
- entry_content = Path("entry.sh").read_text(encoding="utf-8")
110
- result = task.submit(
111
- name="cpu-task-from-sdk",
112
- image="docker://01ai-registry.cn-shanghai.cr.aliyuncs.com/01-ai/xcs/v2/alpine:3.23.0",
113
- entry_command=entry_content,
114
- resources={
115
- "partition": "cpu",
116
- "nodes": 1,
117
- "ntasks": 1,
118
- "cpus_per_task": 1,
119
- "memory": "1G",
120
- "time": "01:00:00",
121
- "qos": "qos_xcloud",
122
- },
123
- cluster_name="slurm-cn",
124
- team_id=1,
125
- )
126
-
127
- if result is not None:
128
- print("==== cpu task submitted successfully ====")
129
- job_id = result.job_id
130
- else:
131
- print("==== cpu task submitted failed ====")
132
- except Exception as e:
133
- print("==== cpu task submitted failed error ====", e)
134
-
135
- # List tasks with filters
136
- try:
137
- completed_tasks = task.list(
138
- status=TaskStatus.COMPLETED,
139
- cluster_name="slurm-cn",
140
- page=1,
141
- page_size=20
142
- )
143
-
144
- # Get task details
145
- if completed_tasks is not None and len(completed_tasks.tasks) > 0:
146
- print("==== completed_tasks number ====", len(completed_tasks.tasks))
147
- task_info = task.get(task_id=completed_tasks.tasks[0].job_id, cluster_name="slurm-cn")
148
- print("==== task_info ====", task_info)
149
- else:
150
- print("==== no completed tasks to get details ====")
151
- except Exception as e:
152
- print("==== get task details failed error ====", e)
153
-
154
-
155
- # Cancel a running task
156
- try:
157
- running_tasks = task.list(
158
- status=TaskStatus.RUNNING,
159
- cluster_name="slurm-cn",
160
- page=1,
161
- page_size=20
162
- )
163
- if running_tasks is not None and len(running_tasks.tasks) > 0:
164
- print("==== running_tasks number ====", len(running_tasks.tasks))
165
- # Cancel a task
166
- result = task.cancel(task_id=running_tasks.tasks[0].job_id, cluster_name="slurm-cn")
167
- print("==== task cancelled ====", running_tasks.tasks[0].job_id, result)
168
- else:
169
- print("==== no running tasks to cancel ====")
170
- except Exception as e:
171
- print("==== cancel running task failed error ====", e)
172
-
173
-
174
- # Delete a task
175
- try:
176
- completed_tasks = task.list(
177
- status=TaskStatus.COMPLETED,
178
- cluster_name="slurm-cn",
179
- page=1,
180
- page_size=20
181
- )
182
- if completed_tasks is not None and len(completed_tasks.tasks) > 0:
183
- print("==== completed_tasks number ====", len(completed_tasks.tasks))
184
- # Delete a task
185
- result = task.delete(task_id=completed_tasks.tasks[0].job_id, cluster_name="slurm-cn")
186
- print("==== task deleted ====", completed_tasks.tasks[0].job_id, result)
187
- else:
188
- print("==== no completed tasks to delete ====")
189
- except Exception as e:
190
- print("==== delete completed task failed error ====", e)
191
- ```
192
-
193
- **Task Management Methods:**
194
-
195
- - `submit()` - Submit a new task with container image and entry command
196
- - `get()` - Get task details by task ID
197
- - `list()` - List tasks with optional filters (status, cluster_name, team_id, user_id)
198
- - `cancel()` - Cancel a running task
199
- - `delete()` - Delete a task record
200
-
201
- **Task Status Values:**
202
-
203
- ```python
204
- from mlops.api.client.models.task_status import TaskStatus
205
-
206
- TaskStatus.PENDING # Task is pending
207
- TaskStatus.QUEUED # Task is queued
208
- TaskStatus.RUNNING # Task is running
209
- TaskStatus.COMPLETED # Task completed successfully
210
- TaskStatus.SUCCEEDED # Task succeeded
211
- TaskStatus.FAILED # Task failed
212
- TaskStatus.CANCELLED # Task was cancelled
213
- TaskStatus.CREATED # Task was created
214
- ```
215
-
216
- **Error Handling:**
217
-
218
- ```python
219
- from mlops.exceptions import (
220
- APIException,
221
- AuthenticationException,
222
- NotFoundException,
223
- RateLimitException,
224
- TimeoutException,
225
- InvalidArgumentException,
226
- NotEnoughSpaceException
227
- )
228
-
229
- try:
230
- result = task.submit(name="test", cluster_name="slurm-cn", command="echo hello")
231
- except AuthenticationException as e:
232
- print(f"Authentication failed: {e}")
233
- except NotFoundException as e:
234
- print(f"Resource not found: {e}")
235
- except APIException as e:
236
- print(f"API error: {e}")
237
- ```
238
-
239
- > [!TIP] Error Handling
240
- > SDKs automatically handle common errors and retry failed requests. Check SDK documentation for error handling best practices.
241
-
242
- ## Features
243
-
244
- - Type-safe API clients
245
- - Automatic authentication
246
- - Error handling
247
- - Request retry logic
248
- - Response validation
249
-
250
- ## Resources
251
-
252
- - [Python SDK Documentation](https://github.com/xcloud-service/xservice/tree/main/client/python-sdk)
253
- - [API Reference](https://xcloud-service.com/docs/api)
254
-