mlops-python-sdk 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. mlops/__init__.py +46 -0
  2. mlops/api/client/__init__.py +8 -0
  3. mlops/api/client/api/__init__.py +1 -0
  4. mlops/api/client/api/tasks/__init__.py +1 -0
  5. mlops/api/client/api/tasks/cancel_task.py +196 -0
  6. mlops/api/client/api/tasks/delete_task.py +204 -0
  7. mlops/api/client/api/tasks/get_task.py +196 -0
  8. mlops/api/client/api/tasks/list_tasks.py +255 -0
  9. mlops/api/client/api/tasks/submit_task.py +188 -0
  10. mlops/api/client/client.py +268 -0
  11. mlops/api/client/errors.py +16 -0
  12. mlops/api/client/models/__init__.py +33 -0
  13. mlops/api/client/models/error_response.py +68 -0
  14. mlops/api/client/models/message_response.py +59 -0
  15. mlops/api/client/models/task.py +1629 -0
  16. mlops/api/client/models/task_alloc_tres_type_0.py +49 -0
  17. mlops/api/client/models/task_gres_detail_type_0_item.py +44 -0
  18. mlops/api/client/models/task_job_resources_type_0.py +49 -0
  19. mlops/api/client/models/task_list_response.py +102 -0
  20. mlops/api/client/models/task_resources_type_0.py +49 -0
  21. mlops/api/client/models/task_status.py +15 -0
  22. mlops/api/client/models/task_submit_request.py +640 -0
  23. mlops/api/client/models/task_submit_request_environment_type_0.py +49 -0
  24. mlops/api/client/models/task_submit_response.py +78 -0
  25. mlops/api/client/models/task_tres_type_0.py +49 -0
  26. mlops/api/client/models/task_tres_used_type_0.py +49 -0
  27. mlops/api/client/py.typed +1 -0
  28. mlops/api/client/types.py +54 -0
  29. mlops/connection_config.py +106 -0
  30. mlops/exceptions.py +82 -0
  31. mlops/task/__init__.py +10 -0
  32. mlops/task/client.py +146 -0
  33. mlops/task/task.py +464 -0
  34. mlops_python_sdk-0.0.1.dist-info/METADATA +416 -0
  35. mlops_python_sdk-0.0.1.dist-info/RECORD +36 -0
  36. mlops_python_sdk-0.0.1.dist-info/WHEEL +4 -0
mlops/task/task.py ADDED
@@ -0,0 +1,464 @@
1
+ """
2
+ High-level Task SDK interface for XClient.
3
+
4
+ This module provides a convenient interface for managing tasks through the XClient API.
5
+ """
6
+
7
+ import json
8
+ from http import HTTPStatus
9
+ from typing import Optional
10
+ from ..api.client.api.tasks import (
11
+ submit_task,
12
+ get_task,
13
+ list_tasks,
14
+ cancel_task,
15
+ delete_task,
16
+ )
17
+ from ..api.client.models.task import Task as TaskModel
18
+ from ..api.client.models.task_submit_request import TaskSubmitRequest
19
+ from ..api.client.models.task_submit_response import TaskSubmitResponse
20
+ from ..api.client.models.task_list_response import TaskListResponse
21
+ from ..api.client.models.task_status import TaskStatus
22
+ from ..api.client.models.error_response import ErrorResponse
23
+ from ..api.client.types import Response, UNSET, UNSET
24
+ from ..connection_config import ConnectionConfig
25
+ from ..exceptions import (
26
+ NotFoundException,
27
+ APIException,
28
+ )
29
+ from .client import TaskClient, handle_api_exception
30
+
31
+
32
class Task:
    """
    High-level interface for managing tasks.

    Every public method delegates to the generated API client and converts
    error responses into exceptions.

    Example:
    ```python
    from mlops.task.task import Task
    from mlops.connection_config import ConnectionConfig
    from mlops.api.client.models.task_status import TaskStatus

    config = ConnectionConfig(api_key="your_api_key")
    task = Task(config=config)

    # Submit a task with script
    result = task.submit(
        name="my-task",
        cluster_id=1,
        script="#!/bin/bash\\necho 'Hello World'"
    )

    # Or submit with command
    result = task.submit(
        name="my-task",
        cluster_id=1,
        command="echo 'Hello World'"
    )

    # Get task details
    task_info = task.get(task_id=result.job_id, cluster_id=1)

    # List tasks
    tasks = task.list(status=TaskStatus.RUNNING)

    # Cancel a task
    task.cancel(task_id=result.job_id, cluster_id=1)

    # Delete a task
    task.delete(task_id=result.job_id, cluster_id=1)
    ```
    """

    # Keys of the ``resources`` dict that are copied verbatim onto the
    # submit request (``cpu`` / ``cpus_per_task`` are handled separately).
    _PASSTHROUGH_RESOURCE_KEYS = ("memory", "nodes", "gres", "time", "partition", "tres")

    def __init__(
        self,
        config: Optional["ConnectionConfig"] = None,
        api_key: Optional[str] = None,
        access_token: Optional[str] = None,
        domain: Optional[str] = None,
        debug: Optional[bool] = None,
        request_timeout: Optional[float] = None,
    ):
        """
        Initialize the Task client.

        Note: when ``config`` is supplied, the overrides below are written
        onto that same object (it is not copied), so the caller's instance
        is modified.

        Args:
            config: ConnectionConfig instance. If not provided, a new one will be created.
            api_key: API key for authentication. Overrides config.api_key.
            access_token: Access token for authentication. Overrides config.access_token.
            domain: API domain. Overrides config.domain.
            debug: Enable debug mode. Overrides config.debug.
            request_timeout: Request timeout in seconds. Overrides config.request_timeout.
        """
        if config is None:
            config = ConnectionConfig()

        # Only explicitly passed (non-None) values override the config.
        if api_key is not None:
            config.api_key = api_key
        if access_token is not None:
            config.access_token = access_token
        if domain is not None:
            config.domain = domain
        if debug is not None:
            config.debug = debug
        if request_timeout is not None:
            config.request_timeout = request_timeout

        self._config = config
        self._client = TaskClient(config=config)

    # ------------------------------------------------------------------
    # Private helpers shared by the public methods
    # ------------------------------------------------------------------

    @staticmethod
    def _coerce_http_status(code, fallback):
        """
        Return ``HTTPStatus(code)``, or ``HTTPStatus(fallback)`` if ``code``
        is not a valid HTTP status (for example 0, an unset sentinel, or an
        application-specific number).
        """
        try:
            return HTTPStatus(code)
        except (ValueError, TypeError):
            return HTTPStatus(fallback)

    @staticmethod
    def _describe_error(response, response_obj, default_msg):
        """
        Pick the message for a parsed error response: its ``error`` field if
        set, otherwise the ``"error"`` key of the raw JSON body, otherwise
        ``default_msg``. A body that cannot be decoded as JSON is returned
        as text.
        """
        if response.error and response.error != UNSET:
            return response.error
        if not response_obj.content:
            return default_msg
        try:
            payload = json.loads(response_obj.content.decode())
        except (json.JSONDecodeError, UnicodeDecodeError):
            return response_obj.content.decode(errors="replace")
        # Valid JSON that is not an object (list, string, ...) has no
        # "error" key to read.
        if isinstance(payload, dict):
            return payload.get("error", default_msg)
        return default_msg

    @staticmethod
    def _describe_unparsed(response_obj, default_msg):
        """
        Build a message for a response that produced no parsed model: the
        ``"error"`` key of the JSON body if present, otherwise
        ``"HTTP <status>: <body>"``; ``default_msg`` when the body is empty.
        """
        if not response_obj.content:
            return default_msg
        status = response_obj.status_code.value
        try:
            raw = response_obj.content.decode()
            payload = json.loads(raw)
        except (json.JSONDecodeError, UnicodeDecodeError):
            return f"HTTP {status}: {response_obj.content.decode(errors='replace')}"
        fallback = f"HTTP {status}: {raw}"
        if isinstance(payload, dict):
            return payload.get("error", fallback)
        return fallback

    def _raise_api_error(self, response, response_obj, default_msg):
        """
        Raise the exception matching a parsed error response.

        The status comes from ``response.code`` when that is a valid HTTP
        status, otherwise from the transport-level status of
        ``response_obj``. A 404 raises NotFoundException; anything else is
        delegated to ``handle_api_exception`` with the raw content and
        headers. This method never returns.
        """
        error_msg = self._describe_error(response, response_obj, default_msg)
        status = self._coerce_http_status(response.code, response_obj.status_code)
        if status == HTTPStatus.NOT_FOUND:
            raise NotFoundException(error_msg)
        raise handle_api_exception(
            Response(
                status_code=status,
                content=response_obj.content,
                headers=response_obj.headers,
                parsed=None,
            )
        )

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def submit(
        self,
        name: str,
        cluster_id: Optional[int] = None,
        script: Optional[str] = None,
        command: Optional[str] = None,
        resources: Optional[dict] = None,
        team_id: Optional[int] = None,
    ) -> "TaskSubmitResponse":
        """
        Submit a new task.

        Args:
            name: Task name
            cluster_id: Cluster ID to submit the task to (required despite the None default)
            script: Task script content (optional, but at least one of script or command is required)
            command: Command to execute (optional, but at least one of script or command is required)
            resources: Resource requirements dict (optional). Recognised keys:
                cpu, cpus_per_task, memory, nodes, gres, time, partition, tres.
                Other keys are ignored.
            team_id: Team ID (optional)

        Returns:
            TaskSubmitResponse containing the submitted task information

        Raises:
            APIException: If cluster_id is missing, neither script nor command
                is given, or the server response could not be parsed
            NotFoundException: If the API answers with status 404
            Exception: Whatever handle_api_exception builds for other error responses
        """
        if cluster_id is None:
            raise APIException("cluster_id is required")
        if not script and not command:
            raise APIException("At least one of 'script' or 'command' must be provided")

        request_kwargs = {
            "name": name,
            "cluster_id": cluster_id,
        }

        # Fields that are not provided are simply omitted from the request.
        if script:
            request_kwargs["script"] = script
        if command:
            request_kwargs["command"] = command
        if team_id is not None:
            request_kwargs["team_id"] = team_id

        if resources:
            # "cpu" is accepted as an alias; "cpus_per_task" wins when truthy.
            if "cpu" in resources or "cpus_per_task" in resources:
                request_kwargs["cpus_per_task"] = resources.get("cpus_per_task") or resources.get("cpu")
            for key in self._PASSTHROUGH_RESOURCE_KEYS:
                if key in resources:
                    request_kwargs[key] = resources[key]

        request = TaskSubmitRequest(**request_kwargs)

        # sync_detailed exposes status code, headers and raw content as well
        # as the parsed model.
        response_obj = submit_task.sync_detailed(client=self._client, body=request)
        response = response_obj.parsed

        if isinstance(response, ErrorResponse):
            self._raise_api_error(response, response_obj, "Unknown error")

        if response is None:
            error_msg = self._describe_unparsed(response_obj, "No response from server")
            raise APIException(f"Failed to submit task: {error_msg}")

        return response

    def get(
        self,
        task_id: int,
        cluster_id: int,
    ) -> "TaskModel":
        """
        Get task details by task ID.

        Args:
            task_id: Task ID
            cluster_id: Cluster ID

        Returns:
            Task model with task details

        Raises:
            NotFoundException: If the API answers with status 404, or if the
                response could not be parsed into a model
            Exception: Whatever handle_api_exception builds for other error responses
        """
        response_obj = get_task.sync_detailed(
            id=task_id,
            client=self._client,
            cluster_id=cluster_id,
        )
        response = response_obj.parsed
        not_found_msg = f"Task {task_id} not found"

        if isinstance(response, ErrorResponse):
            self._raise_api_error(response, response_obj, not_found_msg)

        if response is None:
            raise NotFoundException(self._describe_unparsed(response_obj, not_found_msg))

        return response

    def list(
        self,
        page: int = 1,
        page_size: int = 20,
        status: Optional["TaskStatus"] = None,
        user_id: Optional[int] = None,
        team_id: Optional[int] = None,
        cluster_id: Optional[int] = None,
    ) -> "TaskListResponse":
        """
        List tasks with optional filtering.

        Args:
            page: Page number (default: 1)
            page_size: Number of items per page (default: 20)
            status: Filter by task status (optional)
            user_id: Filter by user ID (optional)
            team_id: Filter by team ID (optional)
            cluster_id: Filter by cluster ID (optional)

        Returns:
            TaskListResponse containing the list of tasks

        Raises:
            APIException: If no response could be parsed
            Exception: Whatever handle_api_exception builds for an error response
        """
        # Filters left as None are sent as UNSET.
        response = list_tasks.sync(
            client=self._client,
            page=page,
            page_size=page_size,
            status=status if status is not None else UNSET,
            user_id=user_id if user_id is not None else UNSET,
            team_id=team_id if team_id is not None else UNSET,
            cluster_id=cluster_id if cluster_id is not None else UNSET,
        )

        if isinstance(response, ErrorResponse):
            # Only the parsed model is available here, so a minimal Response
            # is rebuilt from it. A missing/zero/invalid code becomes 400.
            raise handle_api_exception(
                Response(
                    status_code=self._coerce_http_status(response.code, HTTPStatus.BAD_REQUEST),
                    content=json.dumps({"error": response.error}).encode() if response.error else b"",
                    headers={},
                    parsed=None,
                )
            )

        if response is None:
            raise APIException("Failed to list tasks: No response from server")

        return response

    def cancel(
        self,
        task_id: int,
        cluster_id: int,
    ) -> bool:
        """
        Cancel a task.

        Args:
            task_id: Task ID to cancel
            cluster_id: Cluster ID where the task is running

        Returns:
            True if a non-error response was parsed; False if the response
            could not be parsed into a model

        Raises:
            NotFoundException: If the API answers with status 404
            Exception: Whatever handle_api_exception builds for other error responses
        """
        response_obj = cancel_task.sync_detailed(
            id=task_id,
            client=self._client,
            cluster_id=cluster_id,
        )
        response = response_obj.parsed

        if isinstance(response, ErrorResponse):
            self._raise_api_error(response, response_obj, f"Task {task_id} not found")

        return response is not None

    def delete(
        self,
        task_id: int,
        cluster_id: int,
    ) -> bool:
        """
        Delete a task.

        Args:
            task_id: Task ID to delete
            cluster_id: Cluster ID where the task is running

        Returns:
            True if the task was deleted successfully

        Raises:
            NotFoundException: If the API answers with status 404
            APIException: If the response could not be parsed
            Exception: Whatever handle_api_exception builds for other error responses
        """
        response_obj = delete_task.sync_detailed(
            id=task_id,
            client=self._client,
            cluster_id=cluster_id,
        )
        response = response_obj.parsed

        if isinstance(response, ErrorResponse):
            self._raise_api_error(response, response_obj, f"Task {task_id} not found")

        if response is None:
            raise APIException("Failed to delete task: No response from server")

        return True