mlops-python-sdk 0.0.1__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlops/api/client/models/__init__.py +6 -0
- mlops/api/client/models/job_spec.py +273 -0
- mlops/api/client/models/job_spec_env.py +44 -0
- mlops/api/client/models/job_spec_master_strategy.py +8 -0
- mlops/api/client/models/task_submit_request.py +18 -0
- mlops/connection_config.py +5 -5
- mlops/task/task.py +34 -6
- {mlops_python_sdk-0.0.1.dist-info → mlops_python_sdk-1.0.0.dist-info}/METADATA +19 -19
- {mlops_python_sdk-0.0.1.dist-info → mlops_python_sdk-1.0.0.dist-info}/RECORD +10 -7
- {mlops_python_sdk-0.0.1.dist-info → mlops_python_sdk-1.0.0.dist-info}/WHEEL +0 -0
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
"""Contains all the data models used in inputs/outputs"""
|
|
2
2
|
|
|
3
3
|
from .error_response import ErrorResponse
|
|
4
|
+
from .job_spec import JobSpec
|
|
5
|
+
from .job_spec_env import JobSpecEnv
|
|
6
|
+
from .job_spec_master_strategy import JobSpecMasterStrategy
|
|
4
7
|
from .message_response import MessageResponse
|
|
5
8
|
from .task import Task
|
|
6
9
|
from .task_alloc_tres_type_0 import TaskAllocTresType0
|
|
@@ -17,6 +20,9 @@ from .task_tres_used_type_0 import TaskTresUsedType0
|
|
|
17
20
|
|
|
18
21
|
__all__ = (
|
|
19
22
|
"ErrorResponse",
|
|
23
|
+
"JobSpec",
|
|
24
|
+
"JobSpecEnv",
|
|
25
|
+
"JobSpecMasterStrategy",
|
|
20
26
|
"MessageResponse",
|
|
21
27
|
"Task",
|
|
22
28
|
"TaskAllocTresType0",
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
from collections.abc import Mapping
|
|
2
|
+
from typing import TYPE_CHECKING, Any, TypeVar, Union, cast
|
|
3
|
+
|
|
4
|
+
from attrs import define as _attrs_define
|
|
5
|
+
from attrs import field as _attrs_field
|
|
6
|
+
|
|
7
|
+
from ..models.job_spec_master_strategy import JobSpecMasterStrategy
|
|
8
|
+
from ..types import UNSET, Unset
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from ..models.job_spec_env import JobSpecEnv
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
T = TypeVar("T", bound="JobSpec")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@_attrs_define
class JobSpec:
    """Domain-specific job specification (rendered into slurm script)

    Every declared field is optional. A field left as ``UNSET`` is omitted
    from the dictionary built by ``to_dict``. Keys that are not declared
    fields are stored in ``additional_properties`` and are written back out
    by ``to_dict``.

    Attributes:
        artifact_dir (Union[Unset, str]): Artifacts directory
        cpus_per_task (Union[Unset, int]): CPUs per task
        env (Union[Unset, JobSpecEnv]): Environment variables
        gpus_per_node (Union[Unset, int]): GPUs per node
        log_dir (Union[Unset, str]): Logs directory
        master_strategy (Union[Unset, JobSpecMasterStrategy]): Strategy for master node selection Default:
            JobSpecMasterStrategy.FIRST_NODE.
        mem (Union[Unset, str]): Memory requirement (e.g., "8G")
        mounts (Union[Unset, list[str]]): Container mounts (e.g., ["/path1:/path1"])
        nodes (Union[Unset, int]): Number of nodes
        num_tool_workers (Union[Unset, int]): Number of tool workers
        num_train_workers (Union[Unset, int]): Number of training workers
        partition (Union[Unset, str]): Partition name
        proxy_image (Union[Unset, str]): Proxy container image
        qos (Union[Unset, str]): Quality of Service
        run_dir (Union[Unset, str]): Run directory
        time (Union[Unset, str]): Time limit
        tool_image (Union[Unset, str]): Tooling container image
        tool_master_cmd (Union[Unset, str]): Tooling master command
        tool_worker_cmd (Union[Unset, str]): Tooling worker command
        train_image (Union[Unset, str]): Training container image
        train_master_cmd (Union[Unset, str]): Training master command
        train_worker_cmd (Union[Unset, str]): Training worker command
    """

    artifact_dir: Union[Unset, str] = UNSET
    cpus_per_task: Union[Unset, int] = UNSET
    env: Union[Unset, "JobSpecEnv"] = UNSET
    gpus_per_node: Union[Unset, int] = UNSET
    log_dir: Union[Unset, str] = UNSET
    master_strategy: Union[Unset, JobSpecMasterStrategy] = JobSpecMasterStrategy.FIRST_NODE
    mem: Union[Unset, str] = UNSET
    mounts: Union[Unset, list[str]] = UNSET
    nodes: Union[Unset, int] = UNSET
    num_tool_workers: Union[Unset, int] = UNSET
    num_train_workers: Union[Unset, int] = UNSET
    partition: Union[Unset, str] = UNSET
    proxy_image: Union[Unset, str] = UNSET
    qos: Union[Unset, str] = UNSET
    run_dir: Union[Unset, str] = UNSET
    time: Union[Unset, str] = UNSET
    tool_image: Union[Unset, str] = UNSET
    tool_master_cmd: Union[Unset, str] = UNSET
    tool_worker_cmd: Union[Unset, str] = UNSET
    train_image: Union[Unset, str] = UNSET
    train_master_cmd: Union[Unset, str] = UNSET
    train_worker_cmd: Union[Unset, str] = UNSET
    # Undeclared keys; excluded from __init__ and filled in by from_dict / item assignment.
    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize this spec into a plain dictionary.

        Additional properties are written first. Declared fields follow in
        declaration order, overriding any additional property with the same
        key, and are left out when their value is ``UNSET``.

        Returns:
            The dictionary form of this job specification.
        """
        # Only these three fields need converting; the rest are emitted as stored.
        env_payload: Union[Unset, dict[str, Any]] = (
            UNSET if isinstance(self.env, Unset) else self.env.to_dict()
        )
        strategy_payload: Union[Unset, str] = (
            UNSET if isinstance(self.master_strategy, Unset) else self.master_strategy.value
        )
        mounts_payload: Union[Unset, list[str]] = (
            UNSET if isinstance(self.mounts, Unset) else self.mounts
        )

        # Order here fixes the key order of the output.
        declared_values = (
            ("artifact_dir", self.artifact_dir),
            ("cpus_per_task", self.cpus_per_task),
            ("env", env_payload),
            ("gpus_per_node", self.gpus_per_node),
            ("log_dir", self.log_dir),
            ("master_strategy", strategy_payload),
            ("mem", self.mem),
            ("mounts", mounts_payload),
            ("nodes", self.nodes),
            ("num_tool_workers", self.num_tool_workers),
            ("num_train_workers", self.num_train_workers),
            ("partition", self.partition),
            ("proxy_image", self.proxy_image),
            ("qos", self.qos),
            ("run_dir", self.run_dir),
            ("time", self.time),
            ("tool_image", self.tool_image),
            ("tool_master_cmd", self.tool_master_cmd),
            ("tool_worker_cmd", self.tool_worker_cmd),
            ("train_image", self.train_image),
            ("train_master_cmd", self.train_master_cmd),
            ("train_worker_cmd", self.train_worker_cmd),
        )

        serialized: dict[str, Any] = {}
        serialized.update(self.additional_properties)
        for key, value in declared_values:
            if value is not UNSET:
                serialized[key] = value

        return serialized

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance from a mapping.

        Declared keys are removed from a copy of ``src_dict`` and passed to
        the constructor; missing keys become ``UNSET``. Whatever is left over
        becomes ``additional_properties``.

        Args:
            src_dict: Mapping to read from; it is copied, not modified.

        Returns:
            A new instance of ``cls``.
        """
        # Imported here rather than at module level (the top-level import is TYPE_CHECKING-only).
        from ..models.job_spec_env import JobSpecEnv

        remaining = dict(src_dict)

        raw_env = remaining.pop("env", UNSET)
        env: Union[Unset, JobSpecEnv] = (
            UNSET if isinstance(raw_env, Unset) else JobSpecEnv.from_dict(raw_env)
        )

        raw_strategy = remaining.pop("master_strategy", UNSET)
        master_strategy: Union[Unset, JobSpecMasterStrategy] = (
            UNSET if isinstance(raw_strategy, Unset) else JobSpecMasterStrategy(raw_strategy)
        )

        mounts = cast(list[str], remaining.pop("mounts", UNSET))

        # Fields copied through without conversion.
        passthrough_names = (
            "artifact_dir",
            "cpus_per_task",
            "gpus_per_node",
            "log_dir",
            "mem",
            "nodes",
            "num_tool_workers",
            "num_train_workers",
            "partition",
            "proxy_image",
            "qos",
            "run_dir",
            "time",
            "tool_image",
            "tool_master_cmd",
            "tool_worker_cmd",
            "train_image",
            "train_master_cmd",
            "train_worker_cmd",
        )
        passthrough_values = {name: remaining.pop(name, UNSET) for name in passthrough_names}

        job_spec = cls(
            env=env,
            master_strategy=master_strategy,
            mounts=mounts,
            **passthrough_values,
        )

        job_spec.additional_properties = remaining
        return job_spec

    @property
    def additional_keys(self) -> list[str]:
        """Return the keys currently held in ``additional_properties``."""
        return list(self.additional_properties)

    def __getitem__(self, key: str) -> Any:
        """Return the additional property stored under ``key``."""
        return self.additional_properties[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store ``value`` as the additional property ``key``."""
        self.additional_properties[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the additional property ``key``."""
        del self.additional_properties[key]

    def __contains__(self, key: str) -> bool:
        """Return whether ``key`` is an additional property."""
        return key in self.additional_properties
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from collections.abc import Mapping
|
|
2
|
+
from typing import Any, TypeVar
|
|
3
|
+
|
|
4
|
+
from attrs import define as _attrs_define
|
|
5
|
+
from attrs import field as _attrs_field
|
|
6
|
+
|
|
7
|
+
T = TypeVar("T", bound="JobSpecEnv")
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@_attrs_define
class JobSpecEnv:
    """Environment variables

    The model declares no fixed fields; every entry is kept in
    ``additional_properties`` and accessed with mapping-style item syntax.
    """

    # Excluded from __init__; filled in by from_dict / item assignment.
    additional_properties: dict[str, str] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a new dictionary holding the stored entries."""
        serialized: dict[str, Any] = {}
        serialized.update(self.additional_properties)
        return serialized

    @classmethod
    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
        """Build an instance whose entries are a copy of ``src_dict``.

        Args:
            src_dict: Mapping to read from; it is copied, not modified.

        Returns:
            A new instance of ``cls``.
        """
        job_spec_env = cls()
        job_spec_env.additional_properties = dict(src_dict)
        return job_spec_env

    @property
    def additional_keys(self) -> list[str]:
        """Return the keys currently held in ``additional_properties``."""
        return list(self.additional_properties)

    def __getitem__(self, key: str) -> str:
        """Return the value stored under ``key``."""
        return self.additional_properties[key]

    def __setitem__(self, key: str, value: str) -> None:
        """Store ``value`` under ``key``."""
        self.additional_properties[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the entry stored under ``key``."""
        del self.additional_properties[key]

    def __contains__(self, key: str) -> bool:
        """Return whether ``key`` has a stored entry."""
        return key in self.additional_properties
|
|
@@ -7,6 +7,7 @@ from attrs import field as _attrs_field
|
|
|
7
7
|
from ..types import UNSET, Unset
|
|
8
8
|
|
|
9
9
|
if TYPE_CHECKING:
|
|
10
|
+
from ..models.job_spec import JobSpec
|
|
10
11
|
from ..models.task_submit_request_environment_type_0 import TaskSubmitRequestEnvironmentType0
|
|
11
12
|
|
|
12
13
|
|
|
@@ -35,6 +36,7 @@ class TaskSubmitRequest:
|
|
|
35
36
|
export (Union[None, Unset, str]): Environment export Example: ALL.
|
|
36
37
|
gres (Union[None, Unset, str]): Generic resources (e.g., "gpu:1", "gpu:tesla:2") Example: gpu:1.
|
|
37
38
|
input_ (Union[None, Unset, str]): Standard input file
|
|
39
|
+
job_spec (Union[Unset, JobSpec]): Domain-specific job specification (rendered into slurm script)
|
|
38
40
|
mem_bind (Union[None, Unset, str]): Memory binding
|
|
39
41
|
memory (Union[None, Unset, str]): Memory requirement (e.g., "8G", "4096M") Example: 8G.
|
|
40
42
|
nice (Union[None, Unset, int]): Nice value
|
|
@@ -69,6 +71,7 @@ class TaskSubmitRequest:
|
|
|
69
71
|
export: Union[None, Unset, str] = UNSET
|
|
70
72
|
gres: Union[None, Unset, str] = UNSET
|
|
71
73
|
input_: Union[None, Unset, str] = UNSET
|
|
74
|
+
job_spec: Union[Unset, "JobSpec"] = UNSET
|
|
72
75
|
mem_bind: Union[None, Unset, str] = UNSET
|
|
73
76
|
memory: Union[None, Unset, str] = UNSET
|
|
74
77
|
nice: Union[None, Unset, int] = UNSET
|
|
@@ -178,6 +181,10 @@ class TaskSubmitRequest:
|
|
|
178
181
|
else:
|
|
179
182
|
input_ = self.input_
|
|
180
183
|
|
|
184
|
+
job_spec: Union[Unset, dict[str, Any]] = UNSET
|
|
185
|
+
if not isinstance(self.job_spec, Unset):
|
|
186
|
+
job_spec = self.job_spec.to_dict()
|
|
187
|
+
|
|
181
188
|
mem_bind: Union[None, Unset, str]
|
|
182
189
|
if isinstance(self.mem_bind, Unset):
|
|
183
190
|
mem_bind = UNSET
|
|
@@ -294,6 +301,8 @@ class TaskSubmitRequest:
|
|
|
294
301
|
field_dict["gres"] = gres
|
|
295
302
|
if input_ is not UNSET:
|
|
296
303
|
field_dict["input"] = input_
|
|
304
|
+
if job_spec is not UNSET:
|
|
305
|
+
field_dict["job_spec"] = job_spec
|
|
297
306
|
if mem_bind is not UNSET:
|
|
298
307
|
field_dict["mem_bind"] = mem_bind
|
|
299
308
|
if memory is not UNSET:
|
|
@@ -327,6 +336,7 @@ class TaskSubmitRequest:
|
|
|
327
336
|
|
|
328
337
|
@classmethod
|
|
329
338
|
def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
|
|
339
|
+
from ..models.job_spec import JobSpec
|
|
330
340
|
from ..models.task_submit_request_environment_type_0 import TaskSubmitRequestEnvironmentType0
|
|
331
341
|
|
|
332
342
|
d = dict(src_dict)
|
|
@@ -468,6 +478,13 @@ class TaskSubmitRequest:
|
|
|
468
478
|
|
|
469
479
|
input_ = _parse_input_(d.pop("input", UNSET))
|
|
470
480
|
|
|
481
|
+
_job_spec = d.pop("job_spec", UNSET)
|
|
482
|
+
job_spec: Union[Unset, JobSpec]
|
|
483
|
+
if isinstance(_job_spec, Unset):
|
|
484
|
+
job_spec = UNSET
|
|
485
|
+
else:
|
|
486
|
+
job_spec = JobSpec.from_dict(_job_spec)
|
|
487
|
+
|
|
471
488
|
def _parse_mem_bind(data: object) -> Union[None, Unset, str]:
|
|
472
489
|
if data is None:
|
|
473
490
|
return data
|
|
@@ -604,6 +621,7 @@ class TaskSubmitRequest:
|
|
|
604
621
|
export=export,
|
|
605
622
|
gres=gres,
|
|
606
623
|
input_=input_,
|
|
624
|
+
job_spec=job_spec,
|
|
607
625
|
mem_bind=mem_bind,
|
|
608
626
|
memory=memory,
|
|
609
627
|
nice=nice,
|
mlops/connection_config.py
CHANGED
|
@@ -19,23 +19,23 @@ class ConnectionConfig:
|
|
|
19
19
|
|
|
20
20
|
@staticmethod
|
|
21
21
|
def _domain():
|
|
22
|
-
return os.getenv(
|
|
22
|
+
return os.getenv('MLOPS_DOMAIN', "localhost:8090")
|
|
23
23
|
|
|
24
24
|
@staticmethod
|
|
25
25
|
def _debug():
|
|
26
|
-
return os.getenv(
|
|
26
|
+
return os.getenv('MLOPS_DEBUG', "false").lower() == "true"
|
|
27
27
|
|
|
28
28
|
@staticmethod
|
|
29
29
|
def _api_key():
|
|
30
|
-
return os.getenv(
|
|
30
|
+
return os.getenv('MLOPS_API_KEY')
|
|
31
31
|
|
|
32
32
|
@staticmethod
|
|
33
33
|
def _access_token():
|
|
34
|
-
return os.getenv(
|
|
34
|
+
return os.getenv('MLOPS_ACCESS_TOKEN')
|
|
35
35
|
|
|
36
36
|
@staticmethod
|
|
37
37
|
def _api_path():
|
|
38
|
-
return os.getenv(
|
|
38
|
+
return os.getenv('MLOPS_API_PATH', DEFAULT_API_PATH)
|
|
39
39
|
|
|
40
40
|
def __init__(
|
|
41
41
|
self,
|
mlops/task/task.py
CHANGED
|
@@ -321,7 +321,8 @@ class Task:
|
|
|
321
321
|
Raises:
|
|
322
322
|
APIException: If the API returns an error
|
|
323
323
|
"""
|
|
324
|
-
response
|
|
324
|
+
# Use sync_detailed to get full response information
|
|
325
|
+
response_obj = list_tasks.sync_detailed(
|
|
325
326
|
client=self._client,
|
|
326
327
|
page=page,
|
|
327
328
|
page_size=page_size,
|
|
@@ -330,19 +331,46 @@ class Task:
|
|
|
330
331
|
team_id=team_id if team_id is not None else UNSET,
|
|
331
332
|
cluster_id=cluster_id if cluster_id is not None else UNSET,
|
|
332
333
|
)
|
|
334
|
+
response = response_obj.parsed
|
|
333
335
|
|
|
334
336
|
if isinstance(response, ErrorResponse):
|
|
335
|
-
|
|
337
|
+
# Extract error message from ErrorResponse
|
|
338
|
+
error_msg = "Unknown error"
|
|
339
|
+
if response.error and response.error != UNSET:
|
|
340
|
+
error_msg = response.error
|
|
341
|
+
elif response_obj.content:
|
|
342
|
+
try:
|
|
343
|
+
error_data = json.loads(response_obj.content.decode())
|
|
344
|
+
error_msg = error_data.get("error", "Unknown error")
|
|
345
|
+
except (json.JSONDecodeError, UnicodeDecodeError):
|
|
346
|
+
error_msg = response_obj.content.decode(errors="replace")
|
|
347
|
+
|
|
348
|
+
# Check status code to determine exception type
|
|
349
|
+
status_code = response.code if response.code != UNSET and response.code != 0 else response_obj.status_code.value
|
|
350
|
+
if status_code == 404:
|
|
351
|
+
raise NotFoundException(error_msg)
|
|
352
|
+
|
|
353
|
+
# Use handle_api_exception which returns an exception object
|
|
354
|
+
exception = handle_api_exception(
|
|
336
355
|
Response(
|
|
337
|
-
status_code=HTTPStatus(
|
|
338
|
-
content=
|
|
339
|
-
headers=
|
|
356
|
+
status_code=HTTPStatus(status_code),
|
|
357
|
+
content=response_obj.content,
|
|
358
|
+
headers=response_obj.headers,
|
|
340
359
|
parsed=None,
|
|
341
360
|
)
|
|
342
361
|
)
|
|
362
|
+
raise exception
|
|
343
363
|
|
|
344
364
|
if response is None:
|
|
345
|
-
|
|
365
|
+
# If response is None, try to extract error from raw response
|
|
366
|
+
error_msg = "No response from server"
|
|
367
|
+
if response_obj.content:
|
|
368
|
+
try:
|
|
369
|
+
error_data = json.loads(response_obj.content.decode())
|
|
370
|
+
error_msg = error_data.get("error", f"HTTP {response_obj.status_code.value}: {response_obj.content.decode()}")
|
|
371
|
+
except (json.JSONDecodeError, UnicodeDecodeError):
|
|
372
|
+
error_msg = f"HTTP {response_obj.status_code.value}: {response_obj.content.decode(errors='replace')}"
|
|
373
|
+
raise APIException(f"Failed to list tasks: {error_msg}")
|
|
346
374
|
|
|
347
375
|
return response
|
|
348
376
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: mlops-python-sdk
|
|
3
|
-
Version: 0.0.1
|
|
3
|
+
Version: 1.0.0
|
|
4
4
|
Summary: MLOps Python SDK for XCloud Service API
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: mlops
|
|
@@ -25,7 +25,7 @@ Description-Content-Type: text/markdown
|
|
|
25
25
|
|
|
26
26
|
# MLOps Python SDK
|
|
27
27
|
|
|
28
|
-
[MLOps](https://
|
|
28
|
+
[MLOps](https://xcloud-service.com) Python SDK for XCloud Service API. Manage and execute tasks with confidence.
|
|
29
29
|
|
|
30
30
|
## Installation
|
|
31
31
|
|
|
@@ -62,8 +62,8 @@ export MLOPS_DOMAIN=localhost:8090 # optional
|
|
|
62
62
|
### 2. Basic Usage
|
|
63
63
|
|
|
64
64
|
```python
|
|
65
|
-
from
|
|
66
|
-
from
|
|
65
|
+
from mlops import Task, ConnectionConfig
|
|
66
|
+
from mlops.api.client.models.task_status import TaskStatus
|
|
67
67
|
|
|
68
68
|
# Initialize Task client (uses environment variables by default)
|
|
69
69
|
task = Task()
|
|
@@ -119,7 +119,7 @@ The `Task` class provides a high-level interface for managing tasks.
|
|
|
119
119
|
#### Initialization
|
|
120
120
|
|
|
121
121
|
```python
|
|
122
|
-
from
|
|
122
|
+
from mlops import Task, ConnectionConfig
|
|
123
123
|
|
|
124
124
|
# Using environment variables
|
|
125
125
|
task = Task()
|
|
@@ -222,7 +222,7 @@ tasks = task.list(
|
|
|
222
222
|
**Example:**
|
|
223
223
|
|
|
224
224
|
```python
|
|
225
|
-
from
|
|
225
|
+
from mlops.api.client.models.task_status import TaskStatus
|
|
226
226
|
|
|
227
227
|
# List all running tasks
|
|
228
228
|
running_tasks = task.list(status=TaskStatus.RUNNING)
|
|
@@ -261,7 +261,7 @@ task.cancel(task_id=12345, cluster_id=1)
|
|
|
261
261
|
Task status values for filtering:
|
|
262
262
|
|
|
263
263
|
```python
|
|
264
|
-
from
|
|
264
|
+
from mlops.api.client.models.task_status import TaskStatus
|
|
265
265
|
|
|
266
266
|
TaskStatus.PENDING # Task is pending
|
|
267
267
|
TaskStatus.QUEUED # Task is queued
|
|
@@ -290,7 +290,7 @@ The SDK reads configuration from environment variables:
|
|
|
290
290
|
You can also configure the connection programmatically:
|
|
291
291
|
|
|
292
292
|
```python
|
|
293
|
-
from
|
|
293
|
+
from mlops import ConnectionConfig
|
|
294
294
|
|
|
295
295
|
config = ConnectionConfig(
|
|
296
296
|
domain="api.example.com",
|
|
@@ -306,7 +306,7 @@ config = ConnectionConfig(
|
|
|
306
306
|
The SDK provides specific exception types:
|
|
307
307
|
|
|
308
308
|
```python
|
|
309
|
-
from
|
|
309
|
+
from mlops.exceptions import (
|
|
310
310
|
APIException, # General API errors
|
|
311
311
|
AuthenticationException, # Authentication failures
|
|
312
312
|
NotFoundException, # Resource not found
|
|
@@ -330,7 +330,7 @@ except APIException as e:
|
|
|
330
330
|
### Submit a Machine Learning Training Job
|
|
331
331
|
|
|
332
332
|
```python
|
|
333
|
-
from
|
|
333
|
+
from mlops import Task
|
|
334
334
|
|
|
335
335
|
task = Task()
|
|
336
336
|
|
|
@@ -339,16 +339,16 @@ result = task.submit(
|
|
|
339
339
|
cluster_id=1,
|
|
340
340
|
script="""#!/bin/bash
|
|
341
341
|
#SBATCH --gres=gpu:1
|
|
342
|
-
#SBATCH --cpus-per-task=
|
|
343
|
-
#SBATCH --mem=
|
|
342
|
+
#SBATCH --cpus-per-task=2
|
|
343
|
+
#SBATCH --mem=4GB
|
|
344
344
|
|
|
345
345
|
python train.py --config config.yaml
|
|
346
346
|
""",
|
|
347
347
|
resources={
|
|
348
|
-
"cpus_per_task":
|
|
349
|
-
"memory": "
|
|
348
|
+
"cpus_per_task": 2,
|
|
349
|
+
"memory": "4GB",
|
|
350
350
|
"gres": "gpu:1",
|
|
351
|
-
"time": "
|
|
351
|
+
"time": "1-00:00:00",  # 1 day
|
|
352
352
|
"partition": "gpu"
|
|
353
353
|
}
|
|
354
354
|
)
|
|
@@ -359,8 +359,8 @@ print(f"Training job submitted: {result.job_id}")
|
|
|
359
359
|
### Monitor Task Status
|
|
360
360
|
|
|
361
361
|
```python
|
|
362
|
-
from
|
|
363
|
-
from
|
|
362
|
+
from mlops import Task
|
|
363
|
+
from mlops.api.client.models.task_status import TaskStatus
|
|
364
364
|
import time
|
|
365
365
|
|
|
366
366
|
task = Task()
|
|
@@ -380,8 +380,8 @@ while True:
|
|
|
380
380
|
### List and Filter Tasks
|
|
381
381
|
|
|
382
382
|
```python
|
|
383
|
-
from
|
|
384
|
-
from
|
|
383
|
+
from mlops import Task
|
|
384
|
+
from mlops.api.client.models.task_status import TaskStatus
|
|
385
385
|
|
|
386
386
|
task = Task()
|
|
387
387
|
|
|
@@ -9,8 +9,11 @@ mlops/api/client/api/tasks/list_tasks.py,sha256=FczH2eLwPiOWLy7RqKDsOyWNkPZlEVtm
|
|
|
9
9
|
mlops/api/client/api/tasks/submit_task.py,sha256=fXV8QSM3J_vT4aCdYFBnqCjrNFj-pz-wBmOZWSEhttI,5015
|
|
10
10
|
mlops/api/client/client.py,sha256=o_mdLqyBCQstu5tS1WZFwqIEbGwkvWQ7eQjuCJw_5VY,12419
|
|
11
11
|
mlops/api/client/errors.py,sha256=gO8GBmKqmSNgAg-E5oT-oOyxztvp7V_6XG7OUTT15q0,546
|
|
12
|
-
mlops/api/client/models/__init__.py,sha256=
|
|
12
|
+
mlops/api/client/models/__init__.py,sha256=bCNYCRFGZi7lCcN3PqG9IdRZYI9PfcLKq_vZ78sE5Gw,1330
|
|
13
13
|
mlops/api/client/models/error_response.py,sha256=gmFOtAcZZTBPGrR3MXJe-_viEpfJOR9r_ffXNotaPlQ,1809
|
|
14
|
+
mlops/api/client/models/job_spec.py,sha256=rBOb7NsJhWel8kb6gEclGQAkFu93OJddxhemmcdxyyw,9372
|
|
15
|
+
mlops/api/client/models/job_spec_env.py,sha256=EtFZzKvfCEFyiriz4EKzmyVPgJmTVpBxpXer6ULZ94M,1218
|
|
16
|
+
mlops/api/client/models/job_spec_master_strategy.py,sha256=UmzktsAfMhyQYYKDKrjEQm6gcEC8iEr0h892jhoGNd4,156
|
|
14
17
|
mlops/api/client/models/message_response.py,sha256=rV3BMdP_fnmyxJW3mYLGECuUQ7VNVUKdzJwnY8Jdvu4,1609
|
|
15
18
|
mlops/api/client/models/task.py,sha256=XAioNH5gHvP5mDlDJ5cJlq10jHSKJuk82YNxuAV9rZo,61827
|
|
16
19
|
mlops/api/client/models/task_alloc_tres_type_0.py,sha256=U3lUyNmFDVyEZABMXJ997zn6vnPOJ_gjBJsczoPd41A,1330
|
|
@@ -19,18 +22,18 @@ mlops/api/client/models/task_job_resources_type_0.py,sha256=A63CcQn2at3q9Vq137BB
|
|
|
19
22
|
mlops/api/client/models/task_list_response.py,sha256=nVhi5LFiTUlyvmHz9Sc9-2A-awJ7s6lNKQQfwG2OfL4,2909
|
|
20
23
|
mlops/api/client/models/task_resources_type_0.py,sha256=36nxeOqAJS4ksfQtzoXigWVMhEV1Tnq5Z_64sHa3gGQ,1341
|
|
21
24
|
mlops/api/client/models/task_status.py,sha256=Tht4F2UeBp-QBLhh-z0fEw45r5cBCfkFUro-la42BPY,315
|
|
22
|
-
mlops/api/client/models/task_submit_request.py,sha256=
|
|
25
|
+
mlops/api/client/models/task_submit_request.py,sha256=IJvGHpITXJSVGQloPO3CeTYcyubVQGm3Fcuo-yvyeMo,22910
|
|
23
26
|
mlops/api/client/models/task_submit_request_environment_type_0.py,sha256=Wx6ye6vVHytSex186AeUm27-XMWMmZe6lbL2Ons2mkw,1454
|
|
24
27
|
mlops/api/client/models/task_submit_response.py,sha256=EK3ZXxo_XO5Yn2zdOrR-VMPKg9om49qQ1ywS2Smgink,2200
|
|
25
28
|
mlops/api/client/models/task_tres_type_0.py,sha256=rEaiQG7A19mlTIHDppzxuWa4oPfh9qsKjPhhVOlBf4g,1292
|
|
26
29
|
mlops/api/client/models/task_tres_used_type_0.py,sha256=4w6An7-ZCqa8cc3SPi7mcwGK-ekT6AYq_dEdf8KzoYA,1320
|
|
27
30
|
mlops/api/client/py.typed,sha256=8ZJUsxZiuOy1oJeVhsTWQhTG_6pTVHVXk5hJL79ebTk,25
|
|
28
31
|
mlops/api/client/types.py,sha256=AX4orxQZQJat3vZrgjJ-TYb2sNBL8kNo9yqYDT-n8y8,1391
|
|
29
|
-
mlops/connection_config.py,sha256=
|
|
32
|
+
mlops/connection_config.py,sha256=YSbvG2N2la0cu-cvgOTGnOG-UMXiRdKnaZSfZ6DyCkc,3166
|
|
30
33
|
mlops/exceptions.py,sha256=NmWmo5Vx0ATfDq9eNT8arKUxmhniWGlcI9vViDvaf_Y,1700
|
|
31
34
|
mlops/task/__init__.py,sha256=wJYM-85Xqomn5E44VmOPBXLwm3ZRgDgKgsYeDuH0kAs,135
|
|
32
35
|
mlops/task/client.py,sha256=LvZqJ1n9O7ae1NZ61xwGw1_-AnlIhMxFD6cnpgwZtDk,4555
|
|
33
|
-
mlops/task/task.py,sha256=
|
|
34
|
-
mlops_python_sdk-0.0.
|
|
35
|
-
mlops_python_sdk-0.0.
|
|
36
|
-
mlops_python_sdk-0.0.
|
|
36
|
+
mlops/task/task.py,sha256=XyPClTmYLRvcCiE0qN0wC4imYUEFHXwiJAeuyI1Uhf0,18806
|
|
37
|
+
mlops_python_sdk-1.0.0.dist-info/METADATA,sha256=3FT-LXVLrNp8DC6Saa5DLTetBkMPB-A_wPm8PtSm-DE,9871
|
|
38
|
+
mlops_python_sdk-1.0.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
39
|
+
mlops_python_sdk-1.0.0.dist-info/RECORD,,
|
|
File without changes
|