polyaxon 2.6.1__py3-none-any.whl → 2.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- polyaxon/_deploy/schemas/deployment.py +1 -0
- polyaxon/_flow/component/component.py +4 -0
- polyaxon/_flow/operations/compiled_operation.py +4 -0
- polyaxon/_flow/operations/operation.py +5 -2
- polyaxon/_flow/run/__init__.py +3 -0
- polyaxon/_flow/run/dask/dask.py +10 -0
- polyaxon/_flow/run/kubeflow/mpi_job.py +8 -0
- polyaxon/_flow/run/kubeflow/mx_job.py +16 -0
- polyaxon/_flow/run/kubeflow/paddle_job.py +8 -0
- polyaxon/_flow/run/kubeflow/pytorch_job.py +8 -0
- polyaxon/_flow/run/kubeflow/tf_job.py +12 -0
- polyaxon/_flow/run/kubeflow/xgboost_job.py +19 -33
- polyaxon/_flow/run/patch.py +37 -9
- polyaxon/_flow/run/ray/ray.py +9 -0
- polyaxon/_polyaxonfile/specs/compiled_operation.py +5 -1
- polyaxon/_polyaxonfile/specs/operation.py +5 -1
- polyaxon/_sdk/api/organizations_v1_api.py +1643 -617
- polyaxon/_sdk/schemas/v1_organization.py +3 -2
- polyaxon/_sdk/schemas/v1_project_settings.py +2 -1
- polyaxon/_sdk/schemas/v1_user_access.py +3 -2
- polyaxon/pkg.py +1 -1
- {polyaxon-2.6.1.dist-info → polyaxon-2.7.0.dist-info}/METADATA +7 -7
- {polyaxon-2.6.1.dist-info → polyaxon-2.7.0.dist-info}/RECORD +27 -27
- {polyaxon-2.6.1.dist-info → polyaxon-2.7.0.dist-info}/LICENSE +0 -0
- {polyaxon-2.6.1.dist-info → polyaxon-2.7.0.dist-info}/WHEEL +0 -0
- {polyaxon-2.6.1.dist-info → polyaxon-2.7.0.dist-info}/entry_points.txt +0 -0
- {polyaxon-2.6.1.dist-info → polyaxon-2.7.0.dist-info}/top_level.txt +0 -0
@@ -135,3 +135,7 @@ class V1CompiledOperation(BaseOp, RunMixin):
|
|
135
135
|
raise PolyaxonSchemaError(
|
136
136
|
"Operations with dag runtime do not support the `build` section."
|
137
137
|
)
|
138
|
+
|
139
|
+
def get_replica_types(self):
|
140
|
+
if self.is_distributed_run:
|
141
|
+
return self.run.get_replica_types()
|
@@ -648,8 +648,11 @@ class V1Operation(BaseOp, TemplateMixinConfig):
|
|
648
648
|
return result
|
649
649
|
|
650
650
|
kind = config.component.run.kind
|
651
|
-
|
652
|
-
|
651
|
+
replica_types = config.component.get_replica_types()
|
652
|
+
value = validate_run_patch(value, kind, replica_types=replica_types)
|
653
|
+
current_value = validate_run_patch(
|
654
|
+
current_value, kind, replica_types=replica_types
|
655
|
+
)
|
653
656
|
run_patch = current_value.patch(value, strategy)
|
654
657
|
run_patch = run_patch.to_dict()
|
655
658
|
run_patch.pop("kind")
|
polyaxon/_flow/run/__init__.py
CHANGED
polyaxon/_flow/run/dask/dask.py
CHANGED
@@ -175,3 +175,13 @@ class V1DaskJob(BaseRun, DestinationImageMixin):
|
|
175
175
|
if self.scheduler:
|
176
176
|
init += self.scheduler.get_all_init()
|
177
177
|
return init
|
178
|
+
|
179
|
+
def get_replica_types(self):
|
180
|
+
types = []
|
181
|
+
if self.job:
|
182
|
+
types.append("job")
|
183
|
+
if self.worker:
|
184
|
+
types.append("worker")
|
185
|
+
if self.scheduler:
|
186
|
+
types.append("scheduler")
|
187
|
+
return types
|
@@ -185,3 +185,11 @@ class V1MPIJob(BaseRun, DestinationImageMixin):
|
|
185
185
|
if self.worker:
|
186
186
|
init += self.worker.get_all_init()
|
187
187
|
return init
|
188
|
+
|
189
|
+
def get_replica_types(self):
|
190
|
+
types = []
|
191
|
+
if self.launcher:
|
192
|
+
types.append("launcher")
|
193
|
+
if self.worker:
|
194
|
+
types.append("worker")
|
195
|
+
return types
|
@@ -313,3 +313,19 @@ class V1MXJob(BaseRun, DestinationImageMixin):
|
|
313
313
|
if self.tuner_server:
|
314
314
|
init += self.tuner_server.get_all_init()
|
315
315
|
return init
|
316
|
+
|
317
|
+
def get_replica_types(self):
|
318
|
+
types = []
|
319
|
+
if self.scheduler:
|
320
|
+
types.append("scheduler")
|
321
|
+
if self.server:
|
322
|
+
types.append("server")
|
323
|
+
if self.worker:
|
324
|
+
types.append("worker")
|
325
|
+
if self.tuner:
|
326
|
+
types.append("tuner")
|
327
|
+
if self.tuner_tracker:
|
328
|
+
types.append("tuner_tracker")
|
329
|
+
if self.tuner_server:
|
330
|
+
types.append("tuner_server")
|
331
|
+
return types
|
@@ -205,3 +205,11 @@ class V1PaddleJob(BaseRun, DestinationImageMixin):
|
|
205
205
|
if self.worker:
|
206
206
|
init += self.worker.get_all_init()
|
207
207
|
return init
|
208
|
+
|
209
|
+
def get_replica_types(self):
|
210
|
+
types = []
|
211
|
+
if self.master:
|
212
|
+
types.append("master")
|
213
|
+
if self.worker:
|
214
|
+
types.append("worker")
|
215
|
+
return types
|
@@ -218,3 +218,11 @@ class V1PytorchJob(BaseRun, DestinationImageMixin):
|
|
218
218
|
if self.worker:
|
219
219
|
init += self.worker.get_all_init()
|
220
220
|
return init
|
221
|
+
|
222
|
+
def get_replica_types(self):
|
223
|
+
types = []
|
224
|
+
if self.master:
|
225
|
+
types.append("master")
|
226
|
+
if self.worker:
|
227
|
+
types.append("worker")
|
228
|
+
return types
|
@@ -248,3 +248,15 @@ class V1TFJob(BaseRun, DestinationImageMixin):
|
|
248
248
|
if self.evaluator:
|
249
249
|
init += self.evaluator.get_all_init()
|
250
250
|
return init
|
251
|
+
|
252
|
+
def get_replica_types(self):
|
253
|
+
types = []
|
254
|
+
if self.chief:
|
255
|
+
types.append("chief")
|
256
|
+
if self.ps:
|
257
|
+
types.append("ps")
|
258
|
+
if self.worker:
|
259
|
+
types.append("worker")
|
260
|
+
if self.evaluator:
|
261
|
+
types.append("evaluator")
|
262
|
+
return types
|
@@ -130,63 +130,49 @@ class V1XGBoostJob(BaseRun, DestinationImageMixin):
|
|
130
130
|
worker: Optional[Union[V1KFReplica, RefField]] = None
|
131
131
|
|
132
132
|
def apply_image_destination(self, image: str):
|
133
|
-
if self.
|
134
|
-
self.
|
135
|
-
self.
|
136
|
-
if self.ps:
|
137
|
-
self.ps.container = self.ps.container or V1Container()
|
138
|
-
self.ps.container.image = image
|
133
|
+
if self.master:
|
134
|
+
self.master.container = self.master.container or V1Container()
|
135
|
+
self.master.container.image = image
|
139
136
|
if self.worker:
|
140
137
|
self.worker.container = self.worker.container or V1Container()
|
141
138
|
self.worker.container.image = image
|
142
|
-
if self.evaluator:
|
143
|
-
self.evaluator.container = self.evaluator.container or V1Container()
|
144
|
-
self.evaluator.container.image = image
|
145
139
|
|
146
140
|
def get_resources(self):
|
147
141
|
resources = V1RunResources()
|
148
|
-
if self.
|
149
|
-
resources += self.
|
150
|
-
if self.ps:
|
151
|
-
resources += self.ps.get_resources()
|
142
|
+
if self.master:
|
143
|
+
resources += self.master.get_resources()
|
152
144
|
if self.worker:
|
153
145
|
resources += self.worker.get_resources()
|
154
|
-
if self.evaluator:
|
155
|
-
resources += self.evaluator.get_resources()
|
156
146
|
return resources
|
157
147
|
|
158
148
|
def get_all_containers(self):
|
159
149
|
containers = []
|
160
|
-
if self.
|
161
|
-
containers += self.
|
162
|
-
if self.ps:
|
163
|
-
containers += self.ps.get_all_containers()
|
150
|
+
if self.master:
|
151
|
+
containers += self.master.get_all_containers()
|
164
152
|
if self.worker:
|
165
153
|
containers += self.worker.get_all_containers()
|
166
|
-
if self.evaluator:
|
167
|
-
containers += self.evaluator.get_all_containers()
|
168
154
|
return containers
|
169
155
|
|
170
156
|
def get_all_connections(self):
|
171
157
|
connections = []
|
172
|
-
if self.
|
173
|
-
connections += self.
|
174
|
-
if self.ps:
|
175
|
-
connections += self.ps.get_all_connections()
|
158
|
+
if self.master:
|
159
|
+
connections += self.master.get_all_connections()
|
176
160
|
if self.worker:
|
177
161
|
connections += self.worker.get_all_connections()
|
178
|
-
if self.evaluator:
|
179
|
-
connections += self.evaluator.get_all_connections()
|
180
162
|
return connections
|
181
163
|
|
182
164
|
def get_all_init(self):
|
183
165
|
init = []
|
184
|
-
if self.
|
185
|
-
init += self.
|
186
|
-
if self.ps:
|
187
|
-
init += self.ps.get_all_init()
|
166
|
+
if self.master:
|
167
|
+
init += self.master.get_all_init()
|
188
168
|
if self.worker:
|
189
169
|
init += self.worker.get_all_init()
|
190
|
-
if self.evaluator:
|
191
|
-
init += self.evaluator.get_all_init()
|
192
170
|
return init
|
171
|
+
|
172
|
+
def get_replica_types(self):
|
173
|
+
types = []
|
174
|
+
if self.master:
|
175
|
+
types.append(self.master.replicas)
|
176
|
+
if self.worker:
|
177
|
+
types.append(self.worker.replicas)
|
178
|
+
return types
|
polyaxon/_flow/run/patch.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
from typing import Dict
|
1
|
+
from typing import Dict, List
|
2
2
|
|
3
3
|
from clipped.compact.pydantic import ValidationError
|
4
4
|
|
@@ -21,7 +21,9 @@ from polyaxon._flow.run.tuner import V1TunerJob
|
|
21
21
|
from polyaxon.exceptions import PolyaxonValidationError
|
22
22
|
|
23
23
|
|
24
|
-
def validate_run_patch(run_patch: Dict, kind: V1RunKind):
|
24
|
+
def validate_run_patch(
|
25
|
+
run_patch: Dict, kind: V1RunKind, replica_types: List[str] = None
|
26
|
+
):
|
25
27
|
if kind == V1RunKind.JOB:
|
26
28
|
patch = V1Job.from_dict(run_patch)
|
27
29
|
elif kind == V1RunKind.SERVICE:
|
@@ -37,37 +39,63 @@ def validate_run_patch(run_patch: Dict, kind: V1RunKind):
|
|
37
39
|
try:
|
38
40
|
patch = V1PytorchJob.from_dict(run_patch)
|
39
41
|
except ValidationError:
|
40
|
-
|
42
|
+
if replica_types:
|
43
|
+
patch = V1PytorchJob.from_dict({k: run_patch for k in replica_types})
|
44
|
+
else:
|
45
|
+
patch = V1KFReplica.from_dict(run_patch)
|
41
46
|
elif kind == V1RunKind.PADDLEJOB:
|
42
47
|
try:
|
43
48
|
patch = V1PaddleJob.from_dict(run_patch)
|
44
49
|
except ValidationError:
|
45
|
-
|
50
|
+
if replica_types:
|
51
|
+
patch = V1PaddleJob.from_dict({k: run_patch for k in replica_types})
|
52
|
+
else:
|
53
|
+
patch = V1KFReplica.from_dict(run_patch)
|
46
54
|
elif kind == V1RunKind.TFJOB:
|
47
55
|
try:
|
48
56
|
patch = V1TFJob.from_dict(run_patch)
|
49
57
|
except ValidationError:
|
50
|
-
|
58
|
+
if replica_types:
|
59
|
+
patch = V1TFJob.from_dict({k: run_patch for k in replica_types})
|
60
|
+
else:
|
61
|
+
patch = V1KFReplica.from_dict(run_patch)
|
51
62
|
elif kind == V1RunKind.MXJOB:
|
52
63
|
try:
|
53
64
|
patch = V1MXJob.from_dict(run_patch)
|
54
65
|
except ValidationError:
|
55
|
-
|
66
|
+
if replica_types:
|
67
|
+
patch = V1MXJob.from_dict({k: run_patch for k in replica_types})
|
68
|
+
else:
|
69
|
+
patch = V1KFReplica.from_dict(run_patch)
|
56
70
|
elif kind == V1RunKind.XGBJOB:
|
57
71
|
try:
|
58
72
|
patch = V1XGBoostJob.from_dict(run_patch)
|
59
73
|
except ValidationError:
|
60
|
-
|
74
|
+
if replica_types:
|
75
|
+
patch = V1XGBoostJob.from_dict({k: run_patch for k in replica_types})
|
76
|
+
else:
|
77
|
+
patch = V1KFReplica.from_dict(run_patch)
|
61
78
|
elif kind == V1RunKind.RAYJOB:
|
62
79
|
try:
|
63
80
|
patch = V1RayJob.from_dict(run_patch)
|
64
81
|
except ValidationError:
|
65
|
-
|
82
|
+
if replica_types:
|
83
|
+
replicas = {}
|
84
|
+
if "head" in replica_types:
|
85
|
+
replicas["head"] = run_patch
|
86
|
+
replica_types = [r for r in replica_types if r != "head"]
|
87
|
+
replicas["workers"] = {replica: run_patch for replica in replica_types}
|
88
|
+
patch = V1RayJob.from_dict(replicas)
|
89
|
+
else:
|
90
|
+
patch = V1RayReplica.from_dict(run_patch)
|
66
91
|
elif kind == V1RunKind.DASKJOB:
|
67
92
|
try:
|
68
93
|
patch = V1DaskJob.from_dict(run_patch)
|
69
94
|
except ValidationError:
|
70
|
-
|
95
|
+
if replica_types:
|
96
|
+
patch = V1DaskJob.from_dict({k: run_patch for k in replica_types})
|
97
|
+
else:
|
98
|
+
patch = V1DaskReplica.from_dict(run_patch)
|
71
99
|
elif kind == V1RunKind.NOTIFIER:
|
72
100
|
patch = V1NotifierJob.from_dict(run_patch)
|
73
101
|
elif kind == V1RunKind.TUNER:
|
polyaxon/_flow/run/ray/ray.py
CHANGED
@@ -136,6 +136,7 @@ class V1RayJob(BaseRun, DestinationImageMixin):
|
|
136
136
|
|
137
137
|
_IDENTIFIER = V1RunKind.RAYJOB
|
138
138
|
_CUSTOM_DUMP_FIELDS = {"head", "workers"}
|
139
|
+
_FIELDS_DICT_PATCH = ["workers"]
|
139
140
|
|
140
141
|
kind: Literal[_IDENTIFIER] = _IDENTIFIER
|
141
142
|
entrypoint: Optional[str] = None
|
@@ -196,3 +197,11 @@ class V1RayJob(BaseRun, DestinationImageMixin):
|
|
196
197
|
worker = self.workers[worker_name]
|
197
198
|
init += worker.get_all_init()
|
198
199
|
return init
|
200
|
+
|
201
|
+
def get_replica_types(self):
|
202
|
+
types = []
|
203
|
+
if self.head:
|
204
|
+
types.append("head")
|
205
|
+
if self.workers:
|
206
|
+
types += list(self.workers.keys())
|
207
|
+
return types
|
@@ -321,7 +321,11 @@ class CompiledOperationSpecification(BaseSpecification):
|
|
321
321
|
preset = OperationSpecification.read(preset, is_preset=True) # type: V1Operation
|
322
322
|
if preset.run_patch:
|
323
323
|
config.run = config.run.patch(
|
324
|
-
validate_run_patch(preset.run_patch, config.run.kind),
|
324
|
+
validate_run_patch(
|
325
|
+
preset.run_patch,
|
326
|
+
config.run.kind,
|
327
|
+
replica_types=config.get_replica_types(),
|
328
|
+
),
|
325
329
|
strategy=preset.patch_strategy,
|
326
330
|
)
|
327
331
|
patch_keys = {
|
@@ -54,7 +54,11 @@ class OperationSpecification(BaseSpecification):
|
|
54
54
|
else config.patch_strategy
|
55
55
|
)
|
56
56
|
component.run = component.run.patch(
|
57
|
-
validate_run_patch(config.run_patch, component.run.kind),
|
57
|
+
validate_run_patch(
|
58
|
+
config.run_patch,
|
59
|
+
component.run.kind,
|
60
|
+
replica_types=component.get_replica_types(),
|
61
|
+
),
|
58
62
|
strategy=patch_strategy,
|
59
63
|
)
|
60
64
|
|