polyaxon 2.0.0rc49__py3-none-any.whl → 2.4.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- polyaxon/_auxiliaries/cleaner.py +8 -3
- polyaxon/_auxiliaries/init.py +7 -2
- polyaxon/_auxiliaries/notifier.py +8 -2
- polyaxon/_auxiliaries/sidecar.py +30 -2
- polyaxon/_cli/artifacts.py +96 -11
- polyaxon/_cli/components.py +96 -11
- polyaxon/_cli/config.py +118 -22
- polyaxon/_cli/dashboard.py +15 -2
- polyaxon/_cli/init.py +1 -1
- polyaxon/_cli/models.py +96 -11
- polyaxon/_cli/operations.py +267 -90
- polyaxon/_cli/project_versions.py +139 -6
- polyaxon/_cli/projects.py +23 -9
- polyaxon/_cli/run.py +37 -9
- polyaxon/_cli/services/agent.py +2 -2
- polyaxon/_cli/services/clean_artifacts.py +1 -1
- polyaxon/_cli/services/sidecar.py +8 -1
- polyaxon/_client/client.py +17 -0
- polyaxon/_client/mixin.py +39 -0
- polyaxon/_client/project.py +218 -23
- polyaxon/_client/run.py +131 -33
- polyaxon/_compiler/contexts/contexts.py +2 -2
- polyaxon/_compiler/contexts/ray_job.py +4 -2
- polyaxon/_compiler/resolver/agent.py +12 -2
- polyaxon/_compiler/resolver/runtime.py +2 -2
- polyaxon/_contexts/paths.py +4 -7
- polyaxon/_deploy/operators/compose.py +1 -27
- polyaxon/_deploy/schemas/deployment.py +4 -1
- polyaxon/_deploy/schemas/intervals.py +0 -7
- polyaxon/_deploy/schemas/proxy.py +1 -0
- polyaxon/_deploy/schemas/service.py +11 -1
- polyaxon/_docker/converter/base/base.py +8 -0
- polyaxon/_docker/executor.py +10 -4
- polyaxon/_env_vars/getters/owner_entity.py +4 -2
- polyaxon/_env_vars/getters/project.py +4 -2
- polyaxon/_env_vars/getters/run.py +5 -2
- polyaxon/_env_vars/keys.py +7 -1
- polyaxon/_flow/__init__.py +2 -0
- polyaxon/_flow/builds/__init__.py +19 -6
- polyaxon/_flow/component/base.py +1 -0
- polyaxon/_flow/component/component.py +14 -0
- polyaxon/_flow/environment/__init__.py +8 -8
- polyaxon/_flow/hooks/__init__.py +19 -6
- polyaxon/_flow/init/__init__.py +6 -6
- polyaxon/_flow/matrix/iterative.py +0 -1
- polyaxon/_flow/matrix/tuner.py +18 -6
- polyaxon/_flow/operations/operation.py +44 -17
- polyaxon/_flow/plugins/__init__.py +6 -0
- polyaxon/_flow/run/__init__.py +2 -2
- polyaxon/_flow/run/dag.py +2 -2
- polyaxon/_flow/run/dask/dask.py +0 -1
- polyaxon/_flow/run/dask/replica.py +3 -3
- polyaxon/_flow/run/enums.py +5 -0
- polyaxon/_flow/run/job.py +4 -4
- polyaxon/_flow/run/kubeflow/mpi_job.py +1 -2
- polyaxon/_flow/run/kubeflow/mx_job.py +1 -2
- polyaxon/_flow/run/kubeflow/paddle_job.py +35 -4
- polyaxon/_flow/run/kubeflow/pytorch_job.py +51 -5
- polyaxon/_flow/run/kubeflow/replica.py +4 -4
- polyaxon/_flow/run/kubeflow/scheduling_policy.py +12 -0
- polyaxon/_flow/run/kubeflow/tf_job.py +3 -3
- polyaxon/_flow/run/kubeflow/xgboost_job.py +1 -2
- polyaxon/_flow/run/ray/ray.py +2 -3
- polyaxon/_flow/run/ray/replica.py +3 -3
- polyaxon/_flow/run/service.py +4 -4
- polyaxon/_fs/fs.py +7 -2
- polyaxon/_fs/utils.py +3 -2
- polyaxon/_k8s/converter/base/base.py +2 -1
- polyaxon/_k8s/converter/base/main.py +1 -0
- polyaxon/_k8s/converter/base/sidecar.py +16 -1
- polyaxon/_k8s/converter/common/accelerators.py +7 -4
- polyaxon/_k8s/converter/converters/job.py +1 -1
- polyaxon/_k8s/converter/converters/kubeflow/paddle_job.py +1 -0
- polyaxon/_k8s/converter/converters/kubeflow/pytroch_job.py +2 -0
- polyaxon/_k8s/converter/converters/kubeflow/tf_job.py +1 -0
- polyaxon/_k8s/converter/converters/ray_job.py +4 -2
- polyaxon/_k8s/custom_resources/dask_job.py +3 -0
- polyaxon/_k8s/custom_resources/kubeflow/common.py +4 -1
- polyaxon/_k8s/custom_resources/kubeflow/paddle_job.py +10 -1
- polyaxon/_k8s/custom_resources/kubeflow/pytorch_job.py +14 -1
- polyaxon/_k8s/custom_resources/kubeflow/tf_job.py +4 -0
- polyaxon/_k8s/custom_resources/ray_job.py +3 -0
- polyaxon/_k8s/custom_resources/setter.py +1 -1
- polyaxon/_k8s/executor/async_executor.py +2 -0
- polyaxon/_k8s/executor/base.py +23 -6
- polyaxon/_k8s/logging/async_monitor.py +150 -5
- polyaxon/_k8s/manager/async_manager.py +96 -23
- polyaxon/_k8s/manager/base.py +4 -0
- polyaxon/_k8s/manager/manager.py +282 -134
- polyaxon/_local_process/__init__.py +0 -0
- polyaxon/_local_process/agent.py +6 -0
- polyaxon/_local_process/converter/__init__.py +1 -0
- polyaxon/_local_process/converter/base/__init__.py +1 -0
- polyaxon/_local_process/converter/base/base.py +140 -0
- polyaxon/_local_process/converter/base/containers.py +69 -0
- polyaxon/_local_process/converter/base/env_vars.py +253 -0
- polyaxon/_local_process/converter/base/init.py +414 -0
- polyaxon/_local_process/converter/base/main.py +74 -0
- polyaxon/_local_process/converter/base/mounts.py +82 -0
- polyaxon/_local_process/converter/converters/__init__.py +8 -0
- polyaxon/_local_process/converter/converters/job.py +40 -0
- polyaxon/_local_process/converter/converters/service.py +41 -0
- polyaxon/_local_process/converter/mixins.py +38 -0
- polyaxon/_local_process/executor.py +132 -0
- polyaxon/_local_process/process_types.py +39 -0
- polyaxon/_managers/agent.py +2 -0
- polyaxon/_managers/home.py +2 -1
- polyaxon/_operations/tuner.py +1 -0
- polyaxon/_polyaxonfile/check.py +2 -0
- polyaxon/_polyaxonfile/manager/operations.py +3 -0
- polyaxon/_polyaxonfile/manager/workflows.py +2 -0
- polyaxon/_polyaxonfile/specs/compiled_operation.py +1 -0
- polyaxon/_polyaxonfile/specs/operation.py +1 -0
- polyaxon/_polyaxonfile/specs/sections.py +3 -0
- polyaxon/_pql/manager.py +1 -1
- polyaxon/_runner/agent/async_agent.py +97 -21
- polyaxon/_runner/agent/base_agent.py +27 -9
- polyaxon/_runner/agent/client.py +15 -1
- polyaxon/_runner/agent/sync_agent.py +85 -20
- polyaxon/_runner/converter/converter.py +6 -2
- polyaxon/_runner/executor.py +13 -7
- polyaxon/_schemas/agent.py +27 -1
- polyaxon/_schemas/client.py +30 -3
- polyaxon/_schemas/installation.py +4 -3
- polyaxon/_schemas/lifecycle.py +10 -5
- polyaxon/_schemas/log_handler.py +2 -3
- polyaxon/_schemas/types/artifacts.py +3 -3
- polyaxon/_schemas/types/dockerfile.py +3 -3
- polyaxon/_schemas/types/file.py +3 -3
- polyaxon/_schemas/types/git.py +3 -3
- polyaxon/_schemas/types/tensorboard.py +3 -3
- polyaxon/_sdk/api/agents_v1_api.py +1076 -73
- polyaxon/_sdk/api/organizations_v1_api.py +371 -10
- polyaxon/_sdk/api/project_dashboards_v1_api.py +12 -12
- polyaxon/_sdk/api/project_searches_v1_api.py +12 -12
- polyaxon/_sdk/api/projects_v1_api.py +221 -44
- polyaxon/_sdk/api/runs_v1_api.py +917 -445
- polyaxon/_sdk/api/service_accounts_v1_api.py +16 -16
- polyaxon/_sdk/api/teams_v1_api.py +2827 -375
- polyaxon/_sdk/api/users_v1_api.py +231 -55
- polyaxon/_sdk/async_client/api_client.py +4 -0
- polyaxon/_sdk/schemas/__init__.py +10 -2
- polyaxon/_sdk/schemas/v1_agent.py +2 -1
- polyaxon/_sdk/schemas/v1_agent_reconcile_body_request.py +14 -0
- polyaxon/_sdk/schemas/v1_artifact_tree.py +1 -1
- polyaxon/_sdk/schemas/v1_dashboard_spec.py +4 -0
- polyaxon/_sdk/schemas/v1_events_response.py +4 -0
- polyaxon/_sdk/schemas/v1_organization.py +1 -0
- polyaxon/_sdk/schemas/v1_preset.py +8 -0
- polyaxon/_sdk/schemas/v1_project.py +1 -0
- polyaxon/_sdk/schemas/v1_project_settings.py +4 -2
- polyaxon/_sdk/schemas/v1_run.py +2 -2
- polyaxon/_sdk/schemas/v1_run_edge_lineage.py +14 -0
- polyaxon/_sdk/schemas/v1_run_edges_graph.py +9 -0
- polyaxon/_sdk/schemas/v1_section_spec.py +7 -2
- polyaxon/_sdk/schemas/v1_settings_catalog.py +1 -0
- polyaxon/_sdk/schemas/v1_team.py +3 -0
- polyaxon/_sdk/schemas/v1_user.py +1 -2
- polyaxon/_sdk/schemas/v1_user_access.py +17 -0
- polyaxon/_services/values.py +1 -0
- polyaxon/_sidecar/container/__init__.py +39 -18
- polyaxon/_sidecar/container/monitors/__init__.py +1 -0
- polyaxon/_sidecar/container/monitors/logs.py +10 -13
- polyaxon/_sidecar/container/monitors/spec.py +24 -0
- polyaxon/_sidecar/ignore.py +0 -1
- polyaxon/_utils/fqn_utils.py +25 -2
- polyaxon/client.py +1 -1
- polyaxon/pkg.py +1 -1
- polyaxon/schemas.py +8 -1
- polyaxon/settings.py +6 -0
- {polyaxon-2.0.0rc49.dist-info → polyaxon-2.4.0rc1.dist-info}/METADATA +43 -43
- {polyaxon-2.0.0rc49.dist-info → polyaxon-2.4.0rc1.dist-info}/RECORD +176 -155
- {polyaxon-2.0.0rc49.dist-info → polyaxon-2.4.0rc1.dist-info}/WHEEL +1 -1
- polyaxon/_sdk/schemas/v1_project_user_access.py +0 -10
- {polyaxon-2.0.0rc49.dist-info → polyaxon-2.4.0rc1.dist-info}/LICENSE +0 -0
- {polyaxon-2.0.0rc49.dist-info → polyaxon-2.4.0rc1.dist-info}/entry_points.txt +0 -0
- {polyaxon-2.0.0rc49.dist-info → polyaxon-2.4.0rc1.dist-info}/top_level.txt +0 -0
@@ -45,6 +45,7 @@ class V1Operation(BaseOp, TemplateMixinConfig):
|
|
45
45
|
tags: List[str], optional
|
46
46
|
presets: str, optional
|
47
47
|
queue: str, optional
|
48
|
+
namespace: str, optional
|
48
49
|
cache: [V1Cache](/docs/automation/helpers/cache/), optional
|
49
50
|
termination: [V1Termination](/docs/core/specification/termination/), optional
|
50
51
|
plugins: [V1Plugins](/docs/core/specification/plugins/), optional
|
@@ -81,6 +82,7 @@ class V1Operation(BaseOp, TemplateMixinConfig):
|
|
81
82
|
>>> tags:
|
82
83
|
>>> presets:
|
83
84
|
>>> queue:
|
85
|
+
>>> namespace:
|
84
86
|
>>> cache:
|
85
87
|
>>> termination:
|
86
88
|
>>> plugins:
|
@@ -111,6 +113,7 @@ class V1Operation(BaseOp, TemplateMixinConfig):
|
|
111
113
|
>>> tags=["test"],
|
112
114
|
>>> presets=["test"],
|
113
115
|
>>> queue="test",
|
116
|
+
>>> namespace="test",
|
114
117
|
>>> cache=V1Cache(...),
|
115
118
|
>>> termination=V1Termination(...),
|
116
119
|
>>> plugins=V1Plugins(...),
|
@@ -248,6 +251,20 @@ class V1Operation(BaseOp, TemplateMixinConfig):
|
|
248
251
|
>>> queue: queue-name
|
249
252
|
```
|
250
253
|
|
254
|
+
### namespace
|
255
|
+
|
256
|
+
> **Note**: Please note that this field is only available in some commercial editions.
|
257
|
+
|
258
|
+
The namespace to use for this operation run,
|
259
|
+
if provided, it will override the component's namespace otherwise
|
260
|
+
the namespace of the component will be used if it exists or
|
261
|
+
it will default to the agent's namespace.
|
262
|
+
|
263
|
+
```yaml
|
264
|
+
>>> operation:
|
265
|
+
>>> namespace: polyaxon
|
266
|
+
```
|
267
|
+
|
251
268
|
### cache
|
252
269
|
|
253
270
|
The [cache](/docs/automation/helpers/cache/) to use for this operation run,
|
@@ -645,13 +662,16 @@ class V1Operation(BaseOp, TemplateMixinConfig):
|
|
645
662
|
for k, v in contexts.items():
|
646
663
|
params[k] = V1Param(value=v, context_only=True)
|
647
664
|
|
648
|
-
|
649
|
-
|
650
|
-
hub_ref=hook.hub_ref
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
665
|
+
content = {"run_patch": run_patch, "params": params}
|
666
|
+
if hook.hub_ref:
|
667
|
+
content["hub_ref"] = hook.hub_ref
|
668
|
+
if hook.presets:
|
669
|
+
content["presets"] = hook.presets
|
670
|
+
if hook.queue:
|
671
|
+
content["queue"] = hook.queue
|
672
|
+
if hook.namespace:
|
673
|
+
content["namespace"] = hook.namespace
|
674
|
+
return cls.construct(**content)
|
655
675
|
|
656
676
|
@classmethod
|
657
677
|
def from_build(cls, build: V1Build, contexts: Optional[Dict] = None):
|
@@ -667,16 +687,23 @@ class V1Operation(BaseOp, TemplateMixinConfig):
|
|
667
687
|
if not destination.connection or build.connection:
|
668
688
|
destination.connection = build.connection
|
669
689
|
params["destination"] = destination
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
690
|
+
content = {
|
691
|
+
"run_patch": build.run_patch,
|
692
|
+
"patch_strategy": build.patch_strategy,
|
693
|
+
"params": params,
|
694
|
+
}
|
695
|
+
if build.hub_ref:
|
696
|
+
content["hub_ref"] = build.hub_ref
|
697
|
+
if build.presets:
|
698
|
+
content["presets"] = build.presets
|
699
|
+
if build.queue:
|
700
|
+
content["queue"] = build.queue
|
701
|
+
if build.namespace:
|
702
|
+
content["namespace"] = build.namespace
|
703
|
+
if build.cache:
|
704
|
+
content["cache"] = build.cache
|
705
|
+
|
706
|
+
return cls.construct(**content)
|
680
707
|
|
681
708
|
|
682
709
|
PartialV1Operation = to_partial(V1Operation)
|
@@ -329,6 +329,12 @@ class V1Plugins(BaseSchemaModel):
|
|
329
329
|
elif self.collect_logs is None:
|
330
330
|
self.collect_logs = default
|
331
331
|
|
332
|
+
def set_collect_spec(self, default: bool = True):
|
333
|
+
if self.no_api():
|
334
|
+
self.collect_spec = False
|
335
|
+
elif self.collect_spec is None:
|
336
|
+
self.collect_spec = default
|
337
|
+
|
332
338
|
def set_collect_resources(self, default: bool = True):
|
333
339
|
if self.no_api():
|
334
340
|
self.collect_resources = False
|
polyaxon/_flow/run/__init__.py
CHANGED
@@ -17,8 +17,8 @@ from polyaxon._flow.run.job import V1Job
|
|
17
17
|
from polyaxon._flow.run.kubeflow.clean_pod_policy import V1CleanPodPolicy
|
18
18
|
from polyaxon._flow.run.kubeflow.mpi_job import V1MPIJob
|
19
19
|
from polyaxon._flow.run.kubeflow.mx_job import MXJobMode, V1MXJob
|
20
|
-
from polyaxon._flow.run.kubeflow.paddle_job import V1PaddleJob
|
21
|
-
from polyaxon._flow.run.kubeflow.pytorch_job import V1PytorchJob
|
20
|
+
from polyaxon._flow.run.kubeflow.paddle_job import V1PaddleElasticPolicy, V1PaddleJob
|
21
|
+
from polyaxon._flow.run.kubeflow.pytorch_job import V1PytorchElasticPolicy, V1PytorchJob
|
22
22
|
from polyaxon._flow.run.kubeflow.replica import V1KFReplica
|
23
23
|
from polyaxon._flow.run.kubeflow.scheduling_policy import V1SchedulingPolicy
|
24
24
|
from polyaxon._flow.run.kubeflow.tf_job import V1TFJob
|
polyaxon/_flow/run/dag.py
CHANGED
@@ -83,13 +83,13 @@ class V1Dag(BaseRun):
|
|
83
83
|
|
84
84
|
```python
|
85
85
|
>>> from polyaxon.schemas import V1Dag, V1Component, V1Environment, V1Operation
|
86
|
-
>>> from polyaxon
|
86
|
+
>>> from polyaxon import k8s
|
87
87
|
>>> dag = V1Dag(
|
88
88
|
>>> operations=[V1Operation(...)],
|
89
89
|
>>> components=[V1Component(...), V1Component(...)],
|
90
90
|
>>> environment=V1Environment(...),
|
91
91
|
>>> connections=["connection-name1"],
|
92
|
-
>>> volumes=[
|
92
|
+
>>> volumes=[k8s.V1Volume(...)],
|
93
93
|
>>> )
|
94
94
|
```
|
95
95
|
|
polyaxon/_flow/run/dask/dask.py
CHANGED
@@ -43,7 +43,6 @@ class V1DaskJob(BaseRun, DestinationImageMixin):
|
|
43
43
|
|
44
44
|
```python
|
45
45
|
>>> from polyaxon.schemas import V1Environment, V1Init, V1DaskJob, V1DaskReplica
|
46
|
-
>>> from polyaxon.k8s import k8s_schemas
|
47
46
|
>>> dask_job = V1DaskJob(
|
48
47
|
>>> job=V1DaskReplica(...),
|
49
48
|
>>> worker=V1DaskReplica(...),
|
@@ -40,13 +40,13 @@ class V1DaskReplica(BaseSchemaModel):
|
|
40
40
|
|
41
41
|
```python
|
42
42
|
>>> from polyaxon.schemas import V1Environment, V1Init, V1DaskReplica
|
43
|
-
>>> from polyaxon
|
43
|
+
>>> from polyaxon import k8s
|
44
44
|
>>> replica = V1DaskReplica(
|
45
45
|
>>> replicas=2,
|
46
46
|
>>> environment=V1Environment(...),
|
47
47
|
>>> init=[V1Init(...)],
|
48
|
-
>>> sidecars=[
|
49
|
-
>>> container=
|
48
|
+
>>> sidecars=[k8s.V1Container(...)],
|
49
|
+
>>> container=k8s.V1Container(...),
|
50
50
|
>>> )
|
51
51
|
```
|
52
52
|
|
polyaxon/_flow/run/enums.py
CHANGED
@@ -21,6 +21,10 @@ class V1RunKind(str, PEnum):
|
|
21
21
|
CLEANER = "cleaner"
|
22
22
|
BUILDER = "builder"
|
23
23
|
|
24
|
+
@classmethod
|
25
|
+
def has_pipeline(cls, kind: str):
|
26
|
+
return kind in (cls.DAG, cls.MATRIX, cls.SCHEDULE, cls.TUNER)
|
27
|
+
|
24
28
|
@classmethod
|
25
29
|
def has_service(cls, kind: str):
|
26
30
|
return kind in (
|
@@ -56,6 +60,7 @@ class V1RunEdgeKind(str, PEnum):
|
|
56
60
|
RUN = "run"
|
57
61
|
TB = "tb"
|
58
62
|
BUILD = "build"
|
63
|
+
MANUAL = "manual"
|
59
64
|
|
60
65
|
|
61
66
|
class V1RunPending(str, PEnum):
|
polyaxon/_flow/run/job.py
CHANGED
@@ -45,14 +45,14 @@ class V1Job(BaseRun, DestinationImageMixin):
|
|
45
45
|
|
46
46
|
```python
|
47
47
|
>>> from polyaxon.schemas import V1Environment, V1Init, V1Job
|
48
|
-
>>> from polyaxon
|
48
|
+
>>> from polyaxon import k8s
|
49
49
|
>>> job = V1Job(
|
50
50
|
>>> environment=V1Environment(...),
|
51
51
|
>>> connections=["connection-name1"],
|
52
|
-
>>> volumes=[
|
52
|
+
>>> volumes=[k8s.V1Volume(...)],
|
53
53
|
>>> init=[V1Init(...)],
|
54
|
-
>>> sidecars=[
|
55
|
-
>>> container=
|
54
|
+
>>> sidecars=[k8s.V1Container(...)],
|
55
|
+
>>> container=k8s.V1Container(...),
|
56
56
|
>>> )
|
57
57
|
```
|
58
58
|
|
@@ -20,7 +20,7 @@ class V1MPIJob(BaseRun, DestinationImageMixin):
|
|
20
20
|
Args:
|
21
21
|
kind: str, should be equal `mpijob`
|
22
22
|
clean_pod_policy: str, one of [`All`, `Running`, `None`]
|
23
|
-
scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/scheduling-policy/), optional # noqa
|
23
|
+
scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/kubeflow-scheduling-policy/), optional # noqa
|
24
24
|
slots_per_worker: int, optional
|
25
25
|
launcher: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
|
26
26
|
worker: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
|
@@ -41,7 +41,6 @@ class V1MPIJob(BaseRun, DestinationImageMixin):
|
|
41
41
|
|
42
42
|
```python
|
43
43
|
>>> from polyaxon.schemas import V1KFReplica, V1MPIJob
|
44
|
-
>>> from polyaxon.k8s import k8s_schemas
|
45
44
|
>>> mpi_job = V1MPIJob(
|
46
45
|
>>> clean_pod_policy='All',
|
47
46
|
>>> launcher=V1KFReplica(...),
|
@@ -26,7 +26,7 @@ class V1MXJob(BaseRun, DestinationImageMixin):
|
|
26
26
|
Args:
|
27
27
|
kind: str, should be equal `mxjob`
|
28
28
|
clean_pod_policy: str, one of [`All`, `Running`, `None`]
|
29
|
-
scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/scheduling-policy/), optional # noqa
|
29
|
+
scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/kubeflow-scheduling-policy/), optional # noqa
|
30
30
|
mode: str, one of [`MXTrain`, `MXTune`]
|
31
31
|
scheduler: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
|
32
32
|
server: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
|
@@ -55,7 +55,6 @@ class V1MXJob(BaseRun, DestinationImageMixin):
|
|
55
55
|
|
56
56
|
```python
|
57
57
|
>>> from polyaxon.schemas import V1KFReplica, V1MXJob
|
58
|
-
>>> from polyaxon.k8s import k8s_schemas
|
59
58
|
>>> mx_job = V1MXJob(
|
60
59
|
>>> clean_pod_policy='All',
|
61
60
|
>>> scheduler=V1KFReplica(...),
|
@@ -1,8 +1,8 @@
|
|
1
|
-
from typing import Optional, Union
|
1
|
+
from typing import Dict, List, Optional, Union
|
2
2
|
from typing_extensions import Literal
|
3
3
|
|
4
4
|
from clipped.compact.pydantic import Field
|
5
|
-
from clipped.types.ref_or_obj import RefField
|
5
|
+
from clipped.types.ref_or_obj import IntOrRef, RefField
|
6
6
|
|
7
7
|
from polyaxon._flow.run.base import BaseRun
|
8
8
|
from polyaxon._flow.run.enums import V1RunKind
|
@@ -12,6 +12,25 @@ from polyaxon._flow.run.kubeflow.scheduling_policy import V1SchedulingPolicy
|
|
12
12
|
from polyaxon._flow.run.resources import V1RunResources
|
13
13
|
from polyaxon._flow.run.utils import DestinationImageMixin
|
14
14
|
from polyaxon._k8s.k8s_schemas import V1Container
|
15
|
+
from polyaxon._schemas.base import BaseSchemaModel
|
16
|
+
|
17
|
+
|
18
|
+
class V1PaddleElasticPolicy(BaseSchemaModel):
|
19
|
+
"""Elastic policy for Paddle distributed runs.
|
20
|
+
|
21
|
+
Args:
|
22
|
+
minReplicas: int, optional
|
23
|
+
maxReplicas: int, optional
|
24
|
+
maxRestarts: int, optional
|
25
|
+
metrics: List[Dict], optional
|
26
|
+
"""
|
27
|
+
|
28
|
+
_IDENTIFIER = "elasticPolicy"
|
29
|
+
|
30
|
+
min_replicas: Optional[IntOrRef] = Field(alias="minReplicas")
|
31
|
+
max_replicas: Optional[IntOrRef] = Field(alias="maxReplicas")
|
32
|
+
max_restarts: Optional[IntOrRef] = Field(alias="maxRestarts")
|
33
|
+
metrics: Optional[List[Dict]] = Field(alias="Metrics")
|
15
34
|
|
16
35
|
|
17
36
|
class V1PaddleJob(BaseRun, DestinationImageMixin):
|
@@ -21,7 +40,7 @@ class V1PaddleJob(BaseRun, DestinationImageMixin):
|
|
21
40
|
Args:
|
22
41
|
kind: str, should be equal `paddlejob`
|
23
42
|
clean_pod_policy: str, one of [`All`, `Running`, `None`]
|
24
|
-
scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/scheduling-policy/), optional # noqa
|
43
|
+
scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/kubeflow-scheduling-policy/), optional # noqa
|
25
44
|
master: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
|
26
45
|
worker: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
|
27
46
|
|
@@ -40,7 +59,6 @@ class V1PaddleJob(BaseRun, DestinationImageMixin):
|
|
40
59
|
|
41
60
|
```python
|
42
61
|
>>> from polyaxon.schemas import V1KFReplica, V1PaddleJob
|
43
|
-
>>> from polyaxon.k8s import k8s_schemas
|
44
62
|
>>> paddle_job = V1PaddleJob(
|
45
63
|
>>> clean_pod_policy='All',
|
46
64
|
>>> master=V1KFReplica(...),
|
@@ -89,6 +107,18 @@ class V1PaddleJob(BaseRun, DestinationImageMixin):
|
|
89
107
|
>>> ...
|
90
108
|
```
|
91
109
|
|
110
|
+
### elasticPolicy
|
111
|
+
|
112
|
+
Elastic policy for Paddle distributed runs.
|
113
|
+
|
114
|
+
```yaml
|
115
|
+
>>> run:
|
116
|
+
>>> kind: paddlejob
|
117
|
+
>>> elasticPolicy:
|
118
|
+
>>> ...
|
119
|
+
>>> ...
|
120
|
+
```
|
121
|
+
|
92
122
|
### master
|
93
123
|
|
94
124
|
The master is responsible for orchestrating training and performing
|
@@ -125,6 +155,7 @@ class V1PaddleJob(BaseRun, DestinationImageMixin):
|
|
125
155
|
kind: Literal[_IDENTIFIER] = _IDENTIFIER
|
126
156
|
clean_pod_policy: Optional[V1CleanPodPolicy] = Field(alias="cleanPodPolicy")
|
127
157
|
scheduling_policy: Optional[V1SchedulingPolicy] = Field(alias="schedulingPolicy")
|
158
|
+
elastic_policy: Optional[V1PaddleElasticPolicy] = Field(alias="elasticPolicy")
|
128
159
|
master: Optional[Union[V1KFReplica, RefField]]
|
129
160
|
worker: Optional[Union[V1KFReplica, RefField]]
|
130
161
|
|
@@ -1,8 +1,8 @@
|
|
1
|
-
from typing import Optional, Union
|
1
|
+
from typing import Dict, List, Optional, Union
|
2
2
|
from typing_extensions import Literal
|
3
3
|
|
4
|
-
from clipped.compact.pydantic import Field
|
5
|
-
from clipped.types.ref_or_obj import RefField
|
4
|
+
from clipped.compact.pydantic import Field, StrictStr
|
5
|
+
from clipped.types.ref_or_obj import BoolOrRef, IntOrRef, RefField
|
6
6
|
|
7
7
|
from polyaxon._flow.run.base import BaseRun
|
8
8
|
from polyaxon._flow.run.enums import V1RunKind
|
@@ -12,6 +12,39 @@ from polyaxon._flow.run.kubeflow.scheduling_policy import V1SchedulingPolicy
|
|
12
12
|
from polyaxon._flow.run.resources import V1RunResources
|
13
13
|
from polyaxon._flow.run.utils import DestinationImageMixin
|
14
14
|
from polyaxon._k8s.k8s_schemas import V1Container
|
15
|
+
from polyaxon._schemas.base import BaseSchemaModel
|
16
|
+
|
17
|
+
|
18
|
+
class V1PytorchElasticPolicy(BaseSchemaModel):
|
19
|
+
"""Elastic policy for Pytorch distributed runs.
|
20
|
+
|
21
|
+
Args:
|
22
|
+
min_replicas: int, optional
|
23
|
+
max_replicas: int, optional
|
24
|
+
rdvz_backend: str, optional
|
25
|
+
rdvz_port: int, optional
|
26
|
+
rdvz_host: str, optional
|
27
|
+
rdvz_id: str, optional
|
28
|
+
rdvz_conf: List[Dict], optional
|
29
|
+
standalone: bool, optional
|
30
|
+
n_proc_per_node: int, optional
|
31
|
+
max_restarts: int, optional
|
32
|
+
metrics: List[Dict], optional
|
33
|
+
"""
|
34
|
+
|
35
|
+
_IDENTIFIER = "elasticPolicy"
|
36
|
+
|
37
|
+
min_replicas: Optional[IntOrRef] = Field(alias="minReplicas")
|
38
|
+
max_replicas: Optional[IntOrRef] = Field(alias="maxReplicas")
|
39
|
+
rdvz_backend: Optional[StrictStr] = Field(alias="rdvzBackend")
|
40
|
+
rdvz_port: Optional[IntOrRef] = Field(alias="rdvzPort")
|
41
|
+
rdvz_host: Optional[StrictStr] = Field(alias="rdvzHost")
|
42
|
+
rdvz_id: Optional[StrictStr] = Field(alias="rdvzId")
|
43
|
+
rdvz_conf: Optional[List[Dict]] = Field(alias="rdvzConf")
|
44
|
+
standalone: Optional[BoolOrRef]
|
45
|
+
n_proc_per_node: Optional[IntOrRef] = Field(alias="nProcPerNode")
|
46
|
+
max_restarts: Optional[IntOrRef] = Field(alias="maxRestarts")
|
47
|
+
metrics: Optional[List[Dict]] = Field(alias="Metrics")
|
15
48
|
|
16
49
|
|
17
50
|
class V1PytorchJob(BaseRun, DestinationImageMixin):
|
@@ -20,7 +53,7 @@ class V1PytorchJob(BaseRun, DestinationImageMixin):
|
|
20
53
|
Args:
|
21
54
|
kind: str, should be equal `pytorchjob`
|
22
55
|
clean_pod_policy: str, one of [`All`, `Running`, `None`]
|
23
|
-
scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/scheduling-policy/), optional # noqa
|
56
|
+
scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/kubeflow-scheduling-policy/), optional # noqa
|
24
57
|
master: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
|
25
58
|
worker: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
|
26
59
|
|
@@ -39,7 +72,6 @@ class V1PytorchJob(BaseRun, DestinationImageMixin):
|
|
39
72
|
|
40
73
|
```python
|
41
74
|
>>> from polyaxon.schemas import V1KFReplica, V1PytorchJob
|
42
|
-
>>> from polyaxon.k8s import k8s_schemas
|
43
75
|
>>> pytorch_job = V1PytorchJob(
|
44
76
|
>>> clean_pod_policy='All',
|
45
77
|
>>> master=V1KFReplica(...),
|
@@ -89,6 +121,18 @@ class V1PytorchJob(BaseRun, DestinationImageMixin):
|
|
89
121
|
>>> ...
|
90
122
|
```
|
91
123
|
|
124
|
+
### elasticPolicy
|
125
|
+
|
126
|
+
ElasticPolicy encapsulates various policies for elastic distributed training job.
|
127
|
+
|
128
|
+
```yaml
|
129
|
+
>>> run:
|
130
|
+
>>> kind: pytorchjob
|
131
|
+
>>> elasticPolicy:
|
132
|
+
>>> ...
|
133
|
+
>>> ...
|
134
|
+
```
|
135
|
+
|
92
136
|
### master
|
93
137
|
|
94
138
|
The master replica in the distributed PytorchJob
|
@@ -123,6 +167,8 @@ class V1PytorchJob(BaseRun, DestinationImageMixin):
|
|
123
167
|
kind: Literal[_IDENTIFIER] = _IDENTIFIER
|
124
168
|
clean_pod_policy: Optional[V1CleanPodPolicy] = Field(alias="cleanPodPolicy")
|
125
169
|
scheduling_policy: Optional[V1SchedulingPolicy] = Field(alias="schedulingPolicy")
|
170
|
+
elastic_policy: Optional[V1PytorchElasticPolicy] = Field(alias="elasticPolicy")
|
171
|
+
n_proc_per_node: Optional[IntOrRef] = Field(alias="nProcPerNode")
|
126
172
|
master: Optional[Union[V1KFReplica, RefField]]
|
127
173
|
worker: Optional[Union[V1KFReplica, RefField]]
|
128
174
|
|
@@ -42,15 +42,15 @@ class V1KFReplica(BaseSchemaModel):
|
|
42
42
|
|
43
43
|
```python
|
44
44
|
>>> from polyaxon.schemas import V1Environment, V1Init, V1KFReplica
|
45
|
-
>>> from polyaxon
|
45
|
+
>>> from polyaxon import k8s
|
46
46
|
>>> replica = V1KFReplica(
|
47
47
|
>>> replicas=2,
|
48
48
|
>>> environment=V1Environment(...),
|
49
49
|
>>> connections=["connection-name1"],
|
50
|
-
>>> volumes=[
|
50
|
+
>>> volumes=[k8s.V1Volume(...)],
|
51
51
|
>>> init=[V1Init(...)],
|
52
|
-
>>> sidecars=[
|
53
|
-
>>> container=
|
52
|
+
>>> sidecars=[k8s.V1Container(...)],
|
53
|
+
>>> container=k8s.V1Container(...),
|
54
54
|
>>> )
|
55
55
|
```
|
56
56
|
|
@@ -7,8 +7,20 @@ from polyaxon._schemas.base import BaseSchemaModel
|
|
7
7
|
|
8
8
|
|
9
9
|
class V1SchedulingPolicy(BaseSchemaModel):
|
10
|
+
"""Scheduling policy for Kubeflow distributed runs.
|
11
|
+
|
12
|
+
Args:
|
13
|
+
min_available: int, optional
|
14
|
+
queue: str, optional
|
15
|
+
min_resources: int, optional
|
16
|
+
priority_class: str, optional
|
17
|
+
schedule_timeout_seconds: int, optional
|
18
|
+
"""
|
19
|
+
|
10
20
|
_IDENTIFIER = "schedulingPolicy"
|
11
21
|
|
12
22
|
min_available: Optional[IntOrRef] = Field(alias="minAvailable")
|
13
23
|
queue: Optional[StrictStr]
|
24
|
+
min_resources: Optional[IntOrRef] = Field(alias="minResources")
|
14
25
|
priority_class: Optional[StrictStr] = Field(alias="priorityClass")
|
26
|
+
schedule_timeout_seconds: Optional[IntOrRef] = Field(alias="scheduleTimeoutSeconds")
|
@@ -20,7 +20,7 @@ class V1TFJob(BaseRun, DestinationImageMixin):
|
|
20
20
|
Args:
|
21
21
|
kind: str, should be equal `tfjob`
|
22
22
|
clean_pod_policy: str, one of [`All`, `Running`, `None`]
|
23
|
-
scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/scheduling-policy/), optional # noqa
|
23
|
+
scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/kubeflow-scheduling-policy/), optional # noqa
|
24
24
|
enable_dynamic_worker: boolean
|
25
25
|
chief: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
|
26
26
|
ps: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
|
@@ -45,7 +45,6 @@ class V1TFJob(BaseRun, DestinationImageMixin):
|
|
45
45
|
|
46
46
|
```python
|
47
47
|
>>> from polyaxon.schemas import V1KFReplica, V1TFJob
|
48
|
-
>>> from polyaxon.k8s import k8s_schemas
|
49
48
|
>>> tf_job = V1TFJob(
|
50
49
|
>>> clean_pod_policy='All',
|
51
50
|
>>> chief=V1KFReplica(...),
|
@@ -173,8 +172,9 @@ class V1TFJob(BaseRun, DestinationImageMixin):
|
|
173
172
|
|
174
173
|
kind: Literal[_IDENTIFIER] = _IDENTIFIER
|
175
174
|
clean_pod_policy: Optional[V1CleanPodPolicy] = Field(alias="cleanPodPolicy")
|
176
|
-
enable_dynamic_worker: Optional[bool] = Field(alias="enableDynamicWorker")
|
177
175
|
scheduling_policy: Optional[V1SchedulingPolicy] = Field(alias="schedulingPolicy")
|
176
|
+
enable_dynamic_worker: Optional[bool] = Field(alias="enableDynamicWorker")
|
177
|
+
success_policy: Optional[str] = Field(alias="successPolicy")
|
178
178
|
chief: Optional[Union[V1KFReplica, RefField]]
|
179
179
|
ps: Optional[Union[V1KFReplica, RefField]]
|
180
180
|
worker: Optional[Union[V1KFReplica, RefField]]
|
@@ -20,7 +20,7 @@ class V1XGBoostJob(BaseRun, DestinationImageMixin):
|
|
20
20
|
Args:
|
21
21
|
kind: str, should be equal `xgbjob`
|
22
22
|
clean_pod_policy: str, one of [`All`, `Running`, `None`]
|
23
|
-
scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/scheduling-policy/), optional # noqa
|
23
|
+
scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/kubeflow-scheduling-policy/), optional # noqa
|
24
24
|
master: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
|
25
25
|
worker: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
|
26
26
|
|
@@ -39,7 +39,6 @@ class V1XGBoostJob(BaseRun, DestinationImageMixin):
|
|
39
39
|
|
40
40
|
```python
|
41
41
|
>>> from polyaxon.schemas import V1KFReplica, V1XGBoostJob
|
42
|
-
>>> from polyaxon.k8s import k8s_schemas
|
43
42
|
>>> xgb_job = V1XGBoostJob(
|
44
43
|
>>> clean_pod_policy='All',
|
45
44
|
>>> master=V1KFReplica(...),
|
polyaxon/_flow/run/ray/ray.py
CHANGED
@@ -46,10 +46,9 @@ class V1RayJob(BaseRun, DestinationImageMixin):
|
|
46
46
|
|
47
47
|
```python
|
48
48
|
>>> from polyaxon.schemas import V1Environment, V1Init, V1RayJob, V1RayReplica
|
49
|
-
>>>
|
50
|
-
>>> ray_job = V1Ray(
|
49
|
+
>>> ray_job = V1RayJob(
|
51
50
|
>>> connections=["connection-name1"],
|
52
|
-
>>> volumes=[
|
51
|
+
>>> volumes=[k8s.V1Volume(...)],
|
53
52
|
>>> ray_version="2.5.0",
|
54
53
|
>>> head=V1RayReplica(...),
|
55
54
|
>>> worker=V1RayReplica(...),
|
@@ -46,13 +46,13 @@ class V1RayReplica(BaseSchemaModel):
|
|
46
46
|
|
47
47
|
```python
|
48
48
|
>>> from polyaxon.schemas import V1Environment, V1Init, V1RayReplica
|
49
|
-
>>> from polyaxon
|
49
|
+
>>> from polyaxon import k8s
|
50
50
|
>>> replica = V1RayReplica(
|
51
51
|
>>> replicas=2,
|
52
52
|
>>> environment=V1Environment(...),
|
53
53
|
>>> init=[V1Init(...)],
|
54
|
-
>>> sidecars=[
|
55
|
-
>>> container=
|
54
|
+
>>> sidecars=[k8s.V1Container(...)],
|
55
|
+
>>> container=k8s.V1Container(...),
|
56
56
|
>>> )
|
57
57
|
```
|
58
58
|
|
polyaxon/_flow/run/service.py
CHANGED
@@ -48,14 +48,14 @@ class V1Service(V1Job):
|
|
48
48
|
|
49
49
|
```python
|
50
50
|
>>> from polyaxon.schemas import V1Environment, V1Init, V1Service
|
51
|
-
>>> from polyaxon
|
51
|
+
>>> from polyaxon import k8s
|
52
52
|
>>> service = V1Service(
|
53
53
|
>>> environment=V1Environment(...),
|
54
54
|
>>> connections=["connection-name1"],
|
55
|
-
>>> volumes=[
|
55
|
+
>>> volumes=[k8s.V1Volume(...)],
|
56
56
|
>>> init=[V1Init(...)],
|
57
|
-
>>> sidecars=[
|
58
|
-
>>> container=
|
57
|
+
>>> sidecars=[k8s.V1Container(...)],
|
58
|
+
>>> container=k8s.V1Container(...),
|
59
59
|
>>> ports=[6006],
|
60
60
|
>>> rewritePath=True,
|
61
61
|
>>> )
|
polyaxon/_fs/fs.py
CHANGED
@@ -56,7 +56,7 @@ def _get_fs_from_connection(
|
|
56
56
|
|
57
57
|
async def get_async_fs_from_connection(connection: Optional[V1Connection], **kwargs):
|
58
58
|
fs = _get_fs_from_connection(connection=connection, asynchronous=True, **kwargs)
|
59
|
-
if fs
|
59
|
+
if hasattr(fs, "async_impl") and hasattr(fs, "set_session"):
|
60
60
|
await fs.set_session()
|
61
61
|
return fs
|
62
62
|
|
@@ -72,13 +72,18 @@ def get_fs_from_name(connection_name: str, asynchronous: bool = False, **kwargs)
|
|
72
72
|
)
|
73
73
|
|
74
74
|
|
75
|
+
def get_sync_default_fs(**kwargs):
|
76
|
+
connection = get_artifacts_connection()
|
77
|
+
return get_sync_fs_from_connection(connection=connection, **kwargs)
|
78
|
+
|
79
|
+
|
75
80
|
async def get_default_fs(**kwargs):
|
76
81
|
connection = get_artifacts_connection()
|
77
82
|
return await get_async_fs_from_connection(connection=connection, **kwargs)
|
78
83
|
|
79
84
|
|
80
85
|
async def close_fs(fs):
|
81
|
-
if hasattr(fs.session, "close"):
|
86
|
+
if hasattr(fs, "session") and hasattr(fs.session, "close"):
|
82
87
|
try:
|
83
88
|
await fs.session.close()
|
84
89
|
except: # noqa
|
polyaxon/_fs/utils.py
CHANGED
@@ -2,14 +2,15 @@ import os
|
|
2
2
|
|
3
3
|
from typing import Optional
|
4
4
|
|
5
|
-
from polyaxon._env_vars.keys import
|
5
|
+
from polyaxon._env_vars.keys import ENV_KEYS_SERVICE_MODE
|
6
6
|
from polyaxon._schemas.lifecycle import V1ProjectFeature
|
7
|
+
from polyaxon._services import PolyaxonServices
|
7
8
|
|
8
9
|
|
9
10
|
def get_store_path(store_path: str, subpath: str, entity: Optional[str] = None) -> str:
|
10
11
|
full_path = store_path
|
11
12
|
|
12
|
-
if os.environ.get(
|
13
|
+
if os.environ.get(ENV_KEYS_SERVICE_MODE) == PolyaxonServices.VIEWER:
|
13
14
|
dir_path = "runs" if entity == V1ProjectFeature.RUNTIME else f"{entity}s"
|
14
15
|
full_path = os.path.join(full_path, dir_path)
|
15
16
|
|
@@ -2,6 +2,7 @@ import copy
|
|
2
2
|
|
3
3
|
from typing import Dict, Iterable, List, Optional
|
4
4
|
|
5
|
+
from clipped.utils.enums import get_enum_value
|
5
6
|
from clipped.utils.sanitizers import sanitize_string_dict
|
6
7
|
from clipped.utils.strings import slugify
|
7
8
|
|
@@ -85,7 +86,7 @@ class BaseConverter(
|
|
85
86
|
"operation.polyaxon.com/name": self.run_name,
|
86
87
|
"operation.polyaxon.com/owner": self.owner_name,
|
87
88
|
"operation.polyaxon.com/project": self.project_name,
|
88
|
-
"operation.polyaxon.com/kind": self.K8S_ANNOTATIONS_KIND,
|
89
|
+
"operation.polyaxon.com/kind": get_enum_value(self.K8S_ANNOTATIONS_KIND),
|
89
90
|
}
|
90
91
|
|
91
92
|
def get_annotations(
|