polyaxon 2.0.0rc49__py3-none-any.whl → 2.4.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. polyaxon/_auxiliaries/cleaner.py +8 -3
  2. polyaxon/_auxiliaries/init.py +7 -2
  3. polyaxon/_auxiliaries/notifier.py +8 -2
  4. polyaxon/_auxiliaries/sidecar.py +30 -2
  5. polyaxon/_cli/artifacts.py +96 -11
  6. polyaxon/_cli/components.py +96 -11
  7. polyaxon/_cli/config.py +118 -22
  8. polyaxon/_cli/dashboard.py +15 -2
  9. polyaxon/_cli/init.py +1 -1
  10. polyaxon/_cli/models.py +96 -11
  11. polyaxon/_cli/operations.py +267 -90
  12. polyaxon/_cli/project_versions.py +139 -6
  13. polyaxon/_cli/projects.py +23 -9
  14. polyaxon/_cli/run.py +37 -9
  15. polyaxon/_cli/services/agent.py +2 -2
  16. polyaxon/_cli/services/clean_artifacts.py +1 -1
  17. polyaxon/_cli/services/sidecar.py +8 -1
  18. polyaxon/_client/client.py +17 -0
  19. polyaxon/_client/mixin.py +39 -0
  20. polyaxon/_client/project.py +218 -23
  21. polyaxon/_client/run.py +131 -33
  22. polyaxon/_compiler/contexts/contexts.py +2 -2
  23. polyaxon/_compiler/contexts/ray_job.py +4 -2
  24. polyaxon/_compiler/resolver/agent.py +12 -2
  25. polyaxon/_compiler/resolver/runtime.py +2 -2
  26. polyaxon/_contexts/paths.py +4 -7
  27. polyaxon/_deploy/operators/compose.py +1 -27
  28. polyaxon/_deploy/schemas/deployment.py +4 -1
  29. polyaxon/_deploy/schemas/intervals.py +0 -7
  30. polyaxon/_deploy/schemas/proxy.py +1 -0
  31. polyaxon/_deploy/schemas/service.py +11 -1
  32. polyaxon/_docker/converter/base/base.py +8 -0
  33. polyaxon/_docker/executor.py +10 -4
  34. polyaxon/_env_vars/getters/owner_entity.py +4 -2
  35. polyaxon/_env_vars/getters/project.py +4 -2
  36. polyaxon/_env_vars/getters/run.py +5 -2
  37. polyaxon/_env_vars/keys.py +7 -1
  38. polyaxon/_flow/__init__.py +2 -0
  39. polyaxon/_flow/builds/__init__.py +19 -6
  40. polyaxon/_flow/component/base.py +1 -0
  41. polyaxon/_flow/component/component.py +14 -0
  42. polyaxon/_flow/environment/__init__.py +8 -8
  43. polyaxon/_flow/hooks/__init__.py +19 -6
  44. polyaxon/_flow/init/__init__.py +6 -6
  45. polyaxon/_flow/matrix/iterative.py +0 -1
  46. polyaxon/_flow/matrix/tuner.py +18 -6
  47. polyaxon/_flow/operations/operation.py +44 -17
  48. polyaxon/_flow/plugins/__init__.py +6 -0
  49. polyaxon/_flow/run/__init__.py +2 -2
  50. polyaxon/_flow/run/dag.py +2 -2
  51. polyaxon/_flow/run/dask/dask.py +0 -1
  52. polyaxon/_flow/run/dask/replica.py +3 -3
  53. polyaxon/_flow/run/enums.py +5 -0
  54. polyaxon/_flow/run/job.py +4 -4
  55. polyaxon/_flow/run/kubeflow/mpi_job.py +1 -2
  56. polyaxon/_flow/run/kubeflow/mx_job.py +1 -2
  57. polyaxon/_flow/run/kubeflow/paddle_job.py +35 -4
  58. polyaxon/_flow/run/kubeflow/pytorch_job.py +51 -5
  59. polyaxon/_flow/run/kubeflow/replica.py +4 -4
  60. polyaxon/_flow/run/kubeflow/scheduling_policy.py +12 -0
  61. polyaxon/_flow/run/kubeflow/tf_job.py +3 -3
  62. polyaxon/_flow/run/kubeflow/xgboost_job.py +1 -2
  63. polyaxon/_flow/run/ray/ray.py +2 -3
  64. polyaxon/_flow/run/ray/replica.py +3 -3
  65. polyaxon/_flow/run/service.py +4 -4
  66. polyaxon/_fs/fs.py +7 -2
  67. polyaxon/_fs/utils.py +3 -2
  68. polyaxon/_k8s/converter/base/base.py +2 -1
  69. polyaxon/_k8s/converter/base/main.py +1 -0
  70. polyaxon/_k8s/converter/base/sidecar.py +16 -1
  71. polyaxon/_k8s/converter/common/accelerators.py +7 -4
  72. polyaxon/_k8s/converter/converters/job.py +1 -1
  73. polyaxon/_k8s/converter/converters/kubeflow/paddle_job.py +1 -0
  74. polyaxon/_k8s/converter/converters/kubeflow/pytroch_job.py +2 -0
  75. polyaxon/_k8s/converter/converters/kubeflow/tf_job.py +1 -0
  76. polyaxon/_k8s/converter/converters/ray_job.py +4 -2
  77. polyaxon/_k8s/custom_resources/dask_job.py +3 -0
  78. polyaxon/_k8s/custom_resources/kubeflow/common.py +4 -1
  79. polyaxon/_k8s/custom_resources/kubeflow/paddle_job.py +10 -1
  80. polyaxon/_k8s/custom_resources/kubeflow/pytorch_job.py +14 -1
  81. polyaxon/_k8s/custom_resources/kubeflow/tf_job.py +4 -0
  82. polyaxon/_k8s/custom_resources/ray_job.py +3 -0
  83. polyaxon/_k8s/custom_resources/setter.py +1 -1
  84. polyaxon/_k8s/executor/async_executor.py +2 -0
  85. polyaxon/_k8s/executor/base.py +23 -6
  86. polyaxon/_k8s/logging/async_monitor.py +150 -5
  87. polyaxon/_k8s/manager/async_manager.py +96 -23
  88. polyaxon/_k8s/manager/base.py +4 -0
  89. polyaxon/_k8s/manager/manager.py +282 -134
  90. polyaxon/_local_process/__init__.py +0 -0
  91. polyaxon/_local_process/agent.py +6 -0
  92. polyaxon/_local_process/converter/__init__.py +1 -0
  93. polyaxon/_local_process/converter/base/__init__.py +1 -0
  94. polyaxon/_local_process/converter/base/base.py +140 -0
  95. polyaxon/_local_process/converter/base/containers.py +69 -0
  96. polyaxon/_local_process/converter/base/env_vars.py +253 -0
  97. polyaxon/_local_process/converter/base/init.py +414 -0
  98. polyaxon/_local_process/converter/base/main.py +74 -0
  99. polyaxon/_local_process/converter/base/mounts.py +82 -0
  100. polyaxon/_local_process/converter/converters/__init__.py +8 -0
  101. polyaxon/_local_process/converter/converters/job.py +40 -0
  102. polyaxon/_local_process/converter/converters/service.py +41 -0
  103. polyaxon/_local_process/converter/mixins.py +38 -0
  104. polyaxon/_local_process/executor.py +132 -0
  105. polyaxon/_local_process/process_types.py +39 -0
  106. polyaxon/_managers/agent.py +2 -0
  107. polyaxon/_managers/home.py +2 -1
  108. polyaxon/_operations/tuner.py +1 -0
  109. polyaxon/_polyaxonfile/check.py +2 -0
  110. polyaxon/_polyaxonfile/manager/operations.py +3 -0
  111. polyaxon/_polyaxonfile/manager/workflows.py +2 -0
  112. polyaxon/_polyaxonfile/specs/compiled_operation.py +1 -0
  113. polyaxon/_polyaxonfile/specs/operation.py +1 -0
  114. polyaxon/_polyaxonfile/specs/sections.py +3 -0
  115. polyaxon/_pql/manager.py +1 -1
  116. polyaxon/_runner/agent/async_agent.py +97 -21
  117. polyaxon/_runner/agent/base_agent.py +27 -9
  118. polyaxon/_runner/agent/client.py +15 -1
  119. polyaxon/_runner/agent/sync_agent.py +85 -20
  120. polyaxon/_runner/converter/converter.py +6 -2
  121. polyaxon/_runner/executor.py +13 -7
  122. polyaxon/_schemas/agent.py +27 -1
  123. polyaxon/_schemas/client.py +30 -3
  124. polyaxon/_schemas/installation.py +4 -3
  125. polyaxon/_schemas/lifecycle.py +10 -5
  126. polyaxon/_schemas/log_handler.py +2 -3
  127. polyaxon/_schemas/types/artifacts.py +3 -3
  128. polyaxon/_schemas/types/dockerfile.py +3 -3
  129. polyaxon/_schemas/types/file.py +3 -3
  130. polyaxon/_schemas/types/git.py +3 -3
  131. polyaxon/_schemas/types/tensorboard.py +3 -3
  132. polyaxon/_sdk/api/agents_v1_api.py +1076 -73
  133. polyaxon/_sdk/api/organizations_v1_api.py +371 -10
  134. polyaxon/_sdk/api/project_dashboards_v1_api.py +12 -12
  135. polyaxon/_sdk/api/project_searches_v1_api.py +12 -12
  136. polyaxon/_sdk/api/projects_v1_api.py +221 -44
  137. polyaxon/_sdk/api/runs_v1_api.py +917 -445
  138. polyaxon/_sdk/api/service_accounts_v1_api.py +16 -16
  139. polyaxon/_sdk/api/teams_v1_api.py +2827 -375
  140. polyaxon/_sdk/api/users_v1_api.py +231 -55
  141. polyaxon/_sdk/async_client/api_client.py +4 -0
  142. polyaxon/_sdk/schemas/__init__.py +10 -2
  143. polyaxon/_sdk/schemas/v1_agent.py +2 -1
  144. polyaxon/_sdk/schemas/v1_agent_reconcile_body_request.py +14 -0
  145. polyaxon/_sdk/schemas/v1_artifact_tree.py +1 -1
  146. polyaxon/_sdk/schemas/v1_dashboard_spec.py +4 -0
  147. polyaxon/_sdk/schemas/v1_events_response.py +4 -0
  148. polyaxon/_sdk/schemas/v1_organization.py +1 -0
  149. polyaxon/_sdk/schemas/v1_preset.py +8 -0
  150. polyaxon/_sdk/schemas/v1_project.py +1 -0
  151. polyaxon/_sdk/schemas/v1_project_settings.py +4 -2
  152. polyaxon/_sdk/schemas/v1_run.py +2 -2
  153. polyaxon/_sdk/schemas/v1_run_edge_lineage.py +14 -0
  154. polyaxon/_sdk/schemas/v1_run_edges_graph.py +9 -0
  155. polyaxon/_sdk/schemas/v1_section_spec.py +7 -2
  156. polyaxon/_sdk/schemas/v1_settings_catalog.py +1 -0
  157. polyaxon/_sdk/schemas/v1_team.py +3 -0
  158. polyaxon/_sdk/schemas/v1_user.py +1 -2
  159. polyaxon/_sdk/schemas/v1_user_access.py +17 -0
  160. polyaxon/_services/values.py +1 -0
  161. polyaxon/_sidecar/container/__init__.py +39 -18
  162. polyaxon/_sidecar/container/monitors/__init__.py +1 -0
  163. polyaxon/_sidecar/container/monitors/logs.py +10 -13
  164. polyaxon/_sidecar/container/monitors/spec.py +24 -0
  165. polyaxon/_sidecar/ignore.py +0 -1
  166. polyaxon/_utils/fqn_utils.py +25 -2
  167. polyaxon/client.py +1 -1
  168. polyaxon/pkg.py +1 -1
  169. polyaxon/schemas.py +8 -1
  170. polyaxon/settings.py +6 -0
  171. {polyaxon-2.0.0rc49.dist-info → polyaxon-2.4.0rc1.dist-info}/METADATA +43 -43
  172. {polyaxon-2.0.0rc49.dist-info → polyaxon-2.4.0rc1.dist-info}/RECORD +176 -155
  173. {polyaxon-2.0.0rc49.dist-info → polyaxon-2.4.0rc1.dist-info}/WHEEL +1 -1
  174. polyaxon/_sdk/schemas/v1_project_user_access.py +0 -10
  175. {polyaxon-2.0.0rc49.dist-info → polyaxon-2.4.0rc1.dist-info}/LICENSE +0 -0
  176. {polyaxon-2.0.0rc49.dist-info → polyaxon-2.4.0rc1.dist-info}/entry_points.txt +0 -0
  177. {polyaxon-2.0.0rc49.dist-info → polyaxon-2.4.0rc1.dist-info}/top_level.txt +0 -0
@@ -45,6 +45,7 @@ class V1Operation(BaseOp, TemplateMixinConfig):
45
45
  tags: List[str], optional
46
46
  presets: str, optional
47
47
  queue: str, optional
48
+ namespace: str, optional
48
49
  cache: [V1Cache](/docs/automation/helpers/cache/), optional
49
50
  termination: [V1Termination](/docs/core/specification/termination/), optional
50
51
  plugins: [V1Plugins](/docs/core/specification/plugins/), optional
@@ -81,6 +82,7 @@ class V1Operation(BaseOp, TemplateMixinConfig):
81
82
  >>> tags:
82
83
  >>> presets:
83
84
  >>> queue:
85
+ >>> namespace:
84
86
  >>> cache:
85
87
  >>> termination:
86
88
  >>> plugins:
@@ -111,6 +113,7 @@ class V1Operation(BaseOp, TemplateMixinConfig):
111
113
  >>> tags=["test"],
112
114
  >>> presets=["test"],
113
115
  >>> queue="test",
116
+ >>> namespace="test",
114
117
  >>> cache=V1Cache(...),
115
118
  >>> termination=V1Termination(...),
116
119
  >>> plugins=V1Plugins(...),
@@ -248,6 +251,20 @@ class V1Operation(BaseOp, TemplateMixinConfig):
248
251
  >>> queue: queue-name
249
252
  ```
250
253
 
254
+ ### namespace
255
+
256
+ > **Note**: Please note that this field is only available in some commercial editions.
257
+
258
+ The namespace to use for this operation run,
259
+ if provided, it will override the component's namespace otherwise
260
+ the namespace of the component will be used if it exists or
261
+ it will default to the agent's namespace.
262
+
263
+ ```yaml
264
+ >>> operation:
265
+ >>> namespace: polyaxon
266
+ ```
267
+
251
268
  ### cache
252
269
 
253
270
  The [cache](/docs/automation/helpers/cache/) to use for this operation run,
@@ -645,13 +662,16 @@ class V1Operation(BaseOp, TemplateMixinConfig):
645
662
  for k, v in contexts.items():
646
663
  params[k] = V1Param(value=v, context_only=True)
647
664
 
648
- return cls.construct(
649
- run_patch=run_patch,
650
- hub_ref=hook.hub_ref,
651
- presets=hook.presets,
652
- queue=hook.queue,
653
- params=params,
654
- )
665
+ content = {"run_patch": run_patch, "params": params}
666
+ if hook.hub_ref:
667
+ content["hub_ref"] = hook.hub_ref
668
+ if hook.presets:
669
+ content["presets"] = hook.presets
670
+ if hook.queue:
671
+ content["queue"] = hook.queue
672
+ if hook.namespace:
673
+ content["namespace"] = hook.namespace
674
+ return cls.construct(**content)
655
675
 
656
676
  @classmethod
657
677
  def from_build(cls, build: V1Build, contexts: Optional[Dict] = None):
@@ -667,16 +687,23 @@ class V1Operation(BaseOp, TemplateMixinConfig):
667
687
  if not destination.connection or build.connection:
668
688
  destination.connection = build.connection
669
689
  params["destination"] = destination
670
-
671
- return cls.construct(
672
- run_patch=build.run_patch,
673
- patch_strategy=build.patch_strategy,
674
- hub_ref=build.hub_ref,
675
- presets=build.presets,
676
- queue=build.queue,
677
- cache=build.cache,
678
- params=params,
679
- )
690
+ content = {
691
+ "run_patch": build.run_patch,
692
+ "patch_strategy": build.patch_strategy,
693
+ "params": params,
694
+ }
695
+ if build.hub_ref:
696
+ content["hub_ref"] = build.hub_ref
697
+ if build.presets:
698
+ content["presets"] = build.presets
699
+ if build.queue:
700
+ content["queue"] = build.queue
701
+ if build.namespace:
702
+ content["namespace"] = build.namespace
703
+ if build.cache:
704
+ content["cache"] = build.cache
705
+
706
+ return cls.construct(**content)
680
707
 
681
708
 
682
709
  PartialV1Operation = to_partial(V1Operation)
@@ -329,6 +329,12 @@ class V1Plugins(BaseSchemaModel):
329
329
  elif self.collect_logs is None:
330
330
  self.collect_logs = default
331
331
 
332
+ def set_collect_spec(self, default: bool = True):
333
+ if self.no_api():
334
+ self.collect_spec = False
335
+ elif self.collect_spec is None:
336
+ self.collect_spec = default
337
+
332
338
  def set_collect_resources(self, default: bool = True):
333
339
  if self.no_api():
334
340
  self.collect_resources = False
@@ -17,8 +17,8 @@ from polyaxon._flow.run.job import V1Job
17
17
  from polyaxon._flow.run.kubeflow.clean_pod_policy import V1CleanPodPolicy
18
18
  from polyaxon._flow.run.kubeflow.mpi_job import V1MPIJob
19
19
  from polyaxon._flow.run.kubeflow.mx_job import MXJobMode, V1MXJob
20
- from polyaxon._flow.run.kubeflow.paddle_job import V1PaddleJob
21
- from polyaxon._flow.run.kubeflow.pytorch_job import V1PytorchJob
20
+ from polyaxon._flow.run.kubeflow.paddle_job import V1PaddleElasticPolicy, V1PaddleJob
21
+ from polyaxon._flow.run.kubeflow.pytorch_job import V1PytorchElasticPolicy, V1PytorchJob
22
22
  from polyaxon._flow.run.kubeflow.replica import V1KFReplica
23
23
  from polyaxon._flow.run.kubeflow.scheduling_policy import V1SchedulingPolicy
24
24
  from polyaxon._flow.run.kubeflow.tf_job import V1TFJob
polyaxon/_flow/run/dag.py CHANGED
@@ -83,13 +83,13 @@ class V1Dag(BaseRun):
83
83
 
84
84
  ```python
85
85
  >>> from polyaxon.schemas import V1Dag, V1Component, V1Environment, V1Operation
86
- >>> from polyaxon.k8s import k8s_schemas
86
+ >>> from polyaxon import k8s
87
87
  >>> dag = V1Dag(
88
88
  >>> operations=[V1Operation(...)],
89
89
  >>> components=[V1Component(...), V1Component(...)],
90
90
  >>> environment=V1Environment(...),
91
91
  >>> connections=["connection-name1"],
92
- >>> volumes=[k8s_schemas.V1Volume(...)],
92
+ >>> volumes=[k8s.V1Volume(...)],
93
93
  >>> )
94
94
  ```
95
95
 
@@ -43,7 +43,6 @@ class V1DaskJob(BaseRun, DestinationImageMixin):
43
43
 
44
44
  ```python
45
45
  >>> from polyaxon.schemas import V1Environment, V1Init, V1DaskJob, V1DaskReplica
46
- >>> from polyaxon.k8s import k8s_schemas
47
46
  >>> dask_job = V1DaskJob(
48
47
  >>> job=V1DaskReplica(...),
49
48
  >>> worker=V1DaskReplica(...),
@@ -40,13 +40,13 @@ class V1DaskReplica(BaseSchemaModel):
40
40
 
41
41
  ```python
42
42
  >>> from polyaxon.schemas import V1Environment, V1Init, V1DaskReplica
43
- >>> from polyaxon.k8s import k8s_schemas
43
+ >>> from polyaxon import k8s
44
44
  >>> replica = V1DaskReplica(
45
45
  >>> replicas=2,
46
46
  >>> environment=V1Environment(...),
47
47
  >>> init=[V1Init(...)],
48
- >>> sidecars=[k8s_schemas.V1Container(...)],
49
- >>> container=k8s_schemas.V1Container(...),
48
+ >>> sidecars=[k8s.V1Container(...)],
49
+ >>> container=k8s.V1Container(...),
50
50
  >>> )
51
51
  ```
52
52
 
@@ -21,6 +21,10 @@ class V1RunKind(str, PEnum):
21
21
  CLEANER = "cleaner"
22
22
  BUILDER = "builder"
23
23
 
24
+ @classmethod
25
+ def has_pipeline(cls, kind: str):
26
+ return kind in (cls.DAG, cls.MATRIX, cls.SCHEDULE, cls.TUNER)
27
+
24
28
  @classmethod
25
29
  def has_service(cls, kind: str):
26
30
  return kind in (
@@ -56,6 +60,7 @@ class V1RunEdgeKind(str, PEnum):
56
60
  RUN = "run"
57
61
  TB = "tb"
58
62
  BUILD = "build"
63
+ MANUAL = "manual"
59
64
 
60
65
 
61
66
  class V1RunPending(str, PEnum):
polyaxon/_flow/run/job.py CHANGED
@@ -45,14 +45,14 @@ class V1Job(BaseRun, DestinationImageMixin):
45
45
 
46
46
  ```python
47
47
  >>> from polyaxon.schemas import V1Environment, V1Init, V1Job
48
- >>> from polyaxon.k8s import k8s_schemas
48
+ >>> from polyaxon import k8s
49
49
  >>> job = V1Job(
50
50
  >>> environment=V1Environment(...),
51
51
  >>> connections=["connection-name1"],
52
- >>> volumes=[k8s_schemas.V1Volume(...)],
52
+ >>> volumes=[k8s.V1Volume(...)],
53
53
  >>> init=[V1Init(...)],
54
- >>> sidecars=[k8s_schemas.V1Container(...)],
55
- >>> container=k8s_schemas.V1Container(...),
54
+ >>> sidecars=[k8s.V1Container(...)],
55
+ >>> container=k8s.V1Container(...),
56
56
  >>> )
57
57
  ```
58
58
 
@@ -20,7 +20,7 @@ class V1MPIJob(BaseRun, DestinationImageMixin):
20
20
  Args:
21
21
  kind: str, should be equal `mpijob`
22
22
  clean_pod_policy: str, one of [`All`, `Running`, `None`]
23
- scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/scheduling-policy/), optional # noqa
23
+ scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/kubeflow-scheduling-policy/), optional # noqa
24
24
  slots_per_worker: int, optional
25
25
  launcher: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
26
26
  worker: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
@@ -41,7 +41,6 @@ class V1MPIJob(BaseRun, DestinationImageMixin):
41
41
 
42
42
  ```python
43
43
  >>> from polyaxon.schemas import V1KFReplica, V1MPIJob
44
- >>> from polyaxon.k8s import k8s_schemas
45
44
  >>> mpi_job = V1MPIJob(
46
45
  >>> clean_pod_policy='All',
47
46
  >>> launcher=V1KFReplica(...),
@@ -26,7 +26,7 @@ class V1MXJob(BaseRun, DestinationImageMixin):
26
26
  Args:
27
27
  kind: str, should be equal `mxjob`
28
28
  clean_pod_policy: str, one of [`All`, `Running`, `None`]
29
- scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/scheduling-policy/), optional # noqa
29
+ scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/kubeflow-scheduling-policy/), optional # noqa
30
30
  mode: str, one of [`MXTrain`, `MXTune`]
31
31
  scheduler: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
32
32
  server: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
@@ -55,7 +55,6 @@ class V1MXJob(BaseRun, DestinationImageMixin):
55
55
 
56
56
  ```python
57
57
  >>> from polyaxon.schemas import V1KFReplica, V1MXJob
58
- >>> from polyaxon.k8s import k8s_schemas
59
58
  >>> mx_job = V1MXJob(
60
59
  >>> clean_pod_policy='All',
61
60
  >>> scheduler=V1KFReplica(...),
@@ -1,8 +1,8 @@
1
- from typing import Optional, Union
1
+ from typing import Dict, List, Optional, Union
2
2
  from typing_extensions import Literal
3
3
 
4
4
  from clipped.compact.pydantic import Field
5
- from clipped.types.ref_or_obj import RefField
5
+ from clipped.types.ref_or_obj import IntOrRef, RefField
6
6
 
7
7
  from polyaxon._flow.run.base import BaseRun
8
8
  from polyaxon._flow.run.enums import V1RunKind
@@ -12,6 +12,25 @@ from polyaxon._flow.run.kubeflow.scheduling_policy import V1SchedulingPolicy
12
12
  from polyaxon._flow.run.resources import V1RunResources
13
13
  from polyaxon._flow.run.utils import DestinationImageMixin
14
14
  from polyaxon._k8s.k8s_schemas import V1Container
15
+ from polyaxon._schemas.base import BaseSchemaModel
16
+
17
+
18
+ class V1PaddleElasticPolicy(BaseSchemaModel):
19
+ """Elastic policy for Paddle distributed runs.
20
+
21
+ Args:
22
+ minReplicas: int, optional
23
+ maxReplicas: int, optional
24
+ maxRestarts: int, optional
25
+ metrics: List[Dict], optional
26
+ """
27
+
28
+ _IDENTIFIER = "elasticPolicy"
29
+
30
+ min_replicas: Optional[IntOrRef] = Field(alias="minReplicas")
31
+ max_replicas: Optional[IntOrRef] = Field(alias="maxReplicas")
32
+ max_restarts: Optional[IntOrRef] = Field(alias="maxRestarts")
33
+ metrics: Optional[List[Dict]] = Field(alias="Metrics")
15
34
 
16
35
 
17
36
  class V1PaddleJob(BaseRun, DestinationImageMixin):
@@ -21,7 +40,7 @@ class V1PaddleJob(BaseRun, DestinationImageMixin):
21
40
  Args:
22
41
  kind: str, should be equal `paddlejob`
23
42
  clean_pod_policy: str, one of [`All`, `Running`, `None`]
24
- scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/scheduling-policy/), optional # noqa
43
+ scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/kubeflow-scheduling-policy/), optional # noqa
25
44
  master: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
26
45
  worker: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
27
46
 
@@ -40,7 +59,6 @@ class V1PaddleJob(BaseRun, DestinationImageMixin):
40
59
 
41
60
  ```python
42
61
  >>> from polyaxon.schemas import V1KFReplica, V1PaddleJob
43
- >>> from polyaxon.k8s import k8s_schemas
44
62
  >>> paddle_job = V1PaddleJob(
45
63
  >>> clean_pod_policy='All',
46
64
  >>> master=V1KFReplica(...),
@@ -89,6 +107,18 @@ class V1PaddleJob(BaseRun, DestinationImageMixin):
89
107
  >>> ...
90
108
  ```
91
109
 
110
+ ### elasticPolicy
111
+
112
+ Elastic policy for Paddle distributed runs.
113
+
114
+ ```yaml
115
+ >>> run:
116
+ >>> kind: paddlejob
117
+ >>> elasticPolicy:
118
+ >>> ...
119
+ >>> ...
120
+ ```
121
+
92
122
  ### master
93
123
 
94
124
  The master is responsible for orchestrating training and performing
@@ -125,6 +155,7 @@ class V1PaddleJob(BaseRun, DestinationImageMixin):
125
155
  kind: Literal[_IDENTIFIER] = _IDENTIFIER
126
156
  clean_pod_policy: Optional[V1CleanPodPolicy] = Field(alias="cleanPodPolicy")
127
157
  scheduling_policy: Optional[V1SchedulingPolicy] = Field(alias="schedulingPolicy")
158
+ elastic_policy: Optional[V1PaddleElasticPolicy] = Field(alias="elasticPolicy")
128
159
  master: Optional[Union[V1KFReplica, RefField]]
129
160
  worker: Optional[Union[V1KFReplica, RefField]]
130
161
 
@@ -1,8 +1,8 @@
1
- from typing import Optional, Union
1
+ from typing import Dict, List, Optional, Union
2
2
  from typing_extensions import Literal
3
3
 
4
- from clipped.compact.pydantic import Field
5
- from clipped.types.ref_or_obj import RefField
4
+ from clipped.compact.pydantic import Field, StrictStr
5
+ from clipped.types.ref_or_obj import BoolOrRef, IntOrRef, RefField
6
6
 
7
7
  from polyaxon._flow.run.base import BaseRun
8
8
  from polyaxon._flow.run.enums import V1RunKind
@@ -12,6 +12,39 @@ from polyaxon._flow.run.kubeflow.scheduling_policy import V1SchedulingPolicy
12
12
  from polyaxon._flow.run.resources import V1RunResources
13
13
  from polyaxon._flow.run.utils import DestinationImageMixin
14
14
  from polyaxon._k8s.k8s_schemas import V1Container
15
+ from polyaxon._schemas.base import BaseSchemaModel
16
+
17
+
18
+ class V1PytorchElasticPolicy(BaseSchemaModel):
19
+ """Elastic policy for Pytorch distributed runs.
20
+
21
+ Args:
22
+ min_replicas: int, optional
23
+ max_replicas: int, optional
24
+ rdvz_backend: str, optional
25
+ rdvz_port: int, optional
26
+ rdvz_host: str, optional
27
+ rdvz_id: str, optional
28
+ rdvz_conf: List[Dict], optional
29
+ standalone: bool, optional
30
+ n_proc_per_node: int, optional
31
+ max_restarts: int, optional
32
+ metrics: List[Dict], optional
33
+ """
34
+
35
+ _IDENTIFIER = "elasticPolicy"
36
+
37
+ min_replicas: Optional[IntOrRef] = Field(alias="minReplicas")
38
+ max_replicas: Optional[IntOrRef] = Field(alias="maxReplicas")
39
+ rdvz_backend: Optional[StrictStr] = Field(alias="rdvzBackend")
40
+ rdvz_port: Optional[IntOrRef] = Field(alias="rdvzPort")
41
+ rdvz_host: Optional[StrictStr] = Field(alias="rdvzHost")
42
+ rdvz_id: Optional[StrictStr] = Field(alias="rdvzId")
43
+ rdvz_conf: Optional[List[Dict]] = Field(alias="rdvzConf")
44
+ standalone: Optional[BoolOrRef]
45
+ n_proc_per_node: Optional[IntOrRef] = Field(alias="nProcPerNode")
46
+ max_restarts: Optional[IntOrRef] = Field(alias="maxRestarts")
47
+ metrics: Optional[List[Dict]] = Field(alias="Metrics")
15
48
 
16
49
 
17
50
  class V1PytorchJob(BaseRun, DestinationImageMixin):
@@ -20,7 +53,7 @@ class V1PytorchJob(BaseRun, DestinationImageMixin):
20
53
  Args:
21
54
  kind: str, should be equal `pytorchjob`
22
55
  clean_pod_policy: str, one of [`All`, `Running`, `None`]
23
- scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/scheduling-policy/), optional # noqa
56
+ scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/kubeflow-scheduling-policy/), optional # noqa
24
57
  master: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
25
58
  worker: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
26
59
 
@@ -39,7 +72,6 @@ class V1PytorchJob(BaseRun, DestinationImageMixin):
39
72
 
40
73
  ```python
41
74
  >>> from polyaxon.schemas import V1KFReplica, V1PytorchJob
42
- >>> from polyaxon.k8s import k8s_schemas
43
75
  >>> pytorch_job = V1PytorchJob(
44
76
  >>> clean_pod_policy='All',
45
77
  >>> master=V1KFReplica(...),
@@ -89,6 +121,18 @@ class V1PytorchJob(BaseRun, DestinationImageMixin):
89
121
  >>> ...
90
122
  ```
91
123
 
124
+ ### elasticPolicy
125
+
126
+ ElasticPolicy encapsulates various policies for elastic distributed training jobs.
127
+
128
+ ```yaml
129
+ >>> run:
130
+ >>> kind: pytorchjob
131
+ >>> elasticPolicy:
132
+ >>> ...
133
+ >>> ...
134
+ ```
135
+
92
136
  ### master
93
137
 
94
138
  The master replica in the distributed PytorchJob
@@ -123,6 +167,8 @@ class V1PytorchJob(BaseRun, DestinationImageMixin):
123
167
  kind: Literal[_IDENTIFIER] = _IDENTIFIER
124
168
  clean_pod_policy: Optional[V1CleanPodPolicy] = Field(alias="cleanPodPolicy")
125
169
  scheduling_policy: Optional[V1SchedulingPolicy] = Field(alias="schedulingPolicy")
170
+ elastic_policy: Optional[V1PytorchElasticPolicy] = Field(alias="elasticPolicy")
171
+ n_proc_per_node: Optional[IntOrRef] = Field(alias="nProcPerNode")
126
172
  master: Optional[Union[V1KFReplica, RefField]]
127
173
  worker: Optional[Union[V1KFReplica, RefField]]
128
174
 
@@ -42,15 +42,15 @@ class V1KFReplica(BaseSchemaModel):
42
42
 
43
43
  ```python
44
44
  >>> from polyaxon.schemas import V1Environment, V1Init, V1KFReplica
45
- >>> from polyaxon.k8s import k8s_schemas
45
+ >>> from polyaxon import k8s
46
46
  >>> replica = V1KFReplica(
47
47
  >>> replicas=2,
48
48
  >>> environment=V1Environment(...),
49
49
  >>> connections=["connection-name1"],
50
- >>> volumes=[k8s_schemas.V1Volume(...)],
50
+ >>> volumes=[k8s.V1Volume(...)],
51
51
  >>> init=[V1Init(...)],
52
- >>> sidecars=[k8s_schemas.V1Container(...)],
53
- >>> container=k8s_schemas.V1Container(...),
52
+ >>> sidecars=[k8s.V1Container(...)],
53
+ >>> container=k8s.V1Container(...),
54
54
  >>> )
55
55
  ```
56
56
 
@@ -7,8 +7,20 @@ from polyaxon._schemas.base import BaseSchemaModel
7
7
 
8
8
 
9
9
  class V1SchedulingPolicy(BaseSchemaModel):
10
+ """Scheduling policy for Kubeflow distributed runs.
11
+
12
+ Args:
13
+ min_available: int, optional
14
+ queue: str, optional
15
+ min_resources: int, optional
16
+ priority_class: str, optional
17
+ schedule_timeout_seconds: int, optional
18
+ """
19
+
10
20
  _IDENTIFIER = "schedulingPolicy"
11
21
 
12
22
  min_available: Optional[IntOrRef] = Field(alias="minAvailable")
13
23
  queue: Optional[StrictStr]
24
+ min_resources: Optional[IntOrRef] = Field(alias="minResources")
14
25
  priority_class: Optional[StrictStr] = Field(alias="priorityClass")
26
+ schedule_timeout_seconds: Optional[IntOrRef] = Field(alias="scheduleTimeoutSeconds")
@@ -20,7 +20,7 @@ class V1TFJob(BaseRun, DestinationImageMixin):
20
20
  Args:
21
21
  kind: str, should be equal `tfjob`
22
22
  clean_pod_policy: str, one of [`All`, `Running`, `None`]
23
- scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/scheduling-policy/), optional # noqa
23
+ scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/kubeflow-scheduling-policy/), optional # noqa
24
24
  enable_dynamic_worker: boolean
25
25
  chief: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
26
26
  ps: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
@@ -45,7 +45,6 @@ class V1TFJob(BaseRun, DestinationImageMixin):
45
45
 
46
46
  ```python
47
47
  >>> from polyaxon.schemas import V1KFReplica, V1TFJob
48
- >>> from polyaxon.k8s import k8s_schemas
49
48
  >>> tf_job = V1TFJob(
50
49
  >>> clean_pod_policy='All',
51
50
  >>> chief=V1KFReplica(...),
@@ -173,8 +172,9 @@ class V1TFJob(BaseRun, DestinationImageMixin):
173
172
 
174
173
  kind: Literal[_IDENTIFIER] = _IDENTIFIER
175
174
  clean_pod_policy: Optional[V1CleanPodPolicy] = Field(alias="cleanPodPolicy")
176
- enable_dynamic_worker: Optional[bool] = Field(alias="enableDynamicWorker")
177
175
  scheduling_policy: Optional[V1SchedulingPolicy] = Field(alias="schedulingPolicy")
176
+ enable_dynamic_worker: Optional[bool] = Field(alias="enableDynamicWorker")
177
+ success_policy: Optional[str] = Field(alias="successPolicy")
178
178
  chief: Optional[Union[V1KFReplica, RefField]]
179
179
  ps: Optional[Union[V1KFReplica, RefField]]
180
180
  worker: Optional[Union[V1KFReplica, RefField]]
@@ -20,7 +20,7 @@ class V1XGBoostJob(BaseRun, DestinationImageMixin):
20
20
  Args:
21
21
  kind: str, should be equal `xgbjob`
22
22
  clean_pod_policy: str, one of [`All`, `Running`, `None`]
23
- scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/scheduling-policy/), optional # noqa
23
+ scheduling_policy: [V1SchedulingPolicy](/docs/experimentation/distributed/kubeflow-scheduling-policy/), optional # noqa
24
24
  master: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
25
25
  worker: [V1KFReplica](/docs/experimentation/distributed/kubeflow-replica/), optional
26
26
 
@@ -39,7 +39,6 @@ class V1XGBoostJob(BaseRun, DestinationImageMixin):
39
39
 
40
40
  ```python
41
41
  >>> from polyaxon.schemas import V1KFReplica, V1XGBoostJob
42
- >>> from polyaxon.k8s import k8s_schemas
43
42
  >>> xgb_job = V1XGBoostJob(
44
43
  >>> clean_pod_policy='All',
45
44
  >>> master=V1KFReplica(...),
@@ -46,10 +46,9 @@ class V1RayJob(BaseRun, DestinationImageMixin):
46
46
 
47
47
  ```python
48
48
  >>> from polyaxon.schemas import V1Environment, V1Init, V1RayJob, V1RayReplica
49
- >>> from polyaxon.k8s import k8s_schemas
50
- >>> ray_job = V1Ray(
49
+ >>> ray_job = V1RayJob(
51
50
  >>> connections=["connection-name1"],
52
- >>> volumes=[k8s_schemas.V1Volume(...)],
51
+ >>> volumes=[k8s.V1Volume(...)],
53
52
  >>> ray_version="2.5.0",
54
53
  >>> head=V1RayReplica(...),
55
54
  >>> worker=V1RayReplica(...),
@@ -46,13 +46,13 @@ class V1RayReplica(BaseSchemaModel):
46
46
 
47
47
  ```python
48
48
  >>> from polyaxon.schemas import V1Environment, V1Init, V1RayReplica
49
- >>> from polyaxon.k8s import k8s_schemas
49
+ >>> from polyaxon import k8s
50
50
  >>> replica = V1RayReplica(
51
51
  >>> replicas=2,
52
52
  >>> environment=V1Environment(...),
53
53
  >>> init=[V1Init(...)],
54
- >>> sidecars=[k8s_schemas.V1Container(...)],
55
- >>> container=k8s_schemas.V1Container(...),
54
+ >>> sidecars=[k8s.V1Container(...)],
55
+ >>> container=k8s.V1Container(...),
56
56
  >>> )
57
57
  ```
58
58
 
@@ -48,14 +48,14 @@ class V1Service(V1Job):
48
48
 
49
49
  ```python
50
50
  >>> from polyaxon.schemas import V1Environment, V1Init, V1Service
51
- >>> from polyaxon.k8s import k8s_schemas
51
+ >>> from polyaxon import k8s
52
52
  >>> service = V1Service(
53
53
  >>> environment=V1Environment(...),
54
54
  >>> connections=["connection-name1"],
55
- >>> volumes=[k8s_schemas.V1Volume(...)],
55
+ >>> volumes=[k8s.V1Volume(...)],
56
56
  >>> init=[V1Init(...)],
57
- >>> sidecars=[k8s_schemas.V1Container(...)],
58
- >>> container=k8s_schemas.V1Container(...),
57
+ >>> sidecars=[k8s.V1Container(...)],
58
+ >>> container=k8s.V1Container(...),
59
59
  >>> ports=[6006],
60
60
  >>> rewritePath=True,
61
61
  >>> )
polyaxon/_fs/fs.py CHANGED
@@ -56,7 +56,7 @@ def _get_fs_from_connection(
56
56
 
57
57
  async def get_async_fs_from_connection(connection: Optional[V1Connection], **kwargs):
58
58
  fs = _get_fs_from_connection(connection=connection, asynchronous=True, **kwargs)
59
- if fs.async_impl and hasattr(fs, "set_session"):
59
+ if hasattr(fs, "async_impl") and hasattr(fs, "set_session"):
60
60
  await fs.set_session()
61
61
  return fs
62
62
 
@@ -72,13 +72,18 @@ def get_fs_from_name(connection_name: str, asynchronous: bool = False, **kwargs)
72
72
  )
73
73
 
74
74
 
75
+ def get_sync_default_fs(**kwargs):
76
+ connection = get_artifacts_connection()
77
+ return get_sync_fs_from_connection(connection=connection, **kwargs)
78
+
79
+
75
80
  async def get_default_fs(**kwargs):
76
81
  connection = get_artifacts_connection()
77
82
  return await get_async_fs_from_connection(connection=connection, **kwargs)
78
83
 
79
84
 
80
85
  async def close_fs(fs):
81
- if hasattr(fs.session, "close"):
86
+ if hasattr(fs, "session") and hasattr(fs.session, "close"):
82
87
  try:
83
88
  await fs.session.close()
84
89
  except: # noqa
polyaxon/_fs/utils.py CHANGED
@@ -2,14 +2,15 @@ import os
2
2
 
3
3
  from typing import Optional
4
4
 
5
- from polyaxon._env_vars.keys import ENV_KEYS_SANDBOX_IS_LOCAL
5
+ from polyaxon._env_vars.keys import ENV_KEYS_SERVICE_MODE
6
6
  from polyaxon._schemas.lifecycle import V1ProjectFeature
7
+ from polyaxon._services import PolyaxonServices
7
8
 
8
9
 
9
10
  def get_store_path(store_path: str, subpath: str, entity: Optional[str] = None) -> str:
10
11
  full_path = store_path
11
12
 
12
- if os.environ.get(ENV_KEYS_SANDBOX_IS_LOCAL):
13
+ if os.environ.get(ENV_KEYS_SERVICE_MODE) == PolyaxonServices.VIEWER:
13
14
  dir_path = "runs" if entity == V1ProjectFeature.RUNTIME else f"{entity}s"
14
15
  full_path = os.path.join(full_path, dir_path)
15
16
 
@@ -2,6 +2,7 @@ import copy
2
2
 
3
3
  from typing import Dict, Iterable, List, Optional
4
4
 
5
+ from clipped.utils.enums import get_enum_value
5
6
  from clipped.utils.sanitizers import sanitize_string_dict
6
7
  from clipped.utils.strings import slugify
7
8
 
@@ -85,7 +86,7 @@ class BaseConverter(
85
86
  "operation.polyaxon.com/name": self.run_name,
86
87
  "operation.polyaxon.com/owner": self.owner_name,
87
88
  "operation.polyaxon.com/project": self.project_name,
88
- "operation.polyaxon.com/kind": self.K8S_ANNOTATIONS_KIND,
89
+ "operation.polyaxon.com/kind": get_enum_value(self.K8S_ANNOTATIONS_KIND),
89
90
  }
90
91
 
91
92
  def get_annotations(
@@ -81,6 +81,7 @@ class MainConverter(_BaseConverter):
81
81
  secrets=requested_secrets,
82
82
  config_maps=requested_config_maps,
83
83
  )
84
+ env += self._get_resources_env_vars(main_container.resources)
84
85
 
85
86
  # Env from
86
87
  env_from = self._get_env_from_k8s_resources(