polyaxon 2.6.0rc1__py3-none-any.whl → 2.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. polyaxon/_cli/operations.py +1 -1
  2. polyaxon/_cli/projects.py +2 -2
  3. polyaxon/_compiler/contexts/contexts.py +5 -5
  4. polyaxon/_compiler/contexts/dask_job.py +1 -1
  5. polyaxon/_compiler/contexts/kubeflow/mpi_job.py +1 -1
  6. polyaxon/_compiler/contexts/kubeflow/mx_job.py +1 -1
  7. polyaxon/_compiler/contexts/kubeflow/paddle_job.py +1 -1
  8. polyaxon/_compiler/contexts/kubeflow/pytroch_job.py +1 -1
  9. polyaxon/_compiler/contexts/kubeflow/tf_job.py +1 -1
  10. polyaxon/_compiler/contexts/kubeflow/xgb_job.py +1 -1
  11. polyaxon/_compiler/contexts/ray_job.py +1 -1
  12. polyaxon/_compiler/resolver/agent.py +1 -1
  13. polyaxon/_compiler/resolver/runtime.py +1 -1
  14. polyaxon/_deploy/schemas/deployment.py +1 -0
  15. polyaxon/_deploy/schemas/ui.py +3 -1
  16. polyaxon/_docker/converter/base/base.py +1 -1
  17. polyaxon/_docker/converter/base/env_vars.py +1 -1
  18. polyaxon/_docker/converter/converters/job.py +1 -1
  19. polyaxon/_docker/converter/converters/service.py +1 -1
  20. polyaxon/_flow/component/component.py +4 -0
  21. polyaxon/_flow/environment/__init__.py +5 -4
  22. polyaxon/_flow/io/io.py +2 -0
  23. polyaxon/_flow/matrix/params.py +1 -1
  24. polyaxon/_flow/operations/compiled_operation.py +4 -0
  25. polyaxon/_flow/operations/operation.py +5 -3
  26. polyaxon/_flow/params/params.py +2 -2
  27. polyaxon/_flow/run/__init__.py +3 -0
  28. polyaxon/_flow/run/dag.py +6 -6
  29. polyaxon/_flow/run/dask/dask.py +10 -0
  30. polyaxon/_flow/run/kubeflow/mpi_job.py +8 -0
  31. polyaxon/_flow/run/kubeflow/mx_job.py +16 -0
  32. polyaxon/_flow/run/kubeflow/paddle_job.py +8 -0
  33. polyaxon/_flow/run/kubeflow/pytorch_job.py +8 -0
  34. polyaxon/_flow/run/kubeflow/tf_job.py +12 -0
  35. polyaxon/_flow/run/kubeflow/xgboost_job.py +19 -33
  36. polyaxon/_flow/run/patch.py +37 -9
  37. polyaxon/_flow/run/ray/ray.py +9 -0
  38. polyaxon/_flow/run/resources.py +1 -1
  39. polyaxon/_fs/fs.py +1 -1
  40. polyaxon/_k8s/converter/base/containers.py +1 -1
  41. polyaxon/_k8s/converter/converters/dask_job.py +1 -1
  42. polyaxon/_k8s/converter/converters/job.py +1 -1
  43. polyaxon/_k8s/converter/converters/kubeflow/mpi_job.py +1 -1
  44. polyaxon/_k8s/converter/converters/kubeflow/mx_job.py +1 -1
  45. polyaxon/_k8s/converter/converters/kubeflow/paddle_job.py +1 -1
  46. polyaxon/_k8s/converter/converters/kubeflow/pytroch_job.py +1 -1
  47. polyaxon/_k8s/converter/converters/kubeflow/tf_job.py +1 -1
  48. polyaxon/_k8s/converter/converters/kubeflow/xgboost_job.py +1 -1
  49. polyaxon/_k8s/converter/converters/ray_job.py +1 -2
  50. polyaxon/_k8s/converter/converters/service.py +1 -1
  51. polyaxon/_k8s/k8s_validation.py +16 -16
  52. polyaxon/_k8s/logging/async_monitor.py +4 -4
  53. polyaxon/_k8s/manager/async_manager.py +5 -3
  54. polyaxon/_k8s/manager/manager.py +6 -6
  55. polyaxon/_local_process/converter/base/env_vars.py +1 -1
  56. polyaxon/_local_process/converter/base/init.py +1 -1
  57. polyaxon/_local_process/converter/base/mounts.py +0 -2
  58. polyaxon/_local_process/converter/converters/job.py +1 -1
  59. polyaxon/_local_process/converter/converters/service.py +1 -1
  60. polyaxon/_managers/project.py +3 -2
  61. polyaxon/_polyaxonfile/check.py +4 -4
  62. polyaxon/_polyaxonfile/specs/compiled_operation.py +8 -6
  63. polyaxon/_polyaxonfile/specs/libs/parser.py +1 -3
  64. polyaxon/_polyaxonfile/specs/libs/validator.py +1 -1
  65. polyaxon/_polyaxonfile/specs/operation.py +7 -3
  66. polyaxon/_pql/manager.py +1 -1
  67. polyaxon/_pql/parser.py +29 -26
  68. polyaxon/_runner/agent/base_agent.py +1 -2
  69. polyaxon/_runner/converter/init/artifacts.py +3 -2
  70. polyaxon/_runner/converter/init/tensorboard.py +1 -1
  71. polyaxon/_schemas/client.py +1 -1
  72. polyaxon/_sdk/api/agents_v1_api.py +56 -80
  73. polyaxon/_sdk/api/artifacts_stores_v1_api.py +2 -2
  74. polyaxon/_sdk/api/auth_v1_api.py +2 -6
  75. polyaxon/_sdk/api/connections_v1_api.py +16 -20
  76. polyaxon/_sdk/api/dashboards_v1_api.py +14 -14
  77. polyaxon/_sdk/api/organizations_v1_api.py +1735 -755
  78. polyaxon/_sdk/api/presets_v1_api.py +18 -26
  79. polyaxon/_sdk/api/project_dashboards_v1_api.py +17 -19
  80. polyaxon/_sdk/api/project_searches_v1_api.py +20 -28
  81. polyaxon/_sdk/api/projects_v1_api.py +74 -92
  82. polyaxon/_sdk/api/queues_v1_api.py +22 -30
  83. polyaxon/_sdk/api/runs_v1_api.py +157 -207
  84. polyaxon/_sdk/api/searches_v1_api.py +16 -20
  85. polyaxon/_sdk/api/service_accounts_v1_api.py +30 -38
  86. polyaxon/_sdk/api/tags_v1_api.py +19 -25
  87. polyaxon/_sdk/api/teams_v1_api.py +64 -90
  88. polyaxon/_sdk/api/users_v1_api.py +17 -19
  89. polyaxon/_sdk/api/versions_v1_api.py +4 -4
  90. polyaxon/_sdk/async_client/rest.py +1 -3
  91. polyaxon/_sdk/schemas/v1_organization.py +3 -2
  92. polyaxon/_sdk/schemas/v1_project_settings.py +2 -1
  93. polyaxon/_sdk/schemas/v1_section_spec.py +1 -1
  94. polyaxon/_sdk/schemas/v1_user_access.py +3 -2
  95. polyaxon/_sdk/sync_client/api_client.py +2 -6
  96. polyaxon/_sdk/sync_client/rest.py +4 -8
  97. polyaxon/pkg.py +1 -1
  98. {polyaxon-2.6.0rc1.dist-info → polyaxon-2.7.0.dist-info}/METADATA +9 -9
  99. {polyaxon-2.6.0rc1.dist-info → polyaxon-2.7.0.dist-info}/RECORD +103 -103
  100. {polyaxon-2.6.0rc1.dist-info → polyaxon-2.7.0.dist-info}/LICENSE +0 -0
  101. {polyaxon-2.6.0rc1.dist-info → polyaxon-2.7.0.dist-info}/WHEEL +0 -0
  102. {polyaxon-2.6.0rc1.dist-info → polyaxon-2.7.0.dist-info}/entry_points.txt +0 -0
  103. {polyaxon-2.6.0rc1.dist-info → polyaxon-2.7.0.dist-info}/top_level.txt +0 -0
@@ -1466,7 +1466,7 @@ def logs(ctx, project, uid, follow, hide_time, all_containers, all_info, offline
1466
1466
  )
1467
1467
 
1468
1468
  try:
1469
- from traceml.logging.streamer import get_logs_streamer, load_logs_from_path
1469
+ from traceml.logging.streamer import load_logs_from_path
1470
1470
 
1471
1471
  load_logs_from_path(
1472
1472
  logs_path=logs_path,
polyaxon/_cli/projects.py CHANGED
@@ -46,7 +46,7 @@ def project(ctx, _project): # pylint:disable=redefined-outer-name
46
46
  @click.option(
47
47
  *OPTIONS_NAME["args"],
48
48
  type=str,
49
- help="The project name, e.g. 'mnist' or 'acme/mnist'."
49
+ help="The project name, e.g. 'mnist' or 'acme/mnist'.",
50
50
  )
51
51
  @click.option("--description", type=str, help="Description of the project.")
52
52
  @click.option("--tags", type=str, help="Tags of the project (comma separated values).")
@@ -287,7 +287,7 @@ def delete(ctx, _project, yes):
287
287
  @click.option(
288
288
  *OPTIONS_NAME["args"],
289
289
  type=str,
290
- help="Name of the project, must be unique for the same user."
290
+ help="Name of the project, must be unique for the same user.",
291
291
  )
292
292
  @click.option("--description", type=str, help="Description of the project.")
293
293
  @click.option("--tags", type=str, help="Tags of the project (comma separated values).")
@@ -98,24 +98,24 @@ def resolve_globals_contexts(
98
98
  run_outputs_path = ctx_paths.CONTEXT_MOUNT_RUN_OUTPUTS_FORMAT.format(run_path)
99
99
  resolved_contexts[ctx_sections.GLOBALS][
100
100
  ctx_keys.RUN_ARTIFACTS_PATH
101
- ] = run_artifacts_path
101
+ ] = run_artifacts_path # fmt: skip
102
102
  resolved_contexts[ctx_sections.GLOBALS][
103
103
  ctx_keys.RUN_OUTPUTS_PATH
104
- ] = run_outputs_path
104
+ ] = run_outputs_path # fmt: skip
105
105
  elif artifacts_store:
106
106
  run_artifacts_path = os.path.join(artifacts_store.store_path, run_path)
107
107
  run_outputs_path = os.path.join(run_artifacts_path, "outputs")
108
108
  resolved_contexts[ctx_sections.GLOBALS][
109
109
  ctx_keys.RUN_ARTIFACTS_PATH
110
- ] = run_artifacts_path
110
+ ] = run_artifacts_path # fmt: skip
111
111
  resolved_contexts[ctx_sections.GLOBALS][
112
112
  ctx_keys.RUN_OUTPUTS_PATH
113
- ] = run_outputs_path
113
+ ] = run_outputs_path # fmt: skip
114
114
 
115
115
  if plugins.mount_artifacts_store and artifacts_store:
116
116
  resolved_contexts[ctx_sections.GLOBALS][
117
117
  ctx_keys.STORE_PATH
118
- ] = artifacts_store.store_path
118
+ ] = artifacts_store.store_path # fmt: skip
119
119
  return resolved_contexts
120
120
 
121
121
 
@@ -21,7 +21,7 @@ class DaskJobContextsManager(BaseContextsManager):
21
21
  ) -> Dict:
22
22
  contexts["init"] = {}
23
23
  contexts["connections"] = {}
24
- job = compiled_operation.run # type: V1DaskJob
24
+ job: V1DaskJob = compiled_operation.run
25
25
 
26
26
  def _get_replica(replica: Optional[V1DaskReplica]) -> Dict:
27
27
  if not replica:
@@ -21,7 +21,7 @@ class MPIJobContextsManager(BaseContextsManager):
21
21
  ) -> Dict:
22
22
  contexts["init"] = {}
23
23
  contexts["connections"] = {}
24
- job = compiled_operation.run # type: V1MPIJob
24
+ job: V1MPIJob = compiled_operation.run
25
25
 
26
26
  def _get_replica(replica: Optional[V1KFReplica]) -> Dict:
27
27
  if not replica:
@@ -21,7 +21,7 @@ class MXJobContextsManager(BaseContextsManager):
21
21
  ) -> Dict:
22
22
  contexts["init"] = {}
23
23
  contexts["connections"] = {}
24
- job = compiled_operation.run # type: V1MXJob
24
+ job: V1MXJob = compiled_operation.run
25
25
 
26
26
  def _get_replica(replica: Optional[V1KFReplica]) -> Dict:
27
27
  if not replica:
@@ -21,7 +21,7 @@ class PaddleJobContextsManager(BaseContextsManager):
21
21
  ) -> Dict:
22
22
  contexts["init"] = {}
23
23
  contexts["connections"] = {}
24
- job = compiled_operation.run # type: V1PaddleJob
24
+ job: V1PaddleJob = compiled_operation.run
25
25
 
26
26
  def _get_replica(replica: Optional[V1KFReplica]) -> Dict:
27
27
  if not replica:
@@ -21,7 +21,7 @@ class PytorchJobContextsManager(BaseContextsManager):
21
21
  ) -> Dict:
22
22
  contexts["init"] = {}
23
23
  contexts["connections"] = {}
24
- job = compiled_operation.run # type: V1PytorchJob
24
+ job: V1PytorchJob = compiled_operation.run
25
25
 
26
26
  def _get_replica(replica: Optional[V1KFReplica]) -> Dict:
27
27
  if not replica:
@@ -21,7 +21,7 @@ class TfJobContextsManager(BaseContextsManager):
21
21
  ) -> Dict:
22
22
  contexts["init"] = {}
23
23
  contexts["connections"] = {}
24
- job = compiled_operation.run # type: V1TFJob
24
+ job: V1TFJob = compiled_operation.run
25
25
 
26
26
  def _get_replica(replica: Optional[V1KFReplica]) -> Dict:
27
27
  if not replica:
@@ -21,7 +21,7 @@ class XGBoostJobContextsManager(BaseContextsManager):
21
21
  ) -> Dict:
22
22
  contexts["init"] = {}
23
23
  contexts["connections"] = {}
24
- job = compiled_operation.run # type: V1XGBoostJob
24
+ job: V1XGBoostJob = compiled_operation.run
25
25
 
26
26
  def _get_replica(replica: Optional[V1KFReplica]) -> Dict:
27
27
  if not replica:
@@ -21,7 +21,7 @@ class RayJobContextsManager(BaseContextsManager):
21
21
  ) -> Dict:
22
22
  contexts["init"] = {}
23
23
  contexts["connections"] = {}
24
- job = compiled_operation.run # type: V1RayJob
24
+ job: V1RayJob = compiled_operation.run
25
25
 
26
26
  def _get_replica(replica: Optional[V1RayReplica]) -> Dict:
27
27
  if not replica:
@@ -71,7 +71,7 @@ class AgentResolver(BaseSchemaModel):
71
71
  if agent_config.artifacts_store: # Resolve default artifacts store
72
72
  self.connection_by_names[
73
73
  agent_config.artifacts_store.name
74
- ] = agent_config.artifacts_store
74
+ ] = agent_config.artifacts_store # fmt: skip
75
75
 
76
76
  if (
77
77
  compiled_operation.is_job_run
@@ -1,5 +1,5 @@
1
1
  from datetime import datetime
2
- from typing import Any, Dict, List, Optional
2
+ from typing import Dict, List, Optional
3
3
 
4
4
  from polyaxon import settings
5
5
  from polyaxon._compiler.contexts import resolve_contexts, resolve_globals_contexts
@@ -183,6 +183,7 @@ def validate_deployment_chart(
183
183
 
184
184
 
185
185
  class DeploymentConfig(BaseSchemaModel):
186
+ _SWAGGER_FIELDS_LISTS = ["tolerations", "celeryTolerations"]
186
187
  _SWAGGER_FIELDS = [
187
188
  "tolerations",
188
189
  "affinity",
@@ -13,4 +13,6 @@ class UIConfig(BaseSchemaModel):
13
13
  assets_version: Optional[StrictStr] = Field(alias="assetsVersion", default=None)
14
14
  admin_enabled: Optional[bool] = Field(alias="adminEnabled", default=None)
15
15
  single_url: Optional[bool] = Field(alias="singleUrl", default=None)
16
- default_streams_url: Optional[StrictStr] = Field(alias="defaultStreamsUrl", default=None)
16
+ default_streams_url: Optional[StrictStr] = Field(
17
+ alias="defaultStreamsUrl", default=None
18
+ )
@@ -162,7 +162,7 @@ class BaseConverter(
162
162
  if memory:
163
163
  docker_resources[
164
164
  "memory"
165
- ] = docker_types.V1ResourceRequirements.from_k8s_memory(memory)
165
+ ] = docker_types.V1ResourceRequirements.from_k8s_memory(memory) # fmt: skip
166
166
  if gpus:
167
167
  docker_resources["gpus"] = gpus
168
168
  return docker_types.V1ResourceRequirements.from_dict(docker_resources)
@@ -98,7 +98,7 @@ class EnvMixin(BaseConverter):
98
98
 
99
99
  try:
100
100
  secret_value = orjson_loads(resource.name)
101
- except orjson.JSONDecodeError as e:
101
+ except orjson.JSONDecodeError:
102
102
  return items_from
103
103
 
104
104
  for item in resource.items:
@@ -18,7 +18,7 @@ class JobConverter(JobMixin, BaseConverter):
18
18
  default_sa: Optional[str] = None,
19
19
  default_auth: bool = False,
20
20
  ) -> List[docker_types.V1Container]:
21
- job = compiled_operation.run # type: V1Job
21
+ job: V1Job = compiled_operation.run
22
22
  plugins = V1Plugins.get_or_create(
23
23
  config=compiled_operation.plugins, auth=default_auth
24
24
  )
@@ -4,7 +4,7 @@ from polyaxon._connections import V1Connection, V1ConnectionResource
4
4
  from polyaxon._docker import docker_types
5
5
  from polyaxon._docker.converter.base import BaseConverter
6
6
  from polyaxon._docker.converter.mixins import ServiceMixin
7
- from polyaxon._flow import V1CompiledOperation, V1Plugins, V1Service
7
+ from polyaxon._flow import V1CompiledOperation, V1Plugins
8
8
 
9
9
 
10
10
  class ServiceConverter(ServiceMixin, BaseConverter):
@@ -396,3 +396,7 @@ class V1Component(
396
396
 
397
397
  def get_name(self):
398
398
  return self.name.split(":")[0] if self.name else None
399
+
400
+ def get_replica_types(self):
401
+ if self.is_distributed_run:
402
+ return self.run.get_replica_types()
@@ -8,6 +8,7 @@ from clipped.compact.pydantic import (
8
8
  validation_always,
9
9
  validation_before,
10
10
  )
11
+ from clipped.types import GenericStr
11
12
 
12
13
  from polyaxon._k8s import k8s_schemas, k8s_validation
13
14
  from polyaxon._schemas.base import BaseSchemaModel
@@ -395,9 +396,9 @@ class V1Environment(BaseSchemaModel):
395
396
  "dnsConfig",
396
397
  ]
397
398
 
398
- labels: Optional[Dict[StrictStr, StrictStr]] = None
399
- annotations: Optional[Dict[StrictStr, StrictStr]] = None
400
- node_selector: Optional[Dict[StrictStr, StrictStr]] = Field(
399
+ labels: Optional[Dict[StrictStr, GenericStr]] = None
400
+ annotations: Optional[Dict[StrictStr, GenericStr]] = None
401
+ node_selector: Optional[Dict[StrictStr, GenericStr]] = Field(
401
402
  alias="nodeSelector", default=None
402
403
  )
403
404
  affinity: Optional[Union[k8s_schemas.V1Affinity, Dict]] = None
@@ -428,7 +429,7 @@ class V1Environment(BaseSchemaModel):
428
429
  priority: Optional[int] = None
429
430
  restart_policy: Optional[
430
431
  Literal["Always", "OnFailure", "Never", "ExitCode"]
431
- ] = Field(alias="restartPolicy", default=None)
432
+ ] = Field(alias="restartPolicy", default=None) # fmt: skip
432
433
 
433
434
  @field_validator("affinity", **validation_always, **validation_before)
434
435
  def validate_affinity(cls, v):
polyaxon/_flow/io/io.py CHANGED
@@ -46,6 +46,8 @@ def validate_io_value(
46
46
  default=default,
47
47
  )
48
48
  if validation:
49
+ if isinstance(validation, dict):
50
+ validation = V1Validation(**validation)
49
51
  validation.run_validation(
50
52
  value=parsed_value, type=type, is_optional=is_optional
51
53
  )
@@ -1039,7 +1039,7 @@ class V1HpQLogUniform(BaseHpParamConfig):
1039
1039
  kind: Literal[_IDENTIFIER] = _IDENTIFIER
1040
1040
  value: Optional[
1041
1041
  Union[QLogUniform, QLogUniformList, QLogUniformStr, RefField]
1042
- ] = None
1042
+ ] = None # fmt: skip
1043
1043
 
1044
1044
  @property
1045
1045
  def is_distribution(self):
@@ -135,3 +135,7 @@ class V1CompiledOperation(BaseOp, RunMixin):
135
135
  raise PolyaxonSchemaError(
136
136
  "Operations with dag runtime do not support the `build` section."
137
137
  )
138
+
139
+ def get_replica_types(self):
140
+ if self.is_distributed_run:
141
+ return self.run.get_replica_types()
@@ -8,7 +8,6 @@ from clipped.compact.pydantic import (
8
8
  field_validator,
9
9
  model_validator,
10
10
  validation_after,
11
- validation_before,
12
11
  )
13
12
  from clipped.config.patch_strategy import PatchStrategy
14
13
  from clipped.config.schema import skip_partial, to_partial
@@ -649,8 +648,11 @@ class V1Operation(BaseOp, TemplateMixinConfig):
649
648
  return result
650
649
 
651
650
  kind = config.component.run.kind
652
- value = validate_run_patch(value, kind)
653
- current_value = validate_run_patch(current_value, kind)
651
+ replica_types = config.component.get_replica_types()
652
+ value = validate_run_patch(value, kind, replica_types=replica_types)
653
+ current_value = validate_run_patch(
654
+ current_value, kind, replica_types=replica_types
655
+ )
654
656
  run_patch = current_value.patch(value, strategy)
655
657
  run_patch = run_patch.to_dict()
656
658
  run_patch.pop("kind")
@@ -386,7 +386,7 @@ class V1Param(BaseSchemaModel, ctx_refs.RefMixin, ParamValueMixin):
386
386
 
387
387
  _IDENTIFIER = "param"
388
388
 
389
- value: Any
389
+ value: Optional[Any] = None
390
390
  ref: Optional[StrictStr] = None
391
391
  context_only: Optional[bool] = Field(alias="contextOnly", default=False)
392
392
  connection: Optional[StrictStr] = None
@@ -479,7 +479,7 @@ class ParamSpec(
479
479
  raise PolyaxonValidationError(
480
480
  "Param `{}` with type `{}`, "
481
481
  "cannot be turned to an init container automatically.".format(
482
- self.name, self.type, self.param.ref
482
+ self.name, self.type
483
483
  )
484
484
  )
485
485
 
@@ -52,6 +52,9 @@ V1Runtime = Annotated[
52
52
 
53
53
 
54
54
  class RunMixin:
55
+ def get_replica_types(self):
56
+ raise NotImplementedError
57
+
55
58
  def get_run_kind(self):
56
59
  raise NotImplementedError
57
60
 
polyaxon/_flow/run/dag.py CHANGED
@@ -444,7 +444,7 @@ class V1Dag(BaseRun):
444
444
  for g_context in ctx_sections.GLOBALS_CONTEXTS:
445
445
  self._context[
446
446
  "dag.{}.{}".format(ctx_sections.GLOBALS, g_context)
447
- ] = V1IO.construct(name=g_context, type="str", value="", is_optional=True)
447
+ ] = V1IO.construct(name=g_context, type="str", value="", is_optional=True) # fmt: skip
448
448
 
449
449
  self._context["dag.{}".format(ctx_sections.INPUTS)] = V1IO.construct(
450
450
  name="inputs", type="dict", value={}, is_optional=True
@@ -541,19 +541,19 @@ class V1Dag(BaseRun):
541
541
  # We allow to resolve name, status, project, all outputs/inputs, iteration
542
542
  self._context[
543
543
  "ops.{}.{}".format(op_name, ctx_sections.INPUTS)
544
- ] = V1IO.construct(name="inputs", type="dict", value={}, is_optional=True)
544
+ ] = V1IO.construct(name="inputs", type="dict", value={}, is_optional=True) # fmt: skip
545
545
  self._context[
546
546
  "ops.{}.{}".format(op_name, ctx_sections.OUTPUTS)
547
- ] = V1IO.construct(name="outputs", type="dict", value={}, is_optional=True)
547
+ ] = V1IO.construct(name="outputs", type="dict", value={}, is_optional=True) # fmt: skip
548
548
  self._context[
549
549
  "ops.{}.{}".format(op_name, ctx_sections.GLOBALS)
550
- ] = V1IO.construct(name="globals", type="str", value="", is_optional=True)
550
+ ] = V1IO.construct(name="globals", type="str", value="", is_optional=True) # fmt: skip
551
551
  self._context[
552
552
  "ops.{}.{}".format(op_name, ctx_sections.ARTIFACTS)
553
- ] = V1IO.construct(name="artifacts", type="str", value="", is_optional=True)
553
+ ] = V1IO.construct(name="artifacts", type="str", value="", is_optional=True) # fmt: skip
554
554
  self._context[
555
555
  "ops.{}.{}".format(op_name, ctx_sections.INPUTS_OUTPUTS)
556
- ] = V1IO.construct(name="io", type="str", value={}, is_optional=True)
556
+ ] = V1IO.construct(name="io", type="str", value={}, is_optional=True) # fmt: skip
557
557
 
558
558
  for op in self.operations:
559
559
  if op.has_component_reference:
@@ -175,3 +175,13 @@ class V1DaskJob(BaseRun, DestinationImageMixin):
175
175
  if self.scheduler:
176
176
  init += self.scheduler.get_all_init()
177
177
  return init
178
+
179
+ def get_replica_types(self):
180
+ types = []
181
+ if self.job:
182
+ types.append("job")
183
+ if self.worker:
184
+ types.append("worker")
185
+ if self.scheduler:
186
+ types.append("scheduler")
187
+ return types
@@ -185,3 +185,11 @@ class V1MPIJob(BaseRun, DestinationImageMixin):
185
185
  if self.worker:
186
186
  init += self.worker.get_all_init()
187
187
  return init
188
+
189
+ def get_replica_types(self):
190
+ types = []
191
+ if self.launcher:
192
+ types.append("launcher")
193
+ if self.worker:
194
+ types.append("worker")
195
+ return types
@@ -313,3 +313,19 @@ class V1MXJob(BaseRun, DestinationImageMixin):
313
313
  if self.tuner_server:
314
314
  init += self.tuner_server.get_all_init()
315
315
  return init
316
+
317
+ def get_replica_types(self):
318
+ types = []
319
+ if self.scheduler:
320
+ types.append("scheduler")
321
+ if self.server:
322
+ types.append("server")
323
+ if self.worker:
324
+ types.append("worker")
325
+ if self.tuner:
326
+ types.append("tuner")
327
+ if self.tuner_tracker:
328
+ types.append("tuner_tracker")
329
+ if self.tuner_server:
330
+ types.append("tuner_server")
331
+ return types
@@ -205,3 +205,11 @@ class V1PaddleJob(BaseRun, DestinationImageMixin):
205
205
  if self.worker:
206
206
  init += self.worker.get_all_init()
207
207
  return init
208
+
209
+ def get_replica_types(self):
210
+ types = []
211
+ if self.master:
212
+ types.append("master")
213
+ if self.worker:
214
+ types.append("worker")
215
+ return types
@@ -218,3 +218,11 @@ class V1PytorchJob(BaseRun, DestinationImageMixin):
218
218
  if self.worker:
219
219
  init += self.worker.get_all_init()
220
220
  return init
221
+
222
+ def get_replica_types(self):
223
+ types = []
224
+ if self.master:
225
+ types.append("master")
226
+ if self.worker:
227
+ types.append("worker")
228
+ return types
@@ -248,3 +248,15 @@ class V1TFJob(BaseRun, DestinationImageMixin):
248
248
  if self.evaluator:
249
249
  init += self.evaluator.get_all_init()
250
250
  return init
251
+
252
+ def get_replica_types(self):
253
+ types = []
254
+ if self.chief:
255
+ types.append("chief")
256
+ if self.ps:
257
+ types.append("ps")
258
+ if self.worker:
259
+ types.append("worker")
260
+ if self.evaluator:
261
+ types.append("evaluator")
262
+ return types
@@ -130,63 +130,49 @@ class V1XGBoostJob(BaseRun, DestinationImageMixin):
130
130
  worker: Optional[Union[V1KFReplica, RefField]] = None
131
131
 
132
132
  def apply_image_destination(self, image: str):
133
- if self.chief:
134
- self.chief.container = self.chief.container or V1Container()
135
- self.chief.container.image = image
136
- if self.ps:
137
- self.ps.container = self.ps.container or V1Container()
138
- self.ps.container.image = image
133
+ if self.master:
134
+ self.master.container = self.master.container or V1Container()
135
+ self.master.container.image = image
139
136
  if self.worker:
140
137
  self.worker.container = self.worker.container or V1Container()
141
138
  self.worker.container.image = image
142
- if self.evaluator:
143
- self.evaluator.container = self.evaluator.container or V1Container()
144
- self.evaluator.container.image = image
145
139
 
146
140
  def get_resources(self):
147
141
  resources = V1RunResources()
148
- if self.chief:
149
- resources += self.chief.get_resources()
150
- if self.ps:
151
- resources += self.ps.get_resources()
142
+ if self.master:
143
+ resources += self.master.get_resources()
152
144
  if self.worker:
153
145
  resources += self.worker.get_resources()
154
- if self.evaluator:
155
- resources += self.evaluator.get_resources()
156
146
  return resources
157
147
 
158
148
  def get_all_containers(self):
159
149
  containers = []
160
- if self.chief:
161
- containers += self.chief.get_all_containers()
162
- if self.ps:
163
- containers += self.ps.get_all_containers()
150
+ if self.master:
151
+ containers += self.master.get_all_containers()
164
152
  if self.worker:
165
153
  containers += self.worker.get_all_containers()
166
- if self.evaluator:
167
- containers += self.evaluator.get_all_containers()
168
154
  return containers
169
155
 
170
156
  def get_all_connections(self):
171
157
  connections = []
172
- if self.chief:
173
- connections += self.chief.get_all_connections()
174
- if self.ps:
175
- connections += self.ps.get_all_connections()
158
+ if self.master:
159
+ connections += self.master.get_all_connections()
176
160
  if self.worker:
177
161
  connections += self.worker.get_all_connections()
178
- if self.evaluator:
179
- connections += self.evaluator.get_all_connections()
180
162
  return connections
181
163
 
182
164
  def get_all_init(self):
183
165
  init = []
184
- if self.chief:
185
- init += self.chief.get_all_init()
186
- if self.ps:
187
- init += self.ps.get_all_init()
166
+ if self.master:
167
+ init += self.master.get_all_init()
188
168
  if self.worker:
189
169
  init += self.worker.get_all_init()
190
- if self.evaluator:
191
- init += self.evaluator.get_all_init()
192
170
  return init
171
+
172
+ def get_replica_types(self):
173
+ types = []
174
+ if self.master:
175
+ types.append(self.master.replicas)
176
+ if self.worker:
177
+ types.append(self.worker.replicas)
178
+ return types
@@ -1,4 +1,4 @@
1
- from typing import Dict
1
+ from typing import Dict, List
2
2
 
3
3
  from clipped.compact.pydantic import ValidationError
4
4
 
@@ -21,7 +21,9 @@ from polyaxon._flow.run.tuner import V1TunerJob
21
21
  from polyaxon.exceptions import PolyaxonValidationError
22
22
 
23
23
 
24
- def validate_run_patch(run_patch: Dict, kind: V1RunKind):
24
+ def validate_run_patch(
25
+ run_patch: Dict, kind: V1RunKind, replica_types: List[str] = None
26
+ ):
25
27
  if kind == V1RunKind.JOB:
26
28
  patch = V1Job.from_dict(run_patch)
27
29
  elif kind == V1RunKind.SERVICE:
@@ -37,37 +39,63 @@ def validate_run_patch(run_patch: Dict, kind: V1RunKind):
37
39
  try:
38
40
  patch = V1PytorchJob.from_dict(run_patch)
39
41
  except ValidationError:
40
- patch = V1KFReplica.from_dict(run_patch)
42
+ if replica_types:
43
+ patch = V1PytorchJob.from_dict({k: run_patch for k in replica_types})
44
+ else:
45
+ patch = V1KFReplica.from_dict(run_patch)
41
46
  elif kind == V1RunKind.PADDLEJOB:
42
47
  try:
43
48
  patch = V1PaddleJob.from_dict(run_patch)
44
49
  except ValidationError:
45
- patch = V1KFReplica.from_dict(run_patch)
50
+ if replica_types:
51
+ patch = V1PaddleJob.from_dict({k: run_patch for k in replica_types})
52
+ else:
53
+ patch = V1KFReplica.from_dict(run_patch)
46
54
  elif kind == V1RunKind.TFJOB:
47
55
  try:
48
56
  patch = V1TFJob.from_dict(run_patch)
49
57
  except ValidationError:
50
- patch = V1KFReplica.from_dict(run_patch)
58
+ if replica_types:
59
+ patch = V1TFJob.from_dict({k: run_patch for k in replica_types})
60
+ else:
61
+ patch = V1KFReplica.from_dict(run_patch)
51
62
  elif kind == V1RunKind.MXJOB:
52
63
  try:
53
64
  patch = V1MXJob.from_dict(run_patch)
54
65
  except ValidationError:
55
- patch = V1KFReplica.from_dict(run_patch)
66
+ if replica_types:
67
+ patch = V1MXJob.from_dict({k: run_patch for k in replica_types})
68
+ else:
69
+ patch = V1KFReplica.from_dict(run_patch)
56
70
  elif kind == V1RunKind.XGBJOB:
57
71
  try:
58
72
  patch = V1XGBoostJob.from_dict(run_patch)
59
73
  except ValidationError:
60
- patch = V1KFReplica.from_dict(run_patch)
74
+ if replica_types:
75
+ patch = V1XGBoostJob.from_dict({k: run_patch for k in replica_types})
76
+ else:
77
+ patch = V1KFReplica.from_dict(run_patch)
61
78
  elif kind == V1RunKind.RAYJOB:
62
79
  try:
63
80
  patch = V1RayJob.from_dict(run_patch)
64
81
  except ValidationError:
65
- patch = V1RayReplica.from_dict(run_patch)
82
+ if replica_types:
83
+ replicas = {}
84
+ if "head" in replica_types:
85
+ replicas["head"] = run_patch
86
+ replica_types = [r for r in replica_types if r != "head"]
87
+ replicas["workers"] = {replica: run_patch for replica in replica_types}
88
+ patch = V1RayJob.from_dict(replicas)
89
+ else:
90
+ patch = V1RayReplica.from_dict(run_patch)
66
91
  elif kind == V1RunKind.DASKJOB:
67
92
  try:
68
93
  patch = V1DaskJob.from_dict(run_patch)
69
94
  except ValidationError:
70
- patch = V1DaskReplica.from_dict(run_patch)
95
+ if replica_types:
96
+ patch = V1DaskJob.from_dict({k: run_patch for k in replica_types})
97
+ else:
98
+ patch = V1DaskReplica.from_dict(run_patch)
71
99
  elif kind == V1RunKind.NOTIFIER:
72
100
  patch = V1NotifierJob.from_dict(run_patch)
73
101
  elif kind == V1RunKind.TUNER: