polyaxon 2.1.0rc9__py3-none-any.whl → 2.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. polyaxon/_auxiliaries/default_scheduling.py +17 -7
  2. polyaxon/_auxiliaries/init.py +14 -6
  3. polyaxon/_auxiliaries/sidecar.py +10 -8
  4. polyaxon/_cli/artifacts.py +96 -11
  5. polyaxon/_cli/components.py +96 -11
  6. polyaxon/_cli/config.py +31 -0
  7. polyaxon/_cli/dashboard.py +12 -2
  8. polyaxon/_cli/init.py +1 -1
  9. polyaxon/_cli/models.py +96 -11
  10. polyaxon/_cli/operations.py +133 -58
  11. polyaxon/_cli/project_versions.py +139 -6
  12. polyaxon/_cli/projects.py +23 -9
  13. polyaxon/_cli/run.py +43 -9
  14. polyaxon/_cli/services/agent.py +2 -2
  15. polyaxon/_cli/version.py +4 -1
  16. polyaxon/_client/mixin.py +39 -0
  17. polyaxon/_client/project.py +218 -23
  18. polyaxon/_client/run.py +84 -27
  19. polyaxon/_compiler/contexts/contexts.py +4 -0
  20. polyaxon/_compiler/contexts/ray_job.py +4 -2
  21. polyaxon/_compiler/resolver/agent.py +22 -10
  22. polyaxon/_compiler/resolver/runtime.py +7 -3
  23. polyaxon/_constants/metadata.py +1 -0
  24. polyaxon/_contexts/keys.py +1 -0
  25. polyaxon/_contexts/paths.py +1 -1
  26. polyaxon/_deploy/operators/compose.py +1 -27
  27. polyaxon/_deploy/schemas/auth.py +3 -3
  28. polyaxon/_deploy/schemas/celery.py +10 -8
  29. polyaxon/_deploy/schemas/deployment.py +148 -115
  30. polyaxon/_deploy/schemas/email.py +8 -8
  31. polyaxon/_deploy/schemas/ingress.py +7 -7
  32. polyaxon/_deploy/schemas/intervals.py +2 -7
  33. polyaxon/_deploy/schemas/operators.py +8 -8
  34. polyaxon/_deploy/schemas/proxy.py +9 -8
  35. polyaxon/_deploy/schemas/rbac.py +1 -1
  36. polyaxon/_deploy/schemas/root_user.py +5 -5
  37. polyaxon/_deploy/schemas/security_context.py +25 -15
  38. polyaxon/_deploy/schemas/service.py +75 -66
  39. polyaxon/_deploy/schemas/ssl.py +3 -3
  40. polyaxon/_deploy/schemas/ui.py +10 -6
  41. polyaxon/_docker/builder/builder.py +4 -1
  42. polyaxon/_docker/converter/base/containers.py +4 -7
  43. polyaxon/_docker/converter/base/env_vars.py +5 -5
  44. polyaxon/_docker/converter/base/mounts.py +2 -2
  45. polyaxon/_docker/docker_types.py +57 -30
  46. polyaxon/_env_vars/getters/owner_entity.py +4 -2
  47. polyaxon/_env_vars/getters/project.py +4 -2
  48. polyaxon/_env_vars/getters/run.py +5 -2
  49. polyaxon/_env_vars/keys.py +3 -0
  50. polyaxon/_flow/__init__.py +3 -2
  51. polyaxon/_flow/builds/__init__.py +8 -8
  52. polyaxon/_flow/cache/__init__.py +4 -4
  53. polyaxon/_flow/component/base.py +25 -18
  54. polyaxon/_flow/component/component.py +4 -3
  55. polyaxon/_flow/early_stopping/__init__.py +1 -1
  56. polyaxon/_flow/early_stopping/policies.py +12 -10
  57. polyaxon/_flow/environment/__init__.py +43 -25
  58. polyaxon/_flow/events/__init__.py +1 -1
  59. polyaxon/_flow/hooks/__init__.py +11 -11
  60. polyaxon/_flow/init/__init__.py +41 -25
  61. polyaxon/_flow/io/io.py +57 -47
  62. polyaxon/_flow/joins/__init__.py +5 -5
  63. polyaxon/_flow/matrix/bayes.py +23 -17
  64. polyaxon/_flow/matrix/grid_search.py +16 -7
  65. polyaxon/_flow/matrix/hyperband.py +10 -10
  66. polyaxon/_flow/matrix/hyperopt.py +14 -9
  67. polyaxon/_flow/matrix/iterative.py +14 -8
  68. polyaxon/_flow/matrix/mapping.py +4 -4
  69. polyaxon/_flow/matrix/params.py +138 -77
  70. polyaxon/_flow/matrix/random_search.py +10 -5
  71. polyaxon/_flow/matrix/tuner.py +4 -4
  72. polyaxon/_flow/mounts/artifacts_mounts.py +1 -1
  73. polyaxon/_flow/notifications/__init__.py +1 -1
  74. polyaxon/_flow/operations/base.py +10 -8
  75. polyaxon/_flow/operations/compiled_operation.py +5 -4
  76. polyaxon/_flow/operations/operation.py +57 -41
  77. polyaxon/_flow/optimization/__init__.py +2 -2
  78. polyaxon/_flow/params/params.py +10 -9
  79. polyaxon/_flow/plugins/__init__.py +19 -13
  80. polyaxon/_flow/run/dag.py +12 -9
  81. polyaxon/_flow/run/dask/dask.py +4 -4
  82. polyaxon/_flow/run/dask/replica.py +17 -11
  83. polyaxon/_flow/run/job.py +17 -11
  84. polyaxon/_flow/run/kubeflow/mpi_job.py +10 -5
  85. polyaxon/_flow/run/kubeflow/mx_job.py +25 -9
  86. polyaxon/_flow/run/kubeflow/paddle_job.py +16 -9
  87. polyaxon/_flow/run/kubeflow/pytorch_job.py +24 -17
  88. polyaxon/_flow/run/kubeflow/replica.py +17 -11
  89. polyaxon/_flow/run/kubeflow/scheduling_policy.py +7 -5
  90. polyaxon/_flow/run/kubeflow/tf_job.py +15 -8
  91. polyaxon/_flow/run/kubeflow/xgboost_job.py +9 -4
  92. polyaxon/_flow/run/ray/ray.py +9 -6
  93. polyaxon/_flow/run/ray/replica.py +25 -16
  94. polyaxon/_flow/run/resources.py +14 -13
  95. polyaxon/_flow/run/service.py +4 -4
  96. polyaxon/_flow/schedules/cron.py +4 -4
  97. polyaxon/_flow/schedules/interval.py +4 -4
  98. polyaxon/_flow/templates/__init__.py +3 -3
  99. polyaxon/_flow/termination/__init__.py +3 -3
  100. polyaxon/_fs/async_manager.py +1 -1
  101. polyaxon/_fs/fs.py +1 -1
  102. polyaxon/_fs/watcher.py +26 -27
  103. polyaxon/_k8s/converter/base/base.py +2 -1
  104. polyaxon/_k8s/converter/base/main.py +1 -0
  105. polyaxon/_k8s/converter/common/accelerators.py +7 -4
  106. polyaxon/_k8s/converter/converters/ray_job.py +4 -2
  107. polyaxon/_k8s/custom_resources/dask_job.py +3 -0
  108. polyaxon/_k8s/custom_resources/kubeflow/common.py +4 -1
  109. polyaxon/_k8s/custom_resources/ray_job.py +3 -0
  110. polyaxon/_k8s/custom_resources/setter.py +1 -1
  111. polyaxon/_k8s/executor/async_executor.py +2 -0
  112. polyaxon/_k8s/k8s_validation.py +1 -1
  113. polyaxon/_k8s/logging/async_monitor.py +82 -11
  114. polyaxon/_k8s/manager/async_manager.py +15 -0
  115. polyaxon/_k8s/manager/manager.py +16 -1
  116. polyaxon/_local_process/__init__.py +0 -0
  117. polyaxon/_local_process/agent.py +6 -0
  118. polyaxon/_local_process/converter/__init__.py +1 -0
  119. polyaxon/_local_process/converter/base/__init__.py +1 -0
  120. polyaxon/_local_process/converter/base/base.py +140 -0
  121. polyaxon/_local_process/converter/base/containers.py +66 -0
  122. polyaxon/_local_process/converter/base/env_vars.py +253 -0
  123. polyaxon/_local_process/converter/base/init.py +414 -0
  124. polyaxon/_local_process/converter/base/main.py +74 -0
  125. polyaxon/_local_process/converter/base/mounts.py +82 -0
  126. polyaxon/_local_process/converter/converters/__init__.py +8 -0
  127. polyaxon/_local_process/converter/converters/job.py +40 -0
  128. polyaxon/_local_process/converter/converters/service.py +41 -0
  129. polyaxon/_local_process/converter/mixins.py +38 -0
  130. polyaxon/_local_process/executor.py +132 -0
  131. polyaxon/_local_process/process_types.py +42 -0
  132. polyaxon/_polyaxonfile/specs/compiled_operation.py +1 -1
  133. polyaxon/_polyaxonfile/specs/libs/parser.py +1 -1
  134. polyaxon/_polyaxonfile/specs/libs/validator.py +1 -1
  135. polyaxon/_polyaxonfile/specs/operation.py +1 -1
  136. polyaxon/_polyaxonfile/specs/sections.py +8 -0
  137. polyaxon/_pql/manager.py +1 -1
  138. polyaxon/_runner/agent/async_agent.py +25 -11
  139. polyaxon/_runner/agent/base_agent.py +19 -10
  140. polyaxon/_runner/agent/sync_agent.py +24 -10
  141. polyaxon/_runner/converter/converter.py +12 -4
  142. polyaxon/_runner/executor.py +1 -1
  143. polyaxon/_schemas/agent.py +69 -37
  144. polyaxon/_schemas/authentication.py +4 -4
  145. polyaxon/_schemas/base.py +26 -2
  146. polyaxon/_schemas/checks.py +3 -3
  147. polyaxon/_schemas/cli.py +4 -6
  148. polyaxon/_schemas/client.py +20 -18
  149. polyaxon/_schemas/compatibility.py +4 -4
  150. polyaxon/_schemas/container_resources.py +1 -1
  151. polyaxon/_schemas/home.py +3 -3
  152. polyaxon/_schemas/installation.py +13 -9
  153. polyaxon/_schemas/lifecycle.py +23 -23
  154. polyaxon/_schemas/log_handler.py +2 -2
  155. polyaxon/_schemas/services.py +26 -14
  156. polyaxon/_schemas/types/artifacts.py +3 -3
  157. polyaxon/_schemas/types/dockerfile.py +14 -12
  158. polyaxon/_schemas/types/event.py +2 -2
  159. polyaxon/_schemas/types/file.py +3 -3
  160. polyaxon/_schemas/types/git.py +12 -4
  161. polyaxon/_schemas/types/tensorboard.py +14 -8
  162. polyaxon/_schemas/user.py +3 -3
  163. polyaxon/_schemas/version.py +2 -2
  164. polyaxon/_sdk/api/agents_v1_api.py +222 -43
  165. polyaxon/_sdk/api/artifacts_stores_v1_api.py +3 -3
  166. polyaxon/_sdk/api/auth_v1_api.py +13 -13
  167. polyaxon/_sdk/api/connections_v1_api.py +15 -15
  168. polyaxon/_sdk/api/dashboards_v1_api.py +15 -15
  169. polyaxon/_sdk/api/organizations_v1_api.py +85 -85
  170. polyaxon/_sdk/api/presets_v1_api.py +15 -15
  171. polyaxon/_sdk/api/project_dashboards_v1_api.py +29 -29
  172. polyaxon/_sdk/api/project_searches_v1_api.py +29 -29
  173. polyaxon/_sdk/api/projects_v1_api.py +284 -107
  174. polyaxon/_sdk/api/queues_v1_api.py +19 -19
  175. polyaxon/_sdk/api/runs_v1_api.py +313 -359
  176. polyaxon/_sdk/api/searches_v1_api.py +15 -15
  177. polyaxon/_sdk/api/service_accounts_v1_api.py +31 -31
  178. polyaxon/_sdk/api/tags_v1_api.py +17 -17
  179. polyaxon/_sdk/api/teams_v1_api.py +2854 -402
  180. polyaxon/_sdk/api/users_v1_api.py +254 -78
  181. polyaxon/_sdk/api/versions_v1_api.py +7 -7
  182. polyaxon/_sdk/async_client/api_client.py +4 -0
  183. polyaxon/_sdk/schemas/__init__.py +1 -1
  184. polyaxon/_sdk/schemas/v1_activity.py +8 -8
  185. polyaxon/_sdk/schemas/v1_agent.py +18 -16
  186. polyaxon/_sdk/schemas/v1_agent_state_response.py +4 -4
  187. polyaxon/_sdk/schemas/v1_agent_state_response_agent_state.py +10 -10
  188. polyaxon/_sdk/schemas/v1_agent_status_body_request.py +3 -3
  189. polyaxon/_sdk/schemas/v1_analytics_spec.py +4 -4
  190. polyaxon/_sdk/schemas/v1_artifact_tree.py +3 -3
  191. polyaxon/_sdk/schemas/v1_auth.py +1 -1
  192. polyaxon/_sdk/schemas/v1_cloning.py +3 -3
  193. polyaxon/_sdk/schemas/v1_connection_response.py +9 -9
  194. polyaxon/_sdk/schemas/v1_dashboard.py +9 -9
  195. polyaxon/_sdk/schemas/v1_dashboard_spec.py +5 -1
  196. polyaxon/_sdk/schemas/v1_entities_tags.py +2 -2
  197. polyaxon/_sdk/schemas/v1_entities_transfer.py +2 -2
  198. polyaxon/_sdk/schemas/v1_entity_notification_body.py +7 -7
  199. polyaxon/_sdk/schemas/v1_entity_stage_body_request.py +5 -5
  200. polyaxon/_sdk/schemas/v1_entity_status_body_request.py +5 -5
  201. polyaxon/_sdk/schemas/v1_events_response.py +2 -2
  202. polyaxon/_sdk/schemas/v1_list_activities_response.py +4 -4
  203. polyaxon/_sdk/schemas/v1_list_agents_response.py +4 -4
  204. polyaxon/_sdk/schemas/v1_list_bookmarks_response.py +4 -4
  205. polyaxon/_sdk/schemas/v1_list_connections_response.py +4 -4
  206. polyaxon/_sdk/schemas/v1_list_dashboards_response.py +4 -4
  207. polyaxon/_sdk/schemas/v1_list_organization_members_response.py +4 -4
  208. polyaxon/_sdk/schemas/v1_list_organizations_response.py +4 -4
  209. polyaxon/_sdk/schemas/v1_list_presets_response.py +4 -4
  210. polyaxon/_sdk/schemas/v1_list_project_versions_response.py +4 -4
  211. polyaxon/_sdk/schemas/v1_list_projects_response.py +4 -4
  212. polyaxon/_sdk/schemas/v1_list_queues_response.py +4 -4
  213. polyaxon/_sdk/schemas/v1_list_run_artifacts_response.py +4 -4
  214. polyaxon/_sdk/schemas/v1_list_run_connections_response.py +4 -4
  215. polyaxon/_sdk/schemas/v1_list_run_edges_response.py +4 -4
  216. polyaxon/_sdk/schemas/v1_list_runs_response.py +4 -4
  217. polyaxon/_sdk/schemas/v1_list_searches_response.py +4 -4
  218. polyaxon/_sdk/schemas/v1_list_service_accounts_response.py +4 -4
  219. polyaxon/_sdk/schemas/v1_list_tags_response.py +4 -4
  220. polyaxon/_sdk/schemas/v1_list_team_members_response.py +4 -4
  221. polyaxon/_sdk/schemas/v1_list_teams_response.py +4 -4
  222. polyaxon/_sdk/schemas/v1_list_token_response.py +4 -4
  223. polyaxon/_sdk/schemas/v1_operation_body.py +8 -8
  224. polyaxon/_sdk/schemas/v1_organization.py +16 -16
  225. polyaxon/_sdk/schemas/v1_organization_member.py +6 -6
  226. polyaxon/_sdk/schemas/v1_password_change.py +3 -3
  227. polyaxon/_sdk/schemas/v1_pipeline.py +3 -3
  228. polyaxon/_sdk/schemas/v1_preset.py +16 -9
  229. polyaxon/_sdk/schemas/v1_project.py +17 -17
  230. polyaxon/_sdk/schemas/v1_project_settings.py +12 -10
  231. polyaxon/_sdk/schemas/v1_project_version.py +20 -20
  232. polyaxon/_sdk/schemas/v1_queue.py +12 -12
  233. polyaxon/_sdk/schemas/v1_run.py +38 -38
  234. polyaxon/_sdk/schemas/v1_run_connection.py +3 -3
  235. polyaxon/_sdk/schemas/v1_run_edge.py +5 -5
  236. polyaxon/_sdk/schemas/v1_run_edge_lineage.py +3 -3
  237. polyaxon/_sdk/schemas/v1_run_edges_graph.py +1 -1
  238. polyaxon/_sdk/schemas/v1_run_reference_catalog.py +4 -4
  239. polyaxon/_sdk/schemas/v1_run_settings.py +9 -9
  240. polyaxon/_sdk/schemas/v1_search.py +10 -10
  241. polyaxon/_sdk/schemas/v1_search_spec.py +14 -14
  242. polyaxon/_sdk/schemas/v1_section_spec.py +12 -7
  243. polyaxon/_sdk/schemas/v1_service_account.py +9 -9
  244. polyaxon/_sdk/schemas/v1_settings_catalog.py +4 -3
  245. polyaxon/_sdk/schemas/v1_tag.py +6 -6
  246. polyaxon/_sdk/schemas/v1_team.py +11 -8
  247. polyaxon/_sdk/schemas/v1_team_member.py +6 -6
  248. polyaxon/_sdk/schemas/v1_team_settings.py +2 -2
  249. polyaxon/_sdk/schemas/v1_token.py +10 -10
  250. polyaxon/_sdk/schemas/v1_trial_start.py +6 -6
  251. polyaxon/_sdk/schemas/v1_user.py +6 -7
  252. polyaxon/_sdk/schemas/v1_user_access.py +17 -0
  253. polyaxon/_sdk/schemas/v1_user_email.py +1 -1
  254. polyaxon/_sdk/schemas/v1_user_singup.py +5 -5
  255. polyaxon/_sdk/schemas/v1_uuids.py +1 -1
  256. polyaxon/_sidecar/container/__init__.py +39 -20
  257. polyaxon/_sidecar/container/monitors/logs.py +10 -13
  258. polyaxon/_sidecar/ignore.py +0 -1
  259. polyaxon/_utils/cli_constants.py +2 -0
  260. polyaxon/_utils/fqn_utils.py +25 -2
  261. polyaxon/_utils/test_utils.py +2 -1
  262. polyaxon/pkg.py +1 -1
  263. polyaxon/schemas.py +1 -1
  264. {polyaxon-2.1.0rc9.dist-info → polyaxon-2.6.0.dist-info}/METADATA +43 -43
  265. {polyaxon-2.1.0rc9.dist-info → polyaxon-2.6.0.dist-info}/RECORD +269 -252
  266. {polyaxon-2.1.0rc9.dist-info → polyaxon-2.6.0.dist-info}/WHEEL +1 -1
  267. polyaxon/_sdk/schemas/v1_project_user_access.py +0 -10
  268. {polyaxon-2.1.0rc9.dist-info → polyaxon-2.6.0.dist-info}/LICENSE +0 -0
  269. {polyaxon-2.1.0rc9.dist-info → polyaxon-2.6.0.dist-info}/entry_points.txt +0 -0
  270. {polyaxon-2.1.0rc9.dist-info → polyaxon-2.6.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,74 @@
1
+ from typing import Dict, Iterable, List, Optional
2
+
3
+ from clipped.utils.lists import to_list
4
+
5
+ from polyaxon._connections import V1Connection, V1ConnectionResource
6
+ from polyaxon._flow import V1Init, V1Plugins
7
+ from polyaxon._local_process import process_types
8
+ from polyaxon._runner.converter import BaseConverter as _BaseConverter
9
+ from polyaxon.exceptions import PolyaxonConverterError
10
+
11
+
12
class MainConverter(_BaseConverter):
    """Converter mixin that assembles the main container of a local-process run."""

    def _get_main_container(
        self,
        container_id: str,
        main_container: process_types.V1Container,
        plugins: V1Plugins,
        artifacts_store: Optional[V1Connection],
        init: Optional[List[V1Init]],
        connections: Optional[List[str]],
        connection_by_names: Dict[str, V1Connection],
        secrets: Optional[Iterable[V1ConnectionResource]],
        config_maps: Optional[Iterable[V1ConnectionResource]],
        run_path: Optional[str],
        kv_env_vars: List[List] = None,
        ports: List[int] = None,
    ) -> process_types.V1Container:
        """Return `main_container` patched with its name and resolved env vars.

        Args:
            container_id: name given to the patched container.
            main_container: user-provided container definition to patch.
            plugins: plugins config; `collect_artifacts` and
                `mount_artifacts_store` decide whether the artifacts store is
                added to the requested connections.
            artifacts_store: optional artifacts store connection.
            init: accepted for signature compatibility; not used here.
            connections: names of the connections requested by the run.
            connection_by_names: lookup of connection name -> connection.
            secrets: candidate secret resources.
            config_maps: candidate config-map resources.
            run_path: required whenever `artifacts_store` is provided.
            kv_env_vars: extra key/value env vars forwarded to the env builder.
            ports: accepted for signature compatibility; not used here.

        Raises:
            PolyaxonConverterError: if an artifacts store is set without a run path.

        Note:
            When non-empty, `connections` and `connection_by_names` are updated
            in place with the artifacts store.
        """
        connections = connections or []
        connection_by_names = connection_by_names or {}
        secrets = secrets or []
        config_maps = config_maps or []

        if artifacts_store and not run_path:
            raise PolyaxonConverterError("Run path is required for main container.")

        # The store becomes a requested connection when artifacts are not
        # collected by a sidecar, or when mounting the store is explicitly asked.
        include_store = artifacts_store and (
            not plugins.collect_artifacts or plugins.mount_artifacts_store
        )
        if include_store:
            store_name = artifacts_store.name
            if store_name not in connection_by_names:
                connection_by_names[store_name] = artifacts_store
            if store_name not in connections:
                connections.append(store_name)

        requested_connections = []
        for connection_name in connections:
            requested_connections.append(connection_by_names[connection_name])

        requested_config_maps = V1Connection.get_requested_resources(
            resources=config_maps,
            connections=requested_connections,
            resource_key="config_map",
        )
        requested_secrets = V1Connection.get_requested_resources(
            resources=secrets,
            connections=requested_connections,
            resource_key="secret",
        )

        # Explicit env vars first ...
        store_name_or_none = artifacts_store.name if artifacts_store else None
        env = self._get_main_env_vars(
            plugins=plugins,
            kv_env_vars=kv_env_vars,
            artifacts_store_name=store_name_or_none,
            connections=requested_connections,
            secrets=requested_secrets,
            config_maps=requested_config_maps,
        )

        # ... then the ones derived from the secret / config-map resources.
        secret_resources = to_list(requested_secrets, check_none=True)
        config_map_resources = to_list(requested_config_maps, check_none=True)
        env += self._get_env_from_json_resources(
            resources=secret_resources + config_map_resources
        )

        return self._patch_container(
            container=main_container, name=container_id, env=env
        )
@@ -0,0 +1,82 @@
1
+ from typing import List, Optional
2
+
3
+ from polyaxon._connections import V1Connection, V1ConnectionResource
4
+ from polyaxon._contexts import paths as ctx_paths
5
+ from polyaxon._local_process import process_types
6
+ from polyaxon._runner.converter import BaseConverter
7
+
8
+
9
class MountsMixin(BaseConverter):
    """Mount hooks for the local-process converter.

    Every hook here is a stub: the single-mount getters return ``None`` and
    `_get_mounts` returns an empty list.
    """

    @classmethod
    def _get_mount_from_store(
        cls,
        store: V1Connection,
    ):
        """Stub: returns ``None`` for any store."""
        return None

    @classmethod
    def _get_mount_from_resource(
        cls,
        resource: V1ConnectionResource,
    ):
        """Stub: returns ``None`` for any resource."""
        return None

    @classmethod
    def _get_volume(
        cls,
        mount_path: str,
        host_path: Optional[str] = None,
        read_only: Optional[bool] = None,
    ):
        """Stub: returns ``None``; no volume is built."""
        return None

    @classmethod
    def _get_docker_context_mount(cls):
        """Stub: returns ``None``."""
        return None

    @classmethod
    def _get_auth_context_mount(
        cls,
        read_only: Optional[bool] = None,
        run_path: Optional[str] = None,
    ):
        """Stub: returns ``None``."""
        return None

    @classmethod
    def _get_artifacts_context_mount(
        cls,
        read_only: bool = False,
        run_path: Optional[str] = None,
    ):
        """Stub: returns ``None``."""
        return None

    @classmethod
    def _get_connections_context_mount(
        cls,
        name: str,
        mount_path: str,
        run_path: str,
    ):
        """Stub: returns ``None``."""
        return None

    @classmethod
    def _get_shm_context_mount(cls):
        """Stub for the shared-memory (/dev/shm) mount hook: returns ``None``."""
        return None

    @classmethod
    def _get_mounts(
        cls,
        use_auth_context: bool,
        use_docker_context: bool,
        use_shm_context: bool,
        use_artifacts_context: bool,
        run_path: Optional[str] = None,
    ) -> List:
        """Return the list of mounts, which is always empty here."""
        return list()
@@ -0,0 +1,8 @@
1
+ from polyaxon._flow import V1RunKind
2
+ from polyaxon._local_process.converter.converters.job import JobConverter
3
+ from polyaxon._local_process.converter.converters.service import ServiceConverter
4
+
5
# Run kind -> converter class available to the local-process runner.
CONVERTERS = {V1RunKind.JOB: JobConverter, V1RunKind.SERVICE: ServiceConverter}
@@ -0,0 +1,40 @@
1
+ from typing import Dict, Iterable, List, Optional
2
+
3
+ from polyaxon._connections import V1Connection, V1ConnectionResource
4
+ from polyaxon._flow import V1CompiledOperation, V1Job, V1Plugins
5
+ from polyaxon._k8s.converter.mixins import JobMixin
6
+ from polyaxon._local_process import process_types
7
+ from polyaxon._local_process.converter.base import BaseConverter
8
+
9
+
10
class JobConverter(JobMixin, BaseConverter):
    """Converts a compiled job operation into local-process containers."""

    def get_resource(
        self,
        compiled_operation: V1CompiledOperation,
        artifacts_store: V1Connection,
        connection_by_names: Dict[str, V1Connection],
        secrets: Optional[Iterable[V1ConnectionResource]],
        config_maps: Optional[Iterable[V1ConnectionResource]],
        default_sa: Optional[str] = None,
        default_auth: bool = False,
    ) -> List[process_types.V1Container]:
        """Build the containers for the job held in `compiled_operation.run`.

        Plugins are resolved from the operation (with `default_auth` as the
        auth default) and the operation's IO is forwarded as key/value env
        vars; everything else is delegated to `get_replica_resource`.
        """
        run_spec = compiled_operation.run  # type: V1Job
        resolved_plugins = V1Plugins.get_or_create(
            config=compiled_operation.plugins, auth=default_auth
        )
        io_env_vars = compiled_operation.get_env_io()
        replica_kwargs = dict(
            environment=run_spec.environment,
            plugins=resolved_plugins,
            volumes=run_spec.volumes,
            init=run_spec.init,
            sidecars=run_spec.sidecars,
            container=run_spec.container,
            artifacts_store=artifacts_store,
            connections=run_spec.connections,
            connection_by_names=connection_by_names,
            secrets=secrets,
            config_maps=config_maps,
            kv_env_vars=io_env_vars,
            default_sa=default_sa,
        )
        return self.get_replica_resource(**replica_kwargs)
@@ -0,0 +1,41 @@
1
+ from typing import Dict, Iterable, List, Optional
2
+
3
+ from polyaxon._connections import V1Connection, V1ConnectionResource
4
+ from polyaxon._flow import V1CompiledOperation, V1Plugins, V1Service
5
+ from polyaxon._local_process import process_types
6
+ from polyaxon._local_process.converter.base import BaseConverter
7
+ from polyaxon._local_process.converter.mixins import ServiceMixin
8
+
9
+
10
class ServiceConverter(ServiceMixin, BaseConverter):
    """Converts a compiled service operation into local-process containers."""

    def get_resource(
        self,
        compiled_operation: V1CompiledOperation,
        artifacts_store: V1Connection,
        connection_by_names: Dict[str, V1Connection],
        secrets: Optional[Iterable[V1ConnectionResource]],
        config_maps: Optional[Iterable[V1ConnectionResource]],
        default_sa: Optional[str] = None,
        default_auth: bool = False,
    ) -> List[process_types.V1Container]:
        """Build the containers for the service held in `compiled_operation.run`.

        Same flow as a job conversion, with the service's `ports` forwarded to
        `get_replica_resource` as well.
        """
        run_spec = compiled_operation.run  # type: V1Service
        resolved_plugins = V1Plugins.get_or_create(
            config=compiled_operation.plugins, auth=default_auth
        )
        io_env_vars = compiled_operation.get_env_io()
        replica_kwargs = dict(
            plugins=resolved_plugins,
            environment=run_spec.environment,
            volumes=run_spec.volumes,
            init=run_spec.init,
            sidecars=run_spec.sidecars,
            container=run_spec.container,
            artifacts_store=artifacts_store,
            connections=run_spec.connections,
            connection_by_names=connection_by_names,
            secrets=secrets,
            config_maps=config_maps,
            kv_env_vars=io_env_vars,
            default_sa=default_sa,
            ports=run_spec.ports,
        )
        return self.get_replica_resource(**replica_kwargs)
@@ -0,0 +1,38 @@
1
+ from typing import Dict
2
+
3
+ from polyaxon._containers.names import MAIN_JOB_CONTAINER
4
+ from polyaxon._flow import V1RunKind
5
+
6
+
7
class JobMixin:
    """Class-level constants for converters handling `V1RunKind.JOB` runs."""

    MAIN_CONTAINER_ID = MAIN_JOB_CONTAINER
    K8S_ANNOTATIONS_KIND = V1RunKind.JOB
10
+
11
+
12
class NotifierMixin:
    """Class-level constants for converters handling `V1RunKind.NOTIFIER` runs."""

    MAIN_CONTAINER_ID = MAIN_JOB_CONTAINER
    K8S_ANNOTATIONS_KIND = V1RunKind.NOTIFIER
15
+
16
+
17
class CleanerMixin:
    """Class-level constants for converters handling `V1RunKind.CLEANER` runs."""

    MAIN_CONTAINER_ID = MAIN_JOB_CONTAINER
    K8S_ANNOTATIONS_KIND = V1RunKind.CLEANER
20
+
21
+
22
class TunerMixin:
    """Class-level constants for converters handling `V1RunKind.TUNER` runs."""

    MAIN_CONTAINER_ID = MAIN_JOB_CONTAINER
    K8S_ANNOTATIONS_KIND = V1RunKind.TUNER
25
+
26
+
27
class ServiceMixin:
    """Class-level constants for converters handling `V1RunKind.SERVICE` runs."""

    MAIN_CONTAINER_ID = MAIN_JOB_CONTAINER
    K8S_ANNOTATIONS_KIND = V1RunKind.SERVICE
30
+
31
+
32
# Run kind -> mixin class. Each mixin's K8S_ANNOTATIONS_KIND is the run kind it
# serves, so the mapping is keyed by that attribute.
MIXIN_MAPPING: Dict = {
    mixin.K8S_ANNOTATIONS_KIND: mixin
    for mixin in (JobMixin, NotifierMixin, CleanerMixin, TunerMixin, ServiceMixin)
}
@@ -0,0 +1,132 @@
1
+ import os
2
+ import signal
3
+ import subprocess
4
+
5
+ from typing import Dict, List
6
+
7
+ from polyaxon._deploy.operators.cmd_operator import CmdOperator
8
+ from polyaxon._deploy.operators.conda import CondaOperator
9
+ from polyaxon._local_process import process_types
10
+ from polyaxon._local_process.converter.converters import CONVERTERS
11
+ from polyaxon._local_process.converter.mixins import MIXIN_MAPPING
12
+ from polyaxon._runner.executor import BaseExecutor
13
+ from polyaxon._runner.kinds import RunnerKind
14
+ from polyaxon._schemas.lifecycle import V1Statuses
15
+ from polyaxon.exceptions import PolyaxonAgentError
16
+ from polyaxon.logger import logger
17
+
18
+
19
class Executor(BaseExecutor):
    """Executor that runs the tasks of an operation as local processes.

    Process handles are tracked per run in `self._ops` (run uuid -> list of
    handles, in start order) so that a run can later be inspected or stopped.
    """

    MIXIN_MAPPING = MIXIN_MAPPING
    CONVERTERS = CONVERTERS
    RUNNER_KIND = RunnerKind.PROCESS

    def __init__(self, conda_env: str = None, venv: str = None):
        super().__init__()
        self._ops = {}
        self._conda_env = conda_env
        self._venv = venv

    def _get_manager(self):
        """Return a conda operator when a conda env is configured, else a plain cmd operator."""
        if self._conda_env:
            return CondaOperator()
        return CmdOperator()

    def _check_conda(self):
        """Validate that conda is usable and that the configured env exists.

        Raises:
            PolyaxonAgentError: if the manager check fails or the env is not
                listed by `conda env list`.
        """
        # `logger.error` returns None, so it cannot be raised; raise a real error.
        if not self.manager.check():
            raise PolyaxonAgentError("Conda is required to run this command.")

        envs = self.manager.execute(["env", "list", "--json"], is_json=True)
        env_names = [os.path.basename(env) for env in envs["envs"]]
        if self._conda_env not in env_names:
            raise PolyaxonAgentError(
                "Conda env `{}` is not installed.".format(self._conda_env)
            )

    def _run_in_conda(self, cmd_bash, cmd_args):
        """Spawn `cmd_args` chained with `&&` after activating the conda env.

        The spawned process is not tracked in `self._ops`.
        """
        cmd_args = ["source activate {}".format(self._conda_env)] + cmd_args
        subprocess.Popen(cmd_bash + [" && ".join(cmd_args)], close_fds=True)

    def _get_op_proc(self, run_uuid: str) -> List[subprocess.Popen]:
        """Return the processes tracked for `run_uuid`, or None if the run is unknown."""
        return self._ops.get(run_uuid)

    def create(
        self,
        run_uuid: str,
        run_kind: str,
        resource: List[process_types.V1Container],
        namespace: str = None,
    ) -> Dict:
        """Run every task of `resource` sequentially, stopping at the first failure.

        Returns:
            A dict with `status` and `tasks` (the process handles); on failure
            it also carries a `message` describing the failing task.
        """
        logger.info(f"[Executor] Starting operation {run_uuid} {run_kind}.")
        self._ops[run_uuid] = []
        for task in resource:
            # Process containers may not declare an `image`; only mention it
            # when present instead of failing on the attribute access.
            image = getattr(task, "image", None)
            label = f"{task.name} {image}" if image else f"{task.name}"
            logger.info(f"[Executor] Starting task container {label} .")
            proc = self.manager.execute(
                task.get_cmd_args(), env=os.environ, output_only=False
            )
            self._ops[run_uuid].append(proc)
            proc.wait()
            task_status = self._get_task_status(proc)
            message = f"Task container {label} with id {proc.pid} {task_status}"
            if task_status == V1Statuses.SUCCEEDED:
                logger.info(f"[Executor] {message}")
            else:
                logger.warning(f"[Executor] {message}")
                self._clean_temp_execution_path(run_uuid)
                return {
                    "status": V1Statuses.FAILED,
                    "tasks": self._ops[run_uuid],
                    "message": message,
                }
        self._clean_temp_execution_path(run_uuid)
        return {"status": V1Statuses.SUCCEEDED, "tasks": self._ops[run_uuid]}

    def apply(
        self, run_uuid: str, run_kind: str, resource: Dict, namespace: str = None
    ) -> Dict:
        """Not supported by this executor; always raises `PolyaxonAgentError`."""
        raise PolyaxonAgentError(
            "Process executor does not support apply method.\n"
            "Run: <kind: {}, uuid: {}>".format(run_kind, run_uuid)
        )

    def stop(self, run_uuid: str, run_kind: str, namespace: str = None):
        """Terminate every still-running process tracked for `run_uuid`.

        Unknown runs are a no-op.
        """
        # `_get_op_proc` returns a list of handles (or None), not a single process.
        for proc in self._get_op_proc(run_uuid) or []:
            if proc.poll() is not None:
                continue
            # Kill the process tree rooted at the child if it's the leader of
            # its own process group, otherwise just kill the child.
            try:
                if proc.pid == os.getpgid(proc.pid):
                    os.killpg(proc.pid, signal.SIGTERM)
                else:
                    proc.terminate()
            except OSError:
                # The child process may have exited before we attempted to
                # terminate it, so OSErrors raised here are ignored.
                _msg = f"Failed to terminate operation {run_kind} {run_uuid} child process PID {proc.pid}"
                logger.debug(_msg)
            proc.wait()

    def clean(self, run_uuid: str, run_kind: str, namespace: str = None):
        """Delegate to `apply`, which raises `PolyaxonAgentError` in this executor."""
        return self.apply(
            run_uuid=run_uuid,
            run_kind=run_kind,
            resource={"metadata": {"finalizers": None}},
        )

    def _get_task_status(self, proc) -> V1Statuses:
        """Map a process exit code to a status: None -> running, 0 -> succeeded, else failed."""
        exit_code = proc.poll()
        if exit_code is None:
            return V1Statuses.RUNNING
        if exit_code == 0:
            return V1Statuses.SUCCEEDED
        return V1Statuses.FAILED

    def get(self, run_uuid: str, run_kind: str, namespace: str = None) -> V1Statuses:
        """Return the status of the most recently started process of the run.

        Raises:
            PolyaxonAgentError: if no process is tracked for `run_uuid`.
        """
        procs = self._get_op_proc(run_uuid)
        if not procs:
            raise PolyaxonAgentError(
                "No process found for run: <kind: {}, uuid: {}>".format(
                    run_kind, run_uuid
                )
            )
        return self._get_task_status(procs[-1])

    def list_ops(self, namespace: str = None):
        """Return the operations known to the executor; always an empty list here."""
        return []
@@ -0,0 +1,42 @@
1
+ from typing import Dict, List, Optional, Tuple, Union
2
+
3
+ from clipped.compact.pydantic import PYDANTIC_VERSION, Field
4
+
5
+ from polyaxon._schemas.base import BaseSchemaModel, RootModel
6
+
7
+
8
class V1EnvVar(RootModel):
    """Single environment variable, given as a `(name, value)` tuple or a `{name: value}` dict."""

    if PYDANTIC_VERSION.startswith("2."):
        root: Union[Tuple[str, str], Dict[str, str]]
    else:
        __root__: Union[Tuple[str, str], Dict[str, str]]

    def to_cmd(self):
        """Return the variable as a one-element list: `["NAME=value"]`.

        For the dict form only the first item is used.

        Raises:
            ValueError: if the dict form is empty.
        """
        # NOTE(review): `_root` is presumably provided by the project's
        # RootModel base to abstract `root` / `__root__` — confirm.
        root = self._root
        if isinstance(root, tuple):
            key, value = root[0], root[1]
        else:
            # A `dict.items()` view is not subscriptable; take its first pair.
            pairs = list(root.items())
            if not pairs:
                raise ValueError("Env var mapping must contain one key/value pair.")
            key, value = pairs[0]
        return [f"{key}={value}"]
20
+
21
+
22
class V1Container(BaseSchemaModel):
    """Container-like description of a task executed by the local-process runner."""

    name: Optional[str] = None
    command: Optional[List[str]] = None
    args: Optional[List[str]] = None
    env: Optional[List[V1EnvVar]] = None
    working_dir: Optional[str] = Field(alias="workingDir", default=None)

    def get_cmd_args(self):
        """Build the argument list for this task.

        The list starts with `run --rm`, then `-e NAME=value` per env var,
        `-w <working_dir>`, `--entrypoint <command[0]>`, the image when one is
        available, the remaining command items and finally `args`.
        """
        cmd_args = ["run", "--rm"]
        # `env` defaults to None; treat it as "no env vars".
        for env in self.env or []:
            cmd_args += ["-e"] + env.to_cmd()
        if self.working_dir:
            cmd_args += ["-w", self.working_dir]
        if self.command:
            cmd_args += ["--entrypoint", self.command[0]]
        # No `image` field is declared on this model, so read it defensively
        # and only emit it when present.
        image = getattr(self, "image", None)
        if image:
            cmd_args += [image]
        if self.command:
            cmd_args += self.command[1:]
        if self.args:
            cmd_args += self.args
        return cmd_args
@@ -348,7 +348,7 @@ class CompiledOperationSpecification(BaseSpecification):
348
348
  "conditions",
349
349
  "skip_on_upstream_skip",
350
350
  }
351
- patch_keys = patch_keys.intersection(preset.__fields_set__)
351
+ patch_keys = patch_keys.intersection(preset.model_fields_set)
352
352
  patch_data = {k: getattr(preset, k) for k in patch_keys}
353
353
  patch_compiled = V1CompiledOperation.construct(**patch_data)
354
354
  return config.patch(patch_compiled, strategy=preset.patch_strategy)
@@ -93,7 +93,7 @@ class PolyaxonfileParser:
93
93
  # Check workflow
94
94
  for section in Sections.PARSING_SECTIONS:
95
95
  config_section = cls._get_section(config, section)
96
- if config_section:
96
+ if config_section is not None:
97
97
  parsed_data[section] = cls.parse_expression(
98
98
  config_section, parsed_params
99
99
  )
@@ -11,7 +11,7 @@ def validate(spec, data):
11
11
 
12
12
  def validate_keys(section, config, section_data):
13
13
  extra_args = [
14
- key for key in section_data.keys() if key not in config.__fields__.keys()
14
+ key for key in section_data.keys() if key not in config.model_fields.keys()
15
15
  ]
16
16
  if extra_args:
17
17
  raise PolyaxonfileError(
@@ -107,7 +107,7 @@ class OperationSpecification(BaseSpecification):
107
107
  "conditions",
108
108
  "skip_on_upstream_skip",
109
109
  }
110
- patch_keys = patch_keys.intersection(config.__fields_set__)
110
+ patch_keys = patch_keys.intersection(config.model_fields_set)
111
111
  patch_data = {k: getattr(config, k) for k in patch_keys}
112
112
  patch_compiled = V1CompiledOperation.construct(contexts=contexts, **patch_data)
113
113
 
@@ -99,6 +99,14 @@ class Sections:
99
99
  CONDITIONS,
100
100
  SKIP_ON_UPSTREAM_SKIP,
101
101
  PATCH_STRATEGY,
102
+ "is_approved",
103
+ "patch_strategy",
104
+ "is_preset",
105
+ "hub_ref",
106
+ "dag_ref",
107
+ "path_ref",
108
+ "url_ref",
109
+ "skip_on_upstream_skip",
102
110
  )
103
111
 
104
112
  REQUIRED_SECTIONS = (VERSION, KIND)
polyaxon/_pql/manager.py CHANGED
@@ -13,7 +13,7 @@ class PQLManager:
13
13
  FIELDS_PROXY = {}
14
14
  FIELDS_TRANS = {}
15
15
  FIELDS_ORDERING = None
16
- FIELDS_ORDERING_PROXY = None
16
+ FIELDS_ORDERING_PROXY = None # Do not set a field on both field and proxy
17
17
  FIELDS_DEFAULT_ORDERING = None
18
18
  FIELDS_DISTINCT = None
19
19
  CHECK_ALIVE = True
@@ -14,6 +14,7 @@ from polyaxon._env_vars.getters import get_run_info
14
14
  from polyaxon._runner.agent.base_agent import BaseAgent
15
15
  from polyaxon._sdk.schemas.v1_agent import V1Agent
16
16
  from polyaxon._sdk.schemas.v1_agent_state_response import V1AgentStateResponse
17
+ from polyaxon._utils.fqn_utils import get_run_instance
17
18
  from polyaxon.exceptions import ApiException as SDKApiException
18
19
  from polyaxon.exceptions import PolyaxonAgentError, PolyaxonConverterError
19
20
  from polyaxon.logger import logger
@@ -23,16 +24,16 @@ class BaseAsyncAgent(BaseAgent):
23
24
  IS_ASYNC = True
24
25
 
25
26
  async def _enter(self):
27
+ logger.warning("Agent is starting.")
28
+ await self.executor.refresh()
26
29
  if not self.client._is_managed:
27
30
  return self
28
- print("Agent is starting.")
29
- await self.executor.refresh()
30
31
  try:
31
32
  agent = await self.client.get_info()
32
33
  self._check_status(agent)
33
34
  await self.sync()
34
35
  await self.client.log_agent_running()
35
- print("Agent is running.")
36
+ logger.warning("Agent is running.")
36
37
  return self
37
38
  except (ApiException, SDKApiException, HTTPError) as e:
38
39
  message = "Could not start the agent."
@@ -80,13 +81,16 @@ class BaseAsyncAgent(BaseAgent):
80
81
 
81
82
  async def reconcile(self):
82
83
  if (
83
- now() - self._last_reconciled_at
84
+ now() - self._last_data_collected_at
84
85
  ).total_seconds() > self.SLEEP_AGENT_DATA_COLLECT_TIME:
86
+ await self.collect_agent_data()
87
+ if (
88
+ now() - self._last_reconciled_at
89
+ ).total_seconds() < self.SLEEP_AGENT_DATA_RECONCILE_TIME:
85
90
  return
86
91
 
87
- # Collect data
88
- await self.collect_agent_data()
89
-
92
+ logger.info("Checking cluster state.")
93
+ self._last_reconciled_at = now()
90
94
  # Update reconcile
91
95
  namespaces = [settings.AGENT_CONFIG.namespace]
92
96
  namespaces += settings.AGENT_CONFIG.additional_namespaces or []
@@ -96,9 +100,19 @@ class BaseAsyncAgent(BaseAgent):
96
100
  if _ops:
97
101
  ops += [
98
102
  (
99
- op["metadata"]["name"],
100
- op["metadata"]["labels"]["app.kubernetes.io/instance"],
101
- op["metadata"]["annotations"]["polyaxon.com/run_kind"],
103
+ get_run_instance(
104
+ owner=op["metadata"]["annotations"][
105
+ "operation.polyaxon.com/owner"
106
+ ],
107
+ project=op["metadata"]["annotations"][
108
+ "operation.polyaxon.com/project"
109
+ ],
110
+ run_uuid=op["metadata"]["labels"][
111
+ "app.kubernetes.io/instance"
112
+ ],
113
+ ),
114
+ op["metadata"]["annotations"]["operation.polyaxon.com/kind"],
115
+ op["metadata"]["annotations"]["operation.polyaxon.com/name"],
102
116
  namespace,
103
117
  )
104
118
  for op in _ops
@@ -142,7 +156,7 @@ class BaseAsyncAgent(BaseAgent):
142
156
  timeout = get_wait(index, max_interval=self.max_interval)
143
157
  logger.info("Sleeping for {} seconds".format(timeout))
144
158
  except Exception as e:
145
- print(e)
159
+ logger.warning("Agent failed to start: {}".format(repr(e)))
146
160
  finally:
147
161
  self.end()
148
162
 
@@ -4,7 +4,7 @@ import traceback
4
4
  from concurrent.futures import ThreadPoolExecutor
5
5
  from typing import Any, Dict, Optional, Tuple, Type
6
6
 
7
- from clipped.utils.tz import now
7
+ from clipped.utils.tz import get_datetime_from_now, now
8
8
 
9
9
  from polyaxon import settings
10
10
  from polyaxon._auxiliaries import V1PolyaxonInitContainer, V1PolyaxonSidecarContainer
@@ -24,7 +24,8 @@ class BaseAgent:
24
24
  HEALTH_FILE = "/tmp/.healthz"
25
25
  SLEEP_STOP_TIME = 60 * 5
26
26
  SLEEP_ARCHIVED_TIME = 60 * 60
27
- SLEEP_AGENT_DATA_COLLECT_TIME = 60 * 30
27
+ SLEEP_AGENT_DATA_COLLECT_TIME = 60 * 15
28
+ SLEEP_AGENT_DATA_RECONCILE_TIME = 60 * 5
28
29
  IS_ASYNC = False
29
30
 
30
31
  def __init__(
@@ -38,11 +39,13 @@ class BaseAgent:
38
39
  self.max_interval = max(max_interval, 3)
39
40
  if not agent_uuid and not owner:
40
41
  owner = DEFAULT
42
+ last_hour = get_datetime_from_now(days=0, hours=1)
41
43
  self.executor = None
42
44
  self._default_auth = bool(agent_uuid)
43
- self._executor_refreshed_at = now()
45
+ self._executor_refreshed_at = last_hour
44
46
  self._graceful_shutdown = False
45
- self._last_reconciled_at = now()
47
+ self._last_data_collected_at = last_hour
48
+ self._last_reconciled_at = last_hour
46
49
  self.client = AgentClient(
47
50
  owner=owner, agent_uuid=agent_uuid, is_async=self.IS_ASYNC
48
51
  )
@@ -60,10 +63,16 @@ class BaseAgent:
60
63
 
61
64
  def collect_agent_data(self):
62
65
  logger.info("Collecting agent data.")
63
- self._last_reconciled_at = now()
64
- return self.client.collect_agent_data(
65
- namespace=settings.CLIENT_CONFIG.namespace
66
- )
66
+ self._last_data_collected_at = now()
67
+ try:
68
+ return self.client.collect_agent_data(
69
+ namespace=settings.CLIENT_CONFIG.namespace
70
+ )
71
+ except Exception as e:
72
+ logger.warning(
73
+ "Agent failed to collect agent data: {}\n"
74
+ "Retrying ...".format(repr(e))
75
+ )
67
76
 
68
77
  def sync_compatible_updates(self, compatible_updates: Dict):
69
78
  if compatible_updates and settings.AGENT_CONFIG:
@@ -126,14 +135,14 @@ class BaseAgent:
126
135
 
127
136
  def _check_status(self, agent_state):
128
137
  if agent_state.status == V1Statuses.STOPPED:
129
- print(
138
+ logger.warning(
130
139
  "Agent has been stopped from the platform,"
131
140
  "but the deployment is still running."
132
141
  "Please either set the agent to starting or teardown the agent deployment."
133
142
  )
134
143
  return self.end(sleep=self.SLEEP_STOP_TIME)
135
144
  elif agent_state.live_state < LiveState.LIVE:
136
- print(
145
+ logger.warning(
137
146
  "Agent has been archived from the platform,"
138
147
  "but the deployment is still running."
139
148
  "Please either restore the agent or teardown the agent deployment."