polyaxon 2.1.0rc9__py3-none-any.whl → 2.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- polyaxon/_auxiliaries/default_scheduling.py +17 -7
- polyaxon/_auxiliaries/init.py +14 -6
- polyaxon/_auxiliaries/sidecar.py +10 -8
- polyaxon/_cli/artifacts.py +96 -11
- polyaxon/_cli/components.py +96 -11
- polyaxon/_cli/config.py +31 -0
- polyaxon/_cli/dashboard.py +12 -2
- polyaxon/_cli/init.py +1 -1
- polyaxon/_cli/models.py +96 -11
- polyaxon/_cli/operations.py +133 -58
- polyaxon/_cli/project_versions.py +139 -6
- polyaxon/_cli/projects.py +23 -9
- polyaxon/_cli/run.py +43 -9
- polyaxon/_cli/services/agent.py +2 -2
- polyaxon/_cli/version.py +4 -1
- polyaxon/_client/mixin.py +39 -0
- polyaxon/_client/project.py +218 -23
- polyaxon/_client/run.py +84 -27
- polyaxon/_compiler/contexts/contexts.py +4 -0
- polyaxon/_compiler/contexts/ray_job.py +4 -2
- polyaxon/_compiler/resolver/agent.py +22 -10
- polyaxon/_compiler/resolver/runtime.py +7 -3
- polyaxon/_constants/metadata.py +1 -0
- polyaxon/_contexts/keys.py +1 -0
- polyaxon/_contexts/paths.py +1 -1
- polyaxon/_deploy/operators/compose.py +1 -27
- polyaxon/_deploy/schemas/auth.py +3 -3
- polyaxon/_deploy/schemas/celery.py +10 -8
- polyaxon/_deploy/schemas/deployment.py +148 -115
- polyaxon/_deploy/schemas/email.py +8 -8
- polyaxon/_deploy/schemas/ingress.py +7 -7
- polyaxon/_deploy/schemas/intervals.py +2 -7
- polyaxon/_deploy/schemas/operators.py +8 -8
- polyaxon/_deploy/schemas/proxy.py +9 -8
- polyaxon/_deploy/schemas/rbac.py +1 -1
- polyaxon/_deploy/schemas/root_user.py +5 -5
- polyaxon/_deploy/schemas/security_context.py +25 -15
- polyaxon/_deploy/schemas/service.py +75 -66
- polyaxon/_deploy/schemas/ssl.py +3 -3
- polyaxon/_deploy/schemas/ui.py +10 -6
- polyaxon/_docker/builder/builder.py +4 -1
- polyaxon/_docker/converter/base/containers.py +4 -7
- polyaxon/_docker/converter/base/env_vars.py +5 -5
- polyaxon/_docker/converter/base/mounts.py +2 -2
- polyaxon/_docker/docker_types.py +57 -30
- polyaxon/_env_vars/getters/owner_entity.py +4 -2
- polyaxon/_env_vars/getters/project.py +4 -2
- polyaxon/_env_vars/getters/run.py +5 -2
- polyaxon/_env_vars/keys.py +3 -0
- polyaxon/_flow/__init__.py +3 -2
- polyaxon/_flow/builds/__init__.py +8 -8
- polyaxon/_flow/cache/__init__.py +4 -4
- polyaxon/_flow/component/base.py +25 -18
- polyaxon/_flow/component/component.py +4 -3
- polyaxon/_flow/early_stopping/__init__.py +1 -1
- polyaxon/_flow/early_stopping/policies.py +12 -10
- polyaxon/_flow/environment/__init__.py +43 -25
- polyaxon/_flow/events/__init__.py +1 -1
- polyaxon/_flow/hooks/__init__.py +11 -11
- polyaxon/_flow/init/__init__.py +41 -25
- polyaxon/_flow/io/io.py +57 -47
- polyaxon/_flow/joins/__init__.py +5 -5
- polyaxon/_flow/matrix/bayes.py +23 -17
- polyaxon/_flow/matrix/grid_search.py +16 -7
- polyaxon/_flow/matrix/hyperband.py +10 -10
- polyaxon/_flow/matrix/hyperopt.py +14 -9
- polyaxon/_flow/matrix/iterative.py +14 -8
- polyaxon/_flow/matrix/mapping.py +4 -4
- polyaxon/_flow/matrix/params.py +138 -77
- polyaxon/_flow/matrix/random_search.py +10 -5
- polyaxon/_flow/matrix/tuner.py +4 -4
- polyaxon/_flow/mounts/artifacts_mounts.py +1 -1
- polyaxon/_flow/notifications/__init__.py +1 -1
- polyaxon/_flow/operations/base.py +10 -8
- polyaxon/_flow/operations/compiled_operation.py +5 -4
- polyaxon/_flow/operations/operation.py +57 -41
- polyaxon/_flow/optimization/__init__.py +2 -2
- polyaxon/_flow/params/params.py +10 -9
- polyaxon/_flow/plugins/__init__.py +19 -13
- polyaxon/_flow/run/dag.py +12 -9
- polyaxon/_flow/run/dask/dask.py +4 -4
- polyaxon/_flow/run/dask/replica.py +17 -11
- polyaxon/_flow/run/job.py +17 -11
- polyaxon/_flow/run/kubeflow/mpi_job.py +10 -5
- polyaxon/_flow/run/kubeflow/mx_job.py +25 -9
- polyaxon/_flow/run/kubeflow/paddle_job.py +16 -9
- polyaxon/_flow/run/kubeflow/pytorch_job.py +24 -17
- polyaxon/_flow/run/kubeflow/replica.py +17 -11
- polyaxon/_flow/run/kubeflow/scheduling_policy.py +7 -5
- polyaxon/_flow/run/kubeflow/tf_job.py +15 -8
- polyaxon/_flow/run/kubeflow/xgboost_job.py +9 -4
- polyaxon/_flow/run/ray/ray.py +9 -6
- polyaxon/_flow/run/ray/replica.py +25 -16
- polyaxon/_flow/run/resources.py +14 -13
- polyaxon/_flow/run/service.py +4 -4
- polyaxon/_flow/schedules/cron.py +4 -4
- polyaxon/_flow/schedules/interval.py +4 -4
- polyaxon/_flow/templates/__init__.py +3 -3
- polyaxon/_flow/termination/__init__.py +3 -3
- polyaxon/_fs/async_manager.py +1 -1
- polyaxon/_fs/fs.py +1 -1
- polyaxon/_fs/watcher.py +26 -27
- polyaxon/_k8s/converter/base/base.py +2 -1
- polyaxon/_k8s/converter/base/main.py +1 -0
- polyaxon/_k8s/converter/common/accelerators.py +7 -4
- polyaxon/_k8s/converter/converters/ray_job.py +4 -2
- polyaxon/_k8s/custom_resources/dask_job.py +3 -0
- polyaxon/_k8s/custom_resources/kubeflow/common.py +4 -1
- polyaxon/_k8s/custom_resources/ray_job.py +3 -0
- polyaxon/_k8s/custom_resources/setter.py +1 -1
- polyaxon/_k8s/executor/async_executor.py +2 -0
- polyaxon/_k8s/k8s_validation.py +1 -1
- polyaxon/_k8s/logging/async_monitor.py +82 -11
- polyaxon/_k8s/manager/async_manager.py +15 -0
- polyaxon/_k8s/manager/manager.py +16 -1
- polyaxon/_local_process/__init__.py +0 -0
- polyaxon/_local_process/agent.py +6 -0
- polyaxon/_local_process/converter/__init__.py +1 -0
- polyaxon/_local_process/converter/base/__init__.py +1 -0
- polyaxon/_local_process/converter/base/base.py +140 -0
- polyaxon/_local_process/converter/base/containers.py +66 -0
- polyaxon/_local_process/converter/base/env_vars.py +253 -0
- polyaxon/_local_process/converter/base/init.py +414 -0
- polyaxon/_local_process/converter/base/main.py +74 -0
- polyaxon/_local_process/converter/base/mounts.py +82 -0
- polyaxon/_local_process/converter/converters/__init__.py +8 -0
- polyaxon/_local_process/converter/converters/job.py +40 -0
- polyaxon/_local_process/converter/converters/service.py +41 -0
- polyaxon/_local_process/converter/mixins.py +38 -0
- polyaxon/_local_process/executor.py +132 -0
- polyaxon/_local_process/process_types.py +42 -0
- polyaxon/_polyaxonfile/specs/compiled_operation.py +1 -1
- polyaxon/_polyaxonfile/specs/libs/parser.py +1 -1
- polyaxon/_polyaxonfile/specs/libs/validator.py +1 -1
- polyaxon/_polyaxonfile/specs/operation.py +1 -1
- polyaxon/_polyaxonfile/specs/sections.py +8 -0
- polyaxon/_pql/manager.py +1 -1
- polyaxon/_runner/agent/async_agent.py +25 -11
- polyaxon/_runner/agent/base_agent.py +19 -10
- polyaxon/_runner/agent/sync_agent.py +24 -10
- polyaxon/_runner/converter/converter.py +12 -4
- polyaxon/_runner/executor.py +1 -1
- polyaxon/_schemas/agent.py +69 -37
- polyaxon/_schemas/authentication.py +4 -4
- polyaxon/_schemas/base.py +26 -2
- polyaxon/_schemas/checks.py +3 -3
- polyaxon/_schemas/cli.py +4 -6
- polyaxon/_schemas/client.py +20 -18
- polyaxon/_schemas/compatibility.py +4 -4
- polyaxon/_schemas/container_resources.py +1 -1
- polyaxon/_schemas/home.py +3 -3
- polyaxon/_schemas/installation.py +13 -9
- polyaxon/_schemas/lifecycle.py +23 -23
- polyaxon/_schemas/log_handler.py +2 -2
- polyaxon/_schemas/services.py +26 -14
- polyaxon/_schemas/types/artifacts.py +3 -3
- polyaxon/_schemas/types/dockerfile.py +14 -12
- polyaxon/_schemas/types/event.py +2 -2
- polyaxon/_schemas/types/file.py +3 -3
- polyaxon/_schemas/types/git.py +12 -4
- polyaxon/_schemas/types/tensorboard.py +14 -8
- polyaxon/_schemas/user.py +3 -3
- polyaxon/_schemas/version.py +2 -2
- polyaxon/_sdk/api/agents_v1_api.py +222 -43
- polyaxon/_sdk/api/artifacts_stores_v1_api.py +3 -3
- polyaxon/_sdk/api/auth_v1_api.py +13 -13
- polyaxon/_sdk/api/connections_v1_api.py +15 -15
- polyaxon/_sdk/api/dashboards_v1_api.py +15 -15
- polyaxon/_sdk/api/organizations_v1_api.py +85 -85
- polyaxon/_sdk/api/presets_v1_api.py +15 -15
- polyaxon/_sdk/api/project_dashboards_v1_api.py +29 -29
- polyaxon/_sdk/api/project_searches_v1_api.py +29 -29
- polyaxon/_sdk/api/projects_v1_api.py +284 -107
- polyaxon/_sdk/api/queues_v1_api.py +19 -19
- polyaxon/_sdk/api/runs_v1_api.py +313 -359
- polyaxon/_sdk/api/searches_v1_api.py +15 -15
- polyaxon/_sdk/api/service_accounts_v1_api.py +31 -31
- polyaxon/_sdk/api/tags_v1_api.py +17 -17
- polyaxon/_sdk/api/teams_v1_api.py +2854 -402
- polyaxon/_sdk/api/users_v1_api.py +254 -78
- polyaxon/_sdk/api/versions_v1_api.py +7 -7
- polyaxon/_sdk/async_client/api_client.py +4 -0
- polyaxon/_sdk/schemas/__init__.py +1 -1
- polyaxon/_sdk/schemas/v1_activity.py +8 -8
- polyaxon/_sdk/schemas/v1_agent.py +18 -16
- polyaxon/_sdk/schemas/v1_agent_state_response.py +4 -4
- polyaxon/_sdk/schemas/v1_agent_state_response_agent_state.py +10 -10
- polyaxon/_sdk/schemas/v1_agent_status_body_request.py +3 -3
- polyaxon/_sdk/schemas/v1_analytics_spec.py +4 -4
- polyaxon/_sdk/schemas/v1_artifact_tree.py +3 -3
- polyaxon/_sdk/schemas/v1_auth.py +1 -1
- polyaxon/_sdk/schemas/v1_cloning.py +3 -3
- polyaxon/_sdk/schemas/v1_connection_response.py +9 -9
- polyaxon/_sdk/schemas/v1_dashboard.py +9 -9
- polyaxon/_sdk/schemas/v1_dashboard_spec.py +5 -1
- polyaxon/_sdk/schemas/v1_entities_tags.py +2 -2
- polyaxon/_sdk/schemas/v1_entities_transfer.py +2 -2
- polyaxon/_sdk/schemas/v1_entity_notification_body.py +7 -7
- polyaxon/_sdk/schemas/v1_entity_stage_body_request.py +5 -5
- polyaxon/_sdk/schemas/v1_entity_status_body_request.py +5 -5
- polyaxon/_sdk/schemas/v1_events_response.py +2 -2
- polyaxon/_sdk/schemas/v1_list_activities_response.py +4 -4
- polyaxon/_sdk/schemas/v1_list_agents_response.py +4 -4
- polyaxon/_sdk/schemas/v1_list_bookmarks_response.py +4 -4
- polyaxon/_sdk/schemas/v1_list_connections_response.py +4 -4
- polyaxon/_sdk/schemas/v1_list_dashboards_response.py +4 -4
- polyaxon/_sdk/schemas/v1_list_organization_members_response.py +4 -4
- polyaxon/_sdk/schemas/v1_list_organizations_response.py +4 -4
- polyaxon/_sdk/schemas/v1_list_presets_response.py +4 -4
- polyaxon/_sdk/schemas/v1_list_project_versions_response.py +4 -4
- polyaxon/_sdk/schemas/v1_list_projects_response.py +4 -4
- polyaxon/_sdk/schemas/v1_list_queues_response.py +4 -4
- polyaxon/_sdk/schemas/v1_list_run_artifacts_response.py +4 -4
- polyaxon/_sdk/schemas/v1_list_run_connections_response.py +4 -4
- polyaxon/_sdk/schemas/v1_list_run_edges_response.py +4 -4
- polyaxon/_sdk/schemas/v1_list_runs_response.py +4 -4
- polyaxon/_sdk/schemas/v1_list_searches_response.py +4 -4
- polyaxon/_sdk/schemas/v1_list_service_accounts_response.py +4 -4
- polyaxon/_sdk/schemas/v1_list_tags_response.py +4 -4
- polyaxon/_sdk/schemas/v1_list_team_members_response.py +4 -4
- polyaxon/_sdk/schemas/v1_list_teams_response.py +4 -4
- polyaxon/_sdk/schemas/v1_list_token_response.py +4 -4
- polyaxon/_sdk/schemas/v1_operation_body.py +8 -8
- polyaxon/_sdk/schemas/v1_organization.py +16 -16
- polyaxon/_sdk/schemas/v1_organization_member.py +6 -6
- polyaxon/_sdk/schemas/v1_password_change.py +3 -3
- polyaxon/_sdk/schemas/v1_pipeline.py +3 -3
- polyaxon/_sdk/schemas/v1_preset.py +16 -9
- polyaxon/_sdk/schemas/v1_project.py +17 -17
- polyaxon/_sdk/schemas/v1_project_settings.py +12 -10
- polyaxon/_sdk/schemas/v1_project_version.py +20 -20
- polyaxon/_sdk/schemas/v1_queue.py +12 -12
- polyaxon/_sdk/schemas/v1_run.py +38 -38
- polyaxon/_sdk/schemas/v1_run_connection.py +3 -3
- polyaxon/_sdk/schemas/v1_run_edge.py +5 -5
- polyaxon/_sdk/schemas/v1_run_edge_lineage.py +3 -3
- polyaxon/_sdk/schemas/v1_run_edges_graph.py +1 -1
- polyaxon/_sdk/schemas/v1_run_reference_catalog.py +4 -4
- polyaxon/_sdk/schemas/v1_run_settings.py +9 -9
- polyaxon/_sdk/schemas/v1_search.py +10 -10
- polyaxon/_sdk/schemas/v1_search_spec.py +14 -14
- polyaxon/_sdk/schemas/v1_section_spec.py +12 -7
- polyaxon/_sdk/schemas/v1_service_account.py +9 -9
- polyaxon/_sdk/schemas/v1_settings_catalog.py +4 -3
- polyaxon/_sdk/schemas/v1_tag.py +6 -6
- polyaxon/_sdk/schemas/v1_team.py +11 -8
- polyaxon/_sdk/schemas/v1_team_member.py +6 -6
- polyaxon/_sdk/schemas/v1_team_settings.py +2 -2
- polyaxon/_sdk/schemas/v1_token.py +10 -10
- polyaxon/_sdk/schemas/v1_trial_start.py +6 -6
- polyaxon/_sdk/schemas/v1_user.py +6 -7
- polyaxon/_sdk/schemas/v1_user_access.py +17 -0
- polyaxon/_sdk/schemas/v1_user_email.py +1 -1
- polyaxon/_sdk/schemas/v1_user_singup.py +5 -5
- polyaxon/_sdk/schemas/v1_uuids.py +1 -1
- polyaxon/_sidecar/container/__init__.py +39 -20
- polyaxon/_sidecar/container/monitors/logs.py +10 -13
- polyaxon/_sidecar/ignore.py +0 -1
- polyaxon/_utils/cli_constants.py +2 -0
- polyaxon/_utils/fqn_utils.py +25 -2
- polyaxon/_utils/test_utils.py +2 -1
- polyaxon/pkg.py +1 -1
- polyaxon/schemas.py +1 -1
- {polyaxon-2.1.0rc9.dist-info → polyaxon-2.6.0.dist-info}/METADATA +43 -43
- {polyaxon-2.1.0rc9.dist-info → polyaxon-2.6.0.dist-info}/RECORD +269 -252
- {polyaxon-2.1.0rc9.dist-info → polyaxon-2.6.0.dist-info}/WHEEL +1 -1
- polyaxon/_sdk/schemas/v1_project_user_access.py +0 -10
- {polyaxon-2.1.0rc9.dist-info → polyaxon-2.6.0.dist-info}/LICENSE +0 -0
- {polyaxon-2.1.0rc9.dist-info → polyaxon-2.6.0.dist-info}/entry_points.txt +0 -0
- {polyaxon-2.1.0rc9.dist-info → polyaxon-2.6.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,74 @@
|
|
1
|
+
from typing import Dict, Iterable, List, Optional
|
2
|
+
|
3
|
+
from clipped.utils.lists import to_list
|
4
|
+
|
5
|
+
from polyaxon._connections import V1Connection, V1ConnectionResource
|
6
|
+
from polyaxon._flow import V1Init, V1Plugins
|
7
|
+
from polyaxon._local_process import process_types
|
8
|
+
from polyaxon._runner.converter import BaseConverter as _BaseConverter
|
9
|
+
from polyaxon.exceptions import PolyaxonConverterError
|
10
|
+
|
11
|
+
|
12
|
+
class MainConverter(_BaseConverter):
    """Converter mixin that assembles the main container of a run."""

    def _get_main_container(
        self,
        container_id: str,
        main_container: process_types.V1Container,
        plugins: V1Plugins,
        artifacts_store: Optional[V1Connection],
        init: Optional[List[V1Init]],
        connections: Optional[List[str]],
        connection_by_names: Dict[str, V1Connection],
        secrets: Optional[Iterable[V1ConnectionResource]],
        config_maps: Optional[Iterable[V1ConnectionResource]],
        run_path: Optional[str],
        kv_env_vars: Optional[List[List]] = None,
        ports: Optional[List[int]] = None,
    ) -> process_types.V1Container:
        """Return ``main_container`` patched with its name and environment.

        Args:
            container_id: name given to the patched container.
            main_container: the user-defined container to patch.
            plugins: plugin flags; ``collect_artifacts`` and
                ``mount_artifacts_store`` decide whether the artifacts store
                is added to the requested connections.
            artifacts_store: optional artifacts store connection.
            init: accepted for signature parity; not used by this method.
            connections: names of the connections requested by the run.
            connection_by_names: lookup of connection name to connection.
            secrets: secret resources available to the run.
            config_maps: config-map resources available to the run.
            run_path: run path; required when ``artifacts_store`` is set.
            kv_env_vars: extra key/value env vars forwarded to
                ``_get_main_env_vars``.
            ports: accepted for signature parity; not used by this method.

        Raises:
            PolyaxonConverterError: if an artifacts store is given without a
                run path.
        """
        # Work on copies: the artifacts store may be registered below and the
        # caller's list/dict must not be modified as a side effect.
        connections = list(connections or [])
        connection_by_names = dict(connection_by_names or {})
        secrets = secrets or []
        config_maps = config_maps or []

        if artifacts_store and not run_path:
            raise PolyaxonConverterError("Run path is required for main container.")

        if artifacts_store and (
            not plugins.collect_artifacts or plugins.mount_artifacts_store
        ):
            # Make sure the artifacts store is resolvable and requested.
            if artifacts_store.name not in connection_by_names:
                connection_by_names[artifacts_store.name] = artifacts_store
            if artifacts_store.name not in connections:
                connections.append(artifacts_store.name)

        requested_connections = [connection_by_names[c] for c in connections]
        requested_config_maps = V1Connection.get_requested_resources(
            resources=config_maps,
            connections=requested_connections,
            resource_key="config_map",
        )
        requested_secrets = V1Connection.get_requested_resources(
            resources=secrets, connections=requested_connections, resource_key="secret"
        )

        # Env vars
        env = self._get_main_env_vars(
            plugins=plugins,
            kv_env_vars=kv_env_vars,
            artifacts_store_name=artifacts_store.name if artifacts_store else None,
            connections=requested_connections,
            secrets=requested_secrets,
            config_maps=requested_config_maps,
        )

        # Env from
        resources = to_list(requested_secrets, check_none=True) + to_list(
            requested_config_maps, check_none=True
        )
        env += self._get_env_from_json_resources(resources=resources)

        return self._patch_container(
            container=main_container,
            name=container_id,
            env=env,
        )
|
@@ -0,0 +1,82 @@
|
|
1
|
+
from typing import List, Optional
|
2
|
+
|
3
|
+
from polyaxon._connections import V1Connection, V1ConnectionResource
|
4
|
+
from polyaxon._contexts import paths as ctx_paths
|
5
|
+
from polyaxon._local_process import process_types
|
6
|
+
from polyaxon._runner.converter import BaseConverter
|
7
|
+
|
8
|
+
|
9
|
+
class MountsMixin(BaseConverter):
    """Mount hooks for the local-process converter.

    Every hook in this class is a no-op: the single-mount hooks return
    ``None`` and ``_get_mounts`` returns an empty list.
    """

    @classmethod
    def _get_mount_from_store(
        cls,
        store: V1Connection,
    ):
        """No-op hook for a connection store mount; returns ``None``."""
        return None

    @classmethod
    def _get_mount_from_resource(
        cls,
        resource: V1ConnectionResource,
    ):
        """No-op hook for a connection resource mount; returns ``None``."""
        return None

    @classmethod
    def _get_volume(
        cls,
        mount_path: str,
        host_path: Optional[str] = None,
        read_only: Optional[bool] = None,
    ):
        """No-op hook for a volume definition; returns ``None``."""
        return None

    @classmethod
    def _get_docker_context_mount(cls):
        """No-op hook for the docker context mount; returns ``None``."""
        return None

    @classmethod
    def _get_auth_context_mount(
        cls,
        read_only: Optional[bool] = None,
        run_path: Optional[str] = None,
    ):
        """No-op hook for the auth context mount; returns ``None``."""
        return None

    @classmethod
    def _get_artifacts_context_mount(
        cls,
        read_only: bool = False,
        run_path: Optional[str] = None,
    ):
        """No-op hook for the artifacts context mount; returns ``None``."""
        return None

    @classmethod
    def _get_connections_context_mount(
        cls,
        name: str,
        mount_path: str,
        run_path: str,
    ):
        """No-op hook for a connection context mount; returns ``None``."""
        return None

    @classmethod
    def _get_shm_context_mount(cls):
        """No-op hook for the shared-memory (/dev/shm) mount; returns ``None``.

        Nothing is mounted by this implementation.
        """
        return None

    @classmethod
    def _get_mounts(
        cls,
        use_auth_context: bool,
        use_docker_context: bool,
        use_shm_context: bool,
        use_artifacts_context: bool,
        run_path: Optional[str] = None,
    ) -> List:
        """Return the mounts for a container: always an empty list here."""
        return list()
|
@@ -0,0 +1,8 @@
|
|
1
|
+
from polyaxon._flow import V1RunKind
|
2
|
+
from polyaxon._local_process.converter.converters.job import JobConverter
|
3
|
+
from polyaxon._local_process.converter.converters.service import ServiceConverter
|
4
|
+
|
5
|
+
# Maps a run kind to the converter class that handles it; only the job and
# service kinds are registered here.
CONVERTERS = {
    V1RunKind.JOB: JobConverter,
    V1RunKind.SERVICE: ServiceConverter,
}
|
@@ -0,0 +1,40 @@
|
|
1
|
+
from typing import Dict, Iterable, List, Optional
|
2
|
+
|
3
|
+
from polyaxon._connections import V1Connection, V1ConnectionResource
|
4
|
+
from polyaxon._flow import V1CompiledOperation, V1Job, V1Plugins
|
5
|
+
from polyaxon._k8s.converter.mixins import JobMixin
|
6
|
+
from polyaxon._local_process import process_types
|
7
|
+
from polyaxon._local_process.converter.base import BaseConverter
|
8
|
+
|
9
|
+
|
10
|
+
class JobConverter(JobMixin, BaseConverter):
    """Converter turning a compiled job operation into process containers."""

    def get_resource(
        self,
        compiled_operation: V1CompiledOperation,
        artifacts_store: V1Connection,
        connection_by_names: Dict[str, V1Connection],
        secrets: Optional[Iterable[V1ConnectionResource]],
        config_maps: Optional[Iterable[V1ConnectionResource]],
        default_sa: Optional[str] = None,
        default_auth: bool = False,
    ) -> List[process_types.V1Container]:
        """Build the replica resource for the job section of the operation.

        The plugins are resolved with ``V1Plugins.get_or_create`` and the
        operation's IO env vars are forwarded as ``kv_env_vars``; everything
        else is read from the job run section and passed to
        ``get_replica_resource``.
        """
        run_section = compiled_operation.run  # type: V1Job
        resolved_plugins = V1Plugins.get_or_create(
            config=compiled_operation.plugins, auth=default_auth
        )
        io_env_vars = compiled_operation.get_env_io()

        replica_kwargs = dict(
            environment=run_section.environment,
            plugins=resolved_plugins,
            volumes=run_section.volumes,
            init=run_section.init,
            sidecars=run_section.sidecars,
            container=run_section.container,
            artifacts_store=artifacts_store,
            connections=run_section.connections,
            connection_by_names=connection_by_names,
            secrets=secrets,
            config_maps=config_maps,
            kv_env_vars=io_env_vars,
            default_sa=default_sa,
        )
        return self.get_replica_resource(**replica_kwargs)
|
@@ -0,0 +1,41 @@
|
|
1
|
+
from typing import Dict, Iterable, List, Optional
|
2
|
+
|
3
|
+
from polyaxon._connections import V1Connection, V1ConnectionResource
|
4
|
+
from polyaxon._flow import V1CompiledOperation, V1Plugins, V1Service
|
5
|
+
from polyaxon._local_process import process_types
|
6
|
+
from polyaxon._local_process.converter.base import BaseConverter
|
7
|
+
from polyaxon._local_process.converter.mixins import ServiceMixin
|
8
|
+
|
9
|
+
|
10
|
+
class ServiceConverter(ServiceMixin, BaseConverter):
    """Converter turning a compiled service operation into process containers."""

    def get_resource(
        self,
        compiled_operation: V1CompiledOperation,
        artifacts_store: V1Connection,
        connection_by_names: Dict[str, V1Connection],
        secrets: Optional[Iterable[V1ConnectionResource]],
        config_maps: Optional[Iterable[V1ConnectionResource]],
        default_sa: Optional[str] = None,
        default_auth: bool = False,
    ) -> List[process_types.V1Container]:
        """Build the replica resource for the service section of the operation.

        Same flow as a job, with the service ``ports`` forwarded as well to
        ``get_replica_resource``.
        """
        run_section = compiled_operation.run  # type: V1Service
        resolved_plugins = V1Plugins.get_or_create(
            config=compiled_operation.plugins, auth=default_auth
        )
        io_env_vars = compiled_operation.get_env_io()

        replica_kwargs = dict(
            plugins=resolved_plugins,
            environment=run_section.environment,
            volumes=run_section.volumes,
            init=run_section.init,
            sidecars=run_section.sidecars,
            container=run_section.container,
            artifacts_store=artifacts_store,
            connections=run_section.connections,
            connection_by_names=connection_by_names,
            secrets=secrets,
            config_maps=config_maps,
            kv_env_vars=io_env_vars,
            default_sa=default_sa,
            ports=run_section.ports,
        )
        return self.get_replica_resource(**replica_kwargs)
|
@@ -0,0 +1,38 @@
|
|
1
|
+
from typing import Dict
|
2
|
+
|
3
|
+
from polyaxon._containers.names import MAIN_JOB_CONTAINER
|
4
|
+
from polyaxon._flow import V1RunKind
|
5
|
+
|
6
|
+
|
7
|
+
class JobMixin:
    """Class-level constants for runs of kind ``V1RunKind.JOB``."""

    K8S_ANNOTATIONS_KIND = V1RunKind.JOB
    MAIN_CONTAINER_ID = MAIN_JOB_CONTAINER


class NotifierMixin:
    """Class-level constants for runs of kind ``V1RunKind.NOTIFIER``."""

    K8S_ANNOTATIONS_KIND = V1RunKind.NOTIFIER
    MAIN_CONTAINER_ID = MAIN_JOB_CONTAINER


class CleanerMixin:
    """Class-level constants for runs of kind ``V1RunKind.CLEANER``."""

    K8S_ANNOTATIONS_KIND = V1RunKind.CLEANER
    MAIN_CONTAINER_ID = MAIN_JOB_CONTAINER


class TunerMixin:
    """Class-level constants for runs of kind ``V1RunKind.TUNER``."""

    K8S_ANNOTATIONS_KIND = V1RunKind.TUNER
    MAIN_CONTAINER_ID = MAIN_JOB_CONTAINER


class ServiceMixin:
    """Class-level constants for runs of kind ``V1RunKind.SERVICE``."""

    K8S_ANNOTATIONS_KIND = V1RunKind.SERVICE
    # Services use the same main container id as jobs.
    MAIN_CONTAINER_ID = MAIN_JOB_CONTAINER


# Lookup from run kind to the mixin class carrying its constants.
MIXIN_MAPPING: Dict = {
    V1RunKind.JOB: JobMixin,
    V1RunKind.NOTIFIER: NotifierMixin,
    V1RunKind.CLEANER: CleanerMixin,
    V1RunKind.TUNER: TunerMixin,
    V1RunKind.SERVICE: ServiceMixin,
}
|
@@ -0,0 +1,132 @@
|
|
1
|
+
import os
|
2
|
+
import signal
|
3
|
+
import subprocess
|
4
|
+
|
5
|
+
from typing import Dict, List
|
6
|
+
|
7
|
+
from polyaxon._deploy.operators.cmd_operator import CmdOperator
|
8
|
+
from polyaxon._deploy.operators.conda import CondaOperator
|
9
|
+
from polyaxon._local_process import process_types
|
10
|
+
from polyaxon._local_process.converter.converters import CONVERTERS
|
11
|
+
from polyaxon._local_process.converter.mixins import MIXIN_MAPPING
|
12
|
+
from polyaxon._runner.executor import BaseExecutor
|
13
|
+
from polyaxon._runner.kinds import RunnerKind
|
14
|
+
from polyaxon._schemas.lifecycle import V1Statuses
|
15
|
+
from polyaxon.exceptions import PolyaxonAgentError
|
16
|
+
from polyaxon.logger import logger
|
17
|
+
|
18
|
+
|
19
|
+
class Executor(BaseExecutor):
    """Executor that runs the task containers of an operation as processes.

    Processes started by ``create`` are tracked per run uuid in ``self._ops``
    so that ``get`` and ``stop`` can find them later.
    """

    MIXIN_MAPPING = MIXIN_MAPPING
    CONVERTERS = CONVERTERS
    RUNNER_KIND = RunnerKind.PROCESS

    def __init__(self, conda_env: str = None, venv: str = None):
        """Store the optional conda env / venv names and reset tracking."""
        super().__init__()
        # run uuid -> list of processes started for that run, in start order
        self._ops = {}
        self._conda_env = conda_env
        self._venv = venv

    def _get_manager(self):
        """Return a conda operator when a conda env is set, else a cmd operator."""
        if self._conda_env:
            return CondaOperator()
        return CmdOperator()

    def _check_conda(self):
        """Validate that conda is usable and that the configured env exists.

        Raises:
            PolyaxonAgentError: if the manager check fails or the configured
                conda env is not in the list reported by ``env list --json``.
        """
        if not self.manager.check():
            message = "Conda is required to run this command."
            # `logger.error` returns None, so it must not be raised directly.
            logger.error(message)
            raise PolyaxonAgentError(message)

        envs = self.manager.execute(["env", "list", "--json"], is_json=True)
        env_names = [os.path.basename(env) for env in envs["envs"]]
        if self._conda_env not in env_names:
            message = "Conda env `{}` is not installed.".format(self._conda_env)
            logger.error(message)
            raise PolyaxonAgentError(message)

    def _run_in_conda(self, cmd_bash, cmd_args):
        """Start ``cmd_args`` joined with ``&&`` after activating the conda env.

        NOTE(review): the started process handle is not tracked or returned.
        """
        cmd_args = ["source activate {}".format(self._conda_env)] + cmd_args
        subprocess.Popen(cmd_bash + [" && ".join(cmd_args)], close_fds=True)

    def _get_op_proc(self, run_uuid: str) -> List[subprocess.Popen]:
        """Return the processes tracked for ``run_uuid`` (``None`` if unknown)."""
        return self._ops.get(run_uuid)

    def create(
        self,
        run_uuid: str,
        run_kind: str,
        resource: List[process_types.V1Container],
        namespace: str = None,
    ) -> Dict:
        """Run every task of ``resource`` sequentially and wait for each one.

        Execution stops at the first task that does not succeed.

        Returns:
            A dict with ``status`` and ``tasks`` (the tracked processes); on
            failure it also carries a ``message`` describing the failed task.
        """
        logger.info(f"[Executor] Starting operation {run_uuid} {run_kind}.")
        self._ops[run_uuid] = []
        for task in resource:
            # Only used for log messages; tolerate containers without an image.
            image = getattr(task, "image", None)
            logger.info(f"[Executor] Starting task container {task.name} {image} .")
            proc = self.manager.execute(
                task.get_cmd_args(), env=os.environ, output_only=False
            )
            self._ops[run_uuid].append(proc)
            proc.wait()
            task_status = self._get_task_status(proc)
            message = (
                f"Task container {task.name} {image} "
                f"with id {proc.pid} {task_status}"
            )
            if task_status == V1Statuses.SUCCEEDED:
                logger.info(f"[Executor] {message}")
            else:
                logger.warning(f"[Executor] {message}")
                self._clean_temp_execution_path(run_uuid)
                return {
                    "status": V1Statuses.FAILED,
                    "tasks": self._ops[run_uuid],
                    "message": message,
                }
        self._clean_temp_execution_path(run_uuid)
        return {"status": V1Statuses.SUCCEEDED, "tasks": self._ops[run_uuid]}

    def apply(
        self, run_uuid: str, run_kind: str, resource: Dict, namespace: str = None
    ) -> Dict:
        """Unsupported for this executor.

        Raises:
            PolyaxonAgentError: always.
        """
        raise PolyaxonAgentError(
            "Local process executor does not support apply method.\n"
            "Run: <kind: {}, uuid: {}>".format(run_kind, run_uuid)
        )

    def stop(self, run_uuid: str, run_kind: str, namespace: str = None):
        """Terminate every process still running for ``run_uuid``.

        Unknown runs are ignored.
        """
        # `_get_op_proc` returns a list of processes (or None), not one process.
        for proc in self._get_op_proc(run_uuid) or []:
            if proc.poll() is not None:
                continue
            # Kill the process tree rooted at the child if it's the leader of
            # its own process group, otherwise just kill the child
            try:
                if proc.pid == os.getpgid(proc.pid):
                    os.killpg(proc.pid, signal.SIGTERM)
                else:
                    proc.terminate()
            except OSError:
                # The child process may have exited before we attempted to
                # terminate it, so we ignore OSErrors raised during child
                # process termination
                _msg = f"Failed to terminate operation {run_kind} {run_uuid} child process PID {proc.pid}"
                logger.debug(_msg)
            proc.wait()

    def clean(self, run_uuid: str, run_kind: str, namespace: str = None):
        """Delegate to ``apply`` with a finalizers-clearing patch.

        NOTE(review): ``apply`` always raises ``PolyaxonAgentError`` in this
        executor, so this method currently always raises as well - confirm
        whether cleaning should be a no-op for local processes.
        """
        return self.apply(
            run_uuid=run_uuid,
            run_kind=run_kind,
            resource={"metadata": {"finalizers": None}},
        )

    def _get_task_status(self, proc) -> V1Statuses:
        """Map a process exit code to a status (running / succeeded / failed)."""
        exit_code = proc.poll()
        if exit_code is None:
            return V1Statuses.RUNNING
        if exit_code == 0:
            return V1Statuses.SUCCEEDED
        return V1Statuses.FAILED

    def get(self, run_uuid: str, run_kind: str, namespace: str = None) -> V1Statuses:
        """Return the status of the last process started for ``run_uuid``.

        Raises:
            PolyaxonAgentError: if no process is tracked for the run.
        """
        procs = self._get_op_proc(run_uuid)
        if not procs:
            raise PolyaxonAgentError(
                "No process found for run: <kind: {}, uuid: {}>".format(
                    run_kind, run_uuid
                )
            )
        return self._get_task_status(procs[-1])

    def list_ops(self, namespace: str = None):
        """Return the list of operations; always empty for this executor."""
        return []
|
@@ -0,0 +1,42 @@
|
|
1
|
+
from typing import Dict, List, Optional, Tuple, Union
|
2
|
+
|
3
|
+
from clipped.compact.pydantic import PYDANTIC_VERSION, Field
|
4
|
+
|
5
|
+
from polyaxon._schemas.base import BaseSchemaModel, RootModel
|
6
|
+
|
7
|
+
|
8
|
+
class V1EnvVar(RootModel):
    """A single environment variable, as a ``(name, value)`` tuple or a dict."""

    if PYDANTIC_VERSION.startswith("2."):
        root: Union[Tuple[str, str], Dict[str, str]]
    else:
        __root__: Union[Tuple[str, str], Dict[str, str]]

    def to_cmd(self):
        """Return the variable as a one-element list ``["NAME=VALUE"]``.

        Raises:
            ValueError: if the root is a dict that does not hold exactly one
                ``name: value`` entry.
        """
        root = self._root
        if isinstance(root, tuple):
            name, value = root
        else:
            # A dict view is not subscriptable; materialize the pairs first.
            pairs = list(root.items())
            if len(pairs) != 1:
                raise ValueError(
                    "An env var dict must contain exactly one entry, "
                    "received {}.".format(len(pairs))
                )
            name, value = pairs[0]
        return [f"{name}={value}"]
|
20
|
+
|
21
|
+
|
22
|
+
class V1Container(BaseSchemaModel):
    """Description of a task container and how to turn it into cmd args."""

    name: Optional[str] = None
    # Read by `get_cmd_args`; declared so that accessing it never fails.
    image: Optional[str] = None
    command: Optional[List[str]] = None
    args: Optional[List[str]] = None
    env: Optional[List[V1EnvVar]] = None
    working_dir: Optional[str] = Field(alias="workingDir", default=None)

    def get_cmd_args(self):
        """Return the argument list describing this container.

        The list starts with ``run --rm`` followed by ``-e`` entries for each
        env var, ``-w`` for the working dir, ``--entrypoint`` for the first
        command item, the image (when set), the remaining command items and
        finally the args.

        NOTE(review): the ``run --rm`` / ``--entrypoint`` layout looks like a
        docker command line - confirm it is what the process operator expects.
        """
        cmd_args = ["run", "--rm"]
        # `env` is optional; treat a missing value as no variables.
        for env in self.env or []:
            cmd_args += ["-e"] + env.to_cmd()
        if self.working_dir:
            cmd_args += ["-w", self.working_dir]
        if self.command:
            cmd_args += ["--entrypoint", self.command[0]]
        if self.image:
            cmd_args += [self.image]
        if self.command:
            cmd_args += self.command[1:]
        if self.args:
            cmd_args += self.args
        return cmd_args
|
@@ -348,7 +348,7 @@ class CompiledOperationSpecification(BaseSpecification):
|
|
348
348
|
"conditions",
|
349
349
|
"skip_on_upstream_skip",
|
350
350
|
}
|
351
|
-
patch_keys = patch_keys.intersection(preset.__fields_set__)
|
351
|
+
patch_keys = patch_keys.intersection(preset.model_fields_set)
|
352
352
|
patch_data = {k: getattr(preset, k) for k in patch_keys}
|
353
353
|
patch_compiled = V1CompiledOperation.construct(**patch_data)
|
354
354
|
return config.patch(patch_compiled, strategy=preset.patch_strategy)
|
@@ -93,7 +93,7 @@ class PolyaxonfileParser:
|
|
93
93
|
# Check workflow
|
94
94
|
for section in Sections.PARSING_SECTIONS:
|
95
95
|
config_section = cls._get_section(config, section)
|
96
|
-
if config_section:
|
96
|
+
if config_section is not None:
|
97
97
|
parsed_data[section] = cls.parse_expression(
|
98
98
|
config_section, parsed_params
|
99
99
|
)
|
@@ -11,7 +11,7 @@ def validate(spec, data):
|
|
11
11
|
|
12
12
|
def validate_keys(section, config, section_data):
|
13
13
|
extra_args = [
|
14
|
-
key for key in section_data.keys() if key not in config.__fields__.keys()
|
14
|
+
key for key in section_data.keys() if key not in config.model_fields.keys()
|
15
15
|
]
|
16
16
|
if extra_args:
|
17
17
|
raise PolyaxonfileError(
|
@@ -107,7 +107,7 @@ class OperationSpecification(BaseSpecification):
|
|
107
107
|
"conditions",
|
108
108
|
"skip_on_upstream_skip",
|
109
109
|
}
|
110
|
-
patch_keys = patch_keys.intersection(config.__fields_set__)
|
110
|
+
patch_keys = patch_keys.intersection(config.model_fields_set)
|
111
111
|
patch_data = {k: getattr(config, k) for k in patch_keys}
|
112
112
|
patch_compiled = V1CompiledOperation.construct(contexts=contexts, **patch_data)
|
113
113
|
|
@@ -99,6 +99,14 @@ class Sections:
|
|
99
99
|
CONDITIONS,
|
100
100
|
SKIP_ON_UPSTREAM_SKIP,
|
101
101
|
PATCH_STRATEGY,
|
102
|
+
"is_approved",
|
103
|
+
"patch_strategy",
|
104
|
+
"is_preset",
|
105
|
+
"hub_ref",
|
106
|
+
"dag_ref",
|
107
|
+
"path_ref",
|
108
|
+
"url_ref",
|
109
|
+
"skip_on_upstream_skip",
|
102
110
|
)
|
103
111
|
|
104
112
|
REQUIRED_SECTIONS = (VERSION, KIND)
|
polyaxon/_pql/manager.py
CHANGED
@@ -13,7 +13,7 @@ class PQLManager:
|
|
13
13
|
FIELDS_PROXY = {}
|
14
14
|
FIELDS_TRANS = {}
|
15
15
|
FIELDS_ORDERING = None
|
16
|
-
FIELDS_ORDERING_PROXY = None
|
16
|
+
FIELDS_ORDERING_PROXY = None # Do not set a field on both field and proxy
|
17
17
|
FIELDS_DEFAULT_ORDERING = None
|
18
18
|
FIELDS_DISTINCT = None
|
19
19
|
CHECK_ALIVE = True
|
@@ -14,6 +14,7 @@ from polyaxon._env_vars.getters import get_run_info
|
|
14
14
|
from polyaxon._runner.agent.base_agent import BaseAgent
|
15
15
|
from polyaxon._sdk.schemas.v1_agent import V1Agent
|
16
16
|
from polyaxon._sdk.schemas.v1_agent_state_response import V1AgentStateResponse
|
17
|
+
from polyaxon._utils.fqn_utils import get_run_instance
|
17
18
|
from polyaxon.exceptions import ApiException as SDKApiException
|
18
19
|
from polyaxon.exceptions import PolyaxonAgentError, PolyaxonConverterError
|
19
20
|
from polyaxon.logger import logger
|
@@ -23,16 +24,16 @@ class BaseAsyncAgent(BaseAgent):
|
|
23
24
|
IS_ASYNC = True
|
24
25
|
|
25
26
|
async def _enter(self):
|
27
|
+
logger.warning("Agent is starting.")
|
28
|
+
await self.executor.refresh()
|
26
29
|
if not self.client._is_managed:
|
27
30
|
return self
|
28
|
-
print("Agent is starting.")
|
29
|
-
await self.executor.refresh()
|
30
31
|
try:
|
31
32
|
agent = await self.client.get_info()
|
32
33
|
self._check_status(agent)
|
33
34
|
await self.sync()
|
34
35
|
await self.client.log_agent_running()
|
35
|
-
|
36
|
+
logger.warning("Agent is running.")
|
36
37
|
return self
|
37
38
|
except (ApiException, SDKApiException, HTTPError) as e:
|
38
39
|
message = "Could not start the agent."
|
@@ -80,13 +81,16 @@ class BaseAsyncAgent(BaseAgent):
|
|
80
81
|
|
81
82
|
async def reconcile(self):
|
82
83
|
if (
|
83
|
-
now() - self.
|
84
|
+
now() - self._last_data_collected_at
|
84
85
|
).total_seconds() > self.SLEEP_AGENT_DATA_COLLECT_TIME:
|
86
|
+
await self.collect_agent_data()
|
87
|
+
if (
|
88
|
+
now() - self._last_reconciled_at
|
89
|
+
).total_seconds() < self.SLEEP_AGENT_DATA_RECONCILE_TIME:
|
85
90
|
return
|
86
91
|
|
87
|
-
|
88
|
-
|
89
|
-
|
92
|
+
logger.info("Checking cluster state.")
|
93
|
+
self._last_reconciled_at = now()
|
90
94
|
# Update reconcile
|
91
95
|
namespaces = [settings.AGENT_CONFIG.namespace]
|
92
96
|
namespaces += settings.AGENT_CONFIG.additional_namespaces or []
|
@@ -96,9 +100,19 @@ class BaseAsyncAgent(BaseAgent):
|
|
96
100
|
if _ops:
|
97
101
|
ops += [
|
98
102
|
(
|
99
|
-
|
100
|
-
|
101
|
-
|
103
|
+
get_run_instance(
|
104
|
+
owner=op["metadata"]["annotations"][
|
105
|
+
"operation.polyaxon.com/owner"
|
106
|
+
],
|
107
|
+
project=op["metadata"]["annotations"][
|
108
|
+
"operation.polyaxon.com/project"
|
109
|
+
],
|
110
|
+
run_uuid=op["metadata"]["labels"][
|
111
|
+
"app.kubernetes.io/instance"
|
112
|
+
],
|
113
|
+
),
|
114
|
+
op["metadata"]["annotations"]["operation.polyaxon.com/kind"],
|
115
|
+
op["metadata"]["annotations"]["operation.polyaxon.com/name"],
|
102
116
|
namespace,
|
103
117
|
)
|
104
118
|
for op in _ops
|
@@ -142,7 +156,7 @@ class BaseAsyncAgent(BaseAgent):
|
|
142
156
|
timeout = get_wait(index, max_interval=self.max_interval)
|
143
157
|
logger.info("Sleeping for {} seconds".format(timeout))
|
144
158
|
except Exception as e:
|
145
|
-
|
159
|
+
logger.warning("Agent failed to start: {}".format(repr(e)))
|
146
160
|
finally:
|
147
161
|
self.end()
|
148
162
|
|
@@ -4,7 +4,7 @@ import traceback
|
|
4
4
|
from concurrent.futures import ThreadPoolExecutor
|
5
5
|
from typing import Any, Dict, Optional, Tuple, Type
|
6
6
|
|
7
|
-
from clipped.utils.tz import now
|
7
|
+
from clipped.utils.tz import get_datetime_from_now, now
|
8
8
|
|
9
9
|
from polyaxon import settings
|
10
10
|
from polyaxon._auxiliaries import V1PolyaxonInitContainer, V1PolyaxonSidecarContainer
|
@@ -24,7 +24,8 @@ class BaseAgent:
|
|
24
24
|
HEALTH_FILE = "/tmp/.healthz"
|
25
25
|
SLEEP_STOP_TIME = 60 * 5
|
26
26
|
SLEEP_ARCHIVED_TIME = 60 * 60
|
27
|
-
SLEEP_AGENT_DATA_COLLECT_TIME = 60 *
|
27
|
+
SLEEP_AGENT_DATA_COLLECT_TIME = 60 * 15
|
28
|
+
SLEEP_AGENT_DATA_RECONCILE_TIME = 60 * 5
|
28
29
|
IS_ASYNC = False
|
29
30
|
|
30
31
|
def __init__(
|
@@ -38,11 +39,13 @@ class BaseAgent:
|
|
38
39
|
self.max_interval = max(max_interval, 3)
|
39
40
|
if not agent_uuid and not owner:
|
40
41
|
owner = DEFAULT
|
42
|
+
last_hour = get_datetime_from_now(days=0, hours=1)
|
41
43
|
self.executor = None
|
42
44
|
self._default_auth = bool(agent_uuid)
|
43
|
-
self._executor_refreshed_at =
|
45
|
+
self._executor_refreshed_at = last_hour
|
44
46
|
self._graceful_shutdown = False
|
45
|
-
self.
|
47
|
+
self._last_data_collected_at = last_hour
|
48
|
+
self._last_reconciled_at = last_hour
|
46
49
|
self.client = AgentClient(
|
47
50
|
owner=owner, agent_uuid=agent_uuid, is_async=self.IS_ASYNC
|
48
51
|
)
|
@@ -60,10 +63,16 @@ class BaseAgent:
|
|
60
63
|
|
61
64
|
def collect_agent_data(self):
|
62
65
|
logger.info("Collecting agent data.")
|
63
|
-
self.
|
64
|
-
|
65
|
-
|
66
|
-
|
66
|
+
self._last_data_collected_at = now()
|
67
|
+
try:
|
68
|
+
return self.client.collect_agent_data(
|
69
|
+
namespace=settings.CLIENT_CONFIG.namespace
|
70
|
+
)
|
71
|
+
except Exception as e:
|
72
|
+
logger.warning(
|
73
|
+
"Agent failed to collect agent data: {}\n"
|
74
|
+
"Retrying ...".format(repr(e))
|
75
|
+
)
|
67
76
|
|
68
77
|
def sync_compatible_updates(self, compatible_updates: Dict):
|
69
78
|
if compatible_updates and settings.AGENT_CONFIG:
|
@@ -126,14 +135,14 @@ class BaseAgent:
|
|
126
135
|
|
127
136
|
def _check_status(self, agent_state):
|
128
137
|
if agent_state.status == V1Statuses.STOPPED:
|
129
|
-
|
138
|
+
logger.warning(
|
130
139
|
"Agent has been stopped from the platform,"
|
131
140
|
"but the deployment is still running."
|
132
141
|
"Please either set the agent to starting or teardown the agent deployment."
|
133
142
|
)
|
134
143
|
return self.end(sleep=self.SLEEP_STOP_TIME)
|
135
144
|
elif agent_state.live_state < LiveState.LIVE:
|
136
|
-
|
145
|
+
logger.warning(
|
137
146
|
"Agent has been archived from the platform,"
|
138
147
|
"but the deployment is still running."
|
139
148
|
"Please either restore the agent or teardown the agent deployment."
|