paasta-tools 1.27.0__py3-none-any.whl → 1.35.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of paasta-tools has been flagged as potentially problematic.
- paasta_tools/__init__.py +1 -1
- paasta_tools/api/api_docs/swagger.json +9 -1
- paasta_tools/api/tweens/auth.py +2 -1
- paasta_tools/api/views/instance.py +9 -2
- paasta_tools/api/views/remote_run.py +2 -0
- paasta_tools/async_utils.py +4 -1
- paasta_tools/bounce_lib.py +8 -5
- paasta_tools/check_services_replication_tools.py +10 -4
- paasta_tools/check_spark_jobs.py +1 -1
- paasta_tools/cli/cli.py +4 -4
- paasta_tools/cli/cmds/autoscale.py +2 -0
- paasta_tools/cli/cmds/check.py +2 -0
- paasta_tools/cli/cmds/cook_image.py +2 -0
- paasta_tools/cli/cmds/get_docker_image.py +2 -0
- paasta_tools/cli/cmds/get_image_version.py +2 -0
- paasta_tools/cli/cmds/get_latest_deployment.py +2 -0
- paasta_tools/cli/cmds/info.py +10 -3
- paasta_tools/cli/cmds/itest.py +2 -0
- paasta_tools/cli/cmds/list_namespaces.py +2 -0
- paasta_tools/cli/cmds/local_run.py +122 -27
- paasta_tools/cli/cmds/logs.py +31 -7
- paasta_tools/cli/cmds/mark_for_deployment.py +14 -4
- paasta_tools/cli/cmds/mesh_status.py +3 -2
- paasta_tools/cli/cmds/push_to_registry.py +2 -0
- paasta_tools/cli/cmds/remote_run.py +156 -12
- paasta_tools/cli/cmds/rollback.py +6 -2
- paasta_tools/cli/cmds/secret.py +4 -2
- paasta_tools/cli/cmds/security_check.py +2 -0
- paasta_tools/cli/cmds/spark_run.py +7 -3
- paasta_tools/cli/cmds/status.py +59 -29
- paasta_tools/cli/cmds/validate.py +325 -40
- paasta_tools/cli/cmds/wait_for_deployment.py +2 -0
- paasta_tools/cli/schemas/adhoc_schema.json +3 -0
- paasta_tools/cli/schemas/autoscaling_schema.json +3 -2
- paasta_tools/cli/schemas/eks_schema.json +24 -1
- paasta_tools/cli/schemas/kubernetes_schema.json +1 -0
- paasta_tools/cli/schemas/smartstack_schema.json +14 -0
- paasta_tools/cli/utils.py +34 -20
- paasta_tools/contrib/bounce_log_latency_parser.py +1 -1
- paasta_tools/contrib/check_orphans.py +1 -1
- paasta_tools/contrib/get_running_task_allocation.py +1 -1
- paasta_tools/contrib/ide_helper.py +14 -14
- paasta_tools/contrib/mock_patch_checker.py +1 -1
- paasta_tools/contrib/paasta_update_soa_memcpu.py +10 -14
- paasta_tools/contrib/render_template.py +1 -1
- paasta_tools/contrib/shared_ip_check.py +1 -1
- paasta_tools/generate_deployments_for_service.py +2 -0
- paasta_tools/instance/hpa_metrics_parser.py +3 -5
- paasta_tools/instance/kubernetes.py +70 -36
- paasta_tools/kubernetes/application/controller_wrappers.py +23 -2
- paasta_tools/kubernetes/remote_run.py +52 -25
- paasta_tools/kubernetes_tools.py +60 -69
- paasta_tools/long_running_service_tools.py +15 -5
- paasta_tools/mesos/master.py +1 -1
- paasta_tools/metrics/metastatus_lib.py +1 -25
- paasta_tools/metrics/metrics_lib.py +12 -3
- paasta_tools/paastaapi/__init__.py +1 -1
- paasta_tools/paastaapi/api/autoscaler_api.py +1 -1
- paasta_tools/paastaapi/api/default_api.py +1 -1
- paasta_tools/paastaapi/api/remote_run_api.py +1 -1
- paasta_tools/paastaapi/api/resources_api.py +1 -1
- paasta_tools/paastaapi/api/service_api.py +1 -1
- paasta_tools/paastaapi/api_client.py +1 -1
- paasta_tools/paastaapi/configuration.py +2 -2
- paasta_tools/paastaapi/exceptions.py +1 -1
- paasta_tools/paastaapi/model/adhoc_launch_history.py +1 -1
- paasta_tools/paastaapi/model/autoscaler_count_msg.py +1 -1
- paasta_tools/paastaapi/model/autoscaling_override.py +1 -1
- paasta_tools/paastaapi/model/deploy_queue.py +1 -1
- paasta_tools/paastaapi/model/deploy_queue_service_instance.py +1 -1
- paasta_tools/paastaapi/model/envoy_backend.py +1 -1
- paasta_tools/paastaapi/model/envoy_location.py +1 -1
- paasta_tools/paastaapi/model/envoy_status.py +1 -1
- paasta_tools/paastaapi/model/flink_cluster_overview.py +1 -1
- paasta_tools/paastaapi/model/flink_config.py +1 -1
- paasta_tools/paastaapi/model/flink_job.py +1 -1
- paasta_tools/paastaapi/model/flink_job_details.py +1 -1
- paasta_tools/paastaapi/model/flink_jobs.py +1 -1
- paasta_tools/paastaapi/model/float_and_error.py +1 -1
- paasta_tools/paastaapi/model/hpa_metric.py +1 -1
- paasta_tools/paastaapi/model/inline_object.py +1 -1
- paasta_tools/paastaapi/model/inline_response200.py +1 -1
- paasta_tools/paastaapi/model/inline_response2001.py +1 -1
- paasta_tools/paastaapi/model/inline_response202.py +1 -1
- paasta_tools/paastaapi/model/inline_response403.py +1 -1
- paasta_tools/paastaapi/model/instance_bounce_status.py +1 -1
- paasta_tools/paastaapi/model/instance_mesh_status.py +1 -1
- paasta_tools/paastaapi/model/instance_status.py +1 -1
- paasta_tools/paastaapi/model/instance_status_adhoc.py +1 -1
- paasta_tools/paastaapi/model/instance_status_cassandracluster.py +1 -1
- paasta_tools/paastaapi/model/instance_status_flink.py +1 -1
- paasta_tools/paastaapi/model/instance_status_kafkacluster.py +1 -1
- paasta_tools/paastaapi/model/instance_status_kubernetes.py +1 -1
- paasta_tools/paastaapi/model/instance_status_kubernetes_autoscaling_status.py +1 -1
- paasta_tools/paastaapi/model/instance_status_kubernetes_v2.py +1 -1
- paasta_tools/paastaapi/model/instance_status_tron.py +1 -1
- paasta_tools/paastaapi/model/instance_tasks.py +1 -1
- paasta_tools/paastaapi/model/integer_and_error.py +1 -1
- paasta_tools/paastaapi/model/kubernetes_container.py +1 -1
- paasta_tools/paastaapi/model/kubernetes_container_v2.py +1 -1
- paasta_tools/paastaapi/model/kubernetes_healthcheck.py +1 -1
- paasta_tools/paastaapi/model/kubernetes_pod.py +1 -1
- paasta_tools/paastaapi/model/kubernetes_pod_event.py +1 -1
- paasta_tools/paastaapi/model/kubernetes_pod_v2.py +1 -1
- paasta_tools/paastaapi/model/kubernetes_replica_set.py +1 -1
- paasta_tools/paastaapi/model/kubernetes_version.py +4 -1
- paasta_tools/paastaapi/model/remote_run_outcome.py +1 -1
- paasta_tools/paastaapi/model/remote_run_start.py +4 -1
- paasta_tools/paastaapi/model/remote_run_stop.py +1 -1
- paasta_tools/paastaapi/model/remote_run_token.py +1 -1
- paasta_tools/paastaapi/model/resource.py +1 -1
- paasta_tools/paastaapi/model/resource_item.py +1 -1
- paasta_tools/paastaapi/model/resource_value.py +1 -1
- paasta_tools/paastaapi/model/smartstack_backend.py +1 -1
- paasta_tools/paastaapi/model/smartstack_location.py +1 -1
- paasta_tools/paastaapi/model/smartstack_status.py +1 -1
- paasta_tools/paastaapi/model/task_tail_lines.py +1 -1
- paasta_tools/paastaapi/model_utils.py +1 -1
- paasta_tools/paastaapi/rest.py +1 -1
- paasta_tools/remote_git.py +2 -2
- paasta_tools/run-paasta-api-in-dev-mode.py +2 -2
- paasta_tools/run-paasta-api-playground.py +2 -2
- paasta_tools/setup_kubernetes_job.py +43 -1
- paasta_tools/setup_prometheus_adapter_config.py +82 -0
- paasta_tools/setup_tron_namespace.py +2 -2
- paasta_tools/tron_tools.py +4 -1
- paasta_tools/utils.py +29 -11
- paasta_tools/yaml_tools.py +1 -1
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/check_orphans.py +1 -1
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/check_spark_jobs.py +1 -1
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/generate_deployments_for_service.py +2 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/get_running_task_allocation.py +1 -1
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/ide_helper.py +14 -14
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/paasta_update_soa_memcpu.py +10 -14
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/setup_kubernetes_job.py +43 -1
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/setup_prometheus_adapter_config.py +82 -0
- paasta_tools-1.35.8.dist-info/METADATA +79 -0
- {paasta_tools-1.27.0.dist-info → paasta_tools-1.35.8.dist-info}/RECORD +186 -191
- {paasta_tools-1.27.0.dist-info → paasta_tools-1.35.8.dist-info}/WHEEL +1 -1
- paasta_tools/frameworks/adhoc_scheduler.py +0 -71
- paasta_tools/frameworks/native_scheduler.py +0 -652
- paasta_tools/frameworks/task_store.py +0 -245
- paasta_tools/mesos_maintenance.py +0 -848
- paasta_tools/paasta_native_serviceinit.py +0 -21
- paasta_tools-1.27.0.dist-info/METADATA +0 -75
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/apply_external_resources.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/bounce_log_latency_parser.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/check_autoscaler_max_instances.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/check_cassandracluster_services_replication.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/check_flink_services_health.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/check_kubernetes_api.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/check_kubernetes_services_replication.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/check_manual_oapi_changes.sh +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/check_oom_events.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/cleanup_kubernetes_cr.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/cleanup_kubernetes_crd.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/cleanup_kubernetes_jobs.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/create_dynamodb_table.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/create_paasta_playground.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/delete_kubernetes_deployments.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/emit_allocated_cpu_metrics.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/generate_all_deployments +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/generate_authenticating_services.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/generate_services_file.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/generate_services_yaml.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/habitat_fixer.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/is_pod_healthy_in_proxy.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/is_pod_healthy_in_smartstack.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/kill_bad_containers.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/kubernetes_remove_evicted_pods.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/mass-deploy-tag.sh +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/mock_patch_checker.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/paasta_cleanup_remote_run_resources.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/paasta_cleanup_stale_nodes.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/paasta_deploy_tron_jobs +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/paasta_execute_docker_command.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/paasta_secrets_sync.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/paasta_tabcomplete.sh +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/render_template.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/rightsizer_soaconfigs_update.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/service_shard_remove.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/service_shard_update.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/setup_istio_mesh.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/setup_kubernetes_cr.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/setup_kubernetes_crd.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/setup_kubernetes_internal_crd.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/shared_ip_check.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/synapse_srv_namespaces_fact.py +0 -0
- {paasta_tools-1.27.0.data → paasta_tools-1.35.8.data}/scripts/timeouts_metrics_prom.py +0 -0
- {paasta_tools-1.27.0.dist-info → paasta_tools-1.35.8.dist-info}/entry_points.txt +0 -0
- {paasta_tools-1.27.0.dist-info → paasta_tools-1.35.8.dist-info/licenses}/LICENSE +0 -0
- {paasta_tools-1.27.0.dist-info → paasta_tools-1.35.8.dist-info}/top_level.txt +0 -0
paasta_tools/kubernetes/remote_run.py
CHANGED

@@ -20,6 +20,7 @@ from typing import Sequence
 from typing import TypedDict

 from kubernetes.client import AuthenticationV1TokenRequest
+from kubernetes.client import RbacV1Subject
 from kubernetes.client import V1Job
 from kubernetes.client import V1ObjectMeta
 from kubernetes.client import V1Pod
@@ -28,7 +29,6 @@ from kubernetes.client import V1Role
 from kubernetes.client import V1RoleBinding
 from kubernetes.client import V1RoleRef
 from kubernetes.client import V1ServiceAccount
-from kubernetes.client import V1Subject
 from kubernetes.client import V1TokenRequestSpec
 from kubernetes.client.exceptions import ApiException

@@ -44,7 +44,7 @@ from paasta_tools.kubernetes_tools import KubeClient
 from paasta_tools.kubernetes_tools import limit_size_with_hash
 from paasta_tools.kubernetes_tools import paasta_prefixed
 from paasta_tools.utils import load_system_paasta_config
-…
+from paasta_tools.utils import NoConfigurationForServiceError

 logger = logging.getLogger(__name__)
 REMOTE_RUN_JOB_LABEL = "remote-run"
@@ -66,8 +66,8 @@ class RemoteRunOutcome(TypedDict, total=False):
     namespace: str


-def _format_remote_run_job_name(
-    …
+def format_remote_run_job_name(
+    job_name: str,
     user: str,
 ) -> str:
     """Format name for remote run job
@@ -76,7 +76,35 @@ def _format_remote_run_job_name(
     :param str user: the user requesting the remote-run
     :return: job name
     """
-    return limit_size_with_hash(f"remote-run-{user}-{…
+    return limit_size_with_hash(f"remote-run-{user}-{job_name}")
+
+
+def load_eks_or_adhoc_deployment_config(
+    service: str,
+    instance: str,
+    cluster: str,
+    is_toolbox: bool = False,
+    user: Optional[str] = None,
+) -> EksDeploymentConfig:
+    assert user or not is_toolbox, "User required for toolbox deployment"
+    try:
+        deployment_config = (
+            generate_toolbox_deployment(service, cluster, user)
+            if is_toolbox
+            else load_eks_service_config(service, instance, cluster)
+        )
+    except NoConfigurationForServiceError:
+        # Perhaps they are trying to use an adhoc instance
+        deployment_config = load_adhoc_job_config(service, instance, cluster)
+        deployment_config = EksDeploymentConfig(
+            service,
+            cluster,
+            instance,
+            config_dict=deployment_config.config_dict,
+            branch_dict=deployment_config.branch_dict,
+        )
+        deployment_config.config_filename_prefix = "adhoc"
+    return deployment_config


 def remote_run_start(
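The new load_eks_or_adhoc_deployment_config helper centralizes the deployment-config loading that remote_run_start, remote_run_ready, remote_run_stop, and remote_run_token previously duplicated, and adds an adhoc fallback. A minimal usage sketch (the service, instance, and cluster names are invented):

    from paasta_tools.kubernetes.remote_run import load_eks_or_adhoc_deployment_config

    # If "example-service" has no EKS instance named "interactive", the helper
    # catches NoConfigurationForServiceError, loads the adhoc instance config
    # instead, and re-wraps it as an EksDeploymentConfig whose
    # config_filename_prefix is "adhoc".
    deployment_config = load_eks_or_adhoc_deployment_config(
        service="example-service",
        instance="interactive",
        cluster="example-cluster",
    )
    print(deployment_config.get_namespace())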
@@ -88,6 +116,7 @@ def remote_run_start(
     recreate: bool,
     max_duration: int,
     is_toolbox: bool,
+    command: Optional[str] = None,
 ) -> RemoteRunOutcome:
     """Trigger remote-run job

@@ -99,19 +128,20 @@ def remote_run_start(
     :param bool recreate: whether to recreate remote-run job if existing
     :param int max_duration: maximum allowed duration for the remote-ruh job
     :param bool is_toolbox: requested job is for a toolbox container
+    :param str command: command override to execute in the job container
     :return: outcome of the operation, and resulting Kubernetes pod information
     """
     kube_client = KubeClient()

     # Load the service deployment settings
-    deployment_config = (
-        …
-        if is_toolbox
-        else load_eks_service_config(service, instance, cluster)
+    deployment_config = load_eks_or_adhoc_deployment_config(
+        service, instance, cluster, is_toolbox, user
     )

-    # Set …
-    if …
+    # Set override command, or sleep for interactive mode
+    if command and not is_toolbox:
+        deployment_config.config_dict["cmd"] = command
+    elif interactive and not is_toolbox:
         deployment_config.config_dict["cmd"] = f"sleep {max_duration}"

     # Create the app with a new name
@@ -120,10 +150,11 @@ def remote_run_start(
         deadline_seconds=max_duration,
         keep_routable_ip=is_toolbox,
     )
-    job_name = …
+    job_name = format_remote_run_job_name(formatted_job.metadata.name, user)
     formatted_job.metadata.name = job_name
     app_wrapper = get_application_wrapper(formatted_job)
     app_wrapper.soa_config = deployment_config
+    app_wrapper.ensure_service_account(kube_client)

     # Launch pod
     logger.info(f"Starting {job_name}")
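With the new command parameter, a non-toolbox remote-run job can execute a one-off command rather than the interactive sleep. A hedged sketch of the call — keyword arguments are used because the full positional signature isn't shown in this hunk, and all values are invented:

    from paasta_tools.kubernetes.remote_run import remote_run_start

    # Per the diff: an explicit command takes precedence over interactive mode,
    # and both cmd overrides are skipped for toolbox jobs.
    outcome = remote_run_start(
        service="example-service",
        instance="main",
        cluster="example-cluster",
        user="jdoe",
        interactive=False,
        recreate=False,
        max_duration=3600,
        is_toolbox=False,
        command="python -m example.task",
    )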
@@ -178,10 +209,8 @@ def remote_run_ready(
     kube_client = KubeClient()

     # Load the service deployment settings
-    deployment_config = (
-        …
-        if is_toolbox
-        else load_eks_service_config(service, instance, cluster)
+    deployment_config = load_eks_or_adhoc_deployment_config(
+        service, instance, cluster, is_toolbox, user
     )
     namespace = deployment_config.get_namespace()

@@ -225,17 +254,15 @@ def remote_run_stop(
     kube_client = KubeClient()

     # Load the service deployment settings
-    deployment_config = (
-        …
-        if is_toolbox
-        else load_eks_service_config(service, instance, cluster)
+    deployment_config = load_eks_or_adhoc_deployment_config(
+        service, instance, cluster, is_toolbox, user
     )

     # Rebuild the job metadata
     formatted_job = deployment_config.format_kubernetes_job(
         job_label=REMOTE_RUN_JOB_LABEL
     )
-    job_name = …
+    job_name = format_remote_run_job_name(formatted_job.metadata.name, user)
     formatted_job.metadata.name = job_name

     # Stop the job
@@ -263,14 +290,14 @@ def remote_run_token(
     kube_client = KubeClient()

     # Load the service deployment settings
-    deployment_config = …
+    deployment_config = load_eks_or_adhoc_deployment_config(service, instance, cluster)
     namespace = deployment_config.get_namespace()

     # Rebuild the job metadata
     formatted_job = deployment_config.format_kubernetes_job(
         job_label=REMOTE_RUN_JOB_LABEL
     )
-    job_name = …
+    job_name = format_remote_run_job_name(formatted_job.metadata.name, user)

     # Find pod and create exec token for it
     pod = find_job_pod(kube_client, namespace, job_name)
@@ -449,7 +476,7 @@ def create_pod_scoped_role(
     role_name = f"remote-run-role-{pod_name_hash}"
     policy = V1PolicyRule(
         verbs=["create", "get"],
-        resources=["pods", "pods/exec"],
+        resources=["pods", "pods/exec", "pods/log"],
         resource_names=[pod_name],
         api_groups=[""],
     )
@@ -495,7 +522,7 @@ def bind_role_to_service_account(
             name=role,
         ),
         subjects=[
-            V1Subject(
+            RbacV1Subject(
                 kind="ServiceAccount",
                 name=service_account,
             ),
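The V1Subject → RbacV1Subject swap here (and again in kubernetes_tools.py below) follows the upstream kubernetes Python client, which renamed the RBAC subject model in newer releases. For code that must tolerate both client generations, a compatibility import like the following is a common pattern (this shim is illustrative and not part of the diff):

    try:
        # newer kubernetes clients expose the RBAC subject model under this name
        from kubernetes.client import RbacV1Subject
    except ImportError:
        # older clients predate the rename; alias the old model to the new name
        from kubernetes.client import V1Subject as RbacV1Subject

    subject = RbacV1Subject(kind="ServiceAccount", name="example-sa")  # name is hypothetical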
paasta_tools/kubernetes_tools.py
CHANGED
@@ -50,6 +50,7 @@ from kubernetes import client as kube_client
 from kubernetes import config as kube_config
 from kubernetes.client import CoreV1Event
 from kubernetes.client import models
+from kubernetes.client import RbacV1Subject
 from kubernetes.client import V1Affinity
 from kubernetes.client import V1AWSElasticBlockStoreVolumeSource
 from kubernetes.client import V1Capabilities
@@ -113,7 +114,6 @@ from kubernetes.client import V1ServiceAccount
 from kubernetes.client import V1ServiceAccountTokenProjection
 from kubernetes.client import V1StatefulSet
 from kubernetes.client import V1StatefulSetSpec
-from kubernetes.client import V1Subject
 from kubernetes.client import V1TCPSocketAction
 from kubernetes.client import V1TopologySpreadConstraint
 from kubernetes.client import V1Volume
@@ -151,6 +151,7 @@ from paasta_tools.long_running_service_tools import METRICS_PROVIDER_PISCINA
 from paasta_tools.long_running_service_tools import METRICS_PROVIDER_PROMQL
 from paasta_tools.long_running_service_tools import METRICS_PROVIDER_UWSGI
 from paasta_tools.long_running_service_tools import METRICS_PROVIDER_UWSGI_V2
+from paasta_tools.long_running_service_tools import METRICS_PROVIDER_WORKER_LOAD
 from paasta_tools.long_running_service_tools import ServiceNamespaceConfig
 from paasta_tools.secret_tools import get_secret_name_from_ref
 from paasta_tools.secret_tools import is_secret_ref
@@ -195,10 +196,8 @@ KUBE_DEPLOY_STATEGY_MAP = {
     "brutal": "RollingUpdate",
 }
 HACHECK_POD_NAME = "hacheck"
-GUNICORN_EXPORTER_POD_NAME = "gunicorn--exporter"
 SIDECAR_CONTAINER_NAMES = [
     HACHECK_POD_NAME,
-    GUNICORN_EXPORTER_POD_NAME,
 ]
 KUBERNETES_NAMESPACE = "paasta"
 PAASTA_WORKLOAD_OWNER = "compute_infra_platform_experience"
@@ -270,6 +269,10 @@ class KubeDeployment(NamedTuple):
     namespace: str
     replicas: Optional[int]

+    @property
+    def deployment_version(self) -> DeploymentVersion:
+        return DeploymentVersion(self.git_sha, self.image_version)
+

 class KubeCustomResource(NamedTuple):
     service: str
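The new deployment_version property collapses the git_sha/image_version pair into one comparable DeploymentVersion value. A sketch of the comparison it enables (the extra field values and the DeploymentVersion import path are assumptions, not taken from this diff):

    from paasta_tools.kubernetes_tools import KubeDeployment
    from paasta_tools.utils import DeploymentVersion  # assumed import path

    running = KubeDeployment(  # field values below are hypothetical
        service="example-service",
        instance="main",
        git_sha="abc123f",
        image_version=None,
        config_sha="config0000",
        namespace="paasta",
        replicas=3,
    )
    desired = DeploymentVersion("abc123f", None)
    # one comparison instead of checking git_sha and image_version separately
    print(running.deployment_version == desired)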
@@ -872,7 +875,10 @@ class KubernetesDeploymentConfig(LongRunningServiceConfig):
                 ),
             ),
         )
-        elif provider["type"] …
+        elif provider["type"] in {
+            METRICS_PROVIDER_UWSGI_V2,
+            METRICS_PROVIDER_WORKER_LOAD,
+        }:
             return V2MetricSpec(
                 type="Object",
                 object=V2ObjectMetricSource(
@@ -1068,15 +1074,10 @@ class KubernetesDeploymentConfig(LongRunningServiceConfig):
             service_namespace_config,
             hacheck_sidecar_volumes,
         )
-        gunicorn_exporter_container = self.get_gunicorn_exporter_sidecar_container(
-            system_paasta_config
-        )

         sidecars = []
         if hacheck_container:
             sidecars.append(hacheck_container)
-        if gunicorn_exporter_container:
-            sidecars.append(gunicorn_exporter_container)
         return sidecars

     def get_readiness_check_prefix(
@@ -1164,37 +1165,6 @@ class KubernetesDeploymentConfig(LongRunningServiceConfig):
         )
         return None

-    def get_gunicorn_exporter_sidecar_container(
-        self,
-        system_paasta_config: SystemPaastaConfig,
-    ) -> Optional[V1Container]:
-
-        if self.should_use_metrics_provider(METRICS_PROVIDER_GUNICORN):
-            return V1Container(
-                image=system_paasta_config.get_gunicorn_exporter_sidecar_image_url(),
-                resources=self.get_sidecar_resource_requirements(
-                    "gunicorn_exporter", system_paasta_config
-                ),
-                name=GUNICORN_EXPORTER_POD_NAME,
-                env=self.get_kubernetes_environment(),
-                ports=[V1ContainerPort(container_port=9117)],
-                lifecycle=V1Lifecycle(
-                    pre_stop=V1LifecycleHandler(
-                        _exec=V1ExecAction(
-                            command=[
-                                "/bin/sh",
-                                "-c",
-                                # we sleep for the same amount of time as we do after an hadown to ensure that we have accurate
-                                # metrics up until our Pod dies
-                                f"sleep {self.get_hacheck_prestop_sleep_seconds()}",
-                            ]
-                        )
-                    )
-                ),
-            )
-
-        return None
-
     def get_env(
         self, system_paasta_config: Optional["SystemPaastaConfig"] = None
     ) -> Dict[str, str]:
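For context on the branch added above: both uwsgi-v2 and the new worker-load provider now produce an Object-type HPA metric. A rough, self-contained sketch of such a metric spec using the kubernetes client models (every name and value below is illustrative — the real spec is built from the deployment config):

    from kubernetes.client import (
        V2CrossVersionObjectReference,
        V2MetricIdentifier,
        V2MetricSpec,
        V2MetricTarget,
        V2ObjectMetricSource,
    )

    metric = V2MetricSpec(
        type="Object",
        object=V2ObjectMetricSource(
            metric=V2MetricIdentifier(name="worker-load"),
            described_object=V2CrossVersionObjectReference(
                api_version="apps/v1", kind="Deployment", name="example-deployment"
            ),
            target=V2MetricTarget(type="Value", value="0.5"),
        ),
    )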
@@ -1460,6 +1430,8 @@ class KubernetesDeploymentConfig(LongRunningServiceConfig):
         secret_volumes: Sequence[SecretVolume],
         service_namespace_config: ServiceNamespaceConfig,
         include_sidecars: bool = True,
+        include_liveness_probe: bool = True,
+        include_readiness_probe: bool = True,
     ) -> Sequence[V1Container]:
         ports = [self.get_container_port()]
         # MONK-1130
@@ -1485,8 +1457,16 @@ class KubernetesDeploymentConfig(LongRunningServiceConfig):
                 )
             ),
             name=self.get_sanitised_instance_name(),
-            liveness_probe=…
-            …
+            liveness_probe=(
+                self.get_liveness_probe(service_namespace_config)
+                if include_liveness_probe
+                else None
+            ),
+            readiness_probe=(
+                self.get_readiness_probe(service_namespace_config)
+                if include_readiness_probe
+                else None
+            ),
             ports=[V1ContainerPort(container_port=port) for port in ports],
             security_context=self.get_security_context(),
             volume_mounts=self.get_volume_mounts(
@@ -1532,7 +1512,7 @@ class KubernetesDeploymentConfig(LongRunningServiceConfig):
         and the service will be removed from smartstack, which is the same effect we get after running hadown.
         """

-        # Everywhere this value is currently used (hacheck sidecar…
+        # Everywhere this value is currently used (hacheck sidecar), we can pretty safely
         # assume that the service is in smartstack.
         return self.get_prestop_sleep_seconds(is_in_smartstack=True) + 1

@@ -1912,7 +1892,7 @@ class KubernetesDeploymentConfig(LongRunningServiceConfig):
         if self.get_datastore_credentials_secret_hash():
             volume_mounts.append(
                 V1VolumeMount(
-                    mount_path=…
+                    mount_path="/datastore",
                     name=self.get_datastore_secret_volume_name(),
                     read_only=True,
                 )
@@ -2165,6 +2145,8 @@ class KubernetesDeploymentConfig(LongRunningServiceConfig):
             restart_on_failure=False,
             include_sidecars=include_sidecars,
             force_no_routable_ip=not keep_routable_ip,
+            include_liveness_probe=False,
+            include_readiness_probe=False,
         )
         pod_template.metadata.labels.update(additional_labels)
         complete_config = V1Job(
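The liveness/readiness toggles exist so that format_kubernetes_job can build remote-run pods without service healthchecks — a one-off job has no service endpoint worth probing. A sketch of the call site, using the keyword arguments visible in the remote_run.py hunks above (values invented):

    # Per the diff, the resulting pod template is built with
    # include_liveness_probe=False and include_readiness_probe=False, so the
    # job's container carries no probes and is never restarted or de-routed
    # for failing a healthcheck.
    formatted_job = deployment_config.format_kubernetes_job(
        job_label="remote-run",     # REMOTE_RUN_JOB_LABEL in remote_run.py
        deadline_seconds=3600,      # hypothetical
        keep_routable_ip=False,     # hypothetical
    )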
@@ -2290,6 +2272,7 @@ class KubernetesDeploymentConfig(LongRunningServiceConfig):
             or self.get_prometheus_port() is not None
             or self.should_use_metrics_provider(METRICS_PROVIDER_UWSGI)
             or self.should_use_metrics_provider(METRICS_PROVIDER_GUNICORN)
+            or self.should_use_metrics_provider(METRICS_PROVIDER_WORKER_LOAD)
         ):
             return "true"
         return "false"
@@ -2304,6 +2287,8 @@ class KubernetesDeploymentConfig(LongRunningServiceConfig):
         restart_on_failure: bool = True,
         include_sidecars: bool = True,
         force_no_routable_ip: bool = False,
+        include_liveness_probe: bool = True,
+        include_readiness_probe: bool = True,
     ) -> V1PodTemplateSpec:
         service_namespace_config = load_service_namespace_config(
             service=self.service, namespace=self.get_nerve_namespace()
@@ -2341,6 +2326,8 @@ class KubernetesDeploymentConfig(LongRunningServiceConfig):
                 system_paasta_config=system_paasta_config,
                 service_namespace_config=service_namespace_config,
                 include_sidecars=include_sidecars,
+                include_liveness_probe=include_liveness_probe,
+                include_readiness_probe=include_readiness_probe,
             ),
             share_process_namespace=True,
             node_selector=self.get_node_selector(),
@@ -2438,6 +2425,10 @@ class KubernetesDeploymentConfig(LongRunningServiceConfig):
             "paasta.yelp.com/cluster": self.cluster,
             "yelp.com/owner": "compute_infra_platform_experience",
             "paasta.yelp.com/managed": "true",
+            # NOTE: this is mostly here for autoscaling purposes: we use information from the deploy group
+            # during Prometheus relabeling - but it's not a bad label to have around in general, thus its
+            # inclusion here
+            "paasta.yelp.com/deploy_group": self.get_deploy_group(),
         }
         if service_namespace_config.is_in_smartstack():
             labels["paasta.yelp.com/weight"] = str(self.get_weight())
@@ -2463,22 +2454,13 @@ class KubernetesDeploymentConfig(LongRunningServiceConfig):

         # not all services use autoscaling, so we label those that do in order to have
         # prometheus selectively discover/scrape them
-        …
-        …
-            # But we do still need deploy_group for relabeling properly
-            # this should probably eventually be made into a default label,
-            # but for now we're fine with it being behind these feature toggles.
-            # ideally, we'd also have the docker image here for ease-of-use
-            # in Prometheus relabeling, but that information is over the
-            # character limit for k8s labels (63 chars)
-            labels["paasta.yelp.com/deploy_group"] = self.get_deploy_group()
-
-        elif self.should_use_metrics_provider(METRICS_PROVIDER_PISCINA):
-            labels["paasta.yelp.com/deploy_group"] = self.get_deploy_group()
+        # NOTE: these are not mutually exclusive as a service could use multiple autoscaling types
+        if self.should_use_metrics_provider(METRICS_PROVIDER_PISCINA):
             labels["paasta.yelp.com/scrape_piscina_prometheus"] = "true"

-        …
-            …
+        if self.should_use_metrics_provider(
+            METRICS_PROVIDER_GUNICORN
+        ) or self.should_use_metrics_provider(METRICS_PROVIDER_WORKER_LOAD):
             labels["paasta.yelp.com/scrape_gunicorn_prometheus"] = "true"

         # the default AWS LB Controller behavior is to enable this by-namespace
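Net effect of the label changes: paasta.yelp.com/deploy_group is now unconditional, and the per-provider scrape labels are set independently rather than via the old if/elif chain. A service using both piscina and worker-load autoscaling would therefore end up with labels along these lines (values invented):

    labels = {
        "paasta.yelp.com/cluster": "example-cluster",
        "yelp.com/owner": "compute_infra_platform_experience",
        "paasta.yelp.com/managed": "true",
        "paasta.yelp.com/deploy_group": "prod.everything",  # hypothetical deploy group
        # both scrape flags can now be true at once; the old elif made the
        # providers mutually exclusive
        "paasta.yelp.com/scrape_piscina_prometheus": "true",
        "paasta.yelp.com/scrape_gunicorn_prometheus": "true",
    }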
@@ -3010,7 +2992,7 @@ def ensure_paasta_api_rolebinding(kube_client: KubeClient, namespace: str) -> None:
         name="paasta-api-server-per-namespace",
     ),
     subjects=[
-        V1Subject(
+        RbacV1Subject(
             kind="User",
             name="yelp.com/paasta-api-server",
         ),
@@ -3392,21 +3374,26 @@ def pod_disruption_budget_for_service_instance(
     instance: str,
     max_unavailable: Union[str, int],
     namespace: str,
+    unhealthy_pod_eviction_policy: str,
 ) -> V1PodDisruptionBudget:
+    selector = V1LabelSelector(
+        match_labels={
+            "paasta.yelp.com/service": service,
+            "paasta.yelp.com/instance": instance,
+        }
+    )
+    spec = V1PodDisruptionBudgetSpec(
+        max_unavailable=max_unavailable,
+        unhealthy_pod_eviction_policy=unhealthy_pod_eviction_policy,
+        selector=selector,
+    )
+
     return V1PodDisruptionBudget(
         metadata=V1ObjectMeta(
             name=get_kubernetes_app_name(service, instance),
             namespace=namespace,
         ),
-        spec=V1PodDisruptionBudgetSpec(
-            max_unavailable=max_unavailable,
-            selector=V1LabelSelector(
-                match_labels={
-                    "paasta.yelp.com/service": service,
-                    "paasta.yelp.com/instance": instance,
-                }
-            ),
-        ),
+        spec=spec,
     )

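The refactor threads unhealthy_pod_eviction_policy into V1PodDisruptionBudgetSpec, which maps to the Kubernetes unhealthyPodEvictionPolicy PDB field. A usage sketch (service/instance values invented; the "IfHealthyBudget" default comes from long_running_service_tools.py further down):

    pdb = pod_disruption_budget_for_service_instance(
        service="example-service",
        instance="main",
        max_unavailable="50%",
        namespace="paasta",
        # "AlwaysAllow" permits evicting already-unhealthy pods even when the
        # budget is exhausted; "IfHealthyBudget" is the configured default
        unhealthy_pod_eviction_policy="AlwaysAllow",
    )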
@@ -4190,6 +4177,10 @@ def create_pod_topology_spread_constraints(
                 when_unsatisfiable=constraint.get(
                     "when_unsatisfiable", "ScheduleAnyway"
                 ),
+                # we might want to default this to someting else in the future
+                # but for now, make this opt-in
+                # (null or empty list means only match against the labelSelector)
+                match_label_keys=constraint.get("match_label_keys", None),
             )
         )

@@ -4393,7 +4384,7 @@ def ensure_service_account(
             name=k8s_role,
         ),
         subjects=[
-            V1Subject(
+            RbacV1Subject(
                 kind="ServiceAccount",
                 namespace=namespace,
                 name=sa_name,
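match_label_keys makes the scheduler additionally group pods by the values of the listed label keys — with pod-template-hash, each rollout's pods spread independently instead of being skew-counted against the previous ReplicaSet. A sketch of a constraint dict that would opt in; only when_unsatisfiable and match_label_keys are keys confirmed by this diff, the rest are assumptions:

    constraint = {
        "topology_key": "topology.kubernetes.io/zone",  # assumed key name
        "max_skew": 1,                                  # assumed key name
        "when_unsatisfiable": "ScheduleAnyway",
        "match_label_keys": ["pod-template-hash"],
    }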
paasta_tools/long_running_service_tools.py
CHANGED

@@ -41,6 +41,7 @@ DEFAULT_ACTIVE_REQUESTS_AUTOSCALING_MOVING_AVERAGE_WINDOW = 1800
 DEFAULT_UWSGI_AUTOSCALING_MOVING_AVERAGE_WINDOW = 1800
 DEFAULT_PISCINA_AUTOSCALING_MOVING_AVERAGE_WINDOW = 1800
 DEFAULT_GUNICORN_AUTOSCALING_MOVING_AVERAGE_WINDOW = 1800
+DEFAULT_WORKER_LOAD_AUTOSCALING_MOVING_AVERAGE_WINDOW = 1800

 METRICS_PROVIDER_CPU = "cpu"
 METRICS_PROVIDER_UWSGI = "uwsgi"
@@ -48,7 +49,8 @@ METRICS_PROVIDER_UWSGI_V2 = "uwsgi-v2"
 METRICS_PROVIDER_GUNICORN = "gunicorn"
 METRICS_PROVIDER_PISCINA = "piscina"
 METRICS_PROVIDER_ACTIVE_REQUESTS = "active-requests"
-METRICS_PROVIDER_PROMQL = "…
+METRICS_PROVIDER_PROMQL = "arbitrary-promql"
+METRICS_PROVIDER_WORKER_LOAD = "worker-load"

 ALL_METRICS_PROVIDERS = [
     METRICS_PROVIDER_CPU,
@@ -58,6 +60,7 @@ ALL_METRICS_PROVIDERS = [
     METRICS_PROVIDER_PISCINA,
     METRICS_PROVIDER_ACTIVE_REQUESTS,
     METRICS_PROVIDER_PROMQL,
+    METRICS_PROVIDER_WORKER_LOAD,
 ]

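The constants register "worker-load" as a first-class metrics provider alongside the existing ones. The soaconfigs schema for selecting it isn't part of this diff, so treat the shape below purely as an illustration of where the new string would appear:

    autoscaling = {
        "metrics_providers": [
            {
                "type": "worker-load",  # METRICS_PROVIDER_WORKER_LOAD
                "setpoint": 0.5,        # hypothetical value
            }
        ],
    }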
@@ -85,6 +88,7 @@ class LongRunningServiceConfigDict(InstanceConfigDict, total=False):
     bounce_margin_factor: float
     should_ping_for_unhealthy_pods: bool
     weight: int
+    unhealthy_pod_eviction_policy: str


 class ServiceNamespaceConfig(dict):
@@ -95,9 +99,12 @@ class ServiceNamespaceConfig(dict):
         """
         healthcheck_mode = self.get("healthcheck_mode", None)
         if not healthcheck_mode:
-            …
-            …
-            …
+            mode = self.get_mode()
+            if mode == "http2":
+                healthcheck_mode = "http"
+            else:
+                healthcheck_mode = mode
+        return healthcheck_mode

     def get_mode(self) -> str:
         """Get the mode that the service runs in and check that we support it.
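The rewritten fallback means an http2 smartstack mode now yields plain http healthchecks (healthcheckers don't speak HTTP/2), while every other mode still falls through unchanged — and get_mode below now accepts "http2" as valid. In effect:

    from paasta_tools.long_running_service_tools import ServiceNamespaceConfig

    # behavior sketch of the new fallback (ServiceNamespaceConfig subclasses dict)
    ServiceNamespaceConfig({"mode": "http2"}).get_healthcheck_mode()  # -> "http"
    ServiceNamespaceConfig({"mode": "tcp"}).get_healthcheck_mode()    # -> "tcp"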
@@ -112,7 +119,7 @@ class ServiceNamespaceConfig(dict):
                 return None
             else:
                 return "http"
-        elif mode in ["http", "tcp", "https"]:
+        elif mode in ["http", "http2", "tcp", "https"]:
             return mode
         else:
             raise InvalidSmartstackMode("Unknown mode: %s" % mode)
@@ -407,6 +414,9 @@ class LongRunningServiceConfig(InstanceConfig):
     def get_bounce_margin_factor(self) -> float:
         return self.config_dict.get("bounce_margin_factor", 0.95)

+    def get_unhealthy_pod_eviction_policy(self) -> str:
+        return self.config_dict.get("unhealthy_pod_eviction_policy", "IfHealthyBudget")
+
     def get_should_ping_for_unhealthy_pods(self, default: bool) -> bool:
         return self.config_dict.get("should_ping_for_unhealthy_pods", default)

paasta_tools/mesos/master.py
CHANGED
@@ -147,7 +147,7 @@ class MesosMaster:
     def _file_resolver(self, cfg):
         return self.resolve(open(cfg[6:], "r+").read().strip())

-    @retry(KazooTimeoutError, tries=5, delay=0.5, logger=logger)
+    @retry(KazooTimeoutError, tries=5, delay=0.5, logger=logger)  # type: ignore
     def _zookeeper_resolver(self, cfg):
         hosts, path = cfg[5:].split("/", 1)
         path = "/" + path
paasta_tools/metrics/metastatus_lib.py
CHANGED

@@ -43,7 +43,6 @@ from paasta_tools.kubernetes_tools import paasta_prefixed
 from paasta_tools.kubernetes_tools import PodStatus
 from paasta_tools.mesos.master import MesosMetrics
 from paasta_tools.mesos.master import MesosState
-from paasta_tools.mesos_maintenance import MAINTENANCE_ROLE
 from paasta_tools.mesos_tools import get_all_tasks_from_state
 from paasta_tools.mesos_tools import get_mesos_quorum
 from paasta_tools.mesos_tools import get_number_of_mesos_masters
@@ -97,9 +96,6 @@ def get_mesos_cpu_status(
     total = metrics["master/cpus_total"]
     used = metrics["master/cpus_used"]

-    for slave in mesos_state["slaves"]:
-        used += reserved_maintenence_resources(slave["reserved_resources"])["cpus"]
-
     available = total - used
     return total, used, available

@@ -135,9 +131,6 @@ def get_mesos_memory_status(
     total = metrics["master/mem_total"]
     used = metrics["master/mem_used"]

-    for slave in mesos_state["slaves"]:
-        used += reserved_maintenence_resources(slave["reserved_resources"])["mem"]
-
     available = total - used

     return total, used, available
@@ -176,9 +169,6 @@ def get_mesos_disk_status(
     total = metrics["master/disk_total"]
     used = metrics["master/disk_used"]

-    for slave in mesos_state["slaves"]:
-        used += reserved_maintenence_resources(slave["reserved_resources"])["disk"]
-
     available = total - used
     return total, used, available

@@ -216,9 +206,6 @@ def get_mesos_gpu_status(
     total = metrics["master/gpus_total"]
     used = metrics["master/gpus_used"]

-    for slave in mesos_state["slaves"]:
-        used += reserved_maintenence_resources(slave["reserved_resources"])["gpus"]
-
     available = total - used
     return total, used, available

@@ -614,7 +601,7 @@ def group_slaves_by_key_func(
     """
     sorted_slaves: Sequence[_GenericNodeT]
     if sort_func is None:
-        sorted_slaves = sorted(slaves, key=key_func)
+        sorted_slaves = sorted(slaves, key=key_func)  # type: ignore # this code is to be deleted
     else:
         sorted_slaves = sort_func(slaves)

@@ -647,11 +634,6 @@ def calculate_resource_utilization_for_slaves(
     for task in tasks:
         task_resources = task["resources"]
         resource_free_dict.subtract(Counter(filter_mesos_state_metrics(task_resources)))
-    for slave in slaves:
-        filtered_resources = filter_mesos_state_metrics(
-            reserved_maintenence_resources(slave["reserved_resources"])
-        )
-        resource_free_dict.subtract(Counter(filtered_resources))
     return {
         "free": ResourceInfo(
             cpus=resource_free_dict["cpus"],
@@ -1102,9 +1084,3 @@ def get_table_rows_for_resource_info_dict(
     return attribute_values + format_row_for_resource_utilization_healthchecks(
         healthcheck_utilization_pairs
     )
-
-
-def reserved_maintenence_resources(
-    resources: MesosResources,
-):
-    return resources.get(MAINTENANCE_ROLE, {"cpus": 0, "mem": 0, "disk": 0, "gpus": 0})
paasta_tools/metrics/metrics_lib.py
CHANGED

@@ -8,6 +8,7 @@ from inspect import currentframe
 from types import TracebackType
 from typing import Any
 from typing import Callable
+from typing import cast
 from typing import Dict
 from typing import Optional
 from typing import Type
@@ -110,10 +111,18 @@ class MeteoriteMetrics(BaseMetrics):
         )

     def create_timer(self, name: str, **kwargs: Any) -> TimerProtocol:
-        …
+        # yelp_meteorite returns an EmptyMetric object if the timer is misconfigured
+        # ...but that doesn't have the same interface ;_;
+        return cast(
+            Timer, yelp_meteorite.create_timer(self.base_name + "." + name, **kwargs)
+        )

     def create_gauge(self, name: str, **kwargs: Any) -> GaugeProtocol:
-        …
+        # yelp_meteorite returns an EmptyMetric object if the gauge is misconfigured
+        # ...but that doesn't have the same interface ;_;
+        return cast(
+            Gauge, yelp_meteorite.create_gauge(self.base_name + "." + name, **kwargs)
+        )

     def create_counter(self, name: str, **kwargs: Any) -> CounterProtocol:
         return yelp_meteorite.create_counter(self.base_name + "." + name, **kwargs)
@@ -163,7 +172,7 @@ class Gauge(GaugeProtocol):
         log.debug(f"gauge {self.name} set to {value}")


-class Counter(…
+class Counter(CounterProtocol):
     def __init__(self, name: str) -> None:
         self.name = name
         self.counter = 0
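The cast calls exist because yelp_meteorite (Yelp-internal) hands back an EmptyMetric stub when a metric is misconfigured, and the stub doesn't satisfy the Timer/Gauge protocols. typing.cast changes only what the type checker sees — there is no runtime conversion — which makes it the right tool here. The pattern in isolation:

    from typing import cast

    import yelp_meteorite  # Yelp-internal; shown for illustration only

    from paasta_tools.metrics.metrics_lib import Timer

    def make_timer(base_name: str, name: str) -> Timer:
        # cast() is a no-op at runtime: the possibly-EmptyMetric object is
        # returned untouched, but mypy now treats it as a Timer
        return cast(Timer, yelp_meteorite.create_timer(f"{base_name}.{name}"))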
The generated paasta_tools/paastaapi/* files each carry the same one-line change (matching their uniform +1/-1 counts in the file list above), bumping the generated OpenAPI document version:

@@ -7,7 +7,7 @@

     No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) # noqa: E501

-    The version of the OpenAPI document: 1.…
+    The version of the OpenAPI document: 1.3.0
     Generated by: https://openapi-generator.tech
 """