paasta-tools 1.21.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- k8s_itests/__init__.py +0 -0
- k8s_itests/test_autoscaling.py +23 -0
- k8s_itests/utils.py +38 -0
- paasta_tools/__init__.py +20 -0
- paasta_tools/adhoc_tools.py +142 -0
- paasta_tools/api/__init__.py +13 -0
- paasta_tools/api/api.py +330 -0
- paasta_tools/api/api_docs/swagger.json +2323 -0
- paasta_tools/api/client.py +106 -0
- paasta_tools/api/settings.py +33 -0
- paasta_tools/api/tweens/__init__.py +6 -0
- paasta_tools/api/tweens/auth.py +125 -0
- paasta_tools/api/tweens/profiling.py +108 -0
- paasta_tools/api/tweens/request_logger.py +124 -0
- paasta_tools/api/views/__init__.py +13 -0
- paasta_tools/api/views/autoscaler.py +100 -0
- paasta_tools/api/views/exception.py +45 -0
- paasta_tools/api/views/flink.py +73 -0
- paasta_tools/api/views/instance.py +395 -0
- paasta_tools/api/views/pause_autoscaler.py +71 -0
- paasta_tools/api/views/remote_run.py +113 -0
- paasta_tools/api/views/resources.py +76 -0
- paasta_tools/api/views/service.py +35 -0
- paasta_tools/api/views/version.py +25 -0
- paasta_tools/apply_external_resources.py +79 -0
- paasta_tools/async_utils.py +109 -0
- paasta_tools/autoscaling/__init__.py +0 -0
- paasta_tools/autoscaling/autoscaling_service_lib.py +57 -0
- paasta_tools/autoscaling/forecasting.py +106 -0
- paasta_tools/autoscaling/max_all_k8s_services.py +41 -0
- paasta_tools/autoscaling/pause_service_autoscaler.py +77 -0
- paasta_tools/autoscaling/utils.py +52 -0
- paasta_tools/bounce_lib.py +184 -0
- paasta_tools/broadcast_log_to_services.py +62 -0
- paasta_tools/cassandracluster_tools.py +210 -0
- paasta_tools/check_autoscaler_max_instances.py +212 -0
- paasta_tools/check_cassandracluster_services_replication.py +35 -0
- paasta_tools/check_flink_services_health.py +203 -0
- paasta_tools/check_kubernetes_api.py +57 -0
- paasta_tools/check_kubernetes_services_replication.py +141 -0
- paasta_tools/check_oom_events.py +244 -0
- paasta_tools/check_services_replication_tools.py +324 -0
- paasta_tools/check_spark_jobs.py +234 -0
- paasta_tools/cleanup_kubernetes_cr.py +138 -0
- paasta_tools/cleanup_kubernetes_crd.py +145 -0
- paasta_tools/cleanup_kubernetes_jobs.py +344 -0
- paasta_tools/cleanup_tron_namespaces.py +96 -0
- paasta_tools/cli/__init__.py +13 -0
- paasta_tools/cli/authentication.py +85 -0
- paasta_tools/cli/cli.py +260 -0
- paasta_tools/cli/cmds/__init__.py +13 -0
- paasta_tools/cli/cmds/autoscale.py +143 -0
- paasta_tools/cli/cmds/check.py +334 -0
- paasta_tools/cli/cmds/cook_image.py +147 -0
- paasta_tools/cli/cmds/get_docker_image.py +76 -0
- paasta_tools/cli/cmds/get_image_version.py +172 -0
- paasta_tools/cli/cmds/get_latest_deployment.py +93 -0
- paasta_tools/cli/cmds/info.py +155 -0
- paasta_tools/cli/cmds/itest.py +117 -0
- paasta_tools/cli/cmds/list.py +66 -0
- paasta_tools/cli/cmds/list_clusters.py +42 -0
- paasta_tools/cli/cmds/list_deploy_queue.py +171 -0
- paasta_tools/cli/cmds/list_namespaces.py +84 -0
- paasta_tools/cli/cmds/local_run.py +1396 -0
- paasta_tools/cli/cmds/logs.py +1601 -0
- paasta_tools/cli/cmds/mark_for_deployment.py +1988 -0
- paasta_tools/cli/cmds/mesh_status.py +174 -0
- paasta_tools/cli/cmds/pause_service_autoscaler.py +107 -0
- paasta_tools/cli/cmds/push_to_registry.py +275 -0
- paasta_tools/cli/cmds/remote_run.py +252 -0
- paasta_tools/cli/cmds/rollback.py +347 -0
- paasta_tools/cli/cmds/secret.py +549 -0
- paasta_tools/cli/cmds/security_check.py +59 -0
- paasta_tools/cli/cmds/spark_run.py +1400 -0
- paasta_tools/cli/cmds/start_stop_restart.py +401 -0
- paasta_tools/cli/cmds/status.py +2302 -0
- paasta_tools/cli/cmds/validate.py +1012 -0
- paasta_tools/cli/cmds/wait_for_deployment.py +275 -0
- paasta_tools/cli/fsm/__init__.py +13 -0
- paasta_tools/cli/fsm/autosuggest.py +82 -0
- paasta_tools/cli/fsm/template/README.md +8 -0
- paasta_tools/cli/fsm/template/cookiecutter.json +7 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/kubernetes-PROD.yaml +91 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/monitoring.yaml +20 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/service.yaml +8 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/smartstack.yaml +6 -0
- paasta_tools/cli/fsm_cmd.py +121 -0
- paasta_tools/cli/paasta_tabcomplete.sh +23 -0
- paasta_tools/cli/schemas/adhoc_schema.json +199 -0
- paasta_tools/cli/schemas/autoscaling_schema.json +91 -0
- paasta_tools/cli/schemas/autotuned_defaults/cassandracluster_schema.json +37 -0
- paasta_tools/cli/schemas/autotuned_defaults/kubernetes_schema.json +89 -0
- paasta_tools/cli/schemas/deploy_schema.json +173 -0
- paasta_tools/cli/schemas/eks_schema.json +970 -0
- paasta_tools/cli/schemas/kubernetes_schema.json +970 -0
- paasta_tools/cli/schemas/rollback_schema.json +160 -0
- paasta_tools/cli/schemas/service_schema.json +25 -0
- paasta_tools/cli/schemas/smartstack_schema.json +322 -0
- paasta_tools/cli/schemas/tron_schema.json +699 -0
- paasta_tools/cli/utils.py +1118 -0
- paasta_tools/clusterman.py +21 -0
- paasta_tools/config_utils.py +385 -0
- paasta_tools/contrib/__init__.py +0 -0
- paasta_tools/contrib/bounce_log_latency_parser.py +68 -0
- paasta_tools/contrib/check_manual_oapi_changes.sh +24 -0
- paasta_tools/contrib/check_orphans.py +306 -0
- paasta_tools/contrib/create_dynamodb_table.py +35 -0
- paasta_tools/contrib/create_paasta_playground.py +105 -0
- paasta_tools/contrib/emit_allocated_cpu_metrics.py +50 -0
- paasta_tools/contrib/get_running_task_allocation.py +346 -0
- paasta_tools/contrib/habitat_fixer.py +86 -0
- paasta_tools/contrib/ide_helper.py +316 -0
- paasta_tools/contrib/is_pod_healthy_in_proxy.py +139 -0
- paasta_tools/contrib/is_pod_healthy_in_smartstack.py +50 -0
- paasta_tools/contrib/kill_bad_containers.py +109 -0
- paasta_tools/contrib/mass-deploy-tag.sh +44 -0
- paasta_tools/contrib/mock_patch_checker.py +86 -0
- paasta_tools/contrib/paasta_update_soa_memcpu.py +520 -0
- paasta_tools/contrib/render_template.py +129 -0
- paasta_tools/contrib/rightsizer_soaconfigs_update.py +348 -0
- paasta_tools/contrib/service_shard_remove.py +157 -0
- paasta_tools/contrib/service_shard_update.py +373 -0
- paasta_tools/contrib/shared_ip_check.py +77 -0
- paasta_tools/contrib/timeouts_metrics_prom.py +64 -0
- paasta_tools/delete_kubernetes_deployments.py +89 -0
- paasta_tools/deployment_utils.py +44 -0
- paasta_tools/docker_wrapper.py +234 -0
- paasta_tools/docker_wrapper_imports.py +13 -0
- paasta_tools/drain_lib.py +351 -0
- paasta_tools/dump_locally_running_services.py +71 -0
- paasta_tools/eks_tools.py +119 -0
- paasta_tools/envoy_tools.py +373 -0
- paasta_tools/firewall.py +504 -0
- paasta_tools/firewall_logging.py +154 -0
- paasta_tools/firewall_update.py +172 -0
- paasta_tools/flink_tools.py +345 -0
- paasta_tools/flinkeks_tools.py +90 -0
- paasta_tools/frameworks/__init__.py +0 -0
- paasta_tools/frameworks/adhoc_scheduler.py +71 -0
- paasta_tools/frameworks/constraints.py +87 -0
- paasta_tools/frameworks/native_scheduler.py +652 -0
- paasta_tools/frameworks/native_service_config.py +301 -0
- paasta_tools/frameworks/task_store.py +245 -0
- paasta_tools/generate_all_deployments +9 -0
- paasta_tools/generate_authenticating_services.py +94 -0
- paasta_tools/generate_deployments_for_service.py +255 -0
- paasta_tools/generate_services_file.py +114 -0
- paasta_tools/generate_services_yaml.py +30 -0
- paasta_tools/hacheck.py +76 -0
- paasta_tools/instance/__init__.py +0 -0
- paasta_tools/instance/hpa_metrics_parser.py +122 -0
- paasta_tools/instance/kubernetes.py +1362 -0
- paasta_tools/iptables.py +240 -0
- paasta_tools/kafkacluster_tools.py +143 -0
- paasta_tools/kubernetes/__init__.py +0 -0
- paasta_tools/kubernetes/application/__init__.py +0 -0
- paasta_tools/kubernetes/application/controller_wrappers.py +476 -0
- paasta_tools/kubernetes/application/tools.py +90 -0
- paasta_tools/kubernetes/bin/__init__.py +0 -0
- paasta_tools/kubernetes/bin/kubernetes_remove_evicted_pods.py +164 -0
- paasta_tools/kubernetes/bin/paasta_cleanup_remote_run_resources.py +135 -0
- paasta_tools/kubernetes/bin/paasta_cleanup_stale_nodes.py +181 -0
- paasta_tools/kubernetes/bin/paasta_secrets_sync.py +758 -0
- paasta_tools/kubernetes/remote_run.py +558 -0
- paasta_tools/kubernetes_tools.py +4679 -0
- paasta_tools/list_kubernetes_service_instances.py +128 -0
- paasta_tools/list_tron_namespaces.py +60 -0
- paasta_tools/long_running_service_tools.py +678 -0
- paasta_tools/mac_address.py +44 -0
- paasta_tools/marathon_dashboard.py +0 -0
- paasta_tools/mesos/__init__.py +0 -0
- paasta_tools/mesos/cfg.py +46 -0
- paasta_tools/mesos/cluster.py +60 -0
- paasta_tools/mesos/exceptions.py +59 -0
- paasta_tools/mesos/framework.py +77 -0
- paasta_tools/mesos/log.py +48 -0
- paasta_tools/mesos/master.py +306 -0
- paasta_tools/mesos/mesos_file.py +169 -0
- paasta_tools/mesos/parallel.py +52 -0
- paasta_tools/mesos/slave.py +115 -0
- paasta_tools/mesos/task.py +94 -0
- paasta_tools/mesos/util.py +69 -0
- paasta_tools/mesos/zookeeper.py +37 -0
- paasta_tools/mesos_maintenance.py +848 -0
- paasta_tools/mesos_tools.py +1051 -0
- paasta_tools/metrics/__init__.py +0 -0
- paasta_tools/metrics/metastatus_lib.py +1110 -0
- paasta_tools/metrics/metrics_lib.py +217 -0
- paasta_tools/monitoring/__init__.py +13 -0
- paasta_tools/monitoring/check_k8s_api_performance.py +110 -0
- paasta_tools/monitoring_tools.py +652 -0
- paasta_tools/monkrelaycluster_tools.py +146 -0
- paasta_tools/nrtsearchservice_tools.py +143 -0
- paasta_tools/nrtsearchserviceeks_tools.py +68 -0
- paasta_tools/oom_logger.py +321 -0
- paasta_tools/paasta_deploy_tron_jobs +3 -0
- paasta_tools/paasta_execute_docker_command.py +123 -0
- paasta_tools/paasta_native_serviceinit.py +21 -0
- paasta_tools/paasta_service_config_loader.py +201 -0
- paasta_tools/paastaapi/__init__.py +29 -0
- paasta_tools/paastaapi/api/__init__.py +3 -0
- paasta_tools/paastaapi/api/autoscaler_api.py +302 -0
- paasta_tools/paastaapi/api/default_api.py +569 -0
- paasta_tools/paastaapi/api/remote_run_api.py +604 -0
- paasta_tools/paastaapi/api/resources_api.py +157 -0
- paasta_tools/paastaapi/api/service_api.py +1736 -0
- paasta_tools/paastaapi/api_client.py +818 -0
- paasta_tools/paastaapi/apis/__init__.py +22 -0
- paasta_tools/paastaapi/configuration.py +455 -0
- paasta_tools/paastaapi/exceptions.py +137 -0
- paasta_tools/paastaapi/model/__init__.py +5 -0
- paasta_tools/paastaapi/model/adhoc_launch_history.py +176 -0
- paasta_tools/paastaapi/model/autoscaler_count_msg.py +176 -0
- paasta_tools/paastaapi/model/deploy_queue.py +178 -0
- paasta_tools/paastaapi/model/deploy_queue_service_instance.py +194 -0
- paasta_tools/paastaapi/model/envoy_backend.py +185 -0
- paasta_tools/paastaapi/model/envoy_location.py +184 -0
- paasta_tools/paastaapi/model/envoy_status.py +181 -0
- paasta_tools/paastaapi/model/flink_cluster_overview.py +188 -0
- paasta_tools/paastaapi/model/flink_config.py +173 -0
- paasta_tools/paastaapi/model/flink_job.py +186 -0
- paasta_tools/paastaapi/model/flink_job_details.py +192 -0
- paasta_tools/paastaapi/model/flink_jobs.py +175 -0
- paasta_tools/paastaapi/model/float_and_error.py +173 -0
- paasta_tools/paastaapi/model/hpa_metric.py +176 -0
- paasta_tools/paastaapi/model/inline_object.py +170 -0
- paasta_tools/paastaapi/model/inline_response200.py +170 -0
- paasta_tools/paastaapi/model/inline_response2001.py +170 -0
- paasta_tools/paastaapi/model/instance_bounce_status.py +200 -0
- paasta_tools/paastaapi/model/instance_mesh_status.py +186 -0
- paasta_tools/paastaapi/model/instance_status.py +220 -0
- paasta_tools/paastaapi/model/instance_status_adhoc.py +187 -0
- paasta_tools/paastaapi/model/instance_status_cassandracluster.py +173 -0
- paasta_tools/paastaapi/model/instance_status_flink.py +173 -0
- paasta_tools/paastaapi/model/instance_status_kafkacluster.py +173 -0
- paasta_tools/paastaapi/model/instance_status_kubernetes.py +263 -0
- paasta_tools/paastaapi/model/instance_status_kubernetes_autoscaling_status.py +187 -0
- paasta_tools/paastaapi/model/instance_status_kubernetes_v2.py +197 -0
- paasta_tools/paastaapi/model/instance_status_tron.py +204 -0
- paasta_tools/paastaapi/model/instance_tasks.py +182 -0
- paasta_tools/paastaapi/model/integer_and_error.py +173 -0
- paasta_tools/paastaapi/model/kubernetes_container.py +178 -0
- paasta_tools/paastaapi/model/kubernetes_container_v2.py +219 -0
- paasta_tools/paastaapi/model/kubernetes_healthcheck.py +176 -0
- paasta_tools/paastaapi/model/kubernetes_pod.py +201 -0
- paasta_tools/paastaapi/model/kubernetes_pod_event.py +176 -0
- paasta_tools/paastaapi/model/kubernetes_pod_v2.py +213 -0
- paasta_tools/paastaapi/model/kubernetes_replica_set.py +185 -0
- paasta_tools/paastaapi/model/kubernetes_version.py +202 -0
- paasta_tools/paastaapi/model/remote_run_outcome.py +189 -0
- paasta_tools/paastaapi/model/remote_run_start.py +185 -0
- paasta_tools/paastaapi/model/remote_run_stop.py +176 -0
- paasta_tools/paastaapi/model/remote_run_token.py +173 -0
- paasta_tools/paastaapi/model/resource.py +187 -0
- paasta_tools/paastaapi/model/resource_item.py +187 -0
- paasta_tools/paastaapi/model/resource_value.py +176 -0
- paasta_tools/paastaapi/model/smartstack_backend.py +191 -0
- paasta_tools/paastaapi/model/smartstack_location.py +181 -0
- paasta_tools/paastaapi/model/smartstack_status.py +181 -0
- paasta_tools/paastaapi/model/task_tail_lines.py +176 -0
- paasta_tools/paastaapi/model_utils.py +1879 -0
- paasta_tools/paastaapi/models/__init__.py +62 -0
- paasta_tools/paastaapi/rest.py +287 -0
- paasta_tools/prune_completed_pods.py +220 -0
- paasta_tools/puppet_service_tools.py +59 -0
- paasta_tools/py.typed +1 -0
- paasta_tools/remote_git.py +127 -0
- paasta_tools/run-paasta-api-in-dev-mode.py +57 -0
- paasta_tools/run-paasta-api-playground.py +51 -0
- paasta_tools/secret_providers/__init__.py +66 -0
- paasta_tools/secret_providers/vault.py +214 -0
- paasta_tools/secret_tools.py +277 -0
- paasta_tools/setup_istio_mesh.py +353 -0
- paasta_tools/setup_kubernetes_cr.py +412 -0
- paasta_tools/setup_kubernetes_crd.py +138 -0
- paasta_tools/setup_kubernetes_internal_crd.py +154 -0
- paasta_tools/setup_kubernetes_job.py +353 -0
- paasta_tools/setup_prometheus_adapter_config.py +1028 -0
- paasta_tools/setup_tron_namespace.py +248 -0
- paasta_tools/slack.py +75 -0
- paasta_tools/smartstack_tools.py +676 -0
- paasta_tools/spark_tools.py +283 -0
- paasta_tools/synapse_srv_namespaces_fact.py +42 -0
- paasta_tools/tron/__init__.py +0 -0
- paasta_tools/tron/client.py +158 -0
- paasta_tools/tron/tron_command_context.py +194 -0
- paasta_tools/tron/tron_timeutils.py +101 -0
- paasta_tools/tron_tools.py +1448 -0
- paasta_tools/utils.py +4307 -0
- paasta_tools/yaml_tools.py +44 -0
- paasta_tools-1.21.3.data/scripts/apply_external_resources.py +79 -0
- paasta_tools-1.21.3.data/scripts/bounce_log_latency_parser.py +68 -0
- paasta_tools-1.21.3.data/scripts/check_autoscaler_max_instances.py +212 -0
- paasta_tools-1.21.3.data/scripts/check_cassandracluster_services_replication.py +35 -0
- paasta_tools-1.21.3.data/scripts/check_flink_services_health.py +203 -0
- paasta_tools-1.21.3.data/scripts/check_kubernetes_api.py +57 -0
- paasta_tools-1.21.3.data/scripts/check_kubernetes_services_replication.py +141 -0
- paasta_tools-1.21.3.data/scripts/check_manual_oapi_changes.sh +24 -0
- paasta_tools-1.21.3.data/scripts/check_oom_events.py +244 -0
- paasta_tools-1.21.3.data/scripts/check_orphans.py +306 -0
- paasta_tools-1.21.3.data/scripts/check_spark_jobs.py +234 -0
- paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_cr.py +138 -0
- paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_crd.py +145 -0
- paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_jobs.py +344 -0
- paasta_tools-1.21.3.data/scripts/create_dynamodb_table.py +35 -0
- paasta_tools-1.21.3.data/scripts/create_paasta_playground.py +105 -0
- paasta_tools-1.21.3.data/scripts/delete_kubernetes_deployments.py +89 -0
- paasta_tools-1.21.3.data/scripts/emit_allocated_cpu_metrics.py +50 -0
- paasta_tools-1.21.3.data/scripts/generate_all_deployments +9 -0
- paasta_tools-1.21.3.data/scripts/generate_authenticating_services.py +94 -0
- paasta_tools-1.21.3.data/scripts/generate_deployments_for_service.py +255 -0
- paasta_tools-1.21.3.data/scripts/generate_services_file.py +114 -0
- paasta_tools-1.21.3.data/scripts/generate_services_yaml.py +30 -0
- paasta_tools-1.21.3.data/scripts/get_running_task_allocation.py +346 -0
- paasta_tools-1.21.3.data/scripts/habitat_fixer.py +86 -0
- paasta_tools-1.21.3.data/scripts/ide_helper.py +316 -0
- paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_proxy.py +139 -0
- paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_smartstack.py +50 -0
- paasta_tools-1.21.3.data/scripts/kill_bad_containers.py +109 -0
- paasta_tools-1.21.3.data/scripts/kubernetes_remove_evicted_pods.py +164 -0
- paasta_tools-1.21.3.data/scripts/mass-deploy-tag.sh +44 -0
- paasta_tools-1.21.3.data/scripts/mock_patch_checker.py +86 -0
- paasta_tools-1.21.3.data/scripts/paasta_cleanup_remote_run_resources.py +135 -0
- paasta_tools-1.21.3.data/scripts/paasta_cleanup_stale_nodes.py +181 -0
- paasta_tools-1.21.3.data/scripts/paasta_deploy_tron_jobs +3 -0
- paasta_tools-1.21.3.data/scripts/paasta_execute_docker_command.py +123 -0
- paasta_tools-1.21.3.data/scripts/paasta_secrets_sync.py +758 -0
- paasta_tools-1.21.3.data/scripts/paasta_tabcomplete.sh +23 -0
- paasta_tools-1.21.3.data/scripts/paasta_update_soa_memcpu.py +520 -0
- paasta_tools-1.21.3.data/scripts/render_template.py +129 -0
- paasta_tools-1.21.3.data/scripts/rightsizer_soaconfigs_update.py +348 -0
- paasta_tools-1.21.3.data/scripts/service_shard_remove.py +157 -0
- paasta_tools-1.21.3.data/scripts/service_shard_update.py +373 -0
- paasta_tools-1.21.3.data/scripts/setup_istio_mesh.py +353 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_cr.py +412 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_crd.py +138 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_internal_crd.py +154 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_job.py +353 -0
- paasta_tools-1.21.3.data/scripts/setup_prometheus_adapter_config.py +1028 -0
- paasta_tools-1.21.3.data/scripts/shared_ip_check.py +77 -0
- paasta_tools-1.21.3.data/scripts/synapse_srv_namespaces_fact.py +42 -0
- paasta_tools-1.21.3.data/scripts/timeouts_metrics_prom.py +64 -0
- paasta_tools-1.21.3.dist-info/LICENSE +201 -0
- paasta_tools-1.21.3.dist-info/METADATA +74 -0
- paasta_tools-1.21.3.dist-info/RECORD +348 -0
- paasta_tools-1.21.3.dist-info/WHEEL +5 -0
- paasta_tools-1.21.3.dist-info/entry_points.txt +20 -0
- paasta_tools-1.21.3.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# Copyright 2015-2016 Yelp Inc.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
import logging
|
|
16
|
+
import math
|
|
17
|
+
from typing import Callable
|
|
18
|
+
from typing import Collection
|
|
19
|
+
from typing import Dict
|
|
20
|
+
from typing import Sequence
|
|
21
|
+
from typing import Set
|
|
22
|
+
|
|
23
|
+
from mypy_extensions import Arg
|
|
24
|
+
from mypy_extensions import DefaultArg
|
|
25
|
+
from mypy_extensions import TypedDict
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Module-level logger. A NullHandler is attached so this module never emits
# through logging's fallback handler when the application configures nothing.
log = logging.getLogger(__name__)
log.addHandler(logging.NullHandler())
# Only WARNING and above from the "requests" logger are let through.
logging.getLogger("requests").setLevel(logging.WARNING)

ZK_LOCK_CONNECT_TIMEOUT_S = 10.0  # seconds to wait to connect to zookeeper
ZK_LOCK_PATH = "/bounce"
# NOTE(review): the two WAIT_* values are not referenced anywhere in this
# module; presumably they are wait times in seconds (per the _S suffix) used
# by importers -- confirm before changing.
WAIT_CREATE_S = 3
WAIT_DELETE_S = 5
|
|
36
|
+
|
|
37
|
+
# Shape of the ``new_config`` argument handed to every bounce method. The only
# key declared here is the requested number of instances.
BounceMethodConfigDict = TypedDict("BounceMethodConfigDict", {"instances": int})

# Shape of the value every bounce method returns: whether the new app should
# be created, and the set of old tasks that should be drained.
BounceMethodResult = TypedDict(
    "BounceMethodResult", {"create_app": bool, "tasks_to_drain": Set}
)

# Call signature shared by all bounce methods. ``margin_factor`` is the only
# argument declared with a default (DefaultArg); the others are required.
BounceMethod = Callable[
    [
        Arg(BounceMethodConfigDict, "new_config"),
        Arg(bool, "new_app_running"),
        Arg(Collection, "happy_new_tasks"),
        Arg(Sequence, "old_non_draining_tasks"),
        DefaultArg(float, "margin_factor"),
    ],
    BounceMethodResult,
]


# Registry of bounce method name -> implementation. Written by the decorator
# returned from register_bounce_method() and read by get_bounce_method_func()
# and list_bounce_methods().
_bounce_method_funcs: Dict[str, BounceMethod] = {}
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def register_bounce_method(name: str) -> Callable[[BounceMethod], BounceMethod]:
    """Build a decorator that files a bounce function under ``name``.

    The decorated function is stored in the module-level registry, where
    get_bounce_method_func() can find it, and is handed back unchanged.

    :param name: key to register the bounce function under.
    :return: the registering decorator.
    """

    def decorator(func: BounceMethod):
        # A later registration under the same name replaces the earlier one.
        _bounce_method_funcs.update({name: func})
        return func

    return decorator
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def get_bounce_method_func(name: str) -> BounceMethod:
    """Look up the bounce function registered under ``name``.

    :param name: the name a bounce function was registered with through
        register_bounce_method().
    :return: the registered bounce function.
    :raises KeyError: if nothing is registered under ``name``. The message
        lists the names that are registered.
    """
    try:
        return _bounce_method_funcs[name]
    except KeyError:
        # Same exception type as a plain dict lookup, but with a message that
        # tells the operator which names would have worked.
        raise KeyError(
            f"Unknown bounce method {name!r}; registered methods: "
            f"{sorted(_bounce_method_funcs)}"
        ) from None
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def list_bounce_methods() -> Collection[str]:
    """Return the names of all registered bounce methods.

    The result is the registry's own keys view rather than a copy.
    """
    registered_names = _bounce_method_funcs.keys()
    return registered_names
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@register_bounce_method("brutal")
def brutal_bounce(
    new_config: BounceMethodConfigDict,
    new_app_running: bool,
    happy_new_tasks: Collection,
    old_non_draining_tasks: Sequence,
    margin_factor=1.0,
) -> BounceMethodResult:
    """Bounce with no regard for safety: start the new app if it is not
    running and drain every old task straight away. Mostly an example of the
    simplest working bounce method, though it may be tolerable for some
    services.

    All bounce methods share this signature; this implementation only reads
    new_app_running and old_non_draining_tasks.

    :param new_config: The configuration dictionary representing the desired new app.
    :param new_app_running: Whether an app with the same ID as the new config already exists.
    :param happy_new_tasks: Tasks belonging to the new application that are considered healthy and up.
    :param old_non_draining_tasks: A sequence of tasks not belonging to the new version, ordered from
                                   most desirable to least desirable.
    :param margin_factor: the multiplication factor used to calculate the number of instances to be drained
                          when the crossover method is used.
    :return: A dictionary describing the desired bounce actions, with the keys:
      - create_app: True if the new app should be started, False otherwise.
      - tasks_to_drain: a set of task objects which should be drained and killed. May be empty.
    """
    should_create = not new_app_running
    doomed_tasks = set(old_non_draining_tasks)
    return {"create_app": should_create, "tasks_to_drain": doomed_tasks}
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@register_bounce_method("upthendown")
def upthendown_bounce(
    new_config: BounceMethodConfigDict,
    new_app_running: bool,
    happy_new_tasks: Collection,
    old_non_draining_tasks: Sequence,
    margin_factor=1.0,
) -> BounceMethodResult:
    """Starts a new app if necessary; only kills old apps once all the requested tasks for the new version are running.

    See the docstring for brutal_bounce() for parameters and return value.
    margin_factor is accepted for signature compatibility and is not used.
    """
    # ">=" rather than "==": if more new tasks are happy than were requested,
    # the new version is still at full strength, so the old tasks can go. An
    # exact-equality check would leave the bounce stuck in that situation.
    new_version_ready = (
        new_app_running and len(happy_new_tasks) >= new_config["instances"]
    )
    if new_version_ready:
        return {"create_app": False, "tasks_to_drain": set(old_non_draining_tasks)}
    return {"create_app": not new_app_running, "tasks_to_drain": set()}
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@register_bounce_method("crossover")
def crossover_bounce(
    new_config: BounceMethodConfigDict,
    new_app_running: bool,
    happy_new_tasks: Collection,
    old_non_draining_tasks: Sequence,
    margin_factor=1.0,
) -> BounceMethodResult:
    """Starts a new app if necessary; slowly kills old apps as instances of the new app become happy.

    See the docstring for brutal_bounce() for parameters and return value.

    :raises AssertionError: if margin_factor is not in the interval (0, 1].
    """
    # Raised explicitly instead of using ``assert`` so the check still runs
    # under ``python -O``; the exception type is unchanged for callers.
    if not 0 < margin_factor <= 1:
        raise AssertionError(
            f"margin_factor must be in the interval (0, 1], got {margin_factor!r}"
        )

    # Number of old tasks that must stay up to cover the happy new tasks still
    # missing from the (margin-scaled) target; never negative.
    required_total = int(math.ceil(new_config["instances"] * margin_factor))
    needed_count = max(required_total - len(happy_new_tasks), 0)

    return {
        "create_app": not new_app_running,
        # The sequence is ordered most desirable first, so the first
        # needed_count tasks are kept and the rest are drained.
        "tasks_to_drain": set(old_non_draining_tasks[needed_count:]),
    }
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
@register_bounce_method("downthenup")
def downthenup_bounce(
    new_config: BounceMethodConfigDict,
    new_app_running: bool,
    happy_new_tasks: Collection,
    old_non_draining_tasks: Sequence,
    margin_factor=1.0,
) -> BounceMethodResult:
    """Drain every old task, and only ask for the new app once no old
    non-draining tasks remain and the new app is not already running.

    See the docstring for brutal_bounce() for parameters and return value.
    """
    nothing_left_running = not (old_non_draining_tasks or new_app_running)
    to_drain = set(old_non_draining_tasks)
    return {"create_app": nothing_left_running, "tasks_to_drain": to_drain}
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
@register_bounce_method("down")
def down_bounce(
    new_config: BounceMethodConfigDict,
    new_app_running: bool,
    happy_new_tasks: Collection,
    old_non_draining_tasks: Sequence,
    margin_factor=1.0,
) -> BounceMethodResult:
    """
    Drains all old tasks and never requests a new app.
    Used for the graceful_app_drain script.
    """
    everything_old = set(old_non_draining_tasks)
    return {"create_app": False, "tasks_to_drain": everything_old}
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# Copyright 2015-2016 Yelp Inc.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
import sys
|
|
16
|
+
|
|
17
|
+
from paasta_tools.kubernetes_tools import get_all_kubernetes_services_running_here
|
|
18
|
+
from paasta_tools.mesos_tools import MesosSlaveConnectionError
|
|
19
|
+
from paasta_tools.tron_tools import tron_jobs_running_here
|
|
20
|
+
from paasta_tools.utils import _log
|
|
21
|
+
from paasta_tools.utils import DEFAULT_SOA_DIR
|
|
22
|
+
from paasta_tools.utils import load_system_paasta_config
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def broadcast_log_all_services_running_here(line: str, soa_dir=DEFAULT_SOA_DIR) -> None:
    """Write one line of text to the paasta log of every service found on this host.

    The cluster name comes from the system paasta config, and each entry is
    logged under the "monitoring" component.

    :param line: text to log
    :param soa_dir: passed through to get_all_services_running_here()
    """
    cluster = load_system_paasta_config().get_cluster()
    # Each entry is a 3-tuple; the third element is not needed for logging.
    for service, instance, _ in get_all_services_running_here(cluster, soa_dir):
        _log(
            line=line,
            service=service,
            instance=instance,
            component="monitoring",
            cluster=cluster,
        )
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def get_all_services_running_here(cluster, soa_dir):
    """Collect the tron jobs and kubernetes services running on this host.

    Discovery is best-effort: when either lookup fails, that source simply
    contributes nothing and the other one is still returned.

    :param cluster: not used by this function; kept so existing callers keep working
    :param soa_dir: not used by this function; kept so existing callers keep working
    :return: the tron results followed by the kubernetes results
    """
    # Local import: this module does not import logging at the top level.
    import logging

    try:
        tron_services = tron_jobs_running_here()
    except MesosSlaveConnectionError:
        tron_services = []

    try:
        kubernetes_services = get_all_kubernetes_services_running_here()
    except Exception:
        # Deliberately broad so this host-level broadcast never crashes, but
        # the failure is recorded instead of being discarded without a trace.
        logging.getLogger(__name__).debug(
            "Could not list kubernetes services running here; treating as none",
            exc_info=True,
        )
        kubernetes_services = []

    return tron_services + kubernetes_services
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def main() -> None:
    """Read all of stdin, strip surrounding whitespace, and broadcast it as one log line."""
    message = sys.stdin.read().strip()
    broadcast_log_all_services_running_here(message)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
# Copyright 2015-2019 Yelp Inc.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
import logging
|
|
14
|
+
from typing import List
|
|
15
|
+
from typing import Mapping
|
|
16
|
+
from typing import Optional
|
|
17
|
+
|
|
18
|
+
import service_configuration_lib
|
|
19
|
+
|
|
20
|
+
from paasta_tools.kubernetes_tools import sanitise_kubernetes_name
|
|
21
|
+
from paasta_tools.kubernetes_tools import sanitised_cr_name
|
|
22
|
+
from paasta_tools.long_running_service_tools import LongRunningServiceConfig
|
|
23
|
+
from paasta_tools.long_running_service_tools import LongRunningServiceConfigDict
|
|
24
|
+
from paasta_tools.utils import BranchDictV2
|
|
25
|
+
from paasta_tools.utils import compose_job_id
|
|
26
|
+
from paasta_tools.utils import decompose_job_id
|
|
27
|
+
from paasta_tools.utils import deep_merge_dictionaries
|
|
28
|
+
from paasta_tools.utils import DEFAULT_SOA_DIR
|
|
29
|
+
from paasta_tools.utils import InvalidJobNameError
|
|
30
|
+
from paasta_tools.utils import load_service_instance_config
|
|
31
|
+
from paasta_tools.utils import load_v2_deployments_json
|
|
32
|
+
|
|
33
|
+
# Namespace constant returned by
# CassandraClusterDeploymentConfig.get_kubernetes_namespace() and used as the
# fallback value in CassandraClusterDeploymentConfig.get_namespace().
KUBERNETES_NAMESPACE = "paasta-cassandraclusters"

# Module-level logger with a NullHandler attached.
log = logging.getLogger(__name__)
log.addHandler(logging.NullHandler())
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class CassandraClusterDeploymentConfigDict(LongRunningServiceConfigDict, total=False):
    """Config keys for a cassandracluster instance, in addition to the
    long-running service keys it inherits.

    Declared with ``total=False``, so ``replicas`` may be absent.
    """

    # Read by CassandraClusterDeploymentConfig.get_instances(), which falls
    # back to 1 when the key is missing.
    replicas: int
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class CassandraClusterDeploymentConfig(LongRunningServiceConfig):
|
|
44
|
+
config_dict: CassandraClusterDeploymentConfigDict
|
|
45
|
+
|
|
46
|
+
config_filename_prefix = "cassandracluster"
|
|
47
|
+
|
|
48
|
+
def __init__(
    self,
    service: str,
    cluster: str,
    instance: str,
    config_dict: CassandraClusterDeploymentConfigDict,
    branch_dict: Optional[BranchDictV2],
    soa_dir: str = DEFAULT_SOA_DIR,
) -> None:
    """Forward every argument, by keyword, to the parent class initializer."""
    parent_kwargs = dict(
        service=service,
        cluster=cluster,
        instance=instance,
        config_dict=config_dict,
        branch_dict=branch_dict,
        soa_dir=soa_dir,
    )
    super().__init__(**parent_kwargs)
|
|
66
|
+
|
|
67
|
+
def get_service_name_smartstack(self) -> str:
|
|
68
|
+
"""
|
|
69
|
+
To support apollo we always register in
|
|
70
|
+
cassandra_<cluster>.main
|
|
71
|
+
"""
|
|
72
|
+
return "cassandra_" + self.get_instance()
|
|
73
|
+
|
|
74
|
+
def get_nerve_namespace(self) -> str:
|
|
75
|
+
"""
|
|
76
|
+
To support apollo we always register in
|
|
77
|
+
cassandra_<cluster>.main
|
|
78
|
+
"""
|
|
79
|
+
return "main"
|
|
80
|
+
|
|
81
|
+
def get_registrations(self) -> List[str]:
|
|
82
|
+
"""
|
|
83
|
+
To support apollo we always register in
|
|
84
|
+
cassandra_<cluster>.main
|
|
85
|
+
"""
|
|
86
|
+
registrations = self.config_dict.get("registrations", [])
|
|
87
|
+
for registration in registrations:
|
|
88
|
+
try:
|
|
89
|
+
decompose_job_id(registration)
|
|
90
|
+
except InvalidJobNameError:
|
|
91
|
+
log.error(
|
|
92
|
+
"Provided registration {} for service "
|
|
93
|
+
"{} is invalid".format(registration, self.service)
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
return registrations or [
|
|
97
|
+
compose_job_id(self.get_service_name_smartstack(), "main")
|
|
98
|
+
]
|
|
99
|
+
|
|
100
|
+
def get_kubernetes_namespace(self) -> str:
|
|
101
|
+
return KUBERNETES_NAMESPACE
|
|
102
|
+
|
|
103
|
+
def get_namespace(self) -> str:
|
|
104
|
+
"""Get namespace from config, default to 'paasta'"""
|
|
105
|
+
return self.config_dict.get("namespace", KUBERNETES_NAMESPACE)
|
|
106
|
+
|
|
107
|
+
def get_instances(self, with_limit: bool = True) -> int:
|
|
108
|
+
return self.config_dict.get("replicas", 1)
|
|
109
|
+
|
|
110
|
+
def get_bounce_method(self) -> str:
|
|
111
|
+
"""
|
|
112
|
+
This isn't really true since we use the StatefulSet RollingUpdate strategy
|
|
113
|
+
However for the paasta-api we need to map to a paasta bounce method and
|
|
114
|
+
crossover is the closest
|
|
115
|
+
"""
|
|
116
|
+
return "crossover"
|
|
117
|
+
|
|
118
|
+
def get_sanitised_service_name(self) -> str:
|
|
119
|
+
return sanitise_kubernetes_name(self.get_service())
|
|
120
|
+
|
|
121
|
+
def get_sanitised_instance_name(self) -> str:
|
|
122
|
+
return sanitise_kubernetes_name(self.get_instance())
|
|
123
|
+
|
|
124
|
+
def get_sanitised_deployment_name(self) -> str:
|
|
125
|
+
return self.get_sanitised_instance_name()
|
|
126
|
+
|
|
127
|
+
def validate(
|
|
128
|
+
self,
|
|
129
|
+
params: List[str] = [
|
|
130
|
+
"cpus",
|
|
131
|
+
"security",
|
|
132
|
+
"dependencies_reference",
|
|
133
|
+
"deploy_group",
|
|
134
|
+
],
|
|
135
|
+
) -> List[str]:
|
|
136
|
+
# Use InstanceConfig to validate shared config keys like cpus and mem
|
|
137
|
+
# TODO: add mem back to this list once we fix PAASTA-15582 and
|
|
138
|
+
# move to using the same units as flink/marathon etc.
|
|
139
|
+
error_msgs = super().validate(params=params)
|
|
140
|
+
|
|
141
|
+
if error_msgs:
|
|
142
|
+
name = self.get_instance()
|
|
143
|
+
return [f"{name}: {msg}" for msg in error_msgs]
|
|
144
|
+
else:
|
|
145
|
+
return []
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def load_cassandracluster_instance_config(
    service: str,
    instance: str,
    cluster: str,
    load_deployments: bool = True,
    soa_dir: str = DEFAULT_SOA_DIR,
) -> CassandraClusterDeploymentConfig:
    """Build the CassandraCluster configuration object for a service instance.

    The service-wide configuration is used as the defaults and the
    instance-specific ``cassandracluster`` configuration is merged over it.

    :param service: The service name
    :param instance: The instance of the service to retrieve
    :param cluster: The cluster to read the configuration for
    :param load_deployments: A boolean indicating if the corresponding deployments.json for this service
                             should also be loaded
    :param soa_dir: The SOA configuration directory to read from
    :returns: A CassandraClusterDeploymentConfig wrapping the merged config; its
              branch_dict is None when load_deployments is False"""

    def make_config(
        branch_info: Optional[BranchDictV2],
    ) -> CassandraClusterDeploymentConfig:
        # Every config built here differs only in its branch_dict.
        return CassandraClusterDeploymentConfig(
            service=service,
            cluster=cluster,
            instance=instance,
            config_dict=merged_config,
            branch_dict=branch_info,
            soa_dir=soa_dir,
        )

    service_defaults = service_configuration_lib.read_service_configuration(
        service, soa_dir=soa_dir
    )
    instance_overrides = load_service_instance_config(
        service, instance, "cassandracluster", cluster, soa_dir=soa_dir
    )
    merged_config = deep_merge_dictionaries(
        overrides=instance_overrides, defaults=service_defaults
    )

    if not load_deployments:
        return make_config(None)

    deployments_json = load_v2_deployments_json(service, soa_dir=soa_dir)
    # A throwaway config without deployment info is needed first, because the
    # branch and deploy group are derived from the merged config itself.
    provisional_config = make_config(None)
    branch = provisional_config.get_branch()
    deploy_group = provisional_config.get_deploy_group()
    return make_config(
        deployments_json.get_branch_dict(service, branch, deploy_group)
    )
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
# TODO: read this from CRD in service configs
|
|
203
|
+
def cr_id(service: str, instance: str) -> Mapping[str, str]:
    """Return the fields identifying the CassandraCluster custom resource.

    :param service: The service name
    :param instance: The instance name
    :returns: A mapping with the ``group``, ``version``, ``namespace``,
        ``plural`` and ``name`` of the custom resource; ``name`` comes from
        sanitised_cr_name(service, instance).
    """
    return {
        "group": "yelp.com",
        "version": "v1alpha1",
        "namespace": KUBERNETES_NAMESPACE,
        "plural": "cassandraclusters",
        "name": sanitised_cr_name(service, instance),
    }
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
import argparse
|
|
3
|
+
import asyncio
|
|
4
|
+
import logging
|
|
5
|
+
from typing import Type
|
|
6
|
+
|
|
7
|
+
import pysensu_yelp
|
|
8
|
+
|
|
9
|
+
from paasta_tools.eks_tools import EksDeploymentConfig
|
|
10
|
+
from paasta_tools.instance import kubernetes as pik
|
|
11
|
+
from paasta_tools.kubernetes_tools import get_kubernetes_app_name
|
|
12
|
+
from paasta_tools.kubernetes_tools import KubeClient
|
|
13
|
+
from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig
|
|
14
|
+
from paasta_tools.metrics.metastatus_lib import suffixed_number_value
|
|
15
|
+
from paasta_tools.monitoring_tools import send_event
|
|
16
|
+
from paasta_tools.paasta_service_config_loader import PaastaServiceConfigLoader
|
|
17
|
+
from paasta_tools.utils import DEFAULT_SOA_DIR
|
|
18
|
+
from paasta_tools.utils import list_services
|
|
19
|
+
from paasta_tools.utils import load_system_paasta_config
|
|
20
|
+
from paasta_tools.utils import SystemPaastaConfig
|
|
21
|
+
|
|
22
|
+
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def parse_args():
    """Parse the command-line options of the max_instances check.

    :returns: The argparse namespace, with ``soa_dir`` (defaults to
        DEFAULT_SOA_DIR) and ``dry_run`` (False unless --dry-run is given).
    """
    description = (
        "Check all autoscaled services to see if they're at their max_instances. If"
        " so, send an alert if their utilization is above"
        " max_instances_alert_threshold."
    )
    arg_parser = argparse.ArgumentParser(description=description)

    soa_dir_options = {
        "dest": "soa_dir",
        "default": DEFAULT_SOA_DIR,
        "help": "Use a different soa config directory",
    }
    arg_parser.add_argument("-d", "--soa-dir", **soa_dir_options)

    dry_run_options = {
        "dest": "dry_run",
        "action": "store_true",
        "help": "Print Sensu alert events instead of sending them",
    }
    arg_parser.add_argument("--dry-run", **dry_run_options)

    return arg_parser.parse_args()
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
async def check_max_instances(
    soa_dir: str,
    cluster: str,
    instance_type_class: Type[KubernetesDeploymentConfig],
    system_paasta_config: SystemPaastaConfig,
    dry_run: bool = False,
):
    """Send a Sensu event for every autoscaled instance about its max_instances state.

    Walks every service in ``soa_dir`` and every instance of
    ``instance_type_class`` in ``cluster``. Instances without an autoscaling
    metric spec, without a docker image, or whose HPA could not be found are
    skipped. For the rest, one event is sent via ``send_event``:

    * OK when the instance is below max_instances, or when it is a "canary"
      instance with min_instances == max_instances;
    * CRITICAL when it is at max_instances and the utilization of any metric
      is above that metric's alert threshold;
    * otherwise the result of the last metric evaluated, or UNKNOWN when no
      metric produced a result.

    :param soa_dir: The SOA configuration directory to read from
    :param cluster: The cluster whose instances are checked
    :param instance_type_class: The instance config class to enumerate
    :param system_paasta_config: System config, passed through to send_event
    :param dry_run: Passed through to send_event
    """
    kube_client = KubeClient()
    for service in list_services(soa_dir=soa_dir):
        service_config = PaastaServiceConfigLoader(service=service, soa_dir=soa_dir)
        for job_config in service_config.instance_configs(
            cluster=cluster, instance_type_class=instance_type_class
        ):
            instance = job_config.get_instance()
            if not job_config.get_autoscaling_metric_spec(
                name=get_kubernetes_app_name(service, instance),
                cluster=cluster,
                kube_client=kube_client,
                namespace=job_config.get_namespace(),
            ):
                # Not an instance that uses HPA, don't check.
                # TODO: should we send status=0 here, in case someone disables autoscaling for their service / changes
                # to bespoke autoscaler?
                continue

            if not job_config.get_docker_image():
                # skip services that haven't been marked for deployment yet.
                continue

            autoscaling_status = await pik.autoscaling_status(
                kube_client=kube_client,
                job_config=job_config,
                namespace=job_config.get_namespace(),
            )
            # min_instances == -1 is treated as "no HPA exists for this instance".
            if autoscaling_status["min_instances"] == -1:
                log.warning(
                    f"HPA {job_config.get_sanitised_deployment_name()} not found."
                )
                continue

            if (
                autoscaling_status["min_instances"]
                == autoscaling_status["max_instances"]
            ) and "canary" in instance:
                status = pysensu_yelp.Status.OK
                output = (
                    f"Not checking {service}.{instance} as the instance name contains"
                    ' "canary" and min_instances == max_instances.'
                )
            elif (
                autoscaling_status["desired_replicas"]
                >= autoscaling_status["max_instances"]
            ):

                metrics_provider_configs = job_config.get_autoscaling_params()[
                    "metrics_providers"
                ]

                # Fallback result, kept only if the loop below never assigns one.
                status = pysensu_yelp.Status.UNKNOWN
                output = "how are there no metrics for this thing?"

                # This makes an assumption that the metrics currently used by the HPA are exactly the same order (and
                # length) as the list of metrics_providers dictionaries. This should generally be true, but between
                # yelpsoa-configs being pushed and the HPA actually being updated it may not be true. This might cause
                # spurious alerts, but hopefully the frequency is low. We can add some safeguards if it's a problem.
                # (E.g. smarter matching between the status dicts and the config dicts, or bailing/not alerting if the
                # lists aren't the same lengths.)
                for metric, metrics_provider_config in zip(
                    autoscaling_status["metrics"], metrics_provider_configs
                ):

                    setpoint = metrics_provider_config["setpoint"]
                    # Without an explicit alert threshold, alert at the setpoint itself.
                    threshold = metrics_provider_config.get(
                        "max_instances_alert_threshold",
                        setpoint,
                    )

                    try:
                        current_value = suffixed_number_value(metric["current_value"])
                        target_value = suffixed_number_value(metric["target_value"])
                    except KeyError:
                        # we likely couldn't find values for the current metric from autoscaling status
                        # if this is the only metric, we will return UNKNOWN+this error
                        # suggest fixing their autoscaling config
                        # NOTE(review): only the output is replaced here; the status is
                        # left as set by the previous metric (or the UNKNOWN fallback).
                        output = f'{service}.{instance}: Service is at max_instances, and there is an error fetching your {metrics_provider_config["type"]} metric. Check your autoscaling configs or reach out to #paasta.'
                    else:
                        # target_value can be 100*setpoint (for cpu), 1 (for uwsgi, piscina, gunicorn,
                        # active_requests), or setpoint (for promql).
                        # Here we divide current_value by target_value to find the ratio of utilization to setpoint,
                        # and then multiply by setpoint to find the actual utilization in the same units as setpoint.
                        utilization = setpoint * current_value / target_value

                        if threshold == setpoint:
                            threshold_description = f"setpoint ({threshold})"
                        else:
                            threshold_description = (
                                f"max_instances_alert_threshold ({threshold})"
                            )

                        if utilization > threshold:
                            status = pysensu_yelp.Status.CRITICAL
                            output = (
                                f"{service}.{instance}: Service is at max_instances, and"
                                f" utilization ({utilization}) is greater than"
                                f" {threshold_description}."
                            )
                            # One overloaded metric is enough to alert. Stop here so a
                            # later metric cannot overwrite this CRITICAL result with
                            # OK or with a fetch-error message.
                            break
                        else:
                            status = pysensu_yelp.Status.OK
                            output = (
                                f"{service}.{instance}: Service is at max_instances, but"
                                f" utilization ({utilization}) is less than"
                                f" {threshold_description}."
                            )
            else:
                status = pysensu_yelp.Status.OK
                output = f"{service}.{instance} is below max_instances."

            monitoring_overrides = job_config.get_monitoring()
            monitoring_overrides.update(
                {
                    "page": False,  # TODO: remove this line once this alert has been deployed for a little while.
                    "runbook": "y/check-autoscaler-max-instances",
                    "realert_every": 60,  # The check runs once a minute, so this would realert every hour.
                    "tip": (
                        "The autoscaler wants to scale up to handle additional load"
                        " because your service is overloaded, but cannot scale any"
                        " higher because of max_instances. You may want to bump"
                        " max_instances. To make this alert quieter, adjust"
                        " autoscaling.metrics_providers[n].max_instances_alert_threshold in yelpsoa-configs."
                    ),
                }
            )
            send_event(
                service,
                check_name=f"check_autoscaler_max_instances.{service}.{instance}",
                overrides=monitoring_overrides,
                status=status,
                output=output,
                soa_dir=soa_dir,
                ttl=None,
                cluster=cluster,
                system_paasta_config=system_paasta_config,
                dry_run=dry_run,
            )
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def main():
    """Run the max_instances check once for each supported instance type.

    Parses the command line, loads the system PaaSTA config, then runs
    check_max_instances to completion for KubernetesDeploymentConfig and then
    for EksDeploymentConfig.
    """
    args = parse_args()
    system_paasta_config = load_system_paasta_config()

    instance_type_classes = (KubernetesDeploymentConfig, EksDeploymentConfig)
    for instance_type_class in instance_type_classes:
        # Each instance type gets its own asyncio.run() call, one after the other.
        pending_check = check_max_instances(
            soa_dir=args.soa_dir,
            cluster=system_paasta_config.get_cluster(),
            instance_type_class=instance_type_class,
            system_paasta_config=system_paasta_config,
            dry_run=args.dry_run,
        )
        asyncio.run(pending_check)


# Run the check only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|