paasta_tools-1.21.3-py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- k8s_itests/__init__.py +0 -0
- k8s_itests/test_autoscaling.py +23 -0
- k8s_itests/utils.py +38 -0
- paasta_tools/__init__.py +20 -0
- paasta_tools/adhoc_tools.py +142 -0
- paasta_tools/api/__init__.py +13 -0
- paasta_tools/api/api.py +330 -0
- paasta_tools/api/api_docs/swagger.json +2323 -0
- paasta_tools/api/client.py +106 -0
- paasta_tools/api/settings.py +33 -0
- paasta_tools/api/tweens/__init__.py +6 -0
- paasta_tools/api/tweens/auth.py +125 -0
- paasta_tools/api/tweens/profiling.py +108 -0
- paasta_tools/api/tweens/request_logger.py +124 -0
- paasta_tools/api/views/__init__.py +13 -0
- paasta_tools/api/views/autoscaler.py +100 -0
- paasta_tools/api/views/exception.py +45 -0
- paasta_tools/api/views/flink.py +73 -0
- paasta_tools/api/views/instance.py +395 -0
- paasta_tools/api/views/pause_autoscaler.py +71 -0
- paasta_tools/api/views/remote_run.py +113 -0
- paasta_tools/api/views/resources.py +76 -0
- paasta_tools/api/views/service.py +35 -0
- paasta_tools/api/views/version.py +25 -0
- paasta_tools/apply_external_resources.py +79 -0
- paasta_tools/async_utils.py +109 -0
- paasta_tools/autoscaling/__init__.py +0 -0
- paasta_tools/autoscaling/autoscaling_service_lib.py +57 -0
- paasta_tools/autoscaling/forecasting.py +106 -0
- paasta_tools/autoscaling/max_all_k8s_services.py +41 -0
- paasta_tools/autoscaling/pause_service_autoscaler.py +77 -0
- paasta_tools/autoscaling/utils.py +52 -0
- paasta_tools/bounce_lib.py +184 -0
- paasta_tools/broadcast_log_to_services.py +62 -0
- paasta_tools/cassandracluster_tools.py +210 -0
- paasta_tools/check_autoscaler_max_instances.py +212 -0
- paasta_tools/check_cassandracluster_services_replication.py +35 -0
- paasta_tools/check_flink_services_health.py +203 -0
- paasta_tools/check_kubernetes_api.py +57 -0
- paasta_tools/check_kubernetes_services_replication.py +141 -0
- paasta_tools/check_oom_events.py +244 -0
- paasta_tools/check_services_replication_tools.py +324 -0
- paasta_tools/check_spark_jobs.py +234 -0
- paasta_tools/cleanup_kubernetes_cr.py +138 -0
- paasta_tools/cleanup_kubernetes_crd.py +145 -0
- paasta_tools/cleanup_kubernetes_jobs.py +344 -0
- paasta_tools/cleanup_tron_namespaces.py +96 -0
- paasta_tools/cli/__init__.py +13 -0
- paasta_tools/cli/authentication.py +85 -0
- paasta_tools/cli/cli.py +260 -0
- paasta_tools/cli/cmds/__init__.py +13 -0
- paasta_tools/cli/cmds/autoscale.py +143 -0
- paasta_tools/cli/cmds/check.py +334 -0
- paasta_tools/cli/cmds/cook_image.py +147 -0
- paasta_tools/cli/cmds/get_docker_image.py +76 -0
- paasta_tools/cli/cmds/get_image_version.py +172 -0
- paasta_tools/cli/cmds/get_latest_deployment.py +93 -0
- paasta_tools/cli/cmds/info.py +155 -0
- paasta_tools/cli/cmds/itest.py +117 -0
- paasta_tools/cli/cmds/list.py +66 -0
- paasta_tools/cli/cmds/list_clusters.py +42 -0
- paasta_tools/cli/cmds/list_deploy_queue.py +171 -0
- paasta_tools/cli/cmds/list_namespaces.py +84 -0
- paasta_tools/cli/cmds/local_run.py +1396 -0
- paasta_tools/cli/cmds/logs.py +1601 -0
- paasta_tools/cli/cmds/mark_for_deployment.py +1988 -0
- paasta_tools/cli/cmds/mesh_status.py +174 -0
- paasta_tools/cli/cmds/pause_service_autoscaler.py +107 -0
- paasta_tools/cli/cmds/push_to_registry.py +275 -0
- paasta_tools/cli/cmds/remote_run.py +252 -0
- paasta_tools/cli/cmds/rollback.py +347 -0
- paasta_tools/cli/cmds/secret.py +549 -0
- paasta_tools/cli/cmds/security_check.py +59 -0
- paasta_tools/cli/cmds/spark_run.py +1400 -0
- paasta_tools/cli/cmds/start_stop_restart.py +401 -0
- paasta_tools/cli/cmds/status.py +2302 -0
- paasta_tools/cli/cmds/validate.py +1012 -0
- paasta_tools/cli/cmds/wait_for_deployment.py +275 -0
- paasta_tools/cli/fsm/__init__.py +13 -0
- paasta_tools/cli/fsm/autosuggest.py +82 -0
- paasta_tools/cli/fsm/template/README.md +8 -0
- paasta_tools/cli/fsm/template/cookiecutter.json +7 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/kubernetes-PROD.yaml +91 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/monitoring.yaml +20 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/service.yaml +8 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/smartstack.yaml +6 -0
- paasta_tools/cli/fsm_cmd.py +121 -0
- paasta_tools/cli/paasta_tabcomplete.sh +23 -0
- paasta_tools/cli/schemas/adhoc_schema.json +199 -0
- paasta_tools/cli/schemas/autoscaling_schema.json +91 -0
- paasta_tools/cli/schemas/autotuned_defaults/cassandracluster_schema.json +37 -0
- paasta_tools/cli/schemas/autotuned_defaults/kubernetes_schema.json +89 -0
- paasta_tools/cli/schemas/deploy_schema.json +173 -0
- paasta_tools/cli/schemas/eks_schema.json +970 -0
- paasta_tools/cli/schemas/kubernetes_schema.json +970 -0
- paasta_tools/cli/schemas/rollback_schema.json +160 -0
- paasta_tools/cli/schemas/service_schema.json +25 -0
- paasta_tools/cli/schemas/smartstack_schema.json +322 -0
- paasta_tools/cli/schemas/tron_schema.json +699 -0
- paasta_tools/cli/utils.py +1118 -0
- paasta_tools/clusterman.py +21 -0
- paasta_tools/config_utils.py +385 -0
- paasta_tools/contrib/__init__.py +0 -0
- paasta_tools/contrib/bounce_log_latency_parser.py +68 -0
- paasta_tools/contrib/check_manual_oapi_changes.sh +24 -0
- paasta_tools/contrib/check_orphans.py +306 -0
- paasta_tools/contrib/create_dynamodb_table.py +35 -0
- paasta_tools/contrib/create_paasta_playground.py +105 -0
- paasta_tools/contrib/emit_allocated_cpu_metrics.py +50 -0
- paasta_tools/contrib/get_running_task_allocation.py +346 -0
- paasta_tools/contrib/habitat_fixer.py +86 -0
- paasta_tools/contrib/ide_helper.py +316 -0
- paasta_tools/contrib/is_pod_healthy_in_proxy.py +139 -0
- paasta_tools/contrib/is_pod_healthy_in_smartstack.py +50 -0
- paasta_tools/contrib/kill_bad_containers.py +109 -0
- paasta_tools/contrib/mass-deploy-tag.sh +44 -0
- paasta_tools/contrib/mock_patch_checker.py +86 -0
- paasta_tools/contrib/paasta_update_soa_memcpu.py +520 -0
- paasta_tools/contrib/render_template.py +129 -0
- paasta_tools/contrib/rightsizer_soaconfigs_update.py +348 -0
- paasta_tools/contrib/service_shard_remove.py +157 -0
- paasta_tools/contrib/service_shard_update.py +373 -0
- paasta_tools/contrib/shared_ip_check.py +77 -0
- paasta_tools/contrib/timeouts_metrics_prom.py +64 -0
- paasta_tools/delete_kubernetes_deployments.py +89 -0
- paasta_tools/deployment_utils.py +44 -0
- paasta_tools/docker_wrapper.py +234 -0
- paasta_tools/docker_wrapper_imports.py +13 -0
- paasta_tools/drain_lib.py +351 -0
- paasta_tools/dump_locally_running_services.py +71 -0
- paasta_tools/eks_tools.py +119 -0
- paasta_tools/envoy_tools.py +373 -0
- paasta_tools/firewall.py +504 -0
- paasta_tools/firewall_logging.py +154 -0
- paasta_tools/firewall_update.py +172 -0
- paasta_tools/flink_tools.py +345 -0
- paasta_tools/flinkeks_tools.py +90 -0
- paasta_tools/frameworks/__init__.py +0 -0
- paasta_tools/frameworks/adhoc_scheduler.py +71 -0
- paasta_tools/frameworks/constraints.py +87 -0
- paasta_tools/frameworks/native_scheduler.py +652 -0
- paasta_tools/frameworks/native_service_config.py +301 -0
- paasta_tools/frameworks/task_store.py +245 -0
- paasta_tools/generate_all_deployments +9 -0
- paasta_tools/generate_authenticating_services.py +94 -0
- paasta_tools/generate_deployments_for_service.py +255 -0
- paasta_tools/generate_services_file.py +114 -0
- paasta_tools/generate_services_yaml.py +30 -0
- paasta_tools/hacheck.py +76 -0
- paasta_tools/instance/__init__.py +0 -0
- paasta_tools/instance/hpa_metrics_parser.py +122 -0
- paasta_tools/instance/kubernetes.py +1362 -0
- paasta_tools/iptables.py +240 -0
- paasta_tools/kafkacluster_tools.py +143 -0
- paasta_tools/kubernetes/__init__.py +0 -0
- paasta_tools/kubernetes/application/__init__.py +0 -0
- paasta_tools/kubernetes/application/controller_wrappers.py +476 -0
- paasta_tools/kubernetes/application/tools.py +90 -0
- paasta_tools/kubernetes/bin/__init__.py +0 -0
- paasta_tools/kubernetes/bin/kubernetes_remove_evicted_pods.py +164 -0
- paasta_tools/kubernetes/bin/paasta_cleanup_remote_run_resources.py +135 -0
- paasta_tools/kubernetes/bin/paasta_cleanup_stale_nodes.py +181 -0
- paasta_tools/kubernetes/bin/paasta_secrets_sync.py +758 -0
- paasta_tools/kubernetes/remote_run.py +558 -0
- paasta_tools/kubernetes_tools.py +4679 -0
- paasta_tools/list_kubernetes_service_instances.py +128 -0
- paasta_tools/list_tron_namespaces.py +60 -0
- paasta_tools/long_running_service_tools.py +678 -0
- paasta_tools/mac_address.py +44 -0
- paasta_tools/marathon_dashboard.py +0 -0
- paasta_tools/mesos/__init__.py +0 -0
- paasta_tools/mesos/cfg.py +46 -0
- paasta_tools/mesos/cluster.py +60 -0
- paasta_tools/mesos/exceptions.py +59 -0
- paasta_tools/mesos/framework.py +77 -0
- paasta_tools/mesos/log.py +48 -0
- paasta_tools/mesos/master.py +306 -0
- paasta_tools/mesos/mesos_file.py +169 -0
- paasta_tools/mesos/parallel.py +52 -0
- paasta_tools/mesos/slave.py +115 -0
- paasta_tools/mesos/task.py +94 -0
- paasta_tools/mesos/util.py +69 -0
- paasta_tools/mesos/zookeeper.py +37 -0
- paasta_tools/mesos_maintenance.py +848 -0
- paasta_tools/mesos_tools.py +1051 -0
- paasta_tools/metrics/__init__.py +0 -0
- paasta_tools/metrics/metastatus_lib.py +1110 -0
- paasta_tools/metrics/metrics_lib.py +217 -0
- paasta_tools/monitoring/__init__.py +13 -0
- paasta_tools/monitoring/check_k8s_api_performance.py +110 -0
- paasta_tools/monitoring_tools.py +652 -0
- paasta_tools/monkrelaycluster_tools.py +146 -0
- paasta_tools/nrtsearchservice_tools.py +143 -0
- paasta_tools/nrtsearchserviceeks_tools.py +68 -0
- paasta_tools/oom_logger.py +321 -0
- paasta_tools/paasta_deploy_tron_jobs +3 -0
- paasta_tools/paasta_execute_docker_command.py +123 -0
- paasta_tools/paasta_native_serviceinit.py +21 -0
- paasta_tools/paasta_service_config_loader.py +201 -0
- paasta_tools/paastaapi/__init__.py +29 -0
- paasta_tools/paastaapi/api/__init__.py +3 -0
- paasta_tools/paastaapi/api/autoscaler_api.py +302 -0
- paasta_tools/paastaapi/api/default_api.py +569 -0
- paasta_tools/paastaapi/api/remote_run_api.py +604 -0
- paasta_tools/paastaapi/api/resources_api.py +157 -0
- paasta_tools/paastaapi/api/service_api.py +1736 -0
- paasta_tools/paastaapi/api_client.py +818 -0
- paasta_tools/paastaapi/apis/__init__.py +22 -0
- paasta_tools/paastaapi/configuration.py +455 -0
- paasta_tools/paastaapi/exceptions.py +137 -0
- paasta_tools/paastaapi/model/__init__.py +5 -0
- paasta_tools/paastaapi/model/adhoc_launch_history.py +176 -0
- paasta_tools/paastaapi/model/autoscaler_count_msg.py +176 -0
- paasta_tools/paastaapi/model/deploy_queue.py +178 -0
- paasta_tools/paastaapi/model/deploy_queue_service_instance.py +194 -0
- paasta_tools/paastaapi/model/envoy_backend.py +185 -0
- paasta_tools/paastaapi/model/envoy_location.py +184 -0
- paasta_tools/paastaapi/model/envoy_status.py +181 -0
- paasta_tools/paastaapi/model/flink_cluster_overview.py +188 -0
- paasta_tools/paastaapi/model/flink_config.py +173 -0
- paasta_tools/paastaapi/model/flink_job.py +186 -0
- paasta_tools/paastaapi/model/flink_job_details.py +192 -0
- paasta_tools/paastaapi/model/flink_jobs.py +175 -0
- paasta_tools/paastaapi/model/float_and_error.py +173 -0
- paasta_tools/paastaapi/model/hpa_metric.py +176 -0
- paasta_tools/paastaapi/model/inline_object.py +170 -0
- paasta_tools/paastaapi/model/inline_response200.py +170 -0
- paasta_tools/paastaapi/model/inline_response2001.py +170 -0
- paasta_tools/paastaapi/model/instance_bounce_status.py +200 -0
- paasta_tools/paastaapi/model/instance_mesh_status.py +186 -0
- paasta_tools/paastaapi/model/instance_status.py +220 -0
- paasta_tools/paastaapi/model/instance_status_adhoc.py +187 -0
- paasta_tools/paastaapi/model/instance_status_cassandracluster.py +173 -0
- paasta_tools/paastaapi/model/instance_status_flink.py +173 -0
- paasta_tools/paastaapi/model/instance_status_kafkacluster.py +173 -0
- paasta_tools/paastaapi/model/instance_status_kubernetes.py +263 -0
- paasta_tools/paastaapi/model/instance_status_kubernetes_autoscaling_status.py +187 -0
- paasta_tools/paastaapi/model/instance_status_kubernetes_v2.py +197 -0
- paasta_tools/paastaapi/model/instance_status_tron.py +204 -0
- paasta_tools/paastaapi/model/instance_tasks.py +182 -0
- paasta_tools/paastaapi/model/integer_and_error.py +173 -0
- paasta_tools/paastaapi/model/kubernetes_container.py +178 -0
- paasta_tools/paastaapi/model/kubernetes_container_v2.py +219 -0
- paasta_tools/paastaapi/model/kubernetes_healthcheck.py +176 -0
- paasta_tools/paastaapi/model/kubernetes_pod.py +201 -0
- paasta_tools/paastaapi/model/kubernetes_pod_event.py +176 -0
- paasta_tools/paastaapi/model/kubernetes_pod_v2.py +213 -0
- paasta_tools/paastaapi/model/kubernetes_replica_set.py +185 -0
- paasta_tools/paastaapi/model/kubernetes_version.py +202 -0
- paasta_tools/paastaapi/model/remote_run_outcome.py +189 -0
- paasta_tools/paastaapi/model/remote_run_start.py +185 -0
- paasta_tools/paastaapi/model/remote_run_stop.py +176 -0
- paasta_tools/paastaapi/model/remote_run_token.py +173 -0
- paasta_tools/paastaapi/model/resource.py +187 -0
- paasta_tools/paastaapi/model/resource_item.py +187 -0
- paasta_tools/paastaapi/model/resource_value.py +176 -0
- paasta_tools/paastaapi/model/smartstack_backend.py +191 -0
- paasta_tools/paastaapi/model/smartstack_location.py +181 -0
- paasta_tools/paastaapi/model/smartstack_status.py +181 -0
- paasta_tools/paastaapi/model/task_tail_lines.py +176 -0
- paasta_tools/paastaapi/model_utils.py +1879 -0
- paasta_tools/paastaapi/models/__init__.py +62 -0
- paasta_tools/paastaapi/rest.py +287 -0
- paasta_tools/prune_completed_pods.py +220 -0
- paasta_tools/puppet_service_tools.py +59 -0
- paasta_tools/py.typed +1 -0
- paasta_tools/remote_git.py +127 -0
- paasta_tools/run-paasta-api-in-dev-mode.py +57 -0
- paasta_tools/run-paasta-api-playground.py +51 -0
- paasta_tools/secret_providers/__init__.py +66 -0
- paasta_tools/secret_providers/vault.py +214 -0
- paasta_tools/secret_tools.py +277 -0
- paasta_tools/setup_istio_mesh.py +353 -0
- paasta_tools/setup_kubernetes_cr.py +412 -0
- paasta_tools/setup_kubernetes_crd.py +138 -0
- paasta_tools/setup_kubernetes_internal_crd.py +154 -0
- paasta_tools/setup_kubernetes_job.py +353 -0
- paasta_tools/setup_prometheus_adapter_config.py +1028 -0
- paasta_tools/setup_tron_namespace.py +248 -0
- paasta_tools/slack.py +75 -0
- paasta_tools/smartstack_tools.py +676 -0
- paasta_tools/spark_tools.py +283 -0
- paasta_tools/synapse_srv_namespaces_fact.py +42 -0
- paasta_tools/tron/__init__.py +0 -0
- paasta_tools/tron/client.py +158 -0
- paasta_tools/tron/tron_command_context.py +194 -0
- paasta_tools/tron/tron_timeutils.py +101 -0
- paasta_tools/tron_tools.py +1448 -0
- paasta_tools/utils.py +4307 -0
- paasta_tools/yaml_tools.py +44 -0
- paasta_tools-1.21.3.data/scripts/apply_external_resources.py +79 -0
- paasta_tools-1.21.3.data/scripts/bounce_log_latency_parser.py +68 -0
- paasta_tools-1.21.3.data/scripts/check_autoscaler_max_instances.py +212 -0
- paasta_tools-1.21.3.data/scripts/check_cassandracluster_services_replication.py +35 -0
- paasta_tools-1.21.3.data/scripts/check_flink_services_health.py +203 -0
- paasta_tools-1.21.3.data/scripts/check_kubernetes_api.py +57 -0
- paasta_tools-1.21.3.data/scripts/check_kubernetes_services_replication.py +141 -0
- paasta_tools-1.21.3.data/scripts/check_manual_oapi_changes.sh +24 -0
- paasta_tools-1.21.3.data/scripts/check_oom_events.py +244 -0
- paasta_tools-1.21.3.data/scripts/check_orphans.py +306 -0
- paasta_tools-1.21.3.data/scripts/check_spark_jobs.py +234 -0
- paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_cr.py +138 -0
- paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_crd.py +145 -0
- paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_jobs.py +344 -0
- paasta_tools-1.21.3.data/scripts/create_dynamodb_table.py +35 -0
- paasta_tools-1.21.3.data/scripts/create_paasta_playground.py +105 -0
- paasta_tools-1.21.3.data/scripts/delete_kubernetes_deployments.py +89 -0
- paasta_tools-1.21.3.data/scripts/emit_allocated_cpu_metrics.py +50 -0
- paasta_tools-1.21.3.data/scripts/generate_all_deployments +9 -0
- paasta_tools-1.21.3.data/scripts/generate_authenticating_services.py +94 -0
- paasta_tools-1.21.3.data/scripts/generate_deployments_for_service.py +255 -0
- paasta_tools-1.21.3.data/scripts/generate_services_file.py +114 -0
- paasta_tools-1.21.3.data/scripts/generate_services_yaml.py +30 -0
- paasta_tools-1.21.3.data/scripts/get_running_task_allocation.py +346 -0
- paasta_tools-1.21.3.data/scripts/habitat_fixer.py +86 -0
- paasta_tools-1.21.3.data/scripts/ide_helper.py +316 -0
- paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_proxy.py +139 -0
- paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_smartstack.py +50 -0
- paasta_tools-1.21.3.data/scripts/kill_bad_containers.py +109 -0
- paasta_tools-1.21.3.data/scripts/kubernetes_remove_evicted_pods.py +164 -0
- paasta_tools-1.21.3.data/scripts/mass-deploy-tag.sh +44 -0
- paasta_tools-1.21.3.data/scripts/mock_patch_checker.py +86 -0
- paasta_tools-1.21.3.data/scripts/paasta_cleanup_remote_run_resources.py +135 -0
- paasta_tools-1.21.3.data/scripts/paasta_cleanup_stale_nodes.py +181 -0
- paasta_tools-1.21.3.data/scripts/paasta_deploy_tron_jobs +3 -0
- paasta_tools-1.21.3.data/scripts/paasta_execute_docker_command.py +123 -0
- paasta_tools-1.21.3.data/scripts/paasta_secrets_sync.py +758 -0
- paasta_tools-1.21.3.data/scripts/paasta_tabcomplete.sh +23 -0
- paasta_tools-1.21.3.data/scripts/paasta_update_soa_memcpu.py +520 -0
- paasta_tools-1.21.3.data/scripts/render_template.py +129 -0
- paasta_tools-1.21.3.data/scripts/rightsizer_soaconfigs_update.py +348 -0
- paasta_tools-1.21.3.data/scripts/service_shard_remove.py +157 -0
- paasta_tools-1.21.3.data/scripts/service_shard_update.py +373 -0
- paasta_tools-1.21.3.data/scripts/setup_istio_mesh.py +353 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_cr.py +412 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_crd.py +138 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_internal_crd.py +154 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_job.py +353 -0
- paasta_tools-1.21.3.data/scripts/setup_prometheus_adapter_config.py +1028 -0
- paasta_tools-1.21.3.data/scripts/shared_ip_check.py +77 -0
- paasta_tools-1.21.3.data/scripts/synapse_srv_namespaces_fact.py +42 -0
- paasta_tools-1.21.3.data/scripts/timeouts_metrics_prom.py +64 -0
- paasta_tools-1.21.3.dist-info/LICENSE +201 -0
- paasta_tools-1.21.3.dist-info/METADATA +74 -0
- paasta_tools-1.21.3.dist-info/RECORD +348 -0
- paasta_tools-1.21.3.dist-info/WHEEL +5 -0
- paasta_tools-1.21.3.dist-info/entry_points.txt +20 -0
- paasta_tools-1.21.3.dist-info/top_level.txt +2 -0
@@ -0,0 +1,1028 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# Copyright 2015-2021 Yelp Inc.
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
"""
|
|
15
|
+
Small utility to update the Prometheus adapter's config to match soaconfigs.
|
|
16
|
+
"""
|
|
17
|
+
import argparse
|
|
18
|
+
import logging
|
|
19
|
+
import sys
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import cast
|
|
22
|
+
from typing import Dict
|
|
23
|
+
from typing import List
|
|
24
|
+
from typing import Optional
|
|
25
|
+
|
|
26
|
+
import ruamel.yaml as yaml
|
|
27
|
+
from kubernetes.client import V1ConfigMap
|
|
28
|
+
from kubernetes.client import V1DeleteOptions
|
|
29
|
+
from kubernetes.client import V1ObjectMeta
|
|
30
|
+
from kubernetes.client.rest import ApiException
|
|
31
|
+
from mypy_extensions import TypedDict
|
|
32
|
+
|
|
33
|
+
from paasta_tools.autoscaling.utils import MetricsProviderDict
|
|
34
|
+
from paasta_tools.eks_tools import EksDeploymentConfig
|
|
35
|
+
from paasta_tools.kubernetes_tools import ensure_namespace
|
|
36
|
+
from paasta_tools.kubernetes_tools import get_kubernetes_app_name
|
|
37
|
+
from paasta_tools.kubernetes_tools import KubeClient
|
|
38
|
+
from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig
|
|
39
|
+
from paasta_tools.kubernetes_tools import V1Pod
|
|
40
|
+
from paasta_tools.long_running_service_tools import ALL_METRICS_PROVIDERS
|
|
41
|
+
from paasta_tools.long_running_service_tools import (
|
|
42
|
+
DEFAULT_ACTIVE_REQUESTS_AUTOSCALING_MOVING_AVERAGE_WINDOW,
|
|
43
|
+
)
|
|
44
|
+
from paasta_tools.long_running_service_tools import (
|
|
45
|
+
DEFAULT_DESIRED_ACTIVE_REQUESTS_PER_REPLICA,
|
|
46
|
+
)
|
|
47
|
+
from paasta_tools.long_running_service_tools import (
|
|
48
|
+
DEFAULT_GUNICORN_AUTOSCALING_MOVING_AVERAGE_WINDOW,
|
|
49
|
+
)
|
|
50
|
+
from paasta_tools.long_running_service_tools import (
|
|
51
|
+
DEFAULT_PISCINA_AUTOSCALING_MOVING_AVERAGE_WINDOW,
|
|
52
|
+
)
|
|
53
|
+
from paasta_tools.long_running_service_tools import (
|
|
54
|
+
DEFAULT_UWSGI_AUTOSCALING_MOVING_AVERAGE_WINDOW,
|
|
55
|
+
)
|
|
56
|
+
from paasta_tools.long_running_service_tools import METRICS_PROVIDER_ACTIVE_REQUESTS
|
|
57
|
+
from paasta_tools.long_running_service_tools import METRICS_PROVIDER_CPU
|
|
58
|
+
from paasta_tools.long_running_service_tools import METRICS_PROVIDER_GUNICORN
|
|
59
|
+
from paasta_tools.long_running_service_tools import METRICS_PROVIDER_PISCINA
|
|
60
|
+
from paasta_tools.long_running_service_tools import METRICS_PROVIDER_PROMQL
|
|
61
|
+
from paasta_tools.long_running_service_tools import METRICS_PROVIDER_UWSGI
|
|
62
|
+
from paasta_tools.long_running_service_tools import METRICS_PROVIDER_UWSGI_V2
|
|
63
|
+
from paasta_tools.paasta_service_config_loader import PaastaServiceConfigLoader
|
|
64
|
+
from paasta_tools.utils import DEFAULT_SOA_DIR
|
|
65
|
+
from paasta_tools.utils import get_services_for_cluster
|
|
66
|
+
|
|
67
|
+
log = logging.getLogger(__name__)
|
|
68
|
+
|
|
69
|
+
PROMETHEUS_ADAPTER_CONFIGMAP_NAMESPACE = "custom-metrics"
|
|
70
|
+
PROMETHEUS_ADAPTER_POD_NAMESPACE = "custom-metrics"
|
|
71
|
+
PROMETHEUS_ADAPTER_CONFIGMAP_NAME = "adapter-config"
|
|
72
|
+
PROMETHEUS_ADAPTER_CONFIGMAP_FILENAME = "config.yaml"
|
|
73
|
+
PROMETHEUS_ADAPTER_POD_NAME_PREFIX = "custom-metrics-apiserver"
|
|
74
|
+
PROMETHEUS_ADAPTER_POD_PHASES_TO_REMOVE = (
|
|
75
|
+
"Running",
|
|
76
|
+
"Pending",
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
DEFAULT_SCRAPE_PERIOD_S = 10
|
|
80
|
+
DEFAULT_EXTRAPOLATION_PERIODS = 10
|
|
81
|
+
DEFAULT_EXTRAPOLATION_TIME = DEFAULT_SCRAPE_PERIOD_S * DEFAULT_EXTRAPOLATION_PERIODS
|
|
82
|
+
|
|
83
|
+
K8S_INSTANCE_TYPE_CLASSES = (
|
|
84
|
+
KubernetesDeploymentConfig,
|
|
85
|
+
EksDeploymentConfig,
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class PrometheusAdapterResourceConfig(TypedDict, total=False):
|
|
90
|
+
"""
|
|
91
|
+
Configuration for resource association in the Prometheus adapter.
|
|
92
|
+
|
|
93
|
+
NOTE: this dict is not total as there's no existing way in mypy to annotate
|
|
94
|
+
that you only need one of these keys can be populated (and that both can be
|
|
95
|
+
populated if so desired)
|
|
96
|
+
|
|
97
|
+
For more information, see:
|
|
98
|
+
https://github.com/kubernetes-sigs/prometheus-adapter/blob/master/docs/config.md#association
|
|
99
|
+
"""
|
|
100
|
+
|
|
101
|
+
# this should be a Go template string (e.g., "kube_<<.Resource>>") and will be used to
|
|
102
|
+
# extract k8s resources from a label
|
|
103
|
+
template: str
|
|
104
|
+
# if your labels don't have a common prefix (or if you only want to inspect certain labels)
|
|
105
|
+
# you'd want to use an override - these are of the form:
|
|
106
|
+
# {
|
|
107
|
+
# "$SOME_PROMETHEUS_LABEL": {
|
|
108
|
+
# "group": "$SOME_K8S_GROUP",
|
|
109
|
+
# "resource": "$SOME_K8S_RESOURCE",
|
|
110
|
+
# }
|
|
111
|
+
# }
|
|
112
|
+
overrides: Dict[str, Dict[str, str]]
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class PrometheusAdapterRule(TypedDict):
|
|
116
|
+
"""
|
|
117
|
+
Typed version of the (minimal) set of Prometheus adapter rule configuration options that we use
|
|
118
|
+
|
|
119
|
+
For more information, see:
|
|
120
|
+
https://github.com/kubernetes-sigs/prometheus-adapter/blob/master/docs/config.md
|
|
121
|
+
"""
|
|
122
|
+
|
|
123
|
+
# used for discovering what resources should be scaled
|
|
124
|
+
seriesQuery: str
|
|
125
|
+
# configuration for how to expose this rule to the HPA
|
|
126
|
+
name: Dict[str, str]
|
|
127
|
+
# used to associate metrics with k8s resources
|
|
128
|
+
resources: PrometheusAdapterResourceConfig
|
|
129
|
+
# the actual query we want to send to Prometheus to use for scaling
|
|
130
|
+
metricsQuery: str
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class PrometheusAdapterConfig(TypedDict):
|
|
134
|
+
"""
|
|
135
|
+
Typed version of the Prometheus adapter configuration dictionary.
|
|
136
|
+
"""
|
|
137
|
+
|
|
138
|
+
rules: List[PrometheusAdapterRule]
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def parse_args() -> argparse.Namespace:
|
|
142
|
+
parser = argparse.ArgumentParser(
|
|
143
|
+
description="Syncs the Prometheus metric adapter config with soaconfigs.",
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
parser.add_argument(
|
|
147
|
+
"-d",
|
|
148
|
+
"--soa-dir",
|
|
149
|
+
dest="soa_dir",
|
|
150
|
+
metavar="SOA_DIR",
|
|
151
|
+
default=Path(DEFAULT_SOA_DIR),
|
|
152
|
+
help="Directory to read service configs from. Default is %(default)s.",
|
|
153
|
+
type=Path,
|
|
154
|
+
)
|
|
155
|
+
# TODO: do we need to be able to pass multiple clusters in?
|
|
156
|
+
parser.add_argument(
|
|
157
|
+
"-c",
|
|
158
|
+
"--cluster",
|
|
159
|
+
dest="cluster",
|
|
160
|
+
help="PaaSTA cluster to generate configs for.",
|
|
161
|
+
required=True,
|
|
162
|
+
)
|
|
163
|
+
parser.add_argument(
|
|
164
|
+
"-v",
|
|
165
|
+
"--verbose",
|
|
166
|
+
action="store_true",
|
|
167
|
+
dest="verbose",
|
|
168
|
+
default=False,
|
|
169
|
+
help="Enable verbose logging.",
|
|
170
|
+
)
|
|
171
|
+
parser.add_argument(
|
|
172
|
+
"--dry-run",
|
|
173
|
+
dest="dry_run",
|
|
174
|
+
action="store_true",
|
|
175
|
+
default=False,
|
|
176
|
+
help="Enable verbose logging.",
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
return parser.parse_args()
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _minify_promql(query: str) -> str:
|
|
183
|
+
"""
|
|
184
|
+
Given a PromQL query, return the same query with most whitespace collapsed.
|
|
185
|
+
|
|
186
|
+
This is useful for allowing us to nicely format queries in code, but minimize the size of our
|
|
187
|
+
queries when they're actually sent to Prometheus by the adapter.
|
|
188
|
+
"""
|
|
189
|
+
trimmed_query = []
|
|
190
|
+
# while we could potentially do some regex magic, we want to ensure
|
|
191
|
+
# that we don't mess up any labels (even though they really shouldn't
|
|
192
|
+
# have any whitespace in them in the first place) - thus we just just
|
|
193
|
+
# strip any leading/trailing whitespace and leave everything else alone
|
|
194
|
+
for line in query.split("\n"):
|
|
195
|
+
trimmed_query.append(line.strip())
|
|
196
|
+
|
|
197
|
+
return (" ".join(trimmed_query)).strip()
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def create_instance_scaling_rule(
|
|
201
|
+
service: str,
|
|
202
|
+
instance_config: KubernetesDeploymentConfig,
|
|
203
|
+
metrics_provider_config: MetricsProviderDict,
|
|
204
|
+
paasta_cluster: str,
|
|
205
|
+
) -> Optional[PrometheusAdapterRule]:
|
|
206
|
+
if metrics_provider_config["type"] == METRICS_PROVIDER_CPU:
|
|
207
|
+
log.debug("[{service}] prometheus-based CPU scaling is not supported")
|
|
208
|
+
return None
|
|
209
|
+
if metrics_provider_config["type"] == METRICS_PROVIDER_UWSGI:
|
|
210
|
+
return create_instance_uwsgi_scaling_rule(
|
|
211
|
+
service, instance_config, metrics_provider_config, paasta_cluster
|
|
212
|
+
)
|
|
213
|
+
if metrics_provider_config["type"] == METRICS_PROVIDER_UWSGI_V2:
|
|
214
|
+
return create_instance_uwsgi_v2_scaling_rule(
|
|
215
|
+
service, instance_config, metrics_provider_config, paasta_cluster
|
|
216
|
+
)
|
|
217
|
+
if metrics_provider_config["type"] == METRICS_PROVIDER_PISCINA:
|
|
218
|
+
return create_instance_piscina_scaling_rule(
|
|
219
|
+
service, instance_config, metrics_provider_config, paasta_cluster
|
|
220
|
+
)
|
|
221
|
+
if metrics_provider_config["type"] == METRICS_PROVIDER_GUNICORN:
|
|
222
|
+
return create_instance_gunicorn_scaling_rule(
|
|
223
|
+
service, instance_config, metrics_provider_config, paasta_cluster
|
|
224
|
+
)
|
|
225
|
+
if metrics_provider_config["type"] == METRICS_PROVIDER_ACTIVE_REQUESTS:
|
|
226
|
+
return create_instance_active_requests_scaling_rule(
|
|
227
|
+
service, instance_config, metrics_provider_config, paasta_cluster
|
|
228
|
+
)
|
|
229
|
+
if metrics_provider_config["type"] == METRICS_PROVIDER_PROMQL:
|
|
230
|
+
return create_instance_arbitrary_promql_scaling_rule(
|
|
231
|
+
service, instance_config, metrics_provider_config, paasta_cluster
|
|
232
|
+
)
|
|
233
|
+
|
|
234
|
+
raise ValueError(
|
|
235
|
+
f"unknown metrics provider type: {metrics_provider_config['type']}"
|
|
236
|
+
)
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def create_instance_active_requests_scaling_rule(
|
|
240
|
+
service: str,
|
|
241
|
+
instance_config: KubernetesDeploymentConfig,
|
|
242
|
+
metrics_provider_config: MetricsProviderDict,
|
|
243
|
+
paasta_cluster: str,
|
|
244
|
+
) -> PrometheusAdapterRule:
|
|
245
|
+
"""
|
|
246
|
+
Creates a Prometheus adapter rule config for a given service instance.
|
|
247
|
+
"""
|
|
248
|
+
instance = instance_config.instance
|
|
249
|
+
namespace = instance_config.get_namespace()
|
|
250
|
+
desired_active_requests_per_replica = metrics_provider_config.get(
|
|
251
|
+
"desired_active_requests_per_replica",
|
|
252
|
+
DEFAULT_DESIRED_ACTIVE_REQUESTS_PER_REPLICA,
|
|
253
|
+
)
|
|
254
|
+
moving_average_window = metrics_provider_config.get(
|
|
255
|
+
"moving_average_window_seconds",
|
|
256
|
+
DEFAULT_ACTIVE_REQUESTS_AUTOSCALING_MOVING_AVERAGE_WINDOW,
|
|
257
|
+
)
|
|
258
|
+
deployment_name = get_kubernetes_app_name(service=service, instance=instance)
|
|
259
|
+
|
|
260
|
+
# In order for autoscaling to work safely while a service migrates from one namespace to another, the HPA needs to
|
|
261
|
+
# make sure that the deployment in the new namespace is scaled up enough to handle _all_ the load.
|
|
262
|
+
# This is because once the new deployment is 100% healthy, cleanup_kubernetes_job will delete the deployment out of
|
|
263
|
+
# the old namespace all at once, suddenly putting all the load onto the deployment in the new namespace.
|
|
264
|
+
# To ensure this, we must:
|
|
265
|
+
# - DO NOT filter on namespace in worker_filter_terms (which is used when calculating desired_instances).
|
|
266
|
+
# - DO filter on namespace in replica_filter_terms (which is used to calculate current_replicas).
|
|
267
|
+
# This makes sure that desired_instances includes load from all namespaces, but that the scaling ratio calculated
|
|
268
|
+
# by (desired_instances / current_replicas) is meaningful for each namespace.
|
|
269
|
+
worker_filter_terms = f"paasta_cluster='{paasta_cluster}',paasta_service='{service}',paasta_instance='{instance}'"
|
|
270
|
+
replica_filter_terms = f"paasta_cluster='{paasta_cluster}',deployment='{deployment_name}',namespace='{namespace}'"
|
|
271
|
+
|
|
272
|
+
current_replicas = f"""
|
|
273
|
+
sum(
|
|
274
|
+
label_join(
|
|
275
|
+
(
|
|
276
|
+
kube_deployment_spec_replicas{{{replica_filter_terms}}} >= 0
|
|
277
|
+
or
|
|
278
|
+
max_over_time(
|
|
279
|
+
kube_deployment_spec_replicas{{{replica_filter_terms}}}[{DEFAULT_EXTRAPOLATION_TIME}s]
|
|
280
|
+
)
|
|
281
|
+
),
|
|
282
|
+
"kube_deployment", "", "deployment"
|
|
283
|
+
)
|
|
284
|
+
) by (kube_deployment)
|
|
285
|
+
"""
|
|
286
|
+
|
|
287
|
+
# Envoy tracks metrics at the smartstack namespace level. In most cases the paasta instance name matches the smartstack namespace.
|
|
288
|
+
# In rare cases, there are custom registration added to instance configs.
|
|
289
|
+
# If there is no custom registration the envoy and instance names match and no need to update the worker_filter_terms.
|
|
290
|
+
# If there is a single custom registration for an instance, we will process the registration value and extract the value to be used.
|
|
291
|
+
# The registrations usually follow the format of {service_name}.{smartstack_name}. Hence we split the string by dot and extract the last token.
|
|
292
|
+
# More than one custom registrations are not supported and config validation takes care of rejecting such configs.
|
|
293
|
+
registrations = instance_config.get_registrations()
|
|
294
|
+
|
|
295
|
+
mesh_instance = registrations[0].split(".")[-1] if len(registrations) == 1 else None
|
|
296
|
+
envoy_filter_terms = f"paasta_cluster='{paasta_cluster}',paasta_service='{service}',paasta_instance='{mesh_instance or instance}'"
|
|
297
|
+
|
|
298
|
+
# envoy-based metrics have no labels corresponding to the k8s resources that they
|
|
299
|
+
# front, but we can trivially add one in since our deployment names are of the form
|
|
300
|
+
# {service_name}-{instance_name} - which are both things in `worker_filter_terms` so
|
|
301
|
+
# it's safe to unconditionally add.
|
|
302
|
+
# This is necessary as otherwise the HPA/prometheus adapter does not know what these
|
|
303
|
+
# metrics are for.
|
|
304
|
+
total_load = f"""
|
|
305
|
+
(
|
|
306
|
+
sum(
|
|
307
|
+
label_replace(
|
|
308
|
+
paasta_instance:envoy_cluster__egress_cluster_upstream_rq_active{{{envoy_filter_terms}}},
|
|
309
|
+
"kube_deployment", "{deployment_name}", "", ""
|
|
310
|
+
)
|
|
311
|
+
) by (kube_deployment)
|
|
312
|
+
)
|
|
313
|
+
"""
|
|
314
|
+
desired_instances_at_each_point_in_time = f"""
|
|
315
|
+
{total_load} / {desired_active_requests_per_replica}
|
|
316
|
+
"""
|
|
317
|
+
desired_instances = f"""
|
|
318
|
+
avg_over_time(
|
|
319
|
+
(
|
|
320
|
+
{desired_instances_at_each_point_in_time}
|
|
321
|
+
)[{moving_average_window}s:]
|
|
322
|
+
)
|
|
323
|
+
"""
|
|
324
|
+
|
|
325
|
+
# The prometheus HPA adapter needs kube_deployment and kube_namespace labels attached to the metrics its scaling on.
|
|
326
|
+
# The envoy-based metrics have no labels corresponding to the k8s resources, so we can add them in.
|
|
327
|
+
metrics_query = f"""
|
|
328
|
+
label_replace(
|
|
329
|
+
label_replace(
|
|
330
|
+
{desired_instances} / {current_replicas},
|
|
331
|
+
"kube_deployment", "{deployment_name}", "", ""
|
|
332
|
+
),
|
|
333
|
+
"kube_namespace", "{namespace}", "", ""
|
|
334
|
+
)
|
|
335
|
+
"""
|
|
336
|
+
series_query = f"""
|
|
337
|
+
k8s:deployment:pods_status_ready{{{worker_filter_terms}}}
|
|
338
|
+
"""
|
|
339
|
+
|
|
340
|
+
metric_name = f"{deployment_name}-active-requests-prom"
|
|
341
|
+
|
|
342
|
+
return {
|
|
343
|
+
"name": {"as": metric_name},
|
|
344
|
+
"seriesQuery": _minify_promql(series_query),
|
|
345
|
+
"resources": {"template": "kube_<<.Resource>>"},
|
|
346
|
+
"metricsQuery": _minify_promql(metrics_query),
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def create_instance_uwsgi_scaling_rule(
|
|
351
|
+
service: str,
|
|
352
|
+
instance_config: KubernetesDeploymentConfig,
|
|
353
|
+
metrics_provider_config: MetricsProviderDict,
|
|
354
|
+
paasta_cluster: str,
|
|
355
|
+
) -> PrometheusAdapterRule:
|
|
356
|
+
"""
|
|
357
|
+
Creates a Prometheus adapter rule config for a given service instance.
|
|
358
|
+
"""
|
|
359
|
+
instance = instance_config.instance
|
|
360
|
+
namespace = instance_config.get_namespace()
|
|
361
|
+
setpoint = metrics_provider_config["setpoint"]
|
|
362
|
+
moving_average_window = metrics_provider_config.get(
|
|
363
|
+
"moving_average_window_seconds", DEFAULT_UWSGI_AUTOSCALING_MOVING_AVERAGE_WINDOW
|
|
364
|
+
)
|
|
365
|
+
deployment_name = get_kubernetes_app_name(service=service, instance=instance)
|
|
366
|
+
|
|
367
|
+
# In order for autoscaling to work safely while a service migrates from one namespace to another, the HPA needs to
|
|
368
|
+
# make sure that the deployment in the new namespace is scaled up enough to handle _all_ the load.
|
|
369
|
+
# This is because once the new deployment is 100% healthy, cleanup_kubernetes_job will delete the deployment out of
|
|
370
|
+
# the old namespace all at once, suddenly putting all the load onto the deployment in the new namespace.
|
|
371
|
+
# To ensure this, we must:
|
|
372
|
+
# - DO NOT filter on namespace in worker_filter_terms (which is used when calculating desired_instances).
|
|
373
|
+
# - DO filter on namespace in replica_filter_terms (which is used to calculate current_replicas).
|
|
374
|
+
# This makes sure that desired_instances includes load from all namespaces, but that the scaling ratio calculated
|
|
375
|
+
# by (desired_instances / current_replicas) is meaningful for each namespace.
|
|
376
|
+
worker_filter_terms = f"paasta_cluster='{paasta_cluster}',paasta_service='{service}',paasta_instance='{instance}'"
|
|
377
|
+
replica_filter_terms = f"paasta_cluster='{paasta_cluster}',kube_deployment='{deployment_name}',namespace='{namespace}'"
|
|
378
|
+
|
|
379
|
+
# k8s:deployment:pods_status_ready is a metric created by summing kube_pod_status_ready
|
|
380
|
+
# over paasta service/instance/cluster. it counts the number of ready pods in a paasta
|
|
381
|
+
# deployment.
|
|
382
|
+
ready_pods = f"""
|
|
383
|
+
(sum(
|
|
384
|
+
k8s:deployment:pods_status_ready{{{worker_filter_terms}}} >= 0
|
|
385
|
+
or
|
|
386
|
+
max_over_time(
|
|
387
|
+
k8s:deployment:pods_status_ready{{{worker_filter_terms}}}[{DEFAULT_EXTRAPOLATION_TIME}s]
|
|
388
|
+
)
|
|
389
|
+
) by (kube_deployment))
|
|
390
|
+
"""
|
|
391
|
+
# as mentioned above: we want to get the overload by counting load across namespces - but we need
|
|
392
|
+
# to divide by the ready pods in the target namespace - which is done by using a namespace filter here
|
|
393
|
+
ready_pods_namespaced = f"""
|
|
394
|
+
(sum(
|
|
395
|
+
k8s:deployment:pods_status_ready{{{replica_filter_terms}}} >= 0
|
|
396
|
+
or
|
|
397
|
+
max_over_time(
|
|
398
|
+
k8s:deployment:pods_status_ready{{{replica_filter_terms}}}[{DEFAULT_EXTRAPOLATION_TIME}s]
|
|
399
|
+
)
|
|
400
|
+
) by (kube_deployment))
|
|
401
|
+
"""
|
|
402
|
+
load_per_instance = f"""
|
|
403
|
+
avg(
|
|
404
|
+
uwsgi_worker_busy{{{worker_filter_terms}}}
|
|
405
|
+
) by (kube_pod, kube_deployment)
|
|
406
|
+
"""
|
|
407
|
+
missing_instances = f"""
|
|
408
|
+
clamp_min(
|
|
409
|
+
{ready_pods} - count({load_per_instance}) by (kube_deployment),
|
|
410
|
+
0
|
|
411
|
+
)
|
|
412
|
+
"""
|
|
413
|
+
total_load = f"""
|
|
414
|
+
(
|
|
415
|
+
sum(
|
|
416
|
+
{load_per_instance}
|
|
417
|
+
) by (kube_deployment)
|
|
418
|
+
+
|
|
419
|
+
{missing_instances}
|
|
420
|
+
)
|
|
421
|
+
"""
|
|
422
|
+
desired_instances_at_each_point_in_time = f"""
|
|
423
|
+
{total_load} / {setpoint}
|
|
424
|
+
"""
|
|
425
|
+
desired_instances = f"""
|
|
426
|
+
avg_over_time(
|
|
427
|
+
(
|
|
428
|
+
{desired_instances_at_each_point_in_time}
|
|
429
|
+
)[{moving_average_window}s:]
|
|
430
|
+
)
|
|
431
|
+
"""
|
|
432
|
+
|
|
433
|
+
# our Prometheus query is calculating a desired number of replicas, and then k8s wants that expressed as an average utilization
|
|
434
|
+
# so as long as we divide by the number that k8s ends up multiplying by, we should be able to convince k8s to run any arbitrary
|
|
435
|
+
# number of replicas.
|
|
436
|
+
# k8s happens to multiply by the # of ready pods - so we divide by that rather than by the amount of current replicas (which may
|
|
437
|
+
# include non-ready pods)
|
|
438
|
+
# ref: https://github.com/kubernetes/kubernetes/blob/7ec1a89a509906dad9fd6a4635d7bfc157b47790/pkg/controller/podautoscaler/replica_calculator.go#L278
|
|
439
|
+
metrics_query = f"""
|
|
440
|
+
{desired_instances} / {ready_pods_namespaced}
|
|
441
|
+
"""
|
|
442
|
+
|
|
443
|
+
metric_name = f"{deployment_name}-uwsgi-prom"
|
|
444
|
+
|
|
445
|
+
return {
|
|
446
|
+
"name": {"as": metric_name},
|
|
447
|
+
"seriesQuery": f"uwsgi_worker_busy{{{worker_filter_terms}}}",
|
|
448
|
+
"resources": {"template": "kube_<<.Resource>>"},
|
|
449
|
+
"metricsQuery": _minify_promql(metrics_query),
|
|
450
|
+
}
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
def create_instance_uwsgi_v2_scaling_rule(
|
|
454
|
+
service: str,
|
|
455
|
+
instance_config: KubernetesDeploymentConfig,
|
|
456
|
+
metrics_provider_config: MetricsProviderDict,
|
|
457
|
+
paasta_cluster: str,
|
|
458
|
+
) -> PrometheusAdapterRule:
|
|
459
|
+
"""
|
|
460
|
+
Creates a Prometheus adapter rule config for a given service instance.
|
|
461
|
+
"""
|
|
462
|
+
instance = instance_config.instance
|
|
463
|
+
moving_average_window = metrics_provider_config.get(
|
|
464
|
+
"moving_average_window_seconds", DEFAULT_UWSGI_AUTOSCALING_MOVING_AVERAGE_WINDOW
|
|
465
|
+
)
|
|
466
|
+
deployment_name = get_kubernetes_app_name(service=service, instance=instance)
|
|
467
|
+
|
|
468
|
+
# In order for autoscaling to work safely while a service migrates from one namespace to another, the HPA needs to
|
|
469
|
+
# make sure that the deployment in the new namespace is scaled up enough to handle _all_ the load.
|
|
470
|
+
# This is because once the new deployment is 100% healthy, cleanup_kubernetes_job will delete the deployment out of
|
|
471
|
+
# the old namespace all at once, suddenly putting all the load onto the deployment in the new namespace.
|
|
472
|
+
# To ensure this, we must NOT filter on namespace in worker_filter_terms (which is used when calculating total_load.
|
|
473
|
+
# This makes sure that desired_instances includes load from all namespaces.
|
|
474
|
+
worker_filter_terms = f"paasta_cluster='{paasta_cluster}',paasta_service='{service}',paasta_instance='{instance}'"
|
|
475
|
+
|
|
476
|
+
# k8s:deployment:pods_status_ready is a metric created by summing kube_pod_status_ready
|
|
477
|
+
# over paasta service/instance/cluster. it counts the number of ready pods in a paasta
|
|
478
|
+
# deployment.
|
|
479
|
+
ready_pods = f"""
|
|
480
|
+
(sum(
|
|
481
|
+
k8s:deployment:pods_status_ready{{{worker_filter_terms}}} >= 0
|
|
482
|
+
or
|
|
483
|
+
max_over_time(
|
|
484
|
+
k8s:deployment:pods_status_ready{{{worker_filter_terms}}}[{DEFAULT_EXTRAPOLATION_TIME}s]
|
|
485
|
+
)
|
|
486
|
+
) by (kube_deployment))
|
|
487
|
+
"""
|
|
488
|
+
load_per_instance = f"""
|
|
489
|
+
avg(
|
|
490
|
+
uwsgi_worker_busy{{{worker_filter_terms}}}
|
|
491
|
+
) by (kube_pod, kube_deployment)
|
|
492
|
+
"""
|
|
493
|
+
missing_instances = f"""
|
|
494
|
+
clamp_min(
|
|
495
|
+
{ready_pods} - count({load_per_instance}) by (kube_deployment),
|
|
496
|
+
0
|
|
497
|
+
)
|
|
498
|
+
"""
|
|
499
|
+
total_load = f"""
|
|
500
|
+
(
|
|
501
|
+
sum(
|
|
502
|
+
{load_per_instance}
|
|
503
|
+
) by (kube_deployment)
|
|
504
|
+
+
|
|
505
|
+
{missing_instances}
|
|
506
|
+
)
|
|
507
|
+
"""
|
|
508
|
+
total_load_smoothed = f"""
|
|
509
|
+
avg_over_time(
|
|
510
|
+
(
|
|
511
|
+
{total_load}
|
|
512
|
+
)[{moving_average_window}s:]
|
|
513
|
+
)
|
|
514
|
+
"""
|
|
515
|
+
|
|
516
|
+
metric_name = f"{deployment_name}-uwsgi-v2-prom"
|
|
517
|
+
|
|
518
|
+
return {
|
|
519
|
+
"name": {"as": metric_name},
|
|
520
|
+
"seriesQuery": f"uwsgi_worker_busy{{{worker_filter_terms}}}",
|
|
521
|
+
"resources": {"template": "kube_<<.Resource>>"},
|
|
522
|
+
"metricsQuery": _minify_promql(total_load_smoothed),
|
|
523
|
+
}
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
def create_instance_piscina_scaling_rule(
|
|
527
|
+
service: str,
|
|
528
|
+
instance_config: KubernetesDeploymentConfig,
|
|
529
|
+
metrics_provider_config: MetricsProviderDict,
|
|
530
|
+
paasta_cluster: str,
|
|
531
|
+
) -> PrometheusAdapterRule:
|
|
532
|
+
"""
|
|
533
|
+
Creates a Prometheus adapter rule config for a given service instance.
|
|
534
|
+
"""
|
|
535
|
+
instance = instance_config.instance
|
|
536
|
+
namespace = instance_config.get_namespace()
|
|
537
|
+
setpoint = metrics_provider_config["setpoint"]
|
|
538
|
+
moving_average_window = metrics_provider_config.get(
|
|
539
|
+
"moving_average_window_seconds",
|
|
540
|
+
DEFAULT_PISCINA_AUTOSCALING_MOVING_AVERAGE_WINDOW,
|
|
541
|
+
)
|
|
542
|
+
deployment_name = get_kubernetes_app_name(service=service, instance=instance)
|
|
543
|
+
|
|
544
|
+
# In order for autoscaling to work safely while a service migrates from one namespace to another, the HPA needs to
|
|
545
|
+
# make sure that the deployment in the new namespace is scaled up enough to handle _all_ the load.
|
|
546
|
+
# This is because once the new deployment is 100% healthy, cleanup_kubernetes_job will delete the deployment out of
|
|
547
|
+
# the old namespace all at once, suddenly putting all the load onto the deployment in the new namespace.
|
|
548
|
+
# To ensure this, we must:
|
|
549
|
+
# - DO NOT filter on namespace in worker_filter_terms (which is used when calculating desired_instances).
|
|
550
|
+
# - DO filter on namespace in replica_filter_terms (which is used to calculate current_replicas).
|
|
551
|
+
# This makes sure that desired_instances includes load from all namespaces, but that the scaling ratio calculated
|
|
552
|
+
# by (desired_instances / current_replicas) is meaningful for each namespace.
|
|
553
|
+
worker_filter_terms = f"paasta_cluster='{paasta_cluster}',paasta_service='{service}',paasta_instance='{instance}'"
|
|
554
|
+
replica_filter_terms = f"paasta_cluster='{paasta_cluster}',deployment='{deployment_name}',namespace='{namespace}'"
|
|
555
|
+
|
|
556
|
+
current_replicas = f"""
|
|
557
|
+
sum(
|
|
558
|
+
label_join(
|
|
559
|
+
(
|
|
560
|
+
kube_deployment_spec_replicas{{{replica_filter_terms}}} >= 0
|
|
561
|
+
or
|
|
562
|
+
max_over_time(
|
|
563
|
+
kube_deployment_spec_replicas{{{replica_filter_terms}}}[{DEFAULT_EXTRAPOLATION_TIME}s]
|
|
564
|
+
)
|
|
565
|
+
),
|
|
566
|
+
"kube_deployment", "", "deployment"
|
|
567
|
+
)
|
|
568
|
+
) by (kube_deployment)
|
|
569
|
+
"""
|
|
570
|
+
# k8s:deployment:pods_status_ready is a metric created by summing kube_pod_status_ready
|
|
571
|
+
# over paasta service/instance/cluster. it counts the number of ready pods in a paasta
|
|
572
|
+
# deployment.
|
|
573
|
+
ready_pods = f"""
|
|
574
|
+
(sum(
|
|
575
|
+
k8s:deployment:pods_status_ready{{{worker_filter_terms}}} >= 0
|
|
576
|
+
or
|
|
577
|
+
max_over_time(
|
|
578
|
+
k8s:deployment:pods_status_ready{{{worker_filter_terms}}}[{DEFAULT_EXTRAPOLATION_TIME}s]
|
|
579
|
+
)
|
|
580
|
+
) by (kube_deployment))
|
|
581
|
+
"""
|
|
582
|
+
load_per_instance = f"""
|
|
583
|
+
(piscina_pool_utilization{{{worker_filter_terms}}})
|
|
584
|
+
"""
|
|
585
|
+
missing_instances = f"""
|
|
586
|
+
clamp_min(
|
|
587
|
+
{ready_pods} - count({load_per_instance}) by (kube_deployment),
|
|
588
|
+
0
|
|
589
|
+
)
|
|
590
|
+
"""
|
|
591
|
+
total_load = f"""
|
|
592
|
+
(
|
|
593
|
+
sum(
|
|
594
|
+
{load_per_instance}
|
|
595
|
+
) by (kube_deployment)
|
|
596
|
+
+
|
|
597
|
+
{missing_instances}
|
|
598
|
+
)
|
|
599
|
+
"""
|
|
600
|
+
desired_instances_at_each_point_in_time = f"""
|
|
601
|
+
{total_load} / {setpoint}
|
|
602
|
+
"""
|
|
603
|
+
desired_instances = f"""
|
|
604
|
+
avg_over_time(
|
|
605
|
+
(
|
|
606
|
+
{desired_instances_at_each_point_in_time}
|
|
607
|
+
)[{moving_average_window}s:]
|
|
608
|
+
)
|
|
609
|
+
"""
|
|
610
|
+
metrics_query = f"""
|
|
611
|
+
{desired_instances} / {current_replicas}
|
|
612
|
+
"""
|
|
613
|
+
|
|
614
|
+
return {
|
|
615
|
+
"name": {"as": f"{deployment_name}-piscina-prom"},
|
|
616
|
+
"seriesQuery": f"piscina_pool_utilization{{{worker_filter_terms}}}",
|
|
617
|
+
"resources": {"template": "kube_<<.Resource>>"},
|
|
618
|
+
"metricsQuery": _minify_promql(metrics_query),
|
|
619
|
+
}
|
|
620
|
+
|
|
621
|
+
|
|
622
|
+
def create_instance_gunicorn_scaling_rule(
|
|
623
|
+
service: str,
|
|
624
|
+
instance_config: KubernetesDeploymentConfig,
|
|
625
|
+
metrics_provider_config: MetricsProviderDict,
|
|
626
|
+
paasta_cluster: str,
|
|
627
|
+
) -> PrometheusAdapterRule:
|
|
628
|
+
"""
|
|
629
|
+
Creates a Prometheus adapter rule config for a given service instance.
|
|
630
|
+
"""
|
|
631
|
+
instance = instance_config.instance
|
|
632
|
+
namespace = instance_config.get_namespace()
|
|
633
|
+
setpoint = metrics_provider_config["setpoint"]
|
|
634
|
+
moving_average_window = metrics_provider_config.get(
|
|
635
|
+
"moving_average_window_seconds",
|
|
636
|
+
DEFAULT_GUNICORN_AUTOSCALING_MOVING_AVERAGE_WINDOW,
|
|
637
|
+
)
|
|
638
|
+
|
|
639
|
+
deployment_name = get_kubernetes_app_name(service=service, instance=instance)
|
|
640
|
+
|
|
641
|
+
# In order for autoscaling to work safely while a service migrates from one namespace to another, the HPA needs to
|
|
642
|
+
# make sure that the deployment in the new namespace is scaled up enough to handle _all_ the load.
|
|
643
|
+
# This is because once the new deployment is 100% healthy, cleanup_kubernetes_job will delete the deployment out of
|
|
644
|
+
# the old namespace all at once, suddenly putting all the load onto the deployment in the new namespace.
|
|
645
|
+
# To ensure this, we must:
|
|
646
|
+
# - DO NOT filter on namespace in worker_filter_terms (which is used when calculating desired_instances).
|
|
647
|
+
# - DO filter on namespace in replica_filter_terms (which is used to calculate current_replicas).
|
|
648
|
+
# This makes sure that desired_instances includes load from all namespaces, but that the scaling ratio calculated
|
|
649
|
+
# by (desired_instances / current_replicas) is meaningful for each namespace.
|
|
650
|
+
worker_filter_terms = f"paasta_cluster='{paasta_cluster}',paasta_service='{service}',paasta_instance='{instance}'"
|
|
651
|
+
replica_filter_terms = f"paasta_cluster='{paasta_cluster}',deployment='{deployment_name}',namespace='{namespace}'"
|
|
652
|
+
|
|
653
|
+
current_replicas = f"""
|
|
654
|
+
sum(
|
|
655
|
+
label_join(
|
|
656
|
+
(
|
|
657
|
+
kube_deployment_spec_replicas{{{replica_filter_terms}}} >= 0
|
|
658
|
+
or
|
|
659
|
+
max_over_time(
|
|
660
|
+
kube_deployment_spec_replicas{{{replica_filter_terms}}}[{DEFAULT_EXTRAPOLATION_TIME}s]
|
|
661
|
+
)
|
|
662
|
+
),
|
|
663
|
+
"kube_deployment", "", "deployment"
|
|
664
|
+
)
|
|
665
|
+
) by (kube_deployment)
|
|
666
|
+
"""
|
|
667
|
+
# k8s:deployment:pods_status_ready is a metric created by summing kube_pod_status_ready
|
|
668
|
+
# over paasta service/instance/cluster. it counts the number of ready pods in a paasta
|
|
669
|
+
# deployment.
|
|
670
|
+
ready_pods = f"""
|
|
671
|
+
(sum(
|
|
672
|
+
k8s:deployment:pods_status_ready{{{worker_filter_terms}}} >= 0
|
|
673
|
+
or
|
|
674
|
+
max_over_time(
|
|
675
|
+
k8s:deployment:pods_status_ready{{{worker_filter_terms}}}[{DEFAULT_EXTRAPOLATION_TIME}s]
|
|
676
|
+
)
|
|
677
|
+
) by (kube_deployment))
|
|
678
|
+
"""
|
|
679
|
+
load_per_instance = f"""
|
|
680
|
+
avg(
|
|
681
|
+
gunicorn_worker_busy{{{worker_filter_terms}}}
|
|
682
|
+
) by (kube_pod, kube_deployment)
|
|
683
|
+
"""
|
|
684
|
+
missing_instances = f"""
|
|
685
|
+
clamp_min(
|
|
686
|
+
{ready_pods} - count({load_per_instance}) by (kube_deployment),
|
|
687
|
+
0
|
|
688
|
+
)
|
|
689
|
+
"""
|
|
690
|
+
total_load = f"""
|
|
691
|
+
(
|
|
692
|
+
sum(
|
|
693
|
+
{load_per_instance}
|
|
694
|
+
) by (kube_deployment)
|
|
695
|
+
+
|
|
696
|
+
{missing_instances}
|
|
697
|
+
)
|
|
698
|
+
"""
|
|
699
|
+
desired_instances_at_each_point_in_time = f"""
|
|
700
|
+
{total_load} / {setpoint}
|
|
701
|
+
"""
|
|
702
|
+
desired_instances = f"""
|
|
703
|
+
avg_over_time(
|
|
704
|
+
(
|
|
705
|
+
{desired_instances_at_each_point_in_time}
|
|
706
|
+
)[{moving_average_window}s:]
|
|
707
|
+
)
|
|
708
|
+
"""
|
|
709
|
+
metrics_query = f"""
|
|
710
|
+
{desired_instances} / {current_replicas}
|
|
711
|
+
"""
|
|
712
|
+
|
|
713
|
+
metric_name = f"{deployment_name}-gunicorn-prom"
|
|
714
|
+
|
|
715
|
+
return {
|
|
716
|
+
"name": {"as": metric_name},
|
|
717
|
+
"seriesQuery": f"gunicorn_worker_busy{{{worker_filter_terms}}}",
|
|
718
|
+
"resources": {"template": "kube_<<.Resource>>"},
|
|
719
|
+
"metricsQuery": _minify_promql(metrics_query),
|
|
720
|
+
}
|
|
721
|
+
|
|
722
|
+
|
|
723
|
+
def create_instance_arbitrary_promql_scaling_rule(
|
|
724
|
+
service: str,
|
|
725
|
+
instance_config: KubernetesDeploymentConfig,
|
|
726
|
+
metrics_provider_config: MetricsProviderDict,
|
|
727
|
+
paasta_cluster: str,
|
|
728
|
+
) -> PrometheusAdapterRule:
|
|
729
|
+
instance = instance_config.instance
|
|
730
|
+
namespace = instance_config.get_namespace()
|
|
731
|
+
prometheus_adapter_config = metrics_provider_config["prometheus_adapter_config"]
|
|
732
|
+
deployment_name = get_kubernetes_app_name(service=service, instance=instance)
|
|
733
|
+
|
|
734
|
+
if "seriesQuery" in prometheus_adapter_config:
|
|
735
|
+
# If the user specifies seriesQuery, don't wrap their metricsQuery, under the assumption that they may not want
|
|
736
|
+
# us to mess with their labels.
|
|
737
|
+
series_query = prometheus_adapter_config["seriesQuery"]
|
|
738
|
+
metrics_query = prometheus_adapter_config["metricsQuery"]
|
|
739
|
+
else:
|
|
740
|
+
# If the user doesn't specify seriesQuery, assume they want to just write some promql that returns a number.
|
|
741
|
+
# Set up series_query to match the default `resources`
|
|
742
|
+
series_query = f"""
|
|
743
|
+
kube_deployment_labels{{
|
|
744
|
+
deployment='{deployment_name}',
|
|
745
|
+
paasta_cluster='{paasta_cluster}',
|
|
746
|
+
namespace='{namespace}'
|
|
747
|
+
}}
|
|
748
|
+
"""
|
|
749
|
+
# Wrap their promql with label_replace() calls that add `deployment` / `namespace` labels which match the default `resources`.
|
|
750
|
+
metrics_query = f"""
|
|
751
|
+
label_replace(
|
|
752
|
+
label_replace(
|
|
753
|
+
{prometheus_adapter_config["metricsQuery"]},
|
|
754
|
+
'deployment',
|
|
755
|
+
'{deployment_name}',
|
|
756
|
+
'',
|
|
757
|
+
''
|
|
758
|
+
),
|
|
759
|
+
'namespace',
|
|
760
|
+
'{namespace}',
|
|
761
|
+
'',
|
|
762
|
+
''
|
|
763
|
+
)
|
|
764
|
+
"""
|
|
765
|
+
|
|
766
|
+
return {
|
|
767
|
+
"name": {
|
|
768
|
+
"as": f"{deployment_name}-arbitrary-promql",
|
|
769
|
+
},
|
|
770
|
+
"seriesQuery": _minify_promql(series_query),
|
|
771
|
+
"metricsQuery": _minify_promql(metrics_query),
|
|
772
|
+
"resources": prometheus_adapter_config.get(
|
|
773
|
+
"resources",
|
|
774
|
+
{
|
|
775
|
+
"overrides": {
|
|
776
|
+
"namespace": {"resource": "namespace"},
|
|
777
|
+
"deployment": {"group": "apps", "resource": "deployments"},
|
|
778
|
+
},
|
|
779
|
+
},
|
|
780
|
+
),
|
|
781
|
+
}
|
|
782
|
+
|
|
783
|
+
|
|
784
|
+
def get_rules_for_service_instance(
|
|
785
|
+
service_name: str,
|
|
786
|
+
instance_config: KubernetesDeploymentConfig,
|
|
787
|
+
paasta_cluster: str,
|
|
788
|
+
) -> List[PrometheusAdapterRule]:
|
|
789
|
+
"""
|
|
790
|
+
Returns a list of Prometheus Adapter rules for a given service instance. For now, this
|
|
791
|
+
will always be a 0 or 1-element list - but when we support scaling on multiple metrics
|
|
792
|
+
we will return N rules for a given service instance.
|
|
793
|
+
"""
|
|
794
|
+
rules: List[PrometheusAdapterRule] = []
|
|
795
|
+
|
|
796
|
+
for metrics_provider_type in ALL_METRICS_PROVIDERS:
|
|
797
|
+
metrics_provider_config = instance_config.get_autoscaling_metrics_provider(
|
|
798
|
+
metrics_provider_type
|
|
799
|
+
)
|
|
800
|
+
if metrics_provider_config is None:
|
|
801
|
+
log.debug(
|
|
802
|
+
f"Skipping {service_name}.{instance_config.instance} - no Prometheus-based autoscaling configured for {metrics_provider_type}"
|
|
803
|
+
)
|
|
804
|
+
continue
|
|
805
|
+
|
|
806
|
+
rule = create_instance_scaling_rule(
|
|
807
|
+
service=service_name,
|
|
808
|
+
instance_config=instance_config,
|
|
809
|
+
metrics_provider_config=metrics_provider_config,
|
|
810
|
+
paasta_cluster=paasta_cluster,
|
|
811
|
+
)
|
|
812
|
+
if rule is not None:
|
|
813
|
+
rules.append(rule)
|
|
814
|
+
|
|
815
|
+
return rules
|
|
816
|
+
|
|
817
|
+
|
|
818
|
+


def create_prometheus_adapter_config(
    paasta_cluster: str, soa_dir: Path
) -> PrometheusAdapterConfig:
    """
    Given a paasta cluster and a soaconfigs directory, create the necessary Prometheus adapter
    config to autoscale services.
    Currently supports the following metrics providers:
        * uwsgi
    """
    rules: List[PrometheusAdapterRule] = []
    # get_services_for_cluster() returns a list of (service, instance) tuples, but this
    # is not great for us: if we were to iterate over that we'd end up getting duplicates
    # for every service as PaastaServiceConfigLoader does not expose a way to get configs
    # for a single instance by name. instead, we get the unique set of service names and then
    # let PaastaServiceConfigLoader iterate over instances for us later
    services = {
        service_name
        for service_name, _ in get_services_for_cluster(
            cluster=paasta_cluster, instance_type="kubernetes", soa_dir=str(soa_dir)
        )
    }
    services.update(
        {
            service_name
            for service_name, _ in get_services_for_cluster(
                cluster=paasta_cluster, instance_type="eks", soa_dir=str(soa_dir)
            )
        }
    )
    for service_name in services:
        config_loader = PaastaServiceConfigLoader(
            service=service_name, soa_dir=str(soa_dir)
        )
        for instance_type_class in K8S_INSTANCE_TYPE_CLASSES:
            for instance_config in config_loader.instance_configs(
                cluster=paasta_cluster,
                instance_type_class=instance_type_class,
            ):
                rules.extend(
                    get_rules_for_service_instance(
                        service_name=service_name,
                        instance_config=instance_config,
                        paasta_cluster=paasta_cluster,
                    )
                )

    return {
        # we sort our rules so that we can easily compare between two different configmaps
        # as otherwise we'd need to do fancy order-independent comparisons between the two
        # sets of rules later due to the fact that we're not iterating in a deterministic
        # way and can add rules in any arbitrary order
        "rules": sorted(rules, key=lambda rule: rule["name"]["as"]),
    }
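
# A sketch of what the returned config serializes to once yaml.dump() is applied below
# (rule contents are hypothetical; only the top-level "rules" key and the sort order by
# name.as come from the code above):
#
#   ---
#   rules:
#   - name:
#       as: example--main-arbitrary-promql
#     seriesQuery: kube_deployment_labels{deployment='example--main',...}
#     metricsQuery: label_replace(...)
#     resources:
#       overrides:
#         namespace: {resource: namespace}
#         deployment: {group: apps, resource: deployments}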


def update_prometheus_adapter_configmap(
    kube_client: KubeClient, config: PrometheusAdapterConfig
) -> None:
    kube_client.core.replace_namespaced_config_map(
        name=PROMETHEUS_ADAPTER_CONFIGMAP_NAME,
        namespace=PROMETHEUS_ADAPTER_CONFIGMAP_NAMESPACE,
        body=V1ConfigMap(
            metadata=V1ObjectMeta(name=PROMETHEUS_ADAPTER_CONFIGMAP_NAME),
            data={
                PROMETHEUS_ADAPTER_CONFIGMAP_FILENAME: yaml.dump(
                    config,
                    default_flow_style=False,
                    explicit_start=True,
                    width=sys.maxsize,
                )
            },
        ),
    )


def create_prometheus_adapter_configmap(
    kube_client: KubeClient, config: PrometheusAdapterConfig
) -> None:
    kube_client.core.create_namespaced_config_map(
        namespace=PROMETHEUS_ADAPTER_CONFIGMAP_NAMESPACE,
        body=V1ConfigMap(
            metadata=V1ObjectMeta(name=PROMETHEUS_ADAPTER_CONFIGMAP_NAME),
            data={
                PROMETHEUS_ADAPTER_CONFIGMAP_FILENAME: yaml.dump(
                    config, default_flow_style=False, explicit_start=True
                )
            },
        ),
    )
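
# Once written, the generated ConfigMap can be inspected out-of-band with kubectl, e.g.
# (substituting the actual values of the constants defined earlier in this module):
#
#   kubectl -n <PROMETHEUS_ADAPTER_CONFIGMAP_NAMESPACE> get configmap \
#       <PROMETHEUS_ADAPTER_CONFIGMAP_NAME> -o yaml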


def get_prometheus_adapter_configmap(
    kube_client: KubeClient,
) -> Optional[PrometheusAdapterConfig]:
    try:
        config = cast(
            # we cast since mypy infers the wrong type since the k8s clientlib is untyped
            V1ConfigMap,
            kube_client.core.read_namespaced_config_map(
                name=PROMETHEUS_ADAPTER_CONFIGMAP_NAME,
                namespace=PROMETHEUS_ADAPTER_CONFIGMAP_NAMESPACE,
            ),
        )
    except ApiException as e:
        if e.status == 404:
            return None
        else:
            raise

    if not config:
        return None

    return yaml.safe_load(config.data[PROMETHEUS_ADAPTER_CONFIGMAP_FILENAME])


def restart_prometheus_adapter(kube_client: KubeClient) -> None:
    log.info("Attempting to remove existing adapter pod(s).")
    all_pods = cast(
        # once again, we cast since the kubernetes python api isn't typed
        List[V1Pod],
        kube_client.core.list_namespaced_pod(
            namespace=PROMETHEUS_ADAPTER_POD_NAMESPACE
        ).items,
    )
    # there should only ever be one pod actually up, but we might as well enforce that here
    # just in case there are more
    pods_to_delete = [
        pod
        for pod in all_pods
        if pod.metadata.name.startswith(PROMETHEUS_ADAPTER_POD_NAME_PREFIX)
        and pod.status.phase in PROMETHEUS_ADAPTER_POD_PHASES_TO_REMOVE
    ]
    log.debug("Found the following pods to delete: %s", pods_to_delete)

    for pod in pods_to_delete:
        log.debug("Attempting to remove %s.", pod.metadata.name)
        kube_client.core.delete_namespaced_pod(
            name=pod.metadata.name,
            namespace=pod.metadata.namespace,
            body=V1DeleteOptions(),
            # background propagation with no grace period is equivalent to doing a force-delete from kubectl
            grace_period_seconds=0,
            propagation_policy="Background",
        )
        log.debug("Removed %s.", pod.metadata.name)

    log.info("Adapter restarted successfully")
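
# Per the in-line comment in restart_prometheus_adapter(), deleting each pod with
# grace_period_seconds=0 and Background propagation is roughly what a manual force-delete
# would do, e.g.:
#
#   kubectl -n <PROMETHEUS_ADAPTER_POD_NAMESPACE> delete pod <adapter-pod-name> \
#       --grace-period=0 --force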


def main() -> int:
    args = parse_args()
    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    log.info("Generating adapter config from soaconfigs.")
    config = create_prometheus_adapter_config(
        paasta_cluster=args.cluster,
        soa_dir=args.soa_dir,
    )
    log.info("Generated adapter config from soaconfigs.")
    if args.dry_run:
        log.info(
            "Generated the following config:\n%s",
            yaml.dump(
                config, default_flow_style=False, explicit_start=True, width=sys.maxsize
            ),
        )
        return 0  # everything after this point requires creds/updates state
    else:
        log.debug(
            "Generated the following config:\n%s",
            yaml.dump(
                config, default_flow_style=False, explicit_start=True, width=sys.maxsize
            ),
        )

    if not config["rules"]:
        log.error("Got empty rule configuration - refusing to continue.")
        return 0

    kube_client = KubeClient()
    if not args.dry_run:
        ensure_namespace(kube_client, namespace=PROMETHEUS_ADAPTER_CONFIGMAP_NAMESPACE)

    existing_config = get_prometheus_adapter_configmap(kube_client=kube_client)
    if existing_config and existing_config != config:
        log.info("Existing config differs from soaconfigs - updating.")
        log.debug("Existing data: %s", existing_config)
        log.debug("Desired data: %s", config)
        update_prometheus_adapter_configmap(kube_client=kube_client, config=config)
        log.info("Updated adapter config.")
    elif existing_config:
        log.info("Existing config matches soaconfigs - exiting.")
        return 0
    else:
        log.info("No existing config - creating.")
        create_prometheus_adapter_configmap(kube_client=kube_client, config=config)
        log.info("Created adapter config.")

    # the prometheus adapter doesn't currently have a good way to reload on config changes
    # so we do the next best thing: restart the pod so that it picks up the new config.
    # see: https://github.com/DirectXMan12/k8s-prometheus-adapter/issues/104
    restart_prometheus_adapter(kube_client=kube_client)

    return 0


if __name__ == "__main__":
    sys.exit(main())
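
# Typical invocations (assuming this module ships as paasta_tools/setup_prometheus_adapter_config.py
# and that parse_args(), defined earlier in the file, exposes flags matching the attribute names
# used in main(); the exact flag spellings below are assumptions):
#
#   python -m paasta_tools.setup_prometheus_adapter_config --cluster norcal-devc --dry-run
#   python -m paasta_tools.setup_prometheus_adapter_config --cluster norcal-devc --soa-dir /nail/etc/services -v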