paasta-tools 1.21.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- k8s_itests/__init__.py +0 -0
- k8s_itests/test_autoscaling.py +23 -0
- k8s_itests/utils.py +38 -0
- paasta_tools/__init__.py +20 -0
- paasta_tools/adhoc_tools.py +142 -0
- paasta_tools/api/__init__.py +13 -0
- paasta_tools/api/api.py +330 -0
- paasta_tools/api/api_docs/swagger.json +2323 -0
- paasta_tools/api/client.py +106 -0
- paasta_tools/api/settings.py +33 -0
- paasta_tools/api/tweens/__init__.py +6 -0
- paasta_tools/api/tweens/auth.py +125 -0
- paasta_tools/api/tweens/profiling.py +108 -0
- paasta_tools/api/tweens/request_logger.py +124 -0
- paasta_tools/api/views/__init__.py +13 -0
- paasta_tools/api/views/autoscaler.py +100 -0
- paasta_tools/api/views/exception.py +45 -0
- paasta_tools/api/views/flink.py +73 -0
- paasta_tools/api/views/instance.py +395 -0
- paasta_tools/api/views/pause_autoscaler.py +71 -0
- paasta_tools/api/views/remote_run.py +113 -0
- paasta_tools/api/views/resources.py +76 -0
- paasta_tools/api/views/service.py +35 -0
- paasta_tools/api/views/version.py +25 -0
- paasta_tools/apply_external_resources.py +79 -0
- paasta_tools/async_utils.py +109 -0
- paasta_tools/autoscaling/__init__.py +0 -0
- paasta_tools/autoscaling/autoscaling_service_lib.py +57 -0
- paasta_tools/autoscaling/forecasting.py +106 -0
- paasta_tools/autoscaling/max_all_k8s_services.py +41 -0
- paasta_tools/autoscaling/pause_service_autoscaler.py +77 -0
- paasta_tools/autoscaling/utils.py +52 -0
- paasta_tools/bounce_lib.py +184 -0
- paasta_tools/broadcast_log_to_services.py +62 -0
- paasta_tools/cassandracluster_tools.py +210 -0
- paasta_tools/check_autoscaler_max_instances.py +212 -0
- paasta_tools/check_cassandracluster_services_replication.py +35 -0
- paasta_tools/check_flink_services_health.py +203 -0
- paasta_tools/check_kubernetes_api.py +57 -0
- paasta_tools/check_kubernetes_services_replication.py +141 -0
- paasta_tools/check_oom_events.py +244 -0
- paasta_tools/check_services_replication_tools.py +324 -0
- paasta_tools/check_spark_jobs.py +234 -0
- paasta_tools/cleanup_kubernetes_cr.py +138 -0
- paasta_tools/cleanup_kubernetes_crd.py +145 -0
- paasta_tools/cleanup_kubernetes_jobs.py +344 -0
- paasta_tools/cleanup_tron_namespaces.py +96 -0
- paasta_tools/cli/__init__.py +13 -0
- paasta_tools/cli/authentication.py +85 -0
- paasta_tools/cli/cli.py +260 -0
- paasta_tools/cli/cmds/__init__.py +13 -0
- paasta_tools/cli/cmds/autoscale.py +143 -0
- paasta_tools/cli/cmds/check.py +334 -0
- paasta_tools/cli/cmds/cook_image.py +147 -0
- paasta_tools/cli/cmds/get_docker_image.py +76 -0
- paasta_tools/cli/cmds/get_image_version.py +172 -0
- paasta_tools/cli/cmds/get_latest_deployment.py +93 -0
- paasta_tools/cli/cmds/info.py +155 -0
- paasta_tools/cli/cmds/itest.py +117 -0
- paasta_tools/cli/cmds/list.py +66 -0
- paasta_tools/cli/cmds/list_clusters.py +42 -0
- paasta_tools/cli/cmds/list_deploy_queue.py +171 -0
- paasta_tools/cli/cmds/list_namespaces.py +84 -0
- paasta_tools/cli/cmds/local_run.py +1396 -0
- paasta_tools/cli/cmds/logs.py +1601 -0
- paasta_tools/cli/cmds/mark_for_deployment.py +1988 -0
- paasta_tools/cli/cmds/mesh_status.py +174 -0
- paasta_tools/cli/cmds/pause_service_autoscaler.py +107 -0
- paasta_tools/cli/cmds/push_to_registry.py +275 -0
- paasta_tools/cli/cmds/remote_run.py +252 -0
- paasta_tools/cli/cmds/rollback.py +347 -0
- paasta_tools/cli/cmds/secret.py +549 -0
- paasta_tools/cli/cmds/security_check.py +59 -0
- paasta_tools/cli/cmds/spark_run.py +1400 -0
- paasta_tools/cli/cmds/start_stop_restart.py +401 -0
- paasta_tools/cli/cmds/status.py +2302 -0
- paasta_tools/cli/cmds/validate.py +1012 -0
- paasta_tools/cli/cmds/wait_for_deployment.py +275 -0
- paasta_tools/cli/fsm/__init__.py +13 -0
- paasta_tools/cli/fsm/autosuggest.py +82 -0
- paasta_tools/cli/fsm/template/README.md +8 -0
- paasta_tools/cli/fsm/template/cookiecutter.json +7 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/kubernetes-PROD.yaml +91 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/monitoring.yaml +20 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/service.yaml +8 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/smartstack.yaml +6 -0
- paasta_tools/cli/fsm_cmd.py +121 -0
- paasta_tools/cli/paasta_tabcomplete.sh +23 -0
- paasta_tools/cli/schemas/adhoc_schema.json +199 -0
- paasta_tools/cli/schemas/autoscaling_schema.json +91 -0
- paasta_tools/cli/schemas/autotuned_defaults/cassandracluster_schema.json +37 -0
- paasta_tools/cli/schemas/autotuned_defaults/kubernetes_schema.json +89 -0
- paasta_tools/cli/schemas/deploy_schema.json +173 -0
- paasta_tools/cli/schemas/eks_schema.json +970 -0
- paasta_tools/cli/schemas/kubernetes_schema.json +970 -0
- paasta_tools/cli/schemas/rollback_schema.json +160 -0
- paasta_tools/cli/schemas/service_schema.json +25 -0
- paasta_tools/cli/schemas/smartstack_schema.json +322 -0
- paasta_tools/cli/schemas/tron_schema.json +699 -0
- paasta_tools/cli/utils.py +1118 -0
- paasta_tools/clusterman.py +21 -0
- paasta_tools/config_utils.py +385 -0
- paasta_tools/contrib/__init__.py +0 -0
- paasta_tools/contrib/bounce_log_latency_parser.py +68 -0
- paasta_tools/contrib/check_manual_oapi_changes.sh +24 -0
- paasta_tools/contrib/check_orphans.py +306 -0
- paasta_tools/contrib/create_dynamodb_table.py +35 -0
- paasta_tools/contrib/create_paasta_playground.py +105 -0
- paasta_tools/contrib/emit_allocated_cpu_metrics.py +50 -0
- paasta_tools/contrib/get_running_task_allocation.py +346 -0
- paasta_tools/contrib/habitat_fixer.py +86 -0
- paasta_tools/contrib/ide_helper.py +316 -0
- paasta_tools/contrib/is_pod_healthy_in_proxy.py +139 -0
- paasta_tools/contrib/is_pod_healthy_in_smartstack.py +50 -0
- paasta_tools/contrib/kill_bad_containers.py +109 -0
- paasta_tools/contrib/mass-deploy-tag.sh +44 -0
- paasta_tools/contrib/mock_patch_checker.py +86 -0
- paasta_tools/contrib/paasta_update_soa_memcpu.py +520 -0
- paasta_tools/contrib/render_template.py +129 -0
- paasta_tools/contrib/rightsizer_soaconfigs_update.py +348 -0
- paasta_tools/contrib/service_shard_remove.py +157 -0
- paasta_tools/contrib/service_shard_update.py +373 -0
- paasta_tools/contrib/shared_ip_check.py +77 -0
- paasta_tools/contrib/timeouts_metrics_prom.py +64 -0
- paasta_tools/delete_kubernetes_deployments.py +89 -0
- paasta_tools/deployment_utils.py +44 -0
- paasta_tools/docker_wrapper.py +234 -0
- paasta_tools/docker_wrapper_imports.py +13 -0
- paasta_tools/drain_lib.py +351 -0
- paasta_tools/dump_locally_running_services.py +71 -0
- paasta_tools/eks_tools.py +119 -0
- paasta_tools/envoy_tools.py +373 -0
- paasta_tools/firewall.py +504 -0
- paasta_tools/firewall_logging.py +154 -0
- paasta_tools/firewall_update.py +172 -0
- paasta_tools/flink_tools.py +345 -0
- paasta_tools/flinkeks_tools.py +90 -0
- paasta_tools/frameworks/__init__.py +0 -0
- paasta_tools/frameworks/adhoc_scheduler.py +71 -0
- paasta_tools/frameworks/constraints.py +87 -0
- paasta_tools/frameworks/native_scheduler.py +652 -0
- paasta_tools/frameworks/native_service_config.py +301 -0
- paasta_tools/frameworks/task_store.py +245 -0
- paasta_tools/generate_all_deployments +9 -0
- paasta_tools/generate_authenticating_services.py +94 -0
- paasta_tools/generate_deployments_for_service.py +255 -0
- paasta_tools/generate_services_file.py +114 -0
- paasta_tools/generate_services_yaml.py +30 -0
- paasta_tools/hacheck.py +76 -0
- paasta_tools/instance/__init__.py +0 -0
- paasta_tools/instance/hpa_metrics_parser.py +122 -0
- paasta_tools/instance/kubernetes.py +1362 -0
- paasta_tools/iptables.py +240 -0
- paasta_tools/kafkacluster_tools.py +143 -0
- paasta_tools/kubernetes/__init__.py +0 -0
- paasta_tools/kubernetes/application/__init__.py +0 -0
- paasta_tools/kubernetes/application/controller_wrappers.py +476 -0
- paasta_tools/kubernetes/application/tools.py +90 -0
- paasta_tools/kubernetes/bin/__init__.py +0 -0
- paasta_tools/kubernetes/bin/kubernetes_remove_evicted_pods.py +164 -0
- paasta_tools/kubernetes/bin/paasta_cleanup_remote_run_resources.py +135 -0
- paasta_tools/kubernetes/bin/paasta_cleanup_stale_nodes.py +181 -0
- paasta_tools/kubernetes/bin/paasta_secrets_sync.py +758 -0
- paasta_tools/kubernetes/remote_run.py +558 -0
- paasta_tools/kubernetes_tools.py +4679 -0
- paasta_tools/list_kubernetes_service_instances.py +128 -0
- paasta_tools/list_tron_namespaces.py +60 -0
- paasta_tools/long_running_service_tools.py +678 -0
- paasta_tools/mac_address.py +44 -0
- paasta_tools/marathon_dashboard.py +0 -0
- paasta_tools/mesos/__init__.py +0 -0
- paasta_tools/mesos/cfg.py +46 -0
- paasta_tools/mesos/cluster.py +60 -0
- paasta_tools/mesos/exceptions.py +59 -0
- paasta_tools/mesos/framework.py +77 -0
- paasta_tools/mesos/log.py +48 -0
- paasta_tools/mesos/master.py +306 -0
- paasta_tools/mesos/mesos_file.py +169 -0
- paasta_tools/mesos/parallel.py +52 -0
- paasta_tools/mesos/slave.py +115 -0
- paasta_tools/mesos/task.py +94 -0
- paasta_tools/mesos/util.py +69 -0
- paasta_tools/mesos/zookeeper.py +37 -0
- paasta_tools/mesos_maintenance.py +848 -0
- paasta_tools/mesos_tools.py +1051 -0
- paasta_tools/metrics/__init__.py +0 -0
- paasta_tools/metrics/metastatus_lib.py +1110 -0
- paasta_tools/metrics/metrics_lib.py +217 -0
- paasta_tools/monitoring/__init__.py +13 -0
- paasta_tools/monitoring/check_k8s_api_performance.py +110 -0
- paasta_tools/monitoring_tools.py +652 -0
- paasta_tools/monkrelaycluster_tools.py +146 -0
- paasta_tools/nrtsearchservice_tools.py +143 -0
- paasta_tools/nrtsearchserviceeks_tools.py +68 -0
- paasta_tools/oom_logger.py +321 -0
- paasta_tools/paasta_deploy_tron_jobs +3 -0
- paasta_tools/paasta_execute_docker_command.py +123 -0
- paasta_tools/paasta_native_serviceinit.py +21 -0
- paasta_tools/paasta_service_config_loader.py +201 -0
- paasta_tools/paastaapi/__init__.py +29 -0
- paasta_tools/paastaapi/api/__init__.py +3 -0
- paasta_tools/paastaapi/api/autoscaler_api.py +302 -0
- paasta_tools/paastaapi/api/default_api.py +569 -0
- paasta_tools/paastaapi/api/remote_run_api.py +604 -0
- paasta_tools/paastaapi/api/resources_api.py +157 -0
- paasta_tools/paastaapi/api/service_api.py +1736 -0
- paasta_tools/paastaapi/api_client.py +818 -0
- paasta_tools/paastaapi/apis/__init__.py +22 -0
- paasta_tools/paastaapi/configuration.py +455 -0
- paasta_tools/paastaapi/exceptions.py +137 -0
- paasta_tools/paastaapi/model/__init__.py +5 -0
- paasta_tools/paastaapi/model/adhoc_launch_history.py +176 -0
- paasta_tools/paastaapi/model/autoscaler_count_msg.py +176 -0
- paasta_tools/paastaapi/model/deploy_queue.py +178 -0
- paasta_tools/paastaapi/model/deploy_queue_service_instance.py +194 -0
- paasta_tools/paastaapi/model/envoy_backend.py +185 -0
- paasta_tools/paastaapi/model/envoy_location.py +184 -0
- paasta_tools/paastaapi/model/envoy_status.py +181 -0
- paasta_tools/paastaapi/model/flink_cluster_overview.py +188 -0
- paasta_tools/paastaapi/model/flink_config.py +173 -0
- paasta_tools/paastaapi/model/flink_job.py +186 -0
- paasta_tools/paastaapi/model/flink_job_details.py +192 -0
- paasta_tools/paastaapi/model/flink_jobs.py +175 -0
- paasta_tools/paastaapi/model/float_and_error.py +173 -0
- paasta_tools/paastaapi/model/hpa_metric.py +176 -0
- paasta_tools/paastaapi/model/inline_object.py +170 -0
- paasta_tools/paastaapi/model/inline_response200.py +170 -0
- paasta_tools/paastaapi/model/inline_response2001.py +170 -0
- paasta_tools/paastaapi/model/instance_bounce_status.py +200 -0
- paasta_tools/paastaapi/model/instance_mesh_status.py +186 -0
- paasta_tools/paastaapi/model/instance_status.py +220 -0
- paasta_tools/paastaapi/model/instance_status_adhoc.py +187 -0
- paasta_tools/paastaapi/model/instance_status_cassandracluster.py +173 -0
- paasta_tools/paastaapi/model/instance_status_flink.py +173 -0
- paasta_tools/paastaapi/model/instance_status_kafkacluster.py +173 -0
- paasta_tools/paastaapi/model/instance_status_kubernetes.py +263 -0
- paasta_tools/paastaapi/model/instance_status_kubernetes_autoscaling_status.py +187 -0
- paasta_tools/paastaapi/model/instance_status_kubernetes_v2.py +197 -0
- paasta_tools/paastaapi/model/instance_status_tron.py +204 -0
- paasta_tools/paastaapi/model/instance_tasks.py +182 -0
- paasta_tools/paastaapi/model/integer_and_error.py +173 -0
- paasta_tools/paastaapi/model/kubernetes_container.py +178 -0
- paasta_tools/paastaapi/model/kubernetes_container_v2.py +219 -0
- paasta_tools/paastaapi/model/kubernetes_healthcheck.py +176 -0
- paasta_tools/paastaapi/model/kubernetes_pod.py +201 -0
- paasta_tools/paastaapi/model/kubernetes_pod_event.py +176 -0
- paasta_tools/paastaapi/model/kubernetes_pod_v2.py +213 -0
- paasta_tools/paastaapi/model/kubernetes_replica_set.py +185 -0
- paasta_tools/paastaapi/model/kubernetes_version.py +202 -0
- paasta_tools/paastaapi/model/remote_run_outcome.py +189 -0
- paasta_tools/paastaapi/model/remote_run_start.py +185 -0
- paasta_tools/paastaapi/model/remote_run_stop.py +176 -0
- paasta_tools/paastaapi/model/remote_run_token.py +173 -0
- paasta_tools/paastaapi/model/resource.py +187 -0
- paasta_tools/paastaapi/model/resource_item.py +187 -0
- paasta_tools/paastaapi/model/resource_value.py +176 -0
- paasta_tools/paastaapi/model/smartstack_backend.py +191 -0
- paasta_tools/paastaapi/model/smartstack_location.py +181 -0
- paasta_tools/paastaapi/model/smartstack_status.py +181 -0
- paasta_tools/paastaapi/model/task_tail_lines.py +176 -0
- paasta_tools/paastaapi/model_utils.py +1879 -0
- paasta_tools/paastaapi/models/__init__.py +62 -0
- paasta_tools/paastaapi/rest.py +287 -0
- paasta_tools/prune_completed_pods.py +220 -0
- paasta_tools/puppet_service_tools.py +59 -0
- paasta_tools/py.typed +1 -0
- paasta_tools/remote_git.py +127 -0
- paasta_tools/run-paasta-api-in-dev-mode.py +57 -0
- paasta_tools/run-paasta-api-playground.py +51 -0
- paasta_tools/secret_providers/__init__.py +66 -0
- paasta_tools/secret_providers/vault.py +214 -0
- paasta_tools/secret_tools.py +277 -0
- paasta_tools/setup_istio_mesh.py +353 -0
- paasta_tools/setup_kubernetes_cr.py +412 -0
- paasta_tools/setup_kubernetes_crd.py +138 -0
- paasta_tools/setup_kubernetes_internal_crd.py +154 -0
- paasta_tools/setup_kubernetes_job.py +353 -0
- paasta_tools/setup_prometheus_adapter_config.py +1028 -0
- paasta_tools/setup_tron_namespace.py +248 -0
- paasta_tools/slack.py +75 -0
- paasta_tools/smartstack_tools.py +676 -0
- paasta_tools/spark_tools.py +283 -0
- paasta_tools/synapse_srv_namespaces_fact.py +42 -0
- paasta_tools/tron/__init__.py +0 -0
- paasta_tools/tron/client.py +158 -0
- paasta_tools/tron/tron_command_context.py +194 -0
- paasta_tools/tron/tron_timeutils.py +101 -0
- paasta_tools/tron_tools.py +1448 -0
- paasta_tools/utils.py +4307 -0
- paasta_tools/yaml_tools.py +44 -0
- paasta_tools-1.21.3.data/scripts/apply_external_resources.py +79 -0
- paasta_tools-1.21.3.data/scripts/bounce_log_latency_parser.py +68 -0
- paasta_tools-1.21.3.data/scripts/check_autoscaler_max_instances.py +212 -0
- paasta_tools-1.21.3.data/scripts/check_cassandracluster_services_replication.py +35 -0
- paasta_tools-1.21.3.data/scripts/check_flink_services_health.py +203 -0
- paasta_tools-1.21.3.data/scripts/check_kubernetes_api.py +57 -0
- paasta_tools-1.21.3.data/scripts/check_kubernetes_services_replication.py +141 -0
- paasta_tools-1.21.3.data/scripts/check_manual_oapi_changes.sh +24 -0
- paasta_tools-1.21.3.data/scripts/check_oom_events.py +244 -0
- paasta_tools-1.21.3.data/scripts/check_orphans.py +306 -0
- paasta_tools-1.21.3.data/scripts/check_spark_jobs.py +234 -0
- paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_cr.py +138 -0
- paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_crd.py +145 -0
- paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_jobs.py +344 -0
- paasta_tools-1.21.3.data/scripts/create_dynamodb_table.py +35 -0
- paasta_tools-1.21.3.data/scripts/create_paasta_playground.py +105 -0
- paasta_tools-1.21.3.data/scripts/delete_kubernetes_deployments.py +89 -0
- paasta_tools-1.21.3.data/scripts/emit_allocated_cpu_metrics.py +50 -0
- paasta_tools-1.21.3.data/scripts/generate_all_deployments +9 -0
- paasta_tools-1.21.3.data/scripts/generate_authenticating_services.py +94 -0
- paasta_tools-1.21.3.data/scripts/generate_deployments_for_service.py +255 -0
- paasta_tools-1.21.3.data/scripts/generate_services_file.py +114 -0
- paasta_tools-1.21.3.data/scripts/generate_services_yaml.py +30 -0
- paasta_tools-1.21.3.data/scripts/get_running_task_allocation.py +346 -0
- paasta_tools-1.21.3.data/scripts/habitat_fixer.py +86 -0
- paasta_tools-1.21.3.data/scripts/ide_helper.py +316 -0
- paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_proxy.py +139 -0
- paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_smartstack.py +50 -0
- paasta_tools-1.21.3.data/scripts/kill_bad_containers.py +109 -0
- paasta_tools-1.21.3.data/scripts/kubernetes_remove_evicted_pods.py +164 -0
- paasta_tools-1.21.3.data/scripts/mass-deploy-tag.sh +44 -0
- paasta_tools-1.21.3.data/scripts/mock_patch_checker.py +86 -0
- paasta_tools-1.21.3.data/scripts/paasta_cleanup_remote_run_resources.py +135 -0
- paasta_tools-1.21.3.data/scripts/paasta_cleanup_stale_nodes.py +181 -0
- paasta_tools-1.21.3.data/scripts/paasta_deploy_tron_jobs +3 -0
- paasta_tools-1.21.3.data/scripts/paasta_execute_docker_command.py +123 -0
- paasta_tools-1.21.3.data/scripts/paasta_secrets_sync.py +758 -0
- paasta_tools-1.21.3.data/scripts/paasta_tabcomplete.sh +23 -0
- paasta_tools-1.21.3.data/scripts/paasta_update_soa_memcpu.py +520 -0
- paasta_tools-1.21.3.data/scripts/render_template.py +129 -0
- paasta_tools-1.21.3.data/scripts/rightsizer_soaconfigs_update.py +348 -0
- paasta_tools-1.21.3.data/scripts/service_shard_remove.py +157 -0
- paasta_tools-1.21.3.data/scripts/service_shard_update.py +373 -0
- paasta_tools-1.21.3.data/scripts/setup_istio_mesh.py +353 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_cr.py +412 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_crd.py +138 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_internal_crd.py +154 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_job.py +353 -0
- paasta_tools-1.21.3.data/scripts/setup_prometheus_adapter_config.py +1028 -0
- paasta_tools-1.21.3.data/scripts/shared_ip_check.py +77 -0
- paasta_tools-1.21.3.data/scripts/synapse_srv_namespaces_fact.py +42 -0
- paasta_tools-1.21.3.data/scripts/timeouts_metrics_prom.py +64 -0
- paasta_tools-1.21.3.dist-info/LICENSE +201 -0
- paasta_tools-1.21.3.dist-info/METADATA +74 -0
- paasta_tools-1.21.3.dist-info/RECORD +348 -0
- paasta_tools-1.21.3.dist-info/WHEEL +5 -0
- paasta_tools-1.21.3.dist-info/entry_points.txt +20 -0
- paasta_tools-1.21.3.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,1012 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# Copyright 2015-2016 Yelp Inc.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
import functools
|
|
16
|
+
import json
|
|
17
|
+
import os
|
|
18
|
+
import pkgutil
|
|
19
|
+
import re
|
|
20
|
+
from collections import Counter
|
|
21
|
+
from datetime import datetime
|
|
22
|
+
from functools import lru_cache
|
|
23
|
+
from functools import partial
|
|
24
|
+
from glob import glob
|
|
25
|
+
from typing import Any
|
|
26
|
+
from typing import Callable
|
|
27
|
+
from typing import cast
|
|
28
|
+
from typing import Dict
|
|
29
|
+
from typing import List
|
|
30
|
+
from typing import Optional
|
|
31
|
+
from typing import Set
|
|
32
|
+
from typing import Tuple
|
|
33
|
+
from typing import Union
|
|
34
|
+
|
|
35
|
+
import pytz
|
|
36
|
+
from croniter import croniter
|
|
37
|
+
from jsonschema import Draft4Validator
|
|
38
|
+
from jsonschema import exceptions
|
|
39
|
+
from jsonschema import FormatChecker
|
|
40
|
+
from jsonschema import RefResolver
|
|
41
|
+
from jsonschema import ValidationError
|
|
42
|
+
from mypy_extensions import TypedDict
|
|
43
|
+
from ruamel.yaml import SafeConstructor
|
|
44
|
+
from ruamel.yaml import YAML
|
|
45
|
+
from ruamel.yaml.comments import CommentedMap
|
|
46
|
+
|
|
47
|
+
from paasta_tools import yaml_tools as yaml
|
|
48
|
+
from paasta_tools.autoscaling.utils import MetricsProviderDict
|
|
49
|
+
from paasta_tools.cli.utils import failure
|
|
50
|
+
from paasta_tools.cli.utils import get_file_contents
|
|
51
|
+
from paasta_tools.cli.utils import get_instance_config
|
|
52
|
+
from paasta_tools.cli.utils import guess_service_name
|
|
53
|
+
from paasta_tools.cli.utils import info_message
|
|
54
|
+
from paasta_tools.cli.utils import lazy_choices_completer
|
|
55
|
+
from paasta_tools.cli.utils import PaastaColors
|
|
56
|
+
from paasta_tools.cli.utils import success
|
|
57
|
+
from paasta_tools.kubernetes_tools import sanitise_kubernetes_name
|
|
58
|
+
from paasta_tools.long_running_service_tools import DEFAULT_AUTOSCALING_SETPOINT
|
|
59
|
+
from paasta_tools.long_running_service_tools import LongRunningServiceConfig
|
|
60
|
+
from paasta_tools.long_running_service_tools import METRICS_PROVIDER_ACTIVE_REQUESTS
|
|
61
|
+
from paasta_tools.long_running_service_tools import METRICS_PROVIDER_CPU
|
|
62
|
+
from paasta_tools.long_running_service_tools import METRICS_PROVIDER_GUNICORN
|
|
63
|
+
from paasta_tools.long_running_service_tools import METRICS_PROVIDER_PISCINA
|
|
64
|
+
from paasta_tools.long_running_service_tools import METRICS_PROVIDER_PROMQL
|
|
65
|
+
from paasta_tools.long_running_service_tools import METRICS_PROVIDER_UWSGI
|
|
66
|
+
from paasta_tools.long_running_service_tools import METRICS_PROVIDER_UWSGI_V2
|
|
67
|
+
from paasta_tools.secret_tools import get_secret_name_from_ref
|
|
68
|
+
from paasta_tools.secret_tools import is_secret_ref
|
|
69
|
+
from paasta_tools.secret_tools import is_shared_secret
|
|
70
|
+
from paasta_tools.tron_tools import DEFAULT_TZ
|
|
71
|
+
from paasta_tools.tron_tools import list_tron_clusters
|
|
72
|
+
from paasta_tools.tron_tools import load_tron_service_config
|
|
73
|
+
from paasta_tools.tron_tools import TronJobConfig
|
|
74
|
+
from paasta_tools.tron_tools import validate_complete_config
|
|
75
|
+
from paasta_tools.utils import get_service_instance_list
|
|
76
|
+
from paasta_tools.utils import InstanceConfig
|
|
77
|
+
from paasta_tools.utils import InstanceConfigDict
|
|
78
|
+
from paasta_tools.utils import list_all_instances_for_service
|
|
79
|
+
from paasta_tools.utils import list_clusters
|
|
80
|
+
from paasta_tools.utils import list_services
|
|
81
|
+
from paasta_tools.utils import load_system_paasta_config
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class SoaValidationError(Exception):
    """Base exception for errors found while validating soa-configs files."""

    pass
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class AutoscalingValidationError(SoaValidationError):
    """Raised when an instance's autoscaling configuration is invalid."""

    pass
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# Canned user-facing messages for schema validation results.
SCHEMA_VALID = success("Successfully validated schema")

SCHEMA_ERROR = failure(
    "Failed to load schema.",
    "http://paasta.readthedocs.io/en/latest/yelpsoa_configs.html",
)

SCHEMA_INVALID = failure(
    "Failed to validate schema. More info:",
    "http://paasta.readthedocs.io/en/latest/yelpsoa_configs.html",
)

FAILED_READING_FILE = failure(
    "Failed to read file. More info:",
    "http://paasta.readthedocs.io/en/latest/yelpsoa_configs.html",
)

# Shown when no service name was given and none could be guessed from cwd.
UNKNOWN_SERVICE = (
    "Unable to determine service to validate.\n"
    "Please supply the %s name you wish to "
    "validate with the %s option."
    % (PaastaColors.cyan("SERVICE"), PaastaColors.cyan("-s"))
)

# The set of soa-configs file types that have a JSON schema to validate against
# (schemas live under paasta_tools/cli/schemas/<type>_schema.json).
SCHEMA_TYPES = {
    "service",  # service metadata
    "adhoc",
    "kubernetes",  # long-running services
    "rollback",  # automatic rollbacks during deployments
    "tron",  # batch workloads
    "eks",  # eks workloads
    "autotuned_defaults/kubernetes",
    "autotuned_defaults/cassandracluster",
}
# we expect a comment that looks like # override-cpu-setting PROJ-1234
# but we don't have a $ anchor in case users want to add an additional
# comment
OVERRIDE_CPU_AUTOTUNE_ACK_PATTERN = r"#\s*override-cpu-setting\s+\(.+[A-Z]+-[0-9]+.+\)"

# we expect a comment that looks like # override-cpu-burst PROJ-1234
# but we don't have a $ anchor in case users want to add an additional
# comment
OVERRIDE_CPU_BURST_ACK_PATTERN = r"#\s*override-cpu-burst\s+\(.+[A-Z]+-[0-9]+.+\)"
# for now, double the autotune cap to give people the benefit of the doubt
# if we see that people are still misusing this configuration, we can lower
# this to the autotune cap (i.e., 1)
CPU_BURST_THRESHOLD = 2

# Instance types that are scheduled on Kubernetes.
K8S_TYPES = {"eks", "kubernetes"}

# Maps each metrics provider to the autoscaling fields that are NOT valid for
# it; used to reject nonsensical combinations in autoscaling config.
INVALID_AUTOSCALING_FIELDS = {
    # setpoint isn't included here because we need to confirm that setpoint = 0.8
    # (since it's auto-added at parse-time)
    METRICS_PROVIDER_ACTIVE_REQUESTS: {"prometheus-adapter-config"},
    METRICS_PROVIDER_CPU: {
        "desired_active_requests_per_replica",
        "prometheus-adapter-config",
    },
    METRICS_PROVIDER_GUNICORN: {
        "desired_active_requests_per_replica",
        "prometheus-adapter-config",
    },
    METRICS_PROVIDER_PISCINA: {
        "desired_active_requests_per_replica",
        "prometheus-adapter-config",
    },
    METRICS_PROVIDER_UWSGI: {
        "desired_active_requests_per_replica",
        "prometheus-adapter-config",
    },
    METRICS_PROVIDER_UWSGI_V2: {
        "desired_active_requests_per_replica",
        "prometheus-adapter-config",
    },
    METRICS_PROVIDER_PROMQL: {"desired_active_requests_per_replica"},
}
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
class ConditionConfig(TypedDict, total=False):
    """
    Common config options for all Conditions
    """

    # for now, this is the only key required by the schema
    query: str
    # and only one of these needs to be present (enforced in code, not schema --
    # see validate_rollback_bounds)
    upper_bound: Optional[Union[int, float]]
    lower_bound: Optional[Union[int, float]]

    # truly optional
    dry_run: bool
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
@functools.lru_cache()
def load_all_instance_configs_for_service(
    service: str, cluster: str, soa_dir: str
) -> Tuple[Tuple[str, InstanceConfig], ...]:
    """Load every instance config for ``service`` in ``cluster``.

    Returns a tuple of ``(instance_name, InstanceConfig)`` pairs; a tuple
    (rather than a list) so the result is hashable and safe to memoize with
    ``lru_cache``. Deployments are not loaded for the instance configs.
    """
    return tuple(
        (
            instance,
            get_instance_config(
                service=service,
                instance=instance,
                cluster=cluster,
                load_deployments=False,
                soa_dir=soa_dir,
            ),
        )
        for instance in list_all_instances_for_service(
            service=service, clusters=[cluster], soa_dir=soa_dir
        )
    )
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def invalid_tron_namespace(cluster, output, filename):
    """Build the failure message shown when a tron namespace file fails validation.

    NOTE(review): ``cluster`` is unused here but appears to be kept for
    signature parity with ``valid_tron_namespace`` -- confirm before removing.
    """
    message = "%s is invalid:\n %s\n " "More info:" % (filename, output)
    return failure(
        message,
        "http://tron.readthedocs.io/en/latest/jobs.html",
    )
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def valid_tron_namespace(cluster, filename):
    """Build the success message shown when a tron namespace file validates cleanly.

    :param cluster: tron cluster validated against (unused in the message; kept
        for signature parity with invalid_tron_namespace)
    :param filename: path of the validated tron config file
    """
    # Bug fix: the f-string previously contained no placeholder, so every file
    # was reported as the literal "(unknown) is valid." -- report the filename.
    return success(f"{filename} is valid.")
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def duplicate_instance_names_message(service, cluster, instance_names):
    """Build the failure message listing duplicated instance names in a cluster."""
    listed = "\n\t".join(instance_names)
    return failure(
        f"Service {service} uses the following duplicate instance names for "
        f"cluster {cluster}:\n\t{listed}\n",
        "https://paasta.readthedocs.io/en/latest/yelpsoa_configs.html",
    )
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def no_duplicate_instance_names_message(service, cluster):
    """Build the success message for a cluster with no duplicated instance names."""
    return success(f"All {service}'s instance names in cluster {cluster} are unique")
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def get_schema_validator(file_type: str) -> Draft4Validator:
    """Get the correct schema to use for validation

    :param file_type: what schema type should we validate against
    """
    schema_path = f"schemas/{file_type}_schema.json"
    autoscaling_path = "schemas/autoscaling_schema.json"
    schema = pkgutil.get_data("paasta_tools.cli", schema_path).decode()
    autoscaling_ref = pkgutil.get_data("paasta_tools.cli", autoscaling_path).decode()

    # This bit of code loads the base schemas and any relevant "referenced" schemas
    # into a shared "store" -- so that you can reference the shared schema without
    # having to find the exact right path on disk in your schema file. If you want
    # to reference one schema from another, you still have to include a
    # {"$ref": "<schema_id>#field"} section in your JsonSchema
    #
    # (see https://python-jsonschema.readthedocs.io/en/v2.6.0/references/ and this
    # stack overflow answer https://stackoverflow.com/a/65150457 for details)
    #
    # Also note that this functionality has changed significantly in modern versions
    # of python-jsonschema, so if we ever update we'll need to do some work here.
    base_schema = json.loads(schema)
    autoscaling_schema = json.loads(autoscaling_ref)
    store = {
        "base": base_schema,
        # Fix: reuse the already-parsed autoscaling schema instead of
        # re-running json.loads() on the same string.
        autoscaling_schema["$id"]: autoscaling_schema,
    }

    resolver = RefResolver.from_schema(base_schema, store=store)
    return Draft4Validator(
        # Fix: reuse base_schema rather than parsing the schema text a second
        # time -- the content is identical either way.
        base_schema,
        resolver=resolver,
        format_checker=FormatChecker(),
    )
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def validate_rollback_bounds(
    config: Dict[str, List["ConditionConfig"]], file_loc: str
) -> bool:
    """
    Ensure that at least one of upper_bound or lower_bound is set (and set to non-null values)

    :param config: mapping of metric source name -> list of condition configs
    :param file_loc: path of the rollback file, used to prefix error messages
    :returns: True when every condition has at least one bound set, else False
    """
    errors = []

    for source, queries in config.items():
        for query in queries:
            # Fix: compare against None explicitly. The previous truthiness
            # check rejected a bound of 0, which is falsy but a perfectly
            # valid (non-null) threshold.
            if (
                query.get("lower_bound") is None
                and query.get("upper_bound") is None
            ):
                errors.append(
                    f"{file_loc}:{source}: {query['query']} needs one of lower_bound OR upper_bound set."
                )

    for error in errors:
        print(
            failure(error, link=""),  # TODO: point to actual docs once they exist
        )

    return len(errors) == 0
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def validate_instance_names(config_file_object, file_path):
    """Ensure non-template instance names fit Kubernetes' 63-char name limit.

    Names starting with "_" are yaml templates/anchors and are skipped.
    Returns True when every real instance name is short enough after
    kubernetes-name sanitisation.
    """
    too_long = [
        name
        for name in config_file_object
        if not name.startswith("_") and len(sanitise_kubernetes_name(name)) > 63
    ]
    if too_long:
        joined = "\n".join(too_long)
        print(
            failure(
                f"Length of instance name \n{joined}\n should be no more than 63."
                + " Note _ is replaced with -- due to Kubernetes restriction",
                "http://paasta.readthedocs.io/en/latest/yelpsoa_configs.html",
            )
        )
    return len(too_long) == 0
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def validate_service_name(service):
    """Check that the service name fits Kubernetes' 63-character limit."""
    # guard clause: short-enough names are fine as-is
    if len(sanitise_kubernetes_name(service)) <= 63:
        return True
    print(
        failure(
            f"Length of service name {service} should be no more than 63."
            + " Note _ is replaced with - due to Kubernetes restriction",
            "http://paasta.readthedocs.io/en/latest/yelpsoa_configs.html",
        )
    )
    return False
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
@lru_cache()
def get_config_file_dict(file_path: str, use_ruamel: bool = False) -> Dict[Any, Any]:
    """Read and parse a config file, dispatching on its file extension.

    :param file_path: path of the file to load
    :param use_ruamel: for .yaml files, parse with ruamel's round-trip loader
        (which preserves comments) instead of yaml.safe_load
    :returns: the parsed mapping for .yaml/.json files; the raw file contents
        for any other extension
    :raises: re-raises any read/parse error after printing a failure message

    NOTE: results are memoized per (file_path, use_ruamel) by lru_cache, so
    repeated validation passes don't re-read the same file from disk.
    """
    basename = os.path.basename(file_path)
    extension = os.path.splitext(basename)[1]
    try:
        config_file = get_file_contents(file_path)
        if extension == ".yaml":
            if use_ruamel:
                # "rt" = round-trip mode: keeps comments attached to the data
                ruamel_loader = YAML(typ="rt")
                # there are templates that define keys that are later overwritten
                # when those templates are actually used (e.g., a template that
                # sets disk: 100 -> an instance uses that template and overwrites
                # it with disk: 1000)
                ruamel_loader.allow_duplicate_keys = True
                # we want to actually expand out all anchors so that we still get
                # comments from the original block
                ruamel_loader.Constructor.flatten_mapping = (
                    SafeConstructor.flatten_mapping
                )
                return ruamel_loader.load(config_file)
            else:
                return yaml.safe_load(config_file)
        elif extension == ".json":
            return json.loads(config_file)
        else:
            # unknown extension: hand back the raw text unparsed
            return config_file
    except Exception:
        print(f"{FAILED_READING_FILE}: {file_path}")
        raise
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
def validate_schema(file_path: str, file_type: str) -> bool:
    """Check if the specified config file has a valid schema

    :param file_path: path to file to validate
    :param file_type: what schema type should we validate against
    :returns: True when the file passes jsonschema validation plus any
        extra per-type checks; False (with a printed reason) otherwise
    """
    try:
        validator = get_schema_validator(file_type)
    except Exception as e:
        # couldn't even build a validator for this type (bad/missing schema)
        print(f"{SCHEMA_ERROR}: {file_type}, error: {e!r}")
        return False

    basename = os.path.basename(file_path)
    config_file_object = get_config_file_dict(file_path)
    try:
        validator.validate(config_file_object)
        # extra checks that jsonschema can't express:
        # k8s-backed instance types must also have k8s-safe instance names
        if file_type in K8S_TYPES and not validate_instance_names(
            config_file_object, file_path
        ):
            return False

        # rollback files must set at least one bound per condition
        if file_type == "rollback" and not validate_rollback_bounds(
            config_file_object["conditions"],
            file_path,
        ):
            return False

    except ValidationError:
        print(f"{SCHEMA_INVALID}: {file_path}")

        # re-iterate the errors to surface the most relevant one to the user
        errors = validator.iter_errors(config_file_object)
        print(" Validation Message: %s" % exceptions.best_match(errors).message)
        return False
    except Exception as e:
        print(f"{SCHEMA_ERROR}: {file_type}, error: {e!r}")
        return False
    else:
        print(f"{SCHEMA_VALID}: {basename}")
        return True
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
def validate_all_schemas(service_path: str) -> bool:
    """Finds all recognized config files in service directory,
    and validates their schema.

    :param service_path: path to location of configuration files
    :returns: True when every matched file validates
    """
    all_valid = True
    pattern = os.path.join(service_path, "**/*.yaml")

    for file_name in glob(pattern, recursive=True):
        # symlinked configs are validated via their targets
        if os.path.islink(file_name):
            continue

        relative_name = os.path.relpath(file_name, start=service_path)
        for file_type in SCHEMA_TYPES:
            # file type is determined by filename prefix (e.g. "kubernetes-...")
            if relative_name.startswith(file_type) and not validate_schema(
                file_name, file_type
            ):
                all_valid = False

    return all_valid
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
def add_subparser(subparsers):
    """Register the `paasta validate` subcommand and its CLI options.

    :param subparsers: argparse subparsers object from the top-level paasta CLI
    """
    validate_parser = subparsers.add_parser(
        "validate",
        description="Execute 'paasta validate' from service repo root",
        help="Validate that all paasta config files in pwd are correct",
    )
    validate_parser.add_argument(
        "-s",
        "--service",
        required=False,
        help="Service that you want to validate. Like 'example_service'.",
        # tab-completion over the known service names
    ).completer = lazy_choices_completer(list_services)
    validate_parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        required=False,
        help="Toggle to display additional validation messages for humans.",
    )
    validate_parser.add_argument(
        "-y",
        "--yelpsoa-config-root",
        dest="yelpsoa_config_root",
        # default assumes the command is run from the soa-configs checkout
        default=os.getcwd(),
        required=False,
        help="Path to root of yelpsoa-configs checkout",
    )
    # dispatch: the CLI invokes args.command(args) -> paasta_validate
    validate_parser.set_defaults(command=paasta_validate)
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
def check_service_path(service_path):
    """Check that the specified path exists and has yaml files

    :param service_path: Path to directory that should contain yaml files
    :returns: True when the path is a directory containing at least one .yaml
    """
    docs_link = "http://paasta.readthedocs.io/en/latest/yelpsoa_configs.html"

    if not service_path or not os.path.isdir(service_path):
        print(failure("%s is not a directory" % service_path, docs_link))
        return False

    has_yaml = bool(glob(os.path.join(service_path, "*.yaml")))
    if not has_yaml:
        print(failure("%s does not contain any .yaml files" % service_path, docs_link))
        return False

    return True
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
def get_service_path(service, soa_dir):
    """Determine the path of the directory containing the conf files

    :param service: Name of service
    :param soa_dir: Directory containing soa configs for all services
    :returns: the service's config directory, or None when it can't be inferred
    """
    if service:
        return os.path.join(soa_dir, service)
    # No explicit service: only valid when running from inside the service
    # directory itself (i.e. soa_dir *is* the cwd)
    if soa_dir == os.getcwd():
        return os.getcwd()
    print(UNKNOWN_SERVICE)
    return None
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
def path_to_soa_dir_service(service_path):
    """Split a service_path into its soa_dir and service name components"""
    # os.path.split == (dirname, basename) in a single call
    soa_dir, service = os.path.split(service_path)
    return soa_dir, service
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
def validate_tron(service_path: str, verbose: bool = False) -> bool:
    """Validate this service's tron namespace on every tron cluster it targets.

    :param service_path: path to the service's soa-configs directory
    :param verbose: when True, also print upcoming cron runs for valid jobs
    :returns: True when every tron cluster's namespace validates
    """
    soa_dir, service = path_to_soa_dir_service(service_path)
    returncode = True

    for cluster in list_tron_clusters(service, soa_dir):
        if not validate_tron_namespace(service, cluster, soa_dir):
            returncode = False
        elif verbose:
            # service config has been validated and cron schedules should be safe to parse

            # TODO(TRON-1761): unify tron/paasta validate cron syntax validation
            service_config = load_tron_service_config(
                service=service, cluster=cluster, soa_dir=soa_dir
            )
            for config in service_config:
                cron_expression = config.get_cron_expression()
                # jobs without a cron schedule have nothing to preview
                if cron_expression:
                    print_upcoming_runs(config, cron_expression)

    return returncode
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
def print_upcoming_runs(config: TronJobConfig, cron_expression: str) -> None:
    """Print the next few scheduled fire times for a tron job's cron schedule."""
    print(info_message(f"Upcoming runs for {config.get_name()}:"))

    # fall back to the PaaSTA-wide default when the job doesn't set a timezone
    tz_name = config.get_time_zone() or DEFAULT_TZ
    now_localized = pytz.timezone(tz_name).localize(datetime.today())

    upcoming = list_upcoming_runs(
        cron_schedule=cron_expression,
        starting_from=now_localized,
    )
    for run in upcoming:
        print(f"\t{run}")
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
def validate_tron_namespace(service, cluster, soa_dir, tron_dir=False):
    """Validate one service's tron config for a cluster, printing the outcome.

    :param tron_dir: when True, configs are laid out tron-style
        (cluster/service.yaml) rather than paasta-style (tron-cluster.yaml);
        this only affects the name shown in output.
    :returns: True when the config produced no validation messages
    """
    display_name = f"{cluster}/{service}.yaml" if tron_dir else f"tron-{cluster}.yaml"

    messages = validate_complete_config(service, cluster, soa_dir)

    if messages:
        print(invalid_tron_namespace(cluster, "\n ".join(messages), display_name))
    else:
        print(valid_tron_namespace(cluster, display_name))

    return len(messages) == 0
|
|
551
|
+
|
|
552
|
+
|
|
553
|
+
def validate_paasta_objects(service_path):
    """Instantiate every instance config for the service and run its validate().

    :param service_path: path to the service's soa-configs directory
    :returns: True when no instance reported any validation messages
    """
    soa_dir, service = path_to_soa_dir_service(service_path)

    messages = []
    for cluster in list_clusters(service, soa_dir):
        for instance, instance_config in load_all_instance_configs_for_service(
            service=service, cluster=cluster, soa_dir=soa_dir
        ):
            # each config object knows how to validate itself; collect all
            # messages across every cluster/instance before reporting
            messages.extend(instance_config.validate())
    returncode = len(messages) == 0

    if messages:
        errors = "\n".join(messages)
        print(failure((f"There were failures validating {service}: {errors}"), ""))
    else:
        # fixed: message previously read "All PaaSTA Instances for are valid..."
        # (stray "for") and used an f-string with no placeholders
        print(success("All PaaSTA Instances are valid for all clusters"))

    return returncode
|
|
572
|
+
|
|
573
|
+
|
|
574
|
+
def validate_unique_instance_names(service_path):
    """Check that the service does not use the same instance name more than once"""
    soa_dir, service = path_to_soa_dir_service(service_path)
    all_unique = True

    for cluster in list_clusters(service, soa_dir):
        pairs = get_service_instance_list(
            service=service, cluster=cluster, soa_dir=soa_dir
        )
        # each entry is a (service, instance) pair; tally the instance names
        name_counts = Counter(pair[1] for pair in pairs)
        duplicates = [name for name, count in name_counts.items() if count > 1]

        if duplicates:
            all_unique = False
            print(duplicate_instance_names_message(service, cluster, duplicates))
        else:
            print(no_duplicate_instance_names_message(service, cluster))

    return all_unique
|
|
601
|
+
|
|
602
|
+
|
|
603
|
+
def _get_comments_for_key(data: CommentedMap, key: Any) -> Optional[str]:
    """Return the concatenated comment text attached to ``key``, or None.

    ruamel stores a key's comments as a sparse list like
    [None, None, CommentToken(...), None] — and sometimes nests further
    lists of CommentTokens inside it — so we flatten recursively and join
    the token values.
    """

    def _comment_values(entries):
        for entry in entries:
            if entry is None:
                continue
            if isinstance(entry, list):
                yield from _comment_values(entry)
            else:
                yield entry.value

    pieces = list(_comment_values(data.ca.items.get(key, [])))
    # distinguish "no comments at all" (None) from an empty comment string
    return "".join(pieces) if pieces else None
|
|
626
|
+
|
|
627
|
+
|
|
628
|
+
def __is_templated(service: str, soa_dir: str, cluster: str, workload: str) -> bool:
    """Return True when this cluster's config is generated from a .in template."""
    template_path = os.path.join(
        os.path.abspath(soa_dir), service, f"{workload}-{cluster}.in"
    )
    return os.path.exists(template_path)
|
|
632
|
+
|
|
633
|
+
|
|
634
|
+
def _validate_active_requests_autoscaling_configs(
    instance_config: LongRunningServiceConfig,
    metrics_provider_config: MetricsProviderDict,
) -> None:
    """Raise AutoscalingValidationError for invalid active-requests configs."""
    registrations = instance_config.get_registrations()
    if len(registrations) > 1:
        raise AutoscalingValidationError(
            "active-requests metrics provider doesn't support instances with multiple registrations"
        )
    # NOTE: a user could technically bypass this check by explicitly writing
    # the default setpoint (0.8) into their soaconfigs, but that's
    # approximately fine for now.
    setpoint = metrics_provider_config.get("setpoint", DEFAULT_AUTOSCALING_SETPOINT)
    if setpoint != DEFAULT_AUTOSCALING_SETPOINT:
        raise AutoscalingValidationError(
            "setpoint is not supported for active-requests; use desired_active_requests_per_replica instead"
        )
|
|
652
|
+
|
|
653
|
+
|
|
654
|
+
def _validate_arbitrary_promql_autoscaling_configs(
    metrics_provider_config: MetricsProviderDict,
) -> None:
    """Raise AutoscalingValidationError for invalid arbitrary-PromQL configs."""
    # NOTE: a user could technically bypass this check by explicitly writing
    # the default setpoint (0.8) into their soaconfigs, but that's
    # approximately fine for now.
    setpoint = metrics_provider_config.get("setpoint", DEFAULT_AUTOSCALING_SETPOINT)
    if setpoint != DEFAULT_AUTOSCALING_SETPOINT:
        raise AutoscalingValidationError(
            "setpoint is not supported for arbitrary PromQL"
        )
    if not metrics_provider_config.get("prometheus_adapter_config"):
        raise AutoscalingValidationError(
            "arbitrary promql metrics provider requires prometheus_adapter_config to be set"
        )
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
def _validate_autoscaling_config(metrics_provider_config: MetricsProviderDict) -> None:
    """Reject config fields that the chosen metrics provider doesn't support."""
    provider_type = metrics_provider_config["type"]
    disallowed = INVALID_AUTOSCALING_FIELDS[provider_type]
    for field in disallowed:
        if field in metrics_provider_config:
            raise AutoscalingValidationError(
                f"metric provider {provider_type} does not support {field}"
            )
|
|
680
|
+
|
|
681
|
+
|
|
682
|
+
def validate_autoscaling_configs(service_path: str) -> bool:
    """Validate new autoscaling configurations that are not validated by jsonschema for the service of interest.

    Checks, per k8s-backed autoscaled instance:
    - provider-specific field restrictions
    - multi-metrics rules (no bespoke policy with multiple providers; no
      duplicate provider types)
    - CPU-autoscaled instances must not override autotuned `cpus` without an
      explicit acknowledgment comment in the YAML

    :param service_path: Path to directory containing soa conf yaml files for service
    :returns: True when every instance's autoscaling config validates
    """
    soa_dir, service = path_to_soa_dir_service(service_path)
    returncode = True
    link = ""
    skip_cpu_override_validation_list = (
        load_system_paasta_config().get_skip_cpu_override_validation_services()
    )

    for cluster in list_clusters(service, soa_dir):
        for instance, instance_config in load_all_instance_configs_for_service(
            service=service, cluster=cluster, soa_dir=soa_dir
        ):
            # only Kubernetes/EKS instance types have HPA-style autoscaling
            if instance_config.get_instance_type() not in K8S_TYPES:
                continue

            instance_config = cast(LongRunningServiceConfig, instance_config)
            if (
                # instance_config is an `InstanceConfig` object, which doesn't have an `is_autoscaling_enabled()`
                # method, but by asserting that the type is in K8S_TYPES, we know we're dealing with either
                # a KubernetesDeploymentConfig or an EksDeploymentConfig, so the cast is safe.
                instance_config.is_autoscaling_enabled()
                # we should eventually make the python templates add the override comment
                # to the correspoding YAML line, but until then we just opt these out of that validation
                and __is_templated(
                    service,
                    soa_dir,
                    cluster,
                    workload=instance_config.get_instance_type(),
                )
                is False
            ):
                autoscaling_params = instance_config.get_autoscaling_params()
                should_skip_cpu_override_validation = (
                    service in skip_cpu_override_validation_list
                )
                seen_provider_types: Set[str] = set()
                configured_provider_count = len(autoscaling_params["metrics_providers"])

                for metrics_provider in autoscaling_params["metrics_providers"]:
                    try:
                        # Generic validation of the config
                        _validate_autoscaling_config(metrics_provider)

                        # Multi-metrics specific validation:
                        # 1. Bespoke policies cannot use multi-metrics scaling
                        # 2. Can't set the same metrics provider multiple times
                        if (
                            metrics_provider.get("decision_policy") == "bespoke"
                            and configured_provider_count > 1
                        ):
                            raise AutoscalingValidationError(
                                f"cannot use bespoke autoscaling with HPA autoscaling"
                            )
                        if metrics_provider["type"] in seen_provider_types:
                            raise AutoscalingValidationError(
                                f"cannot set the same metrics provider multiple times: {metrics_provider['type']}"
                            )
                        seen_provider_types.add(metrics_provider["type"])

                        # Metrics-provider specific validations
                        if metrics_provider["type"] == METRICS_PROVIDER_ACTIVE_REQUESTS:
                            _validate_active_requests_autoscaling_configs(
                                instance_config, metrics_provider
                            )

                        elif metrics_provider["type"] == METRICS_PROVIDER_PROMQL:
                            _validate_arbitrary_promql_autoscaling_configs(
                                metrics_provider
                            )

                        elif (
                            metrics_provider["type"] == METRICS_PROVIDER_CPU
                            # to enable kew autoscaling we just set a decision policy of "bespoke", but
                            # the metrics_provider is (confusingly) left as "cpu"
                            and metrics_provider.get("decision_policy") != "bespoke"
                            and not should_skip_cpu_override_validation
                        ):
                            # Do some extra validation below: we don't abstract that into the above function
                            # call because it needs a lot of extra information

                            # we need access to the comments, so we need to read the config with ruamel to be able
                            # to actually get them in a "nice" automated fashion
                            config = get_config_file_dict(
                                os.path.join(
                                    soa_dir,
                                    service,
                                    f"{instance_config.get_instance_type()}-{cluster}.yaml",
                                ),
                                use_ruamel=True,
                            )
                            if config[instance].get("cpus") is None:
                                # If we're using multiple scaling metrics and one of them is CPU, we must
                                # opt out of CPU autotuning
                                if configured_provider_count > 1:
                                    link = "y/override-cpu-autotune"
                                    raise AutoscalingValidationError(
                                        "using CPU-based scaling with multiple scaling metrics requires explicit "
                                        "'cpus' setting; see the following link for more info:"
                                    )
                                # cpu autoscaled, but using autotuned values - can skip
                                continue

                            cpu_comment = _get_comments_for_key(
                                data=config[instance], key="cpus"
                            )
                            # we could probably have a separate error message if there's a comment that doesn't match
                            # the ack pattern, but that seems like overkill - especially for something that could cause
                            # a DAR if people aren't being careful.
                            if (
                                cpu_comment is None
                                or re.search(
                                    pattern=OVERRIDE_CPU_AUTOTUNE_ACK_PATTERN,
                                    string=cpu_comment,
                                )
                                is None
                            ):
                                link = "y/override-cpu-autotune"
                                raise AutoscalingValidationError(
                                    f"CPU override detected for a CPU-autoscaled instance; "
                                    "see the following link for next steps:"
                                )
                    except AutoscalingValidationError as e:
                        # record the failure but keep validating the remaining
                        # providers/instances so all problems surface at once
                        returncode = False
                        print(
                            failure(
                                msg=f"Autoscaling validation failed for {service}.{instance} in {cluster}: {str(e)}",
                                link=link,
                            )
                        )

    return returncode
|
|
817
|
+
|
|
818
|
+
|
|
819
|
+
def validate_min_max_instances(service_path):
    """Check min_instances <= max_instances for every non-tron instance."""
    soa_dir, service = path_to_soa_dir_service(service_path)
    all_ok = True

    for cluster in list_clusters(service, soa_dir):
        for instance, instance_config in load_all_instance_configs_for_service(
            service=service, cluster=cluster, soa_dir=soa_dir
        ):
            # tron jobs don't autoscale, so the bounds don't apply
            if instance_config.get_instance_type() == "tron":
                continue
            min_instances = instance_config.get_min_instances()
            max_instances = instance_config.get_max_instances()
            # only meaningful when both bounds are explicitly set
            if min_instances is None or max_instances is None:
                continue
            if max_instances < min_instances:
                all_ok = False
                print(
                    failure(
                        f"Instance {instance} on cluster {cluster} has a greater number of min_instances than max_instances."
                        + f"The number of min_instances ({min_instances}) cannot be greater than the max_instances ({max_instances}).",
                        "",
                    )
                )

    return all_ok
|
|
842
|
+
|
|
843
|
+
|
|
844
|
+
def check_secrets_for_instance(
    instance_config_dict: InstanceConfigDict, soa_dir: str, service: str, vault_env: str
) -> bool:
    """Verify every secret referenced by an instance's env exists for vault_env.

    :returns: True when every referenced secret file exists and contains a
        ciphertext entry for the given vault ecosystem
    """
    all_present = True
    # If the service: directive is used, look for the secret there, rather
    # than where the instance config is defined.
    secret_owner = instance_config_dict.get("service", service)

    for env_value in instance_config_dict.get("env", {}).values():
        if not is_secret_ref(env_value):
            continue
        secret_name = get_secret_name_from_ref(env_value)
        if is_shared_secret(env_value):
            secret_file_name = f"{soa_dir}/_shared/secrets/{secret_name}.json"
        else:
            secret_file_name = f"{soa_dir}/{secret_owner}/secrets/{secret_name}.json"

        if not os.path.isfile(secret_file_name):
            print(failure(f"Secret file {secret_file_name} not defined", ""))
            all_present = False
            continue

        secret_json = get_config_file_dict(secret_file_name)
        if "ciphertext" not in secret_json["environments"].get(vault_env, {}):
            print(
                failure(
                    f"Secret {secret_name} not defined for ecosystem {vault_env} on secret file {secret_file_name}",
                    "",
                )
            )
            all_present = False

    return all_present
|
|
873
|
+
|
|
874
|
+
|
|
875
|
+
def list_upcoming_runs(
    cron_schedule: str, starting_from: datetime, num_runs: int = 5
) -> List[str]:
    """Return the next ``num_runs`` fire times for a cron schedule.

    :param cron_schedule: cron expression understood by croniter
    :param starting_from: datetime to start iterating from
    :param num_runs: how many upcoming runs to compute (default 5)
    """
    # renamed from `iter`, which shadowed the builtin of the same name
    schedule_iter = croniter(cron_schedule, starting_from)
    return [schedule_iter.get_next(datetime) for _ in range(num_runs)]
|
|
880
|
+
|
|
881
|
+
|
|
882
|
+
def validate_secrets(service_path):
    """Check every secret referenced by the service against its vault ecosystem."""
    soa_dir, service = path_to_soa_dir_service(service_path)
    vault_cluster_map = load_system_paasta_config().get_vault_cluster_config()
    all_ok = True

    for cluster in list_clusters(service, soa_dir):
        vault_env = vault_cluster_map.get(cluster)
        if not vault_env:
            # unknown vault ecosystem for this cluster; can't check its secrets
            print(failure(f"{cluster} not found on vault_cluster_map", ""))
            all_ok = False
            continue

        for instance, instance_config in load_all_instance_configs_for_service(
            service=service, cluster=cluster, soa_dir=soa_dir
        ):
            instance_ok = check_secrets_for_instance(
                instance_config.config_dict, soa_dir, service, vault_env
            )
            all_ok = all_ok and instance_ok

    if all_ok:
        print(success("No orphan secrets found"))
    return all_ok
|
|
904
|
+
|
|
905
|
+
|
|
906
|
+
def validate_cpu_burst(service_path: str) -> bool:
    """Flag instances whose cpu_burst_add exceeds the allowed threshold.

    Instances may exceed CPU_BURST_THRESHOLD only when the YAML line carrying
    cpu_burst_add has a comment matching OVERRIDE_CPU_BURST_ACK_PATTERN
    (an explicit acknowledgment).

    :param service_path: path to the service's soa-configs directory
    :returns: True when no unacknowledged excessive burst was found
    """
    soa_dir, service = path_to_soa_dir_service(service_path)
    skip_cpu_burst_validation_list = (
        load_system_paasta_config().get_skip_cpu_burst_validation_services()
    )

    returncode = True
    for cluster in list_clusters(service, soa_dir):
        if __is_templated(
            service, soa_dir, cluster, workload="kubernetes"
        ) or __is_templated(service, soa_dir, cluster, workload="eks"):
            # we should eventually make the python templates add the override comment
            # to the correspoding YAML line, but until then we just opt these out of that validation
            continue
        for instance, instance_config in load_all_instance_configs_for_service(
            service=service, cluster=cluster, soa_dir=soa_dir
        ):
            is_k8s_service = (
                instance_config.get_instance_type() == "kubernetes"
                or instance_config.get_instance_type() == "eks"
            )
            should_skip_cpu_burst_validation = service in skip_cpu_burst_validation_list
            if is_k8s_service and not should_skip_cpu_burst_validation:
                # we need access to the comments, so we need to read the config with ruamel to be able
                # to actually get them in a "nice" automated fashion
                config = get_config_file_dict(
                    os.path.join(
                        soa_dir,
                        service,
                        f"{instance_config.get_instance_type()}-{cluster}.yaml",
                    ),
                    use_ruamel=True,
                )

                if config[instance].get("cpu_burst_add") is None:
                    # using autotuned values - can skip
                    continue
                if config[instance]["cpu_burst_add"] <= CPU_BURST_THRESHOLD:
                    # under the threshold - can also skip
                    continue

                burst_comment = _get_comments_for_key(
                    data=config[instance], key="cpu_burst_add"
                )
                # we could probably have a separate error message if there's a comment that doesn't match
                # the ack pattern, but that seems like overkill - especially for something that could cause
                # a DAR if people aren't being careful.
                if (
                    burst_comment is None
                    or re.search(
                        pattern=OVERRIDE_CPU_BURST_ACK_PATTERN,
                        string=burst_comment,
                    )
                    is None
                ):
                    returncode = False
                    print(
                        failure(
                            msg=f"Potentially excessive CPU burst (cpu_burst_add: {config[instance]['cpu_burst_add']} "
                            f"higher than current threshold of {CPU_BURST_THRESHOLD} cores) detected in {cluster}: {service}.{instance}."
                            " Please read the following link for next steps:",
                            link="y/high-cpu-burst",
                        )
                    )

    return returncode
|
|
972
|
+
|
|
973
|
+
|
|
974
|
+
def paasta_validate_soa_configs(
    service: str, service_path: str, verbose: bool = False
) -> bool:
    """Analyze the service in service_path to determine if the conf files are valid

    :param service: name of the service being validated
    :param service_path: Path to directory containing soa conf yaml files for service
    :param verbose: when True, checks may emit extra human-oriented output
    """
    # preconditions: a usable config directory and a k8s-safe service name
    if not check_service_path(service_path):
        return False
    if not validate_service_name(service):
        return False

    checks: List[Callable[[str], bool]] = [
        validate_all_schemas,
        partial(validate_tron, verbose=verbose),
        validate_paasta_objects,
        validate_unique_instance_names,
        validate_autoscaling_configs,
        validate_secrets,
        validate_min_max_instances,
        validate_cpu_burst,
    ]

    # Run *every* check (no short-circuiting) so all problems are reported
    # in a single pass, then AND the results together.
    results = [check(service_path) for check in checks]
    return all(results)
|
|
1002
|
+
|
|
1003
|
+
|
|
1004
|
+
def paasta_validate(args):
|
|
1005
|
+
"""Generate a service_path from the provided args and call paasta_validate_soa_configs
|
|
1006
|
+
|
|
1007
|
+
:param args: argparse.Namespace obj created from sys.args by cli
|
|
1008
|
+
"""
|
|
1009
|
+
service_path = get_service_path(args.service, args.yelpsoa_config_root)
|
|
1010
|
+
service = args.service or guess_service_name()
|
|
1011
|
+
if not paasta_validate_soa_configs(service, service_path, args.verbose):
|
|
1012
|
+
return 1
|