paasta-tools 1.21.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- k8s_itests/__init__.py +0 -0
- k8s_itests/test_autoscaling.py +23 -0
- k8s_itests/utils.py +38 -0
- paasta_tools/__init__.py +20 -0
- paasta_tools/adhoc_tools.py +142 -0
- paasta_tools/api/__init__.py +13 -0
- paasta_tools/api/api.py +330 -0
- paasta_tools/api/api_docs/swagger.json +2323 -0
- paasta_tools/api/client.py +106 -0
- paasta_tools/api/settings.py +33 -0
- paasta_tools/api/tweens/__init__.py +6 -0
- paasta_tools/api/tweens/auth.py +125 -0
- paasta_tools/api/tweens/profiling.py +108 -0
- paasta_tools/api/tweens/request_logger.py +124 -0
- paasta_tools/api/views/__init__.py +13 -0
- paasta_tools/api/views/autoscaler.py +100 -0
- paasta_tools/api/views/exception.py +45 -0
- paasta_tools/api/views/flink.py +73 -0
- paasta_tools/api/views/instance.py +395 -0
- paasta_tools/api/views/pause_autoscaler.py +71 -0
- paasta_tools/api/views/remote_run.py +113 -0
- paasta_tools/api/views/resources.py +76 -0
- paasta_tools/api/views/service.py +35 -0
- paasta_tools/api/views/version.py +25 -0
- paasta_tools/apply_external_resources.py +79 -0
- paasta_tools/async_utils.py +109 -0
- paasta_tools/autoscaling/__init__.py +0 -0
- paasta_tools/autoscaling/autoscaling_service_lib.py +57 -0
- paasta_tools/autoscaling/forecasting.py +106 -0
- paasta_tools/autoscaling/max_all_k8s_services.py +41 -0
- paasta_tools/autoscaling/pause_service_autoscaler.py +77 -0
- paasta_tools/autoscaling/utils.py +52 -0
- paasta_tools/bounce_lib.py +184 -0
- paasta_tools/broadcast_log_to_services.py +62 -0
- paasta_tools/cassandracluster_tools.py +210 -0
- paasta_tools/check_autoscaler_max_instances.py +212 -0
- paasta_tools/check_cassandracluster_services_replication.py +35 -0
- paasta_tools/check_flink_services_health.py +203 -0
- paasta_tools/check_kubernetes_api.py +57 -0
- paasta_tools/check_kubernetes_services_replication.py +141 -0
- paasta_tools/check_oom_events.py +244 -0
- paasta_tools/check_services_replication_tools.py +324 -0
- paasta_tools/check_spark_jobs.py +234 -0
- paasta_tools/cleanup_kubernetes_cr.py +138 -0
- paasta_tools/cleanup_kubernetes_crd.py +145 -0
- paasta_tools/cleanup_kubernetes_jobs.py +344 -0
- paasta_tools/cleanup_tron_namespaces.py +96 -0
- paasta_tools/cli/__init__.py +13 -0
- paasta_tools/cli/authentication.py +85 -0
- paasta_tools/cli/cli.py +260 -0
- paasta_tools/cli/cmds/__init__.py +13 -0
- paasta_tools/cli/cmds/autoscale.py +143 -0
- paasta_tools/cli/cmds/check.py +334 -0
- paasta_tools/cli/cmds/cook_image.py +147 -0
- paasta_tools/cli/cmds/get_docker_image.py +76 -0
- paasta_tools/cli/cmds/get_image_version.py +172 -0
- paasta_tools/cli/cmds/get_latest_deployment.py +93 -0
- paasta_tools/cli/cmds/info.py +155 -0
- paasta_tools/cli/cmds/itest.py +117 -0
- paasta_tools/cli/cmds/list.py +66 -0
- paasta_tools/cli/cmds/list_clusters.py +42 -0
- paasta_tools/cli/cmds/list_deploy_queue.py +171 -0
- paasta_tools/cli/cmds/list_namespaces.py +84 -0
- paasta_tools/cli/cmds/local_run.py +1396 -0
- paasta_tools/cli/cmds/logs.py +1601 -0
- paasta_tools/cli/cmds/mark_for_deployment.py +1988 -0
- paasta_tools/cli/cmds/mesh_status.py +174 -0
- paasta_tools/cli/cmds/pause_service_autoscaler.py +107 -0
- paasta_tools/cli/cmds/push_to_registry.py +275 -0
- paasta_tools/cli/cmds/remote_run.py +252 -0
- paasta_tools/cli/cmds/rollback.py +347 -0
- paasta_tools/cli/cmds/secret.py +549 -0
- paasta_tools/cli/cmds/security_check.py +59 -0
- paasta_tools/cli/cmds/spark_run.py +1400 -0
- paasta_tools/cli/cmds/start_stop_restart.py +401 -0
- paasta_tools/cli/cmds/status.py +2302 -0
- paasta_tools/cli/cmds/validate.py +1012 -0
- paasta_tools/cli/cmds/wait_for_deployment.py +275 -0
- paasta_tools/cli/fsm/__init__.py +13 -0
- paasta_tools/cli/fsm/autosuggest.py +82 -0
- paasta_tools/cli/fsm/template/README.md +8 -0
- paasta_tools/cli/fsm/template/cookiecutter.json +7 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/kubernetes-PROD.yaml +91 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/monitoring.yaml +20 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/service.yaml +8 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/smartstack.yaml +6 -0
- paasta_tools/cli/fsm_cmd.py +121 -0
- paasta_tools/cli/paasta_tabcomplete.sh +23 -0
- paasta_tools/cli/schemas/adhoc_schema.json +199 -0
- paasta_tools/cli/schemas/autoscaling_schema.json +91 -0
- paasta_tools/cli/schemas/autotuned_defaults/cassandracluster_schema.json +37 -0
- paasta_tools/cli/schemas/autotuned_defaults/kubernetes_schema.json +89 -0
- paasta_tools/cli/schemas/deploy_schema.json +173 -0
- paasta_tools/cli/schemas/eks_schema.json +970 -0
- paasta_tools/cli/schemas/kubernetes_schema.json +970 -0
- paasta_tools/cli/schemas/rollback_schema.json +160 -0
- paasta_tools/cli/schemas/service_schema.json +25 -0
- paasta_tools/cli/schemas/smartstack_schema.json +322 -0
- paasta_tools/cli/schemas/tron_schema.json +699 -0
- paasta_tools/cli/utils.py +1118 -0
- paasta_tools/clusterman.py +21 -0
- paasta_tools/config_utils.py +385 -0
- paasta_tools/contrib/__init__.py +0 -0
- paasta_tools/contrib/bounce_log_latency_parser.py +68 -0
- paasta_tools/contrib/check_manual_oapi_changes.sh +24 -0
- paasta_tools/contrib/check_orphans.py +306 -0
- paasta_tools/contrib/create_dynamodb_table.py +35 -0
- paasta_tools/contrib/create_paasta_playground.py +105 -0
- paasta_tools/contrib/emit_allocated_cpu_metrics.py +50 -0
- paasta_tools/contrib/get_running_task_allocation.py +346 -0
- paasta_tools/contrib/habitat_fixer.py +86 -0
- paasta_tools/contrib/ide_helper.py +316 -0
- paasta_tools/contrib/is_pod_healthy_in_proxy.py +139 -0
- paasta_tools/contrib/is_pod_healthy_in_smartstack.py +50 -0
- paasta_tools/contrib/kill_bad_containers.py +109 -0
- paasta_tools/contrib/mass-deploy-tag.sh +44 -0
- paasta_tools/contrib/mock_patch_checker.py +86 -0
- paasta_tools/contrib/paasta_update_soa_memcpu.py +520 -0
- paasta_tools/contrib/render_template.py +129 -0
- paasta_tools/contrib/rightsizer_soaconfigs_update.py +348 -0
- paasta_tools/contrib/service_shard_remove.py +157 -0
- paasta_tools/contrib/service_shard_update.py +373 -0
- paasta_tools/contrib/shared_ip_check.py +77 -0
- paasta_tools/contrib/timeouts_metrics_prom.py +64 -0
- paasta_tools/delete_kubernetes_deployments.py +89 -0
- paasta_tools/deployment_utils.py +44 -0
- paasta_tools/docker_wrapper.py +234 -0
- paasta_tools/docker_wrapper_imports.py +13 -0
- paasta_tools/drain_lib.py +351 -0
- paasta_tools/dump_locally_running_services.py +71 -0
- paasta_tools/eks_tools.py +119 -0
- paasta_tools/envoy_tools.py +373 -0
- paasta_tools/firewall.py +504 -0
- paasta_tools/firewall_logging.py +154 -0
- paasta_tools/firewall_update.py +172 -0
- paasta_tools/flink_tools.py +345 -0
- paasta_tools/flinkeks_tools.py +90 -0
- paasta_tools/frameworks/__init__.py +0 -0
- paasta_tools/frameworks/adhoc_scheduler.py +71 -0
- paasta_tools/frameworks/constraints.py +87 -0
- paasta_tools/frameworks/native_scheduler.py +652 -0
- paasta_tools/frameworks/native_service_config.py +301 -0
- paasta_tools/frameworks/task_store.py +245 -0
- paasta_tools/generate_all_deployments +9 -0
- paasta_tools/generate_authenticating_services.py +94 -0
- paasta_tools/generate_deployments_for_service.py +255 -0
- paasta_tools/generate_services_file.py +114 -0
- paasta_tools/generate_services_yaml.py +30 -0
- paasta_tools/hacheck.py +76 -0
- paasta_tools/instance/__init__.py +0 -0
- paasta_tools/instance/hpa_metrics_parser.py +122 -0
- paasta_tools/instance/kubernetes.py +1362 -0
- paasta_tools/iptables.py +240 -0
- paasta_tools/kafkacluster_tools.py +143 -0
- paasta_tools/kubernetes/__init__.py +0 -0
- paasta_tools/kubernetes/application/__init__.py +0 -0
- paasta_tools/kubernetes/application/controller_wrappers.py +476 -0
- paasta_tools/kubernetes/application/tools.py +90 -0
- paasta_tools/kubernetes/bin/__init__.py +0 -0
- paasta_tools/kubernetes/bin/kubernetes_remove_evicted_pods.py +164 -0
- paasta_tools/kubernetes/bin/paasta_cleanup_remote_run_resources.py +135 -0
- paasta_tools/kubernetes/bin/paasta_cleanup_stale_nodes.py +181 -0
- paasta_tools/kubernetes/bin/paasta_secrets_sync.py +758 -0
- paasta_tools/kubernetes/remote_run.py +558 -0
- paasta_tools/kubernetes_tools.py +4679 -0
- paasta_tools/list_kubernetes_service_instances.py +128 -0
- paasta_tools/list_tron_namespaces.py +60 -0
- paasta_tools/long_running_service_tools.py +678 -0
- paasta_tools/mac_address.py +44 -0
- paasta_tools/marathon_dashboard.py +0 -0
- paasta_tools/mesos/__init__.py +0 -0
- paasta_tools/mesos/cfg.py +46 -0
- paasta_tools/mesos/cluster.py +60 -0
- paasta_tools/mesos/exceptions.py +59 -0
- paasta_tools/mesos/framework.py +77 -0
- paasta_tools/mesos/log.py +48 -0
- paasta_tools/mesos/master.py +306 -0
- paasta_tools/mesos/mesos_file.py +169 -0
- paasta_tools/mesos/parallel.py +52 -0
- paasta_tools/mesos/slave.py +115 -0
- paasta_tools/mesos/task.py +94 -0
- paasta_tools/mesos/util.py +69 -0
- paasta_tools/mesos/zookeeper.py +37 -0
- paasta_tools/mesos_maintenance.py +848 -0
- paasta_tools/mesos_tools.py +1051 -0
- paasta_tools/metrics/__init__.py +0 -0
- paasta_tools/metrics/metastatus_lib.py +1110 -0
- paasta_tools/metrics/metrics_lib.py +217 -0
- paasta_tools/monitoring/__init__.py +13 -0
- paasta_tools/monitoring/check_k8s_api_performance.py +110 -0
- paasta_tools/monitoring_tools.py +652 -0
- paasta_tools/monkrelaycluster_tools.py +146 -0
- paasta_tools/nrtsearchservice_tools.py +143 -0
- paasta_tools/nrtsearchserviceeks_tools.py +68 -0
- paasta_tools/oom_logger.py +321 -0
- paasta_tools/paasta_deploy_tron_jobs +3 -0
- paasta_tools/paasta_execute_docker_command.py +123 -0
- paasta_tools/paasta_native_serviceinit.py +21 -0
- paasta_tools/paasta_service_config_loader.py +201 -0
- paasta_tools/paastaapi/__init__.py +29 -0
- paasta_tools/paastaapi/api/__init__.py +3 -0
- paasta_tools/paastaapi/api/autoscaler_api.py +302 -0
- paasta_tools/paastaapi/api/default_api.py +569 -0
- paasta_tools/paastaapi/api/remote_run_api.py +604 -0
- paasta_tools/paastaapi/api/resources_api.py +157 -0
- paasta_tools/paastaapi/api/service_api.py +1736 -0
- paasta_tools/paastaapi/api_client.py +818 -0
- paasta_tools/paastaapi/apis/__init__.py +22 -0
- paasta_tools/paastaapi/configuration.py +455 -0
- paasta_tools/paastaapi/exceptions.py +137 -0
- paasta_tools/paastaapi/model/__init__.py +5 -0
- paasta_tools/paastaapi/model/adhoc_launch_history.py +176 -0
- paasta_tools/paastaapi/model/autoscaler_count_msg.py +176 -0
- paasta_tools/paastaapi/model/deploy_queue.py +178 -0
- paasta_tools/paastaapi/model/deploy_queue_service_instance.py +194 -0
- paasta_tools/paastaapi/model/envoy_backend.py +185 -0
- paasta_tools/paastaapi/model/envoy_location.py +184 -0
- paasta_tools/paastaapi/model/envoy_status.py +181 -0
- paasta_tools/paastaapi/model/flink_cluster_overview.py +188 -0
- paasta_tools/paastaapi/model/flink_config.py +173 -0
- paasta_tools/paastaapi/model/flink_job.py +186 -0
- paasta_tools/paastaapi/model/flink_job_details.py +192 -0
- paasta_tools/paastaapi/model/flink_jobs.py +175 -0
- paasta_tools/paastaapi/model/float_and_error.py +173 -0
- paasta_tools/paastaapi/model/hpa_metric.py +176 -0
- paasta_tools/paastaapi/model/inline_object.py +170 -0
- paasta_tools/paastaapi/model/inline_response200.py +170 -0
- paasta_tools/paastaapi/model/inline_response2001.py +170 -0
- paasta_tools/paastaapi/model/instance_bounce_status.py +200 -0
- paasta_tools/paastaapi/model/instance_mesh_status.py +186 -0
- paasta_tools/paastaapi/model/instance_status.py +220 -0
- paasta_tools/paastaapi/model/instance_status_adhoc.py +187 -0
- paasta_tools/paastaapi/model/instance_status_cassandracluster.py +173 -0
- paasta_tools/paastaapi/model/instance_status_flink.py +173 -0
- paasta_tools/paastaapi/model/instance_status_kafkacluster.py +173 -0
- paasta_tools/paastaapi/model/instance_status_kubernetes.py +263 -0
- paasta_tools/paastaapi/model/instance_status_kubernetes_autoscaling_status.py +187 -0
- paasta_tools/paastaapi/model/instance_status_kubernetes_v2.py +197 -0
- paasta_tools/paastaapi/model/instance_status_tron.py +204 -0
- paasta_tools/paastaapi/model/instance_tasks.py +182 -0
- paasta_tools/paastaapi/model/integer_and_error.py +173 -0
- paasta_tools/paastaapi/model/kubernetes_container.py +178 -0
- paasta_tools/paastaapi/model/kubernetes_container_v2.py +219 -0
- paasta_tools/paastaapi/model/kubernetes_healthcheck.py +176 -0
- paasta_tools/paastaapi/model/kubernetes_pod.py +201 -0
- paasta_tools/paastaapi/model/kubernetes_pod_event.py +176 -0
- paasta_tools/paastaapi/model/kubernetes_pod_v2.py +213 -0
- paasta_tools/paastaapi/model/kubernetes_replica_set.py +185 -0
- paasta_tools/paastaapi/model/kubernetes_version.py +202 -0
- paasta_tools/paastaapi/model/remote_run_outcome.py +189 -0
- paasta_tools/paastaapi/model/remote_run_start.py +185 -0
- paasta_tools/paastaapi/model/remote_run_stop.py +176 -0
- paasta_tools/paastaapi/model/remote_run_token.py +173 -0
- paasta_tools/paastaapi/model/resource.py +187 -0
- paasta_tools/paastaapi/model/resource_item.py +187 -0
- paasta_tools/paastaapi/model/resource_value.py +176 -0
- paasta_tools/paastaapi/model/smartstack_backend.py +191 -0
- paasta_tools/paastaapi/model/smartstack_location.py +181 -0
- paasta_tools/paastaapi/model/smartstack_status.py +181 -0
- paasta_tools/paastaapi/model/task_tail_lines.py +176 -0
- paasta_tools/paastaapi/model_utils.py +1879 -0
- paasta_tools/paastaapi/models/__init__.py +62 -0
- paasta_tools/paastaapi/rest.py +287 -0
- paasta_tools/prune_completed_pods.py +220 -0
- paasta_tools/puppet_service_tools.py +59 -0
- paasta_tools/py.typed +1 -0
- paasta_tools/remote_git.py +127 -0
- paasta_tools/run-paasta-api-in-dev-mode.py +57 -0
- paasta_tools/run-paasta-api-playground.py +51 -0
- paasta_tools/secret_providers/__init__.py +66 -0
- paasta_tools/secret_providers/vault.py +214 -0
- paasta_tools/secret_tools.py +277 -0
- paasta_tools/setup_istio_mesh.py +353 -0
- paasta_tools/setup_kubernetes_cr.py +412 -0
- paasta_tools/setup_kubernetes_crd.py +138 -0
- paasta_tools/setup_kubernetes_internal_crd.py +154 -0
- paasta_tools/setup_kubernetes_job.py +353 -0
- paasta_tools/setup_prometheus_adapter_config.py +1028 -0
- paasta_tools/setup_tron_namespace.py +248 -0
- paasta_tools/slack.py +75 -0
- paasta_tools/smartstack_tools.py +676 -0
- paasta_tools/spark_tools.py +283 -0
- paasta_tools/synapse_srv_namespaces_fact.py +42 -0
- paasta_tools/tron/__init__.py +0 -0
- paasta_tools/tron/client.py +158 -0
- paasta_tools/tron/tron_command_context.py +194 -0
- paasta_tools/tron/tron_timeutils.py +101 -0
- paasta_tools/tron_tools.py +1448 -0
- paasta_tools/utils.py +4307 -0
- paasta_tools/yaml_tools.py +44 -0
- paasta_tools-1.21.3.data/scripts/apply_external_resources.py +79 -0
- paasta_tools-1.21.3.data/scripts/bounce_log_latency_parser.py +68 -0
- paasta_tools-1.21.3.data/scripts/check_autoscaler_max_instances.py +212 -0
- paasta_tools-1.21.3.data/scripts/check_cassandracluster_services_replication.py +35 -0
- paasta_tools-1.21.3.data/scripts/check_flink_services_health.py +203 -0
- paasta_tools-1.21.3.data/scripts/check_kubernetes_api.py +57 -0
- paasta_tools-1.21.3.data/scripts/check_kubernetes_services_replication.py +141 -0
- paasta_tools-1.21.3.data/scripts/check_manual_oapi_changes.sh +24 -0
- paasta_tools-1.21.3.data/scripts/check_oom_events.py +244 -0
- paasta_tools-1.21.3.data/scripts/check_orphans.py +306 -0
- paasta_tools-1.21.3.data/scripts/check_spark_jobs.py +234 -0
- paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_cr.py +138 -0
- paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_crd.py +145 -0
- paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_jobs.py +344 -0
- paasta_tools-1.21.3.data/scripts/create_dynamodb_table.py +35 -0
- paasta_tools-1.21.3.data/scripts/create_paasta_playground.py +105 -0
- paasta_tools-1.21.3.data/scripts/delete_kubernetes_deployments.py +89 -0
- paasta_tools-1.21.3.data/scripts/emit_allocated_cpu_metrics.py +50 -0
- paasta_tools-1.21.3.data/scripts/generate_all_deployments +9 -0
- paasta_tools-1.21.3.data/scripts/generate_authenticating_services.py +94 -0
- paasta_tools-1.21.3.data/scripts/generate_deployments_for_service.py +255 -0
- paasta_tools-1.21.3.data/scripts/generate_services_file.py +114 -0
- paasta_tools-1.21.3.data/scripts/generate_services_yaml.py +30 -0
- paasta_tools-1.21.3.data/scripts/get_running_task_allocation.py +346 -0
- paasta_tools-1.21.3.data/scripts/habitat_fixer.py +86 -0
- paasta_tools-1.21.3.data/scripts/ide_helper.py +316 -0
- paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_proxy.py +139 -0
- paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_smartstack.py +50 -0
- paasta_tools-1.21.3.data/scripts/kill_bad_containers.py +109 -0
- paasta_tools-1.21.3.data/scripts/kubernetes_remove_evicted_pods.py +164 -0
- paasta_tools-1.21.3.data/scripts/mass-deploy-tag.sh +44 -0
- paasta_tools-1.21.3.data/scripts/mock_patch_checker.py +86 -0
- paasta_tools-1.21.3.data/scripts/paasta_cleanup_remote_run_resources.py +135 -0
- paasta_tools-1.21.3.data/scripts/paasta_cleanup_stale_nodes.py +181 -0
- paasta_tools-1.21.3.data/scripts/paasta_deploy_tron_jobs +3 -0
- paasta_tools-1.21.3.data/scripts/paasta_execute_docker_command.py +123 -0
- paasta_tools-1.21.3.data/scripts/paasta_secrets_sync.py +758 -0
- paasta_tools-1.21.3.data/scripts/paasta_tabcomplete.sh +23 -0
- paasta_tools-1.21.3.data/scripts/paasta_update_soa_memcpu.py +520 -0
- paasta_tools-1.21.3.data/scripts/render_template.py +129 -0
- paasta_tools-1.21.3.data/scripts/rightsizer_soaconfigs_update.py +348 -0
- paasta_tools-1.21.3.data/scripts/service_shard_remove.py +157 -0
- paasta_tools-1.21.3.data/scripts/service_shard_update.py +373 -0
- paasta_tools-1.21.3.data/scripts/setup_istio_mesh.py +353 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_cr.py +412 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_crd.py +138 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_internal_crd.py +154 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_job.py +353 -0
- paasta_tools-1.21.3.data/scripts/setup_prometheus_adapter_config.py +1028 -0
- paasta_tools-1.21.3.data/scripts/shared_ip_check.py +77 -0
- paasta_tools-1.21.3.data/scripts/synapse_srv_namespaces_fact.py +42 -0
- paasta_tools-1.21.3.data/scripts/timeouts_metrics_prom.py +64 -0
- paasta_tools-1.21.3.dist-info/LICENSE +201 -0
- paasta_tools-1.21.3.dist-info/METADATA +74 -0
- paasta_tools-1.21.3.dist-info/RECORD +348 -0
- paasta_tools-1.21.3.dist-info/WHEEL +5 -0
- paasta_tools-1.21.3.dist-info/entry_points.txt +20 -0
- paasta_tools-1.21.3.dist-info/top_level.txt +2 -0
paasta_tools/utils.py
ADDED
@@ -0,0 +1,4307 @@
# Copyright 2015-2017 Yelp Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import copy
import datetime
import difflib
import errno
import fcntl
import getpass
import glob
import hashlib
import io
import json
import logging
import math
import os
import pwd
import queue
import re
import shlex
import signal
import socket
import ssl
import sys
import tempfile
import threading
import time
import warnings
from collections import OrderedDict
from enum import Enum
from fnmatch import fnmatch
from functools import lru_cache
from functools import wraps
from subprocess import PIPE
from subprocess import Popen
from subprocess import STDOUT
from types import FrameType
from typing import Any
from typing import Callable
from typing import cast
from typing import Collection
from typing import ContextManager
from typing import Dict
from typing import FrozenSet
from typing import IO
from typing import Iterable
from typing import Iterator
from typing import List
from typing import Literal
from typing import Mapping
from typing import NamedTuple
from typing import Optional
from typing import Sequence
from typing import Set
from typing import TextIO
from typing import Tuple
from typing import Type
from typing import TypeVar
from typing import Union

import choice
import dateutil.tz
import ldap3
import requests_cache
import service_configuration_lib
from docker import APIClient
from docker.utils import kwargs_from_env
from kazoo.client import KazooClient
from mypy_extensions import TypedDict
from service_configuration_lib import read_extra_service_information
from service_configuration_lib import read_service_configuration

import paasta_tools.cli.fsm
from paasta_tools import yaml_tools as yaml


# DO NOT CHANGE SPACER, UNLESS YOU'RE PREPARED TO CHANGE ALL INSTANCES
# OF IT IN OTHER LIBRARIES (i.e. service_configuration_lib).
# It's used to compose a job's full ID from its name and instance
SPACER = "."
INFRA_ZK_PATH = "/nail/etc/zookeeper_discovery/infrastructure/"
PATH_TO_SYSTEM_PAASTA_CONFIG_DIR = os.environ.get(
    "PAASTA_SYSTEM_CONFIG_DIR", "/etc/paasta/"
)
DEFAULT_SOA_DIR = service_configuration_lib.DEFAULT_SOA_DIR
DEFAULT_VAULT_TOKEN_FILE = "/root/.vault_token"
AUTO_SOACONFIG_SUBDIR = "autotuned_defaults"
DEFAULT_DOCKERCFG_LOCATION = "file:///root/.dockercfg"
DEPLOY_PIPELINE_NON_DEPLOY_STEPS = (
    "itest",
    "itest-and-push-to-registry",
    "security-check",
    "push-to-registry",
)
# Default values for _log
ANY_CLUSTER = "N/A"
ANY_INSTANCE = "N/A"
DEFAULT_LOGLEVEL = "event"
no_escape = re.compile(r"\x1B\[[0-9;]*[mK]")

# instead of the convention of using underscores in this scribe channel name,
# the audit log uses dashes to prevent collisions with a service that might be
# named 'audit_log'
AUDIT_LOG_STREAM = "stream_paasta-audit-log"

DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT = (
    "http://{host:s}:{port:d}/;csv;norefresh;scope={scope:s}"
)

DEFAULT_CPU_PERIOD = 100000
DEFAULT_CPU_BURST_ADD = 1

DEFAULT_SOA_CONFIGS_GIT_URL = "sysgit.yelpcorp.com"

# To ensure the Spark driver not being interrupted due to spot instances,
# we use stable pool for drivers
DEFAULT_SPARK_DRIVER_POOL = "stable"

log = logging.getLogger(__name__)
log.addHandler(logging.NullHandler())

INSTANCE_TYPES = (
    "paasta_native",
    "adhoc",
    "kubernetes",
    "eks",
    "tron",
    "flink",
    "flinkeks",
    "cassandracluster",
    "kafkacluster",
    "monkrelays",
    "nrtsearchservice",
    "nrtsearchserviceeks",
)

PAASTA_K8S_INSTANCE_TYPES = {
    "kubernetes",
    "eks",
}

INSTANCE_TYPE_TO_K8S_NAMESPACE = {
    "marathon": "paasta",
    "adhoc": "paasta",
    "tron": "tron",
    "flink": "paasta-flinks",
    "flinkeks": "paasta-flinks",
    "cassandracluster": "paasta-cassandraclusters",
    "kafkacluster": "paasta-kafkaclusters",
    "nrtsearchservice": "paasta-nrtsearchservices",
    "nrtsearchserviceeks": "paasta-nrtsearchservices",
}

SHARED_SECRETS_K8S_NAMESPACES = {"paasta-spark", "paasta-cassandraclusters"}

CAPS_DROP = [
    "SETPCAP",
    "MKNOD",
    "AUDIT_WRITE",
    "CHOWN",
    "NET_RAW",
    "DAC_OVERRIDE",
    "FOWNER",
    "FSETID",
    "KILL",
    "SETGID",
    "SETUID",
    "NET_BIND_SERVICE",
    "SYS_CHROOT",
    "SETFCAP",
]


class RollbackTypes(Enum):
    AUTOMATIC_SLO_ROLLBACK = "automatic_slo_rollback"
    AUTOMATIC_METRIC_ROLLBACK = "automatic_metric_rollback"
    USER_INITIATED_ROLLBACK = "user_initiated_rollback"


class TimeCacheEntry(TypedDict):
    data: Any
    fetch_time: float


_CacheRetT = TypeVar("_CacheRetT")


class time_cache:
    def __init__(self, ttl: float = 0) -> None:
        self.configs: Dict[Tuple, TimeCacheEntry] = {}
        self.ttl = ttl

    def __call__(self, f: Callable[..., _CacheRetT]) -> Callable[..., _CacheRetT]:
        def cache(*args: Any, **kwargs: Any) -> _CacheRetT:
            if "ttl" in kwargs:
                ttl = kwargs["ttl"]
                del kwargs["ttl"]
            else:
                ttl = self.ttl
            key = args
            for item in kwargs.items():
                key += item
            if (
                (not ttl)
                or (key not in self.configs)
                or (time.time() - self.configs[key]["fetch_time"] > ttl)
            ):
                self.configs[key] = {
                    "data": f(*args, **kwargs),
                    "fetch_time": time.time(),
                }
            return self.configs[key]["data"]

        return cache

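# --- Illustrative sketch, not part of the packaged utils.py diff above ---
# How the time_cache decorator behaves: the wrapped callable only runs when the
# cached entry is missing or older than the ttl, and a caller can force a
# recompute by passing the reserved "ttl" keyword argument. Names below are
# hypothetical.
@time_cache(ttl=30)
def _example_expensive_lookup(cluster: str) -> str:
    # stand-in for an expensive read (disk, ZooKeeper, an API, ...)
    return f"config-for-{cluster}"

_example_expensive_lookup("norcal-devc")         # miss: runs the function
_example_expensive_lookup("norcal-devc")         # hit: served from cache for 30s
_example_expensive_lookup("norcal-devc", ttl=0)  # ttl=0 forces a recompute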

_SortDictsT = TypeVar("_SortDictsT", bound=Mapping)


def sort_dicts(dcts: Iterable[_SortDictsT]) -> List[_SortDictsT]:
    def key(dct: _SortDictsT) -> Tuple:
        return tuple(sorted(dct.items()))

    return sorted(dcts, key=key)


class InvalidInstanceConfig(Exception):
    pass


DeployBlacklist = List[Tuple[str, str]]
DeployWhitelist = Optional[Tuple[str, List[str]]]
# The actual config files will have lists, since tuples are not expressible in base YAML, so we define different types
# here to represent that. The getter functions will convert to the safe versions above.
UnsafeDeployBlacklist = Optional[Sequence[Sequence[str]]]
UnsafeDeployWhitelist = Optional[Sequence[Union[str, Sequence[str]]]]


Constraint = Sequence[str]

# e.g. ['GROUP_BY', 'habitat', 2]. Tron doesn't like that so we'll convert to Constraint later.
UnstringifiedConstraint = Sequence[Union[str, int, float]]

SecurityConfigDict = Dict  # Todo: define me.


class VolumeWithMode(TypedDict):
    mode: str


class DockerVolume(VolumeWithMode):
    hostPath: str
    containerPath: str


class AwsEbsVolume(VolumeWithMode):
    volume_id: str
    fs_type: str
    partition: int
    container_path: str


class PersistentVolume(VolumeWithMode):
    size: int
    container_path: str
    storage_class_name: str


class SecretVolumeItem(TypedDict, total=False):
    key: str
    path: str
    mode: Union[str, int]


class SecretVolume(TypedDict, total=False):
    secret_name: str
    container_path: str
    default_mode: Union[str, int]
    items: List[SecretVolumeItem]


class ProjectedSAVolume(TypedDict, total=False):
    container_path: str
    audience: str
    expiration_seconds: int


class TronSecretVolume(SecretVolume, total=False):
    secret_volume_name: str


class MonitoringDict(TypedDict, total=False):
    alert_after: Union[str, float]
    check_every: str
    check_oom_events: bool
    component: str
    description: str
    notification_email: Union[str, bool]
    page: bool
    priority: str
    project: str
    realert_every: float
    runbook: str
    slack_channels: Union[str, List[str]]
    tags: List[str]
    team: str
    ticket: bool
    tip: str


class InstanceConfigDict(TypedDict, total=False):
    deploy_group: str
    mem: float
    cpus: float
    disk: float
    cmd: str
    namespace: str
    args: List[str]
    cfs_period_us: float
    cpu_burst_add: float
    cap_add: List
    privileged: bool
    env: Dict[str, str]
    monitoring: MonitoringDict
    deploy_blacklist: UnsafeDeployBlacklist
    deploy_whitelist: UnsafeDeployWhitelist
    pool: str
    persistent_volumes: List[PersistentVolume]
    role: str
    extra_volumes: List[DockerVolume]
    aws_ebs_volumes: List[AwsEbsVolume]
    secret_volumes: List[SecretVolume]
    projected_sa_volumes: List[ProjectedSAVolume]
    security: SecurityConfigDict
    dependencies_reference: str
    dependencies: Dict[str, Dict]
    constraints: List[UnstringifiedConstraint]
    extra_constraints: List[UnstringifiedConstraint]
    net: str
    extra_docker_args: Dict[str, str]
    gpus: int
    branch: str
    iam_role: str
    iam_role_provider: str
    service: str
    uses_bulkdata: bool
    docker_url: str


class BranchDictV1(TypedDict, total=False):
    docker_image: str
    desired_state: str
    force_bounce: Optional[str]


class BranchDictV2(TypedDict):
    git_sha: str
    docker_image: str
    image_version: Optional[str]
    desired_state: str
    force_bounce: Optional[str]


class DockerParameter(TypedDict):
    key: str
    value: str


KubeContainerResourceRequest = TypedDict(
    "KubeContainerResourceRequest",
    {
        "cpu": float,
        "memory": str,
        "ephemeral-storage": str,
    },
    total=False,
)


def safe_deploy_blacklist(input: UnsafeDeployBlacklist) -> DeployBlacklist:
    return [(t, l) for t, l in input]


def safe_deploy_whitelist(input: UnsafeDeployWhitelist) -> DeployWhitelist:
    try:
        location_type, allowed_values = input
        return cast(str, location_type), cast(List[str], allowed_values)
    except TypeError:
        return None
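
# --- Illustrative sketch, not part of the packaged utils.py diff above ---
# soa-configs express blacklists/whitelists as plain YAML lists; the helpers
# above coerce them into the tuple-based types declared earlier. The location
# names used here are hypothetical.
assert safe_deploy_blacklist([["region", "uswest1-prod"]]) == [("region", "uswest1-prod")]
assert safe_deploy_whitelist(["habitat", ["sfo1", "sfo2"]]) == ("habitat", ["sfo1", "sfo2"])
assert safe_deploy_whitelist(None) is None  # missing/malformed input falls back to None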


# For mypy typing
InstanceConfig_T = TypeVar("InstanceConfig_T", bound="InstanceConfig")


class InstanceConfig:
    config_filename_prefix: str

    def __init__(
        self,
        cluster: str,
        instance: str,
        service: str,
        config_dict: InstanceConfigDict,
        branch_dict: Optional[BranchDictV2],
        soa_dir: str = DEFAULT_SOA_DIR,
    ) -> None:
        self.config_dict = config_dict
        self.branch_dict = branch_dict
        self.cluster = cluster
        self.instance = instance
        self.service = service
        self.soa_dir = soa_dir
        self._job_id = compose_job_id(service, instance)
        config_interpolation_keys = ("deploy_group",)
        interpolation_facts = self.__get_interpolation_facts()
        for key in config_interpolation_keys:
            if (
                key in self.config_dict
                and self.config_dict[key] is not None  # type: ignore
            ):
                self.config_dict[key] = self.config_dict[key].format(  # type: ignore
                    **interpolation_facts
                )

    def __repr__(self) -> str:
        return "{!s}({!r}, {!r}, {!r}, {!r}, {!r}, {!r})".format(
            self.__class__.__name__,
            self.service,
            self.instance,
            self.cluster,
            self.config_dict,
            self.branch_dict,
            self.soa_dir,
        )

    def __get_interpolation_facts(self) -> Dict[str, str]:
        return {
            "cluster": self.cluster,
            "instance": self.instance,
            "service": self.service,
        }

    def get_cluster(self) -> str:
        return self.cluster

    def get_namespace(self) -> str:
        """Get namespace from config, default to the value from INSTANCE_TYPE_TO_K8S_NAMESPACE for this instance type, 'paasta' if that isn't defined."""
        return self.config_dict.get(
            "namespace",
            INSTANCE_TYPE_TO_K8S_NAMESPACE.get(self.get_instance_type(), "paasta"),
        )

    def get_instance(self) -> str:
        return self.instance

    def get_service(self) -> str:
        return self.service

    @property
    def job_id(self) -> str:
        return self._job_id

    def get_docker_registry(
        self, system_paasta_config: Optional["SystemPaastaConfig"] = None
    ) -> str:
        return get_service_docker_registry(
            self.service, self.soa_dir, system_config=system_paasta_config
        )

    def get_branch(self) -> str:
        return get_paasta_branch(
            cluster=self.get_cluster(), instance=self.get_instance()
        )

    def get_deploy_group(self) -> str:
        return self.config_dict.get("deploy_group", self.get_branch())

    def get_team(self) -> str:
        return self.config_dict.get("monitoring", {}).get("team", None)

    def get_mem(self) -> float:
        """Gets the memory required from the service's configuration.

        Defaults to 4096 (4G) if no value specified in the config.

        :returns: The amount of memory specified by the config, 4096 if not specified"""
        mem = self.config_dict.get("mem", 4096)
        return mem

    def get_mem_swap(self) -> str:
        """Gets the memory-swap value. This value is passed to the docker
        container to ensure that the total memory limit (memory + swap) is the
        same value as the 'mem' key in soa-configs. Note - this value *has* to
        be >= to the mem key, so we always round up to the closest MB and add
        additional 64MB for the docker executor (See PAASTA-12450).
        """
        mem = self.get_mem()
        mem_swap = int(math.ceil(mem + 64))
        return "%sm" % mem_swap

    def get_cpus(self) -> float:
        """Gets the number of cpus required from the service's configuration.

        Defaults to 1 cpu if no value specified in the config.

        :returns: The number of cpus specified in the config, 1 if not specified"""
        cpus = self.config_dict.get("cpus", 1)
        return cpus

    def get_cpu_burst_add(self) -> float:
        """Returns the number of additional cpus a container is allowed to use.
        Defaults to DEFAULT_CPU_BURST_ADD"""
        return self.config_dict.get("cpu_burst_add", DEFAULT_CPU_BURST_ADD)

    def get_cpu_period(self) -> float:
        """The --cpu-period option to be passed to docker
        Comes from the cfs_period_us configuration option

        :returns: The number to be passed to the --cpu-period docker flag"""
        return self.config_dict.get("cfs_period_us", DEFAULT_CPU_PERIOD)

    def get_cpu_quota(self) -> float:
        """Gets the --cpu-quota option to be passed to docker

        Calculation: (cpus + cpus_burst_add) * cfs_period_us

        :returns: The number to be passed to the --cpu-quota docker flag"""
        cpu_burst_add = self.get_cpu_burst_add()
        return (self.get_cpus() + cpu_burst_add) * self.get_cpu_period()
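
# --- Illustrative sketch, not part of the packaged utils.py diff above ---
# Worked example of the resource math above, assuming a hypothetical config of
# {"cpus": 2, "mem": 1024} and the module defaults DEFAULT_CPU_PERIOD = 100000
# and DEFAULT_CPU_BURST_ADD = 1:
#   get_mem_swap()  -> ceil(1024 + 64) = 1088 -> "1088m"
#   get_cpu_quota() -> (2 + 1) * 100000 = 300000
# i.e. the container gets 64MB of swap headroom over its memory limit and may
# burst one extra CPU beyond its requested share within each CFS period.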

    def get_extra_docker_args(self) -> Dict[str, str]:
        return self.config_dict.get("extra_docker_args", {})

    def get_cap_add(self) -> Iterable[DockerParameter]:
        """Get the --cap-add options to be passed to docker
        Generated from the cap_add configuration option, which is a list of
        capabilities.

        Example configuration: {'cap_add': ['IPC_LOCK', 'SYS_PTRACE']}

        :returns: A generator of cap_add options to be passed as --cap-add flags"""
        for value in self.config_dict.get("cap_add", []):
            yield {"key": "cap-add", "value": f"{value}"}

    def get_cap_drop(self) -> Iterable[DockerParameter]:
        """Generates --cap-drop options to be passed to docker by default, which
        makes them not able to perform special privilege escalation stuff
        https://docs.docker.com/engine/reference/run/#runtime-privilege-and-linux-capabilities
        """
        for cap in CAPS_DROP:
            yield {"key": "cap-drop", "value": cap}

    def get_cap_args(self) -> Iterable[DockerParameter]:
        """Generate all --cap-add/--cap-drop parameters, ensuring not to have overlapping settings"""
        cap_adds = list(self.get_cap_add())
        if cap_adds and is_using_unprivileged_containers():
            log.warning(
                "Unprivileged containerizer detected, adding capabilities will not work properly"
            )
        yield from cap_adds
        added_caps = [cap["value"] for cap in cap_adds]
        for cap in self.get_cap_drop():
            if cap["value"] not in added_caps:
                yield cap

    def format_docker_parameters(
        self,
        with_labels: bool = True,
        system_paasta_config: Optional["SystemPaastaConfig"] = None,
    ) -> List[DockerParameter]:
        """Formats extra flags for running docker. Will be added in the format
        `["--%s=%s" % (e['key'], e['value']) for e in list]` to the `docker run` command
        Note: values must be strings

        :param with_labels: Whether to build docker parameters with or without labels
        :returns: A list of parameters to be added to docker run"""
        parameters: List[DockerParameter] = [
            {"key": "memory-swap", "value": self.get_mem_swap()},
            {"key": "cpu-period", "value": "%s" % int(self.get_cpu_period())},
            {"key": "cpu-quota", "value": "%s" % int(self.get_cpu_quota())},
        ]
        if self.use_docker_disk_quota(system_paasta_config=system_paasta_config):
            parameters.append(
                {
                    "key": "storage-opt",
                    "value": f"size={int(self.get_disk() * 1024 * 1024)}",
                }
            )
        if with_labels:
            parameters.extend(
                [
                    {"key": "label", "value": "paasta_service=%s" % self.service},
                    {"key": "label", "value": "paasta_instance=%s" % self.instance},
                ]
            )
        extra_docker_args = self.get_extra_docker_args()
        if extra_docker_args:
            for key, value in extra_docker_args.items():
                parameters.extend([{"key": key, "value": value}])
        parameters.extend(self.get_docker_init())
        parameters.extend(self.get_cap_args())
        return parameters
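
# --- Illustrative sketch, not part of the packaged utils.py diff above ---
# Elsewhere the parameter dicts returned by format_docker_parameters() are
# rendered into CLI flags with the '--%s=%s' % (p["key"], p["value"]) format
# referenced in the docstring. For the hypothetical config used above, that
# would come out roughly as:
#   --memory-swap=1088m --cpu-period=100000 --cpu-quota=300000
#   --label=paasta_service=example_service --label=paasta_instance=main
#   --init=true --cap-drop=SETPCAP --cap-drop=MKNOD ...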

    def use_docker_disk_quota(
        self, system_paasta_config: Optional["SystemPaastaConfig"] = None
    ) -> bool:
        if system_paasta_config is None:
            system_paasta_config = load_system_paasta_config()
        return system_paasta_config.get_enforce_disk_quota()

    def get_docker_init(self) -> Iterable[DockerParameter]:
        return [{"key": "init", "value": "true"}]

    def get_disk(self, default: float = 1024) -> float:
        """Gets the amount of disk space in MiB required from the service's configuration.

        Defaults to 1024 (1GiB) if no value is specified in the config.

        :returns: The amount of disk space specified by the config, 1024 MiB if not specified
        """
        disk = self.config_dict.get("disk", default)
        return disk

    def get_gpus(self) -> Optional[int]:
        """Gets the number of gpus required from the service's configuration.

        Default to None if no value is specified in the config.

        :returns: The number of gpus specified by the config, 0 if not specified"""
        gpus = self.config_dict.get("gpus", None)
        return gpus

    def get_container_type(self) -> Optional[str]:
        """Get Mesos containerizer type.

        Default to DOCKER if gpus are not used.

        :returns: Mesos containerizer type, DOCKER or MESOS"""
        if self.get_gpus() is not None:
            container_type = "MESOS"
        else:
            container_type = "DOCKER"
        return container_type

    def get_cmd(self) -> Optional[Union[str, List[str]]]:
        """Get the docker cmd specified in the service's configuration.

        Defaults to None if not specified in the config.

        :returns: A string specified in the config, None if not specified"""
        return self.config_dict.get("cmd", None)

    def get_instance_type(self) -> Optional[str]:
        return getattr(self, "config_filename_prefix", None)

    def get_env_dictionary(
        self, system_paasta_config: Optional["SystemPaastaConfig"] = None
    ) -> Dict[str, str]:
        """A dictionary of key/value pairs that represent environment variables
        to be injected to the container environment"""
        env = {
            "PAASTA_SERVICE": self.service,
            "PAASTA_INSTANCE": self.instance,
            "PAASTA_CLUSTER": self.cluster,
            "PAASTA_DEPLOY_GROUP": self.get_deploy_group(),
            "PAASTA_DOCKER_IMAGE": self.get_docker_image(),
            "PAASTA_RESOURCE_CPUS": str(self.get_cpus()),
            "PAASTA_RESOURCE_MEM": str(self.get_mem()),
            "PAASTA_RESOURCE_DISK": str(self.get_disk()),
        }
        if self.get_gpus() is not None:
            env["PAASTA_RESOURCE_GPUS"] = str(self.get_gpus())
        try:
            env["PAASTA_GIT_SHA"] = get_git_sha_from_dockerurl(
                self.get_docker_url(system_paasta_config=system_paasta_config)
            )
        except Exception:
            pass
        image_version = self.get_image_version()
        if image_version is not None:
            env["PAASTA_IMAGE_VERSION"] = image_version
        team = self.get_team()
        if team:
            env["PAASTA_MONITORING_TEAM"] = team
        instance_type = self.get_instance_type()
        if instance_type:
            env["PAASTA_INSTANCE_TYPE"] = instance_type
        # Our workloads interact with AWS quite a lot, so it comes handy to
        # propagate an "application ID" in the user-agent of API requests
        # for debugging purposes (max length is 50 chars from AWS docs).
        env["AWS_SDK_UA_APP_ID"] = f"{self.service}.{self.instance}"[:50]
        user_env = self.config_dict.get("env", {})
        env.update(user_env)
        return {str(k): str(v) for (k, v) in env.items()}
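
# --- Illustrative sketch, not part of the packaged utils.py diff above ---
# For a hypothetical instance "example_service.main" on cluster "norcal-devc"
# that relies on the defaults above, get_env_dictionary() produces roughly:
#   {
#       "PAASTA_SERVICE": "example_service",
#       "PAASTA_INSTANCE": "main",
#       "PAASTA_CLUSTER": "norcal-devc",
#       "PAASTA_DEPLOY_GROUP": "<deploy_group, defaulting to the paasta branch>",
#       "PAASTA_DOCKER_IMAGE": "<docker_image from deployments.json>",
#       "PAASTA_RESOURCE_CPUS": "1",
#       "PAASTA_RESOURCE_MEM": "4096",
#       "PAASTA_RESOURCE_DISK": "1024",
#       "AWS_SDK_UA_APP_ID": "example_service.main",
#       ...  # plus PAASTA_GIT_SHA when it can be derived, and any user "env" keys
#   }
# Every value is stringified, and user-supplied "env" entries override the
# generated ones because of the final env.update(user_env).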

    def get_env(
        self, system_paasta_config: Optional["SystemPaastaConfig"] = None
    ) -> Dict[str, str]:
        """Basic get_env that simply returns the basic env, other classes
        might need to override this getter for more implementation-specific
        env getting"""
        return self.get_env_dictionary(system_paasta_config=system_paasta_config)

    def get_args(self) -> Optional[List[str]]:
        """Get the docker args specified in the service's configuration.

        If not specified in the config and if cmd is not specified, defaults to an empty array.
        If not specified in the config but cmd is specified, defaults to null.
        If specified in the config and if cmd is also specified, throws an exception. Only one may be specified.

        :param service_config: The service instance's configuration dictionary
        :returns: An array of args specified in the config,
            ``[]`` if not specified and if cmd is not specified,
            otherwise None if not specified but cmd is specified"""
        if self.get_cmd() is None:
            return self.config_dict.get("args", [])
        else:
            args = self.config_dict.get("args", None)
            if args is None:
                return args
            else:
                # TODO validation stuff like this should be moved into a check_*
                raise InvalidInstanceConfig(
                    "Instance configuration can specify cmd or args, but not both."
                )
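
# --- Illustrative sketch, not part of the packaged utils.py diff above ---
# cmd and args are mutually exclusive, per get_args() above:
#   config {"args": ["--port", "8888"]}         -> get_args() == ["--port", "8888"]
#   config {"cmd": "python -m example"}         -> get_args() is None
#   config {}                                   -> get_args() == []
#   config with both "cmd" and "args" present   -> raises InvalidInstanceConfig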

    def get_monitoring(self) -> MonitoringDict:
        """Get monitoring overrides defined for the given instance"""
        return self.config_dict.get("monitoring", {})

    def get_deploy_constraints(
        self,
        blacklist: DeployBlacklist,
        whitelist: DeployWhitelist,
        system_deploy_blacklist: DeployBlacklist,
        system_deploy_whitelist: DeployWhitelist,
    ) -> List[Constraint]:
        """Return the combination of deploy_blacklist and deploy_whitelist
        as a list of constraints.
        """
        return (
            deploy_blacklist_to_constraints(blacklist)
            + deploy_whitelist_to_constraints(whitelist)
            + deploy_blacklist_to_constraints(system_deploy_blacklist)
            + deploy_whitelist_to_constraints(system_deploy_whitelist)
        )

    def get_deploy_blacklist(self) -> DeployBlacklist:
        """The deploy blacklist is a list of lists, where the lists indicate
        which locations the service should not be deployed"""
        return safe_deploy_blacklist(self.config_dict.get("deploy_blacklist", []))

    def get_deploy_whitelist(self) -> DeployWhitelist:
        """The deploy whitelist is a tuple of (location_type, [allowed value, allowed value, ...]).
        To have tasks scheduled on it, a host must be covered by the deploy whitelist (if present) and not excluded by
        the deploy blacklist."""

        return safe_deploy_whitelist(self.config_dict.get("deploy_whitelist"))

    def get_docker_image(self) -> str:
        """Get the docker image name (with tag) for a given service branch from
        a generated deployments.json file."""
        if self.branch_dict is not None:
            return self.branch_dict["docker_image"]
        else:
            return ""

    def get_image_version(self) -> Optional[str]:
        """Get additional information identifying the Docker image from a
        generated deployments.json file."""
        if self.branch_dict is not None and "image_version" in self.branch_dict:
            return self.branch_dict["image_version"]
        else:
            return None

    def get_docker_url(
        self, system_paasta_config: Optional["SystemPaastaConfig"] = None
    ) -> str:
        """Compose the docker url.
        :returns: '<registry_uri>/<docker_image>'
        """
        # NOTE: we're explicitly only allowing this for adhoc instances to support remote-run toolboxes.
        # If you're looking at this to expand that support for non-remote-run cases, please chat with #paasta first.
        if "docker_url" in self.config_dict:
            return self.config_dict["docker_url"]
        registry_uri = self.get_docker_registry(
            system_paasta_config=system_paasta_config
        )
        docker_image = self.get_docker_image()
        if not docker_image:
            raise NoDockerImageError(
                "Docker url not available because there is no docker_image"
            )
        docker_url = f"{registry_uri}/{docker_image}"
        return docker_url
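
# --- Illustrative sketch, not part of the packaged utils.py diff above ---
# get_docker_url() just joins the registry with the image recorded in
# deployments.json; with a hypothetical registry "docker-registry.example.com:443"
# and image "services-example_service:paasta-abcdef12", the result would be
# "docker-registry.example.com:443/services-example_service:paasta-abcdef12".
# If deployments.json has no image yet, NoDockerImageError is raised instead,
# and adhoc instances may bypass the whole composition via an explicit docker_url.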
808
|
+
|
|
809
|
+
def get_desired_state(self) -> str:
|
|
810
|
+
"""Get the desired state (either 'start' or 'stop') for a given service
|
|
811
|
+
branch from a generated deployments.json file."""
|
|
812
|
+
if self.branch_dict is not None:
|
|
813
|
+
return self.branch_dict["desired_state"]
|
|
814
|
+
else:
|
|
815
|
+
return "start"
|
|
816
|
+
|
|
817
|
+
def get_force_bounce(self) -> Optional[str]:
|
|
818
|
+
"""Get the force_bounce token for a given service branch from a generated
|
|
819
|
+
deployments.json file. This is a token that, when changed, indicates that
|
|
820
|
+
the instance should be recreated and bounced, even if no other
|
|
821
|
+
parameters have changed. This may be None or a string, generally a
|
|
822
|
+
timestamp.
|
|
823
|
+
"""
|
|
824
|
+
if self.branch_dict is not None:
|
|
825
|
+
return self.branch_dict["force_bounce"]
|
|
826
|
+
else:
|
|
827
|
+
return None
|
|
828
|
+
|
|
829
|
+
def check_cpus(self) -> Tuple[bool, str]:
|
|
830
|
+
cpus = self.get_cpus()
|
|
831
|
+
if cpus is not None:
|
|
832
|
+
if not isinstance(cpus, (float, int)):
|
|
833
|
+
return (
|
|
834
|
+
False,
|
|
835
|
+
'The specified cpus value "%s" is not a valid float or int.' % cpus,
|
|
836
|
+
)
|
|
837
|
+
return True, ""
|
|
838
|
+
|
|
839
|
+
def check_mem(self) -> Tuple[bool, str]:
|
|
840
|
+
mem = self.get_mem()
|
|
841
|
+
if mem is not None:
|
|
842
|
+
if not isinstance(mem, (float, int)):
|
|
843
|
+
return (
|
|
844
|
+
False,
|
|
845
|
+
'The specified mem value "%s" is not a valid float or int.' % mem,
|
|
846
|
+
)
|
|
847
|
+
return True, ""
|
|
848
|
+
|
|
849
|
+
def check_disk(self) -> Tuple[bool, str]:
|
|
850
|
+
disk = self.get_disk()
|
|
851
|
+
if disk is not None:
|
|
852
|
+
if not isinstance(disk, (float, int)):
|
|
853
|
+
return (
|
|
854
|
+
False,
|
|
855
|
+
'The specified disk value "%s" is not a valid float or int.' % disk,
|
|
856
|
+
)
|
|
857
|
+
return True, ""
|
|
858
|
+
|
|
859
|
+
def check_security(self) -> Tuple[bool, str]:
|
|
860
|
+
security = self.config_dict.get("security")
|
|
861
|
+
if security is None:
|
|
862
|
+
return True, ""
|
|
863
|
+
|
|
864
|
+
outbound_firewall = security.get("outbound_firewall")
|
|
865
|
+
|
|
866
|
+
if outbound_firewall is None:
|
|
867
|
+
return True, ""
|
|
868
|
+
|
|
869
|
+
if outbound_firewall is not None and outbound_firewall not in (
|
|
870
|
+
"block",
|
|
871
|
+
"monitor",
|
|
872
|
+
):
|
|
873
|
+
return (
|
|
874
|
+
False,
|
|
875
|
+
'Unrecognized outbound_firewall value "%s"' % outbound_firewall,
|
|
876
|
+
)
|
|
877
|
+
|
|
878
|
+
unknown_keys = set(security.keys()) - {
|
|
879
|
+
"outbound_firewall",
|
|
880
|
+
}
|
|
881
|
+
if unknown_keys:
|
|
882
|
+
return (
|
|
883
|
+
False,
|
|
884
|
+
'Unrecognized items in security dict of service config: "%s"'
|
|
885
|
+
% ",".join(unknown_keys),
|
|
886
|
+
)
|
|
887
|
+
|
|
888
|
+
return True, ""
|
|
889
|
+
|
|
890
|
+
def check_dependencies_reference(self) -> Tuple[bool, str]:
|
|
891
|
+
dependencies_reference = self.config_dict.get("dependencies_reference")
|
|
892
|
+
if dependencies_reference is None:
|
|
893
|
+
return True, ""
|
|
894
|
+
|
|
895
|
+
dependencies = self.config_dict.get("dependencies")
|
|
896
|
+
if dependencies is None:
|
|
897
|
+
return (
|
|
898
|
+
False,
|
|
899
|
+
'dependencies_reference "%s" declared but no dependencies found'
|
|
900
|
+
% dependencies_reference,
|
|
901
|
+
)
|
|
902
|
+
|
|
903
|
+
if dependencies_reference not in dependencies:
|
|
904
|
+
return (
|
|
905
|
+
False,
|
|
906
|
+
'dependencies_reference "%s" not found in dependencies dictionary'
|
|
907
|
+
% dependencies_reference,
|
|
908
|
+
)
|
|
909
|
+
|
|
910
|
+
return True, ""
|
|
911
|
+
|
|
912
|
+
def check(self, param: str) -> Tuple[bool, str]:
|
|
913
|
+
check_methods = {
|
|
914
|
+
"cpus": self.check_cpus,
|
|
915
|
+
"mem": self.check_mem,
|
|
916
|
+
"security": self.check_security,
|
|
917
|
+
"dependencies_reference": self.check_dependencies_reference,
|
|
918
|
+
"deploy_group": self.check_deploy_group,
|
|
919
|
+
}
|
|
920
|
+
check_method = check_methods.get(param)
|
|
921
|
+
if check_method is not None:
|
|
922
|
+
return check_method()
|
|
923
|
+
else:
|
|
924
|
+
return (
|
|
925
|
+
False,
|
|
926
|
+
'Your service config specifies "%s", an unsupported parameter.' % param,
|
|
927
|
+
)
|
|
928
|
+
|
|
929
|
+
def validate(
|
|
930
|
+
self,
|
|
931
|
+
params: Optional[List[str]] = None,
|
|
932
|
+
) -> List[str]:
|
|
933
|
+
if params is None:
|
|
934
|
+
params = [
|
|
935
|
+
"cpus",
|
|
936
|
+
"mem",
|
|
937
|
+
"security",
|
|
938
|
+
"dependencies_reference",
|
|
939
|
+
"deploy_group",
|
|
940
|
+
]
|
|
941
|
+
error_msgs = []
|
|
942
|
+
for param in params:
|
|
943
|
+
check_passed, check_msg = self.check(param)
|
|
944
|
+
if not check_passed:
|
|
945
|
+
error_msgs.append(check_msg)
|
|
946
|
+
return error_msgs
|
|
947
|
+
|
|
948
|
+
def check_deploy_group(self) -> Tuple[bool, str]:
|
|
949
|
+
deploy_group = self.get_deploy_group()
|
|
950
|
+
if deploy_group is not None:
|
|
951
|
+
pipeline_deploy_groups = get_pipeline_deploy_groups(
|
|
952
|
+
service=self.service, soa_dir=self.soa_dir
|
|
953
|
+
)
|
|
954
|
+
if deploy_group not in pipeline_deploy_groups:
|
|
955
|
+
return (
|
|
956
|
+
False,
|
|
957
|
+
f"{self.service}.{self.instance} uses deploy_group {deploy_group}, but {deploy_group} is not deployed to in deploy.yaml",
|
|
958
|
+
) # noqa: E501
|
|
959
|
+
return True, ""
|
|
960
|
+
|
|
961
|
+
    def get_extra_volumes(self) -> List[DockerVolume]:
        """Extra volumes are a specially formatted list of dictionaries that should
        be bind mounted in a container. The format of the dictionaries should
        conform to the `Mesos container volumes spec
        <https://mesosphere.github.io/marathon/docs/native-docker.html>`_"""
        return self.config_dict.get("extra_volumes", [])

    def get_aws_ebs_volumes(self) -> List[AwsEbsVolume]:
        return self.config_dict.get("aws_ebs_volumes", [])

    def get_secret_volumes(self) -> List[SecretVolume]:
        return self.config_dict.get("secret_volumes", [])

    def get_projected_sa_volumes(self) -> List[ProjectedSAVolume]:
        return self.config_dict.get("projected_sa_volumes", [])

    def get_iam_role(self) -> str:
        return self.config_dict.get("iam_role", "")

    def get_iam_role_provider(self) -> str:
        return self.config_dict.get("iam_role_provider", "aws")

    def get_role(self) -> Optional[str]:
        """Which mesos role of nodes this job should run on."""
        return self.config_dict.get("role")

    def get_pool(self) -> str:
        """Which pool of nodes this job should run on. This can be used to mitigate noisy neighbors, by putting
        particularly noisy or noise-sensitive jobs into different pools.

        This is implemented with an attribute "pool" on each mesos slave and by adding a constraint or node selector.

        Eventually this may be implemented with Mesos roles, once a framework can register under multiple roles.

        :returns: the "pool" attribute in your config dict, or the string "default" if not specified.
        """
        return self.config_dict.get("pool", "default")

    def get_pool_constraints(self) -> List[Constraint]:
        pool = self.get_pool()
        return [["pool", "LIKE", pool]]

    def get_constraints(self) -> Optional[List[Constraint]]:
        return stringify_constraints(self.config_dict.get("constraints", None))

    def get_extra_constraints(self) -> List[Constraint]:
        return stringify_constraints(self.config_dict.get("extra_constraints", []))

    def get_net(self) -> str:
        """
        :returns: the docker networking mode the container should be started with.
        """
        return self.config_dict.get("net", "bridge")

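    # Illustrative sketch of the shapes these getters return (hypothetical
    # config_dict values):
    #
    # >>> config_dict = {
    # ...     "extra_volumes": [
    # ...         {"hostPath": "/nail/srv", "containerPath": "/nail/srv", "mode": "RO"},
    # ...     ],
    # ...     "pool": "big-jobs",
    # ... }
    # With that config, get_extra_volumes() returns the list above unchanged,
    # get_pool() returns "big-jobs", and get_pool_constraints() returns
    # [["pool", "LIKE", "big-jobs"]].
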
    def get_volumes(
        self,
        system_volumes: Sequence[DockerVolume],
        uses_bulkdata_default: bool = False,
    ) -> List[DockerVolume]:
        volumes = list(system_volumes) + list(self.get_extra_volumes())
        # we used to add bulkdata as a default mount - but as part of the
        # effort to deprecate the entire system, we're swapping to an opt-in
        # model so that we can shrink the blast radius of any changes
        if self.config_dict.get(
            "uses_bulkdata",
            uses_bulkdata_default,
        ):
            # bulkdata is mounted RO as the data is produced by another
            # system and we want to ensure that there are no inadvertent
            # changes by misbehaved code
            volumes.append(
                {
                    "hostPath": "/nail/bulkdata",
                    "containerPath": "/nail/bulkdata",
                    "mode": "RO",
                }
            )
        return _reorder_docker_volumes(volumes)

    def get_persistent_volumes(self) -> Sequence[PersistentVolume]:
        return self.config_dict.get("persistent_volumes", [])

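    # Illustrative sketch of the opt-in bulkdata behaviour described above
    # (hypothetical volumes; system_volumes would normally come from the
    # system paasta config):
    #
    # >>> system_volumes = [
    # ...     {"hostPath": "/etc/foo", "containerPath": "/etc/foo", "mode": "RO"},
    # ... ]
    # If config_dict has "uses_bulkdata": True (or uses_bulkdata_default is
    # True), get_volumes(system_volumes) also appends the read-only
    # /nail/bulkdata mount before passing the result through
    # _reorder_docker_volumes().
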
    def get_dependencies_reference(self) -> Optional[str]:
        """Get the reference to an entry in dependencies.yaml

        Defaults to None if not specified in the config.

        :returns: A string specified in the config, None if not specified"""
        return self.config_dict.get("dependencies_reference")

    def get_dependencies(self) -> Optional[Dict]:
        """Get the contents of the dependencies_dict pointed to by the dependency_reference or
        'main' if no dependency_reference exists

        Defaults to None if not specified in the config.

        :returns: A list of dictionaries specified in the dependencies_dict, None if not specified
        """
        dependencies = self.config_dict.get("dependencies")
        if not dependencies:
            return None
        dependency_ref = self.get_dependencies_reference() or "main"
        return dependencies.get(dependency_ref)

    def get_outbound_firewall(self) -> Optional[str]:
        """Return 'block', 'monitor', or None as configured in security->outbound_firewall

        Defaults to None if not specified in the config

        :returns: A string specified in the config, None if not specified"""
        security = self.config_dict.get("security")
        if not security:
            return None
        return security.get("outbound_firewall")

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, type(self)):
            return (
                self.config_dict == other.config_dict
                and self.branch_dict == other.branch_dict
                and self.cluster == other.cluster
                and self.instance == other.instance
                and self.service == other.service
            )
        else:
            return False


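# Illustrative sketch of how dependencies_reference resolves (hypothetical
# config_dict, roughly mirroring what a dependencies.yaml entry deserializes
# to):
#
# >>> config_dict = {
# ...     "dependencies_reference": "main",
# ...     "dependencies": {
# ...         "main": [{"well-known": "internet"}],
# ...     },
# ... }
# get_dependencies_reference() returns "main"; get_dependencies() looks that
# key up in the dependencies dict (falling back to "main" when no reference
# is set) and returns [{"well-known": "internet"}].
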
def stringify_constraint(usc: UnstringifiedConstraint) -> Constraint:
    return [str(x) for x in usc]


def stringify_constraints(
    uscs: Optional[List[UnstringifiedConstraint]],
) -> List[Constraint]:
    if uscs is None:
        return None
    return [stringify_constraint(usc) for usc in uscs]


@time_cache(ttl=60)
def validate_service_instance(
    service: str, instance: str, cluster: str, soa_dir: str
) -> str:
    possibilities: List[str] = []
    for instance_type in INSTANCE_TYPES:
        sis = get_service_instance_list(
            service=service,
            cluster=cluster,
            instance_type=instance_type,
            soa_dir=soa_dir,
        )
        if (service, instance) in sis:
            return instance_type
        possibilities.extend(si[1] for si in sis)
    else:
        suggestions = suggest_possibilities(word=instance, possibilities=possibilities)
        raise NoConfigurationForServiceError(
            f"Error: {compose_job_id(service, instance)} doesn't look like it has been configured "
            f"to run on the {cluster} cluster.{suggestions}"
        )


_ComposeRetT = TypeVar("_ComposeRetT")
_ComposeInnerRetT = TypeVar("_ComposeInnerRetT")


def compose(
    func_one: Callable[[_ComposeInnerRetT], _ComposeRetT],
    func_two: Callable[..., _ComposeInnerRetT],
) -> Callable[..., _ComposeRetT]:
    def composed(*args: Any, **kwargs: Any) -> _ComposeRetT:
        return func_one(func_two(*args, **kwargs))

    return composed


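# Illustrative sketch: compose() is used below to build LOG_COMPONENTS'
# "app_output" color, so compose(PaastaColors.yellow, PaastaColors.bold)
# behaves like:
#
# >>> yellow_bold = compose(PaastaColors.yellow, PaastaColors.bold)
# >>> yellow_bold("hi") == PaastaColors.yellow(PaastaColors.bold("hi"))
# True
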
class PaastaColors:
    """Collection of static variables and methods to assist in coloring text."""

    # ANSI color codes
    BLUE = "\033[34m"
    BOLD = "\033[1m"
    CYAN = "\033[36m"
    DEFAULT = "\033[0m"
    GREEN = "\033[32m"
    GREY = "\033[38;5;242m"
    MAGENTA = "\033[35m"
    RED = "\033[31m"
    YELLOW = "\033[33m"

    @staticmethod
    def bold(text: str) -> str:
        """Return bolded text.

        :param text: a string
        :return: text color coded with ANSI bold
        """
        return PaastaColors.color_text(PaastaColors.BOLD, text)

    @staticmethod
    def blue(text: str) -> str:
        """Return text that can be printed blue.

        :param text: a string
        :return: text color coded with ANSI blue
        """
        return PaastaColors.color_text(PaastaColors.BLUE, text)

    @staticmethod
    def green(text: str) -> str:
        """Return text that can be printed green.

        :param text: a string
        :return: text color coded with ANSI green"""
        return PaastaColors.color_text(PaastaColors.GREEN, text)

    @staticmethod
    def red(text: str) -> str:
        """Return text that can be printed red.

        :param text: a string
        :return: text color coded with ANSI red"""
        return PaastaColors.color_text(PaastaColors.RED, text)

    @staticmethod
    def magenta(text: str) -> str:
        """Return text that can be printed magenta.

        :param text: a string
        :return: text color coded with ANSI magenta"""
        return PaastaColors.color_text(PaastaColors.MAGENTA, text)

    @staticmethod
    def color_text(color: str, text: str) -> str:
        """Return text that can be printed color.

        :param color: ANSI color code
        :param text: a string
        :return: a string with ANSI color encoding"""

        if os.getenv("NO_COLOR", "0") == "1":
            return text

        # any time text returns to default, we want to insert our color.
        replaced = text.replace(PaastaColors.DEFAULT, PaastaColors.DEFAULT + color)
        # then wrap the beginning and end in our color/default.
        return color + replaced + PaastaColors.DEFAULT

    @staticmethod
    def cyan(text: str) -> str:
        """Return text that can be printed cyan.

        :param text: a string
        :return: text color coded with ANSI cyan"""
        return PaastaColors.color_text(PaastaColors.CYAN, text)

    @staticmethod
    def yellow(text: str) -> str:
        """Return text that can be printed yellow.

        :param text: a string
        :return: text color coded with ANSI yellow"""
        return PaastaColors.color_text(PaastaColors.YELLOW, text)

    @staticmethod
    def grey(text: str) -> str:
        return PaastaColors.color_text(PaastaColors.GREY, text)

    @staticmethod
    def default(text: str) -> str:
        return PaastaColors.color_text(PaastaColors.DEFAULT, text)


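# Illustrative sketch of color_text()'s nesting behaviour: any reset sequence
# already present in the text is re-opened with the outer color, so nested
# colors do not bleed back to the terminal default (values shown are the ANSI
# escapes defined above):
#
# >>> PaastaColors.red("hi")
# '\x1b[31mhi\x1b[0m'
# >>> PaastaColors.blue("a " + PaastaColors.red("b") + " c")
# '\x1b[34ma \x1b[31mb\x1b[0m\x1b[34m c\x1b[0m'
# Setting the environment variable NO_COLOR=1 makes color_text() return the
# text unchanged.
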
LOG_COMPONENTS: Mapping[str, Mapping[str, Any]] = OrderedDict(
    [
        (
            "build",
            {
                "color": PaastaColors.blue,
                "help": (
                    "Logs for pre-deployment steps, such as itests, "
                    "image building, and security checks."
                ),
                "source_env": "devc",
            },
        ),
        (
            "deploy",
            {
                "color": PaastaColors.cyan,
                "help": (
                    "Logs for deployment steps and actions, such as "
                    "bouncing, start/stop/restart, and instance cleanup."
                ),
                "additional_source_envs": ["devc"],
            },
        ),
        (
            "monitoring",
            {
                "color": PaastaColors.green,
                "help": "Logs from Sensu checks for the service",
            },
        ),
        (
            "app_output",
            {
                "color": compose(PaastaColors.yellow, PaastaColors.bold),
                "help": (
                    "Stderr and stdout from a service's running processes. "
                    "Alias for both the stdout and stderr components."
                ),
            },
        ),
        (
            "stdout",
            {
                "color": PaastaColors.yellow,
                "help": "Stdout from a service's running processes.",
            },
        ),
        (
            "stderr",
            {
                "color": PaastaColors.yellow,
                "help": "Stderr from a service's running processes.",
            },
        ),
        (
            "security",
            {
                "color": PaastaColors.red,
                "help": "Logs from security-related services such as firewall monitoring",
            },
        ),
        ("oom", {"color": PaastaColors.red, "help": "Kernel OOM events."}),
        (
            "task_lifecycle",
            {
                "color": PaastaColors.bold,
                "help": "Logs that tell you about task startup, failures, healthchecks, etc.",
            },
        ),
        # I'm leaving these planned components here since they provide some hints
        # about where we want to go. See PAASTA-78.
        #
        # But I'm commenting them out so they don't delude users into believing we
        # can expose logs that we cannot actually expose. See PAASTA-927.
        #
        # ('app_request', {
        #     'color': PaastaColors.bold,
        #     'help': 'The request log for the service. Defaults to "service_NAME_requests"',
        #     'command': 'scribe_reader -e ENV -f service_example_happyhour_requests',
        # }),
        # ('app_errors', {
        #     'color': PaastaColors.red,
        #     'help': 'Application error log, defaults to "stream_service_NAME_errors"',
        #     'command': 'scribe_reader -e ENV -f stream_service_SERVICE_errors',
        # }),
        # ('lb_requests', {
        #     'color': PaastaColors.bold,
        #     'help': 'All requests from Smartstack haproxy',
        #     'command': 'NA - TODO: SRV-1130',
        # }),
        # ('lb_errors', {
        #     'color': PaastaColors.red,
        #     'help': 'Logs from Smartstack haproxy that have 400-500 error codes',
        #     'command': 'scribereader -e ENV -f stream_service_errors | grep SERVICE.instance',
        # }),
    ]
)


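# Illustrative sketch of how a component entry is consumed (e.g. by the
# `paasta logs` command): the "color" callable wraps a line for display and
# "help" is the human description.
#
# >>> LOG_COMPONENTS["deploy"]["help"]
# 'Logs for deployment steps and actions, such as bouncing, start/stop/restart, and instance cleanup.'
# >>> LOG_COMPONENTS["app_output"]["color"]("some stdout line")  # yellow + bold
# '\x1b[33m\x1b[1msome stdout line\x1b[0m\x1b[33m\x1b[0m'
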
class NoSuchLogComponent(Exception):
    pass


def validate_log_component(component: str) -> bool:
    if component in LOG_COMPONENTS.keys():
        return True
    else:
        raise NoSuchLogComponent


def get_git_url(service: str, soa_dir: str = DEFAULT_SOA_DIR) -> str:
    """Get the git url for a service. Assumes that the service's
    repo matches its name, and that it lives in services- i.e.
    if this is called with the string 'test', the returned
    url will be git@github.yelpcorp.com:services/test.

    :param service: The service name to get a URL for
    :returns: A git url to the service's repository"""
    general_config = service_configuration_lib.read_service_configuration(
        service, soa_dir=soa_dir
    )
    # TODO: PAASTA-16927: get this from system config `.git_config`
    default_location = format_git_url(
        "git", "github.yelpcorp.com", f"services/{service}"
    )
    return general_config.get("git_url", default_location)


def format_git_url(git_user: str, git_server: str, repo_name: str) -> str:
    return f"{git_user}@{git_server}:{repo_name}"


def get_service_docker_registry(
    service: str,
    soa_dir: str = DEFAULT_SOA_DIR,
    system_config: Optional["SystemPaastaConfig"] = None,
) -> str:
    if service is None:
        raise NotImplementedError('"None" is not a valid service')
    service_configuration = service_configuration_lib.read_service_configuration(
        service, soa_dir
    )
    try:
        return service_configuration["docker_registry"]
    except KeyError:
        if not system_config:
            system_config = load_system_paasta_config()
        return system_config.get_system_docker_registry()


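# Illustrative sketch of get_git_url()'s fallback described in its docstring
# (the service's own git_url setting, when present, wins; here we assume the
# service config defines none):
#
# >>> get_git_url("test")
# 'git@github.yelpcorp.com:services/test'
# >>> format_git_url("git", "github.yelpcorp.com", "services/test")
# 'git@github.yelpcorp.com:services/test'
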
class NoSuchLogLevel(Exception):
    pass


class LogWriterConfig(TypedDict):
    driver: str
    options: Dict


class LogReaderConfig(TypedDict):
    driver: str
    options: Dict
    components: Optional[List]


# The active log writer.
_log_writer = None
# The map of name -> LogWriter subclasses, used by configure_log.
_log_writer_classes = {}


class LogWriter:
    def __init__(self, **kwargs: Any) -> None:
        pass

    def log(
        self,
        service: str,
        line: str,
        component: str,
        level: str = DEFAULT_LOGLEVEL,
        cluster: str = ANY_CLUSTER,
        instance: str = ANY_INSTANCE,
    ) -> None:
        raise NotImplementedError()

    def log_audit(
        self,
        user: str,
        host: str,
        action: str,
        action_details: dict = None,
        service: str = None,
        cluster: str = ANY_CLUSTER,
        instance: str = ANY_INSTANCE,
    ) -> None:
        raise NotImplementedError()


_LogWriterTypeT = TypeVar("_LogWriterTypeT", bound=Type[LogWriter])


def register_log_writer(name: str) -> Callable[[_LogWriterTypeT], _LogWriterTypeT]:
    """Returns a decorator that registers that log writer class at a given name
    so get_log_writer_class can find it."""

    def outer(log_writer_class: _LogWriterTypeT) -> _LogWriterTypeT:
        _log_writer_classes[name] = log_writer_class
        return log_writer_class

    return outer


def get_log_writer_class(name: str) -> Type[LogWriter]:
    return _log_writer_classes[name]


def list_log_writers() -> Iterable[str]:
    return _log_writer_classes.keys()


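# Illustrative sketch of the registry pattern above; the concrete writers
# registered later in this module ("scribe", "monk", "null", "file") follow
# the same shape. The writer name below is hypothetical:
#
# >>> @register_log_writer("noop-example")
# ... class NoopExampleWriter(LogWriter):
# ...     def log(self, service, line, component, level=DEFAULT_LOGLEVEL,
# ...             cluster=ANY_CLUSTER, instance=ANY_INSTANCE):
# ...         pass
# >>> get_log_writer_class("noop-example") is NoopExampleWriter
# True
# >>> "noop-example" in list_log_writers()
# True
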
def configure_log() -> None:
    """We will log to the yocalhost-bound scribe."""
    log_writer_config = load_system_paasta_config().get_log_writer()
    global _log_writer
    LogWriterClass = get_log_writer_class(log_writer_config["driver"])
    _log_writer = LogWriterClass(**log_writer_config.get("options", {}))


def _log(
    service: str,
    line: str,
    component: str,
    level: str = DEFAULT_LOGLEVEL,
    cluster: str = ANY_CLUSTER,
    instance: str = ANY_INSTANCE,
) -> None:
    if _log_writer is None:
        configure_log()
    return _log_writer.log(
        service=service,
        line=line,
        component=component,
        level=level,
        cluster=cluster,
        instance=instance,
    )


def _log_audit(
    action: str,
    action_details: dict = None,
    service: str = None,
    cluster: str = ANY_CLUSTER,
    instance: str = ANY_INSTANCE,
) -> None:
    if _log_writer is None:
        configure_log()

    user = get_username()
    host = get_hostname()

    return _log_writer.log_audit(
        user=user,
        host=host,
        action=action,
        action_details=action_details,
        service=service,
        cluster=cluster,
        instance=instance,
    )


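# Illustrative sketch of the lazy wiring above: the first _log() call reads
# the system config's "log_writer" entry and instantiates the matching
# registered writer class. A hypothetical system config file might contain:
#
#   {"log_writer": {"driver": "file",
#                   "options": {"path_format": "/var/log/paasta/{component}.log"}}}
#
# configure_log() would then do, in effect:
#   LogWriterClass = get_log_writer_class("file")   # -> FileLogWriter, registered below
#   _log_writer = LogWriterClass(path_format="/var/log/paasta/{component}.log")
# and _log(...) simply forwards service/line/component/level/cluster/instance
# to _log_writer.log(...).
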
def _now() -> str:
|
|
1510
|
+
return datetime.datetime.utcnow().isoformat()
|
|
1511
|
+
|
|
1512
|
+
|
|
1513
|
+
def remove_ansi_escape_sequences(line: str) -> str:
|
|
1514
|
+
"""Removes ansi escape sequences from the given line."""
|
|
1515
|
+
return no_escape.sub("", line)
|
|
1516
|
+
|
|
1517
|
+
|
|
1518
|
+
def format_log_line(
|
|
1519
|
+
level: str,
|
|
1520
|
+
cluster: str,
|
|
1521
|
+
service: str,
|
|
1522
|
+
instance: str,
|
|
1523
|
+
component: str,
|
|
1524
|
+
line: str,
|
|
1525
|
+
timestamp: str = None,
|
|
1526
|
+
) -> str:
|
|
1527
|
+
"""Accepts a string 'line'.
|
|
1528
|
+
|
|
1529
|
+
Returns an appropriately-formatted dictionary which can be serialized to
|
|
1530
|
+
JSON for logging and which contains 'line'.
|
|
1531
|
+
"""
|
|
1532
|
+
|
|
1533
|
+
validate_log_component(component)
|
|
1534
|
+
if not timestamp:
|
|
1535
|
+
timestamp = _now()
|
|
1536
|
+
line = remove_ansi_escape_sequences(line.strip())
|
|
1537
|
+
message = json.dumps(
|
|
1538
|
+
{
|
|
1539
|
+
"timestamp": timestamp,
|
|
1540
|
+
"level": level,
|
|
1541
|
+
"cluster": cluster,
|
|
1542
|
+
"service": service,
|
|
1543
|
+
"instance": instance,
|
|
1544
|
+
"component": component,
|
|
1545
|
+
"message": line,
|
|
1546
|
+
},
|
|
1547
|
+
sort_keys=True,
|
|
1548
|
+
)
|
|
1549
|
+
return message
|
|
1550
|
+
|
|
1551
|
+
|
|
1552
|
+
def format_audit_log_line(
|
|
1553
|
+
cluster: str,
|
|
1554
|
+
instance: str,
|
|
1555
|
+
user: str,
|
|
1556
|
+
host: str,
|
|
1557
|
+
action: str,
|
|
1558
|
+
action_details: dict = None,
|
|
1559
|
+
service: str = None,
|
|
1560
|
+
timestamp: str = None,
|
|
1561
|
+
) -> str:
|
|
1562
|
+
"""Accepts:
|
|
1563
|
+
|
|
1564
|
+
* a string 'user' describing the user that initiated the action
|
|
1565
|
+
* a string 'host' describing the server where the user initiated the action
|
|
1566
|
+
* a string 'action' describing an action performed by paasta_tools
|
|
1567
|
+
* a dict 'action_details' optional information about the action
|
|
1568
|
+
|
|
1569
|
+
Returns an appropriately-formatted dictionary which can be serialized to
|
|
1570
|
+
JSON for logging and which contains details about an action performed on
|
|
1571
|
+
a service/instance.
|
|
1572
|
+
"""
|
|
1573
|
+
if not timestamp:
|
|
1574
|
+
timestamp = _now()
|
|
1575
|
+
if not action_details:
|
|
1576
|
+
action_details = {}
|
|
1577
|
+
|
|
1578
|
+
message = json.dumps(
|
|
1579
|
+
{
|
|
1580
|
+
"timestamp": timestamp,
|
|
1581
|
+
"cluster": cluster,
|
|
1582
|
+
"service": service,
|
|
1583
|
+
"instance": instance,
|
|
1584
|
+
"user": user,
|
|
1585
|
+
"host": host,
|
|
1586
|
+
"action": action,
|
|
1587
|
+
"action_details": action_details,
|
|
1588
|
+
},
|
|
1589
|
+
sort_keys=True,
|
|
1590
|
+
)
|
|
1591
|
+
return message
|
|
1592
|
+
|
|
1593
|
+
|
|
1594
|
+
def get_log_name_for_service(service: str, prefix: str = None) -> str:
|
|
1595
|
+
if prefix:
|
|
1596
|
+
return f"stream_paasta_{prefix}_{service}"
|
|
1597
|
+
return "stream_paasta_%s" % service
|
|
1598
|
+
|
|
1599
|
+
|
|
1600
|
+
try:
|
|
1601
|
+
import clog
|
|
1602
|
+
|
|
1603
|
+
# Somehow clog turns on DeprecationWarnings, so we need to disable them
|
|
1604
|
+
# again after importing it.
|
|
1605
|
+
warnings.filterwarnings("ignore", category=DeprecationWarning)
|
|
1606
|
+
|
|
1607
|
+
class CLogWriter(LogWriter):
|
|
1608
|
+
def __init__(self, **kwargs: Any):
|
|
1609
|
+
clog.config.configure(**kwargs)
|
|
1610
|
+
|
|
1611
|
+
def log(
|
|
1612
|
+
self,
|
|
1613
|
+
service: str,
|
|
1614
|
+
line: str,
|
|
1615
|
+
component: str,
|
|
1616
|
+
level: str = DEFAULT_LOGLEVEL,
|
|
1617
|
+
cluster: str = ANY_CLUSTER,
|
|
1618
|
+
instance: str = ANY_INSTANCE,
|
|
1619
|
+
) -> None:
|
|
1620
|
+
"""This expects someone (currently the paasta cli main()) to have already
|
|
1621
|
+
configured the log object. We'll just write things to it.
|
|
1622
|
+
"""
|
|
1623
|
+
if level == "event":
|
|
1624
|
+
print(f"[service {service}] {line}", file=sys.stdout)
|
|
1625
|
+
elif level == "debug":
|
|
1626
|
+
print(f"[service {service}] {line}", file=sys.stderr)
|
|
1627
|
+
else:
|
|
1628
|
+
raise NoSuchLogLevel
|
|
1629
|
+
log_name = get_log_name_for_service(service)
|
|
1630
|
+
formatted_line = format_log_line(
|
|
1631
|
+
level, cluster, service, instance, component, line
|
|
1632
|
+
)
|
|
1633
|
+
clog.log_line(log_name, formatted_line)
|
|
1634
|
+
|
|
1635
|
+
def log_audit(
|
|
1636
|
+
self,
|
|
1637
|
+
user: str,
|
|
1638
|
+
host: str,
|
|
1639
|
+
action: str,
|
|
1640
|
+
action_details: dict = None,
|
|
1641
|
+
service: str = None,
|
|
1642
|
+
cluster: str = ANY_CLUSTER,
|
|
1643
|
+
instance: str = ANY_INSTANCE,
|
|
1644
|
+
) -> None:
|
|
1645
|
+
log_name = AUDIT_LOG_STREAM
|
|
1646
|
+
formatted_line = format_audit_log_line(
|
|
1647
|
+
user=user,
|
|
1648
|
+
host=host,
|
|
1649
|
+
action=action,
|
|
1650
|
+
action_details=action_details,
|
|
1651
|
+
service=service,
|
|
1652
|
+
cluster=cluster,
|
|
1653
|
+
instance=instance,
|
|
1654
|
+
)
|
|
1655
|
+
clog.log_line(log_name, formatted_line)
|
|
1656
|
+
|
|
1657
|
+
@register_log_writer("monk")
|
|
1658
|
+
class MonkLogWriter(CLogWriter):
|
|
1659
|
+
def __init__(
|
|
1660
|
+
self,
|
|
1661
|
+
monk_host: str = "169.254.255.254",
|
|
1662
|
+
monk_port: int = 1473,
|
|
1663
|
+
monk_disable: bool = False,
|
|
1664
|
+
**kwargs: Any,
|
|
1665
|
+
) -> None:
|
|
1666
|
+
super().__init__(
|
|
1667
|
+
monk_host=monk_host,
|
|
1668
|
+
monk_port=monk_port,
|
|
1669
|
+
monk_disable=monk_disable,
|
|
1670
|
+
)
|
|
1671
|
+
|
|
1672
|
+
@register_log_writer("scribe")
|
|
1673
|
+
class ScribeLogWriter(CLogWriter):
|
|
1674
|
+
def __init__(
|
|
1675
|
+
self,
|
|
1676
|
+
scribe_host: str = "169.254.255.254",
|
|
1677
|
+
scribe_port: int = 1463,
|
|
1678
|
+
scribe_disable: bool = False,
|
|
1679
|
+
**kwargs: Any,
|
|
1680
|
+
) -> None:
|
|
1681
|
+
super().__init__(
|
|
1682
|
+
scribe_host=scribe_host,
|
|
1683
|
+
scribe_port=scribe_port,
|
|
1684
|
+
scribe_disable=scribe_disable,
|
|
1685
|
+
)
|
|
1686
|
+
|
|
1687
|
+
except ImportError:
|
|
1688
|
+
warnings.warn("clog is unavailable")
|
|
1689
|
+
|
|
1690
|
+
|
|
1691
|
+
@register_log_writer("null")
|
|
1692
|
+
class NullLogWriter(LogWriter):
|
|
1693
|
+
"""A LogWriter class that doesn't do anything. Primarily useful for integration tests where we don't care about
|
|
1694
|
+
logs."""
|
|
1695
|
+
|
|
1696
|
+
def __init__(self, **kwargs: Any) -> None:
|
|
1697
|
+
pass
|
|
1698
|
+
|
|
1699
|
+
def log(
|
|
1700
|
+
self,
|
|
1701
|
+
service: str,
|
|
1702
|
+
line: str,
|
|
1703
|
+
component: str,
|
|
1704
|
+
level: str = DEFAULT_LOGLEVEL,
|
|
1705
|
+
cluster: str = ANY_CLUSTER,
|
|
1706
|
+
instance: str = ANY_INSTANCE,
|
|
1707
|
+
) -> None:
|
|
1708
|
+
pass
|
|
1709
|
+
|
|
1710
|
+
def log_audit(
|
|
1711
|
+
self,
|
|
1712
|
+
user: str,
|
|
1713
|
+
host: str,
|
|
1714
|
+
action: str,
|
|
1715
|
+
action_details: dict = None,
|
|
1716
|
+
service: str = None,
|
|
1717
|
+
cluster: str = ANY_CLUSTER,
|
|
1718
|
+
instance: str = ANY_INSTANCE,
|
|
1719
|
+
) -> None:
|
|
1720
|
+
pass
|
|
1721
|
+
|
|
1722
|
+
|
|
1723
|
+
@contextlib.contextmanager
|
|
1724
|
+
def _empty_context() -> Iterator[None]:
|
|
1725
|
+
yield
|
|
1726
|
+
|
|
1727
|
+
|
|
1728
|
+
_AnyIO = Union[io.IOBase, IO]
|
|
1729
|
+
|
|
1730
|
+
|
|
1731
|
+
@register_log_writer("file")
|
|
1732
|
+
class FileLogWriter(LogWriter):
|
|
1733
|
+
def __init__(
|
|
1734
|
+
self,
|
|
1735
|
+
path_format: str,
|
|
1736
|
+
mode: str = "a+",
|
|
1737
|
+
line_delimiter: str = "\n",
|
|
1738
|
+
flock: bool = False,
|
|
1739
|
+
) -> None:
|
|
1740
|
+
self.path_format = path_format
|
|
1741
|
+
self.mode = mode
|
|
1742
|
+
self.flock = flock
|
|
1743
|
+
self.line_delimiter = line_delimiter
|
|
1744
|
+
|
|
1745
|
+
def maybe_flock(self, fd: _AnyIO) -> ContextManager:
|
|
1746
|
+
if self.flock:
|
|
1747
|
+
# https://github.com/python/typeshed/issues/1548
|
|
1748
|
+
return flock(fd)
|
|
1749
|
+
else:
|
|
1750
|
+
return _empty_context()
|
|
1751
|
+
|
|
1752
|
+
def format_path(
|
|
1753
|
+
self, service: str, component: str, level: str, cluster: str, instance: str
|
|
1754
|
+
) -> str:
|
|
1755
|
+
return self.path_format.format(
|
|
1756
|
+
service=service,
|
|
1757
|
+
component=component,
|
|
1758
|
+
level=level,
|
|
1759
|
+
cluster=cluster,
|
|
1760
|
+
instance=instance,
|
|
1761
|
+
)
|
|
1762
|
+
|
|
1763
|
+
def _log_message(self, path: str, message: str) -> None:
|
|
1764
|
+
# We use io.FileIO here because it guarantees that write() is implemented with a single write syscall,
|
|
1765
|
+
# and on Linux, writes to O_APPEND files with a single write syscall are atomic.
|
|
1766
|
+
#
|
|
1767
|
+
# https://docs.python.org/2/library/io.html#io.FileIO
|
|
1768
|
+
# http://article.gmane.org/gmane.linux.kernel/43445
|
|
1769
|
+
|
|
1770
|
+
try:
|
|
1771
|
+
with io.FileIO(path, mode=self.mode, closefd=True) as f:
|
|
1772
|
+
with self.maybe_flock(f):
|
|
1773
|
+
f.write(message.encode("UTF-8"))
|
|
1774
|
+
except IOError as e:
|
|
1775
|
+
print(
|
|
1776
|
+
"Could not log to {}: {}: {} -- would have logged: {}".format(
|
|
1777
|
+
path, type(e).__name__, str(e), message
|
|
1778
|
+
),
|
|
1779
|
+
file=sys.stderr,
|
|
1780
|
+
)
|
|
1781
|
+
|
|
1782
|
+
def log(
|
|
1783
|
+
self,
|
|
1784
|
+
service: str,
|
|
1785
|
+
line: str,
|
|
1786
|
+
component: str,
|
|
1787
|
+
level: str = DEFAULT_LOGLEVEL,
|
|
1788
|
+
cluster: str = ANY_CLUSTER,
|
|
1789
|
+
instance: str = ANY_INSTANCE,
|
|
1790
|
+
) -> None:
|
|
1791
|
+
path = self.format_path(service, component, level, cluster, instance)
|
|
1792
|
+
to_write = "{}{}".format(
|
|
1793
|
+
format_log_line(level, cluster, service, instance, component, line),
|
|
1794
|
+
self.line_delimiter,
|
|
1795
|
+
)
|
|
1796
|
+
|
|
1797
|
+
self._log_message(path, to_write)
|
|
1798
|
+
|
|
1799
|
+
def log_audit(
|
|
1800
|
+
self,
|
|
1801
|
+
user: str,
|
|
1802
|
+
host: str,
|
|
1803
|
+
action: str,
|
|
1804
|
+
action_details: dict = None,
|
|
1805
|
+
service: str = None,
|
|
1806
|
+
cluster: str = ANY_CLUSTER,
|
|
1807
|
+
instance: str = ANY_INSTANCE,
|
|
1808
|
+
) -> None:
|
|
1809
|
+
path = self.format_path(AUDIT_LOG_STREAM, "", "", cluster, instance)
|
|
1810
|
+
formatted_line = format_audit_log_line(
|
|
1811
|
+
user=user,
|
|
1812
|
+
host=host,
|
|
1813
|
+
action=action,
|
|
1814
|
+
action_details=action_details,
|
|
1815
|
+
service=service,
|
|
1816
|
+
cluster=cluster,
|
|
1817
|
+
instance=instance,
|
|
1818
|
+
)
|
|
1819
|
+
|
|
1820
|
+
to_write = f"{formatted_line}{self.line_delimiter}"
|
|
1821
|
+
|
|
1822
|
+
self._log_message(path, to_write)
|
|
1823
|
+
|
|
1824
|
+
|
|
1825
|
+
@contextlib.contextmanager
|
|
1826
|
+
def flock(fd: _AnyIO) -> Iterator[None]:
|
|
1827
|
+
try:
|
|
1828
|
+
fcntl.flock(fd.fileno(), fcntl.LOCK_EX)
|
|
1829
|
+
yield
|
|
1830
|
+
finally:
|
|
1831
|
+
fcntl.flock(fd.fileno(), fcntl.LOCK_UN)
|
|
1832
|
+
|
|
1833
|
+
|
|
1834
|
+
@contextlib.contextmanager
|
|
1835
|
+
def timed_flock(fd: _AnyIO, seconds: int = 1) -> Iterator[None]:
|
|
1836
|
+
"""Attempt to grab an exclusive flock with a timeout. Uses Timeout, so will
|
|
1837
|
+
raise a TimeoutError if `seconds` elapses before the flock can be obtained
|
|
1838
|
+
"""
|
|
1839
|
+
# We don't want to wrap the user code in the timeout, just the flock grab
|
|
1840
|
+
flock_context = flock(fd)
|
|
1841
|
+
with Timeout(seconds=seconds):
|
|
1842
|
+
flock_context.__enter__()
|
|
1843
|
+
try:
|
|
1844
|
+
yield
|
|
1845
|
+
finally:
|
|
1846
|
+
flock_context.__exit__(*sys.exc_info())
|
|
1847
|
+
|
|
1848
|
+
|
|
1849
|
+
def _timeout(process: Popen) -> None:
|
|
1850
|
+
"""Helper function for _run. It terminates the process.
|
|
1851
|
+
Doesn't raise OSError, if we try to terminate a non-existing
|
|
1852
|
+
process as there can be a very small window between poll() and kill()
|
|
1853
|
+
"""
|
|
1854
|
+
if process.poll() is None:
|
|
1855
|
+
try:
|
|
1856
|
+
# sending SIGKILL to the process
|
|
1857
|
+
process.kill()
|
|
1858
|
+
except OSError as e:
|
|
1859
|
+
# No such process error
|
|
1860
|
+
# The process could have been terminated meanwhile
|
|
1861
|
+
if e.errno != errno.ESRCH:
|
|
1862
|
+
raise
|
|
1863
|
+
|
|
1864
|
+
|
|
1865
|
+
class PaastaNotConfiguredError(Exception):
|
|
1866
|
+
pass
|
|
1867
|
+
|
|
1868
|
+
|
|
1869
|
+
class NoConfigurationForServiceError(Exception):
|
|
1870
|
+
pass
|
|
1871
|
+
|
|
1872
|
+
|
|
1873
|
+
def get_readable_files_in_glob(glob: str, path: str) -> List[str]:
|
|
1874
|
+
"""
|
|
1875
|
+
Returns a sorted list of files that are readable in an input glob by recursively searching a path
|
|
1876
|
+
"""
|
|
1877
|
+
globbed_files = []
|
|
1878
|
+
for root, dirs, files in os.walk(path):
|
|
1879
|
+
for f in files:
|
|
1880
|
+
fn = os.path.join(root, f)
|
|
1881
|
+
if os.path.isfile(fn) and os.access(fn, os.R_OK) and fnmatch(fn, glob):
|
|
1882
|
+
globbed_files.append(fn)
|
|
1883
|
+
return sorted(globbed_files)
|
|
1884
|
+
|
|
1885
|
+
|
|
1886
|
+
class ClusterAutoscalingResource(TypedDict):
|
|
1887
|
+
type: str
|
|
1888
|
+
id: str
|
|
1889
|
+
region: str
|
|
1890
|
+
pool: str
|
|
1891
|
+
min_capacity: int
|
|
1892
|
+
max_capacity: int
|
|
1893
|
+
|
|
1894
|
+
|
|
1895
|
+
IdToClusterAutoscalingResourcesDict = Dict[str, ClusterAutoscalingResource]
|
|
1896
|
+
|
|
1897
|
+
|
|
1898
|
+
class ResourcePoolSettings(TypedDict):
|
|
1899
|
+
target_utilization: float
|
|
1900
|
+
drain_timeout: int
|
|
1901
|
+
|
|
1902
|
+
|
|
1903
|
+
PoolToResourcePoolSettingsDict = Dict[str, ResourcePoolSettings]
|
|
1904
|
+
|
|
1905
|
+
|
|
1906
|
+
class LocalRunConfig(TypedDict, total=False):
|
|
1907
|
+
default_cluster: str
|
|
1908
|
+
|
|
1909
|
+
|
|
1910
|
+
class SparkRunConfig(TypedDict, total=False):
|
|
1911
|
+
default_cluster: str
|
|
1912
|
+
default_pool: str
|
|
1913
|
+
default_spark_driver_iam_role: str
|
|
1914
|
+
|
|
1915
|
+
|
|
1916
|
+
class PaastaNativeConfig(TypedDict, total=False):
|
|
1917
|
+
principal: str
|
|
1918
|
+
secret: str
|
|
1919
|
+
|
|
1920
|
+
|
|
1921
|
+
ExpectedSlaveAttributes = List[Dict[str, Any]]
|
|
1922
|
+
|
|
1923
|
+
|
|
1924
|
+
class KubeKindDict(TypedDict, total=False):
|
|
1925
|
+
singular: str
|
|
1926
|
+
plural: str
|
|
1927
|
+
|
|
1928
|
+
|
|
1929
|
+
class KubeCustomResourceDict(TypedDict, total=False):
|
|
1930
|
+
version: str
|
|
1931
|
+
file_prefix: str
|
|
1932
|
+
kube_kind: KubeKindDict
|
|
1933
|
+
group: str
|
|
1934
|
+
|
|
1935
|
+
|
|
1936
|
+
class KubeStateMetricsCollectorConfigDict(TypedDict, total=False):
|
|
1937
|
+
unaggregated_metrics: List[str]
|
|
1938
|
+
summed_metric_to_group_keys: Dict[str, List[str]]
|
|
1939
|
+
label_metric_to_label_key: Dict[str, List[str]]
|
|
1940
|
+
label_renames: Dict[str, str]
|
|
1941
|
+
|
|
1942
|
+
|
|
1943
|
+
class TopologySpreadConstraintDict(TypedDict, total=False):
|
|
1944
|
+
topology_key: str
|
|
1945
|
+
when_unsatisfiable: Literal["ScheduleAnyway", "DoNotSchedule"]
|
|
1946
|
+
max_skew: int
|
|
1947
|
+
|
|
1948
|
+
|
|
1949
|
+
class SystemPaastaConfigDict(TypedDict, total=False):
|
|
1950
|
+
allowed_pools: Dict[str, List[str]]
|
|
1951
|
+
api_client_timeout: int
|
|
1952
|
+
api_endpoints: Dict[str, str]
|
|
1953
|
+
api_profiling_config: Dict
|
|
1954
|
+
api_auth_sso_oidc_client_id: str
|
|
1955
|
+
auth_certificate_ttl: str
|
|
1956
|
+
auto_config_instance_types_enabled: Dict[str, bool]
|
|
1957
|
+
auto_config_instance_type_aliases: Dict[str, str]
|
|
1958
|
+
auto_hostname_unique_size: int
|
|
1959
|
+
cluster_fqdn_format: str
|
|
1960
|
+
clusters: Sequence[str]
|
|
1961
|
+
cluster: str
|
|
1962
|
+
cr_owners: Dict[str, str]
|
|
1963
|
+
dashboard_links: Dict[str, Dict[str, str]]
|
|
1964
|
+
datastore_credentials_vault_env_overrides: Dict[str, str]
|
|
1965
|
+
default_push_groups: List
|
|
1966
|
+
default_should_use_uwsgi_exporter: bool
|
|
1967
|
+
deploy_blacklist: UnsafeDeployBlacklist
|
|
1968
|
+
deployd_metrics_provider: str
|
|
1969
|
+
deploy_whitelist: UnsafeDeployWhitelist
|
|
1970
|
+
disabled_watchers: List
|
|
1971
|
+
dockercfg_location: str
|
|
1972
|
+
docker_registry: str
|
|
1973
|
+
enable_client_cert_auth: bool
|
|
1974
|
+
enable_nerve_readiness_check: bool
|
|
1975
|
+
enable_envoy_readiness_check: bool
|
|
1976
|
+
enforce_disk_quota: bool
|
|
1977
|
+
envoy_admin_domain_name: str
|
|
1978
|
+
envoy_admin_endpoint_format: str
|
|
1979
|
+
envoy_nerve_readiness_check_script: List[str]
|
|
1980
|
+
envoy_readiness_check_script: List[str]
|
|
1981
|
+
expected_slave_attributes: ExpectedSlaveAttributes
|
|
1982
|
+
filter_bogus_mesos_cputime_enabled: bool
|
|
1983
|
+
fsm_template: str
|
|
1984
|
+
git_config: Dict
|
|
1985
|
+
hacheck_sidecar_image_url: str
|
|
1986
|
+
hacheck_sidecar_volumes: List[DockerVolume]
|
|
1987
|
+
kubernetes_add_registration_labels: bool
|
|
1988
|
+
kubernetes_custom_resources: List[KubeCustomResourceDict]
|
|
1989
|
+
kubernetes_use_hacheck_sidecar: bool
|
|
1990
|
+
ldap_host: str
|
|
1991
|
+
ldap_reader_password: str
|
|
1992
|
+
ldap_reader_username: str
|
|
1993
|
+
ldap_search_base: str
|
|
1994
|
+
ldap_search_ou: str
|
|
1995
|
+
local_run_config: LocalRunConfig
|
|
1996
|
+
log_reader: LogReaderConfig
|
|
1997
|
+
log_readers: List[LogReaderConfig]
|
|
1998
|
+
log_writer: LogWriterConfig
|
|
1999
|
+
mark_for_deployment_max_polling_threads: int
|
|
2000
|
+
mark_for_deployment_default_polling_interval: float
|
|
2001
|
+
mark_for_deployment_default_diagnosis_interval: float
|
|
2002
|
+
mark_for_deployment_default_default_time_before_first_diagnosis: float
|
|
2003
|
+
mark_for_deployment_should_ping_for_unhealthy_pods: bool
|
|
2004
|
+
mesos_config: Dict
|
|
2005
|
+
metrics_provider: str
|
|
2006
|
+
monitoring_config: Dict
|
|
2007
|
+
nerve_readiness_check_script: List[str]
|
|
2008
|
+
nerve_register_k8s_terminating: bool
|
|
2009
|
+
paasta_native: PaastaNativeConfig
|
|
2010
|
+
paasta_status_version: str
|
|
2011
|
+
pdb_max_unavailable: Union[str, int]
|
|
2012
|
+
pki_backend: str
|
|
2013
|
+
pod_defaults: Dict[str, Any]
|
|
2014
|
+
pool_node_affinities: Dict[str, Dict[str, List[str]]]
|
|
2015
|
+
topology_spread_constraints: List[TopologySpreadConstraintDict]
|
|
2016
|
+
readiness_check_prefix_template: List[str]
|
|
2017
|
+
register_k8s_pods: bool
|
|
2018
|
+
register_native_services: bool
|
|
2019
|
+
remote_run_duration_limit: int
|
|
2020
|
+
resource_pool_settings: PoolToResourcePoolSettingsDict
|
|
2021
|
+
secret_provider: str
|
|
2022
|
+
security_check_command: str
|
|
2023
|
+
sensu_host: str
|
|
2024
|
+
sensu_port: int
|
|
2025
|
+
service_discovery_providers: Dict[str, Any]
|
|
2026
|
+
slack: Dict[str, str]
|
|
2027
|
+
spark_run_config: SparkRunConfig
|
|
2028
|
+
supported_storage_classes: Sequence[str]
|
|
2029
|
+
synapse_haproxy_url_format: str
|
|
2030
|
+
synapse_host: str
|
|
2031
|
+
synapse_port: int
|
|
2032
|
+
taskproc: Dict
|
|
2033
|
+
tron: Dict
|
|
2034
|
+
gunicorn_exporter_sidecar_image_url: str
|
|
2035
|
+
vault_cluster_map: Dict
|
|
2036
|
+
vault_environment: str
|
|
2037
|
+
volumes: List[DockerVolume]
|
|
2038
|
+
zookeeper: str
|
|
2039
|
+
tron_k8s_cluster_overrides: Dict[str, str]
|
|
2040
|
+
skip_cpu_override_validation: List[str]
|
|
2041
|
+
spark_k8s_role: str
|
|
2042
|
+
cluster_aliases: Dict[str, str]
|
|
2043
|
+
hacheck_match_initial_delay: bool
|
|
2044
|
+
spark_ui_port: int
|
|
2045
|
+
spark_driver_port: int
|
|
2046
|
+
spark_blockmanager_port: int
|
|
2047
|
+
skip_cpu_burst_validation: List[str]
|
|
2048
|
+
tron_default_pool_override: str
|
|
2049
|
+
spark_kubeconfig: str
|
|
2050
|
+
spark_iam_user_kubeconfig: str
|
|
2051
|
+
kube_clusters: Dict
|
|
2052
|
+
spark_use_eks_default: bool
|
|
2053
|
+
sidecar_requirements_config: Dict[str, KubeContainerResourceRequest]
|
|
2054
|
+
eks_cluster_aliases: Dict[str, str]
|
|
2055
|
+
secret_sync_delay_seconds: float
|
|
2056
|
+
service_auth_token_settings: ProjectedSAVolume
|
|
2057
|
+
service_auth_vault_role: str
|
|
2058
|
+
service_auth_sso_oidc_client_id: str
|
|
2059
|
+
always_authenticating_services: List[str]
|
|
2060
|
+
uses_bulkdata_default: bool
|
|
2061
|
+
enable_automated_redeploys_default: bool
|
|
2062
|
+
enable_tron_tsc: bool
|
|
2063
|
+
default_spark_iam_user: str
|
|
2064
|
+
default_spark_driver_pool_override: str
|
|
2065
|
+
|
|
2066
|
+
|
|
2067
|
+
def load_system_paasta_config(
|
|
2068
|
+
path: str = PATH_TO_SYSTEM_PAASTA_CONFIG_DIR,
|
|
2069
|
+
) -> "SystemPaastaConfig":
|
|
2070
|
+
"""
|
|
2071
|
+
Reads Paasta configs in specified directory in lexicographical order and deep merges
|
|
2072
|
+
the dictionaries (last file wins).
|
|
2073
|
+
"""
|
|
2074
|
+
if not os.path.isdir(path):
|
|
2075
|
+
raise PaastaNotConfiguredError(
|
|
2076
|
+
"Could not find system paasta configuration directory: %s" % path
|
|
2077
|
+
)
|
|
2078
|
+
|
|
2079
|
+
if not os.access(path, os.R_OK):
|
|
2080
|
+
raise PaastaNotConfiguredError(
|
|
2081
|
+
"Could not read from system paasta configuration directory: %s" % path
|
|
2082
|
+
)
|
|
2083
|
+
|
|
2084
|
+
try:
|
|
2085
|
+
file_stats = frozenset(
|
|
2086
|
+
{
|
|
2087
|
+
(fn, os.stat(fn))
|
|
2088
|
+
for fn in get_readable_files_in_glob(glob="*.json", path=path)
|
|
2089
|
+
}
|
|
2090
|
+
)
|
|
2091
|
+
return parse_system_paasta_config(file_stats, path)
|
|
2092
|
+
except IOError as e:
|
|
2093
|
+
raise PaastaNotConfiguredError(
|
|
2094
|
+
f"Could not load system paasta config file {e.filename}: {e.strerror}"
|
|
2095
|
+
)
|
|
2096
|
+
|
|
2097
|
+
|
|
2098
|
+
def optionally_load_system_paasta_config(
|
|
2099
|
+
path: str = PATH_TO_SYSTEM_PAASTA_CONFIG_DIR,
|
|
2100
|
+
) -> "SystemPaastaConfig":
|
|
2101
|
+
"""
|
|
2102
|
+
Tries to load the system paasta config, but will return an empty configuration if not available,
|
|
2103
|
+
without raising.
|
|
2104
|
+
"""
|
|
2105
|
+
try:
|
|
2106
|
+
return load_system_paasta_config(path=path)
|
|
2107
|
+
except PaastaNotConfiguredError:
|
|
2108
|
+
return SystemPaastaConfig({}, "")
|
|
2109
|
+
|
|
2110
|
+
|
|
2111
|
+
@lru_cache()
|
|
2112
|
+
def parse_system_paasta_config(
|
|
2113
|
+
file_stats: FrozenSet[Tuple[str, os.stat_result]], path: str
|
|
2114
|
+
) -> "SystemPaastaConfig":
|
|
2115
|
+
"""Pass in a dictionary of filename -> os.stat_result, and this returns the merged parsed configs"""
|
|
2116
|
+
config: SystemPaastaConfigDict = {}
|
|
2117
|
+
for filename, _ in file_stats:
|
|
2118
|
+
with open(filename) as f:
|
|
2119
|
+
config = deep_merge_dictionaries(
|
|
2120
|
+
json.load(f), config, allow_duplicate_keys=False
|
|
2121
|
+
)
|
|
2122
|
+
return SystemPaastaConfig(config, path)
|
|
2123
|
+
|
|
2124
|
+
|
|
2125
|
+
class PoolsNotConfiguredError(Exception):
|
|
2126
|
+
pass
|
|
2127
|
+
|
|
2128
|
+
|
|
2129
|
+
def validate_pool(
|
|
2130
|
+
cluster: str, pool: str, system_paasta_config: "SystemPaastaConfig"
|
|
2131
|
+
) -> bool:
|
|
2132
|
+
if pool:
|
|
2133
|
+
valid_pools = system_paasta_config.get_pools_for_cluster(cluster)
|
|
2134
|
+
if not valid_pools:
|
|
2135
|
+
raise PoolsNotConfiguredError
|
|
2136
|
+
# at this point, we can be sure that `valid_pools` is populated
|
|
2137
|
+
return pool in valid_pools
|
|
2138
|
+
return True
|
|
2139
|
+
|
|
2140
|
+
|
|
2141
|
+
class SystemPaastaConfig:
|
|
2142
|
+
def __init__(self, config: SystemPaastaConfigDict, directory: str) -> None:
|
|
2143
|
+
self.directory = directory
|
|
2144
|
+
self.config_dict = config
|
|
2145
|
+
|
|
2146
|
+
def __eq__(self, other: Any) -> bool:
|
|
2147
|
+
if isinstance(other, SystemPaastaConfig):
|
|
2148
|
+
return (
|
|
2149
|
+
self.directory == other.directory
|
|
2150
|
+
and self.config_dict == other.config_dict
|
|
2151
|
+
)
|
|
2152
|
+
return False
|
|
2153
|
+
|
|
2154
|
+
def __repr__(self) -> str:
|
|
2155
|
+
return f"SystemPaastaConfig({self.config_dict!r}, {self.directory!r})"
|
|
2156
|
+
|
|
2157
|
+
def get_secret_sync_delay_seconds(self) -> float:
|
|
2158
|
+
return self.config_dict.get("secret_sync_delay_seconds", 0)
|
|
2159
|
+
|
|
2160
|
+
def get_spark_use_eks_default(self) -> bool:
|
|
2161
|
+
return self.config_dict.get("spark_use_eks_default", False)
|
|
2162
|
+
|
|
2163
|
+
def get_default_spark_iam_user(self) -> str:
|
|
2164
|
+
return self.config_dict.get(
|
|
2165
|
+
"default_spark_iam_user", "/etc/boto_cfg/mrjob.yaml"
|
|
2166
|
+
)
|
|
2167
|
+
|
|
2168
|
+
def get_default_spark_driver_pool_override(self) -> str:
|
|
2169
|
+
"""
|
|
2170
|
+
If defined, fetches the override for what pool to run a Spark driver in.
|
|
2171
|
+
Otherwise, returns the default Spark driver pool.
|
|
2172
|
+
|
|
2173
|
+
:returns: The default_spark_driver_pool_override specified in the paasta configuration
|
|
2174
|
+
"""
|
|
2175
|
+
return self.config_dict.get(
|
|
2176
|
+
"default_spark_driver_pool_override", DEFAULT_SPARK_DRIVER_POOL
|
|
2177
|
+
)
|
|
2178
|
+
|
|
2179
|
+
def get_sidecar_requirements_config(
|
|
2180
|
+
self,
|
|
2181
|
+
) -> Dict[str, KubeContainerResourceRequest]:
|
|
2182
|
+
return self.config_dict.get("sidecar_requirements_config", {})
|
|
2183
|
+
|
|
2184
|
+
def get_tron_default_pool_override(self) -> str:
|
|
2185
|
+
"""Get the default pool override variable defined in this host's cluster config file.
|
|
2186
|
+
|
|
2187
|
+
:returns: The default_pool_override specified in the paasta configuration
|
|
2188
|
+
"""
|
|
2189
|
+
return self.config_dict.get("tron_default_pool_override", "default")
|
|
2190
|
+
|
|
2191
|
+
def get_zk_hosts(self) -> str:
|
|
2192
|
+
"""Get the zk_hosts defined in this hosts's cluster config file.
|
|
2193
|
+
Strips off the zk:// prefix, if it exists, for use with Kazoo.
|
|
2194
|
+
|
|
2195
|
+
:returns: The zk_hosts specified in the paasta configuration
|
|
2196
|
+
"""
|
|
2197
|
+
try:
|
|
2198
|
+
hosts = self.config_dict["zookeeper"]
|
|
2199
|
+
except KeyError:
|
|
2200
|
+
raise PaastaNotConfiguredError(
|
|
2201
|
+
"Could not find zookeeper connection string in configuration directory: %s"
|
|
2202
|
+
% self.directory
|
|
2203
|
+
)
|
|
2204
|
+
|
|
2205
|
+
# how do python strings not have a method for doing this
|
|
2206
|
+
if hosts.startswith("zk://"):
|
|
2207
|
+
return hosts[len("zk://") :]
|
|
2208
|
+
return hosts
|
|
2209
|
+
|
|
2210
|
+
def get_system_docker_registry(self) -> str:
|
|
2211
|
+
"""Get the docker_registry defined in this host's cluster config file.
|
|
2212
|
+
|
|
2213
|
+
:returns: The docker_registry specified in the paasta configuration
|
|
2214
|
+
"""
|
|
2215
|
+
try:
|
|
2216
|
+
return self.config_dict["docker_registry"]
|
|
2217
|
+
except KeyError:
|
|
2218
|
+
raise PaastaNotConfiguredError(
|
|
2219
|
+
"Could not find docker registry in configuration directory: %s"
|
|
2220
|
+
% self.directory
|
|
2221
|
+
)
|
|
2222
|
+
|
|
2223
|
+
def get_hacheck_sidecar_volumes(self) -> List[DockerVolume]:
|
|
2224
|
+
"""Get the hacheck sidecar volumes defined in this host's hacheck_sidecar_volumes config file.
|
|
2225
|
+
|
|
2226
|
+
:returns: The list of volumes specified in the paasta configuration
|
|
2227
|
+
"""
|
|
2228
|
+
try:
|
|
2229
|
+
volumes = self.config_dict["hacheck_sidecar_volumes"]
|
|
2230
|
+
except KeyError:
|
|
2231
|
+
raise PaastaNotConfiguredError(
|
|
2232
|
+
"Could not find hacheck_sidecar_volumes in configuration directory: %s"
|
|
2233
|
+
% self.directory
|
|
2234
|
+
)
|
|
2235
|
+
return _reorder_docker_volumes(list(volumes))
|
|
2236
|
+
|
|
2237
|
+
def get_volumes(self) -> Sequence[DockerVolume]:
|
|
2238
|
+
"""Get the volumes defined in this host's volumes config file.
|
|
2239
|
+
|
|
2240
|
+
:returns: The list of volumes specified in the paasta configuration
|
|
2241
|
+
"""
|
|
2242
|
+
try:
|
|
2243
|
+
return self.config_dict["volumes"]
|
|
2244
|
+
except KeyError:
|
|
2245
|
+
raise PaastaNotConfiguredError(
|
|
2246
|
+
"Could not find volumes in configuration directory: %s" % self.directory
|
|
2247
|
+
)
|
|
2248
|
+
|
|
2249
|
+
def get_cluster(self) -> str:
|
|
2250
|
+
"""Get the cluster defined in this host's cluster config file.
|
|
2251
|
+
|
|
2252
|
+
:returns: The name of the cluster defined in the paasta configuration
|
|
2253
|
+
"""
|
|
2254
|
+
try:
|
|
2255
|
+
return self.config_dict["cluster"]
|
|
2256
|
+
except KeyError:
|
|
2257
|
+
raise PaastaNotConfiguredError(
|
|
2258
|
+
"Could not find cluster in configuration directory: %s" % self.directory
|
|
2259
|
+
)
|
|
2260
|
+
|
|
2261
|
+
def get_dashboard_links(self) -> Mapping[str, Mapping[str, str]]:
|
|
2262
|
+
return self.config_dict["dashboard_links"]
|
|
2263
|
+
|
|
2264
|
+
def get_cr_owners(self) -> Dict[str, str]:
|
|
2265
|
+
return self.config_dict["cr_owners"]
|
|
2266
|
+
|
|
2267
|
+
def get_auto_hostname_unique_size(self) -> int:
|
|
2268
|
+
"""
|
|
2269
|
+
We automatically add a ["hostname", "UNIQUE"] constraint to "small" services running in production clusters.
|
|
2270
|
+
If there are less than or equal to this number of instances, we consider it small.
|
|
2271
|
+
We fail safe and return -1 to avoid adding the ['hostname', 'UNIQUE'] constraint if this value is not defined
|
|
2272
|
+
|
|
2273
|
+
:returns: The integer size of a small service
|
|
2274
|
+
"""
|
|
2275
|
+
return self.config_dict.get("auto_hostname_unique_size", -1)
|
|
2276
|
+
|
|
2277
|
+
def get_auto_config_instance_types_enabled(self) -> Dict[str, bool]:
|
|
2278
|
+
return self.config_dict.get("auto_config_instance_types_enabled", {})
|
|
2279
|
+
|
|
2280
|
+
def get_auto_config_instance_type_aliases(self) -> Dict[str, str]:
|
|
2281
|
+
"""
|
|
2282
|
+
Allow re-using another instance type's autotuned data. This is useful when an instance can be trivially moved around
|
|
2283
|
+
type-wise as it allows us to avoid data races/issues with the autotuned recommendations generator/updater.
|
|
2284
|
+
"""
|
|
2285
|
+
return self.config_dict.get("auto_config_instance_type_aliases", {})
|
|
2286
|
+
|
|
2287
|
+
def get_api_client_timeout(self) -> int:
|
|
2288
|
+
"""
|
|
2289
|
+
We've seen the Paasta API get hung up sometimes and the client not realizing this will sit idle forever.
|
|
2290
|
+
This will be used to specify the default timeout
|
|
2291
|
+
"""
|
|
2292
|
+
return self.config_dict.get("api_client_timeout", 120)
|
|
2293
|
+
|
|
2294
|
+
def get_api_endpoints(self) -> Mapping[str, str]:
|
|
2295
|
+
return self.config_dict["api_endpoints"]
|
|
2296
|
+
|
|
2297
|
+
def get_enable_client_cert_auth(self) -> bool:
|
|
2298
|
+
"""
|
|
2299
|
+
If enabled present a client certificate from ~/.paasta/pki/<cluster>.crt and ~/.paasta/pki/<cluster>.key
|
|
2300
|
+
"""
|
|
2301
|
+
return self.config_dict.get("enable_client_cert_auth", True)
|
|
2302
|
+
|
|
2303
|
+
def get_enable_nerve_readiness_check(self) -> bool:
|
|
2304
|
+
"""
|
|
2305
|
+
If enabled perform readiness checks on nerve
|
|
2306
|
+
"""
|
|
2307
|
+
return self.config_dict.get("enable_nerve_readiness_check", True)
|
|
2308
|
+
|
|
2309
|
+
def get_enable_envoy_readiness_check(self) -> bool:
|
|
2310
|
+
"""
|
|
2311
|
+
If enabled perform readiness checks on envoy
|
|
2312
|
+
"""
|
|
2313
|
+
return self.config_dict.get("enable_envoy_readiness_check", False)
|
|
2314
|
+
|
|
2315
|
+
def get_nerve_readiness_check_script(self) -> List[str]:
|
|
2316
|
+
return self.config_dict.get(
|
|
2317
|
+
"nerve_readiness_check_script", ["/check_smartstack_up.sh"]
|
|
2318
|
+
)
|
|
2319
|
+
|
|
2320
|
+
def get_envoy_readiness_check_script(self) -> List[str]:
|
|
2321
|
+
return self.config_dict.get(
|
|
2322
|
+
"envoy_readiness_check_script",
|
|
2323
|
+
["/check_proxy_up.sh", "--enable-envoy", "--envoy-check-mode", "eds-dir"],
|
|
2324
|
+
)
|
|
2325
|
+
|
|
2326
|
+
def get_envoy_nerve_readiness_check_script(self) -> List[str]:
|
|
2327
|
+
return self.config_dict.get(
|
|
2328
|
+
"envoy_nerve_readiness_check_script",
|
|
2329
|
+
["/check_proxy_up.sh", "--enable-smartstack", "--enable-envoy"],
|
|
2330
|
+
)
|
|
2331
|
+
|
|
2332
|
+
def get_nerve_register_k8s_terminating(self) -> bool:
|
|
2333
|
+
return self.config_dict.get("nerve_register_k8s_terminating", True)
|
|
2334
|
+
|
|
2335
|
+
def get_enforce_disk_quota(self) -> bool:
|
|
2336
|
+
"""
|
|
2337
|
+
If enabled, add `--storage-opt size=SIZE` arg to `docker run` calls,
|
|
2338
|
+
enforcing the disk quota as a result.
|
|
2339
|
+
|
|
2340
|
+
Please note that this should be enabled only for a suported environment
|
|
2341
|
+
(which at the moment is only `overlay2` driver backed by `XFS`
|
|
2342
|
+
filesystem mounted with `prjquota` option) otherwise Docker will fail
|
|
2343
|
+
to start.
|
|
2344
|
+
"""
|
|
2345
|
+
return self.config_dict.get("enforce_disk_quota", False)
|
|
2346
|
+
|
|
2347
|
+
def get_auth_certificate_ttl(self) -> str:
|
|
2348
|
+
"""
|
|
2349
|
+
How long to request for ttl on auth certificates. Note that this maybe limited
|
|
2350
|
+
by policy in Vault
|
|
2351
|
+
"""
|
|
2352
|
+
return self.config_dict.get("auth_certificate_ttl", "11h")
|
|
2353
|
+
|
|
2354
|
+
def get_fsm_template(self) -> str:
|
|
2355
|
+
fsm_path = os.path.dirname(paasta_tools.cli.fsm.__file__)
|
|
2356
|
+
template_path = os.path.join(fsm_path, "template")
|
|
2357
|
+
return self.config_dict.get("fsm_template", template_path)
|
|
2358
|
+
|
|
2359
|
+
def get_log_writer(self) -> LogWriterConfig:
|
|
2360
|
+
"""Get the log_writer configuration out of global paasta config
|
|
2361
|
+
|
|
2362
|
+
:returns: The log_writer dictionary.
|
|
2363
|
+
"""
|
|
2364
|
+
try:
|
|
2365
|
+
return self.config_dict["log_writer"]
|
|
2366
|
+
except KeyError:
|
|
2367
|
+
raise PaastaNotConfiguredError(
|
|
2368
|
+
"Could not find log_writer in configuration directory: %s"
|
|
2369
|
+
% self.directory
|
|
2370
|
+
)
|
|
2371
|
+
|
|
2372
|
+
def get_log_reader(self) -> LogReaderConfig:
|
|
2373
|
+
"""Get the log_reader configuration out of global paasta config
|
|
2374
|
+
|
|
2375
|
+
:returns: the log_reader dictionary.
|
|
2376
|
+
"""
|
|
2377
|
+
try:
|
|
2378
|
+
return self.config_dict["log_reader"]
|
|
2379
|
+
except KeyError:
|
|
2380
|
+
raise PaastaNotConfiguredError(
|
|
2381
|
+
"Could not find log_reader in configuration directory: %s"
|
|
2382
|
+
% self.directory
|
|
2383
|
+
)
|
|
2384
|
+
|
|
2385
|
+
def get_log_readers(self) -> List[LogReaderConfig]:
|
|
2386
|
+
"""Get the log_readers configuration out of global paasta config
|
|
2387
|
+
|
|
2388
|
+
:returns: the log_readers list of dicts.
|
|
2389
|
+
"""
|
|
2390
|
+
try:
|
|
2391
|
+
return self.config_dict["log_readers"]
|
|
2392
|
+
except KeyError:
|
|
2393
|
+
raise PaastaNotConfiguredError(
|
|
2394
|
+
"Could not find log_readers in configuration directory: %s"
|
|
2395
|
+
% self.directory
|
|
2396
|
+
)
|
|
2397
|
+
|
|
2398
|
+
def get_metrics_provider(self) -> Optional[str]:
|
|
2399
|
+
"""Get the metrics_provider configuration out of global paasta config
|
|
2400
|
+
|
|
2401
|
+
:returns: A string identifying the metrics_provider
|
|
2402
|
+
"""
|
|
2403
|
+
deployd_metrics_provider = self.config_dict.get("deployd_metrics_provider")
|
|
2404
|
+
if deployd_metrics_provider is not None:
|
|
2405
|
+
return deployd_metrics_provider
|
|
2406
|
+
return self.config_dict.get("metrics_provider")
|
|
2407
|
+
|
|
2408
|
+
def get_sensu_host(self) -> str:
|
|
2409
|
+
"""Get the host that we should send sensu events to.
|
|
2410
|
+
|
|
2411
|
+
:returns: the sensu_host string, or localhost if not specified.
|
|
2412
|
+
"""
|
|
2413
|
+
return self.config_dict.get("sensu_host", "localhost")
|
|
2414
|
+
|
|
2415
|
+
def get_sensu_port(self) -> int:
|
|
2416
|
+
"""Get the port that we should send sensu events to.
|
|
2417
|
+
|
|
2418
|
+
:returns: the sensu_port value as an integer, or 3030 if not specified.
|
|
2419
|
+
"""
|
|
2420
|
+
return int(self.config_dict.get("sensu_port", 3030))
|
|
2421
|
+
|
|
2422
|
+
def get_dockercfg_location(self) -> str:
|
|
2423
|
+
"""Get the location of the dockerfile, as a URI.
|
|
2424
|
+
|
|
2425
|
+
:returns: the URI specified, or file:///root/.dockercfg if not specified.
|
|
2426
|
+
"""
|
|
2427
|
+
return self.config_dict.get("dockercfg_location", DEFAULT_DOCKERCFG_LOCATION)
|
|
2428
|
+
|
|
2429
|
+
def get_synapse_port(self) -> int:
|
|
2430
|
+
"""Get the port that haproxy-synapse exposes its status on. Defaults to 3212.
|
|
2431
|
+
|
|
2432
|
+
:returns: the haproxy-synapse status port."""
|
|
2433
|
+
return int(self.config_dict.get("synapse_port", 3212))
|
|
2434
|
+
|
|
2435
|
+
def get_default_synapse_host(self) -> str:
|
|
2436
|
+
"""Get the default host we should interrogate for haproxy-synapse state.
|
|
2437
|
+
|
|
2438
|
+
:returns: A hostname that is running haproxy-synapse."""
|
|
2439
|
+
return self.config_dict.get("synapse_host", "localhost")
|
|
2440
|
+
|
|
2441
|
+
def get_synapse_haproxy_url_format(self) -> str:
|
|
2442
|
+
"""Get a format string for the URL to query for haproxy-synapse state. This format string gets two keyword
|
|
2443
|
+
arguments, host and port. Defaults to "http://{host:s}:{port:d}/;csv;norefresh".
|
|
2444
|
+
|
|
2445
|
+
:returns: A format string for constructing the URL of haproxy-synapse's status page.
|
|
2446
|
+
"""
|
|
2447
|
+
return self.config_dict.get(
|
|
2448
|
+
"synapse_haproxy_url_format", DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT
|
|
2449
|
+
)
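For reference, a small sketch of how this format string is applied; the values below are simply the documented defaults:

url_format = "http://{host:s}:{port:d}/;csv;norefresh"  # default per the docstring above
status_url = url_format.format(host="localhost", port=3212)
# status_url == "http://localhost:3212/;csv;norefresh"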
|
|
2450
|
+
|
|
2451
|
+
def get_service_discovery_providers(self) -> Dict[str, Any]:
|
|
2452
|
+
return self.config_dict.get("service_discovery_providers", {})
|
|
2453
|
+
|
|
2454
|
+
def get_resource_pool_settings(self) -> PoolToResourcePoolSettingsDict:
|
|
2455
|
+
return self.config_dict.get("resource_pool_settings", {})
|
|
2456
|
+
|
|
2457
|
+
def get_cluster_fqdn_format(self) -> str:
|
|
2458
|
+
"""Get a format string that constructs a DNS name pointing at the paasta masters in a cluster. This format
|
|
2459
|
+
string gets one parameter: cluster. Defaults to '{cluster:s}.paasta'.
|
|
2460
|
+
|
|
2461
|
+
:returns: A format string for constructing the FQDN of the masters in a given cluster.
|
|
2462
|
+
"""
|
|
2463
|
+
return self.config_dict.get("cluster_fqdn_format", "{cluster:s}.paasta")
|
|
2464
|
+
|
|
2465
|
+
def get_paasta_status_version(self) -> str:
|
|
2466
|
+
"""Get paasta status version string (new | old). Defaults to 'old'.
|
|
2467
|
+
|
|
2468
|
+
:returns: A string with the desired version of paasta status."""
|
|
2469
|
+
return self.config_dict.get("paasta_status_version", "old")
|
|
2470
|
+
|
|
2471
|
+
def get_local_run_config(self) -> LocalRunConfig:
|
|
2472
|
+
"""Get the local-run config
|
|
2473
|
+
|
|
2474
|
+
:returns: The local-run job config dictionary"""
|
|
2475
|
+
return self.config_dict.get("local_run_config", {})
|
|
2476
|
+
|
|
2477
|
+
def get_spark_run_config(self) -> SparkRunConfig:
|
|
2478
|
+
"""Get the spark-run config
|
|
2479
|
+
|
|
2480
|
+
:returns: The spark-run system_paasta_config dictionary"""
|
|
2481
|
+
return self.config_dict.get("spark_run_config", {})
|
|
2482
|
+
|
|
2483
|
+
def get_paasta_native_config(self) -> PaastaNativeConfig:
|
|
2484
|
+
return self.config_dict.get("paasta_native", {})
|
|
2485
|
+
|
|
2486
|
+
def get_mesos_cli_config(self) -> Dict:
|
|
2487
|
+
"""Get the config for mesos-cli
|
|
2488
|
+
|
|
2489
|
+
:returns: The mesos cli config
|
|
2490
|
+
"""
|
|
2491
|
+
return self.config_dict.get("mesos_config", {})
|
|
2492
|
+
|
|
2493
|
+
def get_monitoring_config(self) -> Dict:
|
|
2494
|
+
"""Get the monitoring config
|
|
2495
|
+
|
|
2496
|
+
:returns: the monitoring config dictionary"""
|
|
2497
|
+
return self.config_dict.get("monitoring_config", {})
|
|
2498
|
+
|
|
2499
|
+
def get_deploy_blacklist(self) -> DeployBlacklist:
|
|
2500
|
+
"""Get global blacklist. This applies to all services
|
|
2501
|
+
in the cluster
|
|
2502
|
+
|
|
2503
|
+
:returns: The blacklist
|
|
2504
|
+
"""
|
|
2505
|
+
return safe_deploy_blacklist(self.config_dict.get("deploy_blacklist", []))
|
|
2506
|
+
|
|
2507
|
+
def get_deploy_whitelist(self) -> DeployWhitelist:
|
|
2508
|
+
"""Get global whitelist. This applies to all services
|
|
2509
|
+
in the cluster
|
|
2510
|
+
|
|
2511
|
+
:returns: The whitelist
|
|
2512
|
+
"""
|
|
2513
|
+
|
|
2514
|
+
return safe_deploy_whitelist(self.config_dict.get("deploy_whitelist"))
|
|
2515
|
+
|
|
2516
|
+
def get_expected_slave_attributes(self) -> ExpectedSlaveAttributes:
|
|
2517
|
+
"""Return a list of dictionaries, representing the expected combinations of attributes in this cluster. Used for
|
|
2518
|
+
calculating the default routing constraints."""
|
|
2519
|
+
return self.config_dict.get("expected_slave_attributes")
|
|
2520
|
+
|
|
2521
|
+
def get_security_check_command(self) -> Optional[str]:
|
|
2522
|
+
"""Get the script to be executed during the security-check build step
|
|
2523
|
+
|
|
2524
|
+
:return: The name of the file
|
|
2525
|
+
"""
|
|
2526
|
+
return self.config_dict.get("security_check_command", None)
|
|
2527
|
+
|
|
2528
|
+
def get_hacheck_sidecar_image_url(self) -> str:
|
|
2529
|
+
"""Get the docker image URL for the hacheck sidecar container"""
|
|
2530
|
+
return self.config_dict.get("hacheck_sidecar_image_url")
|
|
2531
|
+
|
|
2532
|
+
def get_register_k8s_pods(self) -> bool:
|
|
2533
|
+
"""Enable registration of k8s services in nerve"""
|
|
2534
|
+
return self.config_dict.get("register_k8s_pods", False)
|
|
2535
|
+
|
|
2536
|
+
def get_kubernetes_add_registration_labels(self) -> bool:
|
|
2537
|
+
return self.config_dict.get("kubernetes_add_registration_labels", False)
|
|
2538
|
+
|
|
2539
|
+
def get_kubernetes_custom_resources(self) -> Sequence[KubeCustomResourceDict]:
|
|
2540
|
+
"""List of custom resources that should be synced by setup_kubernetes_cr"""
|
|
2541
|
+
return self.config_dict.get("kubernetes_custom_resources", [])
|
|
2542
|
+
|
|
2543
|
+
def get_kubernetes_use_hacheck_sidecar(self) -> bool:
|
|
2544
|
+
return self.config_dict.get("kubernetes_use_hacheck_sidecar", True)
|
|
2545
|
+
|
|
2546
|
+
def get_register_native_services(self) -> bool:
|
|
2547
|
+
"""Enable registration of native paasta services in nerve"""
|
|
2548
|
+
return self.config_dict.get("register_native_services", False)
|
|
2549
|
+
|
|
2550
|
+
def get_taskproc(self) -> Dict:
|
|
2551
|
+
return self.config_dict.get("taskproc", {})
|
|
2552
|
+
|
|
2553
|
+
def get_disabled_watchers(self) -> List:
|
|
2554
|
+
return self.config_dict.get("disabled_watchers", [])
|
|
2555
|
+
|
|
2556
|
+
def get_pool_node_affinities(self) -> Dict[str, Dict[str, List[str]]]:
|
|
2557
|
+
"""Node selectors that will be applied to all Pods in a pool"""
|
|
2558
|
+
return self.config_dict.get("pool_node_affinities", {})
|
|
2559
|
+
|
|
2560
|
+
def get_topology_spread_constraints(self) -> List[TopologySpreadConstraintDict]:
|
|
2561
|
+
"""List of TopologySpreadConstraints that will be applied to all Pods in the cluster"""
|
|
2562
|
+
return self.config_dict.get("topology_spread_constraints", [])
|
|
2563
|
+
|
|
2564
|
+
def get_datastore_credentials_vault_overrides(self) -> Dict[str, str]:
|
|
2565
|
+
"""In order to use different Vault shards, vault-tools allows you to override
|
|
2566
|
+
environment variables (CA, token file, and URL). DB credentials are stored in
|
|
2567
|
+
a different shard to minimize the impact on the core Vault shard (which has
|
|
2568
|
+
size restrictions derived from Zookeeper limitations)."""
|
|
2569
|
+
return self.config_dict.get("datastore_credentials_vault_env_overrides", {})
|
|
2570
|
+
|
|
2571
|
+
def get_vault_environment(self) -> Optional[str]:
|
|
2572
|
+
"""Get the environment name for the vault cluster
|
|
2573
|
+
This must match the environment keys in the secret json files
|
|
2574
|
+
used by all services in this cluster"""
|
|
2575
|
+
return self.config_dict.get("vault_environment")
|
|
2576
|
+
|
|
2577
|
+
def get_vault_cluster_config(self) -> dict:
|
|
2578
|
+
"""Get a map from paasta_cluster to vault ecosystem. We need
|
|
2579
|
+
this because not every ecosystem will have its own vault cluster"""
|
|
2580
|
+
return self.config_dict.get("vault_cluster_map", {})
|
|
2581
|
+
|
|
2582
|
+
def get_secret_provider_name(self) -> str:
|
|
2583
|
+
"""Get the name for the configured secret_provider, used to
|
|
2584
|
+
decrypt secrets"""
|
|
2585
|
+
return self.config_dict.get("secret_provider", "paasta_tools.secret_providers")
|
|
2586
|
+
|
|
2587
|
+
def get_slack_token(self) -> str:
|
|
2588
|
+
"""Get a slack token for slack notifications. Returns None if there is
|
|
2589
|
+
none available"""
|
|
2590
|
+
return self.config_dict.get("slack", {}).get("token", None)
|
|
2591
|
+
|
|
2592
|
+
def get_tron_config(self) -> dict:
|
|
2593
|
+
return self.config_dict.get("tron", {})
|
|
2594
|
+
|
|
2595
|
+
def get_clusters(self) -> Sequence[str]:
|
|
2596
|
+
return self.config_dict.get("clusters", [])
|
|
2597
|
+
|
|
2598
|
+
def get_supported_storage_classes(self) -> Sequence[str]:
|
|
2599
|
+
return self.config_dict.get("supported_storage_classes", [])
|
|
2600
|
+
|
|
2601
|
+
def get_envoy_admin_endpoint_format(self) -> str:
|
|
2602
|
+
"""Get the format string for Envoy's admin interface."""
|
|
2603
|
+
return self.config_dict.get(
|
|
2604
|
+
"envoy_admin_endpoint_format", "http://{host:s}:{port:d}/{endpoint:s}"
|
|
2605
|
+
)
|
|
2606
|
+
|
|
2607
|
+
def get_envoy_admin_port(self) -> int:
|
|
2608
|
+
"""Get the port that Envoy's admin interface is listening on
|
|
2609
|
+
from /etc/services."""
|
|
2610
|
+
return socket.getservbyname(
|
|
2611
|
+
self.config_dict.get("envoy_admin_domain_name", "envoy-admin")
|
|
2612
|
+
)
|
|
2613
|
+
|
|
2614
|
+
def get_pdb_max_unavailable(self) -> Union[str, int]:
|
|
2615
|
+
return self.config_dict.get("pdb_max_unavailable", 0)
|
|
2616
|
+
|
|
2617
|
+
def get_pod_defaults(self) -> Dict[str, Any]:
|
|
2618
|
+
return self.config_dict.get("pod_defaults", {})
|
|
2619
|
+
|
|
2620
|
+
def get_ldap_search_base(self) -> str:
|
|
2621
|
+
return self.config_dict.get("ldap_search_base", None)
|
|
2622
|
+
|
|
2623
|
+
def get_ldap_search_ou(self) -> str:
|
|
2624
|
+
return self.config_dict.get("ldap_search_ou", None)
|
|
2625
|
+
|
|
2626
|
+
def get_ldap_host(self) -> str:
|
|
2627
|
+
return self.config_dict.get("ldap_host", None)
|
|
2628
|
+
|
|
2629
|
+
def get_ldap_reader_username(self) -> str:
|
|
2630
|
+
return self.config_dict.get("ldap_reader_username", None)
|
|
2631
|
+
|
|
2632
|
+
def get_ldap_reader_password(self) -> str:
|
|
2633
|
+
return self.config_dict.get("ldap_reader_password", None)
|
|
2634
|
+
|
|
2635
|
+
def get_default_push_groups(self) -> List:
|
|
2636
|
+
return self.config_dict.get("default_push_groups", None)
|
|
2637
|
+
|
|
2638
|
+
def get_git_config(self) -> Dict:
|
|
2639
|
+
"""Gets git configuration. Includes repo names and their git servers.
|
|
2640
|
+
|
|
2641
|
+
:returns: the git config dict
|
|
2642
|
+
"""
|
|
2643
|
+
return self.config_dict.get(
|
|
2644
|
+
"git_config",
|
|
2645
|
+
{
|
|
2646
|
+
"git_user": "git",
|
|
2647
|
+
"repos": {
|
|
2648
|
+
"yelpsoa-configs": {
|
|
2649
|
+
"repo_name": "yelpsoa-configs",
|
|
2650
|
+
"git_server": DEFAULT_SOA_CONFIGS_GIT_URL,
|
|
2651
|
+
"deploy_server": DEFAULT_SOA_CONFIGS_GIT_URL,
|
|
2652
|
+
},
|
|
2653
|
+
},
|
|
2654
|
+
},
|
|
2655
|
+
)
|
|
2656
|
+
|
|
2657
|
+
def get_git_repo_config(self, repo_name: str) -> Dict:
|
|
2658
|
+
"""Gets the git configuration for a specific repo.
|
|
2659
|
+
|
|
2660
|
+
:returns: the git config dict for a specific repo.
|
|
2661
|
+
"""
|
|
2662
|
+
return self.get_git_config().get("repos", {}).get(repo_name, {})
|
|
2663
|
+
|
|
2664
|
+
def default_should_use_uwsgi_exporter(self) -> bool:
|
|
2665
|
+
return self.config_dict.get("default_should_use_uwsgi_exporter", False)
|
|
2666
|
+
|
|
2667
|
+
def get_gunicorn_exporter_sidecar_image_url(self) -> str:
|
|
2668
|
+
"""Get the docker image URL for the gunicorn_exporter sidecar container"""
|
|
2669
|
+
return self.config_dict.get(
|
|
2670
|
+
"gunicorn_exporter_sidecar_image_url",
|
|
2671
|
+
"docker-paasta.yelpcorp.com:443/gunicorn_exporter-k8s-sidecar:v0.24.0-yelp0",
|
|
2672
|
+
)
|
|
2673
|
+
|
|
2674
|
+
def get_mark_for_deployment_max_polling_threads(self) -> int:
|
|
2675
|
+
return self.config_dict.get("mark_for_deployment_max_polling_threads", 4)
|
|
2676
|
+
|
|
2677
|
+
def get_mark_for_deployment_default_polling_interval(self) -> float:
|
|
2678
|
+
return self.config_dict.get("mark_for_deployment_default_polling_interval", 60)
|
|
2679
|
+
|
|
2680
|
+
def get_mark_for_deployment_default_diagnosis_interval(self) -> float:
|
|
2681
|
+
return self.config_dict.get(
|
|
2682
|
+
"mark_for_deployment_default_diagnosis_interval", 60
|
|
2683
|
+
)
|
|
2684
|
+
|
|
2685
|
+
def get_mark_for_deployment_default_time_before_first_diagnosis(self) -> float:
|
|
2686
|
+
return self.config_dict.get(
|
|
2687
|
+
"mark_for_deployment_default_default_time_before_first_diagnosis", 300
|
|
2688
|
+
)
|
|
2689
|
+
|
|
2690
|
+
def get_mark_for_deployment_should_ping_for_unhealthy_pods(self) -> bool:
|
|
2691
|
+
return self.config_dict.get(
|
|
2692
|
+
"mark_for_deployment_should_ping_for_unhealthy_pods", True
|
|
2693
|
+
)
|
|
2694
|
+
|
|
2695
|
+
def get_spark_k8s_role(self) -> str:
|
|
2696
|
+
return self.config_dict.get("spark_k8s_role", "spark")
|
|
2697
|
+
|
|
2698
|
+
def get_spark_driver_port(self) -> int:
|
|
2699
|
+
# default value is an arbitrary value
|
|
2700
|
+
return self.config_dict.get("spark_driver_port", 33001)
|
|
2701
|
+
|
|
2702
|
+
def get_spark_blockmanager_port(self) -> int:
|
|
2703
|
+
# default value is an arbitrary value
|
|
2704
|
+
return self.config_dict.get("spark_blockmanager_port", 33002)
|
|
2705
|
+
|
|
2706
|
+
def get_api_profiling_config(self) -> Dict:
|
|
2707
|
+
return self.config_dict.get(
|
|
2708
|
+
"api_profiling_config",
|
|
2709
|
+
{"cprofile_sampling_enabled": False},
|
|
2710
|
+
)
|
|
2711
|
+
|
|
2712
|
+
def get_skip_cpu_override_validation_services(self) -> List[str]:
|
|
2713
|
+
return self.config_dict.get("skip_cpu_override_validation", [])
|
|
2714
|
+
|
|
2715
|
+
def get_skip_cpu_burst_validation_services(self) -> List[str]:
|
|
2716
|
+
return self.config_dict.get("skip_cpu_burst_validation", [])
|
|
2717
|
+
|
|
2718
|
+
def get_cluster_aliases(self) -> Dict[str, str]:
|
|
2719
|
+
return self.config_dict.get("cluster_aliases", {})
|
|
2720
|
+
|
|
2721
|
+
def get_eks_cluster_aliases(self) -> Dict[str, str]:
|
|
2722
|
+
return self.config_dict.get("eks_cluster_aliases", {})
|
|
2723
|
+
|
|
2724
|
+
def get_cluster_pools(self) -> Dict[str, List[str]]:
|
|
2725
|
+
return self.config_dict.get("allowed_pools", {})
|
|
2726
|
+
|
|
2727
|
+
def get_spark_driver_iam_role(self) -> str:
|
|
2728
|
+
return self.get_spark_run_config().get("default_spark_driver_iam_role", "")
|
|
2729
|
+
|
|
2730
|
+
def get_spark_executor_iam_role(self) -> str:
|
|
2731
|
+
# use the same IAM role as the Spark driver
|
|
2732
|
+
return self.get_spark_run_config().get("default_spark_driver_iam_role", "")
|
|
2733
|
+
|
|
2734
|
+
def get_pools_for_cluster(self, cluster: str) -> List[str]:
|
|
2735
|
+
return self.get_cluster_pools().get(cluster, [])
|
|
2736
|
+
|
|
2737
|
+
def get_hacheck_match_initial_delay(self) -> bool:
|
|
2738
|
+
return self.config_dict.get("hacheck_match_initial_delay", False)
|
|
2739
|
+
|
|
2740
|
+
def get_readiness_check_prefix_template(self) -> List[str]:
|
|
2741
|
+
"""A prefix that will be added to the beginning of the readiness check command. Meant for e.g. `flock` and
|
|
2742
|
+
`timeout`."""
|
|
2743
|
+
# We use flock+timeout here to work around issues discovered in PAASTA-17673:
|
|
2744
|
+
# In k8s 1.18, probe timeout wasn't respected at all.
|
|
2745
|
+
# When we upgraded to k8s 1.20, the timeout started being partially respected - k8s would stop waiting for a
|
|
2746
|
+
# response, but wouldn't kill the command within the container (with the dockershim CRI).
|
|
2747
|
+
# Flock prevents multiple readiness probes from running at once, using lots of CPU.
|
|
2748
|
+
# The generous timeout allows for a slow readiness probe, but ensures that a truly-stuck readiness probe command
|
|
2749
|
+
# will eventually be killed so another process can retry.
|
|
2750
|
+
# Once we move off dockershim, we'll likely need to increase the readiness probe timeout, but we can then remove
|
|
2751
|
+
# this wrapper.
|
|
2752
|
+
return self.config_dict.get(
|
|
2753
|
+
"readiness_check_prefix_template",
|
|
2754
|
+
["flock", "-n", "/readiness_check_lock", "timeout", "120"],
|
|
2755
|
+
)
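A minimal sketch of how callers are expected to use this prefix; the probe command below is hypothetical:

prefix = ["flock", "-n", "/readiness_check_lock", "timeout", "120"]  # default from above
readiness_probe = ["/check_smartstack_up.sh"]  # hypothetical readiness command
full_command = prefix + readiness_probe  # the prefix is simply prepended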
|
|
2756
|
+
|
|
2757
|
+
def get_tron_k8s_cluster_overrides(self) -> Dict[str, str]:
|
|
2758
|
+
"""
|
|
2759
|
+
Return a mapping of a tron cluster -> compute cluster. Returns an empty dict if there are no overrides set.
|
|
2760
|
+
|
|
2761
|
+
This exists as we have certain Tron masters that are named differently from the compute cluster that should
|
|
2762
|
+
actually be used (e.g., we might have tron-XYZ-test-prod, but instead of scheduling on XYZ-test-prod, we'd
|
|
2763
|
+
like to schedule jobs on test-prod).
|
|
2764
|
+
|
|
2765
|
+
To control this, we have an optional config item that we'll puppet onto Tron masters that need this type of
|
|
2766
|
+
tron master -> compute cluster override which this function will read.
|
|
2767
|
+
"""
|
|
2768
|
+
return self.config_dict.get("tron_k8s_cluster_overrides", {})
|
|
2769
|
+
|
|
2770
|
+
def get_spark_kubeconfig(self) -> str:
|
|
2771
|
+
return self.config_dict.get("spark_kubeconfig", "/etc/kubernetes/spark.conf")
|
|
2772
|
+
|
|
2773
|
+
def get_spark_iam_user_kubeconfig(self) -> str:
|
|
2774
|
+
return self.config_dict.get(
|
|
2775
|
+
"spark_iam_user_kubeconfig", "/etc/kubernetes/spark2.conf"
|
|
2776
|
+
)
|
|
2777
|
+
|
|
2778
|
+
def get_kube_clusters(self) -> Dict:
|
|
2779
|
+
return self.config_dict.get("kube_clusters", {})
|
|
2780
|
+
|
|
2781
|
+
def get_service_auth_token_volume_config(self) -> ProjectedSAVolume:
|
|
2782
|
+
return self.config_dict.get("service_auth_token_settings", {})
|
|
2783
|
+
|
|
2784
|
+
def get_service_auth_vault_role(self) -> str:
|
|
2785
|
+
return self.config_dict.get("service_auth_vault_role", "service_authz")
|
|
2786
|
+
|
|
2787
|
+
def get_service_auth_sso_oidc_client_id(self) -> str:
|
|
2788
|
+
return self.config_dict.get("service_auth_sso_oidc_client_id", "")
|
|
2789
|
+
|
|
2790
|
+
def get_api_auth_sso_oidc_client_id(self) -> str:
|
|
2791
|
+
return self.config_dict.get("api_auth_sso_oidc_client_id", "")
|
|
2792
|
+
|
|
2793
|
+
def get_always_authenticating_services(self) -> List[str]:
|
|
2794
|
+
return self.config_dict.get("always_authenticating_services", [])
|
|
2795
|
+
|
|
2796
|
+
def get_enable_automated_redeploys_default(self) -> bool:
|
|
2797
|
+
return self.config_dict.get("enable_automated_redeploys_default", False)
|
|
2798
|
+
|
|
2799
|
+
def get_enable_tron_tsc(self) -> bool:
|
|
2800
|
+
return self.config_dict.get("enable_tron_tsc", True)
|
|
2801
|
+
|
|
2802
|
+
def get_remote_run_duration_limit(self, default: int) -> int:
|
|
2803
|
+
return self.config_dict.get("remote_run_duration_limit", default)
|
|
2804
|
+
|
|
2805
|
+
|
|
2806
|
+
def _run(
|
|
2807
|
+
command: Union[str, List[str]],
|
|
2808
|
+
env: Mapping[str, str] = os.environ,
|
|
2809
|
+
timeout: float = None,
|
|
2810
|
+
log: bool = False,
|
|
2811
|
+
stream: bool = False,
|
|
2812
|
+
stdin: Any = None,
|
|
2813
|
+
stdin_interrupt: bool = False,
|
|
2814
|
+
popen_kwargs: Dict = {},
|
|
2815
|
+
**kwargs: Any,
|
|
2816
|
+
) -> Tuple[int, str]:
|
|
2817
|
+
"""Given a command, run it. Return a tuple of the return code and any
|
|
2818
|
+
output.
|
|
2819
|
+
|
|
2820
|
+
:param timeout: If specified, the command will be terminated after timeout
|
|
2821
|
+
seconds.
|
|
2822
|
+
:param log: If True, the _log will be handled by _run. If set, it is mandatory
|
|
2823
|
+
to pass at least a :service: and a :component: parameter. Optionally you
|
|
2824
|
+
can pass :cluster:, :instance: and :loglevel: parameters for logging.
|
|
2825
|
+
We wanted to use plumbum instead of rolling our own thing with
|
|
2826
|
+
subprocess.Popen but were blocked by
|
|
2827
|
+
https://github.com/tomerfiliba/plumbum/issues/162 and our local BASH_FUNC
|
|
2828
|
+
magic.
|
|
2829
|
+
"""
|
|
2830
|
+
output: List[str] = []
|
|
2831
|
+
if log:
|
|
2832
|
+
service = kwargs["service"]
|
|
2833
|
+
component = kwargs["component"]
|
|
2834
|
+
cluster = kwargs.get("cluster", ANY_CLUSTER)
|
|
2835
|
+
instance = kwargs.get("instance", ANY_INSTANCE)
|
|
2836
|
+
loglevel = kwargs.get("loglevel", DEFAULT_LOGLEVEL)
|
|
2837
|
+
try:
|
|
2838
|
+
if not isinstance(command, list):
|
|
2839
|
+
command = shlex.split(command)
|
|
2840
|
+
popen_kwargs["stdout"] = PIPE
|
|
2841
|
+
popen_kwargs["stderr"] = STDOUT
|
|
2842
|
+
popen_kwargs["stdin"] = stdin
|
|
2843
|
+
popen_kwargs["env"] = env
|
|
2844
|
+
process = Popen(command, **popen_kwargs)
|
|
2845
|
+
|
|
2846
|
+
if stdin_interrupt:
|
|
2847
|
+
|
|
2848
|
+
def signal_handler(signum: int, frame: FrameType) -> None:
|
|
2849
|
+
process.stdin.write("\n".encode("utf-8"))
|
|
2850
|
+
process.stdin.flush()
|
|
2851
|
+
process.wait()
|
|
2852
|
+
|
|
2853
|
+
signal.signal(signal.SIGINT, signal_handler)
|
|
2854
|
+
signal.signal(signal.SIGTERM, signal_handler)
|
|
2855
|
+
|
|
2856
|
+
# start the timer if we specified a timeout
|
|
2857
|
+
if timeout:
|
|
2858
|
+
proctimer = threading.Timer(timeout, _timeout, [process])
|
|
2859
|
+
proctimer.start()
|
|
2860
|
+
|
|
2861
|
+
outfn: Any = print if stream else output.append
|
|
2862
|
+
for linebytes in iter(process.stdout.readline, b""):
|
|
2863
|
+
line = linebytes.decode("utf-8", errors="replace").rstrip("\n")
|
|
2864
|
+
outfn(line)
|
|
2865
|
+
|
|
2866
|
+
if log:
|
|
2867
|
+
_log(
|
|
2868
|
+
service=service,
|
|
2869
|
+
line=line,
|
|
2870
|
+
component=component,
|
|
2871
|
+
level=loglevel,
|
|
2872
|
+
cluster=cluster,
|
|
2873
|
+
instance=instance,
|
|
2874
|
+
)
|
|
2875
|
+
# when finished, get the exit code
|
|
2876
|
+
process.wait()
|
|
2877
|
+
returncode = process.returncode
|
|
2878
|
+
except OSError as e:
|
|
2879
|
+
if log:
|
|
2880
|
+
_log(
|
|
2881
|
+
service=service,
|
|
2882
|
+
line=e.strerror.rstrip("\n"),
|
|
2883
|
+
component=component,
|
|
2884
|
+
level=loglevel,
|
|
2885
|
+
cluster=cluster,
|
|
2886
|
+
instance=instance,
|
|
2887
|
+
)
|
|
2888
|
+
output.append(e.strerror.rstrip("\n"))
|
|
2889
|
+
returncode = e.errno
|
|
2890
|
+
except (KeyboardInterrupt, SystemExit):
|
|
2891
|
+
# need to clean up the timing thread here
|
|
2892
|
+
if timeout:
|
|
2893
|
+
proctimer.cancel()
|
|
2894
|
+
raise
|
|
2895
|
+
else:
|
|
2896
|
+
# Stop the timer
|
|
2897
|
+
if timeout:
|
|
2898
|
+
proctimer.cancel()
|
|
2899
|
+
if returncode == -9:
|
|
2900
|
+
output.append(f"Command '{command}' timed out (longer than {timeout}s)")
|
|
2901
|
+
return returncode, "\n".join(output)
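A minimal usage sketch of `_run` as defined above:

returncode, output = _run(
    ["/bin/ls", "/tmp"],  # any command, as a list or a shell-style string
    timeout=10,           # terminate the process after 10 seconds
    stream=False,         # collect output and return it instead of printing live
)
if returncode != 0:
    print(f"command failed with {returncode}: {output}")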
|
|
2902
|
+
|
|
2903
|
+
|
|
2904
|
+
def get_umask() -> int:
|
|
2905
|
+
"""Get the current umask for this process. NOT THREAD SAFE."""
|
|
2906
|
+
old_umask = os.umask(0o0022)
|
|
2907
|
+
os.umask(old_umask)
|
|
2908
|
+
return old_umask
|
|
2909
|
+
|
|
2910
|
+
|
|
2911
|
+
def get_user_agent() -> str:
|
|
2912
|
+
base_name = os.path.basename(sys.argv[0])
|
|
2913
|
+
if base_name == "gunicorn":
|
|
2914
|
+
return f"{sys.argv[-1]} {paasta_tools.__version__}"
|
|
2915
|
+
elif len(sys.argv) >= 1:
|
|
2916
|
+
return f"{base_name} {paasta_tools.__version__}"
|
|
2917
|
+
else:
|
|
2918
|
+
return f"PaaSTA Tools {paasta_tools.__version__}"
|
|
2919
|
+
|
|
2920
|
+
|
|
2921
|
+
@contextlib.contextmanager
|
|
2922
|
+
def atomic_file_write(target_path: str) -> Iterator[IO]:
|
|
2923
|
+
dirname = os.path.dirname(target_path)
|
|
2924
|
+
basename = os.path.basename(target_path)
|
|
2925
|
+
|
|
2926
|
+
if target_path == "-":
|
|
2927
|
+
yield sys.stdout
|
|
2928
|
+
else:
|
|
2929
|
+
with tempfile.NamedTemporaryFile(
|
|
2930
|
+
dir=dirname, prefix=(".%s-" % basename), delete=False, mode="w"
|
|
2931
|
+
) as f:
|
|
2932
|
+
temp_target_path = f.name
|
|
2933
|
+
yield f
|
|
2934
|
+
|
|
2935
|
+
mode = 0o0666 & (~get_umask())
|
|
2936
|
+
os.chmod(temp_target_path, mode)
|
|
2937
|
+
os.rename(temp_target_path, target_path)
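A minimal usage sketch of `atomic_file_write`; the target path is hypothetical and `json` is already imported by this module:

with atomic_file_write("/tmp/example.json") as f:  # hypothetical path
    json.dump({"key": "value"}, f)
# The data is written to a temporary file in the same directory and only
# appears at the target path once the rename above completes.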
|
|
2938
|
+
|
|
2939
|
+
|
|
2940
|
+
class InvalidJobNameError(Exception):
|
|
2941
|
+
pass
|
|
2942
|
+
|
|
2943
|
+
|
|
2944
|
+
def compose_job_id(
|
|
2945
|
+
name: str,
|
|
2946
|
+
instance: str,
|
|
2947
|
+
git_hash: Optional[str] = None,
|
|
2948
|
+
config_hash: Optional[str] = None,
|
|
2949
|
+
spacer: str = SPACER,
|
|
2950
|
+
) -> str:
|
|
2951
|
+
"""Compose a job/app id by concatenating its name, instance, git hash, and config hash.
|
|
2952
|
+
|
|
2953
|
+
:param name: The name of the service
|
|
2954
|
+
:param instance: The instance of the service
|
|
2955
|
+
:param git_hash: The git_hash portion of the job_id. If git_hash is set,
|
|
2956
|
+
config_hash must also be set.
|
|
2957
|
+
:param config_hash: The config_hash portion of the job_id. If config_hash
|
|
2958
|
+
is set, git_hash must also be set.
|
|
2959
|
+
:returns: <name><SPACER><instance> if no tag, or <name><SPACER><instance><SPACER><hashes>...
|
|
2960
|
+
if extra hash inputs are provided.
|
|
2961
|
+
|
|
2962
|
+
"""
|
|
2963
|
+
composed = f"{name}{spacer}{instance}"
|
|
2964
|
+
if git_hash and config_hash:
|
|
2965
|
+
composed = f"{composed}{spacer}{git_hash}{spacer}{config_hash}"
|
|
2966
|
+
elif git_hash or config_hash:
|
|
2967
|
+
raise InvalidJobNameError(
|
|
2968
|
+
"invalid job id because git_hash (%s) and config_hash (%s) must "
|
|
2969
|
+
"both be defined or neither can be defined" % (git_hash, config_hash)
|
|
2970
|
+
)
|
|
2971
|
+
return composed
|
|
2972
|
+
|
|
2973
|
+
|
|
2974
|
+
def decompose_job_id(job_id: str, spacer: str = SPACER) -> Tuple[str, str, str, str]:
|
|
2975
|
+
"""Break a composed job id into its constituent (service name, instance,
|
|
2976
|
+
git hash, config hash) by splitting with ``spacer``.
|
|
2977
|
+
|
|
2978
|
+
:param job_id: The composed id of the job/app
|
|
2979
|
+
:returns: A tuple (service name, instance, git hash, config hash) that
|
|
2980
|
+
comprise the job_id
|
|
2981
|
+
"""
|
|
2982
|
+
decomposed = job_id.split(spacer)
|
|
2983
|
+
if len(decomposed) == 2:
|
|
2984
|
+
git_hash = None
|
|
2985
|
+
config_hash = None
|
|
2986
|
+
elif len(decomposed) == 4:
|
|
2987
|
+
git_hash = decomposed[2]
|
|
2988
|
+
config_hash = decomposed[3]
|
|
2989
|
+
else:
|
|
2990
|
+
raise InvalidJobNameError("invalid job id %s" % job_id)
|
|
2991
|
+
return (decomposed[0], decomposed[1], git_hash, config_hash)
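A small round-trip sketch, assuming the module-level SPACER is the default separator:

job_id = compose_job_id(
    "myservice", "main", git_hash="gitabc123", config_hash="configdef456"
)
# e.g. "myservice<SPACER>main<SPACER>gitabc123<SPACER>configdef456"
service, instance, git_hash, config_hash = decompose_job_id(job_id)
# -> ("myservice", "main", "gitabc123", "configdef456")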
|
|
2992
|
+
|
|
2993
|
+
|
|
2994
|
+
def build_docker_image_name(service: str) -> str:
|
|
2995
|
+
"""docker-paasta.yelpcorp.com:443 is the URL for the Registry where PaaSTA
|
|
2996
|
+
will look for your images.
|
|
2997
|
+
|
|
2998
|
+
:returns: a sanitized-for-Jenkins (s,/,-,g) version of the
|
|
2999
|
+
service's path in git. E.g. For github.yelpcorp.com:services/foo the
|
|
3000
|
+
docker image name is docker_registry/services-foo.
|
|
3001
|
+
"""
|
|
3002
|
+
docker_registry_url = get_service_docker_registry(service)
|
|
3003
|
+
name = f"{docker_registry_url}/services-{service}"
|
|
3004
|
+
return name
|
|
3005
|
+
|
|
3006
|
+
|
|
3007
|
+
def build_docker_tag(
|
|
3008
|
+
service: str, upstream_git_commit: str, image_version: Optional[str] = None
|
|
3009
|
+
) -> str:
|
|
3010
|
+
"""Builds the DOCKER_TAG string
|
|
3011
|
+
|
|
3012
|
+
upstream_git_commit is the SHA that we're building. Usually this is the
|
|
3013
|
+
tip of origin/master.
|
|
3014
|
+
"""
|
|
3015
|
+
tag = "{}:paasta-{}".format(build_docker_image_name(service), upstream_git_commit)
|
|
3016
|
+
if image_version is not None:
|
|
3017
|
+
tag += f"-{image_version}"
|
|
3018
|
+
return tag
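A usage sketch, assuming the registry for this service resolves to the URL mentioned in the docstring above:

tag = build_docker_tag("example_service", "abcdef123456")
# -> "docker-paasta.yelpcorp.com:443/services-example_service:paasta-abcdef123456"
tag = build_docker_tag("example_service", "abcdef123456", image_version="extrastuff")
# -> "docker-paasta.yelpcorp.com:443/services-example_service:paasta-abcdef123456-extrastuff"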
|
|
3019
|
+
|
|
3020
|
+
|
|
3021
|
+
def check_docker_image(
|
|
3022
|
+
service: str,
|
|
3023
|
+
commit: str,
|
|
3024
|
+
image_version: Optional[str] = None,
|
|
3025
|
+
) -> bool:
|
|
3026
|
+
"""Checks whether the given image for :service: with :tag: exists.
|
|
3027
|
+
|
|
3028
|
+
:raises: ValueError if more than one docker image with :tag: found.
|
|
3029
|
+
:returns: True if there is exactly one matching image found.
|
|
3030
|
+
"""
|
|
3031
|
+
docker_client = get_docker_client()
|
|
3032
|
+
image_name = build_docker_image_name(service)
|
|
3033
|
+
docker_tag = build_docker_tag(service, commit, image_version)
|
|
3034
|
+
images = docker_client.images(name=image_name)
|
|
3035
|
+
# image['RepoTags'] may be None
|
|
3036
|
+
# Fixed upstream but only in docker-py 2.
|
|
3037
|
+
# https://github.com/docker/docker-py/issues/1401
|
|
3038
|
+
result = [image for image in images if docker_tag in (image["RepoTags"] or [])]
|
|
3039
|
+
if len(result) > 1:
|
|
3040
|
+
raise ValueError(
|
|
3041
|
+
f"More than one docker image found with tag {docker_tag}\n{result}"
|
|
3042
|
+
)
|
|
3043
|
+
return len(result) == 1
|
|
3044
|
+
|
|
3045
|
+
|
|
3046
|
+
def datetime_from_utc_to_local(utc_datetime: datetime.datetime) -> datetime.datetime:
|
|
3047
|
+
return datetime_convert_timezone(
|
|
3048
|
+
utc_datetime, dateutil.tz.tzutc(), dateutil.tz.tzlocal()
|
|
3049
|
+
)
|
|
3050
|
+
|
|
3051
|
+
|
|
3052
|
+
def datetime_convert_timezone(
|
|
3053
|
+
dt: datetime.datetime, from_zone: datetime.tzinfo, to_zone: datetime.tzinfo
|
|
3054
|
+
) -> datetime.datetime:
|
|
3055
|
+
dt = dt.replace(tzinfo=from_zone)
|
|
3056
|
+
converted_datetime = dt.astimezone(to_zone)
|
|
3057
|
+
converted_datetime = converted_datetime.replace(tzinfo=None)
|
|
3058
|
+
return converted_datetime
|
|
3059
|
+
|
|
3060
|
+
|
|
3061
|
+
def get_username() -> str:
|
|
3062
|
+
"""Returns the current username in a portable way. Will use the SUDO_USER
|
|
3063
|
+
environment variable if present.
|
|
3064
|
+
http://stackoverflow.com/a/2899055
|
|
3065
|
+
"""
|
|
3066
|
+
return os.environ.get("SUDO_USER", pwd.getpwuid(os.getuid())[0])
|
|
3067
|
+
|
|
3068
|
+
|
|
3069
|
+
def get_hostname() -> str:
|
|
3070
|
+
"""Returns the fully-qualified domain name of the server this code is
|
|
3071
|
+
running on.
|
|
3072
|
+
"""
|
|
3073
|
+
return socket.getfqdn()
|
|
3074
|
+
|
|
3075
|
+
|
|
3076
|
+
def get_files_of_type_in_dir(
|
|
3077
|
+
file_type: str,
|
|
3078
|
+
service: str = None,
|
|
3079
|
+
soa_dir: str = DEFAULT_SOA_DIR,
|
|
3080
|
+
) -> List[str]:
|
|
3081
|
+
"""Recursively search path if type of file exists.
|
|
3082
|
+
|
|
3083
|
+
:param file_type: a string of a type of a file (kubernetes, slo, etc.)
|
|
3084
|
+
:param service: a string of a service
|
|
3085
|
+
:param soa_dir: a string of a path to a soa_configs directory
|
|
3086
|
+
:return: a list
|
|
3087
|
+
"""
|
|
3088
|
+
# TODO: Only use INSTANCE_TYPES as input by making file_type Literal
|
|
3089
|
+
service = "**" if service is None else service
|
|
3090
|
+
soa_dir = DEFAULT_SOA_DIR if soa_dir is None else soa_dir
|
|
3091
|
+
file_type += "-*.yaml"
|
|
3092
|
+
return [
|
|
3093
|
+
file_path
|
|
3094
|
+
for file_path in glob.glob(
|
|
3095
|
+
os.path.join(soa_dir, service, file_type),
|
|
3096
|
+
recursive=True,
|
|
3097
|
+
)
|
|
3098
|
+
]
|
|
3099
|
+
|
|
3100
|
+
|
|
3101
|
+
def get_soa_cluster_deploy_files(
|
|
3102
|
+
service: str = None, soa_dir: str = DEFAULT_SOA_DIR, instance_type: str = None
|
|
3103
|
+
) -> Iterator[Tuple[str, str]]:
|
|
3104
|
+
if service is None:
|
|
3105
|
+
service = "*"
|
|
3106
|
+
service_path = os.path.join(soa_dir, service)
|
|
3107
|
+
|
|
3108
|
+
valid_clusters = "|".join(load_system_paasta_config().get_clusters())
|
|
3109
|
+
|
|
3110
|
+
if instance_type in INSTANCE_TYPES:
|
|
3111
|
+
instance_types = instance_type
|
|
3112
|
+
else:
|
|
3113
|
+
instance_types = "|".join(INSTANCE_TYPES)
|
|
3114
|
+
|
|
3115
|
+
search_re = r"/.*/(" + instance_types + r")-(" + valid_clusters + r")\.yaml$"
|
|
3116
|
+
|
|
3117
|
+
for yaml_file in glob.glob("%s/*.yaml" % service_path):
|
|
3118
|
+
try:
|
|
3119
|
+
with open(yaml_file):
|
|
3120
|
+
cluster_re_match = re.search(search_re, yaml_file)
|
|
3121
|
+
if cluster_re_match is not None:
|
|
3122
|
+
cluster = cluster_re_match.group(2)
|
|
3123
|
+
yield (cluster, yaml_file)
|
|
3124
|
+
except IOError as err:
|
|
3125
|
+
print(f"Error opening {yaml_file}: {err}")
|
|
3126
|
+
|
|
3127
|
+
|
|
3128
|
+
def list_clusters(
|
|
3129
|
+
service: str = None, soa_dir: str = DEFAULT_SOA_DIR, instance_type: str = None
|
|
3130
|
+
) -> List[str]:
|
|
3131
|
+
"""Returns a sorted list of clusters a service is configured to deploy to,
|
|
3132
|
+
or all clusters if ``service`` is not specified.
|
|
3133
|
+
|
|
3134
|
+
Includes every cluster that has a ``kubernetes-*.yaml`` or ``tron-*.yaml`` file associated with it.
|
|
3135
|
+
|
|
3136
|
+
:param service: The service name. If unspecified, clusters running any service will be included.
|
|
3137
|
+
:returns: A sorted list of cluster names
|
|
3138
|
+
"""
|
|
3139
|
+
clusters = set()
|
|
3140
|
+
for cluster, _ in get_soa_cluster_deploy_files(
|
|
3141
|
+
service=service, soa_dir=soa_dir, instance_type=instance_type
|
|
3142
|
+
):
|
|
3143
|
+
clusters.add(cluster)
|
|
3144
|
+
return sorted(clusters)
|
|
3145
|
+
|
|
3146
|
+
|
|
3147
|
+
def list_all_instances_for_service(
|
|
3148
|
+
service: str,
|
|
3149
|
+
clusters: Iterable[str] = None,
|
|
3150
|
+
instance_type: str = None,
|
|
3151
|
+
soa_dir: str = DEFAULT_SOA_DIR,
|
|
3152
|
+
cache: bool = True,
|
|
3153
|
+
) -> Set[str]:
|
|
3154
|
+
instances = set()
|
|
3155
|
+
if not clusters:
|
|
3156
|
+
clusters = list_clusters(service, soa_dir=soa_dir)
|
|
3157
|
+
for cluster in clusters:
|
|
3158
|
+
if cache:
|
|
3159
|
+
si_list = get_service_instance_list(
|
|
3160
|
+
service, cluster, instance_type, soa_dir=soa_dir
|
|
3161
|
+
)
|
|
3162
|
+
else:
|
|
3163
|
+
si_list = get_service_instance_list_no_cache(
|
|
3164
|
+
service, cluster, instance_type, soa_dir=soa_dir
|
|
3165
|
+
)
|
|
3166
|
+
for service_instance in si_list:
|
|
3167
|
+
instances.add(service_instance[1])
|
|
3168
|
+
return instances
|
|
3169
|
+
|
|
3170
|
+
|
|
3171
|
+
def filter_templates_from_config(config: Dict) -> Dict[str, Any]:
|
|
3172
|
+
config = {
|
|
3173
|
+
key: value for key, value in config.items() if not key.startswith("_")
|
|
3174
|
+
} # filter templates
|
|
3175
|
+
return config or {}
|
|
3176
|
+
|
|
3177
|
+
|
|
3178
|
+
def read_service_instance_names(
|
|
3179
|
+
service: str, instance_type: str, cluster: str, soa_dir: str
|
|
3180
|
+
) -> Collection[Tuple[str, str]]:
|
|
3181
|
+
instance_list = []
|
|
3182
|
+
conf_file = f"{instance_type}-{cluster}"
|
|
3183
|
+
config = service_configuration_lib.read_extra_service_information(
|
|
3184
|
+
service,
|
|
3185
|
+
conf_file,
|
|
3186
|
+
soa_dir=soa_dir,
|
|
3187
|
+
deepcopy=False,
|
|
3188
|
+
)
|
|
3189
|
+
config = filter_templates_from_config(config)
|
|
3190
|
+
if instance_type == "tron":
|
|
3191
|
+
for job_name, job in config.items():
|
|
3192
|
+
action_names = list(job.get("actions", {}).keys())
|
|
3193
|
+
for name in action_names:
|
|
3194
|
+
instance = f"{job_name}.{name}"
|
|
3195
|
+
instance_list.append((service, instance))
|
|
3196
|
+
else:
|
|
3197
|
+
for instance in config:
|
|
3198
|
+
instance_list.append((service, instance))
|
|
3199
|
+
return instance_list
|
|
3200
|
+
|
|
3201
|
+
|
|
3202
|
+
def get_production_deploy_group(service: str, soa_dir: str = DEFAULT_SOA_DIR) -> str:
|
|
3203
|
+
service_configuration = read_service_configuration(service, soa_dir)
|
|
3204
|
+
return service_configuration.get("deploy", {}).get("production_deploy_group", None)
|
|
3205
|
+
|
|
3206
|
+
|
|
3207
|
+
def get_pipeline_config(service: str, soa_dir: str = DEFAULT_SOA_DIR) -> List[Dict]:
|
|
3208
|
+
service_configuration = read_service_configuration(service, soa_dir)
|
|
3209
|
+
return service_configuration.get("deploy", {}).get("pipeline", [])
|
|
3210
|
+
|
|
3211
|
+
|
|
3212
|
+
def is_secrets_for_teams_enabled(service: str, soa_dir: str = DEFAULT_SOA_DIR) -> bool:
|
|
3213
|
+
service_yaml_contents = read_extra_service_information(service, "service", soa_dir)
|
|
3214
|
+
return service_yaml_contents.get("secrets_for_owner_team", False)
|
|
3215
|
+
|
|
3216
|
+
|
|
3217
|
+
def get_pipeline_deploy_group_configs(
|
|
3218
|
+
service: str, soa_dir: str = DEFAULT_SOA_DIR
|
|
3219
|
+
) -> List[Dict]:
|
|
3220
|
+
pipeline_steps = []
|
|
3221
|
+
for step in get_pipeline_config(service, soa_dir):
|
|
3222
|
+
# added support for parallel steps in a deploy.yaml
|
|
3223
|
+
# parallel steps would break previous functionality as steps aren't
|
|
3224
|
+
# expected to be nested in a parallel block
|
|
3225
|
+
if step.get("parallel"):
|
|
3226
|
+
for parallel_step in step.get("parallel"):
|
|
3227
|
+
if parallel_step.get("step"):
|
|
3228
|
+
pipeline_steps.append(parallel_step)
|
|
3229
|
+
else:
|
|
3230
|
+
pipeline_steps.append(step)
|
|
3231
|
+
return [step for step in pipeline_steps if is_deploy_step(step["step"])]
|
|
3232
|
+
|
|
3233
|
+
|
|
3234
|
+
def get_pipeline_deploy_groups(
|
|
3235
|
+
service: str, soa_dir: str = DEFAULT_SOA_DIR
|
|
3236
|
+
) -> List[str]:
|
|
3237
|
+
deploy_group_configs = get_pipeline_deploy_group_configs(service, soa_dir)
|
|
3238
|
+
return [step["step"] for step in deploy_group_configs]
|
|
3239
|
+
|
|
3240
|
+
|
|
3241
|
+
def get_service_instance_list_no_cache(
|
|
3242
|
+
service: str,
|
|
3243
|
+
cluster: Optional[str] = None,
|
|
3244
|
+
instance_type: str = None,
|
|
3245
|
+
soa_dir: str = DEFAULT_SOA_DIR,
|
|
3246
|
+
) -> List[Tuple[str, str]]:
|
|
3247
|
+
"""Enumerate the instances defined for a service as a list of tuples.
|
|
3248
|
+
|
|
3249
|
+
:param service: The service name
|
|
3250
|
+
:param cluster: The cluster to read the configuration for
|
|
3251
|
+
:param instance_type: The type of instances to examine: 'kubernetes', 'tron', or None (default) for both
|
|
3252
|
+
:param soa_dir: The SOA config directory to read from
|
|
3253
|
+
:returns: A list of tuples of (name, instance) for each instance defined for the service name
|
|
3254
|
+
"""
|
|
3255
|
+
|
|
3256
|
+
instance_types: Tuple[str, ...]
|
|
3257
|
+
if not cluster:
|
|
3258
|
+
cluster = load_system_paasta_config().get_cluster()
|
|
3259
|
+
if instance_type in INSTANCE_TYPES:
|
|
3260
|
+
instance_types = (instance_type,)
|
|
3261
|
+
else:
|
|
3262
|
+
instance_types = INSTANCE_TYPES
|
|
3263
|
+
|
|
3264
|
+
instance_list: List[Tuple[str, str]] = []
|
|
3265
|
+
for srv_instance_type in instance_types:
|
|
3266
|
+
instance_list.extend(
|
|
3267
|
+
read_service_instance_names(
|
|
3268
|
+
service=service,
|
|
3269
|
+
instance_type=srv_instance_type,
|
|
3270
|
+
cluster=cluster,
|
|
3271
|
+
soa_dir=soa_dir,
|
|
3272
|
+
)
|
|
3273
|
+
)
|
|
3274
|
+
log.debug("Enumerated the following instances: %s", instance_list)
|
|
3275
|
+
return instance_list
|
|
3276
|
+
|
|
3277
|
+
|
|
3278
|
+
@time_cache(ttl=5)
|
|
3279
|
+
def get_service_instance_list(
|
|
3280
|
+
service: str,
|
|
3281
|
+
cluster: Optional[str] = None,
|
|
3282
|
+
instance_type: str = None,
|
|
3283
|
+
soa_dir: str = DEFAULT_SOA_DIR,
|
|
3284
|
+
) -> List[Tuple[str, str]]:
|
|
3285
|
+
"""Enumerate the instances defined for a service as a list of tuples.
|
|
3286
|
+
|
|
3287
|
+
:param service: The service name
|
|
3288
|
+
:param cluster: The cluster to read the configuration for
|
|
3289
|
+
:param instance_type: The type of instances to examine: 'kubernetes', 'tron', or None (default) for both
|
|
3290
|
+
:param soa_dir: The SOA config directory to read from
|
|
3291
|
+
:returns: A list of tuples of (name, instance) for each instance defined for the service name
|
|
3292
|
+
"""
|
|
3293
|
+
return get_service_instance_list_no_cache(
|
|
3294
|
+
service=service, cluster=cluster, instance_type=instance_type, soa_dir=soa_dir
|
|
3295
|
+
)
|
|
3296
|
+
|
|
3297
|
+
|
|
3298
|
+
def get_services_for_cluster(
|
|
3299
|
+
cluster: str = None, instance_type: str = None, soa_dir: str = DEFAULT_SOA_DIR
|
|
3300
|
+
) -> List[Tuple[str, str]]:
|
|
3301
|
+
"""Retrieve all services and instances defined to run in a cluster.
|
|
3302
|
+
|
|
3303
|
+
:param cluster: The cluster to read the configuration for
|
|
3304
|
+
:param instance_type: The type of instances to examine: 'kubernetes', 'tron', or None (default) for both
|
|
3305
|
+
:param soa_dir: The SOA config directory to read from
|
|
3306
|
+
:returns: A list of tuples of (service, instance)
|
|
3307
|
+
"""
|
|
3308
|
+
|
|
3309
|
+
if not cluster:
|
|
3310
|
+
cluster = load_system_paasta_config().get_cluster()
|
|
3311
|
+
rootdir = os.path.abspath(soa_dir)
|
|
3312
|
+
log.debug(
|
|
3313
|
+
"Retrieving all service instance names from %s for cluster %s", rootdir, cluster
|
|
3314
|
+
)
|
|
3315
|
+
instance_list: List[Tuple[str, str]] = []
|
|
3316
|
+
for srv_dir in os.listdir(rootdir):
|
|
3317
|
+
instance_list.extend(
|
|
3318
|
+
get_service_instance_list(srv_dir, cluster, instance_type, soa_dir)
|
|
3319
|
+
)
|
|
3320
|
+
return instance_list
|
|
3321
|
+
|
|
3322
|
+
|
|
3323
|
+
def load_service_instance_configs(
|
|
3324
|
+
service: str,
|
|
3325
|
+
instance_type: str,
|
|
3326
|
+
cluster: str,
|
|
3327
|
+
soa_dir: str = DEFAULT_SOA_DIR,
|
|
3328
|
+
) -> Dict[str, InstanceConfigDict]:
|
|
3329
|
+
conf_file = f"{instance_type}-{cluster}"
|
|
3330
|
+
user_configs = service_configuration_lib.read_extra_service_information(
|
|
3331
|
+
service,
|
|
3332
|
+
conf_file,
|
|
3333
|
+
soa_dir=soa_dir,
|
|
3334
|
+
deepcopy=False,
|
|
3335
|
+
)
|
|
3336
|
+
user_configs = filter_templates_from_config(user_configs)
|
|
3337
|
+
auto_configs = load_service_instance_auto_configs(
|
|
3338
|
+
service, instance_type, cluster, soa_dir
|
|
3339
|
+
)
|
|
3340
|
+
merged = {}
|
|
3341
|
+
for instance_name, user_config in user_configs.items():
|
|
3342
|
+
auto_config = auto_configs.get(instance_name, {})
|
|
3343
|
+
merged[instance_name] = deep_merge_dictionaries(
|
|
3344
|
+
overrides=user_config,
|
|
3345
|
+
defaults=auto_config,
|
|
3346
|
+
)
|
|
3347
|
+
return merged
|
|
3348
|
+
|
|
3349
|
+
|
|
3350
|
+
def load_service_instance_config(
|
|
3351
|
+
service: str,
|
|
3352
|
+
instance: str,
|
|
3353
|
+
instance_type: str,
|
|
3354
|
+
cluster: str,
|
|
3355
|
+
soa_dir: str = DEFAULT_SOA_DIR,
|
|
3356
|
+
) -> InstanceConfigDict:
|
|
3357
|
+
if instance.startswith("_"):
|
|
3358
|
+
raise InvalidJobNameError(
|
|
3359
|
+
f"Unable to load {instance_type} config for {service}.{instance} as instance name starts with '_'"
|
|
3360
|
+
)
|
|
3361
|
+
conf_file = f"{instance_type}-{cluster}"
|
|
3362
|
+
|
|
3363
|
+
# We pass deepcopy=False here and then do our own deepcopy of the subset of the data we actually care about. Without
|
|
3364
|
+
# this optimization, any code that calls load_service_instance_config for every instance in a yaml file is ~O(n^2).
|
|
3365
|
+
user_config = copy.deepcopy(
|
|
3366
|
+
service_configuration_lib.read_extra_service_information(
|
|
3367
|
+
service, conf_file, soa_dir=soa_dir, deepcopy=False
|
|
3368
|
+
).get(instance)
|
|
3369
|
+
)
|
|
3370
|
+
if user_config is None:
|
|
3371
|
+
raise NoConfigurationForServiceError(
|
|
3372
|
+
f"{instance} not found in config file {soa_dir}/{service}/{conf_file}.yaml."
|
|
3373
|
+
)
|
|
3374
|
+
|
|
3375
|
+
auto_config = load_service_instance_auto_configs(
|
|
3376
|
+
service, instance_type, cluster, soa_dir
|
|
3377
|
+
).get(instance, {})
|
|
3378
|
+
return deep_merge_dictionaries(
|
|
3379
|
+
overrides=user_config,
|
|
3380
|
+
defaults=auto_config,
|
|
3381
|
+
)
|
|
3382
|
+
|
|
3383
|
+
|
|
3384
|
+
def load_service_instance_auto_configs(
|
|
3385
|
+
service: str,
|
|
3386
|
+
instance_type: str,
|
|
3387
|
+
cluster: str,
|
|
3388
|
+
soa_dir: str = DEFAULT_SOA_DIR,
|
|
3389
|
+
) -> Dict[str, Dict[str, Any]]:
|
|
3390
|
+
enabled_types = load_system_paasta_config().get_auto_config_instance_types_enabled()
|
|
3391
|
+
# this looks a little funky: but what we're generally trying to do here is ensure that
|
|
3392
|
+
# certain types of instances can be moved between instance types without having to worry
|
|
3393
|
+
# about any sort of data races (or data weirdness) in autotune.
|
|
3394
|
+
# instead, what we do is map certain instance types to whatever we've picked as the "canonical"
|
|
3395
|
+
# instance type in autotune and always merge from there.
|
|
3396
|
+
realized_type = (
|
|
3397
|
+
load_system_paasta_config()
|
|
3398
|
+
.get_auto_config_instance_type_aliases()
|
|
3399
|
+
.get(instance_type, instance_type)
|
|
3400
|
+
)
|
|
3401
|
+
conf_file = f"{realized_type}-{cluster}"
|
|
3402
|
+
if enabled_types.get(realized_type):
|
|
3403
|
+
return service_configuration_lib.read_extra_service_information(
|
|
3404
|
+
service,
|
|
3405
|
+
f"{AUTO_SOACONFIG_SUBDIR}/{conf_file}",
|
|
3406
|
+
soa_dir=soa_dir,
|
|
3407
|
+
deepcopy=False,
|
|
3408
|
+
)
|
|
3409
|
+
else:
|
|
3410
|
+
return {}
|
|
3411
|
+
|
|
3412
|
+
|
|
3413
|
+
def get_docker_host() -> str:
|
|
3414
|
+
return os.environ.get("DOCKER_HOST", "unix://var/run/docker.sock")
|
|
3415
|
+
|
|
3416
|
+
|
|
3417
|
+
def get_docker_client() -> APIClient:
|
|
3418
|
+
client_opts = kwargs_from_env(assert_hostname=False)
|
|
3419
|
+
if "base_url" in client_opts:
|
|
3420
|
+
return APIClient(**client_opts)
|
|
3421
|
+
else:
|
|
3422
|
+
return APIClient(base_url=get_docker_host(), **client_opts)
|
|
3423
|
+
|
|
3424
|
+
|
|
3425
|
+
def get_running_mesos_docker_containers() -> List[Dict]:
|
|
3426
|
+
client = get_docker_client()
|
|
3427
|
+
running_containers = client.containers()
|
|
3428
|
+
return [
|
|
3429
|
+
container
|
|
3430
|
+
for container in running_containers
|
|
3431
|
+
if "mesos-" in container["Names"][0]
|
|
3432
|
+
]
|
|
3433
|
+
|
|
3434
|
+
|
|
3435
|
+
class TimeoutError(Exception):
|
|
3436
|
+
pass
|
|
3437
|
+
|
|
3438
|
+
|
|
3439
|
+
class Timeout:
|
|
3440
|
+
# From http://stackoverflow.com/questions/2281850/timeout-function-if-it-takes-too-long-to-finish
|
|
3441
|
+
|
|
3442
|
+
def __init__(self, seconds: int = 1, error_message: str = "Timeout") -> None:
|
|
3443
|
+
self.seconds = seconds
|
|
3444
|
+
self.error_message = error_message
|
|
3445
|
+
|
|
3446
|
+
def handle_timeout(self, signum: int, frame: FrameType) -> None:
|
|
3447
|
+
raise TimeoutError(self.error_message)
|
|
3448
|
+
|
|
3449
|
+
def __enter__(self) -> None:
|
|
3450
|
+
self.old_handler = signal.signal(signal.SIGALRM, self.handle_timeout)
|
|
3451
|
+
signal.alarm(self.seconds)
|
|
3452
|
+
|
|
3453
|
+
def __exit__(self, type: Any, value: Any, traceback: Any) -> None:
|
|
3454
|
+
signal.alarm(0)
|
|
3455
|
+
signal.signal(signal.SIGALRM, self.old_handler)
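A minimal usage sketch of the `Timeout` context manager; the slow call is hypothetical:

try:
    with Timeout(seconds=5, error_message="fetch took too long"):
        fetch_something_slow()  # hypothetical call that may hang
except TimeoutError:  # the module-level TimeoutError defined above
    print("gave up after 5 seconds")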
|
|
3456
|
+
|
|
3457
|
+
|
|
3458
|
+
def print_with_indent(line: str, indent: int = 2) -> None:
|
|
3459
|
+
"""Print a line with a given indent level"""
|
|
3460
|
+
print(" " * indent + line)
|
|
3461
|
+
|
|
3462
|
+
|
|
3463
|
+
class NoDeploymentsAvailable(Exception):
|
|
3464
|
+
pass
|
|
3465
|
+
|
|
3466
|
+
|
|
3467
|
+
class DeploymentVersion(NamedTuple):
|
|
3468
|
+
sha: str
|
|
3469
|
+
image_version: Optional[str]
|
|
3470
|
+
|
|
3471
|
+
def __repr__(self) -> str:
|
|
3472
|
+
# Represented as commit if no image_version, standard tuple repr otherwise
|
|
3473
|
+
return (
|
|
3474
|
+
f"DeploymentVersion(sha={self.sha}, image_version={self.image_version})"
|
|
3475
|
+
if self.image_version
|
|
3476
|
+
else self.sha
|
|
3477
|
+
)
|
|
3478
|
+
|
|
3479
|
+
def short_sha_repr(self, sha_len: int = 8) -> str:
|
|
3480
|
+
# Same as __repr__ but allows us to print the shortened commit sha.
|
|
3481
|
+
short_sha = self.sha[:sha_len]
|
|
3482
|
+
return (
|
|
3483
|
+
f"DeploymentVersion(sha={short_sha}, image_version={self.image_version})"
|
|
3484
|
+
if self.image_version
|
|
3485
|
+
else short_sha
|
|
3486
|
+
)
|
|
3487
|
+
|
|
3488
|
+
def json(self) -> str:
|
|
3489
|
+
return json.dumps(self._asdict())
|
|
3490
|
+
|
|
3491
|
+
|
|
3492
|
+
DeploymentsJsonV1Dict = Dict[str, BranchDictV1]
|
|
3493
|
+
|
|
3494
|
+
DeployGroup = str
|
|
3495
|
+
BranchName = str
|
|
3496
|
+
|
|
3497
|
+
|
|
3498
|
+
class _DeploymentsJsonV2ControlsDict(TypedDict, total=False):
|
|
3499
|
+
force_bounce: Optional[str]
|
|
3500
|
+
desired_state: str
|
|
3501
|
+
|
|
3502
|
+
|
|
3503
|
+
class _DeploymentsJsonV2DeploymentsDict(TypedDict):
|
|
3504
|
+
docker_image: str
|
|
3505
|
+
git_sha: str
|
|
3506
|
+
image_version: Optional[str]
|
|
3507
|
+
|
|
3508
|
+
|
|
3509
|
+
class DeploymentsJsonV2Dict(TypedDict):
|
|
3510
|
+
deployments: Dict[DeployGroup, _DeploymentsJsonV2DeploymentsDict]
|
|
3511
|
+
controls: Dict[BranchName, _DeploymentsJsonV2ControlsDict]
|
|
3512
|
+
|
|
3513
|
+
|
|
3514
|
+
class DeploymentsJsonDict(TypedDict):
|
|
3515
|
+
v1: DeploymentsJsonV1Dict
|
|
3516
|
+
v2: DeploymentsJsonV2Dict
|
|
3517
|
+
|
|
3518
|
+
|
|
3519
|
+
class DeploymentsJsonV1:
|
|
3520
|
+
def __init__(self, config_dict: DeploymentsJsonV1Dict) -> None:
|
|
3521
|
+
self.config_dict = config_dict
|
|
3522
|
+
|
|
3523
|
+
def get_branch_dict(self, service: str, branch: str) -> BranchDictV1:
|
|
3524
|
+
full_branch = f"{service}:paasta-{branch}"
|
|
3525
|
+
return self.config_dict.get(full_branch, {})
|
|
3526
|
+
|
|
3527
|
+
def __eq__(self, other: Any) -> bool:
|
|
3528
|
+
return (
|
|
3529
|
+
isinstance(other, DeploymentsJsonV1)
|
|
3530
|
+
and other.config_dict == self.config_dict
|
|
3531
|
+
)
|
|
3532
|
+
|
|
3533
|
+
|
|
3534
|
+
class DeploymentsJsonV2:
|
|
3535
|
+
def __init__(self, service: str, config_dict: DeploymentsJsonV2Dict) -> None:
|
|
3536
|
+
self.config_dict = config_dict
|
|
3537
|
+
self.service = service
|
|
3538
|
+
|
|
3539
|
+
def get_branch_dict(
|
|
3540
|
+
self, service: str, branch: str, deploy_group: str
|
|
3541
|
+
) -> BranchDictV2:
|
|
3542
|
+
full_branch = f"{service}:{branch}"
|
|
3543
|
+
branch_dict: BranchDictV2 = {
|
|
3544
|
+
"docker_image": self.get_docker_image_for_deploy_group(deploy_group),
|
|
3545
|
+
"git_sha": self.get_git_sha_for_deploy_group(deploy_group),
|
|
3546
|
+
"image_version": self.get_image_version_for_deploy_group(deploy_group),
|
|
3547
|
+
"desired_state": self.get_desired_state_for_branch(full_branch),
|
|
3548
|
+
"force_bounce": self.get_force_bounce_for_branch(full_branch),
|
|
3549
|
+
}
|
|
3550
|
+
return branch_dict
|
|
3551
|
+
|
|
3552
|
+
def get_deploy_groups(self) -> Collection[str]:
|
|
3553
|
+
return self.config_dict["deployments"].keys()
|
|
3554
|
+
|
|
3555
|
+
def get_docker_image_for_deploy_group(self, deploy_group: str) -> str:
|
|
3556
|
+
try:
|
|
3557
|
+
deploy_group_config = self.config_dict["deployments"][deploy_group]
|
|
3558
|
+
except KeyError:
|
|
3559
|
+
e = f"{self.service} not deployed to {deploy_group}. Has mark-for-deployment been run?"
|
|
3560
|
+
raise NoDeploymentsAvailable(e)
|
|
3561
|
+
try:
|
|
3562
|
+
return deploy_group_config["docker_image"]
|
|
3563
|
+
except KeyError:
|
|
3564
|
+
e = f"The configuration for service {self.service} in deploy group {deploy_group} does not contain 'docker_image' metadata."
|
|
3565
|
+
raise KeyError(e)
|
|
3566
|
+
|
|
3567
|
+
def get_git_sha_for_deploy_group(self, deploy_group: str) -> str:
|
|
3568
|
+
try:
|
|
3569
|
+
deploy_group_config = self.config_dict["deployments"][deploy_group]
|
|
3570
|
+
except KeyError:
|
|
3571
|
+
e = f"{self.service} not deployed to {deploy_group}. Has mark-for-deployment been run?"
|
|
3572
|
+
raise NoDeploymentsAvailable(e)
|
|
3573
|
+
try:
|
|
3574
|
+
return deploy_group_config["git_sha"]
|
|
3575
|
+
except KeyError:
|
|
3576
|
+
e = f"The configuration for service {self.service} in deploy group {deploy_group} does not contain 'git_sha' metadata."
|
|
3577
|
+
raise KeyError(e)
|
|
3578
|
+
|
|
3579
|
+
def get_image_version_for_deploy_group(self, deploy_group: str) -> Optional[str]:
|
|
3580
|
+
try:
|
|
3581
|
+
deploy_group_config = self.config_dict["deployments"][deploy_group]
|
|
3582
|
+
except KeyError:
|
|
3583
|
+
e = f"{self.service} not deployed to {deploy_group}. Has mark-for-deployment been run?"
|
|
3584
|
+
raise NoDeploymentsAvailable(e)
|
|
3585
|
+
try:
|
|
3586
|
+
# TODO: Once these changes have propagated image_version should
|
|
3587
|
+
# always be present in the deployments.json file, so remove the
|
|
3588
|
+
# .get() call.
|
|
3589
|
+
return deploy_group_config.get("image_version", None)
|
|
3590
|
+
except KeyError:
|
|
3591
|
+
e = f"The configuration for service {self.service} in deploy group {deploy_group} does not contain 'image_version' metadata."
|
|
3592
|
+
raise KeyError(e)
|
|
3593
|
+
|
|
3594
|
+
def get_deployment_version_for_deploy_group(
|
|
3595
|
+
self, deploy_group: str
|
|
3596
|
+
) -> DeploymentVersion:
|
|
3597
|
+
return DeploymentVersion(
|
|
3598
|
+
sha=self.get_git_sha_for_deploy_group(deploy_group),
|
|
3599
|
+
image_version=self.get_image_version_for_deploy_group(deploy_group),
|
|
3600
|
+
)
|
|
3601
|
+
|
|
3602
|
+
def get_desired_state_for_branch(self, control_branch: str) -> str:
|
|
3603
|
+
try:
|
|
3604
|
+
return self.config_dict["controls"][control_branch].get(
|
|
3605
|
+
"desired_state", "start"
|
|
3606
|
+
)
|
|
3607
|
+
except KeyError:
|
|
3608
|
+
e = f"{self.service} not configured for {control_branch}. Has mark-for-deployment been run?"
|
|
3609
|
+
raise NoDeploymentsAvailable(e)
|
|
3610
|
+
|
|
3611
|
+
def get_force_bounce_for_branch(self, control_branch: str) -> str:
|
|
3612
|
+
try:
|
|
3613
|
+
return self.config_dict["controls"][control_branch].get(
|
|
3614
|
+
"force_bounce", None
|
|
3615
|
+
)
|
|
3616
|
+
except KeyError:
|
|
3617
|
+
e = f"{self.service} not configured for {control_branch}. Has mark-for-deployment been run?"
|
|
3618
|
+
raise NoDeploymentsAvailable(e)
|
|
3619
|
+
|
|
3620
|
+
|
|
3621
|
+
def load_deployments_json(service: str, soa_dir: str = DEFAULT_SOA_DIR) -> Any:
|
|
3622
|
+
deployment_file = os.path.join(soa_dir, service, "deployments.json")
|
|
3623
|
+
if os.path.isfile(deployment_file):
|
|
3624
|
+
with open(deployment_file) as f:
|
|
3625
|
+
config_dict = json.load(f)
|
|
3626
|
+
return (
|
|
3627
|
+
DeploymentsJsonV1(config_dict["v1"])
|
|
3628
|
+
if "v1" in config_dict
|
|
3629
|
+
else DeploymentsJsonV2(service=service, config_dict=config_dict["v2"])
|
|
3630
|
+
)
|
|
3631
|
+
else:
|
|
3632
|
+
e = f"{deployment_file} was not found. 'generate_deployments_for_service --service {service}' must be run first"
|
|
3633
|
+
raise NoDeploymentsAvailable(e)
|
|
3634
|
+
|
|
3635
|
+
|
|
3636
|
+
def load_v2_deployments_json(
|
|
3637
|
+
service: str, soa_dir: str = DEFAULT_SOA_DIR
|
|
3638
|
+
) -> DeploymentsJsonV2:
|
|
3639
|
+
deployment_file = os.path.join(soa_dir, service, "deployments.json")
|
|
3640
|
+
if os.path.isfile(deployment_file):
|
|
3641
|
+
with open(deployment_file) as f:
|
|
3642
|
+
return DeploymentsJsonV2(service=service, config_dict=json.load(f)["v2"])
|
|
3643
|
+
else:
|
|
3644
|
+
e = f"{deployment_file} was not found. 'generate_deployments_for_service --service {service}' must be run first"
|
|
3645
|
+
raise NoDeploymentsAvailable(e)
|
|
3646
|
+
|
|
3647
|
+
|
|
3648
|
+
def get_paasta_branch(cluster: str, instance: str) -> str:
|
|
3649
|
+
return SPACER.join((cluster, instance))
|
|
3650
|
+
|
|
3651
|
+
|
|
3652
|
+
def parse_timestamp(tstamp: str) -> datetime.datetime:
|
|
3653
|
+
return datetime.datetime.strptime(tstamp, "%Y%m%dT%H%M%S")
|
|
3654
|
+
|
|
3655
|
+
|
|
3656
|
+
def format_timestamp(dt: datetime.datetime = None) -> str:
|
|
3657
|
+
if dt is None:
|
|
3658
|
+
dt = datetime.datetime.utcnow()
|
|
3659
|
+
return dt.strftime("%Y%m%dT%H%M%S")
|
|
3660
|
+
|
|
3661
|
+
|
|
3662
|
+
def get_paasta_tag_from_deploy_group(
    identifier: str, desired_state: str, image_version: Optional[str] = None
) -> str:
    timestamp = format_timestamp(datetime.datetime.utcnow())
    if image_version:
        return f"paasta-{identifier}+{image_version}-{timestamp}-{desired_state}"
    else:
        return f"paasta-{identifier}-{timestamp}-{desired_state}"


def get_paasta_tag(cluster: str, instance: str, desired_state: str) -> str:
    timestamp = format_timestamp(datetime.datetime.utcnow())
    return f"paasta-{cluster}.{instance}-{timestamp}-{desired_state}"


def format_tag(tag: str) -> str:
    return "refs/tags/%s" % tag


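# Illustrative sketch (not part of the original module): the deploy tag formats
# produced above, using a made-up deploy group and a fixed timestamp.
#
#     get_paasta_tag_from_deploy_group("prod.everything", "deploy")
#     # e.g. "paasta-prod.everything-20240101T000000-deploy"
#     get_paasta_tag_from_deploy_group("prod.everything", "deploy", image_version="extrastuff")
#     # e.g. "paasta-prod.everything+extrastuff-20240101T000000-deploy"
#     format_tag(tag)  # prefixes "refs/tags/" to turn the tag into a full git ref

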
def get_latest_deployment_tag(
    refs: Dict[str, str], deploy_group: str
) -> Tuple[str, str, Optional[str]]:
    """Gets the latest deployment tag and sha for the specified deploy_group

    :param refs: A dictionary mapping git refs to shas
    :param deploy_group: The deployment group to return a deploy tag for

    :returns: A tuple of the form (ref, sha, image_version) where ref is the
              actual deployment tag (with the most recent timestamp), sha is
              the sha it points at and image_version provides additional
              version information about the image
    """
    most_recent_dtime = None
    most_recent_ref = None
    most_recent_sha = None
    most_recent_image_version = None
    pattern = re.compile(
        r"^refs/tags/paasta-%s(?:\+(?P<image_version>.*)){0,1}-(?P<dtime>\d{8}T\d{6})-deploy$"
        % deploy_group
    )

    for ref_name, sha in refs.items():
        match = pattern.match(ref_name)
        if match:
            gd = match.groupdict()
            dtime = gd["dtime"]
            if most_recent_dtime is None or dtime > most_recent_dtime:
                most_recent_dtime = dtime
                most_recent_ref = ref_name
                most_recent_sha = sha
                most_recent_image_version = gd["image_version"]
    return most_recent_ref, most_recent_sha, most_recent_image_version


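# Illustrative sketch (not part of the original module): given refs such as
# those returned by a git ls-remote lookup, the newest -deploy tag for the
# deploy group wins. The shas and timestamps below are made up.
#
#     refs = {
#         "refs/tags/paasta-prod.everything-20240101T000000-deploy": "aaaa1111",
#         "refs/tags/paasta-prod.everything-20240301T000000-deploy": "bbbb2222",
#     }
#     ref, sha, image_version = get_latest_deployment_tag(refs, "prod.everything")
#     # ref == "refs/tags/paasta-prod.everything-20240301T000000-deploy"
#     # sha == "bbbb2222"; image_version is None (no "+<metadata>" in the tag)

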
def build_image_identifier(
    git_sha: str, sha_len: Optional[int] = None, image_version: Optional[str] = None
) -> str:
    image = git_sha
    if sha_len is not None:
        image = image[:sha_len]
    if image_version is not None:
        image += f"-{image_version}"

    return image


class NoDockerImageError(Exception):
    pass


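# Illustrative sketch (not part of the original module): the identifier is the
# (optionally truncated) git sha plus any image_version suffix. Values are made up.
#
#     build_image_identifier("0123456789abcdef", sha_len=8)
#     # -> "01234567"
#     build_image_identifier("0123456789abcdef", sha_len=8, image_version="extrastuff")
#     # -> "01234567-extrastuff"

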
def get_config_hash(config: Any, force_bounce: str = None) -> str:
    """Create an MD5 hash of the configuration dictionary to be sent to
    Kubernetes. Or anything really, so long as str(config) works. Returns
    the first 8 characters so things are not really long.

    :param config: The configuration to hash
    :param force_bounce: a timestamp (in the form of a string) that is appended before hashing
                         that can be used to force a hash change
    :returns: An MD5 hash of the JSON-serialized config, truncated and prefixed with "config"
    """
    hasher = hashlib.md5()
    hasher.update(
        json.dumps(config, sort_keys=True).encode("UTF-8")
        + (force_bounce or "").encode("UTF-8")
    )
    return "config%s" % hasher.hexdigest()[:8]


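# Illustrative sketch (not part of the original module): the hash is stable for
# equal configs and changes when force_bounce changes, which is what triggers a
# bounce without any config change.
#
#     get_config_hash({"cpus": 1, "mem": 512})
#     # -> "config" + the first 8 hex digits of the MD5 (deterministic for equal configs)
#     get_config_hash({"cpus": 1, "mem": 512}, force_bounce="20240101T000000")
#     # -> a different "config..." value, forcing a new bounce

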
def get_git_sha_from_dockerurl(docker_url: str, long: bool = False) -> str:
    """We encode the sha of the code that built a docker image *in* the docker
    url. This function takes that url as input and outputs the sha.
    """
    if ":paasta-" in docker_url:
        deployment_version = get_deployment_version_from_dockerurl(docker_url)
        git_sha = deployment_version.sha if deployment_version else ""
    # Fall back to the old behavior if the docker_url does not follow the
    # expected pattern
    else:
        parts = docker_url.split("/")
        parts = parts[-1].split("-")
        git_sha = parts[-1]
        # Further ensure to only grab the image label in case not using paasta images
        git_sha = git_sha.split(":")[-1]

    return git_sha if long else git_sha[:8]


def get_image_version_from_dockerurl(docker_url: str) -> Optional[str]:
    """We can optionally encode additional metadata about the docker image *in*
    the docker url. This function takes that url as input and outputs the
    image version metadata (if any).
    """
    deployment_version = get_deployment_version_from_dockerurl(docker_url)
    return deployment_version.image_version if deployment_version else None


def get_deployment_version_from_dockerurl(
    docker_url: str,
) -> Optional[DeploymentVersion]:
    regex_match = re.match(
        r".*:paasta-(?P<git_sha>[A-Za-z0-9]+)(-(?P<image_version>.+))?", docker_url
    )

    return (
        DeploymentVersion(
            sha=regex_match.group("git_sha"),
            image_version=regex_match.group("image_version"),
        )
        if regex_match is not None
        else None
    )


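# Illustrative sketch (not part of the original module): parsing a paasta-style
# docker url. The registry and sha below are made up.
#
#     url = "docker-registry.example.com:443/services-foo:paasta-0123456789abcdef-extrastuff"
#     get_deployment_version_from_dockerurl(url)
#     # -> DeploymentVersion(sha="0123456789abcdef", image_version="extrastuff")
#     get_git_sha_from_dockerurl(url)        # -> "01234567" (short sha by default)
#     get_image_version_from_dockerurl(url)  # -> "extrastuff"

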
def get_code_sha_from_dockerurl(docker_url: str) -> str:
    """code_sha is the hash extracted from the docker url, prefixed with "git";
    the short hash is used because it's embedded in mesos task names, which
    have a length limit.
    """
    try:
        git_sha = get_git_sha_from_dockerurl(docker_url, long=False)
        return "git%s" % git_sha
    except Exception:
        return "gitUNKNOWN"


def is_under_replicated(
    num_available: int, expected_count: int, crit_threshold: int
) -> Tuple[bool, float]:
    """Calculates if something is under replicated

    :param num_available: How many things are up
    :param expected_count: How many things you think should be up
    :param crit_threshold: Int from 0-100
    :returns: Tuple of (bool, ratio)
    """
    if expected_count == 0:
        ratio = 100.0
    else:
        ratio = (num_available / float(expected_count)) * 100

    if ratio < int(crit_threshold):
        return (True, ratio)
    else:
        return (False, ratio)


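# Illustrative sketch (not part of the original module): replication is
# considered critical when the available percentage falls below crit_threshold.
#
#     is_under_replicated(num_available=3, expected_count=4, crit_threshold=90)
#     # -> (True, 75.0)   75% < 90%, so under-replicated
#     is_under_replicated(num_available=4, expected_count=4, crit_threshold=90)
#     # -> (False, 100.0)

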
def deploy_blacklist_to_constraints(
    deploy_blacklist: DeployBlacklist,
) -> List[Constraint]:
    """Converts a blacklist of locations into tron appropriate constraints.

    :param deploy_blacklist: List of lists of locations to blacklist
    :returns: List of lists of constraints
    """
    constraints: List[Constraint] = []
    for blacklisted_location in deploy_blacklist:
        constraints.append([blacklisted_location[0], "UNLIKE", blacklisted_location[1]])

    return constraints


def deploy_whitelist_to_constraints(
    deploy_whitelist: DeployWhitelist,
) -> List[Constraint]:
    """Converts a whitelist of locations into tron appropriate constraints

    :param deploy_whitelist: List of lists of locations to whitelist
    :returns: List of lists of constraints
    """
    if deploy_whitelist is not None:
        (region_type, regions) = deploy_whitelist
        regionstr = "|".join(regions)

        return [[region_type, "LIKE", regionstr]]
    return []


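# Illustrative sketch (not part of the original module): both helpers emit
# attribute constraints in the tron/Mesos style. The locations below are made up.
#
#     deploy_blacklist_to_constraints([["region", "uswest1-prod"]])
#     # -> [["region", "UNLIKE", "uswest1-prod"]]
#     deploy_whitelist_to_constraints(("region", ["uswest1-prod", "useast1-prod"]))
#     # -> [["region", "LIKE", "uswest1-prod|useast1-prod"]]

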
def terminal_len(text: str) -> int:
    """Return the number of characters that text will take up on a terminal."""
    return len(remove_ansi_escape_sequences(text))


def format_table(
    rows: Iterable[Union[str, Sequence[str]]], min_spacing: int = 2
) -> List[str]:
    """Formats a table for use on the command line.

    :param rows: List of rows, each of which can either be a tuple of strings containing the row's values, or a string
                 to be inserted verbatim. Each row (except literal strings) should have the same number of elements as
                 all the others.
    :returns: A list of strings, one per row, formatted as a table.
    """

    list_rows = [r for r in rows if not isinstance(r, str)]

    # If all of the rows are strings, we have nothing to do, so short-circuit.
    if not list_rows:
        return cast(List[str], rows)

    widths = []
    for i in range(len(list_rows[0])):
        widths.append(max(terminal_len(r[i]) for r in list_rows))

    expanded_rows = []
    for row in rows:
        if isinstance(row, str):
            expanded_rows.append([row])
        else:
            expanded_row = []
            for i, cell in enumerate(row):
                if i == len(row) - 1:
                    padding = ""
                else:
                    padding = " " * (widths[i] - terminal_len(cell))
                expanded_row.append(cell + padding)
            expanded_rows.append(expanded_row)

    return [(" " * min_spacing).join(r) for r in expanded_rows]


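# Illustrative sketch (not part of the original module): each column is padded
# to its widest cell (ignoring ANSI escapes), then cells are joined with
# min_spacing spaces, so the columns line up.
#
#     format_table([("service", "instance"), ("example_service", "canary")])
#     # -> ["service          instance",
#     #     "example_service  canary"]

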
_DeepMergeT = TypeVar("_DeepMergeT", bound=Any)


class DuplicateKeyError(Exception):
    pass


def deep_merge_dictionaries(
    overrides: _DeepMergeT, defaults: _DeepMergeT, allow_duplicate_keys: bool = True
) -> _DeepMergeT:
    """
    Recursively merges two dictionaries, with values from overrides taking
    precedence over defaults. Nested dictionaries are merged key by key; when
    allow_duplicate_keys is False, a conflicting non-dict value raises
    DuplicateKeyError instead of being overridden.
    """
    result = copy.deepcopy(defaults)
    stack: List[Tuple[Dict, Dict]] = [(overrides, result)]
    while stack:
        source_dict, result_dict = stack.pop()
        for key, value in source_dict.items():
            try:
                child = result_dict[key]
            except KeyError:
                result_dict[key] = value
            else:
                if isinstance(value, dict) and isinstance(child, dict):
                    stack.append((value, child))
                else:
                    if allow_duplicate_keys:
                        result_dict[key] = value
                    else:
                        raise DuplicateKeyError(
                            f"defaults and overrides both have key {key}"
                        )
    return result


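# Illustrative sketch (not part of the original module): nested keys merge,
# while scalar conflicts are won by overrides (unless allow_duplicate_keys=False,
# which raises DuplicateKeyError). The config keys below are made up.
#
#     deep_merge_dictionaries(
#         overrides={"env": {"FOO": "1"}, "cpus": 2},
#         defaults={"env": {"BAR": "2"}, "cpus": 1, "mem": 512},
#     )
#     # -> {"env": {"BAR": "2", "FOO": "1"}, "cpus": 2, "mem": 512}

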
class ZookeeperPool:
    """
    A context manager that shares the same KazooClient with its children. The first nested context manager
    creates and deletes the client and shares it with any of its children. This allows placing a context
    manager over a large number of zookeeper calls without opening and closing a connection each time.
    GIL makes this 'safe'.
    """

    counter: int = 0
    zk: KazooClient = None

    @classmethod
    def __enter__(cls) -> KazooClient:
        if cls.zk is None:
            cls.zk = KazooClient(
                hosts=load_system_paasta_config().get_zk_hosts(), read_only=True
            )
            cls.zk.start()
        cls.counter = cls.counter + 1
        return cls.zk

    @classmethod
    def __exit__(cls, *args: Any, **kwargs: Any) -> None:
        cls.counter = cls.counter - 1
        if cls.counter == 0:
            cls.zk.stop()
            cls.zk.close()
            cls.zk = None


def calculate_tail_lines(verbose_level: int) -> int:
    if verbose_level <= 1:
        return 0
    else:
        return 10 ** (verbose_level - 1)


def is_deploy_step(step: str) -> bool:
    """
    Returns true if the given step deploys to an instancename
    Returns false if the step is a predefined step-type, e.g. itest or command-*
    """
    return not (
        (step in DEPLOY_PIPELINE_NON_DEPLOY_STEPS) or (step.startswith("command-"))
    )


_UseRequestsCacheFuncT = TypeVar("_UseRequestsCacheFuncT", bound=Callable)


def use_requests_cache(
    cache_name: str, backend: str = "memory", **kwargs: Any
) -> Callable[[_UseRequestsCacheFuncT], _UseRequestsCacheFuncT]:
    def wrap(fun: _UseRequestsCacheFuncT) -> _UseRequestsCacheFuncT:
        def fun_with_cache(*args: Any, **kwargs: Any) -> Any:
            requests_cache.install_cache(cache_name, backend=backend, **kwargs)
            result = fun(*args, **kwargs)
            requests_cache.uninstall_cache()
            return result

        return cast(_UseRequestsCacheFuncT, fun_with_cache)

    return wrap


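# Illustrative sketch (not part of the original module): decorating a function
# installs a global requests_cache for the duration of the call, so repeated
# HTTP GETs inside it can be served from the in-memory cache. The URL and
# function name below are made up.
#
#     @use_requests_cache("example_cache")
#     def fetch_twice():
#         requests.get("http://example.com/api")   # goes to the network
#         requests.get("http://example.com/api")   # served from the cache

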
def long_job_id_to_short_job_id(long_job_id: str) -> str:
    service, instance, _, __ = decompose_job_id(long_job_id)
    return compose_job_id(service, instance)


def mean(iterable: Collection[float]) -> float:
    """
    Returns the average value of an iterable
    """
    return sum(iterable) / len(iterable)


def prompt_pick_one(sequence: Collection[str], choosing: str) -> str:
    if not sys.stdin.isatty():
        print(
            "No {choosing} specified and no TTY present to ask. "
            "Please specify a {choosing} using the cli.".format(choosing=choosing),
            file=sys.stderr,
        )
        sys.exit(1)

    if not sequence:
        print(
            f"PaaSTA needs to pick a {choosing} but none were found.", file=sys.stderr
        )
        sys.exit(1)

    global_actions = [str("quit")]
    choices = [(item, item) for item in sequence]

    if len(choices) == 1:
        return choices[0][0]

    chooser = choice.Menu(choices=choices, global_actions=global_actions)
    chooser.title = (
        'Please pick a {choosing} from the choices below (or "quit" to quit):'.format(
            choosing=str(choosing)
        )
    )
    try:
        result = chooser.ask()
    except (KeyboardInterrupt, EOFError):
        print("")
        sys.exit(1)

    if isinstance(result, tuple) and result[1] == str("quit"):
        sys.exit(1)
    else:
        return result


def to_bytes(obj: Any) -> bytes:
    if isinstance(obj, bytes):
        return obj
    elif isinstance(obj, str):
        return obj.encode("UTF-8")
    else:
        return str(obj).encode("UTF-8")


_TimeoutFuncRetType = TypeVar("_TimeoutFuncRetType")


def timeout(
    seconds: int = 10,
    error_message: str = os.strerror(errno.ETIME),
    use_signals: bool = True,
) -> Callable[[Callable[..., _TimeoutFuncRetType]], Callable[..., _TimeoutFuncRetType]]:
    if use_signals:

        def decorate(
            func: Callable[..., _TimeoutFuncRetType]
        ) -> Callable[..., _TimeoutFuncRetType]:
            def _handle_timeout(signum: int, frame: FrameType) -> None:
                raise TimeoutError(error_message)

            def wrapper(*args: Any, **kwargs: Any) -> _TimeoutFuncRetType:
                signal.signal(signal.SIGALRM, _handle_timeout)
                signal.alarm(seconds)
                try:
                    result = func(*args, **kwargs)
                finally:
                    signal.alarm(0)
                return result

            return wraps(func)(wrapper)

    else:

        def decorate(
            func: Callable[..., _TimeoutFuncRetType]
        ) -> Callable[..., _TimeoutFuncRetType]:
            # https://github.com/python/mypy/issues/797
            return _Timeout(func, seconds, error_message)  # type: ignore

    return decorate


class _Timeout:
    def __init__(
        self,
        function: Callable[..., _TimeoutFuncRetType],
        seconds: float,
        error_message: str,
    ) -> None:
        self.seconds = seconds
        self.control: queue.Queue[
            Tuple[bool, Union[_TimeoutFuncRetType, Tuple]]
        ] = queue.Queue()
        self.function = function
        self.error_message = error_message

    def run(self, *args: Any, **kwargs: Any) -> None:
        # Try to put the result of the function into the queue;
        # if an exception occurs, put the exc_info instead
        # so that it can be raised in the main thread.
        try:
            self.control.put((True, self.function(*args, **kwargs)))
        except Exception:
            self.control.put((False, sys.exc_info()))

    def __call__(self, *args: Any, **kwargs: Any) -> _TimeoutFuncRetType:
        self.func_thread = threading.Thread(target=self.run, args=args, kwargs=kwargs)
        self.func_thread.daemon = True
        self.timeout = self.seconds + time.time()
        self.func_thread.start()
        return self.get_and_raise()

    def get_and_raise(self) -> _TimeoutFuncRetType:
        while not self.timeout < time.time():
            time.sleep(0.01)
            if not self.func_thread.is_alive():
                ret = self.control.get()
                if ret[0]:
                    return cast(_TimeoutFuncRetType, ret[1])
                else:
                    _, e, tb = cast(Tuple, ret[1])
                    raise e.with_traceback(tb)
        raise TimeoutError(self.error_message)


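# Illustrative sketch (not part of the original module): with use_signals=True
# (the default) the wrapped call is interrupted via SIGALRM in the main thread;
# with use_signals=False it runs in a daemon thread and _Timeout polls the
# result queue. The function name and sleep duration below are made up.
#
#     @timeout(seconds=2, use_signals=False)
#     def slow_call():
#         time.sleep(5)
#
#     slow_call()  # raises TimeoutError after roughly 2 seconds

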
def suggest_possibilities(
    word: str, possibilities: Iterable[str], max_suggestions: int = 3
) -> str:
    suggestions = cast(
        List[str],
        difflib.get_close_matches(
            word=word, possibilities=set(possibilities), n=max_suggestions
        ),
    )
    if len(suggestions) == 1:
        return f"\nDid you mean: {suggestions[0]}?"
    elif len(suggestions) >= 1:
        return f"\nDid you mean one of: {', '.join(suggestions)}?"
    else:
        return ""


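# Illustrative sketch (not part of the original module): difflib picks the
# closest known names for a typo; an empty string means nothing was close
# enough to suggest. The service names below are made up.
#
#     suggest_possibilities("exampel_service", ["example_service", "unrelated"])
#     # -> "\nDid you mean: example_service?"
#     suggest_possibilities("zzz", ["example_service", "unrelated"])
#     # -> ""

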
def list_services(soa_dir: str = DEFAULT_SOA_DIR) -> Sequence[str]:
    """Returns a sorted list of all services"""
    return sorted(os.listdir(os.path.abspath(soa_dir)))


def get_possible_launched_by_user_variable_from_env() -> str:
    return os.getenv("SUDO_USER") or getpass.getuser()


def load_all_configs(
    cluster: str, file_prefix: str, soa_dir: str
) -> Mapping[str, Mapping[str, Any]]:
    config_dicts = {}
    for service in os.listdir(soa_dir):
        config_dicts[service] = load_service_instance_configs(
            service, file_prefix, cluster, soa_dir
        )
    return config_dicts


def ldap_user_search(
    cn: str,
    search_base: str,
    search_ou: str,
    ldap_host: str,
    username: str,
    password: str,
) -> Set[str]:
    """Connects to LDAP and raises a subclass of LDAPOperationResult when it fails"""
    tls_config = ldap3.Tls(
        validate=ssl.CERT_REQUIRED, ca_certs_file="/etc/ssl/certs/ca-certificates.crt"
    )
    server = ldap3.Server(ldap_host, use_ssl=True, tls=tls_config)
    conn = ldap3.Connection(
        server, user=username, password=password, raise_exceptions=True
    )
    conn.bind()

    search_filter = f"(&(memberOf=CN={cn},{search_ou})(!(userAccountControl=514)))"
    entries = conn.extend.standard.paged_search(
        search_base=search_base,
        search_scope=ldap3.SUBTREE,
        search_filter=search_filter,
        attributes=["sAMAccountName"],
        paged_size=1000,
        time_limit=10,
    )
    return {entry["attributes"]["sAMAccountName"] for entry in entries}


def _reorder_docker_volumes(volumes: List[DockerVolume]) -> List[DockerVolume]:
    deduped = {
        v["containerPath"].rstrip("/") + v["hostPath"].rstrip("/"): v for v in volumes
    }.values()
    return sort_dicts(deduped)


def get_k8s_url_for_cluster(cluster: str) -> Optional[str]:
    """
    Annoyingly, there are two layers of aliases: one to figure out what
    k8s server url to use (this one) and another to figure out what
    soaconfigs filename to use ;_;

    This exists so that we can map something like `--cluster pnw-devc`
    into spark-pnw-devc's k8s apiserver url without needing to update
    any soaconfigs/alter folks' muscle memory.

    Ideally we can get rid of this entirely once spark-run reads soaconfigs
    in a manner more closely aligned to what we do with other paasta workloads
    (i.e., have it automatically determine where to run based on soaconfigs
    filenames - and not rely on explicit config)
    """
    realized_cluster = (
        load_system_paasta_config().get_eks_cluster_aliases().get(cluster, cluster)
    )
    return (
        load_system_paasta_config()
        .get_kube_clusters()
        .get(realized_cluster, {})
        .get("server")
    )


@lru_cache(maxsize=1)
def is_using_unprivileged_containers() -> bool:
    return "podman" in os.getenv("DOCKER_HOST", "")


def maybe_load_previous_config(
    filename: str, config_loader: Callable[[TextIO], dict]
) -> Optional[dict]:
    """Try to load configuration file

    :param str filename: path to load from
    :param Callable[[TextIO], dict] config_loader: parser for the configuration
    :return: configuration data, None if loading fails
    """
    try:
        with open(filename, "r") as fp:
            previous_config = config_loader(fp)
            return previous_config
    except Exception:
        pass
    return None


def write_json_configuration_file(filename: str, configuration: dict) -> None:
    """Atomically write configuration to JSON file

    :param str filename: path to write to
    :param dict configuration: configuration data
    """
    with atomic_file_write(filename) as fp:
        json.dump(
            obj=configuration,
            fp=fp,
            indent=2,
            sort_keys=True,
            separators=(",", ": "),
        )


def write_yaml_configuration_file(
    filename: str, configuration: dict, check_existing: bool = True
) -> None:
    """Atomically write configuration to YAML file

    :param str filename: path to write to
    :param dict configuration: configuration data
    :param bool check_existing: if existing file already matches config, do not overwrite
    """
    if check_existing:
        previous_config = maybe_load_previous_config(filename, yaml.safe_load)
        if previous_config and previous_config == configuration:
            return

    with atomic_file_write(filename) as fp:
        fp.write(
            "# This file is automatically generated by paasta_tools.\n"
            "# It was automatically generated at {now} on {host}.\n".format(
                host=socket.getfqdn(), now=datetime.datetime.now().isoformat()
            )
        )
        yaml.safe_dump(
            configuration,
            fp,
            indent=2,
            explicit_start=True,
            default_flow_style=False,
            allow_unicode=False,
        )
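

# Illustrative sketch (not part of the original module): both writers go
# through atomic_file_write, so readers never see a partially written config.
# The path and payload below are made up.
#
#     write_yaml_configuration_file(
#         "/nail/etc/services/example_service/deployments.yaml",
#         {"deployments": {"prod.everything": {"docker_image": "services-foo:paasta-abc123"}}},
#     )
#     # A second call with an identical payload returns early (check_existing=True)
#     # and leaves the existing file untouched.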