paasta-tools 1.21.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- k8s_itests/__init__.py +0 -0
- k8s_itests/test_autoscaling.py +23 -0
- k8s_itests/utils.py +38 -0
- paasta_tools/__init__.py +20 -0
- paasta_tools/adhoc_tools.py +142 -0
- paasta_tools/api/__init__.py +13 -0
- paasta_tools/api/api.py +330 -0
- paasta_tools/api/api_docs/swagger.json +2323 -0
- paasta_tools/api/client.py +106 -0
- paasta_tools/api/settings.py +33 -0
- paasta_tools/api/tweens/__init__.py +6 -0
- paasta_tools/api/tweens/auth.py +125 -0
- paasta_tools/api/tweens/profiling.py +108 -0
- paasta_tools/api/tweens/request_logger.py +124 -0
- paasta_tools/api/views/__init__.py +13 -0
- paasta_tools/api/views/autoscaler.py +100 -0
- paasta_tools/api/views/exception.py +45 -0
- paasta_tools/api/views/flink.py +73 -0
- paasta_tools/api/views/instance.py +395 -0
- paasta_tools/api/views/pause_autoscaler.py +71 -0
- paasta_tools/api/views/remote_run.py +113 -0
- paasta_tools/api/views/resources.py +76 -0
- paasta_tools/api/views/service.py +35 -0
- paasta_tools/api/views/version.py +25 -0
- paasta_tools/apply_external_resources.py +79 -0
- paasta_tools/async_utils.py +109 -0
- paasta_tools/autoscaling/__init__.py +0 -0
- paasta_tools/autoscaling/autoscaling_service_lib.py +57 -0
- paasta_tools/autoscaling/forecasting.py +106 -0
- paasta_tools/autoscaling/max_all_k8s_services.py +41 -0
- paasta_tools/autoscaling/pause_service_autoscaler.py +77 -0
- paasta_tools/autoscaling/utils.py +52 -0
- paasta_tools/bounce_lib.py +184 -0
- paasta_tools/broadcast_log_to_services.py +62 -0
- paasta_tools/cassandracluster_tools.py +210 -0
- paasta_tools/check_autoscaler_max_instances.py +212 -0
- paasta_tools/check_cassandracluster_services_replication.py +35 -0
- paasta_tools/check_flink_services_health.py +203 -0
- paasta_tools/check_kubernetes_api.py +57 -0
- paasta_tools/check_kubernetes_services_replication.py +141 -0
- paasta_tools/check_oom_events.py +244 -0
- paasta_tools/check_services_replication_tools.py +324 -0
- paasta_tools/check_spark_jobs.py +234 -0
- paasta_tools/cleanup_kubernetes_cr.py +138 -0
- paasta_tools/cleanup_kubernetes_crd.py +145 -0
- paasta_tools/cleanup_kubernetes_jobs.py +344 -0
- paasta_tools/cleanup_tron_namespaces.py +96 -0
- paasta_tools/cli/__init__.py +13 -0
- paasta_tools/cli/authentication.py +85 -0
- paasta_tools/cli/cli.py +260 -0
- paasta_tools/cli/cmds/__init__.py +13 -0
- paasta_tools/cli/cmds/autoscale.py +143 -0
- paasta_tools/cli/cmds/check.py +334 -0
- paasta_tools/cli/cmds/cook_image.py +147 -0
- paasta_tools/cli/cmds/get_docker_image.py +76 -0
- paasta_tools/cli/cmds/get_image_version.py +172 -0
- paasta_tools/cli/cmds/get_latest_deployment.py +93 -0
- paasta_tools/cli/cmds/info.py +155 -0
- paasta_tools/cli/cmds/itest.py +117 -0
- paasta_tools/cli/cmds/list.py +66 -0
- paasta_tools/cli/cmds/list_clusters.py +42 -0
- paasta_tools/cli/cmds/list_deploy_queue.py +171 -0
- paasta_tools/cli/cmds/list_namespaces.py +84 -0
- paasta_tools/cli/cmds/local_run.py +1396 -0
- paasta_tools/cli/cmds/logs.py +1601 -0
- paasta_tools/cli/cmds/mark_for_deployment.py +1988 -0
- paasta_tools/cli/cmds/mesh_status.py +174 -0
- paasta_tools/cli/cmds/pause_service_autoscaler.py +107 -0
- paasta_tools/cli/cmds/push_to_registry.py +275 -0
- paasta_tools/cli/cmds/remote_run.py +252 -0
- paasta_tools/cli/cmds/rollback.py +347 -0
- paasta_tools/cli/cmds/secret.py +549 -0
- paasta_tools/cli/cmds/security_check.py +59 -0
- paasta_tools/cli/cmds/spark_run.py +1400 -0
- paasta_tools/cli/cmds/start_stop_restart.py +401 -0
- paasta_tools/cli/cmds/status.py +2302 -0
- paasta_tools/cli/cmds/validate.py +1012 -0
- paasta_tools/cli/cmds/wait_for_deployment.py +275 -0
- paasta_tools/cli/fsm/__init__.py +13 -0
- paasta_tools/cli/fsm/autosuggest.py +82 -0
- paasta_tools/cli/fsm/template/README.md +8 -0
- paasta_tools/cli/fsm/template/cookiecutter.json +7 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/kubernetes-PROD.yaml +91 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/monitoring.yaml +20 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/service.yaml +8 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/smartstack.yaml +6 -0
- paasta_tools/cli/fsm_cmd.py +121 -0
- paasta_tools/cli/paasta_tabcomplete.sh +23 -0
- paasta_tools/cli/schemas/adhoc_schema.json +199 -0
- paasta_tools/cli/schemas/autoscaling_schema.json +91 -0
- paasta_tools/cli/schemas/autotuned_defaults/cassandracluster_schema.json +37 -0
- paasta_tools/cli/schemas/autotuned_defaults/kubernetes_schema.json +89 -0
- paasta_tools/cli/schemas/deploy_schema.json +173 -0
- paasta_tools/cli/schemas/eks_schema.json +970 -0
- paasta_tools/cli/schemas/kubernetes_schema.json +970 -0
- paasta_tools/cli/schemas/rollback_schema.json +160 -0
- paasta_tools/cli/schemas/service_schema.json +25 -0
- paasta_tools/cli/schemas/smartstack_schema.json +322 -0
- paasta_tools/cli/schemas/tron_schema.json +699 -0
- paasta_tools/cli/utils.py +1118 -0
- paasta_tools/clusterman.py +21 -0
- paasta_tools/config_utils.py +385 -0
- paasta_tools/contrib/__init__.py +0 -0
- paasta_tools/contrib/bounce_log_latency_parser.py +68 -0
- paasta_tools/contrib/check_manual_oapi_changes.sh +24 -0
- paasta_tools/contrib/check_orphans.py +306 -0
- paasta_tools/contrib/create_dynamodb_table.py +35 -0
- paasta_tools/contrib/create_paasta_playground.py +105 -0
- paasta_tools/contrib/emit_allocated_cpu_metrics.py +50 -0
- paasta_tools/contrib/get_running_task_allocation.py +346 -0
- paasta_tools/contrib/habitat_fixer.py +86 -0
- paasta_tools/contrib/ide_helper.py +316 -0
- paasta_tools/contrib/is_pod_healthy_in_proxy.py +139 -0
- paasta_tools/contrib/is_pod_healthy_in_smartstack.py +50 -0
- paasta_tools/contrib/kill_bad_containers.py +109 -0
- paasta_tools/contrib/mass-deploy-tag.sh +44 -0
- paasta_tools/contrib/mock_patch_checker.py +86 -0
- paasta_tools/contrib/paasta_update_soa_memcpu.py +520 -0
- paasta_tools/contrib/render_template.py +129 -0
- paasta_tools/contrib/rightsizer_soaconfigs_update.py +348 -0
- paasta_tools/contrib/service_shard_remove.py +157 -0
- paasta_tools/contrib/service_shard_update.py +373 -0
- paasta_tools/contrib/shared_ip_check.py +77 -0
- paasta_tools/contrib/timeouts_metrics_prom.py +64 -0
- paasta_tools/delete_kubernetes_deployments.py +89 -0
- paasta_tools/deployment_utils.py +44 -0
- paasta_tools/docker_wrapper.py +234 -0
- paasta_tools/docker_wrapper_imports.py +13 -0
- paasta_tools/drain_lib.py +351 -0
- paasta_tools/dump_locally_running_services.py +71 -0
- paasta_tools/eks_tools.py +119 -0
- paasta_tools/envoy_tools.py +373 -0
- paasta_tools/firewall.py +504 -0
- paasta_tools/firewall_logging.py +154 -0
- paasta_tools/firewall_update.py +172 -0
- paasta_tools/flink_tools.py +345 -0
- paasta_tools/flinkeks_tools.py +90 -0
- paasta_tools/frameworks/__init__.py +0 -0
- paasta_tools/frameworks/adhoc_scheduler.py +71 -0
- paasta_tools/frameworks/constraints.py +87 -0
- paasta_tools/frameworks/native_scheduler.py +652 -0
- paasta_tools/frameworks/native_service_config.py +301 -0
- paasta_tools/frameworks/task_store.py +245 -0
- paasta_tools/generate_all_deployments +9 -0
- paasta_tools/generate_authenticating_services.py +94 -0
- paasta_tools/generate_deployments_for_service.py +255 -0
- paasta_tools/generate_services_file.py +114 -0
- paasta_tools/generate_services_yaml.py +30 -0
- paasta_tools/hacheck.py +76 -0
- paasta_tools/instance/__init__.py +0 -0
- paasta_tools/instance/hpa_metrics_parser.py +122 -0
- paasta_tools/instance/kubernetes.py +1362 -0
- paasta_tools/iptables.py +240 -0
- paasta_tools/kafkacluster_tools.py +143 -0
- paasta_tools/kubernetes/__init__.py +0 -0
- paasta_tools/kubernetes/application/__init__.py +0 -0
- paasta_tools/kubernetes/application/controller_wrappers.py +476 -0
- paasta_tools/kubernetes/application/tools.py +90 -0
- paasta_tools/kubernetes/bin/__init__.py +0 -0
- paasta_tools/kubernetes/bin/kubernetes_remove_evicted_pods.py +164 -0
- paasta_tools/kubernetes/bin/paasta_cleanup_remote_run_resources.py +135 -0
- paasta_tools/kubernetes/bin/paasta_cleanup_stale_nodes.py +181 -0
- paasta_tools/kubernetes/bin/paasta_secrets_sync.py +758 -0
- paasta_tools/kubernetes/remote_run.py +558 -0
- paasta_tools/kubernetes_tools.py +4679 -0
- paasta_tools/list_kubernetes_service_instances.py +128 -0
- paasta_tools/list_tron_namespaces.py +60 -0
- paasta_tools/long_running_service_tools.py +678 -0
- paasta_tools/mac_address.py +44 -0
- paasta_tools/marathon_dashboard.py +0 -0
- paasta_tools/mesos/__init__.py +0 -0
- paasta_tools/mesos/cfg.py +46 -0
- paasta_tools/mesos/cluster.py +60 -0
- paasta_tools/mesos/exceptions.py +59 -0
- paasta_tools/mesos/framework.py +77 -0
- paasta_tools/mesos/log.py +48 -0
- paasta_tools/mesos/master.py +306 -0
- paasta_tools/mesos/mesos_file.py +169 -0
- paasta_tools/mesos/parallel.py +52 -0
- paasta_tools/mesos/slave.py +115 -0
- paasta_tools/mesos/task.py +94 -0
- paasta_tools/mesos/util.py +69 -0
- paasta_tools/mesos/zookeeper.py +37 -0
- paasta_tools/mesos_maintenance.py +848 -0
- paasta_tools/mesos_tools.py +1051 -0
- paasta_tools/metrics/__init__.py +0 -0
- paasta_tools/metrics/metastatus_lib.py +1110 -0
- paasta_tools/metrics/metrics_lib.py +217 -0
- paasta_tools/monitoring/__init__.py +13 -0
- paasta_tools/monitoring/check_k8s_api_performance.py +110 -0
- paasta_tools/monitoring_tools.py +652 -0
- paasta_tools/monkrelaycluster_tools.py +146 -0
- paasta_tools/nrtsearchservice_tools.py +143 -0
- paasta_tools/nrtsearchserviceeks_tools.py +68 -0
- paasta_tools/oom_logger.py +321 -0
- paasta_tools/paasta_deploy_tron_jobs +3 -0
- paasta_tools/paasta_execute_docker_command.py +123 -0
- paasta_tools/paasta_native_serviceinit.py +21 -0
- paasta_tools/paasta_service_config_loader.py +201 -0
- paasta_tools/paastaapi/__init__.py +29 -0
- paasta_tools/paastaapi/api/__init__.py +3 -0
- paasta_tools/paastaapi/api/autoscaler_api.py +302 -0
- paasta_tools/paastaapi/api/default_api.py +569 -0
- paasta_tools/paastaapi/api/remote_run_api.py +604 -0
- paasta_tools/paastaapi/api/resources_api.py +157 -0
- paasta_tools/paastaapi/api/service_api.py +1736 -0
- paasta_tools/paastaapi/api_client.py +818 -0
- paasta_tools/paastaapi/apis/__init__.py +22 -0
- paasta_tools/paastaapi/configuration.py +455 -0
- paasta_tools/paastaapi/exceptions.py +137 -0
- paasta_tools/paastaapi/model/__init__.py +5 -0
- paasta_tools/paastaapi/model/adhoc_launch_history.py +176 -0
- paasta_tools/paastaapi/model/autoscaler_count_msg.py +176 -0
- paasta_tools/paastaapi/model/deploy_queue.py +178 -0
- paasta_tools/paastaapi/model/deploy_queue_service_instance.py +194 -0
- paasta_tools/paastaapi/model/envoy_backend.py +185 -0
- paasta_tools/paastaapi/model/envoy_location.py +184 -0
- paasta_tools/paastaapi/model/envoy_status.py +181 -0
- paasta_tools/paastaapi/model/flink_cluster_overview.py +188 -0
- paasta_tools/paastaapi/model/flink_config.py +173 -0
- paasta_tools/paastaapi/model/flink_job.py +186 -0
- paasta_tools/paastaapi/model/flink_job_details.py +192 -0
- paasta_tools/paastaapi/model/flink_jobs.py +175 -0
- paasta_tools/paastaapi/model/float_and_error.py +173 -0
- paasta_tools/paastaapi/model/hpa_metric.py +176 -0
- paasta_tools/paastaapi/model/inline_object.py +170 -0
- paasta_tools/paastaapi/model/inline_response200.py +170 -0
- paasta_tools/paastaapi/model/inline_response2001.py +170 -0
- paasta_tools/paastaapi/model/instance_bounce_status.py +200 -0
- paasta_tools/paastaapi/model/instance_mesh_status.py +186 -0
- paasta_tools/paastaapi/model/instance_status.py +220 -0
- paasta_tools/paastaapi/model/instance_status_adhoc.py +187 -0
- paasta_tools/paastaapi/model/instance_status_cassandracluster.py +173 -0
- paasta_tools/paastaapi/model/instance_status_flink.py +173 -0
- paasta_tools/paastaapi/model/instance_status_kafkacluster.py +173 -0
- paasta_tools/paastaapi/model/instance_status_kubernetes.py +263 -0
- paasta_tools/paastaapi/model/instance_status_kubernetes_autoscaling_status.py +187 -0
- paasta_tools/paastaapi/model/instance_status_kubernetes_v2.py +197 -0
- paasta_tools/paastaapi/model/instance_status_tron.py +204 -0
- paasta_tools/paastaapi/model/instance_tasks.py +182 -0
- paasta_tools/paastaapi/model/integer_and_error.py +173 -0
- paasta_tools/paastaapi/model/kubernetes_container.py +178 -0
- paasta_tools/paastaapi/model/kubernetes_container_v2.py +219 -0
- paasta_tools/paastaapi/model/kubernetes_healthcheck.py +176 -0
- paasta_tools/paastaapi/model/kubernetes_pod.py +201 -0
- paasta_tools/paastaapi/model/kubernetes_pod_event.py +176 -0
- paasta_tools/paastaapi/model/kubernetes_pod_v2.py +213 -0
- paasta_tools/paastaapi/model/kubernetes_replica_set.py +185 -0
- paasta_tools/paastaapi/model/kubernetes_version.py +202 -0
- paasta_tools/paastaapi/model/remote_run_outcome.py +189 -0
- paasta_tools/paastaapi/model/remote_run_start.py +185 -0
- paasta_tools/paastaapi/model/remote_run_stop.py +176 -0
- paasta_tools/paastaapi/model/remote_run_token.py +173 -0
- paasta_tools/paastaapi/model/resource.py +187 -0
- paasta_tools/paastaapi/model/resource_item.py +187 -0
- paasta_tools/paastaapi/model/resource_value.py +176 -0
- paasta_tools/paastaapi/model/smartstack_backend.py +191 -0
- paasta_tools/paastaapi/model/smartstack_location.py +181 -0
- paasta_tools/paastaapi/model/smartstack_status.py +181 -0
- paasta_tools/paastaapi/model/task_tail_lines.py +176 -0
- paasta_tools/paastaapi/model_utils.py +1879 -0
- paasta_tools/paastaapi/models/__init__.py +62 -0
- paasta_tools/paastaapi/rest.py +287 -0
- paasta_tools/prune_completed_pods.py +220 -0
- paasta_tools/puppet_service_tools.py +59 -0
- paasta_tools/py.typed +1 -0
- paasta_tools/remote_git.py +127 -0
- paasta_tools/run-paasta-api-in-dev-mode.py +57 -0
- paasta_tools/run-paasta-api-playground.py +51 -0
- paasta_tools/secret_providers/__init__.py +66 -0
- paasta_tools/secret_providers/vault.py +214 -0
- paasta_tools/secret_tools.py +277 -0
- paasta_tools/setup_istio_mesh.py +353 -0
- paasta_tools/setup_kubernetes_cr.py +412 -0
- paasta_tools/setup_kubernetes_crd.py +138 -0
- paasta_tools/setup_kubernetes_internal_crd.py +154 -0
- paasta_tools/setup_kubernetes_job.py +353 -0
- paasta_tools/setup_prometheus_adapter_config.py +1028 -0
- paasta_tools/setup_tron_namespace.py +248 -0
- paasta_tools/slack.py +75 -0
- paasta_tools/smartstack_tools.py +676 -0
- paasta_tools/spark_tools.py +283 -0
- paasta_tools/synapse_srv_namespaces_fact.py +42 -0
- paasta_tools/tron/__init__.py +0 -0
- paasta_tools/tron/client.py +158 -0
- paasta_tools/tron/tron_command_context.py +194 -0
- paasta_tools/tron/tron_timeutils.py +101 -0
- paasta_tools/tron_tools.py +1448 -0
- paasta_tools/utils.py +4307 -0
- paasta_tools/yaml_tools.py +44 -0
- paasta_tools-1.21.3.data/scripts/apply_external_resources.py +79 -0
- paasta_tools-1.21.3.data/scripts/bounce_log_latency_parser.py +68 -0
- paasta_tools-1.21.3.data/scripts/check_autoscaler_max_instances.py +212 -0
- paasta_tools-1.21.3.data/scripts/check_cassandracluster_services_replication.py +35 -0
- paasta_tools-1.21.3.data/scripts/check_flink_services_health.py +203 -0
- paasta_tools-1.21.3.data/scripts/check_kubernetes_api.py +57 -0
- paasta_tools-1.21.3.data/scripts/check_kubernetes_services_replication.py +141 -0
- paasta_tools-1.21.3.data/scripts/check_manual_oapi_changes.sh +24 -0
- paasta_tools-1.21.3.data/scripts/check_oom_events.py +244 -0
- paasta_tools-1.21.3.data/scripts/check_orphans.py +306 -0
- paasta_tools-1.21.3.data/scripts/check_spark_jobs.py +234 -0
- paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_cr.py +138 -0
- paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_crd.py +145 -0
- paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_jobs.py +344 -0
- paasta_tools-1.21.3.data/scripts/create_dynamodb_table.py +35 -0
- paasta_tools-1.21.3.data/scripts/create_paasta_playground.py +105 -0
- paasta_tools-1.21.3.data/scripts/delete_kubernetes_deployments.py +89 -0
- paasta_tools-1.21.3.data/scripts/emit_allocated_cpu_metrics.py +50 -0
- paasta_tools-1.21.3.data/scripts/generate_all_deployments +9 -0
- paasta_tools-1.21.3.data/scripts/generate_authenticating_services.py +94 -0
- paasta_tools-1.21.3.data/scripts/generate_deployments_for_service.py +255 -0
- paasta_tools-1.21.3.data/scripts/generate_services_file.py +114 -0
- paasta_tools-1.21.3.data/scripts/generate_services_yaml.py +30 -0
- paasta_tools-1.21.3.data/scripts/get_running_task_allocation.py +346 -0
- paasta_tools-1.21.3.data/scripts/habitat_fixer.py +86 -0
- paasta_tools-1.21.3.data/scripts/ide_helper.py +316 -0
- paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_proxy.py +139 -0
- paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_smartstack.py +50 -0
- paasta_tools-1.21.3.data/scripts/kill_bad_containers.py +109 -0
- paasta_tools-1.21.3.data/scripts/kubernetes_remove_evicted_pods.py +164 -0
- paasta_tools-1.21.3.data/scripts/mass-deploy-tag.sh +44 -0
- paasta_tools-1.21.3.data/scripts/mock_patch_checker.py +86 -0
- paasta_tools-1.21.3.data/scripts/paasta_cleanup_remote_run_resources.py +135 -0
- paasta_tools-1.21.3.data/scripts/paasta_cleanup_stale_nodes.py +181 -0
- paasta_tools-1.21.3.data/scripts/paasta_deploy_tron_jobs +3 -0
- paasta_tools-1.21.3.data/scripts/paasta_execute_docker_command.py +123 -0
- paasta_tools-1.21.3.data/scripts/paasta_secrets_sync.py +758 -0
- paasta_tools-1.21.3.data/scripts/paasta_tabcomplete.sh +23 -0
- paasta_tools-1.21.3.data/scripts/paasta_update_soa_memcpu.py +520 -0
- paasta_tools-1.21.3.data/scripts/render_template.py +129 -0
- paasta_tools-1.21.3.data/scripts/rightsizer_soaconfigs_update.py +348 -0
- paasta_tools-1.21.3.data/scripts/service_shard_remove.py +157 -0
- paasta_tools-1.21.3.data/scripts/service_shard_update.py +373 -0
- paasta_tools-1.21.3.data/scripts/setup_istio_mesh.py +353 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_cr.py +412 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_crd.py +138 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_internal_crd.py +154 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_job.py +353 -0
- paasta_tools-1.21.3.data/scripts/setup_prometheus_adapter_config.py +1028 -0
- paasta_tools-1.21.3.data/scripts/shared_ip_check.py +77 -0
- paasta_tools-1.21.3.data/scripts/synapse_srv_namespaces_fact.py +42 -0
- paasta_tools-1.21.3.data/scripts/timeouts_metrics_prom.py +64 -0
- paasta_tools-1.21.3.dist-info/LICENSE +201 -0
- paasta_tools-1.21.3.dist-info/METADATA +74 -0
- paasta_tools-1.21.3.dist-info/RECORD +348 -0
- paasta_tools-1.21.3.dist-info/WHEEL +5 -0
- paasta_tools-1.21.3.dist-info/entry_points.txt +20 -0
- paasta_tools-1.21.3.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
# Copyright 2015-2019 Yelp Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
import argparse
|
|
15
|
+
import logging
|
|
16
|
+
import sys
|
|
17
|
+
from multiprocessing import Pool
|
|
18
|
+
from os import cpu_count
|
|
19
|
+
from typing import Any
|
|
20
|
+
from typing import Callable
|
|
21
|
+
from typing import cast
|
|
22
|
+
from typing import Dict
|
|
23
|
+
from typing import List
|
|
24
|
+
from typing import Optional
|
|
25
|
+
from typing import Sequence
|
|
26
|
+
from typing import Set
|
|
27
|
+
from typing import Tuple
|
|
28
|
+
from typing import Type
|
|
29
|
+
|
|
30
|
+
from mypy_extensions import Arg
|
|
31
|
+
from mypy_extensions import NamedArg
|
|
32
|
+
|
|
33
|
+
from paasta_tools.kubernetes_tools import get_all_managed_namespaces
|
|
34
|
+
from paasta_tools.kubernetes_tools import get_all_nodes
|
|
35
|
+
from paasta_tools.kubernetes_tools import get_all_pods
|
|
36
|
+
from paasta_tools.kubernetes_tools import group_pods_by_service_instance
|
|
37
|
+
from paasta_tools.kubernetes_tools import KubeClient
|
|
38
|
+
from paasta_tools.kubernetes_tools import V1Node
|
|
39
|
+
from paasta_tools.kubernetes_tools import V1Pod
|
|
40
|
+
from paasta_tools.metrics import metrics_lib
|
|
41
|
+
from paasta_tools.monitoring_tools import ReplicationChecker
|
|
42
|
+
from paasta_tools.paasta_service_config_loader import PaastaServiceConfigLoader
|
|
43
|
+
from paasta_tools.smartstack_tools import KubeSmartstackEnvoyReplicationChecker
|
|
44
|
+
from paasta_tools.utils import DEFAULT_SOA_DIR
|
|
45
|
+
from paasta_tools.utils import InstanceConfig_T
|
|
46
|
+
from paasta_tools.utils import list_services
|
|
47
|
+
from paasta_tools.utils import load_system_paasta_config
|
|
48
|
+
from paasta_tools.utils import SPACER
|
|
49
|
+
|
|
50
|
+
try:
|
|
51
|
+
import yelp_meteorite
|
|
52
|
+
except ImportError:
|
|
53
|
+
yelp_meteorite = None
|
|
54
|
+
|
|
55
|
+
log = logging.getLogger(__name__)
|
|
56
|
+
|
|
57
|
+
# Pods grouped by two levels of string keys (presumably service, then
# instance - confirm against group_pods_by_service_instance).
_PodsByServiceInstance = Dict[str, Dict[str, List[V1Pod]]]

# Signature of the per-instance check callback handed to
# check_services_replication(). It is invoked with the keyword arguments
# instance_config, pods_by_service_instance, replication_checker and dry_run,
# and returns True/False for the replication verdict, or None when the
# instance should not be counted.
CheckServiceReplication = Callable[
    [
        Arg(InstanceConfig_T, "instance_config"),
        Arg(_PodsByServiceInstance, "pods_by_service_instance"),
        Arg(Any, "replication_checker"),
        NamedArg(bool, "dry_run"),
    ],
    Optional[bool],
]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def parse_args() -> argparse.Namespace:
    """Define the command-line interface of the replication check and parse it.

    :returns: the parsed options, with attributes ``soa_dir``,
        ``under_replicated_crit_pct``, ``min_count_critical``,
        ``service_instance_list``, ``verbose``, ``dry_run`` and ``eks``
    """
    arg_parser = argparse.ArgumentParser()

    arg_parser.add_argument(
        "-d",
        "--soa-dir",
        dest="soa_dir",
        metavar="SOA_DIR",
        default=DEFAULT_SOA_DIR,
        help="define a different soa config directory",
    )

    # Thresholds: both must be reached before the check goes critical.
    arg_parser.add_argument(
        "--crit",
        dest="under_replicated_crit_pct",
        type=float,
        default=10,
        help=(
            "The percentage of under replicated service instances past which "
            "the script will return a critical status"
        ),
    )
    arg_parser.add_argument(
        "--min-count-critical",
        dest="min_count_critical",
        type=int,
        default=5,
        help=(
            "The script will not return a critical status if the number of "
            "under replicated service instances is below this number, even if the "
            "percentage is above the critical percentage."
        ),
    )

    # Optional positional filter; an empty list means "check everything".
    arg_parser.add_argument(
        "service_instance_list",
        nargs="*",
        help="The list of service instances to check",
        metavar=f"SERVICE{SPACER}INSTANCE",
    )

    arg_parser.add_argument(
        "-v", "--verbose", action="store_true", dest="verbose", default=False
    )
    arg_parser.add_argument(
        "--dry-run",
        action="store_true",
        dest="dry_run",
        help="Print Sensu alert events and metrics instead of sending them",
    )
    arg_parser.add_argument(
        "--eks",
        help="This flag checks k8 services running on EKS",
        dest="eks",
        action="store_true",
        default=False,
    )

    return arg_parser.parse_args()
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def check_services_replication(
    soa_dir: str,
    cluster: str,
    service_instances: Sequence[str],
    instance_type_class: Type[InstanceConfig_T],
    check_service_replication: CheckServiceReplication,
    replication_checker: ReplicationChecker,
    pods_by_service_instance: Dict[str, Dict[str, List[V1Pod]]],
    dry_run: bool = False,
) -> Tuple[int, int]:
    """Run ``check_service_replication`` over the configured instances of a cluster.

    :param soa_dir: soa config directory to list services and load configs from
    :param cluster: cluster whose instance configs are inspected
    :param service_instances: ``SERVICE<SPACER>INSTANCE`` names to restrict the
        check to; when empty, every instance is checked
    :param instance_type_class: instance config class to load for each service
    :param check_service_replication: callback that checks a single instance
    :param replication_checker: passed through to the callback
    :param pods_by_service_instance: passed through to the callback
    :param dry_run: passed through to the callback
    :returns: ``(number of instances reported as under replicated,
        number of instances that produced a result)``
    """
    wanted = set(service_instances)
    outcomes: List[bool] = []

    for service in list_services(soa_dir=soa_dir):
        loader = PaastaServiceConfigLoader(service=service, soa_dir=soa_dir)
        for instance_config in loader.instance_configs(
            cluster=cluster, instance_type_class=instance_type_class
        ):
            # An empty filter selects everything; otherwise the instance
            # must have been named explicitly.
            selected = (
                not wanted
                or f"{service}{SPACER}{instance_config.instance}" in wanted
            )
            if not selected:
                continue

            if not instance_config.get_docker_image():
                log.debug(
                    "%s is not deployed. Skipping replication monitoring."
                    % instance_config.job_id
                )
                continue

            outcome = check_service_replication(
                instance_config=instance_config,
                pods_by_service_instance=pods_by_service_instance,
                replication_checker=replication_checker,
                dry_run=dry_run,
            )
            # None means the callback had no verdict; leave it out of the totals.
            if outcome is not None:
                outcomes.append(outcome)

    under_replicated = sum(1 for outcome in outcomes if outcome is False)
    return under_replicated, len(outcomes)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def emit_cluster_replication_metrics(
    pct_under_replicated: float,
    cluster: str,
    scheduler: str,
    dry_run: bool = False,
) -> None:
    """Report the percentage of under replicated services as a gauge.

    :param pct_under_replicated: value to report
    :param cluster: used as the ``paasta_cluster`` dimension of the gauge
    :param scheduler: used as the ``scheduler`` dimension of the gauge
    :param dry_run: when True, print what would be sent instead of sending it
    """
    metric_name = "paasta.pct_services_under_replicated"

    if dry_run:
        print(f"Would've sent value {pct_under_replicated} for metric '{metric_name}'")
        return

    # Requires yelp_meteorite to have been imported successfully; callers are
    # expected to check that it is not None before a non-dry-run call.
    gauge = yelp_meteorite.create_gauge(
        metric_name, {"paasta_cluster": cluster, "scheduler": scheduler}
    )
    gauge.set(pct_under_replicated)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def main(
    instance_type_class: Type[InstanceConfig_T],
    check_service_replication: CheckServiceReplication,
    namespace: Optional[str] = None,
) -> None:
    """Entry point shared by the replication check scripts.

    Parses the command line, gathers pods and nodes, runs
    ``check_service_replication`` for every selected instance, optionally
    emits the cluster-wide under-replication gauge, and terminates the process.

    :param instance_type_class: instance config class to load for each service
    :param check_service_replication: callback that checks a single instance
    :param namespace: restrict the pod lookup to this namespace; when falsy,
        pods of all managed namespaces are fetched
    :raises SystemExit: always; exit code 2 when both the critical percentage
        and the minimum critical count are reached, 0 otherwise
    """
    args = parse_args()
    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.WARNING)

    system_paasta_config = load_system_paasta_config()
    cluster = system_paasta_config.get_cluster()

    timer = metrics_lib.system_timer(dimensions=dict(eks=args.eks, cluster=cluster))
    timer.start()

    # get_kubernetes_pods_and_nodes() already treats a falsy namespace as
    # "all managed namespaces", so no branching is needed here.
    pods, nodes = get_kubernetes_pods_and_nodes(namespace=namespace)
    replication_checker: ReplicationChecker = KubeSmartstackEnvoyReplicationChecker(
        nodes=nodes,
        system_paasta_config=system_paasta_config,
    )

    pods_by_service_instance = group_pods_by_service_instance(pods)

    count_under_replicated, total = check_services_replication(
        soa_dir=args.soa_dir,
        cluster=cluster,
        service_instances=args.service_instance_list,
        instance_type_class=instance_type_class,
        check_service_replication=check_service_replication,
        replication_checker=replication_checker,
        pods_by_service_instance=pods_by_service_instance,
        dry_run=args.dry_run,
    )
    # Avoid dividing by zero when no instance produced a result.
    pct_under_replicated = 0 if total == 0 else 100 * count_under_replicated / total

    # yelp_meteorite is an optional dependency; skip the gauge when it is absent.
    if yelp_meteorite is not None:
        emit_cluster_replication_metrics(
            pct_under_replicated,
            cluster,
            scheduler="kubernetes",
            dry_run=args.dry_run,
        )

    # Critical only when BOTH the percentage and the absolute count are reached.
    exit_code = 0
    if (
        pct_under_replicated >= args.under_replicated_crit_pct
        and count_under_replicated >= args.min_count_critical
    ):
        log.critical(
            f"{pct_under_replicated}% of instances ({count_under_replicated}/{total}) "
            f"are under replicated (past {args.under_replicated_crit_pct} is critical)!"
        )
        exit_code = 2

    timer.stop(tmp_dimensions={"result": exit_code})
    logging.info(
        f"Stopping timer for {cluster} (eks={args.eks}) with result {exit_code}: {timer()}ms elapsed"
    )
    sys.exit(exit_code)
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
# XXX: is there a base class for the k8s clientlib models that we could use to type `obj`?
|
|
255
|
+
def set_local_vars_configuration_to_none(
    obj: Any, visited: Optional[Set[int]] = None
) -> None:
    """
    Recursive function to ensure that k8s clientlib objects are pickleable.

    Without this, k8s clientlib objects can't be used by multiprocessing functions
    as those pickle data to shuttle between processes.

    Every ``local_vars_configuration`` attribute reachable from ``obj`` (through
    instance attributes, list/tuple/set elements and dict values) is set to None,
    in place.

    :param obj: root object to walk; objects without attributes or elements are
        left untouched
    :param visited: ids of the objects already processed; callers normally leave
        this unset, it is threaded through the recursive calls
    """
    if visited is None:
        visited = set()

    # Avoid infinite recursion for objects that have already been visited
    obj_id = id(obj)
    if obj_id in visited:
        return
    visited.add(obj_id)

    # if the object has the attribute, set it to None to essentially delete it
    if hasattr(obj, "local_vars_configuration"):
        setattr(obj, "local_vars_configuration", None)

    # recursively check attributes of the object
    if hasattr(obj, "__dict__"):
        for attr_value in obj.__dict__.values():
            set_local_vars_configuration_to_none(attr_value, visited)

    # Checked independently of the attribute walk above: instances of container
    # subclasses have both a __dict__ and elements, and the elements must be
    # cleaned as well.
    if isinstance(obj, (list, tuple, set)):
        for item in obj:
            set_local_vars_configuration_to_none(item, visited)
    elif isinstance(obj, dict):
        for value in obj.values():
            set_local_vars_configuration_to_none(value, visited)
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def __fetch_pods(namespace: str) -> List[V1Pod]:
    """Return the pods of ``namespace``, made picklable.

    A fresh KubeClient is created on every call.

    :param namespace: namespace to list pods from
    :returns: the pods, with their ``local_vars_configuration`` members cleared
    """
    namespace_pods = get_all_pods(KubeClient(), namespace)
    # this is pretty silly, but V1Pod cannot be pickled otherwise since the
    # local_vars_configuration member is not picklable - and pretty much every
    # k8s model has this member ;_;
    for namespace_pod in namespace_pods:
        set_local_vars_configuration_to_none(namespace_pod)
    return namespace_pods
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def __get_all_pods_parallel(from_namespaces: Set[str]) -> List[V1Pod]:
    """Fetch the pods of several namespaces using a pool of worker processes.

    :param from_namespaces: namespaces to fetch pods from
    :returns: the pods of all the namespaces, in no particular order
    """
    all_pods: List[V1Pod] = []
    if not from_namespaces:
        # Nothing to fetch; don't bother spawning worker processes.
        return all_pods

    # Integer division yields 0 whenever there are fewer namespaces than CPUs,
    # and cpu_count() may return None; imap_unordered() rejects a chunksize
    # below 1, so clamp the result.
    chunksize = max(1, len(from_namespaces) // (cpu_count() or 1))
    with Pool() as pool:
        for pod_list in pool.imap_unordered(
            __fetch_pods,
            from_namespaces,
            chunksize=chunksize,
        ):
            all_pods.extend(pod_list)
    return all_pods
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def get_kubernetes_pods_and_nodes(
    namespace: Optional[str] = None,
) -> Tuple[List[V1Pod], List[V1Node]]:
    """Return the pods and the nodes seen through a new ``KubeClient``.

    :param namespace: when truthy, only pods from this namespace are listed
        (via ``get_all_pods``); otherwise pods are gathered in parallel from
        every namespace returned by ``get_all_managed_namespaces``
    :returns: a ``(pods, nodes)`` tuple; nodes always come from ``get_all_nodes``
    """
    client = KubeClient()

    if not namespace:
        managed_namespaces = set(get_all_managed_namespaces(client))
        pods = __get_all_pods_parallel(managed_namespaces)
    else:
        pods = get_all_pods(kube_client=client, namespace=namespace)

    nodes = get_all_nodes(client)
    return pods, nodes
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
import argparse
|
|
3
|
+
import datetime
|
|
4
|
+
import logging
|
|
5
|
+
import smtplib
|
|
6
|
+
import sys
|
|
7
|
+
from collections import defaultdict
|
|
8
|
+
from email.message import EmailMessage
|
|
9
|
+
from socket import getfqdn
|
|
10
|
+
|
|
11
|
+
import pysensu_yelp
|
|
12
|
+
import requests
|
|
13
|
+
|
|
14
|
+
from paasta_tools import mesos_tools
|
|
15
|
+
from paasta_tools.monitoring_tools import send_event
|
|
16
|
+
from paasta_tools.utils import DEFAULT_SOA_DIR
|
|
17
|
+
from paasta_tools.utils import list_services
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Sender address for notification emails: "paasta@" followed by this host's FQDN.
email_from_address = f"paasta@{getfqdn()}"


# Prefix stripped from guessed service names and used to recognise Jupyter
# framework names when guessing which user to email.
JUPYTER_PREFIX = "jupyterhub_"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def parse_args():
    """Parse the command line of the long-running Spark framework report.

    :returns: an ``argparse.Namespace`` with ``min_hours`` (parsed as float,
        default ``0``), ``no_notify`` (bool flag) and ``email_domain``
        (string or ``None``)
    """
    cli = argparse.ArgumentParser(
        description="Reports long-running Spark frameworks."
    )
    cli.add_argument(
        "--min-hours",
        default=0,
        type=float,
        help="Report frameworks that have been registered for more than this duration",
    )
    cli.add_argument(
        "--no-notify",
        help="Skip notifying the teams that own each framework",
        action="store_true",
    )
    cli.add_argument(
        "--email-domain",
        help="Email domain for notifying users",
        default=None,
    )
    parsed = cli.parse_args()
    return parsed
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def get_time_running(framework):
    """Return how long ``framework`` has been registered.

    ``framework["registered_time"]`` is interpreted as a POSIX timestamp and
    subtracted from the current local time.

    :returns: the elapsed time as a ``datetime.timedelta``
    """
    started_at = datetime.datetime.fromtimestamp(framework["registered_time"])
    elapsed = datetime.datetime.now() - started_at
    return elapsed
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def get_spark_properties(framework):
    """Fetch the ``sparkProperties`` exposed by a framework's web UI.

    The environment endpoint under the framework's ``webui_url`` is queried
    with a 5 second timeout.

    :returns: the ``sparkProperties`` value of the JSON response, or ``None``
        when the framework has no ``webui_url``, the request times out or
        cannot connect, the status code is not 200, or the body is not JSON
        containing a ``sparkProperties`` key (the last three cases are logged
        as warnings)
    """
    webui_url = framework.get("webui_url")
    if webui_url:
        env_endpoint = f"{webui_url}/api/v1/applications/{framework.id}/environment"
    else:
        return None

    try:
        response = requests.get(env_endpoint, timeout=5)
    except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
        logger.warning(f"Unable to connect to {env_endpoint}: {e!r}")
        return None

    if response.status_code == 200:
        try:
            payload = response.json()
            return payload["sparkProperties"]
        except (ValueError, KeyError):
            logger.warning(
                f"Unable to get sparkProperties for {framework.id}: got response {response.text}"
            )
            return None

    logger.warning(f"Bad response from {env_endpoint}: {response.status_code}")
    return None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def guess_service(properties):
    """Guess the PaaSTA service name from Spark properties.

    :param properties: iterable of ``(key, value)`` pairs, or a falsy value
    :returns: the value of the first ``spark.executorEnv.PAASTA_SERVICE`` entry
        with a leading ``JUPYTER_PREFIX`` removed, or ``None`` when
        ``properties`` is falsy or has no such entry
    """
    if not properties:
        return None

    # A private sentinel distinguishes "key not present" from any real value.
    not_found = object()
    service = next(
        (
            value
            for key, value in properties
            if key == "spark.executorEnv.PAASTA_SERVICE"
        ),
        not_found,
    )
    if service is not_found:
        return None

    if not service.startswith(JUPYTER_PREFIX):
        return service
    return service[len(JUPYTER_PREFIX) :]
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def get_matching_framework_info(min_hours):
    """Collect info about active Spark frameworks registered for long enough.

    Frameworks come from ``mesos_tools.get_all_frameworks(active_only=True)``;
    those that are not active or whose ``principal`` is not ``"spark"`` are
    ignored.

    :param min_hours: minimum registration time, in hours, for a framework to
        be reported
    :returns: a list of dicts with the keys ``id``, ``name``, ``webui_url``,
        ``service``, ``user`` and ``time_running`` (the latter as a string)
    """
    candidates = mesos_tools.get_all_frameworks(active_only=True)
    threshold = datetime.timedelta(hours=min_hours)
    matching_info = []
    for framework in candidates:
        if not framework.active or framework.get("principal") != "spark":
            continue
        time_running = get_time_running(framework)
        if time_running < threshold:
            continue
        matching_info.append(
            {
                "id": framework.id,
                "name": framework.name,
                "webui_url": framework.get("webui_url"),
                "service": guess_service(get_spark_properties(framework)),
                "user": framework.user,
                "time_running": str(time_running),
            }
        )
    return matching_info
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def format_framework(info):
    """Render a framework info dict as a three-line text summary.

    :param info: mapping providing ``name``, ``time_running``, ``user`` and
        ``webui_url``
    :returns: the name and running time, the user and the job UI URL, joined
        by newlines
    """
    headline = f'{info["name"]} (running for {info["time_running"]})'
    user_line = f' user: {info["user"]}'
    ui_line = f' job UI: {info["webui_url"]}'
    return "\n".join((headline, user_line, ui_line))
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def format_message_for_service(service, frameworks):
    """Build the report text for one service.

    :param service: service name interpolated into the heading
    :param frameworks: framework info dicts, each rendered with
        ``format_framework``
    :returns: a heading, a request to check the jobs, then one entry per
        framework separated by newlines
    """
    heading = f"Found the following long-running Spark frameworks associated with service {service}.\n"
    advice = "Please check why they are still running and terminate if appropriate.\n\n"
    listing = "\n".join(format_framework(f) for f in frameworks)
    return heading + advice + listing
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def get_messages_by_service(frameworks):
    """Group framework info dicts by service and format one message per group.

    :param frameworks: iterable of framework info dicts, each with a
        ``service`` key
    :returns: a dict mapping each service to the text produced by
        ``format_message_for_service`` for its frameworks
    """
    grouped = {}
    for framework in frameworks:
        grouped.setdefault(framework["service"], []).append(framework)

    messages = {}
    for service, members in grouped.items():
        messages[service] = format_message_for_service(service, members)
    return messages
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def update_check_status(service, output, status):
    """Send a ``long_running_spark_jobs.<service>`` event for ``service``.

    The event is emitted through ``send_event`` with fixed overrides
    (non-paging, ticketing, no alert delay) and ``DEFAULT_SOA_DIR``.

    :param service: service the check belongs to
    :param output: text attached to the event
    :param status: status value passed through to ``send_event``
    """
    check_name = f"long_running_spark_jobs.{service}"
    send_event(
        service=service,
        check_name=check_name,
        overrides={
            "page": False,
            "alert_after": 0,
            "tip": "Ask the user to check the job UI and terminate the job if appropriate.",
            "runbook": "http://y/spark-debug",
            "ticket": True,
        },
        status=status,
        output=output,
        soa_dir=DEFAULT_SOA_DIR,
    )
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def email_user(framework_info, email_domain):
    """Email the user guessed to own a long-running Spark framework.

    The user is the framework's ``user`` unless that is ``"root"``; for root
    frameworks whose name starts with ``JUPYTER_PREFIX`` it is taken from the
    third-from-last underscore-separated part of the name. When no user can be
    guessed, a notice is printed and nothing is sent.

    :param framework_info: framework info dict (``user``, ``name`` and the
        keys read by ``format_framework``)
    :param email_domain: domain appended to the guessed user to form the
        recipient address
    """
    if framework_info["user"] != "root":
        guessed_user = framework_info["user"]
    else:
        guessed_user = None
        if framework_info["name"].startswith(JUPYTER_PREFIX):
            # the job format is now `<AppName>_<UserName>_<UIPort>_<StartTime>`
            name_parts = framework_info["name"].split("_")
            if len(name_parts) >= 3:
                guessed_user = name_parts[-3]

    if not guessed_user:
        print(f"Could not guess user from {framework_info}, skipping user email")
        return

    print(
        f'Guessed {framework_info["name"]} belongs to {guessed_user}, sending email'
    )

    msg = EmailMessage()
    msg["From"] = email_from_address
    msg["To"] = f"{guessed_user}@{email_domain}"
    msg["Subject"] = f'Long-running Spark framework {framework_info["name"]}'
    msg.set_content(
        "Please check why it is still running and terminate if appropriate.\n"
        + format_framework(framework_info)
    )
    with smtplib.SMTP("localhost") as smtp:
        smtp.send_message(msg)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def report_spark_jobs(min_hours, no_notify, email_domain=None):
    """Print a report of long-running Spark frameworks and optionally notify.

    Messages for services returned by ``list_services`` are printed first;
    messages for any other service are printed under an "INVALID SERVICES"
    heading. Unless ``no_notify`` is set, a check event is sent for every
    valid service (WARNING with that service's message, OK otherwise) and,
    when ``email_domain`` is given, each framework's guessed user is emailed.

    :param min_hours: minimum registration time, in hours, for a framework to
        be reported
    :param no_notify: when true, send neither check events nor emails
    :param email_domain: domain used to build user email addresses; no emails
        are sent when falsy
    :returns: ``0`` when no matching framework was found, ``1`` otherwise
    """
    frameworks = get_matching_framework_info(min_hours=min_hours)
    messages_by_service = get_messages_by_service(frameworks)
    valid_services = set(list_services())

    messages_for_unknown_services = []
    for service, message in messages_by_service.items():
        if service in valid_services:
            print(f"{message}\n")
        else:
            messages_for_unknown_services.append(message)
    if messages_for_unknown_services:
        print("\nINVALID SERVICES")
        print("----------------")
        print(
            "The following frameworks are associated with services that are not configured in PaaSTA.\n"
        )
        print("\n\n".join(messages_for_unknown_services))

    if not no_notify:
        for service in valid_services:
            if service in messages_by_service:
                # Look the message up per service: reusing the loop variable
                # left over from the printing loop above would send the last
                # service's text to every service.
                update_check_status(
                    service,
                    messages_by_service[service],
                    pysensu_yelp.Status.WARNING,
                )
            else:
                update_check_status(
                    service, "No long running spark jobs", pysensu_yelp.Status.OK
                )
        if email_domain:
            for framework in frameworks:
                email_user(framework, email_domain)

    return 0 if not frameworks else 1
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def main():
    """Run the report from the command line.

    :returns: the exit status produced by ``report_spark_jobs``
    """
    cli_args = parse_args()
    logging.basicConfig()
    exit_status = report_spark_jobs(
        cli_args.min_hours,
        cli_args.no_notify,
        cli_args.email_domain,
    )
    return exit_status
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
# Script entry point: the value returned by main() becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# Copyright 2015-2018 Yelp Inc.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
"""
|
|
16
|
+
Usage: ./cleanup_kubernetes_cr.py [options]
|
|
17
|
+
|
|
18
|
+
Command line options:
|
|
19
|
+
|
|
20
|
+
- -d <SOA_DIR>, --soa-dir <SOA_DIR>: Specify a SOA config dir to read from
|
|
21
|
+
- -v, --verbose: Verbose output
- -c <CLUSTER>, --cluster <CLUSTER>: Cluster to cleanup CRs for
|
|
22
|
+
"""
|
|
23
|
+
import argparse
|
|
24
|
+
import logging
|
|
25
|
+
import sys
|
|
26
|
+
from typing import Sequence
|
|
27
|
+
|
|
28
|
+
from paasta_tools.kubernetes_tools import CustomResourceDefinition
|
|
29
|
+
from paasta_tools.kubernetes_tools import delete_custom_resource
|
|
30
|
+
from paasta_tools.kubernetes_tools import KubeClient
|
|
31
|
+
from paasta_tools.kubernetes_tools import list_custom_resources
|
|
32
|
+
from paasta_tools.kubernetes_tools import load_custom_resource_definitions
|
|
33
|
+
from paasta_tools.kubernetes_tools import paasta_prefixed
|
|
34
|
+
from paasta_tools.utils import DEFAULT_SOA_DIR
|
|
35
|
+
from paasta_tools.utils import load_all_configs
|
|
36
|
+
from paasta_tools.utils import load_system_paasta_config
|
|
37
|
+
|
|
38
|
+
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def parse_args() -> argparse.Namespace:
    """Parse the command line of the custom resource cleanup script.

    :returns: a namespace with ``soa_dir`` (defaults to ``DEFAULT_SOA_DIR``),
        ``verbose`` (bool flag) and ``cluster`` (string or ``None``)
    """
    cli = argparse.ArgumentParser(description="Cleanup custom_resources.")
    cli.add_argument(
        "-d",
        "--soa-dir",
        default=DEFAULT_SOA_DIR,
        dest="soa_dir",
        metavar="SOA_DIR",
        help="define a different soa config directory",
    )
    cli.add_argument(
        "-v",
        "--verbose",
        default=False,
        dest="verbose",
        action="store_true",
    )
    cli.add_argument(
        "-c",
        "--cluster",
        help="Cluster to cleanup CRs for",
        default=None,
    )
    return cli.parse_args()
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def main() -> None:
    """Clean up custom resources and exit with a status reflecting the outcome.

    Logging is configured at DEBUG when ``--verbose`` is given and INFO
    otherwise. The cluster comes from ``--cluster`` or, failing that, from the
    system PaaSTA config. The process exits with 0 when
    ``cleanup_all_custom_resources`` reports success and 1 otherwise.
    """
    args = parse_args()
    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=log_level)

    kube_client = KubeClient()

    system_paasta_config = load_system_paasta_config()
    succeeded = cleanup_all_custom_resources(
        kube_client=kube_client,
        soa_dir=args.soa_dir,
        cluster=args.cluster or system_paasta_config.get_cluster(),
        custom_resource_definitions=load_custom_resource_definitions(
            system_paasta_config
        ),
    )
    exit_status = 0 if succeeded else 1
    sys.exit(exit_status)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def cleanup_all_custom_resources(
    kube_client: KubeClient,
    soa_dir: str,
    cluster: str,
    custom_resource_definitions: Sequence[CustomResourceDefinition],
) -> bool:
    """Delete custom resources that no longer have a matching instance config.

    For each given definition whose kind exists in the cluster (among CRDs
    selected by the PaaSTA ``service`` label), every custom resource whose
    service/instance is absent from the configs loaded from ``soa_dir`` is
    deleted. Definitions with no configs at all are skipped entirely.

    :param kube_client: client used to list CRDs and to list/delete resources
    :param soa_dir: SOA config directory to load configs from
    :param cluster: cluster whose configs are loaded
    :param custom_resource_definitions: definitions to clean up resources for
    :returns: ``True`` when every attempted deletion succeeded (or none was
        attempted), ``False`` when at least one deletion raised
    """
    cluster_crds = {
        crd.spec.names.kind
        for crd in kube_client.apiextensions.list_custom_resource_definition(
            label_selector=paasta_prefixed("service")
        ).items
    }
    log.debug(f"CRDs found: {cluster_crds}")
    results = []
    for crd in custom_resource_definitions:
        if crd.kube_kind.singular not in cluster_crds:
            # TODO: kube_kind.singular seems to correspond to `crd.names.kind`
            # and not `crd.names.singular`
            log.warning(f"CRD {crd.kube_kind.singular} " f"not found in {cluster}")
            continue
        config_dicts = load_all_configs(
            cluster=cluster, file_prefix=crd.file_prefix, soa_dir=soa_dir
        )
        if not config_dicts:
            continue
        crs = list_custom_resources(
            kube_client=kube_client,
            kind=crd.kube_kind,
            version=crd.version,
            group=crd.group,
        )
        for cr in crs:
            service = config_dicts.get(cr.service)
            if service is not None:
                instance = service.get(cr.instance)
                if instance is not None:
                    # Still configured: keep this resource.
                    continue
            result = False
            try:
                delete_custom_resource(
                    kube_client=kube_client,
                    name=cr.name,
                    namespace=cr.namespace,
                    plural=crd.kube_kind.plural,
                    version=crd.version,
                    group=crd.group,
                )
                result = True
            except Exception:
                # Best effort: record the failure and keep cleaning up the rest.
                # The f prefix is required so the resource name is interpolated.
                log.exception(f"Error while deleting CR {cr.name}")
            results.append(result)
    return all(results) if results else True
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# Script entry point; main() terminates the process itself via sys.exit().
if __name__ == "__main__":
    main()
|