paasta-tools 1.21.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- k8s_itests/__init__.py +0 -0
- k8s_itests/test_autoscaling.py +23 -0
- k8s_itests/utils.py +38 -0
- paasta_tools/__init__.py +20 -0
- paasta_tools/adhoc_tools.py +142 -0
- paasta_tools/api/__init__.py +13 -0
- paasta_tools/api/api.py +330 -0
- paasta_tools/api/api_docs/swagger.json +2323 -0
- paasta_tools/api/client.py +106 -0
- paasta_tools/api/settings.py +33 -0
- paasta_tools/api/tweens/__init__.py +6 -0
- paasta_tools/api/tweens/auth.py +125 -0
- paasta_tools/api/tweens/profiling.py +108 -0
- paasta_tools/api/tweens/request_logger.py +124 -0
- paasta_tools/api/views/__init__.py +13 -0
- paasta_tools/api/views/autoscaler.py +100 -0
- paasta_tools/api/views/exception.py +45 -0
- paasta_tools/api/views/flink.py +73 -0
- paasta_tools/api/views/instance.py +395 -0
- paasta_tools/api/views/pause_autoscaler.py +71 -0
- paasta_tools/api/views/remote_run.py +113 -0
- paasta_tools/api/views/resources.py +76 -0
- paasta_tools/api/views/service.py +35 -0
- paasta_tools/api/views/version.py +25 -0
- paasta_tools/apply_external_resources.py +79 -0
- paasta_tools/async_utils.py +109 -0
- paasta_tools/autoscaling/__init__.py +0 -0
- paasta_tools/autoscaling/autoscaling_service_lib.py +57 -0
- paasta_tools/autoscaling/forecasting.py +106 -0
- paasta_tools/autoscaling/max_all_k8s_services.py +41 -0
- paasta_tools/autoscaling/pause_service_autoscaler.py +77 -0
- paasta_tools/autoscaling/utils.py +52 -0
- paasta_tools/bounce_lib.py +184 -0
- paasta_tools/broadcast_log_to_services.py +62 -0
- paasta_tools/cassandracluster_tools.py +210 -0
- paasta_tools/check_autoscaler_max_instances.py +212 -0
- paasta_tools/check_cassandracluster_services_replication.py +35 -0
- paasta_tools/check_flink_services_health.py +203 -0
- paasta_tools/check_kubernetes_api.py +57 -0
- paasta_tools/check_kubernetes_services_replication.py +141 -0
- paasta_tools/check_oom_events.py +244 -0
- paasta_tools/check_services_replication_tools.py +324 -0
- paasta_tools/check_spark_jobs.py +234 -0
- paasta_tools/cleanup_kubernetes_cr.py +138 -0
- paasta_tools/cleanup_kubernetes_crd.py +145 -0
- paasta_tools/cleanup_kubernetes_jobs.py +344 -0
- paasta_tools/cleanup_tron_namespaces.py +96 -0
- paasta_tools/cli/__init__.py +13 -0
- paasta_tools/cli/authentication.py +85 -0
- paasta_tools/cli/cli.py +260 -0
- paasta_tools/cli/cmds/__init__.py +13 -0
- paasta_tools/cli/cmds/autoscale.py +143 -0
- paasta_tools/cli/cmds/check.py +334 -0
- paasta_tools/cli/cmds/cook_image.py +147 -0
- paasta_tools/cli/cmds/get_docker_image.py +76 -0
- paasta_tools/cli/cmds/get_image_version.py +172 -0
- paasta_tools/cli/cmds/get_latest_deployment.py +93 -0
- paasta_tools/cli/cmds/info.py +155 -0
- paasta_tools/cli/cmds/itest.py +117 -0
- paasta_tools/cli/cmds/list.py +66 -0
- paasta_tools/cli/cmds/list_clusters.py +42 -0
- paasta_tools/cli/cmds/list_deploy_queue.py +171 -0
- paasta_tools/cli/cmds/list_namespaces.py +84 -0
- paasta_tools/cli/cmds/local_run.py +1396 -0
- paasta_tools/cli/cmds/logs.py +1601 -0
- paasta_tools/cli/cmds/mark_for_deployment.py +1988 -0
- paasta_tools/cli/cmds/mesh_status.py +174 -0
- paasta_tools/cli/cmds/pause_service_autoscaler.py +107 -0
- paasta_tools/cli/cmds/push_to_registry.py +275 -0
- paasta_tools/cli/cmds/remote_run.py +252 -0
- paasta_tools/cli/cmds/rollback.py +347 -0
- paasta_tools/cli/cmds/secret.py +549 -0
- paasta_tools/cli/cmds/security_check.py +59 -0
- paasta_tools/cli/cmds/spark_run.py +1400 -0
- paasta_tools/cli/cmds/start_stop_restart.py +401 -0
- paasta_tools/cli/cmds/status.py +2302 -0
- paasta_tools/cli/cmds/validate.py +1012 -0
- paasta_tools/cli/cmds/wait_for_deployment.py +275 -0
- paasta_tools/cli/fsm/__init__.py +13 -0
- paasta_tools/cli/fsm/autosuggest.py +82 -0
- paasta_tools/cli/fsm/template/README.md +8 -0
- paasta_tools/cli/fsm/template/cookiecutter.json +7 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/kubernetes-PROD.yaml +91 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/monitoring.yaml +20 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/service.yaml +8 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/smartstack.yaml +6 -0
- paasta_tools/cli/fsm_cmd.py +121 -0
- paasta_tools/cli/paasta_tabcomplete.sh +23 -0
- paasta_tools/cli/schemas/adhoc_schema.json +199 -0
- paasta_tools/cli/schemas/autoscaling_schema.json +91 -0
- paasta_tools/cli/schemas/autotuned_defaults/cassandracluster_schema.json +37 -0
- paasta_tools/cli/schemas/autotuned_defaults/kubernetes_schema.json +89 -0
- paasta_tools/cli/schemas/deploy_schema.json +173 -0
- paasta_tools/cli/schemas/eks_schema.json +970 -0
- paasta_tools/cli/schemas/kubernetes_schema.json +970 -0
- paasta_tools/cli/schemas/rollback_schema.json +160 -0
- paasta_tools/cli/schemas/service_schema.json +25 -0
- paasta_tools/cli/schemas/smartstack_schema.json +322 -0
- paasta_tools/cli/schemas/tron_schema.json +699 -0
- paasta_tools/cli/utils.py +1118 -0
- paasta_tools/clusterman.py +21 -0
- paasta_tools/config_utils.py +385 -0
- paasta_tools/contrib/__init__.py +0 -0
- paasta_tools/contrib/bounce_log_latency_parser.py +68 -0
- paasta_tools/contrib/check_manual_oapi_changes.sh +24 -0
- paasta_tools/contrib/check_orphans.py +306 -0
- paasta_tools/contrib/create_dynamodb_table.py +35 -0
- paasta_tools/contrib/create_paasta_playground.py +105 -0
- paasta_tools/contrib/emit_allocated_cpu_metrics.py +50 -0
- paasta_tools/contrib/get_running_task_allocation.py +346 -0
- paasta_tools/contrib/habitat_fixer.py +86 -0
- paasta_tools/contrib/ide_helper.py +316 -0
- paasta_tools/contrib/is_pod_healthy_in_proxy.py +139 -0
- paasta_tools/contrib/is_pod_healthy_in_smartstack.py +50 -0
- paasta_tools/contrib/kill_bad_containers.py +109 -0
- paasta_tools/contrib/mass-deploy-tag.sh +44 -0
- paasta_tools/contrib/mock_patch_checker.py +86 -0
- paasta_tools/contrib/paasta_update_soa_memcpu.py +520 -0
- paasta_tools/contrib/render_template.py +129 -0
- paasta_tools/contrib/rightsizer_soaconfigs_update.py +348 -0
- paasta_tools/contrib/service_shard_remove.py +157 -0
- paasta_tools/contrib/service_shard_update.py +373 -0
- paasta_tools/contrib/shared_ip_check.py +77 -0
- paasta_tools/contrib/timeouts_metrics_prom.py +64 -0
- paasta_tools/delete_kubernetes_deployments.py +89 -0
- paasta_tools/deployment_utils.py +44 -0
- paasta_tools/docker_wrapper.py +234 -0
- paasta_tools/docker_wrapper_imports.py +13 -0
- paasta_tools/drain_lib.py +351 -0
- paasta_tools/dump_locally_running_services.py +71 -0
- paasta_tools/eks_tools.py +119 -0
- paasta_tools/envoy_tools.py +373 -0
- paasta_tools/firewall.py +504 -0
- paasta_tools/firewall_logging.py +154 -0
- paasta_tools/firewall_update.py +172 -0
- paasta_tools/flink_tools.py +345 -0
- paasta_tools/flinkeks_tools.py +90 -0
- paasta_tools/frameworks/__init__.py +0 -0
- paasta_tools/frameworks/adhoc_scheduler.py +71 -0
- paasta_tools/frameworks/constraints.py +87 -0
- paasta_tools/frameworks/native_scheduler.py +652 -0
- paasta_tools/frameworks/native_service_config.py +301 -0
- paasta_tools/frameworks/task_store.py +245 -0
- paasta_tools/generate_all_deployments +9 -0
- paasta_tools/generate_authenticating_services.py +94 -0
- paasta_tools/generate_deployments_for_service.py +255 -0
- paasta_tools/generate_services_file.py +114 -0
- paasta_tools/generate_services_yaml.py +30 -0
- paasta_tools/hacheck.py +76 -0
- paasta_tools/instance/__init__.py +0 -0
- paasta_tools/instance/hpa_metrics_parser.py +122 -0
- paasta_tools/instance/kubernetes.py +1362 -0
- paasta_tools/iptables.py +240 -0
- paasta_tools/kafkacluster_tools.py +143 -0
- paasta_tools/kubernetes/__init__.py +0 -0
- paasta_tools/kubernetes/application/__init__.py +0 -0
- paasta_tools/kubernetes/application/controller_wrappers.py +476 -0
- paasta_tools/kubernetes/application/tools.py +90 -0
- paasta_tools/kubernetes/bin/__init__.py +0 -0
- paasta_tools/kubernetes/bin/kubernetes_remove_evicted_pods.py +164 -0
- paasta_tools/kubernetes/bin/paasta_cleanup_remote_run_resources.py +135 -0
- paasta_tools/kubernetes/bin/paasta_cleanup_stale_nodes.py +181 -0
- paasta_tools/kubernetes/bin/paasta_secrets_sync.py +758 -0
- paasta_tools/kubernetes/remote_run.py +558 -0
- paasta_tools/kubernetes_tools.py +4679 -0
- paasta_tools/list_kubernetes_service_instances.py +128 -0
- paasta_tools/list_tron_namespaces.py +60 -0
- paasta_tools/long_running_service_tools.py +678 -0
- paasta_tools/mac_address.py +44 -0
- paasta_tools/marathon_dashboard.py +0 -0
- paasta_tools/mesos/__init__.py +0 -0
- paasta_tools/mesos/cfg.py +46 -0
- paasta_tools/mesos/cluster.py +60 -0
- paasta_tools/mesos/exceptions.py +59 -0
- paasta_tools/mesos/framework.py +77 -0
- paasta_tools/mesos/log.py +48 -0
- paasta_tools/mesos/master.py +306 -0
- paasta_tools/mesos/mesos_file.py +169 -0
- paasta_tools/mesos/parallel.py +52 -0
- paasta_tools/mesos/slave.py +115 -0
- paasta_tools/mesos/task.py +94 -0
- paasta_tools/mesos/util.py +69 -0
- paasta_tools/mesos/zookeeper.py +37 -0
- paasta_tools/mesos_maintenance.py +848 -0
- paasta_tools/mesos_tools.py +1051 -0
- paasta_tools/metrics/__init__.py +0 -0
- paasta_tools/metrics/metastatus_lib.py +1110 -0
- paasta_tools/metrics/metrics_lib.py +217 -0
- paasta_tools/monitoring/__init__.py +13 -0
- paasta_tools/monitoring/check_k8s_api_performance.py +110 -0
- paasta_tools/monitoring_tools.py +652 -0
- paasta_tools/monkrelaycluster_tools.py +146 -0
- paasta_tools/nrtsearchservice_tools.py +143 -0
- paasta_tools/nrtsearchserviceeks_tools.py +68 -0
- paasta_tools/oom_logger.py +321 -0
- paasta_tools/paasta_deploy_tron_jobs +3 -0
- paasta_tools/paasta_execute_docker_command.py +123 -0
- paasta_tools/paasta_native_serviceinit.py +21 -0
- paasta_tools/paasta_service_config_loader.py +201 -0
- paasta_tools/paastaapi/__init__.py +29 -0
- paasta_tools/paastaapi/api/__init__.py +3 -0
- paasta_tools/paastaapi/api/autoscaler_api.py +302 -0
- paasta_tools/paastaapi/api/default_api.py +569 -0
- paasta_tools/paastaapi/api/remote_run_api.py +604 -0
- paasta_tools/paastaapi/api/resources_api.py +157 -0
- paasta_tools/paastaapi/api/service_api.py +1736 -0
- paasta_tools/paastaapi/api_client.py +818 -0
- paasta_tools/paastaapi/apis/__init__.py +22 -0
- paasta_tools/paastaapi/configuration.py +455 -0
- paasta_tools/paastaapi/exceptions.py +137 -0
- paasta_tools/paastaapi/model/__init__.py +5 -0
- paasta_tools/paastaapi/model/adhoc_launch_history.py +176 -0
- paasta_tools/paastaapi/model/autoscaler_count_msg.py +176 -0
- paasta_tools/paastaapi/model/deploy_queue.py +178 -0
- paasta_tools/paastaapi/model/deploy_queue_service_instance.py +194 -0
- paasta_tools/paastaapi/model/envoy_backend.py +185 -0
- paasta_tools/paastaapi/model/envoy_location.py +184 -0
- paasta_tools/paastaapi/model/envoy_status.py +181 -0
- paasta_tools/paastaapi/model/flink_cluster_overview.py +188 -0
- paasta_tools/paastaapi/model/flink_config.py +173 -0
- paasta_tools/paastaapi/model/flink_job.py +186 -0
- paasta_tools/paastaapi/model/flink_job_details.py +192 -0
- paasta_tools/paastaapi/model/flink_jobs.py +175 -0
- paasta_tools/paastaapi/model/float_and_error.py +173 -0
- paasta_tools/paastaapi/model/hpa_metric.py +176 -0
- paasta_tools/paastaapi/model/inline_object.py +170 -0
- paasta_tools/paastaapi/model/inline_response200.py +170 -0
- paasta_tools/paastaapi/model/inline_response2001.py +170 -0
- paasta_tools/paastaapi/model/instance_bounce_status.py +200 -0
- paasta_tools/paastaapi/model/instance_mesh_status.py +186 -0
- paasta_tools/paastaapi/model/instance_status.py +220 -0
- paasta_tools/paastaapi/model/instance_status_adhoc.py +187 -0
- paasta_tools/paastaapi/model/instance_status_cassandracluster.py +173 -0
- paasta_tools/paastaapi/model/instance_status_flink.py +173 -0
- paasta_tools/paastaapi/model/instance_status_kafkacluster.py +173 -0
- paasta_tools/paastaapi/model/instance_status_kubernetes.py +263 -0
- paasta_tools/paastaapi/model/instance_status_kubernetes_autoscaling_status.py +187 -0
- paasta_tools/paastaapi/model/instance_status_kubernetes_v2.py +197 -0
- paasta_tools/paastaapi/model/instance_status_tron.py +204 -0
- paasta_tools/paastaapi/model/instance_tasks.py +182 -0
- paasta_tools/paastaapi/model/integer_and_error.py +173 -0
- paasta_tools/paastaapi/model/kubernetes_container.py +178 -0
- paasta_tools/paastaapi/model/kubernetes_container_v2.py +219 -0
- paasta_tools/paastaapi/model/kubernetes_healthcheck.py +176 -0
- paasta_tools/paastaapi/model/kubernetes_pod.py +201 -0
- paasta_tools/paastaapi/model/kubernetes_pod_event.py +176 -0
- paasta_tools/paastaapi/model/kubernetes_pod_v2.py +213 -0
- paasta_tools/paastaapi/model/kubernetes_replica_set.py +185 -0
- paasta_tools/paastaapi/model/kubernetes_version.py +202 -0
- paasta_tools/paastaapi/model/remote_run_outcome.py +189 -0
- paasta_tools/paastaapi/model/remote_run_start.py +185 -0
- paasta_tools/paastaapi/model/remote_run_stop.py +176 -0
- paasta_tools/paastaapi/model/remote_run_token.py +173 -0
- paasta_tools/paastaapi/model/resource.py +187 -0
- paasta_tools/paastaapi/model/resource_item.py +187 -0
- paasta_tools/paastaapi/model/resource_value.py +176 -0
- paasta_tools/paastaapi/model/smartstack_backend.py +191 -0
- paasta_tools/paastaapi/model/smartstack_location.py +181 -0
- paasta_tools/paastaapi/model/smartstack_status.py +181 -0
- paasta_tools/paastaapi/model/task_tail_lines.py +176 -0
- paasta_tools/paastaapi/model_utils.py +1879 -0
- paasta_tools/paastaapi/models/__init__.py +62 -0
- paasta_tools/paastaapi/rest.py +287 -0
- paasta_tools/prune_completed_pods.py +220 -0
- paasta_tools/puppet_service_tools.py +59 -0
- paasta_tools/py.typed +1 -0
- paasta_tools/remote_git.py +127 -0
- paasta_tools/run-paasta-api-in-dev-mode.py +57 -0
- paasta_tools/run-paasta-api-playground.py +51 -0
- paasta_tools/secret_providers/__init__.py +66 -0
- paasta_tools/secret_providers/vault.py +214 -0
- paasta_tools/secret_tools.py +277 -0
- paasta_tools/setup_istio_mesh.py +353 -0
- paasta_tools/setup_kubernetes_cr.py +412 -0
- paasta_tools/setup_kubernetes_crd.py +138 -0
- paasta_tools/setup_kubernetes_internal_crd.py +154 -0
- paasta_tools/setup_kubernetes_job.py +353 -0
- paasta_tools/setup_prometheus_adapter_config.py +1028 -0
- paasta_tools/setup_tron_namespace.py +248 -0
- paasta_tools/slack.py +75 -0
- paasta_tools/smartstack_tools.py +676 -0
- paasta_tools/spark_tools.py +283 -0
- paasta_tools/synapse_srv_namespaces_fact.py +42 -0
- paasta_tools/tron/__init__.py +0 -0
- paasta_tools/tron/client.py +158 -0
- paasta_tools/tron/tron_command_context.py +194 -0
- paasta_tools/tron/tron_timeutils.py +101 -0
- paasta_tools/tron_tools.py +1448 -0
- paasta_tools/utils.py +4307 -0
- paasta_tools/yaml_tools.py +44 -0
- paasta_tools-1.21.3.data/scripts/apply_external_resources.py +79 -0
- paasta_tools-1.21.3.data/scripts/bounce_log_latency_parser.py +68 -0
- paasta_tools-1.21.3.data/scripts/check_autoscaler_max_instances.py +212 -0
- paasta_tools-1.21.3.data/scripts/check_cassandracluster_services_replication.py +35 -0
- paasta_tools-1.21.3.data/scripts/check_flink_services_health.py +203 -0
- paasta_tools-1.21.3.data/scripts/check_kubernetes_api.py +57 -0
- paasta_tools-1.21.3.data/scripts/check_kubernetes_services_replication.py +141 -0
- paasta_tools-1.21.3.data/scripts/check_manual_oapi_changes.sh +24 -0
- paasta_tools-1.21.3.data/scripts/check_oom_events.py +244 -0
- paasta_tools-1.21.3.data/scripts/check_orphans.py +306 -0
- paasta_tools-1.21.3.data/scripts/check_spark_jobs.py +234 -0
- paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_cr.py +138 -0
- paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_crd.py +145 -0
- paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_jobs.py +344 -0
- paasta_tools-1.21.3.data/scripts/create_dynamodb_table.py +35 -0
- paasta_tools-1.21.3.data/scripts/create_paasta_playground.py +105 -0
- paasta_tools-1.21.3.data/scripts/delete_kubernetes_deployments.py +89 -0
- paasta_tools-1.21.3.data/scripts/emit_allocated_cpu_metrics.py +50 -0
- paasta_tools-1.21.3.data/scripts/generate_all_deployments +9 -0
- paasta_tools-1.21.3.data/scripts/generate_authenticating_services.py +94 -0
- paasta_tools-1.21.3.data/scripts/generate_deployments_for_service.py +255 -0
- paasta_tools-1.21.3.data/scripts/generate_services_file.py +114 -0
- paasta_tools-1.21.3.data/scripts/generate_services_yaml.py +30 -0
- paasta_tools-1.21.3.data/scripts/get_running_task_allocation.py +346 -0
- paasta_tools-1.21.3.data/scripts/habitat_fixer.py +86 -0
- paasta_tools-1.21.3.data/scripts/ide_helper.py +316 -0
- paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_proxy.py +139 -0
- paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_smartstack.py +50 -0
- paasta_tools-1.21.3.data/scripts/kill_bad_containers.py +109 -0
- paasta_tools-1.21.3.data/scripts/kubernetes_remove_evicted_pods.py +164 -0
- paasta_tools-1.21.3.data/scripts/mass-deploy-tag.sh +44 -0
- paasta_tools-1.21.3.data/scripts/mock_patch_checker.py +86 -0
- paasta_tools-1.21.3.data/scripts/paasta_cleanup_remote_run_resources.py +135 -0
- paasta_tools-1.21.3.data/scripts/paasta_cleanup_stale_nodes.py +181 -0
- paasta_tools-1.21.3.data/scripts/paasta_deploy_tron_jobs +3 -0
- paasta_tools-1.21.3.data/scripts/paasta_execute_docker_command.py +123 -0
- paasta_tools-1.21.3.data/scripts/paasta_secrets_sync.py +758 -0
- paasta_tools-1.21.3.data/scripts/paasta_tabcomplete.sh +23 -0
- paasta_tools-1.21.3.data/scripts/paasta_update_soa_memcpu.py +520 -0
- paasta_tools-1.21.3.data/scripts/render_template.py +129 -0
- paasta_tools-1.21.3.data/scripts/rightsizer_soaconfigs_update.py +348 -0
- paasta_tools-1.21.3.data/scripts/service_shard_remove.py +157 -0
- paasta_tools-1.21.3.data/scripts/service_shard_update.py +373 -0
- paasta_tools-1.21.3.data/scripts/setup_istio_mesh.py +353 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_cr.py +412 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_crd.py +138 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_internal_crd.py +154 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_job.py +353 -0
- paasta_tools-1.21.3.data/scripts/setup_prometheus_adapter_config.py +1028 -0
- paasta_tools-1.21.3.data/scripts/shared_ip_check.py +77 -0
- paasta_tools-1.21.3.data/scripts/synapse_srv_namespaces_fact.py +42 -0
- paasta_tools-1.21.3.data/scripts/timeouts_metrics_prom.py +64 -0
- paasta_tools-1.21.3.dist-info/LICENSE +201 -0
- paasta_tools-1.21.3.dist-info/METADATA +74 -0
- paasta_tools-1.21.3.dist-info/RECORD +348 -0
- paasta_tools-1.21.3.dist-info/WHEEL +5 -0
- paasta_tools-1.21.3.dist-info/entry_points.txt +20 -0
- paasta_tools-1.21.3.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,1051 @@
|
|
|
1
|
+
# Copyright 2015-2016 Yelp Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
import asyncio
|
|
15
|
+
import datetime
|
|
16
|
+
import itertools
|
|
17
|
+
import json
|
|
18
|
+
import logging
|
|
19
|
+
import re
|
|
20
|
+
import socket
|
|
21
|
+
from collections import namedtuple
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import Any
|
|
24
|
+
from typing import Awaitable
|
|
25
|
+
from typing import Callable
|
|
26
|
+
from typing import Collection
|
|
27
|
+
from typing import Dict
|
|
28
|
+
from typing import List
|
|
29
|
+
from typing import Mapping
|
|
30
|
+
from typing import MutableMapping
|
|
31
|
+
from typing import NamedTuple
|
|
32
|
+
from typing import Optional
|
|
33
|
+
from typing import Sequence
|
|
34
|
+
from typing import Tuple
|
|
35
|
+
from typing import Union
|
|
36
|
+
from urllib.parse import urlparse
|
|
37
|
+
|
|
38
|
+
import a_sync
|
|
39
|
+
import humanize
|
|
40
|
+
import requests
|
|
41
|
+
from kazoo.client import KazooClient
|
|
42
|
+
from mypy_extensions import TypedDict
|
|
43
|
+
|
|
44
|
+
import paasta_tools.mesos.cluster as cluster
|
|
45
|
+
import paasta_tools.mesos.exceptions as mesos_exceptions
|
|
46
|
+
from paasta_tools.async_utils import aiter_to_list
|
|
47
|
+
from paasta_tools.async_utils import async_timeout
|
|
48
|
+
from paasta_tools.async_utils import async_ttl_cache
|
|
49
|
+
from paasta_tools.long_running_service_tools import host_passes_blacklist
|
|
50
|
+
from paasta_tools.long_running_service_tools import host_passes_whitelist
|
|
51
|
+
from paasta_tools.mesos.cfg import load_mesos_config
|
|
52
|
+
from paasta_tools.mesos.exceptions import SlaveDoesNotExist
|
|
53
|
+
from paasta_tools.mesos.master import MesosMaster
|
|
54
|
+
from paasta_tools.mesos.master import MesosState
|
|
55
|
+
from paasta_tools.mesos.task import Task
|
|
56
|
+
from paasta_tools.utils import DeployBlacklist
|
|
57
|
+
from paasta_tools.utils import DeployWhitelist
|
|
58
|
+
from paasta_tools.utils import format_table
|
|
59
|
+
from paasta_tools.utils import get_user_agent
|
|
60
|
+
from paasta_tools.utils import load_system_paasta_config
|
|
61
|
+
from paasta_tools.utils import PaastaColors
|
|
62
|
+
from paasta_tools.utils import SystemPaastaConfig
|
|
63
|
+
from paasta_tools.utils import TimeoutError
|
|
64
|
+
|
|
65
|
+
# Host/path pair for a ZooKeeper endpoint (fields: host, path).
ZookeeperHostPath = namedtuple("ZookeeperHostPath", ["host", "path"])
# Pairs a task count with the slave it was counted on (fields: count, slave).
SlaveTaskCount = namedtuple("SlaveTaskCount", ["count", "slave"])

# Fallback location for the mesos-cli JSON config when the system PaaSTA
# config does not provide an explicit "path" (see get_mesos_config_path).
DEFAULT_MESOS_CLI_CONFIG_LOCATION = "/nail/etc/mesos-cli.json"

# Mesos task states that mean the task has stopped and will not run again.
TERMINAL_STATES = (
    "TASK_ERROR",
    "TASK_KILLED",
    "TASK_FAILED",
    "TASK_FINISHED",
    "TASK_DROPPED",
    "TASK_GONE",
    "TASK_GONE_BY_OPERATOR",
)

# Library-style logger: a NullHandler is attached so importing applications
# decide whether/where log records are emitted.
log = logging.getLogger(__name__)
log.addHandler(logging.NullHandler())
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def get_mesos_config_path(
    system_paasta_config: Optional[SystemPaastaConfig] = None,
) -> str:
    """Return the filesystem path of the mesos-cli configuration file.

    :param system_paasta_config: pre-loaded system config; loaded on demand
        when omitted.
    :return: the configured "path" entry, falling back to
        DEFAULT_MESOS_CLI_CONFIG_LOCATION when not set.
    """
    if system_paasta_config is None:
        system_paasta_config = load_system_paasta_config()

    mesos_cli_config = system_paasta_config.get_mesos_cli_config()
    return mesos_cli_config.get("path", DEFAULT_MESOS_CLI_CONFIG_LOCATION)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def get_mesos_config(mesos_config_path: Optional[str] = None) -> Dict:
    """Load and return the mesos-cli configuration as a dict.

    :param mesos_config_path: explicit config file location; resolved via
        get_mesos_config_path() when omitted.
    """
    path = (
        get_mesos_config_path() if mesos_config_path is None else mesos_config_path
    )
    return load_mesos_config(path)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def get_mesos_master(
    mesos_config_path: Optional[str] = None, **overrides: Any
) -> MesosMaster:
    """Construct a MesosMaster from the mesos-cli config.

    :param mesos_config_path: optional explicit path to the mesos-cli config.
    :param overrides: individual config keys to overwrite before construction.
    """
    config = get_mesos_config(mesos_config_path)
    # Keyword overrides win over anything loaded from the config file.
    config.update(overrides)
    return MesosMaster(config)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
# Fully qualified hostname of the machine importing this module.
MY_HOSTNAME = socket.getfqdn()
# Port of the mesos-master HTTP endpoint (int; interpolated into URLs below).
MESOS_MASTER_PORT = 5050
# NOTE(review): kept as a string while MESOS_MASTER_PORT is an int —
# presumably only ever interpolated into URLs; confirm before normalizing.
MESOS_SLAVE_PORT = "5051"
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class MesosSlaveConnectionError(Exception):
    """Raised when communication with a mesos slave fails."""

    pass
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class MesosTailLines(NamedTuple):
    """Lines tailed from a mesos task's log files, plus any fetch error."""

    # Lines read from the task's stdout log.
    stdout: List[str]
    # Lines read from the task's stderr log.
    stderr: List[str]
    # Human-readable description of an error hit while fetching the logs.
    error_message: str
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def get_mesos_leader(mesos_config_path: Optional[str] = None) -> str:
    """Get the current mesos-master leader's hostname.

    Uses mesos.cli to query ZooKeeper for the master URL, then resolves the
    URL's host into a fully qualified domain name.

    :returns: the current mesos-master FQDN
    :raises ValueError: when the master URL carries no parseable hostname
    """
    try:
        master_url = get_mesos_master(mesos_config_path).host
    except mesos_exceptions.MasterNotAvailableException:
        log.debug("mesos.cli failed to provide the master host")
        raise
    log.debug("mesos.cli thinks the master host is: %s" % master_url)

    parsed_hostname = urlparse(master_url).hostname
    log.debug("The parsed master hostname is: %s" % parsed_hostname)
    # Values such as 'localhost:5050' parse without a hostname attribute,
    # so guard before resolving.
    if not parsed_hostname:
        raise ValueError("Expected to receive a valid URL, got: %s" % master_url)

    try:
        resolved = socket.gethostbyaddr(parsed_hostname)[0]
        fqdn = socket.getfqdn(resolved)
    except (socket.error, socket.herror, socket.gaierror, socket.timeout):
        log.debug("Failed to convert mesos leader hostname to fqdn!")
        raise
    log.debug("Mesos Leader: %s" % fqdn)
    return fqdn
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class MesosLeaderUnavailable(Exception):
    """Raised when the mesos-master redirect endpoint cannot be reached."""

    pass
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def find_mesos_leader(cluster):
    """Find the leader with redirect given one mesos master.

    :param cluster: PaaSTA cluster name, substituted into the configured
        cluster FQDN format to get a master hostname.
    :return: "host:port" of the leading mesos-master.
    :raises MesosLeaderUnavailable: when the redirect endpoint can't be hit.
    """
    fqdn_format = load_system_paasta_config().get_cluster_fqdn_format()
    master = fqdn_format.format(cluster=cluster)
    if master is None:
        raise ValueError("Mesos master is required to find leader")

    redirect_url = f"http://{master}:{MESOS_MASTER_PORT}/redirect"
    try:
        # Timeouts here are for connect, read
        response = requests.get(redirect_url, timeout=(5, 30))
    except Exception as e:
        raise MesosLeaderUnavailable(e)

    # The master redirects to the leader; the final URL's host is the leader.
    leader_host = urlparse(response.url).hostname
    return f"{leader_host}:{MESOS_MASTER_PORT}"
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
async def get_current_tasks(job_id: str) -> List[Task]:
    """Return every task (active or not) matching a given job id.

    :param job_id: the job id to filter tasks by.
    :return: a list of mesos.cli.Task.
    """
    master = get_mesos_master()
    return await master.tasks(fltr=job_id, active_only=False)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def is_task_running(task: Task) -> bool:
    """Return True when the task's mesos state is TASK_RUNNING."""
    state = task["state"]
    return state == "TASK_RUNNING"
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def filter_running_tasks(tasks: Collection[Task]) -> List[Task]:
    """Keep only the tasks whose state is TASK_RUNNING.

    :param tasks: a collection of mesos.cli.Task
    :return: the running tasks, in input order
    """
    running = []
    for task in tasks:
        if task["state"] == "TASK_RUNNING":
            running.append(task)
    return running
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def filter_not_running_tasks(tasks: Collection[Task]) -> List[Task]:
    """Keep only the tasks whose state is *not* TASK_RUNNING.

    :param tasks: a collection of mesos.cli.Task
    :return: the tasks not currently running, in input order
    """
    not_running = []
    for task in tasks:
        if task["state"] != "TASK_RUNNING":
            not_running.append(task)
    return not_running
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
async def get_running_tasks_from_frameworks(job_id=""):
    """Return running tasks from active and completed frameworks.

    Orphaned tasks are NOT included — see get_all_running_tasks() for those.
    """
    tasks = await get_current_tasks(job_id)
    return filter_running_tasks(tasks)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
async def get_all_running_tasks() -> Collection[Task]:
    """Return all running tasks, including orphan tasks from the master.

    (Unlike get_running_tasks_from_frameworks, orphan tasks ARE appended here.)
    """
    tasks = await get_current_tasks("")
    master = get_mesos_master()
    tasks += await master.orphan_tasks()
    return filter_running_tasks(tasks)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
@async_ttl_cache(ttl=600)
async def get_cached_list_of_all_current_tasks():
    """Return a cached list of all mesos tasks.

    Used by 'paasta status' and 'paasta_serviceinit status' to avoid
    re-querying the mesos master and re-parsing JSON into mesos.Task objects.

    The async_ttl_cache decorator caches the result for 600 seconds; the exact
    ttl matters little here because the underlying HTTP request to the mesos
    master is itself cached by requests_cache during 'paasta status'.

    :return tasks: a list of mesos.Task
    """
    tasks = await get_current_tasks("")
    return tasks
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
@async_ttl_cache(ttl=600)
async def get_cached_list_of_running_tasks_from_frameworks():
    """Return a cached list of all running mesos tasks.

    See the docstring for get_cached_list_of_all_current_tasks().

    :return tasks: a list of mesos.Task
    """
    all_tasks = await get_cached_list_of_all_current_tasks()
    return list(filter_running_tasks(all_tasks))
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
@async_ttl_cache(ttl=600)
async def get_cached_list_of_not_running_tasks_from_frameworks():
    """Return a cached list of mesos tasks that are NOT running.

    See the docstring for get_cached_list_of_all_current_tasks().

    :return tasks: a list of mesos.Task
    """
    all_tasks = await get_cached_list_of_all_current_tasks()
    return list(filter_not_running_tasks(all_tasks))
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def select_tasks_by_id(tasks: Collection[Task], job_id: str = "") -> List[Task]:
    """Return the tasks whose id contains ``job_id`` as a substring.

    :param tasks: a list of mesos.Task.
    :param job_id: the job id.
    :return: the matching tasks, in input order.
    """
    selected = []
    for task in tasks:
        if job_id in task["id"]:
            selected.append(task)
    return selected
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
async def get_non_running_tasks_from_frameworks(job_id: str = "") -> List[Task]:
    """Return non-running tasks from active and completed frameworks.

    Orphaned tasks are NOT included.
    """
    tasks = await get_current_tasks(job_id)
    return filter_not_running_tasks(tasks)
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
async def get_short_hostname_from_task(task: Task) -> str:
    """Return the short (unqualified) hostname of the slave running ``task``,
    or "Unknown" when the slave cannot be determined.
    """
    try:
        slave = await task.slave()
        return slave["hostname"].partition(".")[0]
    except (AttributeError, SlaveDoesNotExist):
        return "Unknown"
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def get_first_status_timestamp(task: "Task") -> Optional[float]:
    """Return the epoch timestamp of the task's first status update.

    :param task: a mesos.Task (mapping with a "statuses" list of updates).
    :return: the first status timestamp as a float, or None when the task has
        no status updates yet — including when the "statuses"/"timestamp"
        fields are missing entirely (previously an uncaught KeyError).
    """
    try:
        first_status = task["statuses"][0]
        return float(first_status["timestamp"])
    except (KeyError, IndexError):
        # Freshly launched tasks may not carry any status updates yet.
        return None
    except SlaveDoesNotExist:
        return None
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def get_first_status_timestamp_string(task: Task) -> str:
    """Human-readable first-status time with a humanized duration, e.g.
    ``2015-01-30T08:45 (an hour ago)``; "Unknown" when the task has no
    status history.
    """
    timestamp = get_first_status_timestamp(task)
    if timestamp is None:
        return "Unknown"
    when = datetime.datetime.fromtimestamp(timestamp)
    return "{} ({})".format(
        when.strftime("%Y-%m-%dT%H:%M"),
        humanize.naturaltime(when),
    )
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
async def get_mem_usage(task: Task) -> str:
    """Format the task's RSS vs. memory limit as "used/limitMB".

    Returns "Undef" for a zero limit, a red string above 90% usage,
    "None" when slave/task info is missing, "Timed Out" on timeout.
    """
    try:
        limit_bytes = await task.mem_limit()
        rss_bytes = await task.rss()
        if limit_bytes == 0:
            return "Undef"
        usage_string = "%d/%dMB" % (
            (rss_bytes / 1024 / 1024),
            (limit_bytes / 1024 / 1024),
        )
        if rss_bytes / limit_bytes * 100 > 90:
            return PaastaColors.red(usage_string)
        return usage_string
    except (AttributeError, SlaveDoesNotExist):
        return "None"
    except TimeoutError:
        return "Timed Out"
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
async def get_cpu_shares(task: Task) -> float:
    """Return the task's CPU allocation minus the executor overhead.

    Mesos adds an extra 0.1 cpu for the executor
    (https://github.com/apache/mesos/blob/dc7c4b6d0bcf778cc0cad57bb108564be734143a/src/slave/constants.hpp#L100),
    so we subtract it to get the task's true share.
    """
    raw_limit = await task.cpu_limit()
    return raw_limit - 0.1
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
async def get_cpu_usage(task: Task) -> str:
    """Calculates a metric of used_cpu/allocated_cpu

    To do this, we take the total number of cpu-seconds the task has consumed,
    (the sum of system and user time), OVER the total cpu time the task
    has been allocated.

    The total time a task has been allocated is the total time the task has
    been running (https://github.com/mesosphere/mesos/blob/0b092b1b0/src/webui/master/static/js/controllers.js#L140)
    multiplied by the "shares" a task has.

    Returns "Undef" when no time has been allocated, a red string above 90%
    usage, "None" for missing slave/task info, "Timed Out" on timeout.
    """
    try:
        start_time = round(task["statuses"][0]["timestamp"])
        # datetime.timestamp() is portable; the previous strftime("%s") is a
        # non-standard glibc extension that is unsupported on some platforms.
        current_time = int(datetime.datetime.now().timestamp())
        duration_seconds = current_time - start_time
        cpu_shares = await get_cpu_shares(task)
        allocated_seconds = duration_seconds * cpu_shares
        task_stats = await task.stats()
        used_seconds = task_stats.get("cpus_system_time_secs", 0.0) + task_stats.get(
            "cpus_user_time_secs", 0.0
        )
        if allocated_seconds == 0:
            return "Undef"
        percent = round(100 * (used_seconds / allocated_seconds), 1)
        percent_string = "%s%%" % percent
        if percent > 90:
            return PaastaColors.red(percent_string)
        else:
            return percent_string
    except (AttributeError, SlaveDoesNotExist):
        return "None"
    except TimeoutError:
        return "Timed Out"
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
async def results_or_unknown(future: Awaitable[str]) -> str:
    """Await *future*, converting any exception into a red "Unknown" string."""
    try:
        result = await future
    except Exception:
        return PaastaColors.red("Unknown")
    return result
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
async def format_running_mesos_task_row(
    task: Task, get_short_task_id: Callable[[str], str]
) -> Tuple[str, ...]:
    """Returns a pretty formatted string of a running mesos task attributes"""

    short_task_id = get_short_task_id(task["id"])
    first_status_timestamp = get_first_status_timestamp_string(task)

    # Fetch hostname / memory / cpu concurrently; results_or_unknown turns
    # any per-field failure into a printable "Unknown".
    short_hostname, mem_usage, cpu_usage = await asyncio.gather(
        results_or_unknown(get_short_hostname_from_task(task)),
        results_or_unknown(get_mem_usage(task)),
        results_or_unknown(get_cpu_usage(task)),
    )

    return (
        short_task_id,
        short_hostname,
        mem_usage,
        cpu_usage,
        first_status_timestamp,
    )
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
async def format_non_running_mesos_task_row(
    task: Task, get_short_task_id: Callable[[str], str]
) -> Tuple[str, ...]:
    """Returns a pretty formatted string of a non-running mesos task attributes"""
    hostname = await results_or_unknown(get_short_hostname_from_task(task))
    cells = (
        get_short_task_id(task["id"]),
        hostname,
        get_first_status_timestamp_string(task),
        task["state"],
    )
    # Non-running tasks are rendered entirely in grey to de-emphasize them.
    return tuple(PaastaColors.grey(cell) for cell in cells)
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
@async_timeout()
async def get_tail_lines_for_mesos_task(
    task: Task, get_short_task_id: Callable[[str], str], num_tail_lines: int
) -> MutableMapping[str, Sequence[str]]:
    """Fetch the last ``num_tail_lines`` lines of stdout and stderr from a
    task's mesos sandbox.

    :param task: the mesos.Task whose sandbox files should be read.
    :param get_short_task_id: maps a full task id to a short printable id
        (used only in the error message).
    :param num_tail_lines: maximum number of lines to keep per file.
    :returns: mapping of sandbox file path -> list of lines (oldest first),
        or a single-key ``{"error_message": ...}`` mapping when the sandbox
        files could not be read.
    """
    tail_lines_dict: MutableMapping[str, Sequence[str]] = {}
    mesos_cli_config = get_mesos_config()

    try:
        fobjs = await aiter_to_list(
            cluster.get_files_for_tasks(
                task_list=[task],
                file_list=["stdout", "stderr"],
                max_workers=mesos_cli_config["max_workers"],
            )
        )
        if not fobjs:
            # No sandbox files at all: report both streams as empty.
            return {"stdout": [], "stderr": []}

        fobjs.sort(key=lambda fobj: fobj.path, reverse=True)

        for fobj in fobjs:
            # read nlines, starting from EOF
            tail = []
            lines_seen = 0

            async for line in fobj._readlines_reverse():
                tail.append(line)
                lines_seen += 1
                if lines_seen >= num_tail_lines:
                    break

            # reverse the tail, so that EOF is at the bottom again
            tail_lines_dict[fobj.path] = tail[::-1]
    except (
        mesos_exceptions.MasterNotAvailableException,
        mesos_exceptions.SlaveDoesNotExist,
        mesos_exceptions.TaskNotFoundException,
        mesos_exceptions.FileNotFoundForTaskException,
        TimeoutError,
    ) as e:
        short_task_id = get_short_task_id(task["id"])
        error_name = e.__class__.__name__
        return {
            "error_message": f"couldn't read stdout/stderr for {short_task_id} ({error_name})"
        }

    return tail_lines_dict
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
def format_tail_lines_for_mesos_task(tail_lines, task_id):
    """Render tail output (as produced by get_tail_lines_for_mesos_task)
    into a list of colorized display rows.

    :param tail_lines: object with ``.stdout``, ``.stderr`` and
        ``.error_message`` attributes (e.g. a MesosTailLines); any of the
        three may be None.
    :param task_id: mesos task id, used only to label the output.
    :returns: list of formatted strings (possibly empty).
    """
    rows = []
    # Normalize None to [] so that one missing stream no longer crashes
    # the length check / iteration below.
    stdout_lines = tail_lines.stdout or []
    stderr_lines = tail_lines.stderr or []
    if tail_lines.stderr is not None or tail_lines.stdout is not None:
        if len(stderr_lines) + len(stdout_lines) == 0:
            # typo fix: this message used to say "stderrr"
            rows.append(PaastaColors.blue(f"  no stdout/stderr for {task_id}"))
        else:
            for stdstream, lines in (
                ("stdout", stdout_lines),
                ("stderr", stderr_lines),
            ):
                rows.append(PaastaColors.blue(f"{stdstream} tail for {task_id}"))
                rows.extend(f"  {line}" for line in lines)
    elif tail_lines.error_message is not None:
        rows.append(PaastaColors.red(f"  {tail_lines.error_message}"))

    return rows
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
@async_timeout()
async def format_stdstreams_tail_for_task(task, get_short_task_id, nlines=10):
    """Return indented, colorized stdout/stderr tail lines for one task."""
    raw_tail = await get_tail_lines_for_mesos_task(
        task, get_short_task_id, nlines
    )
    tail_lines = MesosTailLines(
        stdout=raw_tail.get("stdout"),
        stderr=raw_tail.get("stderr"),
        error_message=raw_tail.get("error_message"),
    )
    formatted = format_tail_lines_for_mesos_task(tail_lines, task["id"])
    return [f"  {line}" for line in formatted]
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
def zip_tasks_verbose_output(table, stdstreams):
    """Interleave each task table row with that task's stdout/stderr tail.

    :param table: a formatted list of tasks
    :param stdstreams: for each task, a list of lines from stdout/stderr tail
    :returns: a flat list alternating each table row with its tail lines
    :raises ValueError: when the two lists differ in length
    """
    if len(table) != len(stdstreams):
        raise ValueError("Can only zip same-length lists")
    # zip + extend replaces the old index-based loop and its pointless
    # list-comprehension copy of each tail.
    output = []
    for row, tail in zip(table, stdstreams):
        output.append(row)
        output.extend(tail)
    return output
|
|
519
|
+
|
|
520
|
+
|
|
521
|
+
async def format_task_list(
    tasks: Sequence[Task],
    list_title: str,
    table_header: Sequence[str],
    get_short_task_id: Callable[[str], str],
    format_task_row: Callable[
        [Task, Callable[[str], str]], Awaitable[Union[Sequence[str], str]]
    ],
    grey: bool,
    tail_lines: int,
) -> List[str]:
    """Formats a list of tasks, returns a list of output lines
    :param tasks: List of tasks as returned by get_*_tasks_from_all_frameworks.
    :param list_title: 'Running Tasks:' or 'Non-Running Tasks'.
    :param table_header: List of column names used in the tasks table.
    :param get_short_task_id: A function which given a task_id returns a short task_id suitable for printing.
    :param format_task_row: Formatting function, works on a task and a get_short_task_id function.
    :param tail_lines (int): number of lines of stdout/stderr to tail, as obtained from the Mesos sandbox.
    :param grey: If True, the list will be made less visually prominent.
    :return output: Formatted output (list of output lines).
    """
    # Pick an identity or grey colorizer once so the rest of the body does
    # not have to branch on `grey`.
    if not grey:

        def colorize(x):
            return x

    else:

        def colorize(x):
            return PaastaColors.grey(x)

    output = []
    output.append(colorize("  %s" % list_title))
    table_rows: List[Union[str, Sequence[str]]] = [
        [colorize(th) for th in table_header]
    ]

    if tasks:
        # Render all rows concurrently, then collect results in input order.
        task_row_futures = [
            asyncio.ensure_future(format_task_row(task, get_short_task_id))
            for task in tasks
        ]
        await asyncio.wait(task_row_futures)

        for future in task_row_futures:
            table_rows.append(future.result())

    tasks_table = ["  %s" % row for row in format_table(table_rows)]
    if tail_lines == 0:
        output.extend(tasks_table)
    else:
        # NOTE(review): tails are fetched sequentially per task — presumably
        # fine since tailing is only used interactively; confirm if this
        # ever runs over many tasks.
        stdstreams = []
        for task in tasks:
            stdstreams.append(
                await format_stdstreams_tail_for_task(
                    task, get_short_task_id, nlines=tail_lines
                )
            )
        output.append(tasks_table[0])  # header
        output.extend(zip_tasks_verbose_output(tasks_table[1:], stdstreams))

    return output
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
@a_sync.to_blocking
async def status_mesos_tasks_verbose(
    filter_string: str, get_short_task_id: Callable[[str], str], tail_lines: int = 0
) -> str:
    """Returns detailed information about the mesos tasks for a service.

    :param filter_string: An id used for looking up Mesos tasks
    :param get_short_task_id: A function which given a
                              task_id returns a short task_id suitable for
                              printing.
    :param tail_lines: int representing the number of lines of stdout/err to
                       report.
    """
    output: List[str] = []
    running_and_active_tasks = select_tasks_by_id(
        await get_cached_list_of_running_tasks_from_frameworks(), filter_string
    )
    list_title = "Running Tasks:"
    table_header = [
        "Mesos Task ID",
        "Host deployed to",
        "Ram",
        "CPU",
        "Deployed at what localtime",
    ]
    output.extend(
        await format_task_list(
            tasks=running_and_active_tasks,
            list_title=list_title,
            table_header=table_header,
            get_short_task_id=get_short_task_id,
            format_task_row=format_running_mesos_task_row,
            grey=False,
            tail_lines=tail_lines,
        )
    )

    non_running_tasks = select_tasks_by_id(
        await get_cached_list_of_not_running_tasks_from_frameworks(), filter_string
    )
    # Order the tasks by timestamp
    # NOTE(review): this sorts by the *human-readable* timestamp string, not
    # the raw float; the ISO prefix keeps chronological order, but "Unknown"
    # entries sort arbitrarily — confirm whether that is intended.
    non_running_tasks.sort(key=lambda task: get_first_status_timestamp_string(task))
    # Keep only the 10 most recent non-running tasks, newest first.
    non_running_tasks_ordered = list(reversed(non_running_tasks[-10:]))

    list_title = "Non-Running Tasks"
    table_header = [
        "Mesos Task ID",
        "Host deployed to",
        "Deployed at what localtime",
        "Status",
    ]
    output.extend(
        await format_task_list(
            tasks=non_running_tasks_ordered,
            list_title=list_title,
            table_header=table_header,
            get_short_task_id=get_short_task_id,
            format_task_row=format_non_running_mesos_task_row,
            grey=True,
            tail_lines=tail_lines,
        )
    )

    return "\n".join(output)
|
|
649
|
+
|
|
650
|
+
|
|
651
|
+
def get_local_slave_state(hostname=None):
    """Fetches mesos slave state and returns it as a dict.

    :param hostname: The host from which to fetch slave state. If not specified, defaults to the local machine.
    :raises MesosSlaveConnectionError: when the slave cannot be reached."""
    if hostname is None:
        hostname = socket.getfqdn()
    stats_uri = f"http://{hostname}:{MESOS_SLAVE_PORT}/state"
    try:
        headers = {"User-Agent": get_user_agent()}
        response = requests.get(stats_uri, timeout=10, headers=headers)
        if response.status_code == 404:
            # Older mesos versions only expose the legacy /state.json endpoint.
            fallback_stats_uri = f"http://{hostname}:{MESOS_SLAVE_PORT}/state.json"
            response = requests.get(fallback_stats_uri, timeout=10, headers=headers)
    except requests.ConnectionError as e:
        raise MesosSlaveConnectionError(
            "Could not connect to the mesos slave to see which services are running\n"
            "on %s. Is the mesos-slave running?\n"
            "Error was: %s\n" % (e.request.url, str(e))
        )
    response.raise_for_status()
    return json.loads(response.text)
|
|
672
|
+
|
|
673
|
+
|
|
674
|
+
async def get_mesos_quorum():
    """Returns the configured quorum size."""
    flags = await get_master_flags()
    return int(flags["flags"]["quorum"])
|
|
677
|
+
|
|
678
|
+
|
|
679
|
+
# Alias for the freeform "resources" mapping carried on mesos tasks/slaves.
MesosResources = Mapping[str, Any]
|
|
680
|
+
|
|
681
|
+
|
|
682
|
+
class MesosTask(TypedDict):
    """Typed view of a mesos task dict as found in master state."""

    # Resource allocation (cpus, mem, ports, ...) for the task.
    resources: MesosResources
    # Id of the slave the task runs on.
    slave_id: str
    # Full mesos task id.
    id: str
    # Mesos task state string, e.g. "TASK_RUNNING".
    state: str
|
|
687
|
+
|
|
688
|
+
|
|
689
|
+
def get_all_tasks_from_state(
    mesos_state: MesosState, include_orphans: bool = False
) -> Sequence[MesosTask]:
    """Given a mesos state, find the tasks from all frameworks.

    :param mesos_state: the mesos_state
    :param include_orphans: also include tasks not attached to any framework
    :returns: a list of tasks
    """
    tasks = []
    for framework in mesos_state.get("frameworks", []):
        tasks.extend(framework.get("tasks", []))
    if include_orphans:
        tasks += mesos_state.get("orphan_tasks", [])
    return tasks
|
|
704
|
+
|
|
705
|
+
|
|
706
|
+
async def get_master_flags():
    """Fetch /master/flags from the mesos master and return the parsed JSON."""
    response = await get_mesos_master().fetch("/master/flags")
    return await response.json()
|
|
709
|
+
|
|
710
|
+
|
|
711
|
+
def get_zookeeper_host_path():
    """Parse the configured zk hosts into a ZookeeperHostPath(host, path)."""
    parsed = urlparse("zk://%s" % load_system_paasta_config().get_zk_hosts())
    return ZookeeperHostPath(host=parsed.netloc, path=parsed.path)
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
def get_zookeeper_config(state):
    """Returns dict, containing the zookeeper hosts and path.

    :param state: mesos state dictionary"""
    match = re.match(r"^zk://([^/]*)/(.*)$", state["flags"]["zk"])
    return {"hosts": match.group(1), "path": match.group(2)}
|
|
722
|
+
|
|
723
|
+
|
|
724
|
+
def get_number_of_mesos_masters(host, path):
    """Count the mesos masters registered in zookeeper.

    Masters register themselves in zookeeper by creating ``info_`` (or
    ``json.info_``) entries; counting those entries gives the number of
    masters.

    :param host: zookeeper connection string
    :param path: zookeeper path holding the master registrations
    """
    zk = KazooClient(hosts=host, read_only=True)
    zk.start()
    try:
        entries = zk.get_children(path)
        return sum(
            1 for entry in entries if entry.startswith(("json.info_", "info_"))
        )
    finally:
        zk.stop()
        zk.close()
|
|
743
|
+
|
|
744
|
+
|
|
745
|
+
def get_all_slaves_for_blacklist_whitelist(
    blacklist: DeployBlacklist, whitelist: DeployWhitelist
):
    """
    A wrapper function to get all slaves and filter according to
    provided blacklist and whitelist.

    :param blacklist: a blacklist, used to filter mesos slaves by attribute
    :param whitelist: a whitelist, used to filter mesos slaves by attribute

    :returns: a list of mesos slave objects, filtered by those which are acceptable
    according to the provided blacklist and whitelists.
    """
    return filter_mesos_slaves_by_blacklist(get_slaves(), blacklist, whitelist)
|
|
760
|
+
|
|
761
|
+
|
|
762
|
+
def get_mesos_slaves_grouped_by_attribute(slaves, attribute):
    """Group slaves by the value of one of their mesos attributes.

    Slaves missing the attribute (or with a falsy value) are dropped.

    :param slaves: a list of mesos slaves to group
    :param attribute: the attribute name to group by
    :returns: {'<attribute_value>': [<slaves with attribute=attribute_value>]}
    """

    def attr_value(slave):
        return slave["attributes"].get(attribute)

    # groupby needs its input sorted by the grouping key; slaves without the
    # attribute sort last so they form their own (discarded) group.
    ordered = sorted(
        slaves,
        key=lambda slave: (attr_value(slave) is None, attr_value(slave)),
    )
    grouped = {}
    for value, members in itertools.groupby(ordered, key=attr_value):
        if value:
            grouped[value] = list(members)
    return grouped
|
|
784
|
+
|
|
785
|
+
|
|
786
|
+
# TODO: remove to_blocking, convert call sites (smartstack_tools and marathon_serviceinit) to asyncio.
|
|
787
|
+
@a_sync.to_blocking
async def get_slaves():
    """Return the "slaves" list from the master's /master/slaves endpoint."""
    response = await get_mesos_master().fetch("/master/slaves")
    body = await response.json()
    return body["slaves"]
|
|
790
|
+
|
|
791
|
+
|
|
792
|
+
def filter_mesos_slaves_by_blacklist(
    slaves, blacklist: DeployBlacklist, whitelist: DeployWhitelist
):
    """Takes an input list of slaves and filters them based on the given blacklist.
    The blacklist is in the form of:

        [["location_type", "location]]

    Where the list inside is something like ["region", "uswest1-prod"]

    :returns: The list of mesos slaves after the filter
    """
    return [
        slave
        for slave in slaves
        if host_passes_blacklist(slave["attributes"], blacklist)
        and host_passes_whitelist(slave["attributes"], whitelist)
    ]
|
|
811
|
+
|
|
812
|
+
|
|
813
|
+
def get_container_id_for_mesos_id(client, mesos_task_id):
    """Find the docker container whose MESOS_TASK_ID env var matches.

    :param client: a docker client
    :param mesos_task_id: the mesos task id to look for
    :returns: the docker container id, or None if no container matches
    """
    matched_id = None
    needle = "MESOS_TASK_ID=%s" % mesos_task_id
    for container in client.containers():
        info = client.inspect_container(container)
        env_vars = info["Config"]["Env"]
        if env_vars and any(needle in env_var for env_var in env_vars):
            matched_id = info["Id"]
    return matched_id
|
|
826
|
+
|
|
827
|
+
|
|
828
|
+
def get_mesos_id_from_container(container, client):
    """Extract the mesos task id from a docker container's env vars.

    Checks both MESOS_TASK_ID= (marathon) and mesos_task_id= (chronos).
    :returns: the mesos task id string, or None when absent
    """
    info = client.inspect_container(container)
    env_vars = info["Config"]["Env"]
    if env_vars:
        for env_var in env_vars:
            for prefix in ("MESOS_TASK_ID=", "mesos_task_id="):
                if prefix in env_var:
                    return re.match(prefix + "(.*)", env_var).group(1)
    return None
|
|
842
|
+
|
|
843
|
+
|
|
844
|
+
def get_mesos_network_for_net(net):
    """Translate a docker network mode name into its mesos equivalent;
    unknown modes pass through unchanged."""
    translation = {"none": "NONE", "bridge": "BRIDGE", "host": "HOST"}
    return translation.get(net, net)
|
|
847
|
+
|
|
848
|
+
|
|
849
|
+
async def get_mesos_task_count_by_slave(
    mesos_state: MesosState,
    slaves_list: Optional[Sequence[Dict]] = None,
    pool: Optional[str] = None,
) -> List[Dict]:
    """Get counts of running tasks per mesos slave.

    :param mesos_state: mesos state dict
    :param slaves_list: a list of slave dicts to count running tasks for.
        NOTE(review): each entry is expected to already carry a
        "task_counts" SlaveTaskCount whose ``.slave["id"]`` is read below —
        confirm against callers. The entries are mutated in place.
    :param pool: pool of slaves to return (None means all)
    :returns: list of slave dicts {'task_count': SlaveTaskCount}
    """
    all_mesos_tasks = await get_all_running_tasks()  # empty string = all app ids
    # Index slaves by id, each with a zeroed running-task counter.
    slaves = {
        slave["id"]: {"count": 0, "slave": slave}
        for slave in mesos_state.get("slaves", [])
    }
    for task in all_mesos_tasks:
        try:
            task_slave = await task.slave()
            if task_slave["id"] not in slaves:
                log.debug("Slave {} not found for task".format(task_slave["id"]))
                continue
            else:
                slaves[task_slave["id"]]["count"] += 1
                task_framework = await task.framework()
                log.debug(f"Task framework: {task_framework.name}")
        except SlaveDoesNotExist:
            # Tasks whose slave has vanished are simply not counted.
            log.debug(
                "Tried to get mesos slaves for task {}, but none existed.".format(
                    task["id"]
                )
            )
            continue
    if slaves_list:
        for slave in slaves_list:
            slave["task_counts"] = SlaveTaskCount(
                **slaves[slave["task_counts"].slave["id"]]
            )
        slaves_with_counts = list(slaves_list)
    elif pool:
        # Slaves with no "pool" attribute belong to the "default" pool.
        slaves_with_counts = [
            {"task_counts": SlaveTaskCount(**slave_counts)}
            for slave_counts in slaves.values()
            if slave_counts["slave"]["attributes"].get("pool", "default") == pool
        ]
    else:
        slaves_with_counts = [
            {"task_counts": SlaveTaskCount(**slave_counts)}
            for slave_counts in slaves.values()
        ]
    for slave in slaves_with_counts:
        log.debug(
            "Slave: {}, running {} tasks".format(
                slave["task_counts"].slave["hostname"],
                slave["task_counts"].count,
            )
        )
    return slaves_with_counts
|
|
908
|
+
|
|
909
|
+
|
|
910
|
+
def get_count_running_tasks_on_slave(hostname: str) -> int:
    """Return the number of tasks running on a particular slave
    or 0 if the slave is not found.
    :param hostname: hostname of the slave
    :returns: integer count of mesos tasks"""
    state = a_sync.block(get_mesos_master().state_summary)
    slave_counts = a_sync.block(get_mesos_task_count_by_slave, state)
    for entry in slave_counts:
        if entry["task_counts"].slave["hostname"] == hostname:
            return entry["task_counts"].count
    return 0
|
|
926
|
+
|
|
927
|
+
|
|
928
|
+
def slave_pid_to_ip(slave_pid: str) -> str:
    """Convert slave_pid to IP

    :param: slave pid e.g. slave(1)@10.40.31.172:5051
    :returns: ip address"""
    return re.match(r".+?@([\d\.]+):\d+", slave_pid).group(1)
|
|
935
|
+
|
|
936
|
+
|
|
937
|
+
async def list_framework_ids(active_only=False):
    """Return the ids of all frameworks known to the mesos master."""
    frameworks = await get_mesos_master().frameworks(active_only=active_only)
    return [framework.id for framework in frameworks]
|
|
939
|
+
|
|
940
|
+
|
|
941
|
+
@a_sync.to_blocking
async def get_all_frameworks(active_only=False):
    """Blocking fetch of all frameworks known to the mesos master."""
    master = get_mesos_master()
    return await master.frameworks(active_only=active_only)
|
|
944
|
+
|
|
945
|
+
|
|
946
|
+
def terminate_framework(framework_id):
    """Ask the mesos master to tear down the given framework.

    :raises requests.HTTPError: when the master rejects the teardown
    """
    teardown_url = "http://%s:%d/master/teardown" % (
        get_mesos_leader(),
        MESOS_MASTER_PORT,
    )
    response = requests.post(teardown_url, data={"frameworkId": framework_id})
    response.raise_for_status()
|
|
952
|
+
|
|
953
|
+
|
|
954
|
+
async def get_tasks_from_app_id(app_id, slave_hostname=None):
    """Running tasks for an app, optionally restricted to one slave."""
    tasks = await get_running_tasks_from_frameworks(app_id)
    if not slave_hostname:
        return tasks
    filtered = []
    for task in tasks:
        if await filter_task_by_hostname(task, slave_hostname):
            filtered.append(task)
    return filtered
|
|
963
|
+
|
|
964
|
+
|
|
965
|
+
async def get_task(task_id: str, app_id: str = "") -> MesosTask:
    """Find exactly one running task by id.

    :raises TaskNotFound: when no running task matches
    :raises TooManyTasks: when more than one task matches
    """
    candidates = [
        task
        for task in await get_running_tasks_from_frameworks(app_id)
        if filter_task_by_task_id(task, task_id)
    ]
    if not candidates:
        raise TaskNotFound(f"Couldn't find task for given id: {task_id}")
    if len(candidates) > 1:
        raise TooManyTasks(
            f"Found more than one task with id: {task_id}, this should not happen!"
        )
    return candidates[0]
|
|
975
|
+
|
|
976
|
+
|
|
977
|
+
def filter_task_by_task_id(task: MesosTask, task_id: str) -> bool:
    """True when the task's id matches *task_id* exactly."""
    return task_id == task["id"]
|
|
979
|
+
|
|
980
|
+
|
|
981
|
+
async def filter_task_by_hostname(task, hostname):
    """True when the task's slave hostname starts with the given prefix."""
    slave = await task.slave()
    return slave["hostname"].startswith(hostname)
|
|
983
|
+
|
|
984
|
+
|
|
985
|
+
class TaskNotFound(Exception):
    """Raised when no running mesos task matches a requested task id."""

    pass
|
|
987
|
+
|
|
988
|
+
|
|
989
|
+
class TooManyTasks(Exception):
    """Raised when more than one running mesos task matches a task id."""

    pass
|
|
991
|
+
|
|
992
|
+
|
|
993
|
+
# TODO: async this
|
|
994
|
+
def mesos_services_running_here(
    framework_filter, parse_service_instance_from_executor_id, hostname=None
):
    """See what paasta_native services are being run by a mesos-slave on this host.

    :param framework_filter: a function that returns true if we should consider a given framework.
    :param parse_service_instance_from_executor_id: A function that returns a tuple of
        (service, instance) from the executor ID.
    :param hostname: Hostname to fetch mesos slave state from. See get_local_slave_state.

    :returns: A list of triples of (service, instance, port)"""
    slave_state = get_local_slave_state(hostname=hostname)
    srv_list = []
    for framework in slave_state.get("frameworks", []):
        if not framework_filter(framework):
            continue
        for executor in framework.get("executors", []):
            # Only executors with at least one currently-running task count.
            task_states = [t["state"] for t in executor.get("tasks", [])]
            if "TASK_RUNNING" not in task_states:
                continue
            try:
                srv_name, srv_instance = parse_service_instance_from_executor_id(
                    executor["id"]
                )
            except ValueError:
                log.error(
                    "Failed to decode paasta service instance from {}".format(
                        executor["id"]
                    )
                )
                continue
            if "ports" in executor["resources"]:
                srv_port = int(re.findall("[0-9]+", executor["resources"]["ports"])[0])
            else:
                srv_port = None
            srv_list.append((srv_name, srv_instance, srv_port))
    return srv_list
|
|
1034
|
+
|
|
1035
|
+
|
|
1036
|
+
def is_task_terminal(
    task: MesosTask,
) -> bool:
    """Return whether a given mesos task is terminal.

    Terminal states are documented in
    http://mesos.apache.org/api/latest/java/org/apache/mesos/Protos.TaskState.html

    :param task: the task to be inspected
    :returns: a boolean indicating if the task is considered to be in a terminal state
    """
    # TERMINAL_STATES is a module-level constant defined elsewhere in this file.
    return task["state"] in TERMINAL_STATES
|
|
1048
|
+
|
|
1049
|
+
|
|
1050
|
+
def is_mesos_available() -> bool:
    """Cheap availability probe: mesos is considered present when its CLI
    config file exists on disk."""
    return Path(get_mesos_config_path()).exists()
|