paasta-tools 1.21.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- k8s_itests/__init__.py +0 -0
- k8s_itests/test_autoscaling.py +23 -0
- k8s_itests/utils.py +38 -0
- paasta_tools/__init__.py +20 -0
- paasta_tools/adhoc_tools.py +142 -0
- paasta_tools/api/__init__.py +13 -0
- paasta_tools/api/api.py +330 -0
- paasta_tools/api/api_docs/swagger.json +2323 -0
- paasta_tools/api/client.py +106 -0
- paasta_tools/api/settings.py +33 -0
- paasta_tools/api/tweens/__init__.py +6 -0
- paasta_tools/api/tweens/auth.py +125 -0
- paasta_tools/api/tweens/profiling.py +108 -0
- paasta_tools/api/tweens/request_logger.py +124 -0
- paasta_tools/api/views/__init__.py +13 -0
- paasta_tools/api/views/autoscaler.py +100 -0
- paasta_tools/api/views/exception.py +45 -0
- paasta_tools/api/views/flink.py +73 -0
- paasta_tools/api/views/instance.py +395 -0
- paasta_tools/api/views/pause_autoscaler.py +71 -0
- paasta_tools/api/views/remote_run.py +113 -0
- paasta_tools/api/views/resources.py +76 -0
- paasta_tools/api/views/service.py +35 -0
- paasta_tools/api/views/version.py +25 -0
- paasta_tools/apply_external_resources.py +79 -0
- paasta_tools/async_utils.py +109 -0
- paasta_tools/autoscaling/__init__.py +0 -0
- paasta_tools/autoscaling/autoscaling_service_lib.py +57 -0
- paasta_tools/autoscaling/forecasting.py +106 -0
- paasta_tools/autoscaling/max_all_k8s_services.py +41 -0
- paasta_tools/autoscaling/pause_service_autoscaler.py +77 -0
- paasta_tools/autoscaling/utils.py +52 -0
- paasta_tools/bounce_lib.py +184 -0
- paasta_tools/broadcast_log_to_services.py +62 -0
- paasta_tools/cassandracluster_tools.py +210 -0
- paasta_tools/check_autoscaler_max_instances.py +212 -0
- paasta_tools/check_cassandracluster_services_replication.py +35 -0
- paasta_tools/check_flink_services_health.py +203 -0
- paasta_tools/check_kubernetes_api.py +57 -0
- paasta_tools/check_kubernetes_services_replication.py +141 -0
- paasta_tools/check_oom_events.py +244 -0
- paasta_tools/check_services_replication_tools.py +324 -0
- paasta_tools/check_spark_jobs.py +234 -0
- paasta_tools/cleanup_kubernetes_cr.py +138 -0
- paasta_tools/cleanup_kubernetes_crd.py +145 -0
- paasta_tools/cleanup_kubernetes_jobs.py +344 -0
- paasta_tools/cleanup_tron_namespaces.py +96 -0
- paasta_tools/cli/__init__.py +13 -0
- paasta_tools/cli/authentication.py +85 -0
- paasta_tools/cli/cli.py +260 -0
- paasta_tools/cli/cmds/__init__.py +13 -0
- paasta_tools/cli/cmds/autoscale.py +143 -0
- paasta_tools/cli/cmds/check.py +334 -0
- paasta_tools/cli/cmds/cook_image.py +147 -0
- paasta_tools/cli/cmds/get_docker_image.py +76 -0
- paasta_tools/cli/cmds/get_image_version.py +172 -0
- paasta_tools/cli/cmds/get_latest_deployment.py +93 -0
- paasta_tools/cli/cmds/info.py +155 -0
- paasta_tools/cli/cmds/itest.py +117 -0
- paasta_tools/cli/cmds/list.py +66 -0
- paasta_tools/cli/cmds/list_clusters.py +42 -0
- paasta_tools/cli/cmds/list_deploy_queue.py +171 -0
- paasta_tools/cli/cmds/list_namespaces.py +84 -0
- paasta_tools/cli/cmds/local_run.py +1396 -0
- paasta_tools/cli/cmds/logs.py +1601 -0
- paasta_tools/cli/cmds/mark_for_deployment.py +1988 -0
- paasta_tools/cli/cmds/mesh_status.py +174 -0
- paasta_tools/cli/cmds/pause_service_autoscaler.py +107 -0
- paasta_tools/cli/cmds/push_to_registry.py +275 -0
- paasta_tools/cli/cmds/remote_run.py +252 -0
- paasta_tools/cli/cmds/rollback.py +347 -0
- paasta_tools/cli/cmds/secret.py +549 -0
- paasta_tools/cli/cmds/security_check.py +59 -0
- paasta_tools/cli/cmds/spark_run.py +1400 -0
- paasta_tools/cli/cmds/start_stop_restart.py +401 -0
- paasta_tools/cli/cmds/status.py +2302 -0
- paasta_tools/cli/cmds/validate.py +1012 -0
- paasta_tools/cli/cmds/wait_for_deployment.py +275 -0
- paasta_tools/cli/fsm/__init__.py +13 -0
- paasta_tools/cli/fsm/autosuggest.py +82 -0
- paasta_tools/cli/fsm/template/README.md +8 -0
- paasta_tools/cli/fsm/template/cookiecutter.json +7 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/kubernetes-PROD.yaml +91 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/monitoring.yaml +20 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/service.yaml +8 -0
- paasta_tools/cli/fsm/template/{{cookiecutter.service}}/smartstack.yaml +6 -0
- paasta_tools/cli/fsm_cmd.py +121 -0
- paasta_tools/cli/paasta_tabcomplete.sh +23 -0
- paasta_tools/cli/schemas/adhoc_schema.json +199 -0
- paasta_tools/cli/schemas/autoscaling_schema.json +91 -0
- paasta_tools/cli/schemas/autotuned_defaults/cassandracluster_schema.json +37 -0
- paasta_tools/cli/schemas/autotuned_defaults/kubernetes_schema.json +89 -0
- paasta_tools/cli/schemas/deploy_schema.json +173 -0
- paasta_tools/cli/schemas/eks_schema.json +970 -0
- paasta_tools/cli/schemas/kubernetes_schema.json +970 -0
- paasta_tools/cli/schemas/rollback_schema.json +160 -0
- paasta_tools/cli/schemas/service_schema.json +25 -0
- paasta_tools/cli/schemas/smartstack_schema.json +322 -0
- paasta_tools/cli/schemas/tron_schema.json +699 -0
- paasta_tools/cli/utils.py +1118 -0
- paasta_tools/clusterman.py +21 -0
- paasta_tools/config_utils.py +385 -0
- paasta_tools/contrib/__init__.py +0 -0
- paasta_tools/contrib/bounce_log_latency_parser.py +68 -0
- paasta_tools/contrib/check_manual_oapi_changes.sh +24 -0
- paasta_tools/contrib/check_orphans.py +306 -0
- paasta_tools/contrib/create_dynamodb_table.py +35 -0
- paasta_tools/contrib/create_paasta_playground.py +105 -0
- paasta_tools/contrib/emit_allocated_cpu_metrics.py +50 -0
- paasta_tools/contrib/get_running_task_allocation.py +346 -0
- paasta_tools/contrib/habitat_fixer.py +86 -0
- paasta_tools/contrib/ide_helper.py +316 -0
- paasta_tools/contrib/is_pod_healthy_in_proxy.py +139 -0
- paasta_tools/contrib/is_pod_healthy_in_smartstack.py +50 -0
- paasta_tools/contrib/kill_bad_containers.py +109 -0
- paasta_tools/contrib/mass-deploy-tag.sh +44 -0
- paasta_tools/contrib/mock_patch_checker.py +86 -0
- paasta_tools/contrib/paasta_update_soa_memcpu.py +520 -0
- paasta_tools/contrib/render_template.py +129 -0
- paasta_tools/contrib/rightsizer_soaconfigs_update.py +348 -0
- paasta_tools/contrib/service_shard_remove.py +157 -0
- paasta_tools/contrib/service_shard_update.py +373 -0
- paasta_tools/contrib/shared_ip_check.py +77 -0
- paasta_tools/contrib/timeouts_metrics_prom.py +64 -0
- paasta_tools/delete_kubernetes_deployments.py +89 -0
- paasta_tools/deployment_utils.py +44 -0
- paasta_tools/docker_wrapper.py +234 -0
- paasta_tools/docker_wrapper_imports.py +13 -0
- paasta_tools/drain_lib.py +351 -0
- paasta_tools/dump_locally_running_services.py +71 -0
- paasta_tools/eks_tools.py +119 -0
- paasta_tools/envoy_tools.py +373 -0
- paasta_tools/firewall.py +504 -0
- paasta_tools/firewall_logging.py +154 -0
- paasta_tools/firewall_update.py +172 -0
- paasta_tools/flink_tools.py +345 -0
- paasta_tools/flinkeks_tools.py +90 -0
- paasta_tools/frameworks/__init__.py +0 -0
- paasta_tools/frameworks/adhoc_scheduler.py +71 -0
- paasta_tools/frameworks/constraints.py +87 -0
- paasta_tools/frameworks/native_scheduler.py +652 -0
- paasta_tools/frameworks/native_service_config.py +301 -0
- paasta_tools/frameworks/task_store.py +245 -0
- paasta_tools/generate_all_deployments +9 -0
- paasta_tools/generate_authenticating_services.py +94 -0
- paasta_tools/generate_deployments_for_service.py +255 -0
- paasta_tools/generate_services_file.py +114 -0
- paasta_tools/generate_services_yaml.py +30 -0
- paasta_tools/hacheck.py +76 -0
- paasta_tools/instance/__init__.py +0 -0
- paasta_tools/instance/hpa_metrics_parser.py +122 -0
- paasta_tools/instance/kubernetes.py +1362 -0
- paasta_tools/iptables.py +240 -0
- paasta_tools/kafkacluster_tools.py +143 -0
- paasta_tools/kubernetes/__init__.py +0 -0
- paasta_tools/kubernetes/application/__init__.py +0 -0
- paasta_tools/kubernetes/application/controller_wrappers.py +476 -0
- paasta_tools/kubernetes/application/tools.py +90 -0
- paasta_tools/kubernetes/bin/__init__.py +0 -0
- paasta_tools/kubernetes/bin/kubernetes_remove_evicted_pods.py +164 -0
- paasta_tools/kubernetes/bin/paasta_cleanup_remote_run_resources.py +135 -0
- paasta_tools/kubernetes/bin/paasta_cleanup_stale_nodes.py +181 -0
- paasta_tools/kubernetes/bin/paasta_secrets_sync.py +758 -0
- paasta_tools/kubernetes/remote_run.py +558 -0
- paasta_tools/kubernetes_tools.py +4679 -0
- paasta_tools/list_kubernetes_service_instances.py +128 -0
- paasta_tools/list_tron_namespaces.py +60 -0
- paasta_tools/long_running_service_tools.py +678 -0
- paasta_tools/mac_address.py +44 -0
- paasta_tools/marathon_dashboard.py +0 -0
- paasta_tools/mesos/__init__.py +0 -0
- paasta_tools/mesos/cfg.py +46 -0
- paasta_tools/mesos/cluster.py +60 -0
- paasta_tools/mesos/exceptions.py +59 -0
- paasta_tools/mesos/framework.py +77 -0
- paasta_tools/mesos/log.py +48 -0
- paasta_tools/mesos/master.py +306 -0
- paasta_tools/mesos/mesos_file.py +169 -0
- paasta_tools/mesos/parallel.py +52 -0
- paasta_tools/mesos/slave.py +115 -0
- paasta_tools/mesos/task.py +94 -0
- paasta_tools/mesos/util.py +69 -0
- paasta_tools/mesos/zookeeper.py +37 -0
- paasta_tools/mesos_maintenance.py +848 -0
- paasta_tools/mesos_tools.py +1051 -0
- paasta_tools/metrics/__init__.py +0 -0
- paasta_tools/metrics/metastatus_lib.py +1110 -0
- paasta_tools/metrics/metrics_lib.py +217 -0
- paasta_tools/monitoring/__init__.py +13 -0
- paasta_tools/monitoring/check_k8s_api_performance.py +110 -0
- paasta_tools/monitoring_tools.py +652 -0
- paasta_tools/monkrelaycluster_tools.py +146 -0
- paasta_tools/nrtsearchservice_tools.py +143 -0
- paasta_tools/nrtsearchserviceeks_tools.py +68 -0
- paasta_tools/oom_logger.py +321 -0
- paasta_tools/paasta_deploy_tron_jobs +3 -0
- paasta_tools/paasta_execute_docker_command.py +123 -0
- paasta_tools/paasta_native_serviceinit.py +21 -0
- paasta_tools/paasta_service_config_loader.py +201 -0
- paasta_tools/paastaapi/__init__.py +29 -0
- paasta_tools/paastaapi/api/__init__.py +3 -0
- paasta_tools/paastaapi/api/autoscaler_api.py +302 -0
- paasta_tools/paastaapi/api/default_api.py +569 -0
- paasta_tools/paastaapi/api/remote_run_api.py +604 -0
- paasta_tools/paastaapi/api/resources_api.py +157 -0
- paasta_tools/paastaapi/api/service_api.py +1736 -0
- paasta_tools/paastaapi/api_client.py +818 -0
- paasta_tools/paastaapi/apis/__init__.py +22 -0
- paasta_tools/paastaapi/configuration.py +455 -0
- paasta_tools/paastaapi/exceptions.py +137 -0
- paasta_tools/paastaapi/model/__init__.py +5 -0
- paasta_tools/paastaapi/model/adhoc_launch_history.py +176 -0
- paasta_tools/paastaapi/model/autoscaler_count_msg.py +176 -0
- paasta_tools/paastaapi/model/deploy_queue.py +178 -0
- paasta_tools/paastaapi/model/deploy_queue_service_instance.py +194 -0
- paasta_tools/paastaapi/model/envoy_backend.py +185 -0
- paasta_tools/paastaapi/model/envoy_location.py +184 -0
- paasta_tools/paastaapi/model/envoy_status.py +181 -0
- paasta_tools/paastaapi/model/flink_cluster_overview.py +188 -0
- paasta_tools/paastaapi/model/flink_config.py +173 -0
- paasta_tools/paastaapi/model/flink_job.py +186 -0
- paasta_tools/paastaapi/model/flink_job_details.py +192 -0
- paasta_tools/paastaapi/model/flink_jobs.py +175 -0
- paasta_tools/paastaapi/model/float_and_error.py +173 -0
- paasta_tools/paastaapi/model/hpa_metric.py +176 -0
- paasta_tools/paastaapi/model/inline_object.py +170 -0
- paasta_tools/paastaapi/model/inline_response200.py +170 -0
- paasta_tools/paastaapi/model/inline_response2001.py +170 -0
- paasta_tools/paastaapi/model/instance_bounce_status.py +200 -0
- paasta_tools/paastaapi/model/instance_mesh_status.py +186 -0
- paasta_tools/paastaapi/model/instance_status.py +220 -0
- paasta_tools/paastaapi/model/instance_status_adhoc.py +187 -0
- paasta_tools/paastaapi/model/instance_status_cassandracluster.py +173 -0
- paasta_tools/paastaapi/model/instance_status_flink.py +173 -0
- paasta_tools/paastaapi/model/instance_status_kafkacluster.py +173 -0
- paasta_tools/paastaapi/model/instance_status_kubernetes.py +263 -0
- paasta_tools/paastaapi/model/instance_status_kubernetes_autoscaling_status.py +187 -0
- paasta_tools/paastaapi/model/instance_status_kubernetes_v2.py +197 -0
- paasta_tools/paastaapi/model/instance_status_tron.py +204 -0
- paasta_tools/paastaapi/model/instance_tasks.py +182 -0
- paasta_tools/paastaapi/model/integer_and_error.py +173 -0
- paasta_tools/paastaapi/model/kubernetes_container.py +178 -0
- paasta_tools/paastaapi/model/kubernetes_container_v2.py +219 -0
- paasta_tools/paastaapi/model/kubernetes_healthcheck.py +176 -0
- paasta_tools/paastaapi/model/kubernetes_pod.py +201 -0
- paasta_tools/paastaapi/model/kubernetes_pod_event.py +176 -0
- paasta_tools/paastaapi/model/kubernetes_pod_v2.py +213 -0
- paasta_tools/paastaapi/model/kubernetes_replica_set.py +185 -0
- paasta_tools/paastaapi/model/kubernetes_version.py +202 -0
- paasta_tools/paastaapi/model/remote_run_outcome.py +189 -0
- paasta_tools/paastaapi/model/remote_run_start.py +185 -0
- paasta_tools/paastaapi/model/remote_run_stop.py +176 -0
- paasta_tools/paastaapi/model/remote_run_token.py +173 -0
- paasta_tools/paastaapi/model/resource.py +187 -0
- paasta_tools/paastaapi/model/resource_item.py +187 -0
- paasta_tools/paastaapi/model/resource_value.py +176 -0
- paasta_tools/paastaapi/model/smartstack_backend.py +191 -0
- paasta_tools/paastaapi/model/smartstack_location.py +181 -0
- paasta_tools/paastaapi/model/smartstack_status.py +181 -0
- paasta_tools/paastaapi/model/task_tail_lines.py +176 -0
- paasta_tools/paastaapi/model_utils.py +1879 -0
- paasta_tools/paastaapi/models/__init__.py +62 -0
- paasta_tools/paastaapi/rest.py +287 -0
- paasta_tools/prune_completed_pods.py +220 -0
- paasta_tools/puppet_service_tools.py +59 -0
- paasta_tools/py.typed +1 -0
- paasta_tools/remote_git.py +127 -0
- paasta_tools/run-paasta-api-in-dev-mode.py +57 -0
- paasta_tools/run-paasta-api-playground.py +51 -0
- paasta_tools/secret_providers/__init__.py +66 -0
- paasta_tools/secret_providers/vault.py +214 -0
- paasta_tools/secret_tools.py +277 -0
- paasta_tools/setup_istio_mesh.py +353 -0
- paasta_tools/setup_kubernetes_cr.py +412 -0
- paasta_tools/setup_kubernetes_crd.py +138 -0
- paasta_tools/setup_kubernetes_internal_crd.py +154 -0
- paasta_tools/setup_kubernetes_job.py +353 -0
- paasta_tools/setup_prometheus_adapter_config.py +1028 -0
- paasta_tools/setup_tron_namespace.py +248 -0
- paasta_tools/slack.py +75 -0
- paasta_tools/smartstack_tools.py +676 -0
- paasta_tools/spark_tools.py +283 -0
- paasta_tools/synapse_srv_namespaces_fact.py +42 -0
- paasta_tools/tron/__init__.py +0 -0
- paasta_tools/tron/client.py +158 -0
- paasta_tools/tron/tron_command_context.py +194 -0
- paasta_tools/tron/tron_timeutils.py +101 -0
- paasta_tools/tron_tools.py +1448 -0
- paasta_tools/utils.py +4307 -0
- paasta_tools/yaml_tools.py +44 -0
- paasta_tools-1.21.3.data/scripts/apply_external_resources.py +79 -0
- paasta_tools-1.21.3.data/scripts/bounce_log_latency_parser.py +68 -0
- paasta_tools-1.21.3.data/scripts/check_autoscaler_max_instances.py +212 -0
- paasta_tools-1.21.3.data/scripts/check_cassandracluster_services_replication.py +35 -0
- paasta_tools-1.21.3.data/scripts/check_flink_services_health.py +203 -0
- paasta_tools-1.21.3.data/scripts/check_kubernetes_api.py +57 -0
- paasta_tools-1.21.3.data/scripts/check_kubernetes_services_replication.py +141 -0
- paasta_tools-1.21.3.data/scripts/check_manual_oapi_changes.sh +24 -0
- paasta_tools-1.21.3.data/scripts/check_oom_events.py +244 -0
- paasta_tools-1.21.3.data/scripts/check_orphans.py +306 -0
- paasta_tools-1.21.3.data/scripts/check_spark_jobs.py +234 -0
- paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_cr.py +138 -0
- paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_crd.py +145 -0
- paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_jobs.py +344 -0
- paasta_tools-1.21.3.data/scripts/create_dynamodb_table.py +35 -0
- paasta_tools-1.21.3.data/scripts/create_paasta_playground.py +105 -0
- paasta_tools-1.21.3.data/scripts/delete_kubernetes_deployments.py +89 -0
- paasta_tools-1.21.3.data/scripts/emit_allocated_cpu_metrics.py +50 -0
- paasta_tools-1.21.3.data/scripts/generate_all_deployments +9 -0
- paasta_tools-1.21.3.data/scripts/generate_authenticating_services.py +94 -0
- paasta_tools-1.21.3.data/scripts/generate_deployments_for_service.py +255 -0
- paasta_tools-1.21.3.data/scripts/generate_services_file.py +114 -0
- paasta_tools-1.21.3.data/scripts/generate_services_yaml.py +30 -0
- paasta_tools-1.21.3.data/scripts/get_running_task_allocation.py +346 -0
- paasta_tools-1.21.3.data/scripts/habitat_fixer.py +86 -0
- paasta_tools-1.21.3.data/scripts/ide_helper.py +316 -0
- paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_proxy.py +139 -0
- paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_smartstack.py +50 -0
- paasta_tools-1.21.3.data/scripts/kill_bad_containers.py +109 -0
- paasta_tools-1.21.3.data/scripts/kubernetes_remove_evicted_pods.py +164 -0
- paasta_tools-1.21.3.data/scripts/mass-deploy-tag.sh +44 -0
- paasta_tools-1.21.3.data/scripts/mock_patch_checker.py +86 -0
- paasta_tools-1.21.3.data/scripts/paasta_cleanup_remote_run_resources.py +135 -0
- paasta_tools-1.21.3.data/scripts/paasta_cleanup_stale_nodes.py +181 -0
- paasta_tools-1.21.3.data/scripts/paasta_deploy_tron_jobs +3 -0
- paasta_tools-1.21.3.data/scripts/paasta_execute_docker_command.py +123 -0
- paasta_tools-1.21.3.data/scripts/paasta_secrets_sync.py +758 -0
- paasta_tools-1.21.3.data/scripts/paasta_tabcomplete.sh +23 -0
- paasta_tools-1.21.3.data/scripts/paasta_update_soa_memcpu.py +520 -0
- paasta_tools-1.21.3.data/scripts/render_template.py +129 -0
- paasta_tools-1.21.3.data/scripts/rightsizer_soaconfigs_update.py +348 -0
- paasta_tools-1.21.3.data/scripts/service_shard_remove.py +157 -0
- paasta_tools-1.21.3.data/scripts/service_shard_update.py +373 -0
- paasta_tools-1.21.3.data/scripts/setup_istio_mesh.py +353 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_cr.py +412 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_crd.py +138 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_internal_crd.py +154 -0
- paasta_tools-1.21.3.data/scripts/setup_kubernetes_job.py +353 -0
- paasta_tools-1.21.3.data/scripts/setup_prometheus_adapter_config.py +1028 -0
- paasta_tools-1.21.3.data/scripts/shared_ip_check.py +77 -0
- paasta_tools-1.21.3.data/scripts/synapse_srv_namespaces_fact.py +42 -0
- paasta_tools-1.21.3.data/scripts/timeouts_metrics_prom.py +64 -0
- paasta_tools-1.21.3.dist-info/LICENSE +201 -0
- paasta_tools-1.21.3.dist-info/METADATA +74 -0
- paasta_tools-1.21.3.dist-info/RECORD +348 -0
- paasta_tools-1.21.3.dist-info/WHEEL +5 -0
- paasta_tools-1.21.3.dist-info/entry_points.txt +20 -0
- paasta_tools-1.21.3.dist-info/top_level.txt +2 -0
paasta_tools/cli/cmds/status.py
@@ -0,0 +1,2302 @@
+#!/usr/bin/env python
+# Copyright 2015-2016 Yelp Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import asyncio
+import concurrent.futures
+import difflib
+import shutil
+import sys
+from collections import Counter
+from collections import defaultdict
+from datetime import datetime
+from datetime import timedelta
+from datetime import timezone
+from enum import Enum
+from itertools import groupby
+from threading import Lock
+from typing import Any
+from typing import Callable
+from typing import Collection
+from typing import DefaultDict
+from typing import Dict
+from typing import Iterable
+from typing import List
+from typing import Mapping
+from typing import Optional
+from typing import Sequence
+from typing import Tuple
+from typing import Type
+from typing import Union
+
+import a_sync
+import humanize
+from mypy_extensions import Arg
+from service_configuration_lib import read_deploy
+
+from paasta_tools import flink_tools
+from paasta_tools import kubernetes_tools
+from paasta_tools.adhoc_tools import AdhocJobConfig
+from paasta_tools.api.client import get_paasta_oapi_client
+from paasta_tools.api.client import PaastaOApiClient
+from paasta_tools.cassandracluster_tools import CassandraClusterDeploymentConfig
+from paasta_tools.cli.utils import figure_out_service_name
+from paasta_tools.cli.utils import get_instance_configs_for_service
+from paasta_tools.cli.utils import get_paasta_oapi_api_clustername
+from paasta_tools.cli.utils import lazy_choices_completer
+from paasta_tools.cli.utils import list_deploy_groups
+from paasta_tools.cli.utils import NoSuchService
+from paasta_tools.cli.utils import validate_service_name
+from paasta_tools.cli.utils import verify_instances
+from paasta_tools.eks_tools import EksDeploymentConfig
+from paasta_tools.flink_tools import FlinkDeploymentConfig
+from paasta_tools.flink_tools import get_flink_config_from_paasta_api_client
+from paasta_tools.flink_tools import get_flink_jobs_from_paasta_api_client
+from paasta_tools.flink_tools import get_flink_overview_from_paasta_api_client
+from paasta_tools.flinkeks_tools import FlinkEksDeploymentConfig
+from paasta_tools.kafkacluster_tools import KafkaClusterDeploymentConfig
+from paasta_tools.kubernetes_tools import format_pod_event_messages
+from paasta_tools.kubernetes_tools import format_tail_lines_for_kubernetes_pod
+from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig
+from paasta_tools.kubernetes_tools import KubernetesDeployStatus
+from paasta_tools.kubernetes_tools import paasta_prefixed
+from paasta_tools.monitoring_tools import get_team
+from paasta_tools.monitoring_tools import list_teams
+from paasta_tools.paasta_service_config_loader import PaastaServiceConfigLoader
+from paasta_tools.paastaapi.model.flink_job_details import FlinkJobDetails
+from paasta_tools.paastaapi.model.flink_jobs import FlinkJobs
+from paasta_tools.paastaapi.models import InstanceStatusKubernetesV2
+from paasta_tools.paastaapi.models import KubernetesContainerV2
+from paasta_tools.paastaapi.models import KubernetesPodV2
+from paasta_tools.paastaapi.models import KubernetesVersion
+from paasta_tools.tron_tools import TronActionConfig
+from paasta_tools.utils import compose_job_id
+from paasta_tools.utils import DEFAULT_SOA_DIR
+from paasta_tools.utils import DeploymentVersion
+from paasta_tools.utils import format_table
+from paasta_tools.utils import get_deployment_version_from_dockerurl
+from paasta_tools.utils import get_soa_cluster_deploy_files
+from paasta_tools.utils import InstanceConfig
+from paasta_tools.utils import is_under_replicated
+from paasta_tools.utils import list_clusters
+from paasta_tools.utils import list_services
+from paasta_tools.utils import load_system_paasta_config
+from paasta_tools.utils import PaastaColors
+from paasta_tools.utils import remove_ansi_escape_sequences
+from paasta_tools.utils import SystemPaastaConfig
+
+FLINK_STATUS_MAX_THREAD_POOL_WORKERS = 50
+ALLOWED_INSTANCE_CONFIG: Sequence[Type[InstanceConfig]] = [
+    FlinkDeploymentConfig,
+    FlinkEksDeploymentConfig,
+    CassandraClusterDeploymentConfig,
+    KafkaClusterDeploymentConfig,
+    KubernetesDeploymentConfig,
+    EksDeploymentConfig,
+    AdhocJobConfig,
+    TronActionConfig,
+]
+
+# Tron instances are not included in deployments, so skip these InstanceConfigs
+DEPLOYMENT_INSTANCE_CONFIG: Sequence[Type[InstanceConfig]] = [
+    FlinkDeploymentConfig,
+    FlinkEksDeploymentConfig,
+    CassandraClusterDeploymentConfig,
+    KafkaClusterDeploymentConfig,
+    KubernetesDeploymentConfig,
+    EksDeploymentConfig,
+    AdhocJobConfig,
+]
+
+InstanceStatusWriter = Callable[
+    [
+        Arg(str, "cluster"),
+        Arg(str, "service"),
+        Arg(str, "instance"),
+        Arg(List[str], "output"),
+        Arg(Any),
+        Arg(int, "verbose"),
+    ],
+    int,
+]
+
+EKS_DEPLOYMENT_CONFIGS = [
+    EksDeploymentConfig,
+    FlinkEksDeploymentConfig,
+]
+FLINK_DEPLOYMENT_CONFIGS = [FlinkDeploymentConfig, FlinkEksDeploymentConfig]
+
+
+def add_subparser(
+    subparsers,
+) -> None:
+    status_parser = subparsers.add_parser(
+        "status",
+        help="Display the status of a PaaSTA service.",
+        description=(
+            "'paasta status' queries the PaaSTA API in order to report "
+            "on the overall health of a service."
+        ),
+    )
+    status_parser.add_argument(
+        "-v",
+        "--verbose",
+        action="count",
+        dest="verbose",
+        default=0,
+        help="Print out more output regarding the state of the service. "
+        "A second -v will also print the stdout/stderr tail.",
+    )
+    status_parser.add_argument(
+        "-d",
+        "--soa-dir",
+        dest="soa_dir",
+        metavar="SOA_DIR",
+        default=DEFAULT_SOA_DIR,
+        help="define a different soa config directory",
+    )
+    status_parser.add_argument(
+        "-A",
+        "--all-namespaces",
+        dest="all_namespaces",
+        action="store_true",
+        default=False,
+        help="Search all PaaSTA-managed namespaces for possible running versions (Will search only your currently-configured namespace by default). Useful if you are moving your instance(s) to a new namespace",
+    )
+
+    version = status_parser.add_mutually_exclusive_group()
+
+    version.add_argument(
+        "--new",
+        dest="new",
+        action="store_true",
+        default=False,
+        help="Use experimental new version of paasta status for services",
+    )
+    version.add_argument(
+        "--old",
+        dest="old",
+        default=False,
+        action="store_true",
+        help="Use the old version of paasta status for services",
+    )
+
+    add_instance_filter_arguments(status_parser)
+    status_parser.set_defaults(command=paasta_status)
+
+
+def add_instance_filter_arguments(status_parser, verb: str = "inspect") -> None:
+    status_parser.add_argument(
+        "-s", "--service", help=f"The name of the service you wish to {verb}"
+    ).completer = lazy_choices_completer(list_services)
+    status_parser.add_argument(
+        "-c",
+        "--clusters",
+        help=f"A comma-separated list of clusters to {verb}. By default, will {verb} all clusters.\n"
+        f"For example: --clusters pnw-prod,nova-prod",
+    ).completer = lazy_choices_completer(list_clusters)
+    status_parser.add_argument(
+        "-i",
+        "--instances",
+        help=f"A comma-separated list of instances to {verb}. By default, will {verb} all instances.\n"
+        f"For example: --instances canary,main",
+    )  # No completer because we need to know service first and we can't until some other stuff has happened
+    status_parser.add_argument(
+        "-l",
+        "--deploy-group",
+        help=(
+            f"Name of the deploy group which you want to {verb}. "
+            f"If specified together with --instances and/or --clusters, will {verb} common instances only."
+        ),
+    ).completer = lazy_choices_completer(list_deploy_groups)
+    status_parser.add_argument(
+        "-o",
+        "--owner",
+        help=f"Only {verb} instances with this owner specified in soa-configs.",
+    ).completer = lazy_choices_completer(list_teams)
+    status_parser.add_argument(
+        "-r", "--registration", help=f"Only {verb} instances with this registration."
+    )
+    status_parser.add_argument(
+        "service_instance",
+        nargs="?",
+        help=f'A shorthand notation to {verb} instances. For example: "paasta status example_happyhour.canary,main"',
+    )
+
+
+def missing_deployments_message(
+    service: str,
+) -> str:
+    message = (
+        f"{service} has no deployments in deployments.json yet.\n " "Has Jenkins run?"
+    )
+    return message
+
+
+def get_deploy_info(
+    deploy_file_path: str,
+) -> Mapping:
+    deploy_info = read_deploy(deploy_file_path)
+    if not deploy_info:
+        print("Error encountered with %s" % deploy_file_path)
+
+        exit(1)
+    return deploy_info
+
+
+def get_planned_deployments(service: str, soa_dir: str) -> Iterable[str]:
+    for cluster, cluster_deploy_file in get_soa_cluster_deploy_files(
+        service=service, soa_dir=soa_dir
+    ):
+        for instance in get_deploy_info(cluster_deploy_file):
+            yield f"{cluster}.{instance}"
+
+
+def get_actual_deployments(
+    service: str, soa_dir: str
+) -> Mapping[str, DeploymentVersion]:
+    """Given a service, return a dict of instances->DeploymentVersions"""
+    config_loader = PaastaServiceConfigLoader(service=service, soa_dir=soa_dir)
+    clusters = list_clusters(service=service, soa_dir=soa_dir)
+    actual_deployments = {}
+    for cluster in clusters:
+        for instance_type in DEPLOYMENT_INSTANCE_CONFIG:
+            for instance_config in config_loader.instance_configs(
+                cluster=cluster, instance_type_class=instance_type
+            ):
+                namespace = f"{cluster}.{instance_config.instance}"
+                actual_deployments[namespace] = get_deployment_version_from_dockerurl(
+                    instance_config.get_docker_image()
+                )
+    if not actual_deployments:
+        print(
+            f"Warning: it looks like {service} has not been deployed anywhere yet!",
+            file=sys.stderr,
+        )
+    return actual_deployments
+
+
+def paasta_status_on_api_endpoint(
+    cluster: str,
+    service: str,
+    instance: str,
+    system_paasta_config: SystemPaastaConfig,
+    lock: Lock,
+    verbose: int,
+    new: bool = False,
+    is_eks: bool = False,
+    all_namespaces: bool = False,
+) -> int:
+    output = [
+        "",
+        f"\n{service}.{PaastaColors.cyan(instance)} in {cluster}{' (EKS)' if is_eks else ''}",
+    ]
+    client = get_paasta_oapi_client(
+        cluster=get_paasta_oapi_api_clustername(cluster=cluster, is_eks=is_eks),
+        system_paasta_config=system_paasta_config,
+    )
+    if not client:
+        print("Cannot get a paasta-api client")
+        exit(1)
+    try:
+        status = client.service.status_instance(
+            service=service,
+            instance=instance,
+            verbose=verbose,
+            new=new,
+            all_namespaces=all_namespaces,
+        )
+    except client.api_error as exc:
+        output.append(PaastaColors.red(exc.reason))
+        return exc.status
+    except (client.connection_error, client.timeout_error) as exc:
+        output.append(
+            PaastaColors.red(f"Could not connect to API: {exc.__class__.__name__}")
+        )
+        return 1
+    except Exception as e:
+        output.append(PaastaColors.red(f"Exception when talking to the API:"))
+        output.append(str(e))
+        return 1
+
+    if status.version and status.version != "":
+        output.append(f" Version: {status.version} (desired)")
+    # TODO: Remove this when all clusters are returning status.version
+    elif status.git_sha != "":
+        output.append(f" Git sha: {status.git_sha} (desired)")
+    instance_types = find_instance_types(status)
+    if not instance_types:
+        output.append(
+            PaastaColors.red(
+                f"{instance} is not currently supported by `paasta status` - "
+                f"unable to find status metadata in API response."
+            )
+        )
+        return 0
+
+    ret_code = 0
+    for instance_type in instance_types:
+        # check the actual status value and call the corresponding status writer
+        service_status_value = getattr(status, instance_type)
+        writer_callable = INSTANCE_TYPE_WRITERS.get(instance_type)
+        ret = writer_callable(
+            cluster, service, instance, output, service_status_value, verbose
+        )
+        if ret != 0:
+            output.append(
+                f"Status writer failed for {instance_type} with return value {ret}"
+            )
+            ret_code = ret
+
+    with lock:
+        print("\n".join(output), flush=True)
+
+    return ret_code
+
+
+def find_instance_types(status: Any) -> List[str]:
+    """
+    find_instance_types finds the instance types from the status api response.
+    It iterates over all instance type registered in `INSTANCE_TYPE_WRITERS`.
+
+    :param status: paasta api status object
+    :return: the list of matching instance types
+    """
+    types: List[str] = []
+    for instance_type in INSTANCE_TYPE_WRITERS.keys():
+        if status.get(instance_type) is not None:
+            types.append(instance_type)
+    return types
+
+
+def print_adhoc_status(
+    cluster: str,
+    service: str,
+    instance: str,
+    output: List[str],
+    status,
+    verbose: int = 0,
+) -> int:
+    output.append(f" Job: {instance}")
+    for run in status.value:
+        output.append(
+            "Launch time: %s, run id: %s, framework id: %s"
+            % (run["launch_time"], run["run_id"], run["framework_id"])
+        )
+    if status.value:
+        output.append(
+            (
+                " Use `paasta remote-run stop -s {} -c {} -i {} [-R <run id> "
+                " | -F <framework id>]` to stop."
+            ).format(service, cluster, instance)
+        )
+    else:
+        output.append(" Nothing found.")
+
+    return 0
+
+
+def create_autoscaling_info_table(autoscaling_info):
+    output = ["Autoscaling Info:"]
+
+    if autoscaling_info.current_utilization is not None:
+        current_utilization = "{:.1f}%".format(
+            autoscaling_info.current_utilization * 100
+        )
+    else:
+        current_utilization = "Exception"
+
+    target_instances = autoscaling_info.target_instances
+    if target_instances is None:
+        target_instances = "Exception"
+
+    headers = [
+        "Current instances",
+        "Max instances",
+        "Min instances",
+        "Current utilization",
+        "Target instances",
+    ]
+    row = [
+        autoscaling_info.current_instances,
+        autoscaling_info.max_instances,
+        autoscaling_info.min_instances,
+        current_utilization,
+        target_instances,
+    ]
+    row = [str(e) for e in row]
+    table = [f" {line}" for line in format_table([headers, row])]
+    output.extend(table)
+    return output
+
+
+def format_kubernetes_pod_table(pods, verbose: int):
+    rows: List[Union[tuple, str]] = [
+        ("Pod ID", "Host deployed to", "Deployed at what localtime", "Health")
+    ]
+    for pod in pods:
+        local_deployed_datetime = datetime.fromtimestamp(pod.deployed_timestamp)
+        hostname = f"{pod.host}" if pod.host is not None else PaastaColors.grey("N/A")
+        phase = pod.phase
+        reason = pod.reason
+        if phase is None or phase == "Pending":
+            health_check_status = PaastaColors.grey("N/A")
+        elif phase == "Running":
+            health_check_status = PaastaColors.green("Healthy")
+            if not pod.ready:
+                health_check_status = PaastaColors.red("Unhealthy")
+        elif phase == "Failed" and reason == "Evicted":
+            health_check_status = PaastaColors.red("Evicted")
+        else:
+            health_check_status = PaastaColors.red("Unhealthy")
+        rows.append(
+            (
+                pod.name,
+                hostname,
+                "{} ({})".format(
+                    local_deployed_datetime.strftime("%Y-%m-%dT%H:%M"),
+                    humanize.naturaltime(local_deployed_datetime),
+                ),
+                health_check_status,
+            )
+        )
+        if pod.events and verbose > 1:
+            rows.extend(format_pod_event_messages(pod.events, pod.name))
+        if pod.message is not None:
+            rows.append(PaastaColors.grey(f" {pod.message}"))
+        if len(pod.containers) > 0:
+            rows.extend(format_tail_lines_for_kubernetes_pod(pod.containers, pod.name))
+
+    return format_table(rows)
+
+
+def format_kubernetes_replicaset_table(replicasets):
+    rows = [
+        (
+            "ReplicaSet Name",
+            "Ready / Desired",
+            "Created at what localtime",
+            "Service git SHA",
+            "Config hash",
+        )
+    ]
+    for replicaset in replicasets:
+        local_created_datetime = datetime.fromtimestamp(replicaset.create_timestamp)
+
+        replica_status = f"{replicaset.ready_replicas}/{replicaset.replicas}"
+        if replicaset.ready_replicas >= replicaset.replicas:
+            replica_status = PaastaColors.green(replica_status)
+        else:
+            replica_status = PaastaColors.red(replica_status)
+
+        rows.append(
+            (
+                replicaset.name,
+                replica_status,
+                "{} ({})".format(
+                    local_created_datetime.strftime("%Y-%m-%dT%H:%M"),
+                    humanize.naturaltime(local_created_datetime),
+                ),
+                replicaset.git_sha if replicaset.git_sha else "Unknown",
+                replicaset.config_sha if replicaset.config_sha else "Unknown",
+            )
+        )
+
+    return format_table(rows)
+
+
+def get_smartstack_status_human(
+    registration: str,
+    expected_backends_per_location: int,
+    locations: Collection[Any],
+) -> List[str]:
+    if len(locations) == 0:
+        return [f"Smartstack: ERROR - {registration} is NOT in smartstack at all!"]
+
+    output = ["Smartstack:"]
+    output.append(f" Haproxy Service Name: {registration}")
+    output.append(f" Backends:")
+    for location in locations:
+        backend_status = haproxy_backend_report(
+            expected_backends_per_location, location.running_backends_count
+        )
+        output.append(f" {location.name} - {backend_status}")
+
+        if location.backends:
+            backends_table = build_smartstack_backends_table(location.backends)
+            output.extend([f" {line}" for line in backends_table])
+
+    return output
+
+
+def build_smartstack_backends_table(backends: Iterable[Any]) -> List[str]:
+    rows: List[Tuple[str, ...]] = [("Name", "LastCheck", "LastChange", "Status")]
+    for backend in backends:
+        if backend.status == "UP":
+            status = PaastaColors.default(backend.status)
+        elif backend.status == "DOWN":
+            status = PaastaColors.red(backend.status)
+        elif backend.status == "MAINT":
+            status = PaastaColors.grey(backend.status)
+        else:
+            status = PaastaColors.yellow(backend.status)
+
+        if backend.check_duration is None:
+            check_duration = ""
+        else:
+            check_duration = str(backend.check_duration)
+
+        row: Tuple[str, ...] = (
+            f"{backend.hostname}:{backend.port}",
+            f"{backend.check_status}/{backend.check_code} in {check_duration}ms",
+            humanize.naturaltime(timedelta(seconds=backend.last_change)),
+            status,
+        )
+
+        if not backend.has_associated_task:
+            row = tuple(
+                PaastaColors.grey(remove_ansi_escape_sequences(col)) for col in row
+            )
+
+        rows.append(row)
+
+    return format_table(rows)
+
+
+def get_envoy_status_human(
+    registration: str,
+    expected_backends_per_location: int,
+    locations: Collection[Any],
+) -> List[str]:
+    if len(locations) == 0:
+        return [f"Envoy: ERROR - {registration} is NOT in Envoy at all!"]
+
+    output = ["Envoy:"]
+    output.append(f" Service Name: {registration}")
+    output.append(f" Backends:")
+    for location in locations:
+        backend_status = envoy_backend_report(
+            expected_backends_per_location, location.running_backends_count
+        )
+        output.append(f" {location.name} - {backend_status}")
+
+        if location.backends:
+            color = (
+                PaastaColors.green
+                if location.is_proxied_through_casper
+                else PaastaColors.grey
+            )
+            is_proxied_through_casper_output = color(
+                f"{location.is_proxied_through_casper}"
+            )
+            output.append(
+                f" Proxied through Casper: {is_proxied_through_casper_output}"
+            )
+
+            backends_table = build_envoy_backends_table(location.backends)
+            output.extend([f" {line}" for line in backends_table])
+
+    return output
+
+
+def build_envoy_backends_table(backends: Iterable[Any]) -> List[str]:
+    rows: List[Tuple[str, ...]] = [("Hostname:Port", "Weight", "Status")]
+    for backend in backends:
+        if backend.eds_health_status == "HEALTHY":
+            status = PaastaColors.default(backend.eds_health_status)
+        elif backend.eds_health_status == "UNHEALTHY":
+            status = PaastaColors.red(backend.eds_health_status)
+        else:
+            status = PaastaColors.yellow(backend.eds_health_status)
+
+        row: Tuple[str, ...] = (
+            f"{backend.hostname}:{backend.port_value}",
+            f"{backend.weight}",
+            status,
+        )
+
+        if not backend.has_associated_task:
+            row = tuple(
+                PaastaColors.grey(remove_ansi_escape_sequences(col)) for col in row
+            )
+
+        rows.append(row)
+
+    return format_table(rows)
+
+
+def kubernetes_app_deploy_status_human(status, message, backoff_seconds=None):
+    status_string = kubernetes_tools.KubernetesDeployStatus.tostring(status)
+
+    if status in {
+        kubernetes_tools.KubernetesDeployStatus.Waiting,
+        kubernetes_tools.KubernetesDeployStatus.Stopped,
+    }:
+        deploy_status = PaastaColors.red(status_string)
+    elif status == kubernetes_tools.KubernetesDeployStatus.Deploying:
+        deploy_status = PaastaColors.yellow(status_string)
+    elif status == kubernetes_tools.KubernetesDeployStatus.Running:
+        deploy_status = PaastaColors.bold(status_string)
+    else:
+        deploy_status = status_string
+
+    if message:
+        deploy_status += f" ({message})"
+    return deploy_status
+
+
+def status_kubernetes_job_human(
+    service: str,
+    instance: str,
+    deploy_status: str,
+    desired_app_id: str,
+    app_count: int,
+    running_instances: int,
+    normal_instance_count: int,
+    evicted_count: int,
+) -> str:
+    name = PaastaColors.cyan(compose_job_id(service, instance))
+
+    if app_count >= 0:
+        if running_instances >= normal_instance_count:
+            status = PaastaColors.green("Healthy")
+            instance_count = PaastaColors.green(
+                "(%d/%d)" % (running_instances, normal_instance_count)
+            )
+        elif running_instances == 0:
+            status = PaastaColors.yellow("Critical")
+            instance_count = PaastaColors.red(
+                "(%d/%d)" % (running_instances, normal_instance_count)
+            )
+        else:
+            status = PaastaColors.yellow("Warning")
+            instance_count = PaastaColors.yellow(
+                "(%d/%d)" % (running_instances, normal_instance_count)
+            )
+
+        evicted = (
+            PaastaColors.red(str(evicted_count))
+            if evicted_count > 0
+            else PaastaColors.green(str(evicted_count))
+        )
+        return (
+            "Kubernetes: {} - up with {} instances ({} evicted). Status: {}".format(
+                status, instance_count, evicted, deploy_status
+            )
+        )
+    else:
+        status = PaastaColors.yellow("Warning")
+        return "Kubernetes: {} - {} (app {}) is not configured in Kubernetes yet (waiting for bounce)".format(
+            status, name, desired_app_id
+        )
+
+
+def get_flink_job_name(flink_job: FlinkJobDetails) -> str:
+    return flink_job["name"].split(".", 2)[-1]
+
+
+def should_job_info_be_shown(cluster_state):
+    return (
+        cluster_state == "running"
+        or cluster_state == "stoppingsupervisor"
+        or cluster_state == "cleanupsupervisor"
+    )
+
+
+def get_pod_uptime(pod_deployed_timestamp: str):
+    # NOTE: the k8s API returns timestamps in UTC, so we make sure to always work in UTC
+    pod_creation_time = datetime.strptime(
+        pod_deployed_timestamp, "%Y-%m-%dT%H:%M:%SZ"
+    ).replace(tzinfo=timezone.utc)
+    pod_uptime = datetime.now(timezone.utc) - pod_creation_time
+    pod_uptime_total_seconds = pod_uptime.total_seconds()
+    pod_uptime_days = divmod(pod_uptime_total_seconds, 86400)
+    pod_uptime_hours = divmod(pod_uptime_days[1], 3600)
+    pod_uptime_minutes = divmod(pod_uptime_hours[1], 60)
+    pod_uptime_seconds = divmod(pod_uptime_minutes[1], 1)
+    return f"{int(pod_uptime_days[0])}d{int(pod_uptime_hours[0])}h{int(pod_uptime_minutes[0])}m{int(pod_uptime_seconds[0])}s"
+
+
+def append_pod_status(pod_status, output: List[str]):
+    output.append(" Pods:")
+    rows: List[Union[str, Tuple[str, str, str, str]]] = [
+        ("Pod Name", "Host", "Phase", "Uptime")
+    ]
+    for pod in pod_status:
+        color_fn = (
+            PaastaColors.green
+            if pod["phase"] == "Running" and pod["container_state"] == "Running"
+            else PaastaColors.red
+            # pods can get stuck in phase: Running and state: CrashLoopBackOff, so check for that
+            if pod["phase"] == "Failed"
+            or pod["container_state_reason"] == "CrashLoopBackOff"
+            else PaastaColors.yellow
+        )
+
+        rows.append(
+            (
+                pod["name"],
+                pod["host"],
+                color_fn(pod["phase"]),
+                get_pod_uptime(pod["deployed_timestamp"]),
+            )
+        )
+        if "reason" in pod and pod["reason"] != "":
+            rows.append(PaastaColors.grey(f" {pod['reason']}: {pod['message']}"))
+        if "container_state" in pod and pod["container_state"] != "Running":
+            rows.append(
+                PaastaColors.grey(
+                    f" {pod['container_state']}: {pod['container_state_reason']}"
+                )
+            )
+    pods_table = format_table(rows)
+    output.extend([f" {line}" for line in pods_table])
+
+
+def _print_flink_status_from_job_manager(
+    service: str,
+    instance: str,
+    output: List[str],
+    flink: Mapping[str, Any],
+    client: PaastaOApiClient,
+    verbose: int,
+) -> int:
+    status = flink.get("status")
+    if status is None:
+        output.append(PaastaColors.red(" Flink cluster is not available yet"))
+        return 1
+
+    # Since metadata should be available no matter the state, we show it first. If this errors out
+    # then we cannot really do much to recover, because cluster is not in usable state anyway
+    metadata = flink.get("metadata")
+    labels = metadata.get("labels")
+    config_sha = labels.get(paasta_prefixed("config_sha"))
+    if config_sha is None:
+        raise ValueError(f"expected config sha on Flink, but received {metadata}")
+    if config_sha.startswith("config"):
+        config_sha = config_sha[6:]
+
+    output.append(f" Config SHA: {config_sha}")
+
+    if status["state"] == "running":
+        try:
+            flink_config = get_flink_config_from_paasta_api_client(
+                service=service, instance=instance, client=client
+            )
+        except Exception as e:
+            output.append(PaastaColors.red(f"Exception when talking to the API:"))
+            output.append(str(e))
+            return 1
+
+        if verbose:
+            output.append(
+                f" Flink version: {flink_config.flink_version} {flink_config.flink_revision}"
+            )
+        else:
+            output.append(f" Flink version: {flink_config.flink_version}")
+
+        # Annotation "flink.yelp.com/dashboard_url" is populated by flink-operator
+        dashboard_url = metadata["annotations"].get("flink.yelp.com/dashboard_url")
+        output.append(f" URL: {dashboard_url}/")
+
+    color = PaastaColors.green if status["state"] == "running" else PaastaColors.yellow
+    output.append(f" State: {color(status['state'].title())}")
+
+    pod_running_count = pod_evicted_count = pod_other_count = 0
+    # default for evicted in case where pod status is not available
+    evicted = f"{pod_evicted_count}"
+
+    for pod in status["pod_status"]:
+        if pod["phase"] == "Running":
+            pod_running_count += 1
+        elif pod["phase"] == "Failed" and pod["reason"] == "Evicted":
+            pod_evicted_count += 1
+        else:
+            pod_other_count += 1
+        evicted = (
+            PaastaColors.red(f"{pod_evicted_count}")
+            if pod_evicted_count > 0
+            else f"{pod_evicted_count}"
+        )
+
+    output.append(
+        " Pods:"
+        f" {pod_running_count} running,"
+        f" {evicted} evicted,"
+        f" {pod_other_count} other"
+    )
+
+    if not should_job_info_be_shown(status["state"]):
+        # In case where the jobmanager of cluster is in crashloopbackoff
+        # The pods for the cluster will be available and we need to show the pods.
+        # So that paasta status -v and kubectl get pods show the same consistent result.
+        if verbose and len(status["pod_status"]) > 0:
+            append_pod_status(status["pod_status"], output)
+        output.append(" No other information available in non-running state")
+        return 0
+
+    if status["state"] == "running":
+        # Flink cluster overview from paasta api client
+        try:
+            overview = get_flink_overview_from_paasta_api_client(
+                service=service, instance=instance, client=client
+            )
+        except Exception as e:
+            output.append(PaastaColors.red("Exception when talking to the API:"))
+            output.append(str(e))
+            return 1
+
+        output.append(
+            " Jobs:"
+            f" {overview.jobs_running} running,"
+            f" {overview.jobs_finished} finished,"
+            f" {overview.jobs_failed} failed,"
+            f" {overview.jobs_cancelled} cancelled"
+        )
+        output.append(
+            " "
+            f" {overview.taskmanagers} taskmanagers,"
+            f" {overview.slots_available}/{overview.slots_total} slots available"
+        )
+
+    flink_jobs = FlinkJobs()
+    flink_jobs.jobs = []
+    if status["state"] == "running":
+        try:
+            flink_jobs = get_flink_jobs_from_paasta_api_client(
+                service=service, instance=instance, client=client
+            )
+        except Exception as e:
+            output.append(PaastaColors.red("Exception when talking to the API:"))
+            output.append(str(e))
+            return 1
+
+    jobs: List[FlinkJobDetails] = []
+    job_ids: List[str] = []
+    if flink_jobs.get("jobs"):
+        job_ids = [job.id for job in flink_jobs.get("jobs")]
+    try:
+        jobs = a_sync.block(get_flink_job_details, service, instance, job_ids, client)
+    except Exception as e:
+        output.append(PaastaColors.red("Exception when talking to the API:"))
+        output.append(str(e))
+        return 1
+
+    # Avoid cutting job name. As opposed to default hardcoded value of 32, we will use max length of job name
+    if jobs:
+        max_job_name_length = max([len(get_flink_job_name(job)) for job in jobs])
+    else:
+        max_job_name_length = 10
+
+    # Apart from this column total length of one row is around 52 columns, using remaining terminal columns for job name
+    # Note: for terminals smaller than 90 columns the row will overflow in verbose printing
+    allowed_max_job_name_length = min(
+        max(10, shutil.get_terminal_size().columns - 52), max_job_name_length
+    )
+
+    output.append(" Jobs:")
+    if verbose > 1:
+        output.append(
+            f' {"Job Name": <{allowed_max_job_name_length}} State Job ID Started'
+        )
+    else:
+        output.append(
+            f' {"Job Name": <{allowed_max_job_name_length}} State Started'
+        )
+
+    # Use only the most recent jobs
+    unique_jobs = (
+        sorted(jobs, key=lambda j: -j["start_time"])[0]  # type: ignore
+        for _, jobs in groupby(
+            sorted(
+                (j for j in jobs if j.get("name") and j.get("start_time")),
+                key=lambda j: j["name"],
+            ),
+            lambda j: j["name"],
+        )
+    )
+
+    allowed_max_jobs_printed = 3
+    job_printed_count = 0
+
+    for job in unique_jobs:
+        job_id = job["jid"]
+        if verbose > 1:
+            fmt = """ {job_name: <{allowed_max_job_name_length}.{allowed_max_job_name_length}} {state: <11} {job_id} {start_time}
+        {dashboard_url}"""
+        else:
+            fmt = " {job_name: <{allowed_max_job_name_length}.{allowed_max_job_name_length}} {state: <11} {start_time}"
+        start_time = datetime.fromtimestamp(int(job["start_time"]) // 1000)
+        if verbose or job_printed_count < allowed_max_jobs_printed:
+            job_printed_count += 1
+            color_fn = (
+                PaastaColors.green
+                if job.get("state") and job.get("state") == "RUNNING"
+                else PaastaColors.red
+                if job.get("state") and job.get("state") in ("FAILED", "FAILING")
+                else PaastaColors.yellow
+            )
+            job_info_str = fmt.format(
+                job_id=job_id,
+                job_name=get_flink_job_name(job),
+                allowed_max_job_name_length=allowed_max_job_name_length,
+                state=color_fn((job.get("state").title() or "Unknown")),
+                start_time=f"{str(start_time)} ({humanize.naturaltime(start_time)})",
+                dashboard_url=PaastaColors.grey(f"{dashboard_url}/#/jobs/{job_id}"),
+            )
+            output.append(job_info_str)
+        else:
+            output.append(
+                PaastaColors.yellow(
+                    f" Only showing {allowed_max_jobs_printed} Flink jobs, use -v to show all"
+                )
+            )
+            break
+
+    if verbose and len(status["pod_status"]) > 0:
+        append_pod_status(status["pod_status"], output)
+    return 0
+
+
def print_flink_status(
|
|
972
|
+
cluster: str,
|
|
973
|
+
service: str,
|
|
974
|
+
instance: str,
|
|
975
|
+
output: List[str],
|
|
976
|
+
flink: Mapping[str, Any],
|
|
977
|
+
verbose: int,
|
|
978
|
+
) -> int:
|
|
979
|
+
system_paasta_config = load_system_paasta_config()
|
|
980
|
+
|
|
981
|
+
client = get_paasta_oapi_client(cluster, system_paasta_config)
|
|
982
|
+
if not client:
|
|
983
|
+
output.append(
|
|
984
|
+
PaastaColors.red(
|
|
985
|
+
"paasta-api client unavailable - unable to get flink status"
|
|
986
|
+
)
|
|
987
|
+
)
|
|
988
|
+
return 1
|
|
989
|
+
|
|
990
|
+
return _print_flink_status_from_job_manager(
|
|
991
|
+
service, instance, output, flink, client, verbose
|
|
992
|
+
)
|
|
993
|
+
|
|
994
|
+
|
|
995
|
+
def print_flinkeks_status(
|
|
996
|
+
cluster: str,
|
|
997
|
+
service: str,
|
|
998
|
+
instance: str,
|
|
999
|
+
output: List[str],
|
|
1000
|
+
flink: Mapping[str, Any],
|
|
1001
|
+
verbose: int,
|
|
1002
|
+
) -> int:
|
|
1003
|
+
system_paasta_config = load_system_paasta_config()
|
|
1004
|
+
|
|
1005
|
+
client = get_paasta_oapi_client(
|
|
1006
|
+
cluster=get_paasta_oapi_api_clustername(cluster=cluster, is_eks=True),
|
|
1007
|
+
system_paasta_config=system_paasta_config,
|
|
1008
|
+
)
|
|
1009
|
+
if not client:
|
|
1010
|
+
output.append(
|
|
1011
|
+
PaastaColors.red(
|
|
1012
|
+
"paasta-api client unavailable - unable to get flink status"
|
|
1013
|
+
)
|
|
1014
|
+
)
|
|
1015
|
+
return 1
|
|
1016
|
+
|
|
1017
|
+
return _print_flink_status_from_job_manager(
|
|
1018
|
+
service, instance, output, flink, client, verbose
|
|
1019
|
+
)
|
|
1020
|
+
|
|
1021
|
+
|
|
1022
|
+
async def get_flink_job_details(
|
|
1023
|
+
service: str, instance: str, job_ids: List[str], client: PaastaOApiClient
|
|
1024
|
+
) -> List[FlinkJobDetails]:
|
|
1025
|
+
jobs_details = await asyncio.gather(
|
|
1026
|
+
*[
|
|
1027
|
+
flink_tools.get_flink_job_details_from_paasta_api_client(
|
|
1028
|
+
service, instance, job_id, client
|
|
1029
|
+
)
|
|
1030
|
+
for job_id in job_ids
|
|
1031
|
+
]
|
|
1032
|
+
)
|
|
1033
|
+
return [jd for jd in jobs_details]
|
|
1034
|
+
|
|
1035
|
+
|
|
1036
|
+
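# Illustrative sketch (not part of the packaged file): the coroutine above is
# normally driven from the synchronous CLI path with a_sync.block, exactly as
# the Flink status code earlier in this file does. The client, service name,
# instance and job id below are hypothetical placeholders.
import a_sync

def example_fetch_job_details(client: PaastaOApiClient) -> None:
    job_ids = ["a1b2c3d4e5f6"]  # hypothetical Flink job id
    details = a_sync.block(
        get_flink_job_details, "example_service", "main", job_ids, client
    )
    for job_detail in details:
        print(job_detail)
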
def print_kubernetes_status_v2(
    cluster: str,
    service: str,
    instance: str,
    output: List[str],
    status: InstanceStatusKubernetesV2,
    verbose: int = 0,
) -> int:
    instance_state = get_instance_state(status)
    output.append(f" State: {instance_state}")
    output.append(" Running versions:")
    if not verbose:
        output.append(
            " " + PaastaColors.green("Rerun with -v to see all replicas")
        )
    elif verbose < 2:
        output.append(
            " "
            + PaastaColors.green(
                "You can use paasta logs to view stdout/stderr or rerun with -vv for even more information."
            )
        )
    output.extend(
        [
            f" {line}"
            for line in get_versions_table(
                status.versions, service, instance, cluster, verbose
            )
        ]
    )

    if verbose > 1:
        output.extend(get_autoscaling_table(status.autoscaling_status, verbose))

    if status.error_message:
        output.append(" " + PaastaColors.red(status.error_message))
        return 1
    else:
        return 0


# TODO: Make an enum class or similar for the various instance states
def get_instance_state(status: InstanceStatusKubernetesV2) -> str:
    num_versions = len(status.versions)
    num_ready_replicas = sum(r.ready_replicas for r in status.versions)
    if status.desired_state == "stop":
        if all(version.replicas == 0 for version in status.versions):
            return PaastaColors.red("Stopped")
        else:
            return PaastaColors.red("Stopping")
    elif status.desired_state == "start":
        if num_versions == 0:
            if status.desired_instances == 0:
                return PaastaColors.red("Stopped")
            else:
                return PaastaColors.yellow("Starting")
        if num_versions == 1:
            if num_ready_replicas < status.desired_instances:
                return PaastaColors.yellow("Launching replicas")
            else:
                return PaastaColors.green("Running")
        else:
            versions = sorted(
                status.versions, key=lambda x: x.create_timestamp, reverse=True
            )
            git_shas = {r.git_sha for r in versions}
            config_shas = {r.config_sha for r in versions}
            bouncing_to = []
            if len(git_shas) > 1:
                bouncing_to.append(versions[0].git_sha[:8])
            if len(config_shas) > 1:
                bouncing_to.append(versions[0].config_sha)

            bouncing_to_str = ", ".join(bouncing_to)
            return PaastaColors.yellow(f"Bouncing to {bouncing_to_str}")
    else:
        return PaastaColors.red("Unknown")


def get_versions_table(
    versions: List[KubernetesVersion],
    service: str,
    instance: str,
    cluster: str,
    verbose: int = 0,
) -> List[str]:
    if len(versions) == 0:
        return [PaastaColors.red("There are no running versions for this instance")]
    elif len(versions) == 1:
        return get_version_table_entry(
            versions[0], service, instance, cluster, verbose=verbose
        )
    else:
        versions = sorted(versions, key=lambda x: x.create_timestamp, reverse=True)
        config_shas = {v.config_sha for v in versions}
        show_config_sha = len(config_shas) > 1

        namespaces = {v.namespace for v in versions}
        show_namespace = len(namespaces) > 1

        table: List[str] = []
        table.extend(
            get_version_table_entry(
                versions[0],
                service,
                instance,
                cluster,
                version_name_suffix="new",
                show_config_sha=show_config_sha,
                show_namespace=show_namespace,
                verbose=verbose,
            )
        )
        for version in versions[1:]:
            table.extend(
                get_version_table_entry(
                    version,
                    service,
                    instance,
                    cluster,
                    version_name_suffix="old",
                    show_config_sha=show_config_sha,
                    show_namespace=show_namespace,
                    verbose=verbose,
                )
            )
        return table


def get_version_table_entry(
    version: KubernetesVersion,
    service: str,
    instance: str,
    cluster: str,
    version_name_suffix: str = None,
    show_config_sha: bool = False,
    show_namespace: bool = False,
    verbose: int = 0,
) -> List[str]:
    version_name = version.git_sha[:8]
    if show_config_sha or verbose > 1:
        version_name += f", {version.config_sha}"
    if version.image_version is not None:
        version_name += f" (image_version: {version.image_version})"
    if version_name_suffix is not None:
        version_name += f" ({version_name_suffix})"
    if version.namespace is not None and (show_namespace or verbose > 1):
        version_name += f" (namespace: {version.namespace})"
    version_name = PaastaColors.blue(version_name)

    start_datetime = datetime.fromtimestamp(version.create_timestamp)
    humanized_start_time = humanize.naturaltime(start_datetime)
    entry = [f"{version_name} - Started {start_datetime} ({humanized_start_time})"]
    replica_states = get_replica_states(version.pods)
    replica_states = sorted(replica_states, key=lambda s: s[1].create_timestamp)
    if len(replica_states) == 0:
        message = PaastaColors.red("0 pods found")
        entry.append(f" {message}")
    if replica_states:
        # If no replica_states, there were no pods found
        replica_state_counts = Counter([state for state, pod in replica_states])
        replica_state_display = [
            state.color(f"{replica_state_counts[state]} {state.message}")
            for state in ReplicaState
            if state in replica_state_counts
        ]
        entry.append(f" Replica States: {' / '.join(replica_state_display)}")
        if not verbose:
            unhealthy_replicas = [
                (state, pod) for state, pod in replica_states if state.is_unhealthy()
            ]
            if unhealthy_replicas:
                entry.append(f" Unhealthy Replicas:")
                replica_table = create_replica_table(
                    unhealthy_replicas, service, instance, cluster, verbose
                )
                for line in replica_table:
                    entry.append(f" {line}")
        else:
            replica_table = create_replica_table(
                replica_states, service, instance, cluster, verbose
            )
            for line in replica_table:
                entry.append(f" {line}")
    return entry


class ReplicaState(Enum):
    # Order will be preserved in count summary
    RUNNING = "Healthy", PaastaColors.green

    UNREACHABLE = "Unreachable", PaastaColors.red
    EVICTED = "Evicted", PaastaColors.red
    ALL_CONTAINERS_WAITING = "All Containers Waiting", PaastaColors.red
    FAILED = "Failed", PaastaColors.red
    MAIN_CONTAINER_NOT_RUNNING = "Main Container Not Running", PaastaColors.red
    NO_CONTAINERS_YET = "No Containers Yet", PaastaColors.red
    NOT_READY = "Not Ready", PaastaColors.red
    SOME_CONTAINERS_WAITING = "Some Containers Waiting", PaastaColors.red

    WARNING = "Warning", PaastaColors.yellow
    UNSCHEDULED = "Unscheduled", PaastaColors.yellow
    STARTING = "Starting", PaastaColors.yellow
    WARMING_UP = "Warming Up", PaastaColors.cyan
    TERMINATING = "Terminating", PaastaColors.cyan
    UNKNOWN = "Unknown", PaastaColors.yellow

    def is_unhealthy(self):
        return self.color == PaastaColors.red

    @property
    def color(self) -> Callable:
        return self.value[1]

    @property
    def formatted_message(self):
        return self.value[1](self.value[0])

    @property
    def message(self):
        return self.value[0]

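# Illustrative sketch (not part of the packaged file): each ReplicaState value
# is a (message, color function) tuple, which is what the properties above
# unpack.
state = ReplicaState.EVICTED
assert state.message == "Evicted"        # value[0]
assert state.color == PaastaColors.red   # value[1]
assert state.is_unhealthy()              # red states are considered unhealthy
print(state.formatted_message)           # "Evicted" wrapped in red ANSI codes
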
def recent_liveness_failure(pod: KubernetesPodV2) -> bool:
    if not pod.events:
        return False
    return any(
        [evt for evt in pod.events if "Liveness probe failed" in evt.get("message", "")]
    )


def recent_container_restart(
    container: Optional[KubernetesContainerV2], time_window: int = 900
) -> bool:
    if container:
        return kubernetes_tools.recent_container_restart(
            container.restart_count,
            container.last_state,
            container.last_timestamp,
            time_window_s=time_window,
        )
    return False


def get_main_container(pod: KubernetesPodV2) -> Optional[KubernetesContainerV2]:
    return next(
        (
            c
            for c in pod.containers
            if c.name not in kubernetes_tools.SIDECAR_CONTAINER_NAMES
        ),
        None,
    )


def get_replica_state(pod: KubernetesPodV2) -> ReplicaState:
    phase = pod.phase
    state = ReplicaState.UNKNOWN
    reason = pod.reason
    if reason == "Evicted":
        state = ReplicaState.EVICTED
    elif phase == "Failed":
        state = ReplicaState.FAILED
    elif phase is None or not pod.scheduled:
        state = ReplicaState.UNSCHEDULED
    elif pod.delete_timestamp:
        state = ReplicaState.TERMINATING
    elif phase == "Pending":
        if not pod.containers:
            state = ReplicaState.NO_CONTAINERS_YET
        elif all([c.state.lower() == "waiting" for c in pod.containers]):
            state = ReplicaState.ALL_CONTAINERS_WAITING
        else:
            state = ReplicaState.SOME_CONTAINERS_WAITING
    elif phase == "Running":
        ####
        # TODO: Take sidecar containers into account
        # This logic likely needs refining
        main_container = get_main_container(pod)
        if main_container:
            # NOTE: the k8s API returns timestamps in UTC, so we make sure to always work in UTC
            warming_up = (
                pod.create_timestamp + main_container.healthcheck_grace_period
                > datetime.now(timezone.utc).timestamp()
            )
            if pod.mesh_ready is False:
                if main_container.state != "running":
                    state = ReplicaState.MAIN_CONTAINER_NOT_RUNNING
                else:
                    state = ReplicaState.UNREACHABLE
            elif not pod.ready:
                state = ReplicaState.NOT_READY
            else:
                if recent_liveness_failure(pod) or recent_container_restart(
                    main_container
                ):
                    state = ReplicaState.WARNING
                else:
                    state = ReplicaState.RUNNING

            if state != ReplicaState.RUNNING and warming_up:
                state = ReplicaState.WARMING_UP

    else:
        state = ReplicaState.UNKNOWN

    return state


def get_replica_states(
    pods: List[KubernetesPodV2],
) -> List[Tuple[ReplicaState, KubernetesPodV2]]:
    return [(get_replica_state(pod), pod) for pod in pods]


def create_replica_table(
    pods: List[Tuple[ReplicaState, KubernetesPodV2]],
    service: str,
    instance: str,
    cluster: str,
    verbose: int = 0,
) -> List[str]:
    header = ["ID", "IP/Port", "Host deployed to", "Started at what localtime", "State"]
    table: List[Union[List[str], str]] = [header]
    for state, pod in pods:
        start_datetime = datetime.fromtimestamp(pod.create_timestamp)
        humanized_start_time = humanize.naturaltime(start_datetime)
        row = [
            pod.name,
            f"{pod.ip}:8888" if pod.ip else "None",
            pod.host or "None",
            humanized_start_time,
            state.formatted_message,
        ]
        table.append(row)

        # Adding additional context/tips
        if pod.reason == "Evicted":
            table.append(
                PaastaColors.red(
                    f' Evicted: {pod.message if pod.message else "Unknown reason"}'
                )
            )

        main_container = get_main_container(pod)
        if main_container:
            if main_container.timestamp:
                timestamp = datetime.fromtimestamp(main_container.timestamp)
            elif main_container.last_timestamp:
                timestamp = datetime.fromtimestamp(main_container.last_timestamp)
            else:
                # if no container timestamps are found, use pod's creation
                timestamp = start_datetime

            humanized_timestamp = humanize.naturaltime(timestamp)
            if recent_container_restart(main_container):
                table.append(
                    PaastaColors.red(
                        f" Restarted {humanized_timestamp}. {main_container.restart_count} restarts since starting"
                    )
                )
            if (
                main_container.reason == "OOMKilled"
                or main_container.last_reason == "OOMKilled"
            ):
                if main_container.reason == "OOMKilled":
                    oom_kill_timestamp = timestamp
                    human_oom_kill_timestamp = humanized_timestamp
                elif main_container.last_reason == "OOMKilled":
                    oom_kill_timestamp = datetime.fromtimestamp(
                        main_container.last_timestamp
                    )
                    human_oom_kill_timestamp = humanize.naturaltime(oom_kill_timestamp)
                table.extend(
                    [
                        PaastaColors.red(
                            f" OOM Killed {human_oom_kill_timestamp} ({oom_kill_timestamp})."
                        ),
                        PaastaColors.red(
                            f" Check y/check-oom-events and consider increasing memory in yelpsoa_configs"
                        ),
                    ]
                )
            if state == ReplicaState.WARMING_UP:
                if verbose > 0:
                    # NOTE: the k8s API returns timestamps in UTC, so we make sure to always work in UTC
                    warmup_duration = (
                        datetime.now(timezone.utc).timestamp() - pod.create_timestamp
                    )
                    humanized_duration = humanize.naturaldelta(
                        timedelta(seconds=warmup_duration)
                    )
                    grace_period_remaining = (
                        pod.create_timestamp
                        + main_container.healthcheck_grace_period
                        - datetime.now(timezone.utc).timestamp()
                    )
                    humanized_remaining = humanize.naturaldelta(
                        timedelta(seconds=grace_period_remaining)
                    )
                    table.append(
                        PaastaColors.cyan(
                            f" Still warming up, {humanized_duration} elapsed, {humanized_remaining} before healthchecking starts"
                        )
                    )
            if recent_liveness_failure(pod) and state != ReplicaState.TERMINATING:
                healthcheck_string = (
                    "check your healthcheck configuration in yelpsoa_configs"
                )
                if main_container and main_container.healthcheck_cmd:
                    if main_container.healthcheck_cmd.http_url:
                        healthcheck_string = (
                            f"run `curl {main_container.healthcheck_cmd.http_url}`"
                        )
                    elif main_container.healthcheck_cmd.tcp_port:
                        healthcheck_string = f"verify your service is listening on {main_container.healthcheck_cmd.tcp_port}"
                    elif main_container.healthcheck_cmd.cmd:
                        healthcheck_string = f"check why the following may be failing: `{main_container.healthcheck_cmd.cmd}`"
                table.append(
                    PaastaColors.red(
                        f" Healthchecks are failing. To investigate further, {healthcheck_string}"
                    )
                )
            if state.is_unhealthy() or recent_container_restart(main_container):
                if verbose < 2:
                    table.append(
                        PaastaColors.red(
                            f" Consider checking logs with `paasta logs -c {cluster} -s {service} -i {instance} -p {pod.name}`"
                        )
                    )
                else:
                    if pod.events:
                        table.extend(format_pod_event_messages(pod.events, pod.name))
                    if len(pod.containers) > 0:
                        table.extend(
                            format_tail_lines_for_kubernetes_pod(pod.containers, pod.name)
                        )
        elif state == ReplicaState.UNSCHEDULED:
            if pod.reason == "Unschedulable":
                table.append(PaastaColors.red(f" Pod is unschedulable: {pod.message}"))
        elif state == ReplicaState.UNKNOWN:
            table.append(
                PaastaColors.red(
                    f" Cannot determine pod state, please try again. If you continue to see this state, please contact #paasta"
                )
            )
    return format_table(table)


def get_autoscaling_table(
    autoscaling_status: Dict[str, Any], verbose: int = 0
) -> List[str]:
    table = []
    if autoscaling_status and verbose > 1:
        table.append(" Autoscaling status:")
        table.append(f" min_instances: {autoscaling_status['min_instances']}")
        table.append(f" max_instances: {autoscaling_status['max_instances']}")
        table.append(
            f" Desired instances: {autoscaling_status['desired_replicas']}"
        )
        table.append(f" Last scale time: {autoscaling_status['last_scale_time']}")
        NA = PaastaColors.red("N/A")
        if len(autoscaling_status["metrics"]) > 0:
            table.append(f" Metrics:")

            metrics_table: List[List[str]] = [["Metric", "Current", "Target"]]
            for metric in autoscaling_status["metrics"]:
                current_metric = (
                    NA
                    if getattr(metric, "current_value") is None
                    else getattr(metric, "current_value")
                )
                target_metric = (
                    NA
                    if getattr(metric, "target_value") is None
                    else getattr(metric, "target_value")
                )
                metrics_table.append([metric["name"], current_metric, target_metric])
            table.extend([" " + s for s in format_table(metrics_table)])

    return format_table(table)

def print_kubernetes_status(
    cluster: str,
    service: str,
    instance: str,
    output: List[str],
    kubernetes_status,
    verbose: int = 0,
) -> int:
    bouncing_status = bouncing_status_human(
        kubernetes_status.app_count, kubernetes_status.bounce_method
    )
    desired_state = desired_state_human(
        kubernetes_status.desired_state, kubernetes_status.expected_instance_count
    )
    output.append(f" State: {bouncing_status} - Desired state: {desired_state}")

    status = KubernetesDeployStatus.fromstring(kubernetes_status.deploy_status)
    deploy_status = kubernetes_app_deploy_status_human(
        status, kubernetes_status.deploy_status_message
    )

    output.append(
        " {}".format(
            status_kubernetes_job_human(
                service=service,
                instance=instance,
                deploy_status=deploy_status,
                desired_app_id=kubernetes_status.app_id,
                app_count=kubernetes_status.app_count,
                running_instances=kubernetes_status.running_instance_count,
                normal_instance_count=kubernetes_status.expected_instance_count,
                evicted_count=kubernetes_status.evicted_count,
            )
        )
    )
    if kubernetes_status.create_timestamp and verbose > 0:
        create_datetime = datetime.fromtimestamp(kubernetes_status.create_timestamp)
        output.append(
            " App created: {} ({}). Namespace: {}".format(
                create_datetime,
                humanize.naturaltime(create_datetime),
                kubernetes_status.namespace,
            )
        )

    if kubernetes_status.pods and len(kubernetes_status.pods) > 0:
        output.append(" Pods:")
        pods_table = format_kubernetes_pod_table(kubernetes_status.pods, verbose)
        output.extend([f" {line}" for line in pods_table])

    if kubernetes_status.replicasets and len(kubernetes_status.replicasets) > 0:
        output.append(" ReplicaSets:")
        replicasets_table = format_kubernetes_replicaset_table(
            kubernetes_status.replicasets
        )
        output.extend([f" {line}" for line in replicasets_table])

    autoscaling_status = kubernetes_status.autoscaling_status
    if autoscaling_status and verbose > 0:
        output.append(" Autoscaling status:")
        output.append(f" min_instances: {autoscaling_status['min_instances']}")
        output.append(f" max_instances: {autoscaling_status['max_instances']}")
        output.append(
            f" Desired instances: {autoscaling_status['desired_replicas']}"
        )
        output.append(
            f" Last scale time: {autoscaling_status['last_scale_time']}"
        )
        output.append(f" Dashboard: y/was-it-the-autoscaler")
        NA = PaastaColors.red("N/A")
        if len(autoscaling_status["metrics"]) > 0:
            output.append(f" Metrics:")

            metrics_table: List[List[str]] = [["Metric", "Current", "Target"]]
            for metric in autoscaling_status["metrics"]:
                current_metric = (
                    NA
                    if getattr(metric, "current_value") is None
                    else getattr(metric, "current_value")
                )
                target_metric = (
                    NA
                    if getattr(metric, "target_value") is None
                    else getattr(metric, "target_value")
                )
                metrics_table.append([metric["name"], current_metric, target_metric])
            output.extend([" " + s for s in format_table(metrics_table)])

    if kubernetes_status.smartstack is not None:
        smartstack_status_human = get_smartstack_status_human(
            kubernetes_status.smartstack.registration,
            kubernetes_status.smartstack.expected_backends_per_location,
            kubernetes_status.smartstack.locations,
        )
        output.extend([f" {line}" for line in smartstack_status_human])

    if kubernetes_status.envoy is not None:
        envoy_status_human = get_envoy_status_human(
            kubernetes_status.envoy.registration,
            kubernetes_status.envoy.expected_backends_per_location,
            kubernetes_status.envoy.locations,
        )
        output.extend([f" {line}" for line in envoy_status_human])

    error_message = kubernetes_status.error_message
    if error_message:
        output.append(" " + PaastaColors.red(error_message))
        return 1
    return 0


def print_tron_status(
    cluster: str,
    service: str,
    instance: str,
    output: List[str],
    tron_status,
    verbose: int = 0,
) -> int:
    output.append(f" Tron job: {tron_status.job_name}")
    if verbose:
        output.append(f" Status: {tron_status.job_status}")
        output.append(f" Schedule: {tron_status.job_schedule}")
    output.append(" Dashboard: {}".format(PaastaColors.blue(tron_status.job_url)))

    output.append(f" Action: {tron_status.action_name}")
    output.append(f" Status: {tron_status.action_state}")
    if verbose:
        output.append(f" Start time: {tron_status.action_start_time}")
    output.append(f" Command: {tron_status.action_command}")
    if verbose > 1:
        output.append(f" Raw Command: {tron_status.action_raw_command}")
        output.append(f" Stdout: \n{tron_status.action_stdout}")
        output.append(f" Stderr: \n{tron_status.action_stderr}")

    return 0


def print_cassandra_status(
    cluster: str,
    service: str,
    instance: str,
    output: List[str],
    cassandra_status,
    verbose: int = 0,
) -> int:
    tab = " "
    indent = 1

    status = cassandra_status.get("status")
    if status is None:
        output.append(
            indent * tab + PaastaColors.red("Cassandra cluster is not available yet")
        )
        return 1

    output.append(indent * tab + "Cassandra cluster:")
    indent += 1

    status = cassandra_status.get("status")
    state = status.get("state")

    if state == "Running":
        state = PaastaColors.green(state)
    else:
        state = PaastaColors.red(state)

    nodes: List[Dict[str, Any]] = status.get("nodes") or []
    output.append(indent * tab + "State: " + state)

    if not nodes:
        output.append(
            indent * tab + "Nodes: " + PaastaColors.red("No node status available")
        )
        return 0

    output.append(indent * tab + "Nodes:")
    indent += 1
    all_rows: List[CassandraNodeStatusRow] = []

    if not nodes:
        output.append(indent * tab + "No nodes found in CR status")
        return 0

    for node in nodes:
        if node.get("properties"):
            row: CassandraNodeStatusRow = {}
            for prop in node.get("properties"):
                verbosity = prop.get("verbosity", 0)
                name = prop["name"]

                if verbosity > verbose:
                    continue
                if not prop.get("name"):
                    continue

                row[name] = node_property_to_str(prop, verbose)
            all_rows.append(row)

    if verbose < 2:
        for rows in group_nodes_by_header(all_rows):
            lines = nodes_to_lines(verbose, rows)
            ftable = format_table(lines)
            output.extend([indent * tab + line for line in ftable])
            output.extend([indent * tab])
    else:
        for rows in group_nodes_by_header(all_rows):
            for node in rows:
                output.append(indent * tab + "Node:")
                indent += 1
                for key in node.keys():
                    output.append(
                        indent * tab + "{key}: {value}".format(key=key, value=node[key])
                    )
                indent -= 1
    return 0


CassandraNodeStatusRow = Dict[str, str]


# group_nodes_by_header groups the given nodes into several lists of rows. The
# rows in each group have the same headers.
def group_nodes_by_header(
    rows: List[CassandraNodeStatusRow] = [],
) -> List[List[CassandraNodeStatusRow]]:
    groups: Dict[str, List[CassandraNodeStatusRow]] = {}
    for row in rows:
        header = list(row.keys())
        header.sort()
        # "\0" is just a character that is unlikely to be in the header names.
        header_id = "\0".join(header)
        group = groups.get(header_id, [])
        group.append(row)
        groups[header_id] = group

    return list(groups.values())


def nodes_to_lines(
    verbose: int = 0,
    rows: List[CassandraNodeStatusRow] = [],
) -> List[List[str]]:
    header: List[str] = []
    lines: List[List[str]] = []
    for row in rows:
        if len(header) == 0:
            header = list(row.keys())
            lines.append(list(header))
        line: List[str] = []
        for key in header:
            line.append(row.get(key, ""))
        lines.append(line)
    return lines

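# Illustrative sketch (not part of the packaged file): node rows with identical
# key sets are grouped together, and each group renders header-first. The node
# names below are made up.
example_rows = [
    {"Name": "node-0", "Up": "Yes"},
    {"Name": "node-1", "Up": "No"},
    {"Name": "node-2", "State": "Joining"},  # different keys -> separate group
]
for group in group_nodes_by_header(example_rows):
    # first element of each group's lines is its header row, the rest are values
    for line in nodes_to_lines(verbose=0, rows=group):
        print(line)
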
def node_property_to_str(prop: Dict[str, Any], verbose: int) -> str:
    typ = prop.get("type")
    value = prop.get("value")

    if value is None:
        return "None"

    if typ == "string":
        return value
    elif typ in ["int", "float64"]:
        return str(value)
    elif typ == "bool":
        return "Yes" if value else "No"
    elif typ == "error":
        return PaastaColors.red(value)
    elif typ == "time":
        if verbose > 0:
            return value
        parsed_time = datetime.strptime(value, "%Y-%m-%dT%H:%M:%SZ").replace(
            tzinfo=timezone.utc
        )
        # NOTE: the k8s API returns timestamps in UTC, so we make sure to always work in UTC
        now = datetime.now(timezone.utc)
        return (
            humanize.naturaldelta(
                timedelta(seconds=(now - parsed_time).total_seconds())
            )
            + " ago"
        )
    else:
        return str(value)


def print_kafka_status(
    cluster: str,
    service: str,
    instance: str,
    output: List[str],
    kafka_status: Mapping[str, Any],
    verbose: int = 0,
) -> int:
    status = kafka_status.get("status")
    if status is None:
        output.append(PaastaColors.red(" Kafka cluster is not available yet"))
        return 1

    # print kafka view url before operator status because if the kafka cluster is not available for some reason
    # atleast the user can get a hold the kafka view url
    if status.get("kafka_view_url") is not None:
        output.append(f" Kafka View Url: {status.get('kafka_view_url')}")

    output.append(f" Zookeeper: {status['zookeeper']}")

    annotations = kafka_status.get("metadata").get("annotations")
    desired_state = annotations.get(paasta_prefixed("desired_state"))
    if desired_state is None:
        raise ValueError(
            "expected desired state in kafka annotation, but received none"
        )
    output.append(f" State: {desired_state}")

    cluster_ready = "true" if status.get("cluster_ready") else PaastaColors.red("false")
    cluster_restarting = (
        " (rolling-restart in progress)" if status["health"]["restarting"] else ""
    )
    output.append(f" Ready: {cluster_ready}{cluster_restarting}")

    if status.get("cluster_ready"):
        health: Mapping[str, Any] = status["health"]
        cluster_health = (
            PaastaColors.green("healthy")
            if health["healthy"]
            else PaastaColors.red("unhealthy")
        )
        output.append(f" Health: {cluster_health}")
        if not health.get("healthy"):
            output.append(f" Reason: {health['message']}")
            output.append(f" Offline Partitions: {health['offline_partitions']}")
            output.append(
                f" Under Replicated Partitions: {health['under_replicated_partitions']}"
            )

    brokers = status["brokers"]
    output.append(" Brokers:")

    if verbose:
        headers = ["Id", "Phase", "IP", "Pod Name", "Started"]
    else:
        headers = ["Id", "Phase", "Started"]

    rows = [headers]
    for broker in brokers:
        color_fn = (
            PaastaColors.green if broker["phase"] == "Running" else PaastaColors.red
        )

        # NOTE: the k8s API returns timestamps in UTC, so we make sure to always work in UTC
        start_time = datetime.strptime(
            broker["deployed_timestamp"], "%Y-%m-%dT%H:%M:%SZ"
        ).replace(tzinfo=timezone.utc)
        delta = datetime.now(timezone.utc) - start_time
        formatted_start_time = f"{str(start_time)} ({humanize.naturaltime(delta)})"

        if verbose:
            row = [
                str(broker["id"]),
                color_fn(broker["phase"]),
                str(broker["ip"]),
                str(broker["name"]),
                formatted_start_time,
            ]
        else:
            row = [
                str(broker["id"]),
                color_fn(broker["phase"]),
                formatted_start_time,
            ]

        rows.append(row)

    brokers_table = format_table(rows)
    output.extend([f" {line}" for line in brokers_table])

    if verbose and len(brokers) > 0:
        append_pod_status(brokers, output)

    return 0


def report_status_for_cluster(
    service: str,
    cluster: str,
    deploy_pipeline: Sequence[str],
    actual_deployments: Mapping[str, DeploymentVersion],
    instance_whitelist: Mapping[str, Type[InstanceConfig]],
    system_paasta_config: SystemPaastaConfig,
    lock: Lock,
    verbose: int = 0,
    new: bool = False,
    all_namespaces: bool = False,
) -> Tuple[int, Sequence[str]]:
    """With a given service and cluster, prints the status of the instances
    in that cluster"""
    output = ["", "service: %s" % service, "cluster: %s" % cluster]
    deployed_instances = []
    instances = [
        (instance, instance_config_class)
        for instance, instance_config_class in instance_whitelist.items()
        if instance_config_class in ALLOWED_INSTANCE_CONFIG
    ]

    # Tron instance are not present in the deploy pipeline, so treat them as
    # seen by default to avoid error messages
    seen_instances = [
        instance
        for instance, instance_config_class in instance_whitelist.items()
        if instance_config_class == TronActionConfig
    ]

    for namespace in deploy_pipeline:
        cluster_in_pipeline, instance = namespace.split(".")
        seen_instances.append(instance)

        if cluster_in_pipeline != cluster:
            continue
        if instances and instance not in instances:
            continue

        # Case: service deployed to cluster.instance
        if namespace in actual_deployments:
            deployed_instances.append(instance)

        # Case: flink instances don't use `deployments.json`
        elif instance_whitelist.get(instance) == FlinkDeploymentConfig:
            deployed_instances.append(instance)

        # Case: service NOT deployed to cluster.instance
        else:
            output.append(" instance: %s" % PaastaColors.red(instance))
            output.append(" Git sha: None (not deployed yet)")

    return_code = 0
    return_codes = []
    for deployed_instance, instance_config_class in instances:
        return_codes.append(
            paasta_status_on_api_endpoint(
                cluster=cluster,
                service=service,
                instance=deployed_instance,
                system_paasta_config=system_paasta_config,
                lock=lock,
                verbose=verbose,
                new=new,
                all_namespaces=all_namespaces,
                is_eks=(instance_config_class in EKS_DEPLOYMENT_CONFIGS),
            )
        )

    if any(return_codes):
        return_code = 1

    output.append(
        report_invalid_whitelist_values(
            whitelist=[instance[0] for instance in instances],
            items=seen_instances,
            item_type="instance",
        )
    )

    return return_code, output


def report_invalid_whitelist_values(
    whitelist: Iterable[str], items: Sequence[str], item_type: str
) -> str:
    """Warns the user if there are entries in ``whitelist`` which don't
    correspond to any item in ``items``. Helps highlight typos.
    """
    return_string = ""
    bogus_entries = []
    if whitelist is None:
        return ""
    for entry in whitelist:
        if entry not in items:
            bogus_entries.append(entry)
    if len(bogus_entries) > 0:
        return_string = (
            "\n" "Warning: This service does not have any %s matching these names:\n%s"
        ) % (item_type, ",".join(bogus_entries))
    return return_string


def normalize_registrations(
    service: str, registrations: Sequence[str]
) -> Sequence[str]:
    ret = []
    for reg in registrations:
        if "." not in reg:
            ret.append(f"{service}.{reg}")
        else:
            ret.append(reg)
    return ret

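# Illustrative sketch (not part of the packaged file): registrations without a
# dot are prefixed with the service name, fully-qualified ones pass through
# unchanged. The service and registration names are hypothetical.
assert normalize_registrations("example_service", ["main", "other.canary"]) == [
    "example_service.main",
    "other.canary",
]
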
def get_filters(
    args,
) -> Sequence[Callable[[InstanceConfig], bool]]:
    """Figures out which filters to apply from an args object, and returns them

    :param args: args object
    :returns: list of functions that take an instance config and returns if the instance conf matches the filter
    """
    filters = []

    if args.service:
        filters.append(lambda conf: conf.get_service() in args.service.split(","))

    if args.clusters:
        filters.append(lambda conf: conf.get_cluster() in args.clusters.split(","))

    if args.instances:
        filters.append(lambda conf: conf.get_instance() in args.instances.split(","))

    if args.deploy_group:
        filters.append(
            lambda conf: conf.get_deploy_group() in args.deploy_group.split(",")
        )

    if args.registration:
        normalized_regs = normalize_registrations(
            service=args.service, registrations=args.registration.split(",")
        )
        filters.append(
            lambda conf: any(
                reg in normalized_regs
                for reg in (
                    conf.get_registrations()
                    if hasattr(conf, "get_registrations")
                    else []
                )
            )
        )

    if args.owner:
        owners = args.owner.split(",")

        filters.append(
            # If the instance owner is None, check the service owner, else check the instance owner
            lambda conf: get_team(
                overrides={}, service=conf.get_service(), soa_dir=args.soa_dir
            )
            in owners
            if conf.get_team() is None
            else conf.get_team() in owners
        )

    return filters


def apply_args_filters(
    args,
) -> Mapping[str, Mapping[str, Mapping[str, Type[InstanceConfig]]]]:
    """
    Take an args object and returns the dict of cluster:service:instances
    Currently, will filter by clusters, instances, services, and deploy_groups
    If no instances are found, will print a message and try to find matching instances
    for each service

    :param args: args object containing attributes to filter by
    :returns: Dict of dicts, in format {cluster_name: {service_name: {instance1, instance2}}}
    """
    clusters_services_instances: DefaultDict[
        str, DefaultDict[str, Dict[str, Type[InstanceConfig]]]
    ] = defaultdict(lambda: defaultdict(dict))
    if args.service_instance:
        if args.service or args.instances:
            print(
                PaastaColors.red(
                    f"Invalid command. Do not include optional arguments -s or -i "
                    f"when using shorthand notation."
                )
            )
            return clusters_services_instances
        if "." in args.service_instance:
            args.service, args.instances = args.service_instance.split(".", 1)
        else:
            print(PaastaColors.red(f'Use a "." to separate service and instance name'))
            return clusters_services_instances
    if args.service:
        try:
            validate_service_name(args.service, soa_dir=args.soa_dir)
        except NoSuchService:
            print(PaastaColors.red(f'The service "{args.service}" does not exist.'))
            all_services = list_services(soa_dir=args.soa_dir)
            suggestions = difflib.get_close_matches(
                args.service, all_services, n=5, cutoff=0.5
            )
            if suggestions:
                print(PaastaColors.red(f"Did you mean any of these?"))
                for suggestion in suggestions:
                    print(PaastaColors.red(f" {suggestion}"))
            return clusters_services_instances

        all_services = [args.service]
    else:
        args.service = None
        all_services = list_services(soa_dir=args.soa_dir)
    if args.service is None and args.owner is None:
        args.service = figure_out_service_name(args, soa_dir=args.soa_dir)

    if args.clusters:
        clusters = args.clusters.split(",")
    else:
        clusters = list_clusters()

    if args.instances:
        instances = args.instances.split(",")
    else:
        instances = None

    filters = get_filters(args)

    i_count = 0
    for service in all_services:
        if args.service and service != args.service:
            continue
        for instance_conf in get_instance_configs_for_service(
            service, soa_dir=args.soa_dir, clusters=clusters, instances=instances
        ):
            if all([f(instance_conf) for f in filters]):
                cluster_service = clusters_services_instances[
                    instance_conf.get_cluster()
                ][service]
                cluster_service[instance_conf.get_instance()] = instance_conf.__class__
                i_count += 1

    if i_count == 0 and args.service and args.instances:
        for service in args.service.split(","):
            verify_instances(args.instances, service, clusters)

    return clusters_services_instances


def paasta_status(args) -> int:
    """Print the status of a Yelp service running on PaaSTA.
    :param args: argparse.Namespace obj created from sys.args by cli"""
    soa_dir = args.soa_dir
    system_paasta_config = load_system_paasta_config()

    return_codes = [0]
    lock = Lock()
    tasks = []
    clusters_services_instances = apply_args_filters(args)
    for cluster, service_instances in clusters_services_instances.items():
        for service, instances in service_instances.items():
            all_flink = all((i in FLINK_DEPLOYMENT_CONFIGS) for i in instances.values())
            actual_deployments: Mapping[str, DeploymentVersion]
            if all_flink:
                actual_deployments = {}
            else:
                actual_deployments = get_actual_deployments(service, soa_dir)
            if all_flink or actual_deployments:
                deploy_pipeline = list(get_planned_deployments(service, soa_dir))
                new = _use_new_paasta_status(args, system_paasta_config)
                tasks.append(
                    (
                        report_status_for_cluster,
                        dict(
                            service=service,
                            cluster=cluster,
                            deploy_pipeline=deploy_pipeline,
                            actual_deployments=actual_deployments,
                            instance_whitelist=instances,
                            system_paasta_config=system_paasta_config,
                            lock=lock,
                            verbose=args.verbose,
                            new=new,
                            all_namespaces=args.all_namespaces,
                        ),
                    )
                )
            else:
                print(missing_deployments_message(service))
                return_codes.append(1)

    with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
        tasks = [executor.submit(t[0], **t[1]) for t in tasks]  # type: ignore
        try:
            for future in concurrent.futures.as_completed(tasks):  # type: ignore
                return_code, output = future.result()
                return_codes.append(return_code)
        except KeyboardInterrupt:
            # ideally we wouldn't need to reach into `ThreadPoolExecutor`
            # internals, but so far this is the best way to stop all these
            # threads until a public interface is added
            executor._threads.clear()  # type: ignore
            concurrent.futures.thread._threads_queues.clear()  # type: ignore
            raise KeyboardInterrupt

    return max(return_codes)


def bouncing_status_human(app_count, bounce_method):
    if app_count == 0:
        return PaastaColors.red("Disabled")
    elif app_count == 1:
        return PaastaColors.green("Configured")
    elif app_count > 1:
        return PaastaColors.yellow("Bouncing (%s)" % bounce_method)
    else:
        return PaastaColors.red("Unknown (count: %s)" % app_count)


def desired_state_human(desired_state, instances):
    if desired_state == "start" and instances != 0:
        return PaastaColors.bold("Started")
    elif desired_state == "start" and instances == 0:
        return PaastaColors.bold("Stopped")
    elif desired_state == "stop":
        return PaastaColors.red("Stopped")
    else:
        return PaastaColors.red("Unknown (desired_state: %s)" % desired_state)


class BackendType(Enum):
    ENVOY = "Envoy"
    HAPROXY = "haproxy"


def envoy_backend_report(normal_instance_count: int, up_backends: int) -> str:
    return _backend_report(normal_instance_count, up_backends, BackendType.ENVOY)


def haproxy_backend_report(normal_instance_count: int, up_backends: int) -> str:
    return _backend_report(normal_instance_count, up_backends, BackendType.HAPROXY)


def _backend_report(
    normal_instance_count: int, up_backends: int, system_name: BackendType
) -> str:
    """Given that a service is in smartstack, this returns a human readable
    report of the up backends"""
    # TODO: Take into account a configurable threshold, PAASTA-1102
    crit_threshold = 50
    under_replicated, ratio = is_under_replicated(
        num_available=up_backends,
        expected_count=normal_instance_count,
        crit_threshold=crit_threshold,
    )
    if under_replicated:
        status = PaastaColors.red("Critical")
        count = PaastaColors.red(
            "(%d/%d, %d%%)" % (up_backends, normal_instance_count, ratio)
        )
    else:
        status = PaastaColors.green("Healthy")
        count = PaastaColors.green("(%d/%d)" % (up_backends, normal_instance_count))
    up_string = PaastaColors.bold("UP")
    return f"{status} - in {system_name} with {count} total backends {up_string} in this namespace."

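# Illustrative sketch (not part of the packaged file): with the 50% crit_threshold
# used above, 3 of 4 expected backends up should report Healthy while 1 of 4
# should report Critical. The rendered strings are approximate examples, not
# exact output.
print(envoy_backend_report(normal_instance_count=4, up_backends=3))
# e.g. "Healthy - in BackendType.ENVOY with (3/4) total backends UP in this namespace."
print(haproxy_backend_report(normal_instance_count=4, up_backends=1))
# e.g. "Critical - in BackendType.HAPROXY with (1/4, 25%) total backends UP in this namespace."
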
def _use_new_paasta_status(args, system_paasta_config) -> bool:
    if args.new:
        return True
    elif args.old:
        return False
    else:
        if system_paasta_config.get_paasta_status_version() == "old":
            return False
        elif system_paasta_config.get_paasta_status_version() == "new":
            return True
        else:
            return True


# Add other custom status writers here
# See `print_tron_status` for reference
INSTANCE_TYPE_WRITERS: Mapping[str, InstanceStatusWriter] = defaultdict(
    kubernetes=print_kubernetes_status,
    kubernetes_v2=print_kubernetes_status_v2,
    eks=print_kubernetes_status,
    tron=print_tron_status,
    adhoc=print_adhoc_status,
    flink=print_flink_status,
    flinkeks=print_flinkeks_status,
    kafkacluster=print_kafka_status,
    cassandracluster=print_cassandra_status,
)