paasta-tools 1.21.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (348) hide show
  1. k8s_itests/__init__.py +0 -0
  2. k8s_itests/test_autoscaling.py +23 -0
  3. k8s_itests/utils.py +38 -0
  4. paasta_tools/__init__.py +20 -0
  5. paasta_tools/adhoc_tools.py +142 -0
  6. paasta_tools/api/__init__.py +13 -0
  7. paasta_tools/api/api.py +330 -0
  8. paasta_tools/api/api_docs/swagger.json +2323 -0
  9. paasta_tools/api/client.py +106 -0
  10. paasta_tools/api/settings.py +33 -0
  11. paasta_tools/api/tweens/__init__.py +6 -0
  12. paasta_tools/api/tweens/auth.py +125 -0
  13. paasta_tools/api/tweens/profiling.py +108 -0
  14. paasta_tools/api/tweens/request_logger.py +124 -0
  15. paasta_tools/api/views/__init__.py +13 -0
  16. paasta_tools/api/views/autoscaler.py +100 -0
  17. paasta_tools/api/views/exception.py +45 -0
  18. paasta_tools/api/views/flink.py +73 -0
  19. paasta_tools/api/views/instance.py +395 -0
  20. paasta_tools/api/views/pause_autoscaler.py +71 -0
  21. paasta_tools/api/views/remote_run.py +113 -0
  22. paasta_tools/api/views/resources.py +76 -0
  23. paasta_tools/api/views/service.py +35 -0
  24. paasta_tools/api/views/version.py +25 -0
  25. paasta_tools/apply_external_resources.py +79 -0
  26. paasta_tools/async_utils.py +109 -0
  27. paasta_tools/autoscaling/__init__.py +0 -0
  28. paasta_tools/autoscaling/autoscaling_service_lib.py +57 -0
  29. paasta_tools/autoscaling/forecasting.py +106 -0
  30. paasta_tools/autoscaling/max_all_k8s_services.py +41 -0
  31. paasta_tools/autoscaling/pause_service_autoscaler.py +77 -0
  32. paasta_tools/autoscaling/utils.py +52 -0
  33. paasta_tools/bounce_lib.py +184 -0
  34. paasta_tools/broadcast_log_to_services.py +62 -0
  35. paasta_tools/cassandracluster_tools.py +210 -0
  36. paasta_tools/check_autoscaler_max_instances.py +212 -0
  37. paasta_tools/check_cassandracluster_services_replication.py +35 -0
  38. paasta_tools/check_flink_services_health.py +203 -0
  39. paasta_tools/check_kubernetes_api.py +57 -0
  40. paasta_tools/check_kubernetes_services_replication.py +141 -0
  41. paasta_tools/check_oom_events.py +244 -0
  42. paasta_tools/check_services_replication_tools.py +324 -0
  43. paasta_tools/check_spark_jobs.py +234 -0
  44. paasta_tools/cleanup_kubernetes_cr.py +138 -0
  45. paasta_tools/cleanup_kubernetes_crd.py +145 -0
  46. paasta_tools/cleanup_kubernetes_jobs.py +344 -0
  47. paasta_tools/cleanup_tron_namespaces.py +96 -0
  48. paasta_tools/cli/__init__.py +13 -0
  49. paasta_tools/cli/authentication.py +85 -0
  50. paasta_tools/cli/cli.py +260 -0
  51. paasta_tools/cli/cmds/__init__.py +13 -0
  52. paasta_tools/cli/cmds/autoscale.py +143 -0
  53. paasta_tools/cli/cmds/check.py +334 -0
  54. paasta_tools/cli/cmds/cook_image.py +147 -0
  55. paasta_tools/cli/cmds/get_docker_image.py +76 -0
  56. paasta_tools/cli/cmds/get_image_version.py +172 -0
  57. paasta_tools/cli/cmds/get_latest_deployment.py +93 -0
  58. paasta_tools/cli/cmds/info.py +155 -0
  59. paasta_tools/cli/cmds/itest.py +117 -0
  60. paasta_tools/cli/cmds/list.py +66 -0
  61. paasta_tools/cli/cmds/list_clusters.py +42 -0
  62. paasta_tools/cli/cmds/list_deploy_queue.py +171 -0
  63. paasta_tools/cli/cmds/list_namespaces.py +84 -0
  64. paasta_tools/cli/cmds/local_run.py +1396 -0
  65. paasta_tools/cli/cmds/logs.py +1601 -0
  66. paasta_tools/cli/cmds/mark_for_deployment.py +1988 -0
  67. paasta_tools/cli/cmds/mesh_status.py +174 -0
  68. paasta_tools/cli/cmds/pause_service_autoscaler.py +107 -0
  69. paasta_tools/cli/cmds/push_to_registry.py +275 -0
  70. paasta_tools/cli/cmds/remote_run.py +252 -0
  71. paasta_tools/cli/cmds/rollback.py +347 -0
  72. paasta_tools/cli/cmds/secret.py +549 -0
  73. paasta_tools/cli/cmds/security_check.py +59 -0
  74. paasta_tools/cli/cmds/spark_run.py +1400 -0
  75. paasta_tools/cli/cmds/start_stop_restart.py +401 -0
  76. paasta_tools/cli/cmds/status.py +2302 -0
  77. paasta_tools/cli/cmds/validate.py +1012 -0
  78. paasta_tools/cli/cmds/wait_for_deployment.py +275 -0
  79. paasta_tools/cli/fsm/__init__.py +13 -0
  80. paasta_tools/cli/fsm/autosuggest.py +82 -0
  81. paasta_tools/cli/fsm/template/README.md +8 -0
  82. paasta_tools/cli/fsm/template/cookiecutter.json +7 -0
  83. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/kubernetes-PROD.yaml +91 -0
  84. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/monitoring.yaml +20 -0
  85. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/service.yaml +8 -0
  86. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/smartstack.yaml +6 -0
  87. paasta_tools/cli/fsm_cmd.py +121 -0
  88. paasta_tools/cli/paasta_tabcomplete.sh +23 -0
  89. paasta_tools/cli/schemas/adhoc_schema.json +199 -0
  90. paasta_tools/cli/schemas/autoscaling_schema.json +91 -0
  91. paasta_tools/cli/schemas/autotuned_defaults/cassandracluster_schema.json +37 -0
  92. paasta_tools/cli/schemas/autotuned_defaults/kubernetes_schema.json +89 -0
  93. paasta_tools/cli/schemas/deploy_schema.json +173 -0
  94. paasta_tools/cli/schemas/eks_schema.json +970 -0
  95. paasta_tools/cli/schemas/kubernetes_schema.json +970 -0
  96. paasta_tools/cli/schemas/rollback_schema.json +160 -0
  97. paasta_tools/cli/schemas/service_schema.json +25 -0
  98. paasta_tools/cli/schemas/smartstack_schema.json +322 -0
  99. paasta_tools/cli/schemas/tron_schema.json +699 -0
  100. paasta_tools/cli/utils.py +1118 -0
  101. paasta_tools/clusterman.py +21 -0
  102. paasta_tools/config_utils.py +385 -0
  103. paasta_tools/contrib/__init__.py +0 -0
  104. paasta_tools/contrib/bounce_log_latency_parser.py +68 -0
  105. paasta_tools/contrib/check_manual_oapi_changes.sh +24 -0
  106. paasta_tools/contrib/check_orphans.py +306 -0
  107. paasta_tools/contrib/create_dynamodb_table.py +35 -0
  108. paasta_tools/contrib/create_paasta_playground.py +105 -0
  109. paasta_tools/contrib/emit_allocated_cpu_metrics.py +50 -0
  110. paasta_tools/contrib/get_running_task_allocation.py +346 -0
  111. paasta_tools/contrib/habitat_fixer.py +86 -0
  112. paasta_tools/contrib/ide_helper.py +316 -0
  113. paasta_tools/contrib/is_pod_healthy_in_proxy.py +139 -0
  114. paasta_tools/contrib/is_pod_healthy_in_smartstack.py +50 -0
  115. paasta_tools/contrib/kill_bad_containers.py +109 -0
  116. paasta_tools/contrib/mass-deploy-tag.sh +44 -0
  117. paasta_tools/contrib/mock_patch_checker.py +86 -0
  118. paasta_tools/contrib/paasta_update_soa_memcpu.py +520 -0
  119. paasta_tools/contrib/render_template.py +129 -0
  120. paasta_tools/contrib/rightsizer_soaconfigs_update.py +348 -0
  121. paasta_tools/contrib/service_shard_remove.py +157 -0
  122. paasta_tools/contrib/service_shard_update.py +373 -0
  123. paasta_tools/contrib/shared_ip_check.py +77 -0
  124. paasta_tools/contrib/timeouts_metrics_prom.py +64 -0
  125. paasta_tools/delete_kubernetes_deployments.py +89 -0
  126. paasta_tools/deployment_utils.py +44 -0
  127. paasta_tools/docker_wrapper.py +234 -0
  128. paasta_tools/docker_wrapper_imports.py +13 -0
  129. paasta_tools/drain_lib.py +351 -0
  130. paasta_tools/dump_locally_running_services.py +71 -0
  131. paasta_tools/eks_tools.py +119 -0
  132. paasta_tools/envoy_tools.py +373 -0
  133. paasta_tools/firewall.py +504 -0
  134. paasta_tools/firewall_logging.py +154 -0
  135. paasta_tools/firewall_update.py +172 -0
  136. paasta_tools/flink_tools.py +345 -0
  137. paasta_tools/flinkeks_tools.py +90 -0
  138. paasta_tools/frameworks/__init__.py +0 -0
  139. paasta_tools/frameworks/adhoc_scheduler.py +71 -0
  140. paasta_tools/frameworks/constraints.py +87 -0
  141. paasta_tools/frameworks/native_scheduler.py +652 -0
  142. paasta_tools/frameworks/native_service_config.py +301 -0
  143. paasta_tools/frameworks/task_store.py +245 -0
  144. paasta_tools/generate_all_deployments +9 -0
  145. paasta_tools/generate_authenticating_services.py +94 -0
  146. paasta_tools/generate_deployments_for_service.py +255 -0
  147. paasta_tools/generate_services_file.py +114 -0
  148. paasta_tools/generate_services_yaml.py +30 -0
  149. paasta_tools/hacheck.py +76 -0
  150. paasta_tools/instance/__init__.py +0 -0
  151. paasta_tools/instance/hpa_metrics_parser.py +122 -0
  152. paasta_tools/instance/kubernetes.py +1362 -0
  153. paasta_tools/iptables.py +240 -0
  154. paasta_tools/kafkacluster_tools.py +143 -0
  155. paasta_tools/kubernetes/__init__.py +0 -0
  156. paasta_tools/kubernetes/application/__init__.py +0 -0
  157. paasta_tools/kubernetes/application/controller_wrappers.py +476 -0
  158. paasta_tools/kubernetes/application/tools.py +90 -0
  159. paasta_tools/kubernetes/bin/__init__.py +0 -0
  160. paasta_tools/kubernetes/bin/kubernetes_remove_evicted_pods.py +164 -0
  161. paasta_tools/kubernetes/bin/paasta_cleanup_remote_run_resources.py +135 -0
  162. paasta_tools/kubernetes/bin/paasta_cleanup_stale_nodes.py +181 -0
  163. paasta_tools/kubernetes/bin/paasta_secrets_sync.py +758 -0
  164. paasta_tools/kubernetes/remote_run.py +558 -0
  165. paasta_tools/kubernetes_tools.py +4679 -0
  166. paasta_tools/list_kubernetes_service_instances.py +128 -0
  167. paasta_tools/list_tron_namespaces.py +60 -0
  168. paasta_tools/long_running_service_tools.py +678 -0
  169. paasta_tools/mac_address.py +44 -0
  170. paasta_tools/marathon_dashboard.py +0 -0
  171. paasta_tools/mesos/__init__.py +0 -0
  172. paasta_tools/mesos/cfg.py +46 -0
  173. paasta_tools/mesos/cluster.py +60 -0
  174. paasta_tools/mesos/exceptions.py +59 -0
  175. paasta_tools/mesos/framework.py +77 -0
  176. paasta_tools/mesos/log.py +48 -0
  177. paasta_tools/mesos/master.py +306 -0
  178. paasta_tools/mesos/mesos_file.py +169 -0
  179. paasta_tools/mesos/parallel.py +52 -0
  180. paasta_tools/mesos/slave.py +115 -0
  181. paasta_tools/mesos/task.py +94 -0
  182. paasta_tools/mesos/util.py +69 -0
  183. paasta_tools/mesos/zookeeper.py +37 -0
  184. paasta_tools/mesos_maintenance.py +848 -0
  185. paasta_tools/mesos_tools.py +1051 -0
  186. paasta_tools/metrics/__init__.py +0 -0
  187. paasta_tools/metrics/metastatus_lib.py +1110 -0
  188. paasta_tools/metrics/metrics_lib.py +217 -0
  189. paasta_tools/monitoring/__init__.py +13 -0
  190. paasta_tools/monitoring/check_k8s_api_performance.py +110 -0
  191. paasta_tools/monitoring_tools.py +652 -0
  192. paasta_tools/monkrelaycluster_tools.py +146 -0
  193. paasta_tools/nrtsearchservice_tools.py +143 -0
  194. paasta_tools/nrtsearchserviceeks_tools.py +68 -0
  195. paasta_tools/oom_logger.py +321 -0
  196. paasta_tools/paasta_deploy_tron_jobs +3 -0
  197. paasta_tools/paasta_execute_docker_command.py +123 -0
  198. paasta_tools/paasta_native_serviceinit.py +21 -0
  199. paasta_tools/paasta_service_config_loader.py +201 -0
  200. paasta_tools/paastaapi/__init__.py +29 -0
  201. paasta_tools/paastaapi/api/__init__.py +3 -0
  202. paasta_tools/paastaapi/api/autoscaler_api.py +302 -0
  203. paasta_tools/paastaapi/api/default_api.py +569 -0
  204. paasta_tools/paastaapi/api/remote_run_api.py +604 -0
  205. paasta_tools/paastaapi/api/resources_api.py +157 -0
  206. paasta_tools/paastaapi/api/service_api.py +1736 -0
  207. paasta_tools/paastaapi/api_client.py +818 -0
  208. paasta_tools/paastaapi/apis/__init__.py +22 -0
  209. paasta_tools/paastaapi/configuration.py +455 -0
  210. paasta_tools/paastaapi/exceptions.py +137 -0
  211. paasta_tools/paastaapi/model/__init__.py +5 -0
  212. paasta_tools/paastaapi/model/adhoc_launch_history.py +176 -0
  213. paasta_tools/paastaapi/model/autoscaler_count_msg.py +176 -0
  214. paasta_tools/paastaapi/model/deploy_queue.py +178 -0
  215. paasta_tools/paastaapi/model/deploy_queue_service_instance.py +194 -0
  216. paasta_tools/paastaapi/model/envoy_backend.py +185 -0
  217. paasta_tools/paastaapi/model/envoy_location.py +184 -0
  218. paasta_tools/paastaapi/model/envoy_status.py +181 -0
  219. paasta_tools/paastaapi/model/flink_cluster_overview.py +188 -0
  220. paasta_tools/paastaapi/model/flink_config.py +173 -0
  221. paasta_tools/paastaapi/model/flink_job.py +186 -0
  222. paasta_tools/paastaapi/model/flink_job_details.py +192 -0
  223. paasta_tools/paastaapi/model/flink_jobs.py +175 -0
  224. paasta_tools/paastaapi/model/float_and_error.py +173 -0
  225. paasta_tools/paastaapi/model/hpa_metric.py +176 -0
  226. paasta_tools/paastaapi/model/inline_object.py +170 -0
  227. paasta_tools/paastaapi/model/inline_response200.py +170 -0
  228. paasta_tools/paastaapi/model/inline_response2001.py +170 -0
  229. paasta_tools/paastaapi/model/instance_bounce_status.py +200 -0
  230. paasta_tools/paastaapi/model/instance_mesh_status.py +186 -0
  231. paasta_tools/paastaapi/model/instance_status.py +220 -0
  232. paasta_tools/paastaapi/model/instance_status_adhoc.py +187 -0
  233. paasta_tools/paastaapi/model/instance_status_cassandracluster.py +173 -0
  234. paasta_tools/paastaapi/model/instance_status_flink.py +173 -0
  235. paasta_tools/paastaapi/model/instance_status_kafkacluster.py +173 -0
  236. paasta_tools/paastaapi/model/instance_status_kubernetes.py +263 -0
  237. paasta_tools/paastaapi/model/instance_status_kubernetes_autoscaling_status.py +187 -0
  238. paasta_tools/paastaapi/model/instance_status_kubernetes_v2.py +197 -0
  239. paasta_tools/paastaapi/model/instance_status_tron.py +204 -0
  240. paasta_tools/paastaapi/model/instance_tasks.py +182 -0
  241. paasta_tools/paastaapi/model/integer_and_error.py +173 -0
  242. paasta_tools/paastaapi/model/kubernetes_container.py +178 -0
  243. paasta_tools/paastaapi/model/kubernetes_container_v2.py +219 -0
  244. paasta_tools/paastaapi/model/kubernetes_healthcheck.py +176 -0
  245. paasta_tools/paastaapi/model/kubernetes_pod.py +201 -0
  246. paasta_tools/paastaapi/model/kubernetes_pod_event.py +176 -0
  247. paasta_tools/paastaapi/model/kubernetes_pod_v2.py +213 -0
  248. paasta_tools/paastaapi/model/kubernetes_replica_set.py +185 -0
  249. paasta_tools/paastaapi/model/kubernetes_version.py +202 -0
  250. paasta_tools/paastaapi/model/remote_run_outcome.py +189 -0
  251. paasta_tools/paastaapi/model/remote_run_start.py +185 -0
  252. paasta_tools/paastaapi/model/remote_run_stop.py +176 -0
  253. paasta_tools/paastaapi/model/remote_run_token.py +173 -0
  254. paasta_tools/paastaapi/model/resource.py +187 -0
  255. paasta_tools/paastaapi/model/resource_item.py +187 -0
  256. paasta_tools/paastaapi/model/resource_value.py +176 -0
  257. paasta_tools/paastaapi/model/smartstack_backend.py +191 -0
  258. paasta_tools/paastaapi/model/smartstack_location.py +181 -0
  259. paasta_tools/paastaapi/model/smartstack_status.py +181 -0
  260. paasta_tools/paastaapi/model/task_tail_lines.py +176 -0
  261. paasta_tools/paastaapi/model_utils.py +1879 -0
  262. paasta_tools/paastaapi/models/__init__.py +62 -0
  263. paasta_tools/paastaapi/rest.py +287 -0
  264. paasta_tools/prune_completed_pods.py +220 -0
  265. paasta_tools/puppet_service_tools.py +59 -0
  266. paasta_tools/py.typed +1 -0
  267. paasta_tools/remote_git.py +127 -0
  268. paasta_tools/run-paasta-api-in-dev-mode.py +57 -0
  269. paasta_tools/run-paasta-api-playground.py +51 -0
  270. paasta_tools/secret_providers/__init__.py +66 -0
  271. paasta_tools/secret_providers/vault.py +214 -0
  272. paasta_tools/secret_tools.py +277 -0
  273. paasta_tools/setup_istio_mesh.py +353 -0
  274. paasta_tools/setup_kubernetes_cr.py +412 -0
  275. paasta_tools/setup_kubernetes_crd.py +138 -0
  276. paasta_tools/setup_kubernetes_internal_crd.py +154 -0
  277. paasta_tools/setup_kubernetes_job.py +353 -0
  278. paasta_tools/setup_prometheus_adapter_config.py +1028 -0
  279. paasta_tools/setup_tron_namespace.py +248 -0
  280. paasta_tools/slack.py +75 -0
  281. paasta_tools/smartstack_tools.py +676 -0
  282. paasta_tools/spark_tools.py +283 -0
  283. paasta_tools/synapse_srv_namespaces_fact.py +42 -0
  284. paasta_tools/tron/__init__.py +0 -0
  285. paasta_tools/tron/client.py +158 -0
  286. paasta_tools/tron/tron_command_context.py +194 -0
  287. paasta_tools/tron/tron_timeutils.py +101 -0
  288. paasta_tools/tron_tools.py +1448 -0
  289. paasta_tools/utils.py +4307 -0
  290. paasta_tools/yaml_tools.py +44 -0
  291. paasta_tools-1.21.3.data/scripts/apply_external_resources.py +79 -0
  292. paasta_tools-1.21.3.data/scripts/bounce_log_latency_parser.py +68 -0
  293. paasta_tools-1.21.3.data/scripts/check_autoscaler_max_instances.py +212 -0
  294. paasta_tools-1.21.3.data/scripts/check_cassandracluster_services_replication.py +35 -0
  295. paasta_tools-1.21.3.data/scripts/check_flink_services_health.py +203 -0
  296. paasta_tools-1.21.3.data/scripts/check_kubernetes_api.py +57 -0
  297. paasta_tools-1.21.3.data/scripts/check_kubernetes_services_replication.py +141 -0
  298. paasta_tools-1.21.3.data/scripts/check_manual_oapi_changes.sh +24 -0
  299. paasta_tools-1.21.3.data/scripts/check_oom_events.py +244 -0
  300. paasta_tools-1.21.3.data/scripts/check_orphans.py +306 -0
  301. paasta_tools-1.21.3.data/scripts/check_spark_jobs.py +234 -0
  302. paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_cr.py +138 -0
  303. paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_crd.py +145 -0
  304. paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_jobs.py +344 -0
  305. paasta_tools-1.21.3.data/scripts/create_dynamodb_table.py +35 -0
  306. paasta_tools-1.21.3.data/scripts/create_paasta_playground.py +105 -0
  307. paasta_tools-1.21.3.data/scripts/delete_kubernetes_deployments.py +89 -0
  308. paasta_tools-1.21.3.data/scripts/emit_allocated_cpu_metrics.py +50 -0
  309. paasta_tools-1.21.3.data/scripts/generate_all_deployments +9 -0
  310. paasta_tools-1.21.3.data/scripts/generate_authenticating_services.py +94 -0
  311. paasta_tools-1.21.3.data/scripts/generate_deployments_for_service.py +255 -0
  312. paasta_tools-1.21.3.data/scripts/generate_services_file.py +114 -0
  313. paasta_tools-1.21.3.data/scripts/generate_services_yaml.py +30 -0
  314. paasta_tools-1.21.3.data/scripts/get_running_task_allocation.py +346 -0
  315. paasta_tools-1.21.3.data/scripts/habitat_fixer.py +86 -0
  316. paasta_tools-1.21.3.data/scripts/ide_helper.py +316 -0
  317. paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_proxy.py +139 -0
  318. paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_smartstack.py +50 -0
  319. paasta_tools-1.21.3.data/scripts/kill_bad_containers.py +109 -0
  320. paasta_tools-1.21.3.data/scripts/kubernetes_remove_evicted_pods.py +164 -0
  321. paasta_tools-1.21.3.data/scripts/mass-deploy-tag.sh +44 -0
  322. paasta_tools-1.21.3.data/scripts/mock_patch_checker.py +86 -0
  323. paasta_tools-1.21.3.data/scripts/paasta_cleanup_remote_run_resources.py +135 -0
  324. paasta_tools-1.21.3.data/scripts/paasta_cleanup_stale_nodes.py +181 -0
  325. paasta_tools-1.21.3.data/scripts/paasta_deploy_tron_jobs +3 -0
  326. paasta_tools-1.21.3.data/scripts/paasta_execute_docker_command.py +123 -0
  327. paasta_tools-1.21.3.data/scripts/paasta_secrets_sync.py +758 -0
  328. paasta_tools-1.21.3.data/scripts/paasta_tabcomplete.sh +23 -0
  329. paasta_tools-1.21.3.data/scripts/paasta_update_soa_memcpu.py +520 -0
  330. paasta_tools-1.21.3.data/scripts/render_template.py +129 -0
  331. paasta_tools-1.21.3.data/scripts/rightsizer_soaconfigs_update.py +348 -0
  332. paasta_tools-1.21.3.data/scripts/service_shard_remove.py +157 -0
  333. paasta_tools-1.21.3.data/scripts/service_shard_update.py +373 -0
  334. paasta_tools-1.21.3.data/scripts/setup_istio_mesh.py +353 -0
  335. paasta_tools-1.21.3.data/scripts/setup_kubernetes_cr.py +412 -0
  336. paasta_tools-1.21.3.data/scripts/setup_kubernetes_crd.py +138 -0
  337. paasta_tools-1.21.3.data/scripts/setup_kubernetes_internal_crd.py +154 -0
  338. paasta_tools-1.21.3.data/scripts/setup_kubernetes_job.py +353 -0
  339. paasta_tools-1.21.3.data/scripts/setup_prometheus_adapter_config.py +1028 -0
  340. paasta_tools-1.21.3.data/scripts/shared_ip_check.py +77 -0
  341. paasta_tools-1.21.3.data/scripts/synapse_srv_namespaces_fact.py +42 -0
  342. paasta_tools-1.21.3.data/scripts/timeouts_metrics_prom.py +64 -0
  343. paasta_tools-1.21.3.dist-info/LICENSE +201 -0
  344. paasta_tools-1.21.3.dist-info/METADATA +74 -0
  345. paasta_tools-1.21.3.dist-info/RECORD +348 -0
  346. paasta_tools-1.21.3.dist-info/WHEEL +5 -0
  347. paasta_tools-1.21.3.dist-info/entry_points.txt +20 -0
  348. paasta_tools-1.21.3.dist-info/top_level.txt +2 -0
@@ -0,0 +1,2302 @@
1
+ #!/usr/bin/env python
2
+ # Copyright 2015-2016 Yelp Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ import asyncio
16
+ import concurrent.futures
17
+ import difflib
18
+ import shutil
19
+ import sys
20
+ from collections import Counter
21
+ from collections import defaultdict
22
+ from datetime import datetime
23
+ from datetime import timedelta
24
+ from datetime import timezone
25
+ from enum import Enum
26
+ from itertools import groupby
27
+ from threading import Lock
28
+ from typing import Any
29
+ from typing import Callable
30
+ from typing import Collection
31
+ from typing import DefaultDict
32
+ from typing import Dict
33
+ from typing import Iterable
34
+ from typing import List
35
+ from typing import Mapping
36
+ from typing import Optional
37
+ from typing import Sequence
38
+ from typing import Tuple
39
+ from typing import Type
40
+ from typing import Union
41
+
42
+ import a_sync
43
+ import humanize
44
+ from mypy_extensions import Arg
45
+ from service_configuration_lib import read_deploy
46
+
47
+ from paasta_tools import flink_tools
48
+ from paasta_tools import kubernetes_tools
49
+ from paasta_tools.adhoc_tools import AdhocJobConfig
50
+ from paasta_tools.api.client import get_paasta_oapi_client
51
+ from paasta_tools.api.client import PaastaOApiClient
52
+ from paasta_tools.cassandracluster_tools import CassandraClusterDeploymentConfig
53
+ from paasta_tools.cli.utils import figure_out_service_name
54
+ from paasta_tools.cli.utils import get_instance_configs_for_service
55
+ from paasta_tools.cli.utils import get_paasta_oapi_api_clustername
56
+ from paasta_tools.cli.utils import lazy_choices_completer
57
+ from paasta_tools.cli.utils import list_deploy_groups
58
+ from paasta_tools.cli.utils import NoSuchService
59
+ from paasta_tools.cli.utils import validate_service_name
60
+ from paasta_tools.cli.utils import verify_instances
61
+ from paasta_tools.eks_tools import EksDeploymentConfig
62
+ from paasta_tools.flink_tools import FlinkDeploymentConfig
63
+ from paasta_tools.flink_tools import get_flink_config_from_paasta_api_client
64
+ from paasta_tools.flink_tools import get_flink_jobs_from_paasta_api_client
65
+ from paasta_tools.flink_tools import get_flink_overview_from_paasta_api_client
66
+ from paasta_tools.flinkeks_tools import FlinkEksDeploymentConfig
67
+ from paasta_tools.kafkacluster_tools import KafkaClusterDeploymentConfig
68
+ from paasta_tools.kubernetes_tools import format_pod_event_messages
69
+ from paasta_tools.kubernetes_tools import format_tail_lines_for_kubernetes_pod
70
+ from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig
71
+ from paasta_tools.kubernetes_tools import KubernetesDeployStatus
72
+ from paasta_tools.kubernetes_tools import paasta_prefixed
73
+ from paasta_tools.monitoring_tools import get_team
74
+ from paasta_tools.monitoring_tools import list_teams
75
+ from paasta_tools.paasta_service_config_loader import PaastaServiceConfigLoader
76
+ from paasta_tools.paastaapi.model.flink_job_details import FlinkJobDetails
77
+ from paasta_tools.paastaapi.model.flink_jobs import FlinkJobs
78
+ from paasta_tools.paastaapi.models import InstanceStatusKubernetesV2
79
+ from paasta_tools.paastaapi.models import KubernetesContainerV2
80
+ from paasta_tools.paastaapi.models import KubernetesPodV2
81
+ from paasta_tools.paastaapi.models import KubernetesVersion
82
+ from paasta_tools.tron_tools import TronActionConfig
83
+ from paasta_tools.utils import compose_job_id
84
+ from paasta_tools.utils import DEFAULT_SOA_DIR
85
+ from paasta_tools.utils import DeploymentVersion
86
+ from paasta_tools.utils import format_table
87
+ from paasta_tools.utils import get_deployment_version_from_dockerurl
88
+ from paasta_tools.utils import get_soa_cluster_deploy_files
89
+ from paasta_tools.utils import InstanceConfig
90
+ from paasta_tools.utils import is_under_replicated
91
+ from paasta_tools.utils import list_clusters
92
+ from paasta_tools.utils import list_services
93
+ from paasta_tools.utils import load_system_paasta_config
94
+ from paasta_tools.utils import PaastaColors
95
+ from paasta_tools.utils import remove_ansi_escape_sequences
96
+ from paasta_tools.utils import SystemPaastaConfig
97
+
98
FLINK_STATUS_MAX_THREAD_POOL_WORKERS = 50

# Tron instances are not included in deployments, so TronActionConfig is
# deliberately absent from this list.
DEPLOYMENT_INSTANCE_CONFIG: Sequence[Type[InstanceConfig]] = [
    FlinkDeploymentConfig,
    FlinkEksDeploymentConfig,
    CassandraClusterDeploymentConfig,
    KafkaClusterDeploymentConfig,
    KubernetesDeploymentConfig,
    EksDeploymentConfig,
    AdhocJobConfig,
]

# Every deployment config class (same order as above) followed by
# TronActionConfig.
ALLOWED_INSTANCE_CONFIG: Sequence[Type[InstanceConfig]] = [
    *DEPLOYMENT_INSTANCE_CONFIG,
    TronActionConfig,
]

# Signature of a status writer: it receives the cluster, service and instance
# names, the list of output lines, one untyped positional value and the
# verbosity level, and returns an int.
InstanceStatusWriter = Callable[
    [
        Arg(str, "cluster"),
        Arg(str, "service"),
        Arg(str, "instance"),
        Arg(List[str], "output"),
        Arg(Any),
        Arg(int, "verbose"),
    ],
    int,
]

EKS_DEPLOYMENT_CONFIGS = [EksDeploymentConfig, FlinkEksDeploymentConfig]
FLINK_DEPLOYMENT_CONFIGS = [
    FlinkDeploymentConfig,
    FlinkEksDeploymentConfig,
]
138
+
139
+
140
def add_subparser(
    subparsers,
) -> None:
    """Register the ``status`` subcommand and all of its arguments.

    Adds the verbosity, soa-dir and all-namespaces options, a mutually
    exclusive ``--new``/``--old`` pair and the shared instance filter
    arguments, then sets ``paasta_status`` as the parser's default ``command``.

    :param subparsers: object whose ``add_parser`` method creates the new
        subcommand parser
    """
    parser = subparsers.add_parser(
        "status",
        description=(
            "'paasta status' queries the PaaSTA API in order to report "
            "on the overall health of a service."
        ),
        help="Display the status of a PaaSTA service.",
    )

    # Each -v raises the verbosity level by one.
    parser.add_argument(
        "-v",
        "--verbose",
        dest="verbose",
        action="count",
        default=0,
        help=(
            "Print out more output regarding the state of the service. "
            "A second -v will also print the stdout/stderr tail."
        ),
    )
    parser.add_argument(
        "-d",
        "--soa-dir",
        dest="soa_dir",
        default=DEFAULT_SOA_DIR,
        metavar="SOA_DIR",
        help="define a different soa config directory",
    )
    parser.add_argument(
        "-A",
        "--all-namespaces",
        dest="all_namespaces",
        default=False,
        action="store_true",
        help=(
            "Search all PaaSTA-managed namespaces for possible running versions "
            "(Will search only your currently-configured namespace by default). "
            "Useful if you are moving your instance(s) to a new namespace"
        ),
    )

    # --new and --old cannot be passed together.
    version_group = parser.add_mutually_exclusive_group()
    version_group.add_argument(
        "--new",
        dest="new",
        default=False,
        action="store_true",
        help="Use experimental new version of paasta status for services",
    )
    version_group.add_argument(
        "--old",
        dest="old",
        default=False,
        action="store_true",
        help="Use the old version of paasta status for services",
    )

    add_instance_filter_arguments(parser)
    parser.set_defaults(command=paasta_status)
196
+
197
+
198
def add_instance_filter_arguments(status_parser, verb: str = "inspect") -> None:
    """Add the service/cluster/instance filter arguments to ``status_parser``.

    :param status_parser: parser object the arguments are added to
    :param verb: word interpolated into every help string to describe what the
        calling command does with the selected instances
    """
    service_arg = status_parser.add_argument(
        "-s",
        "--service",
        help=f"The name of the service you wish to {verb}",
    )
    service_arg.completer = lazy_choices_completer(list_services)

    clusters_arg = status_parser.add_argument(
        "-c",
        "--clusters",
        help=(
            f"A comma-separated list of clusters to {verb}. "
            f"By default, will {verb} all clusters.\n"
            "For example: --clusters pnw-prod,nova-prod"
        ),
    )
    clusters_arg.completer = lazy_choices_completer(list_clusters)

    # No completer because we need to know service first and we can't until
    # some other stuff has happened
    status_parser.add_argument(
        "-i",
        "--instances",
        help=(
            f"A comma-separated list of instances to {verb}. "
            f"By default, will {verb} all instances.\n"
            "For example: --instances canary,main"
        ),
    )

    deploy_group_arg = status_parser.add_argument(
        "-l",
        "--deploy-group",
        help=(
            f"Name of the deploy group which you want to {verb}. "
            "If specified together with --instances and/or --clusters, "
            f"will {verb} common instances only."
        ),
    )
    deploy_group_arg.completer = lazy_choices_completer(list_deploy_groups)

    owner_arg = status_parser.add_argument(
        "-o",
        "--owner",
        help=f"Only {verb} instances with this owner specified in soa-configs.",
    )
    owner_arg.completer = lazy_choices_completer(list_teams)

    status_parser.add_argument(
        "-r",
        "--registration",
        help=f"Only {verb} instances with this registration.",
    )

    # Optional positional argument.
    status_parser.add_argument(
        "service_instance",
        nargs="?",
        help=(
            f"A shorthand notation to {verb} instances. "
            'For example: "paasta status example_happyhour.canary,main"'
        ),
    )
235
+
236
+
237
def missing_deployments_message(
    service: str,
) -> str:
    """Build the message shown when a service has no entries in deployments.json.

    :param service: service name interpolated at the start of the message
    :returns: a two-line message; the second line begins with a single space
    """
    return f"{service} has no deployments in deployments.json yet.\n Has Jenkins run?"
244
+
245
+
246
def get_deploy_info(
    deploy_file_path: str,
) -> Mapping:
    """Read a deploy file, terminating the process if nothing could be read.

    :param deploy_file_path: path passed straight to ``read_deploy``
    :returns: the truthy value returned by ``read_deploy``
    :raises SystemExit: with status 1 when ``read_deploy`` returns a falsy
        value (an error line is printed first)
    """
    deploy_info = read_deploy(deploy_file_path)
    if not deploy_info:
        print(f"Error encountered with {deploy_file_path}")
        # Use sys.exit rather than the builtin exit(): the latter is injected
        # by the site module for interactive use and is not available when
        # site is not loaded (e.g. ``python -S``).
        sys.exit(1)
    return deploy_info
255
+
256
+
257
def get_planned_deployments(service: str, soa_dir: str) -> Iterable[str]:
    """Lazily yield a ``cluster.instance`` string for each planned deployment.

    Walks every (cluster, deploy file) pair returned by
    ``get_soa_cluster_deploy_files`` and yields one entry per key of that
    file's deploy info.

    :param service: service name forwarded to ``get_soa_cluster_deploy_files``
    :param soa_dir: soa config directory forwarded alongside it
    """
    deploy_files = get_soa_cluster_deploy_files(service=service, soa_dir=soa_dir)
    for cluster, deploy_file_path in deploy_files:
        yield from (
            f"{cluster}.{instance}" for instance in get_deploy_info(deploy_file_path)
        )
263
+
264
+
265
def get_actual_deployments(
    service: str, soa_dir: str
) -> Mapping[str, DeploymentVersion]:
    """Given a service, return a dict of ``cluster.instance`` -> DeploymentVersion.

    Every class in ``DEPLOYMENT_INSTANCE_CONFIG`` is loaded for every cluster of
    the service; the version is derived from each instance config's docker
    image. A warning is printed to stderr when the result is empty.
    """
    loader = PaastaServiceConfigLoader(service=service, soa_dir=soa_dir)
    service_clusters = list_clusters(service=service, soa_dir=soa_dir)

    # Later entries overwrite earlier ones that share a cluster.instance key.
    deployments = {
        f"{cluster}.{config.instance}": get_deployment_version_from_dockerurl(
            config.get_docker_image()
        )
        for cluster in service_clusters
        for config_class in DEPLOYMENT_INSTANCE_CONFIG
        for config in loader.instance_configs(
            cluster=cluster, instance_type_class=config_class
        )
    }

    if len(deployments) == 0:
        print(
            f"Warning: it looks like {service} has not been deployed anywhere yet!",
            file=sys.stderr,
        )
    return deployments
287
+
288
+
289
def _collect_instance_status_output(
    client,
    cluster: str,
    service: str,
    instance: str,
    verbose: int,
    new: bool,
    all_namespaces: bool,
    output: List[str],
) -> int:
    """Query the status API and append the rendered status lines to ``output``.

    Nothing is printed here; the caller is responsible for emitting ``output``.

    :return: 0 on success, the API error status on an API error, 1 on
        connection/unknown errors, or the last non-zero status-writer result
    """
    try:
        status = client.service.status_instance(
            service=service,
            instance=instance,
            verbose=verbose,
            new=new,
            all_namespaces=all_namespaces,
        )
    except client.api_error as exc:
        output.append(PaastaColors.red(exc.reason))
        return exc.status
    except (client.connection_error, client.timeout_error) as exc:
        output.append(
            PaastaColors.red(f"Could not connect to API: {exc.__class__.__name__}")
        )
        return 1
    except Exception as e:
        output.append(PaastaColors.red("Exception when talking to the API:"))
        output.append(str(e))
        return 1

    if status.version:
        output.append(f" Version: {status.version} (desired)")
    # TODO: Remove this when all clusters are returning status.version
    elif status.git_sha != "":
        output.append(f" Git sha: {status.git_sha} (desired)")

    instance_types = find_instance_types(status)
    if not instance_types:
        output.append(
            PaastaColors.red(
                f"{instance} is not currently supported by `paasta status` - "
                f"unable to find status metadata in API response."
            )
        )
        return 0

    ret_code = 0
    for instance_type in instance_types:
        # check the actual status value and call the corresponding status writer
        service_status_value = getattr(status, instance_type)
        # find_instance_types only returns keys of INSTANCE_TYPE_WRITERS, so a
        # direct lookup is safe and cannot silently hand back None.
        writer_callable = INSTANCE_TYPE_WRITERS[instance_type]
        ret = writer_callable(
            cluster, service, instance, output, service_status_value, verbose
        )
        if ret != 0:
            output.append(
                f"Status writer failed for {instance_type} with return value {ret}"
            )
            ret_code = ret
    return ret_code


def paasta_status_on_api_endpoint(
    cluster: str,
    service: str,
    instance: str,
    system_paasta_config: SystemPaastaConfig,
    lock: Lock,
    verbose: int,
    new: bool = False,
    is_eks: bool = False,
    all_namespaces: bool = False,
) -> int:
    """Fetch the status of one service instance from the paasta API and print it.

    The collected lines are printed in a single ``print`` call while holding
    ``lock``. Unlike the previous implementation, the lines are also printed
    when the API call fails or the instance type is unsupported; previously
    those messages were appended to the buffer and then dropped.

    :param cluster: cluster the instance runs in
    :param service: service name
    :param instance: instance name
    :param system_paasta_config: config used to build the API client
    :param lock: context manager held while printing
    :param verbose: verbosity level forwarded to the API and the status writers
    :param new: forwarded to ``status_instance``
    :param is_eks: whether to address the EKS flavour of the cluster
    :param all_namespaces: forwarded to ``status_instance``
    :return: 0 on success, otherwise a non-zero status (see
        ``_collect_instance_status_output``)
    :raises SystemExit: with status 1 when no API client can be created
    """
    output = [
        "",
        f"\n{service}.{PaastaColors.cyan(instance)} in {cluster}{' (EKS)' if is_eks else ''}",
    ]
    client = get_paasta_oapi_client(
        cluster=get_paasta_oapi_api_clustername(cluster=cluster, is_eks=is_eks),
        system_paasta_config=system_paasta_config,
    )
    if not client:
        print("Cannot get a paasta-api client")
        # sys.exit rather than the interactive-only ``exit`` helper from ``site``.
        sys.exit(1)

    ret_code = _collect_instance_status_output(
        client, cluster, service, instance, verbose, new, all_namespaces, output
    )

    with lock:
        print("\n".join(output), flush=True)

    return ret_code
365
+
366
+
367
def find_instance_types(status: Any) -> List[str]:
    """
    Pick out the instance types present in a status api response.

    Every key registered in `INSTANCE_TYPE_WRITERS` is looked up on the status
    object; those with a non-None value are kept, in registration order.

    :param status: paasta api status object
    :return: the list of matching instance types
    """
    return [
        instance_type
        for instance_type in INSTANCE_TYPE_WRITERS
        if status.get(instance_type) is not None
    ]
380
+
381
+
382
def print_adhoc_status(
    cluster: str,
    service: str,
    instance: str,
    output: List[str],
    status,
    verbose: int = 0,
) -> int:
    """Append a description of the runs of an adhoc instance to ``output``.

    :param cluster: cluster name, used in the "how to stop" hint
    :param service: service name, used in the "how to stop" hint
    :param instance: instance (job) name
    :param output: list of lines that is extended in place
    :param status: object whose ``value`` is an iterable of run mappings with
        ``launch_time``, ``run_id`` and ``framework_id`` keys
    :param verbose: accepted for signature parity with other writers; unused
    :return: always 0
    """
    output.append(f" Job: {instance}")
    runs = status.value
    output.extend(
        f"Launch time: {run['launch_time']}, run id: {run['run_id']}, framework id: {run['framework_id']}"
        for run in runs
    )
    if not runs:
        output.append(" Nothing found.")
        return 0

    output.append(
        f" Use `paasta remote-run stop -s {service} -c {cluster} -i {instance} [-R <run id> "
        " | -F <framework id>]` to stop."
    )
    return 0
407
+
408
+
409
def create_autoscaling_info_table(autoscaling_info):
    """Render autoscaling numbers as a titled, two-row table.

    A missing (``None``) utilization or target instance count is shown as the
    literal text ``Exception``.

    :param autoscaling_info: object exposing ``current_utilization``,
        ``target_instances``, ``current_instances``, ``max_instances`` and
        ``min_instances``
    :return: list of lines, starting with the ``Autoscaling Info:`` title
    """
    utilization = autoscaling_info.current_utilization
    if utilization is None:
        current_utilization = "Exception"
    else:
        current_utilization = f"{utilization * 100:.1f}%"

    target_instances = autoscaling_info.target_instances
    if target_instances is None:
        target_instances = "Exception"

    headers = [
        "Current instances",
        "Max instances",
        "Min instances",
        "Current utilization",
        "Target instances",
    ]
    cells = (
        autoscaling_info.current_instances,
        autoscaling_info.max_instances,
        autoscaling_info.min_instances,
        current_utilization,
        target_instances,
    )
    row = [str(cell) for cell in cells]
    return [
        "Autoscaling Info:",
        *(f" {line}" for line in format_table([headers, row])),
    ]
441
+
442
+
443
def format_kubernetes_pod_table(pods, verbose: int):
    """Build a table describing each pod: id, host, deploy time and health.

    Besides one tuple row per pod, plain-string rows are interleaved for pod
    events (only when ``verbose > 1``), the pod message and container tail
    lines.

    :param pods: iterable of pod status objects
    :param verbose: verbosity level; event messages need a value above 1
    :return: whatever ``format_table`` returns for the collected rows
    """
    rows: List[Union[tuple, str]] = [
        ("Pod ID", "Host deployed to", "Deployed at what localtime", "Health")
    ]
    for pod in pods:
        deployed_at = datetime.fromtimestamp(pod.deployed_timestamp)
        hostname = PaastaColors.grey("N/A") if pod.host is None else f"{pod.host}"

        phase = pod.phase
        if phase is None or phase == "Pending":
            health_check_status = PaastaColors.grey("N/A")
        elif phase == "Running":
            # A running pod only counts as healthy once it reports ready.
            if pod.ready:
                health_check_status = PaastaColors.green("Healthy")
            else:
                health_check_status = PaastaColors.red("Unhealthy")
        elif phase == "Failed" and pod.reason == "Evicted":
            health_check_status = PaastaColors.red("Evicted")
        else:
            health_check_status = PaastaColors.red("Unhealthy")

        timestamp = deployed_at.strftime("%Y-%m-%dT%H:%M")
        relative = humanize.naturaltime(deployed_at)
        rows.append((pod.name, hostname, f"{timestamp} ({relative})", health_check_status))

        if pod.events and verbose > 1:
            rows.extend(format_pod_event_messages(pod.events, pod.name))
        if pod.message is not None:
            rows.append(PaastaColors.grey(f" {pod.message}"))
        if len(pod.containers) > 0:
            rows.extend(format_tail_lines_for_kubernetes_pod(pod.containers, pod.name))

    return format_table(rows)
481
+
482
+
483
def format_kubernetes_replicaset_table(replicasets):
    """Build a table with one row per ReplicaSet.

    The ready/desired column is green when ready replicas have reached the
    desired count and red otherwise; missing git/config SHAs show as
    ``Unknown``.

    :param replicasets: iterable of replicaset status objects
    :return: whatever ``format_table`` returns for the collected rows
    """
    header = (
        "ReplicaSet Name",
        "Ready / Desired",
        "Created at what localtime",
        "Service git SHA",
        "Config hash",
    )
    rows = [header]
    for replicaset in replicasets:
        created_at = datetime.fromtimestamp(replicaset.create_timestamp)

        ratio = f"{replicaset.ready_replicas}/{replicaset.replicas}"
        fully_ready = replicaset.ready_replicas >= replicaset.replicas
        paint = PaastaColors.green if fully_ready else PaastaColors.red
        replica_status = paint(ratio)

        timestamp = created_at.strftime("%Y-%m-%dT%H:%M")
        relative = humanize.naturaltime(created_at)
        rows.append(
            (
                replicaset.name,
                replica_status,
                f"{timestamp} ({relative})",
                replicaset.git_sha or "Unknown",
                replicaset.config_sha or "Unknown",
            )
        )

    return format_table(rows)
516
+
517
+
518
def get_smartstack_status_human(
    registration: str,
    expected_backends_per_location: int,
    locations: Collection[Any],
) -> List[str]:
    """Describe the Smartstack (haproxy) state of a registration per location.

    :param registration: the registration name being reported on
    :param expected_backends_per_location: backend count each location should have
    :param locations: location status objects exposing ``name``,
        ``running_backends_count`` and ``backends``
    :return: list of output lines; a single error line when there are no locations
    """
    if len(locations) == 0:
        return [f"Smartstack: ERROR - {registration} is NOT in smartstack at all!"]

    output = [
        "Smartstack:",
        f" Haproxy Service Name: {registration}",
        " Backends:",
    ]
    for location in locations:
        report = haproxy_backend_report(
            expected_backends_per_location, location.running_backends_count
        )
        output.append(f" {location.name} - {report}")

        if not location.backends:
            continue
        output.extend(
            f" {line}" for line in build_smartstack_backends_table(location.backends)
        )

    return output
540
+
541
+
542
def build_smartstack_backends_table(backends: Iterable[Any]) -> List[str]:
    """Build a table of haproxy backends with check info and coloured status.

    Backends without an associated task have their whole row re-coloured grey
    (existing colour codes are stripped first).

    :param backends: backend status objects
    :return: whatever ``format_table`` returns for the collected rows
    """
    known_status_colors = {
        "UP": PaastaColors.default,
        "DOWN": PaastaColors.red,
        "MAINT": PaastaColors.grey,
    }
    rows: List[Tuple[str, ...]] = [("Name", "LastCheck", "LastChange", "Status")]
    for backend in backends:
        # Anything that is not a known state is highlighted in yellow.
        paint = known_status_colors.get(backend.status, PaastaColors.yellow)
        status = paint(backend.status)

        check_duration = (
            "" if backend.check_duration is None else str(backend.check_duration)
        )

        row: Tuple[str, ...] = (
            f"{backend.hostname}:{backend.port}",
            f"{backend.check_status}/{backend.check_code} in {check_duration}ms",
            humanize.naturaltime(timedelta(seconds=backend.last_change)),
            status,
        )
        if not backend.has_associated_task:
            row = tuple(
                PaastaColors.grey(remove_ansi_escape_sequences(cell)) for cell in row
            )
        rows.append(row)

    return format_table(rows)
574
+
575
+
576
def get_envoy_status_human(
    registration: str,
    expected_backends_per_location: int,
    locations: Collection[Any],
) -> List[str]:
    """Describe the Envoy state of a registration per location.

    :param registration: the registration name being reported on
    :param expected_backends_per_location: backend count each location should have
    :param locations: location status objects exposing ``name``,
        ``running_backends_count``, ``backends`` and ``is_proxied_through_casper``
    :return: list of output lines; a single error line when there are no locations
    """
    if len(locations) == 0:
        return [f"Envoy: ERROR - {registration} is NOT in Envoy at all!"]

    output = [
        "Envoy:",
        f" Service Name: {registration}",
        " Backends:",
    ]
    for location in locations:
        report = envoy_backend_report(
            expected_backends_per_location, location.running_backends_count
        )
        output.append(f" {location.name} - {report}")

        if not location.backends:
            continue

        if location.is_proxied_through_casper:
            paint = PaastaColors.green
        else:
            paint = PaastaColors.grey
        is_proxied_through_casper_output = paint(
            f"{location.is_proxied_through_casper}"
        )
        output.append(
            f" Proxied through Casper: {is_proxied_through_casper_output}"
        )
        output.extend(
            f" {line}" for line in build_envoy_backends_table(location.backends)
        )

    return output
610
+
611
+
612
def build_envoy_backends_table(backends: Iterable[Any]) -> List[str]:
    """Build a table of Envoy backends with weight and coloured EDS health.

    Backends without an associated task have their whole row re-coloured grey
    (existing colour codes are stripped first).

    :param backends: backend status objects
    :return: whatever ``format_table`` returns for the collected rows
    """
    known_health_colors = {
        "HEALTHY": PaastaColors.default,
        "UNHEALTHY": PaastaColors.red,
    }
    rows: List[Tuple[str, ...]] = [("Hostname:Port", "Weight", "Status")]
    for backend in backends:
        # Any other health value is highlighted in yellow.
        paint = known_health_colors.get(backend.eds_health_status, PaastaColors.yellow)

        row: Tuple[str, ...] = (
            f"{backend.hostname}:{backend.port_value}",
            f"{backend.weight}",
            paint(backend.eds_health_status),
        )
        if not backend.has_associated_task:
            row = tuple(
                PaastaColors.grey(remove_ansi_escape_sequences(cell)) for cell in row
            )
        rows.append(row)

    return format_table(rows)
636
+
637
+
638
def kubernetes_app_deploy_status_human(status, message, backoff_seconds=None):
    """Turn a Kubernetes deploy status into a coloured, human-readable string.

    :param status: a ``kubernetes_tools.KubernetesDeployStatus`` value
    :param message: optional detail appended in parentheses when truthy
    :param backoff_seconds: accepted but not used by this function
    :return: the (possibly coloured) status text, optionally with the message
    """
    deploy_states = kubernetes_tools.KubernetesDeployStatus
    status_string = deploy_states.tostring(status)

    painters = {
        deploy_states.Waiting: PaastaColors.red,
        deploy_states.Stopped: PaastaColors.red,
        deploy_states.Deploying: PaastaColors.yellow,
        deploy_states.Running: PaastaColors.bold,
    }
    paint = painters.get(status)
    # Statuses without a dedicated colour are shown uncoloured.
    deploy_status = status_string if paint is None else paint(status_string)

    if not message:
        return deploy_status
    return deploy_status + f" ({message})"
656
+
657
+
658
def status_kubernetes_job_human(
    service: str,
    instance: str,
    deploy_status: str,
    desired_app_id: str,
    app_count: int,
    running_instances: int,
    normal_instance_count: int,
    evicted_count: int,
) -> str:
    """Summarise the Kubernetes state of a job in one coloured line.

    :param service: service name
    :param instance: instance name
    :param deploy_status: pre-rendered deploy status text appended to the line
    :param desired_app_id: app id mentioned when the job is not configured yet
    :param app_count: number of apps found; a negative value selects the
        "not configured" message
    :param running_instances: number of instances currently running
    :param normal_instance_count: number of instances expected
    :param evicted_count: number of evicted instances
    :return: the status line
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))

    if app_count < 0:
        status = PaastaColors.yellow("Warning")
        return "Kubernetes: {} - {} (app {}) is not configured in Kubernetes yet (waiting for bounce)".format(
            status, name, desired_app_id
        )

    if running_instances >= normal_instance_count:
        label, paint_label, paint_count = "Healthy", PaastaColors.green, PaastaColors.green
    elif running_instances == 0:
        label, paint_label, paint_count = "Critical", PaastaColors.yellow, PaastaColors.red
    else:
        label, paint_label, paint_count = "Warning", PaastaColors.yellow, PaastaColors.yellow

    status = paint_label(label)
    instance_count = paint_count("(%d/%d)" % (running_instances, normal_instance_count))

    paint_evicted = PaastaColors.red if evicted_count > 0 else PaastaColors.green
    evicted = paint_evicted(str(evicted_count))

    return "Kubernetes: {} - up with {} instances ({} evicted). Status: {}".format(
        status, instance_count, evicted, deploy_status
    )
702
+
703
+
704
def get_flink_job_name(flink_job: FlinkJobDetails) -> str:
    """Return the job's ``name`` with up to two leading dot-separated segments removed.

    Names with fewer than three segments yield their last segment.
    """
    *_, job_name = flink_job["name"].split(".", 2)
    return job_name
706
+
707
+
708
def should_job_info_be_shown(cluster_state):
    """Tell whether job details should be displayed for a Flink cluster state.

    :param cluster_state: the cluster state string
    :return: True for ``running``, ``stoppingsupervisor`` and
        ``cleanupsupervisor``; False for anything else
    """
    return cluster_state in ("running", "stoppingsupervisor", "cleanupsupervisor")
714
+
715
+
716
def get_pod_uptime(pod_deployed_timestamp: str):
    """Format the time elapsed since a pod's deploy timestamp as ``<d>d<h>h<m>m<s>s``.

    :param pod_deployed_timestamp: timestamp in ``%Y-%m-%dT%H:%M:%SZ`` form
    :return: the elapsed time split into days, hours, minutes and seconds
    """
    # NOTE: the k8s API returns timestamps in UTC, so we make sure to always work in UTC
    created_at = datetime.strptime(
        pod_deployed_timestamp, "%Y-%m-%dT%H:%M:%SZ"
    ).replace(tzinfo=timezone.utc)
    elapsed = (datetime.now(timezone.utc) - created_at).total_seconds()

    # Peel off each unit in turn, carrying the remainder down to the next one.
    days, remainder = divmod(elapsed, 86400)
    hours, remainder = divmod(remainder, 3600)
    minutes, remainder = divmod(remainder, 60)
    seconds, _ = divmod(remainder, 1)
    return f"{int(days)}d{int(hours)}h{int(minutes)}m{int(seconds)}s"
728
+
729
+
730
def append_pod_status(pod_status, output: List[str]):
    """Append a "Pods:" section with one table row per pod to ``output``.

    Each pod contributes a (name, host, phase, uptime) row. Extra plain-text
    rows follow for a non-empty ``reason`` and for a container state other
    than ``Running``.

    The optional keys ``container_state``, ``container_state_reason`` and
    ``message`` are read with ``.get``: the function already treats
    ``container_state`` as optional further down, so a pod lacking these keys
    must not raise ``KeyError`` while the colour is being chosen.

    :param pod_status: iterable of pod mappings with at least ``name``,
        ``host``, ``phase`` and ``deployed_timestamp``
    :param output: list of lines that is extended in place
    """
    output.append(" Pods:")
    rows: List[Union[str, Tuple[str, str, str, str]]] = [
        ("Pod Name", "Host", "Phase", "Uptime")
    ]
    for pod in pod_status:
        phase = pod["phase"]
        container_state = pod.get("container_state")
        container_state_reason = pod.get("container_state_reason")

        if phase == "Running" and container_state == "Running":
            color_fn = PaastaColors.green
        # pods can get stuck in phase: Running and state: CrashLoopBackOff, so check for that
        elif phase == "Failed" or container_state_reason == "CrashLoopBackOff":
            color_fn = PaastaColors.red
        else:
            color_fn = PaastaColors.yellow

        rows.append(
            (
                pod["name"],
                pod["host"],
                color_fn(phase),
                get_pod_uptime(pod["deployed_timestamp"]),
            )
        )
        if "reason" in pod and pod["reason"] != "":
            rows.append(
                PaastaColors.grey(f" {pod['reason']}: {pod.get('message')}")
            )
        if "container_state" in pod and container_state != "Running":
            rows.append(
                PaastaColors.grey(f" {container_state}: {container_state_reason}")
            )
    pods_table = format_table(rows)
    output.extend([f" {line}" for line in pods_table])
764
+
765
+
766
def _print_flink_status_from_job_manager(
    service: str,
    instance: str,
    output: List[str],
    flink: Mapping[str, Any],
    client: PaastaOApiClient,
    verbose: int,
) -> int:
    """Append a human-readable report about a Flink cluster to ``output``.

    The report contains, in order: config SHA, Flink version and dashboard URL
    (running clusters only), cluster state, pod counts, a cluster overview and
    the most recent run of each job. With ``verbose`` set, a per-pod table is
    added at the end.

    :param service: service name
    :param instance: instance name
    :param output: list of lines that is extended in place
    :param flink: mapping with the ``status`` and ``metadata`` of the cluster
    :param client: paasta API client used to fetch config, overview and jobs
    :param verbose: verbosity; any truthy value lists all jobs and pods, values
        above 1 also add job ids and dashboard links
    :return: 0 on success, 1 when the cluster has no status yet or an API call fails
    :raises ValueError: when the cluster metadata carries no config sha label
    """
    status = flink.get("status")
    if status is None:
        output.append(PaastaColors.red(" Flink cluster is not available yet"))
        return 1

    # Since metadata should be available no matter the state, we show it first. If this errors out
    # then we cannot really do much to recover, because cluster is not in usable state anyway
    metadata = flink.get("metadata")
    labels = metadata.get("labels")
    config_sha = labels.get(paasta_prefixed("config_sha"))
    if config_sha is None:
        raise ValueError(f"expected config sha on Flink, but received {metadata}")
    if config_sha.startswith("config"):
        config_sha = config_sha[6:]

    output.append(f" Config SHA: {config_sha}")

    is_running = status["state"] == "running"
    # Bound up front so the job rows further down can never hit an unbound
    # name; it is only filled in for running clusters.
    dashboard_url = None

    if is_running:
        try:
            flink_config = get_flink_config_from_paasta_api_client(
                service=service, instance=instance, client=client
            )
        except Exception as e:
            output.append(PaastaColors.red("Exception when talking to the API:"))
            output.append(str(e))
            return 1

        if verbose:
            output.append(
                f" Flink version: {flink_config.flink_version} {flink_config.flink_revision}"
            )
        else:
            output.append(f" Flink version: {flink_config.flink_version}")

        # Annotation "flink.yelp.com/dashboard_url" is populated by flink-operator
        dashboard_url = metadata["annotations"].get("flink.yelp.com/dashboard_url")
        output.append(f" URL: {dashboard_url}/")

    color = PaastaColors.green if is_running else PaastaColors.yellow
    output.append(f" State: {color(status['state'].title())}")

    pod_running_count = pod_evicted_count = pod_other_count = 0
    for pod in status["pod_status"]:
        if pod["phase"] == "Running":
            pod_running_count += 1
        # ``reason`` is optional on a pod, so it is read with .get
        elif pod["phase"] == "Failed" and pod.get("reason") == "Evicted":
            pod_evicted_count += 1
        else:
            pod_other_count += 1
    evicted = (
        PaastaColors.red(f"{pod_evicted_count}")
        if pod_evicted_count > 0
        else f"{pod_evicted_count}"
    )

    output.append(
        " Pods:"
        f" {pod_running_count} running,"
        f" {evicted} evicted,"
        f" {pod_other_count} other"
    )

    if not should_job_info_be_shown(status["state"]):
        # In case where the jobmanager of cluster is in crashloopbackoff
        # The pods for the cluster will be available and we need to show the pods.
        # So that paasta status -v and kubectl get pods show the same consistent result.
        if verbose and len(status["pod_status"]) > 0:
            append_pod_status(status["pod_status"], output)
        output.append(" No other information available in non-running state")
        return 0

    if is_running:
        # Flink cluster overview from paasta api client
        try:
            overview = get_flink_overview_from_paasta_api_client(
                service=service, instance=instance, client=client
            )
        except Exception as e:
            output.append(PaastaColors.red("Exception when talking to the API:"))
            output.append(str(e))
            return 1

        output.append(
            " Jobs:"
            f" {overview.jobs_running} running,"
            f" {overview.jobs_finished} finished,"
            f" {overview.jobs_failed} failed,"
            f" {overview.jobs_cancelled} cancelled"
        )
        output.append(
            " "
            f" {overview.taskmanagers} taskmanagers,"
            f" {overview.slots_available}/{overview.slots_total} slots available"
        )

    flink_jobs = FlinkJobs()
    flink_jobs.jobs = []
    if is_running:
        try:
            flink_jobs = get_flink_jobs_from_paasta_api_client(
                service=service, instance=instance, client=client
            )
        except Exception as e:
            output.append(PaastaColors.red("Exception when talking to the API:"))
            output.append(str(e))
            return 1

    jobs: List[FlinkJobDetails] = []
    job_ids: List[str] = []
    if flink_jobs.get("jobs"):
        job_ids = [job.id for job in flink_jobs.get("jobs")]
    try:
        jobs = a_sync.block(get_flink_job_details, service, instance, job_ids, client)
    except Exception as e:
        output.append(PaastaColors.red("Exception when talking to the API:"))
        output.append(str(e))
        return 1

    # Only jobs carrying both a name and a start time can be rendered; filter
    # once so the column width below is computed from the same set of jobs
    # (and cannot fail on a job without a name).
    printable_jobs = [j for j in jobs if j.get("name") and j.get("start_time")]

    # Avoid cutting job name. As opposed to default hardcoded value of 32, we will use max length of job name
    max_job_name_length = max(
        (len(get_flink_job_name(job)) for job in printable_jobs), default=10
    )

    # Apart from this column total length of one row is around 52 columns, using remaining terminal columns for job name
    # Note: for terminals smaller than 90 columns the row will overflow in verbose printing
    allowed_max_job_name_length = min(
        max(10, shutil.get_terminal_size().columns - 52), max_job_name_length
    )

    output.append(" Jobs:")
    if verbose > 1:
        output.append(
            f' {"Job Name": <{allowed_max_job_name_length}} State Job ID Started'
        )
        fmt = (
            " {job_name: <{allowed_max_job_name_length}.{allowed_max_job_name_length}} {state: <11} {job_id} {start_time}\n"
            "{dashboard_url}"
        )
    else:
        output.append(
            f' {"Job Name": <{allowed_max_job_name_length}} State Started'
        )
        fmt = " {job_name: <{allowed_max_job_name_length}.{allowed_max_job_name_length}} {state: <11} {start_time}"

    # Use only the most recent jobs: group by name (groupby needs its input
    # sorted by the same key) and keep the latest start of each group.
    jobs_by_name = sorted(printable_jobs, key=lambda j: j["name"])
    unique_jobs = [
        max(same_name_jobs, key=lambda j: j["start_time"])
        for _, same_name_jobs in groupby(jobs_by_name, key=lambda j: j["name"])
    ]

    allowed_max_jobs_printed = 3
    job_printed_count = 0

    for job in unique_jobs:
        if not verbose and job_printed_count >= allowed_max_jobs_printed:
            output.append(
                PaastaColors.yellow(
                    f" Only showing {allowed_max_jobs_printed} Flink jobs, use -v to show all"
                )
            )
            break
        job_printed_count += 1

        job_id = job["jid"]
        start_time = datetime.fromtimestamp(int(job["start_time"]) // 1000)
        job_state = job.get("state")
        if job_state == "RUNNING":
            color_fn = PaastaColors.green
        elif job_state in ("FAILED", "FAILING"):
            color_fn = PaastaColors.red
        else:
            color_fn = PaastaColors.yellow

        output.append(
            fmt.format(
                job_id=job_id,
                job_name=get_flink_job_name(job),
                allowed_max_job_name_length=allowed_max_job_name_length,
                # Fall back *before* calling .title(): a job without a state
                # would otherwise raise AttributeError on None.
                state=color_fn((job_state or "Unknown").title()),
                start_time=f"{str(start_time)} ({humanize.naturaltime(start_time)})",
                dashboard_url=PaastaColors.grey(f"{dashboard_url}/#/jobs/{job_id}"),
            )
        )

    if verbose and len(status["pod_status"]) > 0:
        append_pod_status(status["pod_status"], output)
    return 0
969
+
970
+
971
def print_flink_status(
    cluster: str,
    service: str,
    instance: str,
    output: List[str],
    flink: Mapping[str, Any],
    verbose: int,
) -> int:
    """Status writer for Flink instances.

    Builds a paasta API client for ``cluster`` and delegates the rendering to
    ``_print_flink_status_from_job_manager``.

    :return: the delegate's return value, or 1 when no API client is available
    """
    client = get_paasta_oapi_client(cluster, load_system_paasta_config())
    if client:
        return _print_flink_status_from_job_manager(
            service, instance, output, flink, client, verbose
        )

    output.append(
        PaastaColors.red(
            "paasta-api client unavailable - unable to get flink status"
        )
    )
    return 1
993
+
994
+
995
def print_flinkeks_status(
    cluster: str,
    service: str,
    instance: str,
    output: List[str],
    flink: Mapping[str, Any],
    verbose: int,
) -> int:
    """Status writer for Flink instances running on EKS.

    Builds a paasta API client for the EKS API cluster name derived from
    ``cluster`` and delegates the rendering to
    ``_print_flink_status_from_job_manager``.

    :return: the delegate's return value, or 1 when no API client is available
    """
    system_paasta_config = load_system_paasta_config()
    api_cluster = get_paasta_oapi_api_clustername(cluster=cluster, is_eks=True)

    client = get_paasta_oapi_client(
        cluster=api_cluster,
        system_paasta_config=system_paasta_config,
    )
    if client:
        return _print_flink_status_from_job_manager(
            service, instance, output, flink, client, verbose
        )

    output.append(
        PaastaColors.red(
            "paasta-api client unavailable - unable to get flink status"
        )
    )
    return 1
1020
+
1021
+
1022
async def get_flink_job_details(
    service: str, instance: str, job_ids: List[str], client: PaastaOApiClient
) -> List[FlinkJobDetails]:
    """Fetch the details of several Flink jobs concurrently.

    :param job_ids: ids of the jobs to look up
    :return: one details object per id, in the order of ``job_ids``
    """
    pending = [
        flink_tools.get_flink_job_details_from_paasta_api_client(
            service, instance, job_id, client
        )
        for job_id in job_ids
    ]
    return list(await asyncio.gather(*pending))
1034
+
1035
+
1036
def print_kubernetes_status_v2(
    cluster: str,
    service: str,
    instance: str,
    output: List[str],
    status: InstanceStatusKubernetesV2,
    verbose: int = 0,
) -> int:
    """Status writer for Kubernetes instances (v2 status payload).

    Appends the instance state, a verbosity hint, the versions table and, for
    ``verbose > 1``, the autoscaling table to ``output``.

    :return: 1 when the status carries an error message (which is appended in
        red), 0 otherwise
    """
    output.append(f" State: {get_instance_state(status)}")
    output.append(" Running versions:")

    hint = None
    if not verbose:
        hint = "Rerun with -v to see all replicas"
    elif verbose < 2:
        hint = "You can use paasta logs to view stdout/stderr or rerun with -vv for even more information."
    if hint is not None:
        output.append(" " + PaastaColors.green(hint))

    versions_table = get_versions_table(
        status.versions, service, instance, cluster, verbose
    )
    output.extend(f" {line}" for line in versions_table)

    if verbose > 1:
        output.extend(get_autoscaling_table(status.autoscaling_status, verbose))

    if not status.error_message:
        return 0
    output.append(" " + PaastaColors.red(status.error_message))
    return 1
1075
+
1076
+
1077
+ # TODO: Make an enum class or similar for the various instance states
1078
def get_instance_state(status: InstanceStatusKubernetesV2) -> str:
    """Derive a coloured one-word (or short) state for a Kubernetes instance.

    The result depends on the desired state, how many versions exist, how many
    replicas are ready and, when several versions coexist, which SHAs differ.

    :param status: v2 Kubernetes instance status
    :return: coloured state text such as "Running", "Stopping" or "Bouncing to ..."
    """
    num_versions = len(status.versions)
    num_ready_replicas = sum(r.ready_replicas for r in status.versions)
    desired_state = status.desired_state

    if desired_state == "stop":
        if all(version.replicas == 0 for version in status.versions):
            return PaastaColors.red("Stopped")
        return PaastaColors.red("Stopping")

    if desired_state != "start":
        return PaastaColors.red("Unknown")

    if num_versions == 0:
        if status.desired_instances == 0:
            return PaastaColors.red("Stopped")
        return PaastaColors.yellow("Starting")

    if num_versions == 1:
        if num_ready_replicas < status.desired_instances:
            return PaastaColors.yellow("Launching replicas")
        return PaastaColors.green("Running")

    # Several versions at once: report whichever identifiers of the newest
    # version differ from the others.
    newest = max(status.versions, key=lambda x: x.create_timestamp)
    bouncing_to = []
    if len({r.git_sha for r in status.versions}) > 1:
        bouncing_to.append(newest.git_sha[:8])
    if len({r.config_sha for r in status.versions}) > 1:
        bouncing_to.append(newest.config_sha)

    bouncing_to_str = ", ".join(bouncing_to)
    return PaastaColors.yellow(f"Bouncing to {bouncing_to_str}")
1113
+
1114
+
1115
def get_versions_table(
    versions: List[KubernetesVersion],
    service: str,
    instance: str,
    cluster: str,
    verbose: int = 0,
) -> List[str]:
    """Render the table entries for all running versions of an instance.

    With several versions they are listed newest first; the newest is tagged
    "new" and the rest "old". Config SHA and namespace are shown when they
    differ between versions.

    :return: list of output lines
    """
    if len(versions) == 0:
        return [PaastaColors.red("There are no running versions for this instance")]
    if len(versions) == 1:
        return get_version_table_entry(
            versions[0], service, instance, cluster, verbose=verbose
        )

    newest_first = sorted(versions, key=lambda x: x.create_timestamp, reverse=True)
    show_config_sha = len({v.config_sha for v in newest_first}) > 1
    show_namespace = len({v.namespace for v in newest_first}) > 1

    table: List[str] = []
    for position, version in enumerate(newest_first):
        table.extend(
            get_version_table_entry(
                version,
                service,
                instance,
                cluster,
                version_name_suffix="new" if position == 0 else "old",
                show_config_sha=show_config_sha,
                show_namespace=show_namespace,
                verbose=verbose,
            )
        )
    return table
1163
+
1164
+
1165
def get_version_table_entry(
    version: KubernetesVersion,
    service: str,
    instance: str,
    cluster: str,
    version_name_suffix: str = None,
    show_config_sha: bool = False,
    show_namespace: bool = False,
    verbose: int = 0,
) -> List[str]:
    """Render the lines describing one running version and its replicas.

    The first line names the version and its start time. It is followed by a
    per-state replica count and a replica table: all replicas when ``verbose``
    is set, only unhealthy ones otherwise.

    :param version_name_suffix: optional tag (e.g. "new"/"old") added to the name
    :param show_config_sha: include the config SHA in the name (also done for
        ``verbose > 1``)
    :param show_namespace: include the namespace in the name (also done for
        ``verbose > 1``)
    :return: list of output lines
    """
    name_parts = [version.git_sha[:8]]
    if show_config_sha or verbose > 1:
        name_parts.append(f", {version.config_sha}")
    if version.image_version is not None:
        name_parts.append(f" (image_version: {version.image_version})")
    if version_name_suffix is not None:
        name_parts.append(f" ({version_name_suffix})")
    if version.namespace is not None and (show_namespace or verbose > 1):
        name_parts.append(f" (namespace: {version.namespace})")
    version_name = PaastaColors.blue("".join(name_parts))

    start_datetime = datetime.fromtimestamp(version.create_timestamp)
    humanized_start_time = humanize.naturaltime(start_datetime)
    entry = [f"{version_name} - Started {start_datetime} ({humanized_start_time})"]

    replica_states = sorted(
        get_replica_states(version.pods), key=lambda s: s[1].create_timestamp
    )
    if not replica_states:
        # If no replica_states, there were no pods found
        message = PaastaColors.red("0 pods found")
        entry.append(f" {message}")
        return entry

    replica_state_counts = Counter(state for state, _ in replica_states)
    replica_state_display = [
        state.color(f"{replica_state_counts[state]} {state.message}")
        for state in ReplicaState
        if state in replica_state_counts
    ]
    entry.append(f" Replica States: {' / '.join(replica_state_display)}")

    if verbose:
        replicas_to_show = replica_states
    else:
        replicas_to_show = [
            (state, pod) for state, pod in replica_states if state.is_unhealthy()
        ]
        if not replicas_to_show:
            return entry
        entry.append(" Unhealthy Replicas:")

    replica_table = create_replica_table(
        replicas_to_show, service, instance, cluster, verbose
    )
    entry.extend(f" {line}" for line in replica_table)
    return entry
1221
+
1222
+
1223
class ReplicaState(Enum):
    """Possible states of a replica; each value is a (message, colour function) pair."""

    # Order will be preserved in count summary
    RUNNING = "Healthy", PaastaColors.green

    UNREACHABLE = "Unreachable", PaastaColors.red
    EVICTED = "Evicted", PaastaColors.red
    ALL_CONTAINERS_WAITING = "All Containers Waiting", PaastaColors.red
    FAILED = "Failed", PaastaColors.red
    MAIN_CONTAINER_NOT_RUNNING = "Main Container Not Running", PaastaColors.red
    NO_CONTAINERS_YET = "No Containers Yet", PaastaColors.red
    NOT_READY = "Not Ready", PaastaColors.red
    SOME_CONTAINERS_WAITING = "Some Containers Waiting", PaastaColors.red

    WARNING = "Warning", PaastaColors.yellow
    UNSCHEDULED = "Unscheduled", PaastaColors.yellow
    STARTING = "Starting", PaastaColors.yellow
    WARMING_UP = "Warming Up", PaastaColors.cyan
    TERMINATING = "Terminating", PaastaColors.cyan
    UNKNOWN = "Unknown", PaastaColors.yellow

    @property
    def message(self):
        """Plain, uncoloured text of the state."""
        text, _ = self.value
        return text

    @property
    def color(self) -> Callable:
        """Colour function paired with the state."""
        _, paint = self.value
        return paint

    @property
    def formatted_message(self):
        """The state's message passed through its own colour function."""
        text, paint = self.value
        return paint(text)

    def is_unhealthy(self):
        """A state counts as unhealthy exactly when its colour is red."""
        return self.color == PaastaColors.red
1257
+
1258
+
1259
def recent_liveness_failure(pod: KubernetesPodV2) -> bool:
    """Return True if any event on ``pod`` reports a failed liveness probe.

    :param pod: pod whose ``events`` (a list of mappings, or a falsy value
        when there are none) is inspected
    :returns: True when at least one event message contains the text
        "Liveness probe failed", False otherwise
    """
    if not pod.events:
        return False
    return any(
        # ``or ""`` guards against an event whose message is present but None,
        # which would otherwise make the ``in`` test raise TypeError.
        "Liveness probe failed" in (evt.get("message") or "")
        for evt in pod.events
    )
1265
+
1266
+
1267
def recent_container_restart(
    container: Optional[KubernetesContainerV2], time_window: int = 900
) -> bool:
    """Report whether ``container`` restarted recently.

    Delegates to ``kubernetes_tools.recent_container_restart`` with the
    container's restart count, last state and last timestamp.

    :param container: container to inspect; a falsy value yields False
    :param time_window: forwarded as ``time_window_s``
    :returns: the delegate's answer, or False when there is no container
    """
    if not container:
        return False
    return kubernetes_tools.recent_container_restart(
        container.restart_count,
        container.last_state,
        container.last_timestamp,
        time_window_s=time_window,
    )
1278
+
1279
+
1280
def get_main_container(pod: KubernetesPodV2) -> Optional[KubernetesContainerV2]:
    """Return the first container of ``pod`` that is not a sidecar.

    A container counts as a sidecar when its name is listed in
    ``kubernetes_tools.SIDECAR_CONTAINER_NAMES``.

    :returns: the first non-sidecar container, or None if there is none
    """
    for candidate in pod.containers:
        if candidate.name not in kubernetes_tools.SIDECAR_CONTAINER_NAMES:
            return candidate
    return None
1289
+
1290
+
1291
def get_replica_state(pod: KubernetesPodV2) -> ReplicaState:
    """Classify ``pod`` into a :class:`ReplicaState`.

    The checks run in a fixed priority order: eviction, failed phase,
    unscheduled, terminating, pending, running. Any other phase, or a running
    pod without a main container, is reported as UNKNOWN.
    """
    phase = pod.phase

    if pod.reason == "Evicted":
        return ReplicaState.EVICTED
    if phase == "Failed":
        return ReplicaState.FAILED
    if phase is None or not pod.scheduled:
        return ReplicaState.UNSCHEDULED
    if pod.delete_timestamp:
        return ReplicaState.TERMINATING

    if phase == "Pending":
        if not pod.containers:
            return ReplicaState.NO_CONTAINERS_YET
        waiting_flags = [c.state.lower() == "waiting" for c in pod.containers]
        if all(waiting_flags):
            return ReplicaState.ALL_CONTAINERS_WAITING
        return ReplicaState.SOME_CONTAINERS_WAITING

    if phase != "Running":
        return ReplicaState.UNKNOWN

    ####
    # TODO: Take sidecar containers into account
    # This logic likely needs refining
    main_container = get_main_container(pod)
    if not main_container:
        return ReplicaState.UNKNOWN

    # NOTE: the k8s API returns timestamps in UTC, so we make sure to always work in UTC
    grace_period_end = pod.create_timestamp + main_container.healthcheck_grace_period
    warming_up = grace_period_end > datetime.now(timezone.utc).timestamp()

    if pod.mesh_ready is False:
        if main_container.state != "running":
            state = ReplicaState.MAIN_CONTAINER_NOT_RUNNING
        else:
            state = ReplicaState.UNREACHABLE
    elif not pod.ready:
        state = ReplicaState.NOT_READY
    elif recent_liveness_failure(pod) or recent_container_restart(main_container):
        state = ReplicaState.WARNING
    else:
        state = ReplicaState.RUNNING

    # Anything short of fully healthy is shown as warming up while the
    # healthcheck grace period has not elapsed yet.
    if warming_up and state != ReplicaState.RUNNING:
        return ReplicaState.WARMING_UP
    return state
1343
+
1344
+
1345
def get_replica_states(
    pods: List[KubernetesPodV2],
) -> List[Tuple[ReplicaState, KubernetesPodV2]]:
    """Pair every pod with its replica state, preserving the input order.

    :returns: a list of ``(state, pod)`` tuples, one per pod
    """
    paired = []
    for pod in pods:
        paired.append((get_replica_state(pod), pod))
    return paired
1349
+
1350
+
1351
def create_replica_table(
    pods: List[Tuple[ReplicaState, KubernetesPodV2]],
    service: str,
    instance: str,
    cluster: str,
    verbose: int = 0,
) -> List[str]:
    """Build the formatted replica table for the given ``(state, pod)`` pairs.

    Each pod contributes one row, followed by zero or more free-text hint
    lines (eviction reason, restarts, OOM kills, warm-up progress, failing
    healthchecks, log pointers, ...).

    :param pods: ``(state, pod)`` pairs to render
    :param service: service name, used in the suggested ``paasta logs`` command
    :param instance: instance name, used in the suggested ``paasta logs`` command
    :param cluster: cluster name, used in the suggested ``paasta logs`` command
    :param verbose: verbosity level; controls warm-up details (> 0) and whether
        events/log tails replace the log hint (>= 2)
    :returns: the result of ``format_table`` on the assembled rows
    """
    table: List[Union[List[str], str]] = [
        ["ID", "IP/Port", "Host deployed to", "Started at what localtime", "State"]
    ]
    for state, pod in pods:
        created_at = datetime.fromtimestamp(pod.create_timestamp)
        table.append(
            [
                pod.name,
                f"{pod.ip}:8888" if pod.ip else "None",
                pod.host or "None",
                humanize.naturaltime(created_at),
                state.formatted_message,
            ]
        )

        # Adding additional context/tips
        if pod.reason == "Evicted":
            eviction_reason = pod.message or "Unknown reason"
            table.append(PaastaColors.red(f" Evicted: {eviction_reason}"))

        main_container = get_main_container(pod)
        if main_container:
            # Prefer the container's current timestamp, then its previous
            # one; if no container timestamps are found, use pod's creation.
            event_epoch = main_container.timestamp or main_container.last_timestamp
            event_time = (
                datetime.fromtimestamp(event_epoch) if event_epoch else created_at
            )
            event_time_human = humanize.naturaltime(event_time)

            if recent_container_restart(main_container):
                table.append(
                    PaastaColors.red(
                        f" Restarted {event_time_human}. {main_container.restart_count} restarts since starting"
                    )
                )

            if "OOMKilled" in (main_container.reason, main_container.last_reason):
                if main_container.reason == "OOMKilled":
                    oom_time = event_time
                    oom_time_human = event_time_human
                else:
                    oom_time = datetime.fromtimestamp(main_container.last_timestamp)
                    oom_time_human = humanize.naturaltime(oom_time)
                table.append(
                    PaastaColors.red(f" OOM Killed {oom_time_human} ({oom_time}).")
                )
                table.append(
                    PaastaColors.red(
                        " Check y/check-oom-events and consider increasing memory in yelpsoa_configs"
                    )
                )

            if state == ReplicaState.WARMING_UP and verbose > 0:
                # NOTE: the k8s API returns timestamps in UTC, so we make sure to always work in UTC
                elapsed_s = datetime.now(timezone.utc).timestamp() - pod.create_timestamp
                elapsed_human = humanize.naturaldelta(timedelta(seconds=elapsed_s))
                remaining_s = (
                    pod.create_timestamp
                    + main_container.healthcheck_grace_period
                    - datetime.now(timezone.utc).timestamp()
                )
                remaining_human = humanize.naturaldelta(timedelta(seconds=remaining_s))
                table.append(
                    PaastaColors.cyan(
                        f" Still warming up, {elapsed_human} elapsed, {remaining_human} before healthchecking starts"
                    )
                )

        if recent_liveness_failure(pod) and state != ReplicaState.TERMINATING:
            hint = "check your healthcheck configuration in yelpsoa_configs"
            if main_container and main_container.healthcheck_cmd:
                healthcheck = main_container.healthcheck_cmd
                if healthcheck.http_url:
                    hint = f"run `curl {healthcheck.http_url}`"
                elif healthcheck.tcp_port:
                    hint = f"verify your service is listening on {healthcheck.tcp_port}"
                elif healthcheck.cmd:
                    hint = f"check why the following may be failing: `{healthcheck.cmd}`"
            table.append(
                PaastaColors.red(
                    f" Healthchecks are failing. To investigate further, {hint}"
                )
            )

        if state.is_unhealthy() or recent_container_restart(main_container):
            if verbose < 2:
                table.append(
                    PaastaColors.red(
                        f" Consider checking logs with `paasta logs -c {cluster} -s {service} -i {instance} -p {pod.name}`"
                    )
                )
            else:
                if pod.events:
                    table.extend(format_pod_event_messages(pod.events, pod.name))
                if len(pod.containers) > 0:
                    table.extend(
                        format_tail_lines_for_kubernetes_pod(pod.containers, pod.name)
                    )
        elif state == ReplicaState.UNSCHEDULED:
            if pod.reason == "Unschedulable":
                table.append(PaastaColors.red(f" Pod is unschedulable: {pod.message}"))
        elif state == ReplicaState.UNKNOWN:
            table.append(
                PaastaColors.red(
                    " Cannot determine pod state, please try again. If you continue to see this state, please contact #paasta"
                )
            )
    return format_table(table)
1483
+
1484
+
1485
def get_autoscaling_table(
    autoscaling_status: Dict[str, Any], verbose: int = 0
) -> List[str]:
    """Render the autoscaling section as a list of lines.

    Nothing is rendered unless ``autoscaling_status`` is truthy and
    ``verbose`` is greater than 1.

    :returns: the result of ``format_table`` on the collected lines
    """
    lines = []
    if autoscaling_status and verbose > 1:
        lines.extend(
            [
                " Autoscaling status:",
                f" min_instances: {autoscaling_status['min_instances']}",
                f" max_instances: {autoscaling_status['max_instances']}",
                f" Desired instances: {autoscaling_status['desired_replicas']}",
                f" Last scale time: {autoscaling_status['last_scale_time']}",
            ]
        )
        not_available = PaastaColors.red("N/A")
        metrics = autoscaling_status["metrics"]
        if len(metrics) > 0:
            lines.append(" Metrics:")

            metrics_table: List[List[str]] = [["Metric", "Current", "Target"]]
            for metric in metrics:
                # A missing current/target value is shown as a red "N/A".
                current = metric.current_value
                if current is None:
                    current = not_available
                target = metric.target_value
                if target is None:
                    target = not_available
                metrics_table.append([metric["name"], current, target])
            for rendered in format_table(metrics_table):
                lines.append(" " + rendered)

    return format_table(lines)
1517
+
1518
+
1519
def print_kubernetes_status(
    cluster: str,
    service: str,
    instance: str,
    output: List[str],
    kubernetes_status,
    verbose: int = 0,
) -> int:
    """Append a human-readable Kubernetes status report to ``output``.

    :param cluster: cluster name (not used by this function's body)
    :param service: service name, passed to the job summary helper
    :param instance: instance name, passed to the job summary helper
    :param output: list of lines that is extended in place
    :param kubernetes_status: status object whose attributes are rendered
    :param verbose: verbosity; > 0 adds creation time and autoscaling details
    :returns: 1 when ``kubernetes_status.error_message`` is set, otherwise 0
    """
    ks = kubernetes_status

    bouncing_status = bouncing_status_human(ks.app_count, ks.bounce_method)
    desired_state = desired_state_human(ks.desired_state, ks.expected_instance_count)
    output.append(f" State: {bouncing_status} - Desired state: {desired_state}")

    deploy_status = kubernetes_app_deploy_status_human(
        KubernetesDeployStatus.fromstring(ks.deploy_status),
        ks.deploy_status_message,
    )
    job_summary = status_kubernetes_job_human(
        service=service,
        instance=instance,
        deploy_status=deploy_status,
        desired_app_id=ks.app_id,
        app_count=ks.app_count,
        running_instances=ks.running_instance_count,
        normal_instance_count=ks.expected_instance_count,
        evicted_count=ks.evicted_count,
    )
    output.append(f" {job_summary}")

    if ks.create_timestamp and verbose > 0:
        created_at = datetime.fromtimestamp(ks.create_timestamp)
        output.append(
            f" App created: {created_at} ({humanize.naturaltime(created_at)}). Namespace: {ks.namespace}"
        )

    if ks.pods and len(ks.pods) > 0:
        output.append(" Pods:")
        for line in format_kubernetes_pod_table(ks.pods, verbose):
            output.append(f" {line}")

    if ks.replicasets and len(ks.replicasets) > 0:
        output.append(" ReplicaSets:")
        for line in format_kubernetes_replicaset_table(ks.replicasets):
            output.append(f" {line}")

    autoscaling_status = ks.autoscaling_status
    if autoscaling_status and verbose > 0:
        output.extend(
            [
                " Autoscaling status:",
                f" min_instances: {autoscaling_status['min_instances']}",
                f" max_instances: {autoscaling_status['max_instances']}",
                f" Desired instances: {autoscaling_status['desired_replicas']}",
                f" Last scale time: {autoscaling_status['last_scale_time']}",
                " Dashboard: y/was-it-the-autoscaler",
            ]
        )
        not_available = PaastaColors.red("N/A")
        metrics = autoscaling_status["metrics"]
        if len(metrics) > 0:
            output.append(" Metrics:")

            metrics_table: List[List[str]] = [["Metric", "Current", "Target"]]
            for metric in metrics:
                # A missing current/target value is shown as a red "N/A".
                current = metric.current_value
                if current is None:
                    current = not_available
                target = metric.target_value
                if target is None:
                    target = not_available
                metrics_table.append([metric["name"], current, target])
            for rendered in format_table(metrics_table):
                output.append(" " + rendered)

    smartstack = ks.smartstack
    if smartstack is not None:
        smartstack_lines = get_smartstack_status_human(
            smartstack.registration,
            smartstack.expected_backends_per_location,
            smartstack.locations,
        )
        output.extend([f" {line}" for line in smartstack_lines])

    envoy = ks.envoy
    if envoy is not None:
        envoy_lines = get_envoy_status_human(
            envoy.registration,
            envoy.expected_backends_per_location,
            envoy.locations,
        )
        output.extend([f" {line}" for line in envoy_lines])

    error_message = ks.error_message
    if not error_message:
        return 0
    output.append(" " + PaastaColors.red(error_message))
    return 1
1628
+
1629
+
1630
def print_tron_status(
    cluster: str,
    service: str,
    instance: str,
    output: List[str],
    tron_status,
    verbose: int = 0,
) -> int:
    """Append a human-readable Tron job/action report to ``output``.

    :param output: list of lines that is extended in place
    :param tron_status: status object whose job and action attributes are rendered
    :param verbose: truthy adds job status/schedule and action start time;
        > 1 additionally adds the raw command, stdout and stderr
    :returns: always 0
    """
    job_lines = [f" Tron job: {tron_status.job_name}"]
    if verbose:
        job_lines.append(f" Status: {tron_status.job_status}")
        job_lines.append(f" Schedule: {tron_status.job_schedule}")
    job_lines.append(f" Dashboard: {PaastaColors.blue(tron_status.job_url)}")
    output.extend(job_lines)

    action_lines = [
        f" Action: {tron_status.action_name}",
        f" Status: {tron_status.action_state}",
    ]
    if verbose:
        action_lines.append(f" Start time: {tron_status.action_start_time}")
    action_lines.append(f" Command: {tron_status.action_command}")
    if verbose > 1:
        action_lines.append(f" Raw Command: {tron_status.action_raw_command}")
        action_lines.append(f" Stdout: \n{tron_status.action_stdout}")
        action_lines.append(f" Stderr: \n{tron_status.action_stderr}")
    output.extend(action_lines)

    return 0
1655
+
1656
+
1657
def print_cassandra_status(
    cluster: str,
    service: str,
    instance: str,
    output: List[str],
    cassandra_status,
    verbose: int = 0,
) -> int:
    """Append a human-readable Cassandra cluster report to ``output``.

    :param output: list of lines that is extended in place
    :param cassandra_status: mapping with an optional ``"status"`` entry that
        holds the cluster ``"state"`` and a list of ``"nodes"``
    :param verbose: verbosity; node properties whose ``verbosity`` exceeds it
        are hidden, and >= 2 switches from tables to one key/value list per node
    :returns: 1 when no status is available yet, otherwise 0
    """
    tab = " "
    indent = 1

    status = cassandra_status.get("status")
    if status is None:
        output.append(
            indent * tab + PaastaColors.red("Cassandra cluster is not available yet")
        )
        return 1

    output.append(indent * tab + "Cassandra cluster:")
    indent += 1

    state = status.get("state")
    if state == "Running":
        state = PaastaColors.green(state)
    else:
        state = PaastaColors.red(state)

    nodes: List[Dict[str, Any]] = status.get("nodes") or []
    output.append(indent * tab + "State: " + state)

    if not nodes:
        output.append(
            indent * tab + "Nodes: " + PaastaColors.red("No node status available")
        )
        return 0

    output.append(indent * tab + "Nodes:")
    indent += 1

    all_rows: List[CassandraNodeStatusRow] = []
    for node in nodes:
        properties = node.get("properties")
        if not properties:
            continue
        row: CassandraNodeStatusRow = {}
        for prop in properties:
            # Look the name up with .get() so that a nameless property is
            # skipped instead of raising KeyError before the check runs.
            name = prop.get("name")
            if not name:
                continue
            if prop.get("verbosity", 0) > verbose:
                continue
            row[name] = node_property_to_str(prop, verbose)
        all_rows.append(row)

    if verbose < 2:
        # One table per group of rows sharing the same headers, each followed
        # by a separator line.
        for rows in group_nodes_by_header(all_rows):
            ftable = format_table(nodes_to_lines(verbose, rows))
            output.extend([indent * tab + line for line in ftable])
            output.extend([indent * tab])
    else:
        for rows in group_nodes_by_header(all_rows):
            for node_row in rows:
                output.append(indent * tab + "Node:")
                indent += 1
                for key, value in node_row.items():
                    output.append(
                        indent * tab + "{key}: {value}".format(key=key, value=value)
                    )
                indent -= 1
    return 0
1735
+
1736
+
1737
CassandraNodeStatusRow = Dict[str, str]


# group_nodes_by_header groups the given nodes into several lists of rows. The
# rows in each group have the same headers.
def group_nodes_by_header(
    rows: Optional[List[CassandraNodeStatusRow]] = None,
) -> List[List[CassandraNodeStatusRow]]:
    """Group ``rows`` by their set of keys (headers).

    :param rows: rows to group; ``None`` (the default) is treated as no rows
    :returns: one list per distinct header set, in order of first appearance,
        each preserving the relative order of its rows
    """
    # Keyed by the sorted tuple of header names, so two different header sets
    # can never map to the same group.
    groups: Dict[Tuple[str, ...], List[CassandraNodeStatusRow]] = {}
    for row in rows or []:
        groups.setdefault(tuple(sorted(row)), []).append(row)
    return list(groups.values())
1756
+
1757
+
1758
def nodes_to_lines(
    verbose: int = 0,
    rows: Optional[List[CassandraNodeStatusRow]] = None,
) -> List[List[str]]:
    """Convert node rows into table lines: a header line, then one line per row.

    The header is taken from the keys of the first row that provides any;
    values missing from later rows are rendered as empty strings.

    :param verbose: accepted for interface compatibility; not used here
    :param rows: rows to convert; ``None`` (the default) is treated as no rows
    :returns: list of lines, each a list of cell strings
    """
    header: List[str] = []
    lines: List[List[str]] = []
    for row in rows or []:
        if not header:
            header = list(row)
            lines.append(list(header))
        lines.append([row.get(key, "") for key in header])
    return lines
1773
+
1774
+
1775
def node_property_to_str(prop: Dict[str, Any], verbose: int) -> str:
    """Render a node property's value as a display string, based on its type.

    :param prop: mapping with optional ``"type"`` and ``"value"`` entries
    :param verbose: verbosity; > 0 shows ``time`` values verbatim instead of
        as a humanized "... ago" delta
    :returns: the display string; ``"None"`` when the value is missing
    """
    value = prop.get("value")
    if value is None:
        return "None"

    typ = prop.get("type")
    if typ == "string":
        return value
    if typ == "bool":
        return "Yes" if value else "No"
    if typ == "error":
        return PaastaColors.red(value)
    if typ == "time":
        if verbose > 0:
            return value
        parsed_time = datetime.strptime(value, "%Y-%m-%dT%H:%M:%SZ").replace(
            tzinfo=timezone.utc
        )
        # NOTE: the k8s API returns timestamps in UTC, so we make sure to always work in UTC
        now = datetime.now(timezone.utc)
        age = timedelta(seconds=(now - parsed_time).total_seconds())
        return humanize.naturaldelta(age) + " ago"

    # "int", "float64" and any unrecognized type all fall back to str().
    return str(value)
1806
+
1807
+
1808
def print_kafka_status(
    cluster: str,
    service: str,
    instance: str,
    output: List[str],
    kafka_status: Mapping[str, Any],
    verbose: int = 0,
) -> int:
    """Append a human-readable Kafka cluster report to ``output``.

    :param output: list of lines that is extended in place
    :param kafka_status: mapping with ``"status"`` and ``"metadata"`` entries
    :param verbose: truthy adds broker IP and pod name columns and, when there
        are brokers, the pod status section
    :returns: 1 when no status is available yet, otherwise 0
    :raises ValueError: when the desired-state annotation is missing
    """
    status = kafka_status.get("status")
    if status is None:
        output.append(PaastaColors.red(" Kafka cluster is not available yet"))
        return 1

    # print kafka view url before operator status because if the kafka cluster is not available for some reason
    # atleast the user can get a hold the kafka view url
    kafka_view_url = status.get("kafka_view_url")
    if kafka_view_url is not None:
        output.append(f" Kafka View Url: {kafka_view_url}")

    output.append(f" Zookeeper: {status['zookeeper']}")

    annotations = kafka_status.get("metadata").get("annotations")
    desired_state = annotations.get(paasta_prefixed("desired_state"))
    if desired_state is None:
        raise ValueError(
            "expected desired state in kafka annotation, but received none"
        )
    output.append(f" State: {desired_state}")

    is_ready = status.get("cluster_ready")
    ready_text = "true" if is_ready else PaastaColors.red("false")
    restart_note = ""
    if status["health"]["restarting"]:
        restart_note = " (rolling-restart in progress)"
    output.append(f" Ready: {ready_text}{restart_note}")

    if is_ready:
        health: Mapping[str, Any] = status["health"]
        if health["healthy"]:
            health_text = PaastaColors.green("healthy")
        else:
            health_text = PaastaColors.red("unhealthy")
        output.append(f" Health: {health_text}")
        if not health.get("healthy"):
            output.append(f" Reason: {health['message']}")
            output.append(f" Offline Partitions: {health['offline_partitions']}")
            output.append(
                f" Under Replicated Partitions: {health['under_replicated_partitions']}"
            )

    brokers = status["brokers"]
    output.append(" Brokers:")

    if verbose:
        rows = [["Id", "Phase", "IP", "Pod Name", "Started"]]
    else:
        rows = [["Id", "Phase", "Started"]]

    for broker in brokers:
        if broker["phase"] == "Running":
            colorize = PaastaColors.green
        else:
            colorize = PaastaColors.red

        # NOTE: the k8s API returns timestamps in UTC, so we make sure to always work in UTC
        started_at = datetime.strptime(
            broker["deployed_timestamp"], "%Y-%m-%dT%H:%M:%SZ"
        ).replace(tzinfo=timezone.utc)
        age = datetime.now(timezone.utc) - started_at
        started_text = f"{str(started_at)} ({humanize.naturaltime(age)})"

        row = [str(broker["id"]), colorize(broker["phase"])]
        if verbose:
            row.append(str(broker["ip"]))
            row.append(str(broker["name"]))
        row.append(started_text)
        rows.append(row)

    for line in format_table(rows):
        output.append(f" {line}")

    if verbose and len(brokers) > 0:
        append_pod_status(brokers, output)

    return 0
1902
+
1903
+
1904
def report_status_for_cluster(
    service: str,
    cluster: str,
    deploy_pipeline: Sequence[str],
    actual_deployments: Mapping[str, DeploymentVersion],
    instance_whitelist: Mapping[str, Type[InstanceConfig]],
    system_paasta_config: SystemPaastaConfig,
    lock: Lock,
    verbose: int = 0,
    new: bool = False,
    all_namespaces: bool = False,
) -> Tuple[int, Sequence[str]]:
    """With a given service and cluster, prints the status of the instances
    in that cluster

    :param deploy_pipeline: ``"<cluster>.<instance>"`` entries of the planned
        deployments
    :param actual_deployments: deployments keyed by ``"<cluster>.<instance>"``
    :param instance_whitelist: instance name -> instance config class; only
        classes in ``ALLOWED_INSTANCE_CONFIG`` are reported on
    :param lock: forwarded to ``paasta_status_on_api_endpoint``
    :returns: ``(return_code, output_lines)`` where ``return_code`` is 1 if
        any per-instance status call returned a truthy code, otherwise 0
    """
    output = ["", "service: %s" % service, "cluster: %s" % cluster]
    instances = [
        (instance, instance_config_class)
        for instance, instance_config_class in instance_whitelist.items()
        if instance_config_class in ALLOWED_INSTANCE_CONFIG
    ]
    # ``instances`` holds (name, class) tuples, so membership of a bare
    # instance name has to be tested against the names only.
    whitelisted_names = {name for name, _ in instances}

    # Tron instance are not present in the deploy pipeline, so treat them as
    # seen by default to avoid error messages
    seen_instances = [
        instance
        for instance, instance_config_class in instance_whitelist.items()
        if instance_config_class == TronActionConfig
    ]

    for namespace in deploy_pipeline:
        cluster_in_pipeline, instance = namespace.split(".")
        seen_instances.append(instance)

        if cluster_in_pipeline != cluster:
            continue
        if whitelisted_names and instance not in whitelisted_names:
            continue

        # Deployed either per deployments.json, or because flink instances
        # don't use `deployments.json` at all.
        is_deployed = (
            namespace in actual_deployments
            or instance_whitelist.get(instance) == FlinkDeploymentConfig
        )
        if not is_deployed:
            # Case: service NOT deployed to cluster.instance
            output.append(" instance: %s" % PaastaColors.red(instance))
            output.append(" Git sha: None (not deployed yet)")

    return_codes = [
        paasta_status_on_api_endpoint(
            cluster=cluster,
            service=service,
            instance=deployed_instance,
            system_paasta_config=system_paasta_config,
            lock=lock,
            verbose=verbose,
            new=new,
            all_namespaces=all_namespaces,
            is_eks=(instance_config_class in EKS_DEPLOYMENT_CONFIGS),
        )
        for deployed_instance, instance_config_class in instances
    ]
    return_code = 1 if any(return_codes) else 0

    output.append(
        report_invalid_whitelist_values(
            whitelist=[name for name, _ in instances],
            items=seen_instances,
            item_type="instance",
        )
    )

    return return_code, output
1985
+
1986
+
1987
def report_invalid_whitelist_values(
    whitelist: Iterable[str], items: Sequence[str], item_type: str
) -> str:
    """Warns the user if there are entries in ``whitelist`` which don't
    correspond to any item in ``items``. Helps highlight typos.

    :returns: a warning message listing the unmatched entries, or an empty
        string when ``whitelist`` is None or every entry matches
    """
    if whitelist is None:
        return ""
    bogus_entries = [entry for entry in whitelist if entry not in items]
    if not bogus_entries:
        return ""
    template = (
        "\nWarning: This service does not have any %s matching these names:\n%s"
    )
    return template % (item_type, ",".join(bogus_entries))
2005
+
2006
+
2007
def normalize_registrations(
    service: str, registrations: Sequence[str]
) -> Sequence[str]:
    """Qualify bare registration names with the service name.

    A registration without a "." is turned into ``"<service>.<registration>"``;
    one that already contains a "." is kept unchanged.

    :returns: a new list in the same order as ``registrations``
    """
    return [reg if "." in reg else f"{service}.{reg}" for reg in registrations]
2017
+
2018
+
2019
def get_filters(
    args,
) -> Sequence[Callable[[InstanceConfig], bool]]:
    """Figures out which filters to apply from an args object, and returns them

    :param args: args object
    :returns: list of functions that take an instance config and returns if the instance conf matches the filter
    """
    filters = []

    def csv_membership_filter(conf_getter: str, arg_name: str):
        # The comma-separated argument is read from ``args`` on every call,
        # not when the filter is created.
        def matches(conf) -> bool:
            return getattr(conf, conf_getter)() in getattr(args, arg_name).split(",")

        return matches

    simple_filters = (
        ("get_service", "service"),
        ("get_cluster", "clusters"),
        ("get_instance", "instances"),
        ("get_deploy_group", "deploy_group"),
    )
    for conf_getter, arg_name in simple_filters:
        if getattr(args, arg_name):
            filters.append(csv_membership_filter(conf_getter, arg_name))

    if args.registration:
        normalized_regs = normalize_registrations(
            service=args.service, registrations=args.registration.split(",")
        )

        def matches_registration(conf) -> bool:
            # Configs without registrations can never match.
            if not hasattr(conf, "get_registrations"):
                return False
            return any(reg in normalized_regs for reg in conf.get_registrations())

        filters.append(matches_registration)

    if args.owner:
        owners = args.owner.split(",")

        def matches_owner(conf) -> bool:
            # If the instance owner is None, check the service owner, else check the instance owner
            if conf.get_team() is None:
                service_team = get_team(
                    overrides={}, service=conf.get_service(), soa_dir=args.soa_dir
                )
                return service_team in owners
            return conf.get_team() in owners

        filters.append(matches_owner)

    return filters
2072
+
2073
+
2074
def apply_args_filters(
    args,
) -> Mapping[str, Mapping[str, Mapping[str, Type[InstanceConfig]]]]:
    """
    Take an args object and returns the dict of cluster:service:instances
    Currently, will filter by clusters, instances, services, and deploy_groups
    If no instances are found, will print a message and try to find matching instances
    for each service

    Note that ``args.service`` and ``args.instances`` may be rewritten on the
    given object (shorthand expansion, service name inference).

    :param args: args object containing attributes to filter by
    :returns: Dict of dicts, in format {cluster_name: {service_name: {instance1, instance2}}}
    """
    clusters_services_instances: DefaultDict[
        str, DefaultDict[str, Dict[str, Type[InstanceConfig]]]
    ] = defaultdict(lambda: defaultdict(dict))

    # Shorthand "service.instance" notation excludes the explicit -s / -i flags.
    if args.service_instance:
        if args.service or args.instances:
            print(
                PaastaColors.red(
                    "Invalid command. Do not include optional arguments -s or -i "
                    "when using shorthand notation."
                )
            )
            return clusters_services_instances
        if "." not in args.service_instance:
            print(PaastaColors.red('Use a "." to separate service and instance name'))
            return clusters_services_instances
        args.service, args.instances = args.service_instance.split(".", 1)

    if args.service:
        try:
            validate_service_name(args.service, soa_dir=args.soa_dir)
        except NoSuchService:
            print(PaastaColors.red(f'The service "{args.service}" does not exist.'))
            known_services = list_services(soa_dir=args.soa_dir)
            suggestions = difflib.get_close_matches(
                args.service, known_services, n=5, cutoff=0.5
            )
            if suggestions:
                print(PaastaColors.red("Did you mean any of these?"))
                for suggestion in suggestions:
                    print(PaastaColors.red(f" {suggestion}"))
            return clusters_services_instances

        all_services = [args.service]
    else:
        args.service = None
        all_services = list_services(soa_dir=args.soa_dir)

    if args.service is None and args.owner is None:
        args.service = figure_out_service_name(args, soa_dir=args.soa_dir)

    clusters = args.clusters.split(",") if args.clusters else list_clusters()
    instances = args.instances.split(",") if args.instances else None

    filters = get_filters(args)

    matched_count = 0
    for service in all_services:
        if args.service and service != args.service:
            continue
        instance_confs = get_instance_configs_for_service(
            service, soa_dir=args.soa_dir, clusters=clusters, instances=instances
        )
        for instance_conf in instance_confs:
            if not all([f(instance_conf) for f in filters]):
                continue
            per_service = clusters_services_instances[instance_conf.get_cluster()][
                service
            ]
            per_service[instance_conf.get_instance()] = instance_conf.__class__
            matched_count += 1

    if matched_count == 0 and args.service and args.instances:
        for service in args.service.split(","):
            verify_instances(args.instances, service, clusters)

    return clusters_services_instances
2156
+
2157
+
2158
def paasta_status(args) -> int:
    """Print the status of a Yelp service running on PaaSTA.

    One ``report_status_for_cluster`` task is queued per (cluster, service)
    pair and the tasks are run on a thread pool.

    :param args: argparse.Namespace obj created from sys.args by cli
    :returns: the highest return code collected (0 when everything succeeded)
    """
    soa_dir = args.soa_dir
    system_paasta_config = load_system_paasta_config()

    return_codes = [0]
    lock = Lock()
    tasks = []
    clusters_services_instances = apply_args_filters(args)
    for cluster, service_instances in clusters_services_instances.items():
        for service, instances in service_instances.items():
            all_flink = all(
                (config_class in FLINK_DEPLOYMENT_CONFIGS)
                for config_class in instances.values()
            )
            actual_deployments: Mapping[str, DeploymentVersion]
            if all_flink:
                actual_deployments = {}
            else:
                actual_deployments = get_actual_deployments(service, soa_dir)

            if not (all_flink or actual_deployments):
                print(missing_deployments_message(service))
                return_codes.append(1)
                continue

            deploy_pipeline = list(get_planned_deployments(service, soa_dir))
            new = _use_new_paasta_status(args, system_paasta_config)
            task_kwargs = dict(
                service=service,
                cluster=cluster,
                deploy_pipeline=deploy_pipeline,
                actual_deployments=actual_deployments,
                instance_whitelist=instances,
                system_paasta_config=system_paasta_config,
                lock=lock,
                verbose=args.verbose,
                new=new,
                all_namespaces=args.all_namespaces,
            )
            tasks.append((report_status_for_cluster, task_kwargs))

    with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
        futures = [executor.submit(func, **kwargs) for func, kwargs in tasks]
        try:
            for future in concurrent.futures.as_completed(futures):
                # Only the return code is used here; the output lines are dropped.
                return_code, _ = future.result()
                return_codes.append(return_code)
        except KeyboardInterrupt:
            # ideally we wouldn't need to reach into `ThreadPoolExecutor`
            # internals, but so far this is the best way to stop all these
            # threads until a public interface is added
            executor._threads.clear()  # type: ignore
            concurrent.futures.thread._threads_queues.clear()  # type: ignore
            raise KeyboardInterrupt

    return max(return_codes)
2215
+
2216
+
2217
def bouncing_status_human(app_count, bounce_method):
    """Return a colored, human readable bounce status.

    :param app_count: number of apps; 0, 1 and "more than 1" each map to a
        distinct status, anything else is reported as unknown
    :param bounce_method: bounce method name, shown only while bouncing
    :returns: the status text wrapped by the matching ``PaastaColors`` helper
    """
    if app_count == 0:
        return PaastaColors.red("Disabled")
    if app_count == 1:
        return PaastaColors.green("Configured")
    if app_count > 1:
        return PaastaColors.yellow("Bouncing (%s)" % bounce_method)
    # Reached for any count that matched none of the cases above.
    return PaastaColors.red("Unknown (count: %s)" % app_count)
2226
+
2227
+
2228
def desired_state_human(desired_state, instances):
    """Return a colored, human readable form of the desired state.

    :param desired_state: "start" or "stop"; any other value is reported as
        unknown
    :param instances: instance count; a "start" state with a count of 0 is
        shown as stopped (in bold rather than red)
    :returns: the state text wrapped by the matching ``PaastaColors`` helper
    """
    if desired_state == "start":
        if instances != 0:
            return PaastaColors.bold("Started")
        if instances == 0:
            return PaastaColors.bold("Stopped")
    if desired_state == "stop":
        return PaastaColors.red("Stopped")
    return PaastaColors.red("Unknown (desired_state: %s)" % desired_state)
2237
+
2238
+
2239
class BackendType(Enum):
    """Systems for which a backend report can be produced.

    Used by ``envoy_backend_report`` and ``haproxy_backend_report`` to tell
    ``_backend_report`` which system the backend counts refer to.
    """

    # NOTE(review): the values look like the display names of the systems.
    ENVOY = "Envoy"
    HAPROXY = "haproxy"
2242
+
2243
+
2244
def envoy_backend_report(normal_instance_count: int, up_backends: int) -> str:
    """Return the up-backends report for Envoy.

    Thin wrapper that forwards both counts to ``_backend_report`` with
    ``BackendType.ENVOY``.
    """
    return _backend_report(
        normal_instance_count=normal_instance_count,
        up_backends=up_backends,
        system_name=BackendType.ENVOY,
    )
2246
+
2247
+
2248
def haproxy_backend_report(normal_instance_count: int, up_backends: int) -> str:
    """Return the up-backends report for haproxy.

    Thin wrapper that forwards both counts to ``_backend_report`` with
    ``BackendType.HAPROXY``.
    """
    return _backend_report(
        normal_instance_count=normal_instance_count,
        up_backends=up_backends,
        system_name=BackendType.HAPROXY,
    )
2250
+
2251
+
2252
def _backend_report(
    normal_instance_count: int, up_backends: int, system_name: BackendType
) -> str:
    """Given that a service is in smartstack, this returns a human readable
    report of the up backends.

    :param normal_instance_count: number of backends expected to be up
    :param up_backends: number of backends that are currently up
    :param system_name: the system the backends were counted in; its value
        (e.g. "Envoy") is what appears in the report
    :returns: a one-line colored summary, "Critical" with the counts and ratio
        when ``is_under_replicated`` reports under-replication, otherwise
        "Healthy" with the counts
    """
    # TODO: Take into account a configurable threshold, PAASTA-1102
    crit_threshold = 50
    under_replicated, ratio = is_under_replicated(
        num_available=up_backends,
        expected_count=normal_instance_count,
        crit_threshold=crit_threshold,
    )
    if under_replicated:
        status = PaastaColors.red("Critical")
        count = PaastaColors.red(
            "(%d/%d, %d%%)" % (up_backends, normal_instance_count, ratio)
        )
    else:
        status = PaastaColors.green("Healthy")
        count = PaastaColors.green("(%d/%d)" % (up_backends, normal_instance_count))
    up_string = PaastaColors.bold("UP")
    # Formatting the enum member itself renders as "BackendType.ENVOY";
    # interpolate its value so the report shows "Envoy" / "haproxy".
    return f"{status} - in {system_name.value} with {count} total backends {up_string} in this namespace."
2274
+
2275
+
2276
def _use_new_paasta_status(args, system_paasta_config) -> bool:
    """Decide whether the new status output should be used.

    ``args.new`` takes precedence over ``args.old``. When neither flag is set
    the system config decides: only a configured version of "old" selects the
    old output, every other value selects the new one.

    :param args: parsed CLI arguments providing ``new`` and ``old``
    :param system_paasta_config: object providing
        ``get_paasta_status_version()``
    :returns: True for the new output, False for the old one
    """
    if args.new:
        return True
    if args.old:
        return False
    configured_version = system_paasta_config.get_paasta_status_version()
    return configured_version != "old"
2288
+
2289
+
2290
# Add other custom status writers here
# See `print_tron_status` for reference
# NOTE: no default_factory is supplied, so looking up an instance type that is
# not listed raises KeyError, exactly as a plain dict would.
INSTANCE_TYPE_WRITERS: Mapping[str, InstanceStatusWriter] = defaultdict(
    None,
    {
        "kubernetes": print_kubernetes_status,
        "kubernetes_v2": print_kubernetes_status_v2,
        "eks": print_kubernetes_status,
        "tron": print_tron_status,
        "adhoc": print_adhoc_status,
        "flink": print_flink_status,
        "flinkeks": print_flinkeks_status,
        "kafkacluster": print_kafka_status,
        "cassandracluster": print_cassandra_status,
    },
)