paasta-tools 1.21.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (348) hide show
  1. k8s_itests/__init__.py +0 -0
  2. k8s_itests/test_autoscaling.py +23 -0
  3. k8s_itests/utils.py +38 -0
  4. paasta_tools/__init__.py +20 -0
  5. paasta_tools/adhoc_tools.py +142 -0
  6. paasta_tools/api/__init__.py +13 -0
  7. paasta_tools/api/api.py +330 -0
  8. paasta_tools/api/api_docs/swagger.json +2323 -0
  9. paasta_tools/api/client.py +106 -0
  10. paasta_tools/api/settings.py +33 -0
  11. paasta_tools/api/tweens/__init__.py +6 -0
  12. paasta_tools/api/tweens/auth.py +125 -0
  13. paasta_tools/api/tweens/profiling.py +108 -0
  14. paasta_tools/api/tweens/request_logger.py +124 -0
  15. paasta_tools/api/views/__init__.py +13 -0
  16. paasta_tools/api/views/autoscaler.py +100 -0
  17. paasta_tools/api/views/exception.py +45 -0
  18. paasta_tools/api/views/flink.py +73 -0
  19. paasta_tools/api/views/instance.py +395 -0
  20. paasta_tools/api/views/pause_autoscaler.py +71 -0
  21. paasta_tools/api/views/remote_run.py +113 -0
  22. paasta_tools/api/views/resources.py +76 -0
  23. paasta_tools/api/views/service.py +35 -0
  24. paasta_tools/api/views/version.py +25 -0
  25. paasta_tools/apply_external_resources.py +79 -0
  26. paasta_tools/async_utils.py +109 -0
  27. paasta_tools/autoscaling/__init__.py +0 -0
  28. paasta_tools/autoscaling/autoscaling_service_lib.py +57 -0
  29. paasta_tools/autoscaling/forecasting.py +106 -0
  30. paasta_tools/autoscaling/max_all_k8s_services.py +41 -0
  31. paasta_tools/autoscaling/pause_service_autoscaler.py +77 -0
  32. paasta_tools/autoscaling/utils.py +52 -0
  33. paasta_tools/bounce_lib.py +184 -0
  34. paasta_tools/broadcast_log_to_services.py +62 -0
  35. paasta_tools/cassandracluster_tools.py +210 -0
  36. paasta_tools/check_autoscaler_max_instances.py +212 -0
  37. paasta_tools/check_cassandracluster_services_replication.py +35 -0
  38. paasta_tools/check_flink_services_health.py +203 -0
  39. paasta_tools/check_kubernetes_api.py +57 -0
  40. paasta_tools/check_kubernetes_services_replication.py +141 -0
  41. paasta_tools/check_oom_events.py +244 -0
  42. paasta_tools/check_services_replication_tools.py +324 -0
  43. paasta_tools/check_spark_jobs.py +234 -0
  44. paasta_tools/cleanup_kubernetes_cr.py +138 -0
  45. paasta_tools/cleanup_kubernetes_crd.py +145 -0
  46. paasta_tools/cleanup_kubernetes_jobs.py +344 -0
  47. paasta_tools/cleanup_tron_namespaces.py +96 -0
  48. paasta_tools/cli/__init__.py +13 -0
  49. paasta_tools/cli/authentication.py +85 -0
  50. paasta_tools/cli/cli.py +260 -0
  51. paasta_tools/cli/cmds/__init__.py +13 -0
  52. paasta_tools/cli/cmds/autoscale.py +143 -0
  53. paasta_tools/cli/cmds/check.py +334 -0
  54. paasta_tools/cli/cmds/cook_image.py +147 -0
  55. paasta_tools/cli/cmds/get_docker_image.py +76 -0
  56. paasta_tools/cli/cmds/get_image_version.py +172 -0
  57. paasta_tools/cli/cmds/get_latest_deployment.py +93 -0
  58. paasta_tools/cli/cmds/info.py +155 -0
  59. paasta_tools/cli/cmds/itest.py +117 -0
  60. paasta_tools/cli/cmds/list.py +66 -0
  61. paasta_tools/cli/cmds/list_clusters.py +42 -0
  62. paasta_tools/cli/cmds/list_deploy_queue.py +171 -0
  63. paasta_tools/cli/cmds/list_namespaces.py +84 -0
  64. paasta_tools/cli/cmds/local_run.py +1396 -0
  65. paasta_tools/cli/cmds/logs.py +1601 -0
  66. paasta_tools/cli/cmds/mark_for_deployment.py +1988 -0
  67. paasta_tools/cli/cmds/mesh_status.py +174 -0
  68. paasta_tools/cli/cmds/pause_service_autoscaler.py +107 -0
  69. paasta_tools/cli/cmds/push_to_registry.py +275 -0
  70. paasta_tools/cli/cmds/remote_run.py +252 -0
  71. paasta_tools/cli/cmds/rollback.py +347 -0
  72. paasta_tools/cli/cmds/secret.py +549 -0
  73. paasta_tools/cli/cmds/security_check.py +59 -0
  74. paasta_tools/cli/cmds/spark_run.py +1400 -0
  75. paasta_tools/cli/cmds/start_stop_restart.py +401 -0
  76. paasta_tools/cli/cmds/status.py +2302 -0
  77. paasta_tools/cli/cmds/validate.py +1012 -0
  78. paasta_tools/cli/cmds/wait_for_deployment.py +275 -0
  79. paasta_tools/cli/fsm/__init__.py +13 -0
  80. paasta_tools/cli/fsm/autosuggest.py +82 -0
  81. paasta_tools/cli/fsm/template/README.md +8 -0
  82. paasta_tools/cli/fsm/template/cookiecutter.json +7 -0
  83. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/kubernetes-PROD.yaml +91 -0
  84. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/monitoring.yaml +20 -0
  85. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/service.yaml +8 -0
  86. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/smartstack.yaml +6 -0
  87. paasta_tools/cli/fsm_cmd.py +121 -0
  88. paasta_tools/cli/paasta_tabcomplete.sh +23 -0
  89. paasta_tools/cli/schemas/adhoc_schema.json +199 -0
  90. paasta_tools/cli/schemas/autoscaling_schema.json +91 -0
  91. paasta_tools/cli/schemas/autotuned_defaults/cassandracluster_schema.json +37 -0
  92. paasta_tools/cli/schemas/autotuned_defaults/kubernetes_schema.json +89 -0
  93. paasta_tools/cli/schemas/deploy_schema.json +173 -0
  94. paasta_tools/cli/schemas/eks_schema.json +970 -0
  95. paasta_tools/cli/schemas/kubernetes_schema.json +970 -0
  96. paasta_tools/cli/schemas/rollback_schema.json +160 -0
  97. paasta_tools/cli/schemas/service_schema.json +25 -0
  98. paasta_tools/cli/schemas/smartstack_schema.json +322 -0
  99. paasta_tools/cli/schemas/tron_schema.json +699 -0
  100. paasta_tools/cli/utils.py +1118 -0
  101. paasta_tools/clusterman.py +21 -0
  102. paasta_tools/config_utils.py +385 -0
  103. paasta_tools/contrib/__init__.py +0 -0
  104. paasta_tools/contrib/bounce_log_latency_parser.py +68 -0
  105. paasta_tools/contrib/check_manual_oapi_changes.sh +24 -0
  106. paasta_tools/contrib/check_orphans.py +306 -0
  107. paasta_tools/contrib/create_dynamodb_table.py +35 -0
  108. paasta_tools/contrib/create_paasta_playground.py +105 -0
  109. paasta_tools/contrib/emit_allocated_cpu_metrics.py +50 -0
  110. paasta_tools/contrib/get_running_task_allocation.py +346 -0
  111. paasta_tools/contrib/habitat_fixer.py +86 -0
  112. paasta_tools/contrib/ide_helper.py +316 -0
  113. paasta_tools/contrib/is_pod_healthy_in_proxy.py +139 -0
  114. paasta_tools/contrib/is_pod_healthy_in_smartstack.py +50 -0
  115. paasta_tools/contrib/kill_bad_containers.py +109 -0
  116. paasta_tools/contrib/mass-deploy-tag.sh +44 -0
  117. paasta_tools/contrib/mock_patch_checker.py +86 -0
  118. paasta_tools/contrib/paasta_update_soa_memcpu.py +520 -0
  119. paasta_tools/contrib/render_template.py +129 -0
  120. paasta_tools/contrib/rightsizer_soaconfigs_update.py +348 -0
  121. paasta_tools/contrib/service_shard_remove.py +157 -0
  122. paasta_tools/contrib/service_shard_update.py +373 -0
  123. paasta_tools/contrib/shared_ip_check.py +77 -0
  124. paasta_tools/contrib/timeouts_metrics_prom.py +64 -0
  125. paasta_tools/delete_kubernetes_deployments.py +89 -0
  126. paasta_tools/deployment_utils.py +44 -0
  127. paasta_tools/docker_wrapper.py +234 -0
  128. paasta_tools/docker_wrapper_imports.py +13 -0
  129. paasta_tools/drain_lib.py +351 -0
  130. paasta_tools/dump_locally_running_services.py +71 -0
  131. paasta_tools/eks_tools.py +119 -0
  132. paasta_tools/envoy_tools.py +373 -0
  133. paasta_tools/firewall.py +504 -0
  134. paasta_tools/firewall_logging.py +154 -0
  135. paasta_tools/firewall_update.py +172 -0
  136. paasta_tools/flink_tools.py +345 -0
  137. paasta_tools/flinkeks_tools.py +90 -0
  138. paasta_tools/frameworks/__init__.py +0 -0
  139. paasta_tools/frameworks/adhoc_scheduler.py +71 -0
  140. paasta_tools/frameworks/constraints.py +87 -0
  141. paasta_tools/frameworks/native_scheduler.py +652 -0
  142. paasta_tools/frameworks/native_service_config.py +301 -0
  143. paasta_tools/frameworks/task_store.py +245 -0
  144. paasta_tools/generate_all_deployments +9 -0
  145. paasta_tools/generate_authenticating_services.py +94 -0
  146. paasta_tools/generate_deployments_for_service.py +255 -0
  147. paasta_tools/generate_services_file.py +114 -0
  148. paasta_tools/generate_services_yaml.py +30 -0
  149. paasta_tools/hacheck.py +76 -0
  150. paasta_tools/instance/__init__.py +0 -0
  151. paasta_tools/instance/hpa_metrics_parser.py +122 -0
  152. paasta_tools/instance/kubernetes.py +1362 -0
  153. paasta_tools/iptables.py +240 -0
  154. paasta_tools/kafkacluster_tools.py +143 -0
  155. paasta_tools/kubernetes/__init__.py +0 -0
  156. paasta_tools/kubernetes/application/__init__.py +0 -0
  157. paasta_tools/kubernetes/application/controller_wrappers.py +476 -0
  158. paasta_tools/kubernetes/application/tools.py +90 -0
  159. paasta_tools/kubernetes/bin/__init__.py +0 -0
  160. paasta_tools/kubernetes/bin/kubernetes_remove_evicted_pods.py +164 -0
  161. paasta_tools/kubernetes/bin/paasta_cleanup_remote_run_resources.py +135 -0
  162. paasta_tools/kubernetes/bin/paasta_cleanup_stale_nodes.py +181 -0
  163. paasta_tools/kubernetes/bin/paasta_secrets_sync.py +758 -0
  164. paasta_tools/kubernetes/remote_run.py +558 -0
  165. paasta_tools/kubernetes_tools.py +4679 -0
  166. paasta_tools/list_kubernetes_service_instances.py +128 -0
  167. paasta_tools/list_tron_namespaces.py +60 -0
  168. paasta_tools/long_running_service_tools.py +678 -0
  169. paasta_tools/mac_address.py +44 -0
  170. paasta_tools/marathon_dashboard.py +0 -0
  171. paasta_tools/mesos/__init__.py +0 -0
  172. paasta_tools/mesos/cfg.py +46 -0
  173. paasta_tools/mesos/cluster.py +60 -0
  174. paasta_tools/mesos/exceptions.py +59 -0
  175. paasta_tools/mesos/framework.py +77 -0
  176. paasta_tools/mesos/log.py +48 -0
  177. paasta_tools/mesos/master.py +306 -0
  178. paasta_tools/mesos/mesos_file.py +169 -0
  179. paasta_tools/mesos/parallel.py +52 -0
  180. paasta_tools/mesos/slave.py +115 -0
  181. paasta_tools/mesos/task.py +94 -0
  182. paasta_tools/mesos/util.py +69 -0
  183. paasta_tools/mesos/zookeeper.py +37 -0
  184. paasta_tools/mesos_maintenance.py +848 -0
  185. paasta_tools/mesos_tools.py +1051 -0
  186. paasta_tools/metrics/__init__.py +0 -0
  187. paasta_tools/metrics/metastatus_lib.py +1110 -0
  188. paasta_tools/metrics/metrics_lib.py +217 -0
  189. paasta_tools/monitoring/__init__.py +13 -0
  190. paasta_tools/monitoring/check_k8s_api_performance.py +110 -0
  191. paasta_tools/monitoring_tools.py +652 -0
  192. paasta_tools/monkrelaycluster_tools.py +146 -0
  193. paasta_tools/nrtsearchservice_tools.py +143 -0
  194. paasta_tools/nrtsearchserviceeks_tools.py +68 -0
  195. paasta_tools/oom_logger.py +321 -0
  196. paasta_tools/paasta_deploy_tron_jobs +3 -0
  197. paasta_tools/paasta_execute_docker_command.py +123 -0
  198. paasta_tools/paasta_native_serviceinit.py +21 -0
  199. paasta_tools/paasta_service_config_loader.py +201 -0
  200. paasta_tools/paastaapi/__init__.py +29 -0
  201. paasta_tools/paastaapi/api/__init__.py +3 -0
  202. paasta_tools/paastaapi/api/autoscaler_api.py +302 -0
  203. paasta_tools/paastaapi/api/default_api.py +569 -0
  204. paasta_tools/paastaapi/api/remote_run_api.py +604 -0
  205. paasta_tools/paastaapi/api/resources_api.py +157 -0
  206. paasta_tools/paastaapi/api/service_api.py +1736 -0
  207. paasta_tools/paastaapi/api_client.py +818 -0
  208. paasta_tools/paastaapi/apis/__init__.py +22 -0
  209. paasta_tools/paastaapi/configuration.py +455 -0
  210. paasta_tools/paastaapi/exceptions.py +137 -0
  211. paasta_tools/paastaapi/model/__init__.py +5 -0
  212. paasta_tools/paastaapi/model/adhoc_launch_history.py +176 -0
  213. paasta_tools/paastaapi/model/autoscaler_count_msg.py +176 -0
  214. paasta_tools/paastaapi/model/deploy_queue.py +178 -0
  215. paasta_tools/paastaapi/model/deploy_queue_service_instance.py +194 -0
  216. paasta_tools/paastaapi/model/envoy_backend.py +185 -0
  217. paasta_tools/paastaapi/model/envoy_location.py +184 -0
  218. paasta_tools/paastaapi/model/envoy_status.py +181 -0
  219. paasta_tools/paastaapi/model/flink_cluster_overview.py +188 -0
  220. paasta_tools/paastaapi/model/flink_config.py +173 -0
  221. paasta_tools/paastaapi/model/flink_job.py +186 -0
  222. paasta_tools/paastaapi/model/flink_job_details.py +192 -0
  223. paasta_tools/paastaapi/model/flink_jobs.py +175 -0
  224. paasta_tools/paastaapi/model/float_and_error.py +173 -0
  225. paasta_tools/paastaapi/model/hpa_metric.py +176 -0
  226. paasta_tools/paastaapi/model/inline_object.py +170 -0
  227. paasta_tools/paastaapi/model/inline_response200.py +170 -0
  228. paasta_tools/paastaapi/model/inline_response2001.py +170 -0
  229. paasta_tools/paastaapi/model/instance_bounce_status.py +200 -0
  230. paasta_tools/paastaapi/model/instance_mesh_status.py +186 -0
  231. paasta_tools/paastaapi/model/instance_status.py +220 -0
  232. paasta_tools/paastaapi/model/instance_status_adhoc.py +187 -0
  233. paasta_tools/paastaapi/model/instance_status_cassandracluster.py +173 -0
  234. paasta_tools/paastaapi/model/instance_status_flink.py +173 -0
  235. paasta_tools/paastaapi/model/instance_status_kafkacluster.py +173 -0
  236. paasta_tools/paastaapi/model/instance_status_kubernetes.py +263 -0
  237. paasta_tools/paastaapi/model/instance_status_kubernetes_autoscaling_status.py +187 -0
  238. paasta_tools/paastaapi/model/instance_status_kubernetes_v2.py +197 -0
  239. paasta_tools/paastaapi/model/instance_status_tron.py +204 -0
  240. paasta_tools/paastaapi/model/instance_tasks.py +182 -0
  241. paasta_tools/paastaapi/model/integer_and_error.py +173 -0
  242. paasta_tools/paastaapi/model/kubernetes_container.py +178 -0
  243. paasta_tools/paastaapi/model/kubernetes_container_v2.py +219 -0
  244. paasta_tools/paastaapi/model/kubernetes_healthcheck.py +176 -0
  245. paasta_tools/paastaapi/model/kubernetes_pod.py +201 -0
  246. paasta_tools/paastaapi/model/kubernetes_pod_event.py +176 -0
  247. paasta_tools/paastaapi/model/kubernetes_pod_v2.py +213 -0
  248. paasta_tools/paastaapi/model/kubernetes_replica_set.py +185 -0
  249. paasta_tools/paastaapi/model/kubernetes_version.py +202 -0
  250. paasta_tools/paastaapi/model/remote_run_outcome.py +189 -0
  251. paasta_tools/paastaapi/model/remote_run_start.py +185 -0
  252. paasta_tools/paastaapi/model/remote_run_stop.py +176 -0
  253. paasta_tools/paastaapi/model/remote_run_token.py +173 -0
  254. paasta_tools/paastaapi/model/resource.py +187 -0
  255. paasta_tools/paastaapi/model/resource_item.py +187 -0
  256. paasta_tools/paastaapi/model/resource_value.py +176 -0
  257. paasta_tools/paastaapi/model/smartstack_backend.py +191 -0
  258. paasta_tools/paastaapi/model/smartstack_location.py +181 -0
  259. paasta_tools/paastaapi/model/smartstack_status.py +181 -0
  260. paasta_tools/paastaapi/model/task_tail_lines.py +176 -0
  261. paasta_tools/paastaapi/model_utils.py +1879 -0
  262. paasta_tools/paastaapi/models/__init__.py +62 -0
  263. paasta_tools/paastaapi/rest.py +287 -0
  264. paasta_tools/prune_completed_pods.py +220 -0
  265. paasta_tools/puppet_service_tools.py +59 -0
  266. paasta_tools/py.typed +1 -0
  267. paasta_tools/remote_git.py +127 -0
  268. paasta_tools/run-paasta-api-in-dev-mode.py +57 -0
  269. paasta_tools/run-paasta-api-playground.py +51 -0
  270. paasta_tools/secret_providers/__init__.py +66 -0
  271. paasta_tools/secret_providers/vault.py +214 -0
  272. paasta_tools/secret_tools.py +277 -0
  273. paasta_tools/setup_istio_mesh.py +353 -0
  274. paasta_tools/setup_kubernetes_cr.py +412 -0
  275. paasta_tools/setup_kubernetes_crd.py +138 -0
  276. paasta_tools/setup_kubernetes_internal_crd.py +154 -0
  277. paasta_tools/setup_kubernetes_job.py +353 -0
  278. paasta_tools/setup_prometheus_adapter_config.py +1028 -0
  279. paasta_tools/setup_tron_namespace.py +248 -0
  280. paasta_tools/slack.py +75 -0
  281. paasta_tools/smartstack_tools.py +676 -0
  282. paasta_tools/spark_tools.py +283 -0
  283. paasta_tools/synapse_srv_namespaces_fact.py +42 -0
  284. paasta_tools/tron/__init__.py +0 -0
  285. paasta_tools/tron/client.py +158 -0
  286. paasta_tools/tron/tron_command_context.py +194 -0
  287. paasta_tools/tron/tron_timeutils.py +101 -0
  288. paasta_tools/tron_tools.py +1448 -0
  289. paasta_tools/utils.py +4307 -0
  290. paasta_tools/yaml_tools.py +44 -0
  291. paasta_tools-1.21.3.data/scripts/apply_external_resources.py +79 -0
  292. paasta_tools-1.21.3.data/scripts/bounce_log_latency_parser.py +68 -0
  293. paasta_tools-1.21.3.data/scripts/check_autoscaler_max_instances.py +212 -0
  294. paasta_tools-1.21.3.data/scripts/check_cassandracluster_services_replication.py +35 -0
  295. paasta_tools-1.21.3.data/scripts/check_flink_services_health.py +203 -0
  296. paasta_tools-1.21.3.data/scripts/check_kubernetes_api.py +57 -0
  297. paasta_tools-1.21.3.data/scripts/check_kubernetes_services_replication.py +141 -0
  298. paasta_tools-1.21.3.data/scripts/check_manual_oapi_changes.sh +24 -0
  299. paasta_tools-1.21.3.data/scripts/check_oom_events.py +244 -0
  300. paasta_tools-1.21.3.data/scripts/check_orphans.py +306 -0
  301. paasta_tools-1.21.3.data/scripts/check_spark_jobs.py +234 -0
  302. paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_cr.py +138 -0
  303. paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_crd.py +145 -0
  304. paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_jobs.py +344 -0
  305. paasta_tools-1.21.3.data/scripts/create_dynamodb_table.py +35 -0
  306. paasta_tools-1.21.3.data/scripts/create_paasta_playground.py +105 -0
  307. paasta_tools-1.21.3.data/scripts/delete_kubernetes_deployments.py +89 -0
  308. paasta_tools-1.21.3.data/scripts/emit_allocated_cpu_metrics.py +50 -0
  309. paasta_tools-1.21.3.data/scripts/generate_all_deployments +9 -0
  310. paasta_tools-1.21.3.data/scripts/generate_authenticating_services.py +94 -0
  311. paasta_tools-1.21.3.data/scripts/generate_deployments_for_service.py +255 -0
  312. paasta_tools-1.21.3.data/scripts/generate_services_file.py +114 -0
  313. paasta_tools-1.21.3.data/scripts/generate_services_yaml.py +30 -0
  314. paasta_tools-1.21.3.data/scripts/get_running_task_allocation.py +346 -0
  315. paasta_tools-1.21.3.data/scripts/habitat_fixer.py +86 -0
  316. paasta_tools-1.21.3.data/scripts/ide_helper.py +316 -0
  317. paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_proxy.py +139 -0
  318. paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_smartstack.py +50 -0
  319. paasta_tools-1.21.3.data/scripts/kill_bad_containers.py +109 -0
  320. paasta_tools-1.21.3.data/scripts/kubernetes_remove_evicted_pods.py +164 -0
  321. paasta_tools-1.21.3.data/scripts/mass-deploy-tag.sh +44 -0
  322. paasta_tools-1.21.3.data/scripts/mock_patch_checker.py +86 -0
  323. paasta_tools-1.21.3.data/scripts/paasta_cleanup_remote_run_resources.py +135 -0
  324. paasta_tools-1.21.3.data/scripts/paasta_cleanup_stale_nodes.py +181 -0
  325. paasta_tools-1.21.3.data/scripts/paasta_deploy_tron_jobs +3 -0
  326. paasta_tools-1.21.3.data/scripts/paasta_execute_docker_command.py +123 -0
  327. paasta_tools-1.21.3.data/scripts/paasta_secrets_sync.py +758 -0
  328. paasta_tools-1.21.3.data/scripts/paasta_tabcomplete.sh +23 -0
  329. paasta_tools-1.21.3.data/scripts/paasta_update_soa_memcpu.py +520 -0
  330. paasta_tools-1.21.3.data/scripts/render_template.py +129 -0
  331. paasta_tools-1.21.3.data/scripts/rightsizer_soaconfigs_update.py +348 -0
  332. paasta_tools-1.21.3.data/scripts/service_shard_remove.py +157 -0
  333. paasta_tools-1.21.3.data/scripts/service_shard_update.py +373 -0
  334. paasta_tools-1.21.3.data/scripts/setup_istio_mesh.py +353 -0
  335. paasta_tools-1.21.3.data/scripts/setup_kubernetes_cr.py +412 -0
  336. paasta_tools-1.21.3.data/scripts/setup_kubernetes_crd.py +138 -0
  337. paasta_tools-1.21.3.data/scripts/setup_kubernetes_internal_crd.py +154 -0
  338. paasta_tools-1.21.3.data/scripts/setup_kubernetes_job.py +353 -0
  339. paasta_tools-1.21.3.data/scripts/setup_prometheus_adapter_config.py +1028 -0
  340. paasta_tools-1.21.3.data/scripts/shared_ip_check.py +77 -0
  341. paasta_tools-1.21.3.data/scripts/synapse_srv_namespaces_fact.py +42 -0
  342. paasta_tools-1.21.3.data/scripts/timeouts_metrics_prom.py +64 -0
  343. paasta_tools-1.21.3.dist-info/LICENSE +201 -0
  344. paasta_tools-1.21.3.dist-info/METADATA +74 -0
  345. paasta_tools-1.21.3.dist-info/RECORD +348 -0
  346. paasta_tools-1.21.3.dist-info/WHEEL +5 -0
  347. paasta_tools-1.21.3.dist-info/entry_points.txt +20 -0
  348. paasta_tools-1.21.3.dist-info/top_level.txt +2 -0
@@ -0,0 +1,1362 @@
1
+ import asyncio
2
+ from collections import defaultdict
3
+ from enum import Enum
4
+ from typing import Any
5
+ from typing import DefaultDict
6
+ from typing import Dict
7
+ from typing import Iterable
8
+ from typing import List
9
+ from typing import Mapping
10
+ from typing import MutableMapping
11
+ from typing import Optional
12
+ from typing import Sequence
13
+ from typing import Set
14
+ from typing import Tuple
15
+ from typing import Union
16
+
17
+ import a_sync
18
+ import pytz
19
+ from kubernetes.client import V1Container
20
+ from kubernetes.client import V1ControllerRevision
21
+ from kubernetes.client import V1Pod
22
+ from kubernetes.client import V1Probe
23
+ from kubernetes.client import V1ReplicaSet
24
+ from kubernetes.client.rest import ApiException
25
+ from mypy_extensions import TypedDict
26
+
27
+ from paasta_tools import cassandracluster_tools
28
+ from paasta_tools import eks_tools
29
+ from paasta_tools import envoy_tools
30
+ from paasta_tools import flink_tools
31
+ from paasta_tools import kafkacluster_tools
32
+ from paasta_tools import kubernetes_tools
33
+ from paasta_tools import monkrelaycluster_tools
34
+ from paasta_tools import nrtsearchservice_tools
35
+ from paasta_tools import smartstack_tools
36
+ from paasta_tools.cli.utils import LONG_RUNNING_INSTANCE_TYPE_HANDLERS
37
+ from paasta_tools.instance.hpa_metrics_parser import HPAMetricsDict
38
+ from paasta_tools.instance.hpa_metrics_parser import HPAMetricsParser
39
+ from paasta_tools.kubernetes_tools import get_pod_event_messages
40
+ from paasta_tools.kubernetes_tools import get_tail_lines_for_kubernetes_container
41
+ from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig
42
+ from paasta_tools.kubernetes_tools import paasta_prefixed
43
+ from paasta_tools.long_running_service_tools import (
44
+ get_expected_instance_count_for_namespace,
45
+ )
46
+ from paasta_tools.long_running_service_tools import LongRunningServiceConfig
47
+ from paasta_tools.long_running_service_tools import ServiceNamespaceConfig
48
+ from paasta_tools.smartstack_tools import KubeSmartstackEnvoyReplicationChecker
49
+ from paasta_tools.smartstack_tools import match_backends_and_pods
50
+ from paasta_tools.utils import calculate_tail_lines
51
+
52
+
53
# Instance types grouped under the "CR" label (presumably "custom resource";
# confirm against the callers of this constant).
INSTANCE_TYPES_CR = {"flink", "flinkeks", "cassandracluster", "kafkacluster"}
# Instance types grouped under the "K8S" label.
INSTANCE_TYPES_K8S = {"cassandracluster", "eks", "kubernetes"}
# Union of the two groups above; this is the set consulted by can_handle().
INSTANCE_TYPES = INSTANCE_TYPES_K8S | INSTANCE_TYPES_CR

# Instance types for which can_set_state() answers True.
INSTANCE_TYPES_WITH_SET_STATE = {"flink", "flinkeks"}
# Maps an instance type to the function that builds its cr_id from
# (service, instance); consulted by cr_id().
INSTANCE_TYPE_CR_ID = {
    "flink": flink_tools.cr_id,
    "flinkeks": flink_tools.cr_id,
    "cassandracluster": cassandracluster_tools.cr_id,
    "kafkacluster": kafkacluster_tools.cr_id,
    "nrtsearchservice": nrtsearchservice_tools.cr_id,
    "nrtsearchserviceeks": nrtsearchservice_tools.cr_id,
    "monkrelaycluster": monkrelaycluster_tools.cr_id,
}
76
+
77
+
78
class ServiceMesh(Enum):
    """Service mesh flavours that mesh_status() knows how to query."""

    SMARTSTACK = "smartstack"
    ENVOY = "envoy"
81
+
82
+
83
class KubernetesAutoscalingStatusDict(TypedDict):
    """Shape of the dictionary returned by autoscaling_status()."""

    # HPA spec min_replicas; -1 when no HPA object was found.
    min_instances: int
    # HPA spec max_replicas; -1 when no HPA object was found.
    max_instances: int
    # One entry per metric name, merged from the HPA spec and current status.
    metrics: List
    # HPA status desired_replicas; -1 when no HPA object was found.
    desired_replicas: int
    # ISO-8601 timestamp, "N/A", or an "unknown (...)" explanation string.
    last_scale_time: str
89
+
90
+
91
class KubernetesVersionDict(TypedDict, total=False):
    """Dictionary describing one version of a Kubernetes workload.

    Declared with ``total=False``, so every key is optional.
    """

    name: str
    type: str
    replicas: int
    ready_replicas: int
    create_timestamp: int
    git_sha: str
    image_version: Optional[str]
    config_sha: str
    pods: Sequence[Mapping[str, Any]]
    namespace: str
102
+
103
+
104
def cr_id(service: str, instance: str, instance_type: str) -> Mapping[str, str]:
    """Build the cr_id mapping for a service instance of the given type.

    Dispatches to the per-type builder registered in INSTANCE_TYPE_CR_ID.

    :param service: service name, forwarded to the builder
    :param instance: instance name, forwarded to the builder
    :param instance_type: key used to pick the builder
    :returns: whatever the selected builder returns
    :raises RuntimeError: if no builder is registered for ``instance_type``
    """
    builder = INSTANCE_TYPE_CR_ID.get(instance_type)
    if builder:
        return builder(service, instance)
    raise RuntimeError(f"Unknown instance type {instance_type}")
109
+
110
+
111
def can_handle(instance_type: str) -> bool:
    """Return True if ``instance_type`` is a member of INSTANCE_TYPES."""
    is_known_type = instance_type in INSTANCE_TYPES
    return is_known_type
113
+
114
+
115
def can_set_state(instance_type: str) -> bool:
    """Return True if ``instance_type`` is in INSTANCE_TYPES_WITH_SET_STATE."""
    supports_set_state = instance_type in INSTANCE_TYPES_WITH_SET_STATE
    return supports_set_state
117
+
118
+
119
def set_cr_desired_state(
    kube_client: kubernetes_tools.KubeClient,
    service: str,
    instance: str,
    instance_type: str,
    desired_state: str,
) -> None:
    """Set the desired state on the resource identified by the instance's cr_id.

    Thin wrapper around ``kubernetes_tools.set_cr_desired_state`` that
    resolves the cr_id for the instance and converts Kubernetes API errors
    into ``RuntimeError``.

    :param kube_client: client forwarded to ``kubernetes_tools``
    :param service: service name
    :param instance: instance name
    :param instance_type: used to look up the cr_id builder
    :param desired_state: state value forwarded unchanged
    :raises RuntimeError: if ``instance_type`` has no cr_id builder, or if the
        Kubernetes API call raises ``ApiException`` (chained as ``__cause__``)
    """
    try:
        kubernetes_tools.set_cr_desired_state(
            kube_client=kube_client,
            cr_id=cr_id(service, instance, instance_type),
            desired_state=desired_state,
        )
    except ApiException as e:
        error_message = (
            f"Error while setting state {desired_state} of "
            f"{service}.{instance}: {e}"
        )
        # Chain explicitly so the original API error is kept as the cause.
        raise RuntimeError(error_message) from e
138
+
139
+
140
async def autoscaling_status(
    kube_client: kubernetes_tools.KubeClient,
    job_config: LongRunningServiceConfig,
    namespace: str,
) -> KubernetesAutoscalingStatusDict:
    """Summarise the HPA attached to a deployment.

    Looks up the HPA named after the sanitised deployment name in
    ``namespace``. When none is found, a placeholder dict is returned with
    ``-1`` counts and an explanatory ``last_scale_time`` string.

    :param kube_client: client passed to ``kubernetes_tools.get_hpa``
    :param job_config: supplies the sanitised deployment name
    :param namespace: namespace to look the HPA up in
    :returns: min/max/desired replica counts, merged per-metric data and the
        last scale time (ISO-8601 string, or "N/A" when unset)
    """
    hpa = await kubernetes_tools.get_hpa(
        kube_client,
        name=job_config.get_sanitised_deployment_name(),
        namespace=namespace,
    )
    if hpa is None:
        return KubernetesAutoscalingStatusDict(
            min_instances=-1,
            max_instances=-1,
            metrics=[],
            desired_replicas=-1,
            last_scale_time="unknown (could not find HPA object)",
        )

    # Parse metrics sources, based on
    # https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V2beta2ExternalMetricSource.md#v2beta2externalmetricsource
    parser = HPAMetricsParser(hpa)

    # The lambda (rather than the bare class) works around
    # https://github.com/python/mypy/issues/7217
    metrics_by_name: DefaultDict[str, HPAMetricsDict] = defaultdict(
        lambda: HPAMetricsDict()
    )

    # Targets from the spec first, then overlay current values from status,
    # so both end up in the same per-name entry.
    if hpa.spec.metrics is not None:
        for target_spec in hpa.spec.metrics:
            target = parser.parse_target(target_spec)
            metrics_by_name[target["name"]].update(target)

    if hpa.status.current_metrics is not None:
        for current_spec in hpa.status.current_metrics:
            current = parser.parse_current(current_spec)
            if current is None:
                continue
            metrics_by_name[current["name"]].update(current)

    scaled_at = hpa.status.last_scale_time
    if scaled_at:
        last_scale_time = scaled_at.replace(tzinfo=pytz.UTC).isoformat()
    else:
        last_scale_time = "N/A"

    return KubernetesAutoscalingStatusDict(
        min_instances=hpa.spec.min_replicas,
        max_instances=hpa.spec.max_replicas,
        metrics=list(metrics_by_name.values()),
        desired_replicas=hpa.status.desired_replicas,
        last_scale_time=last_scale_time,
    )
194
+
195
+
196
async def pod_info(
    pod: V1Pod,
    client: kubernetes_tools.KubeClient,
    num_tail_lines: int,
) -> Dict[str, Any]:
    """Collect a status summary for a single pod.

    :param pod: the pod to describe
    :param client: client used to fetch events, log tails and the hostname
    :param num_tail_lines: number of tail lines requested per container
    :returns: dict with the pod's name, host, creation timestamp, phase,
        readiness, per-container log tails, reason/message, events and the
        ``paasta.yelp.com/git_sha`` / ``paasta.yelp.com/config_sha`` labels
        (``None`` when a label, or the whole label map, is absent)
    """
    container_statuses = pod.status.container_statuses or []
    try:
        pod_event_messages = await get_pod_event_messages(client, pod)
    except asyncio.TimeoutError:
        # Report the failure in-band rather than failing the whole summary.
        pod_event_messages = [{"error": "Could not fetch events for pod"}]
    # Tail lines are awaited one container at a time, in status order.
    containers = [
        dict(
            name=container.name,
            tail_lines=await get_tail_lines_for_kubernetes_container(
                client,
                pod,
                container,
                num_tail_lines,
            ),
        )
        for container in container_statuses
    ]
    # metadata.labels is optional on a pod and may be None; treat that as
    # "no labels" instead of raising AttributeError on .get().
    labels = pod.metadata.labels or {}
    return {
        "name": pod.metadata.name,
        "host": kubernetes_tools.get_pod_hostname(client, pod),
        "deployed_timestamp": pod.metadata.creation_timestamp.timestamp(),
        "phase": pod.status.phase,
        "ready": kubernetes_tools.is_pod_ready(pod),
        "containers": containers,
        "reason": pod.status.reason,
        "message": pod.status.message,
        "events": pod_event_messages,
        "git_sha": labels.get("paasta.yelp.com/git_sha"),
        "config_sha": labels.get("paasta.yelp.com/config_sha"),
    }
231
+
232
+
233
+ # TODO: Cleanup
234
+ # Only used in old kubernetes_status
235
+ async def job_status(
236
+ kstatus: MutableMapping[str, Any],
237
+ client: kubernetes_tools.KubeClient,
238
+ job_config: LongRunningServiceConfig,
239
+ pod_list: Sequence[V1Pod],
240
+ replicaset_list: Sequence[V1ReplicaSet],
241
+ verbose: int,
242
+ namespace: str,
243
+ ) -> None:
244
+ app_id = job_config.get_sanitised_deployment_name()
245
+ kstatus["app_id"] = app_id
246
+ kstatus["pods"] = []
247
+ kstatus["replicasets"] = []
248
+
249
+ if verbose > 0:
250
+ num_tail_lines = calculate_tail_lines(verbose)
251
+ kstatus["pods"] = await asyncio.gather(
252
+ *[pod_info(pod, client, num_tail_lines) for pod in pod_list]
253
+ )
254
+
255
+ for replicaset in replicaset_list:
256
+ kstatus["replicasets"].append(
257
+ {
258
+ "name": replicaset.metadata.name,
259
+ "replicas": replicaset.spec.replicas,
260
+ "ready_replicas": ready_replicas_from_replicaset(replicaset),
261
+ "create_timestamp": replicaset.metadata.creation_timestamp.timestamp(),
262
+ "git_sha": replicaset.metadata.labels.get("paasta.yelp.com/git_sha"),
263
+ "config_sha": replicaset.metadata.labels.get(
264
+ "paasta.yelp.com/config_sha"
265
+ ),
266
+ }
267
+ )
268
+
269
+ kstatus["expected_instance_count"] = job_config.get_instances()
270
+
271
+ app = kubernetes_tools.get_kubernetes_app_by_name(
272
+ name=app_id, kube_client=client, namespace=namespace
273
+ )
274
+ desired_instances = (
275
+ job_config.get_instances() if job_config.get_desired_state() != "stop" else 0
276
+ )
277
+ deploy_status, message = kubernetes_tools.get_kubernetes_app_deploy_status(
278
+ app=app,
279
+ desired_instances=desired_instances,
280
+ )
281
+ kstatus["deploy_status"] = kubernetes_tools.KubernetesDeployStatus.tostring(
282
+ deploy_status
283
+ )
284
+ kstatus["deploy_status_message"] = message
285
+ kstatus["running_instance_count"] = (
286
+ app.status.ready_replicas if app.status.ready_replicas else 0
287
+ )
288
+ kstatus["create_timestamp"] = app.metadata.creation_timestamp.timestamp()
289
+ kstatus["namespace"] = app.metadata.namespace
290
+
291
+
292
async def get_backends_from_mesh_status(
    mesh_status_task: "asyncio.Future[Dict[str, Any]]",
) -> Set[str]:
    """Await a mesh status and return the set of backend addresses in it.

    :param mesh_status_task: awaitable resolving to a mesh status dict
    :returns: the ``address`` of every backend across all ``locations``;
        an empty set when ``locations`` is missing or empty
    """
    status = await mesh_status_task
    if not status.get("locations"):
        return set()

    addresses: Set[str] = set()
    for location in status["locations"]:
        # A location without a "backends" key contributes nothing.
        for backend in location.get("backends", []):
            addresses.add(backend["address"])
    return addresses
306
+
307
+
308
async def mesh_status(
    service: str,
    service_mesh: ServiceMesh,
    instance: str,
    job_config: LongRunningServiceConfig,
    service_namespace_config: ServiceNamespaceConfig,
    pods_task: "asyncio.Future[V1Pod]",
    settings: Any,
    should_return_individual_backends: bool = False,
) -> Mapping[str, Any]:
    """Build the per-location mesh status for a service instance.

    :param service: service name
    :param service_mesh: which mesh to query (smartstack or envoy); any other
        value yields no location entries
    :param instance: instance name (not read by this function)
    :param job_config: supplies registrations, pool and nerve namespace
    :param service_namespace_config: not read by this function
    :param pods_task: awaitable resolving to the pods to match against backends
    :param settings: object exposing ``kubernetes_client``,
        ``system_paasta_config`` and ``cluster``
    :param should_return_individual_backends: forwarded to the location builders
    :returns: dict with ``registration``, ``expected_backends_per_location``
        and a ``locations`` list with one entry per allowed location
    """
    # Only the first registration is reported.
    registration = job_config.get_registrations()[0]
    instance_pool = job_config.get_pool()

    async_get_nodes = a_sync.to_async(kubernetes_tools.get_all_nodes)
    nodes = await async_get_nodes(settings.kubernetes_client)

    replication_checker = KubeSmartstackEnvoyReplicationChecker(
        nodes=nodes,
        system_paasta_config=settings.system_paasta_config,
    )
    node_hostname_by_location = replication_checker.get_allowed_locations_and_hosts(
        job_config
    )

    expected_smartstack_count = get_expected_instance_count_for_namespace(
        service=service,
        namespace=job_config.get_nerve_namespace(),
        cluster=settings.cluster,
        instance_type_class=KubernetesDeploymentConfig,
    )
    # With no allowed locations there is nothing to divide across; report 0
    # instead of raising ZeroDivisionError.
    location_count = len(node_hostname_by_location)
    expected_count_per_location = (
        int(expected_smartstack_count / location_count) if location_count else 0
    )
    # Named "status" so the local does not shadow this function's own name.
    status: MutableMapping[str, Any] = {
        "registration": registration,
        "expected_backends_per_location": expected_count_per_location,
        "locations": [],
    }

    pods = await pods_task
    for location, hosts in node_hostname_by_location.items():
        host = replication_checker.get_hostname_in_pool(hosts, instance_pool)
        if service_mesh == ServiceMesh.SMARTSTACK:
            status["locations"].append(
                _build_smartstack_location_dict(
                    synapse_host=host,
                    synapse_port=settings.system_paasta_config.get_synapse_port(),
                    synapse_haproxy_url_format=settings.system_paasta_config.get_synapse_haproxy_url_format(),
                    registration=registration,
                    pods=pods,
                    location=location,
                    should_return_individual_backends=should_return_individual_backends,
                )
            )
        elif service_mesh == ServiceMesh.ENVOY:
            status["locations"].append(
                _build_envoy_location_dict(
                    envoy_host=host,
                    envoy_admin_port=settings.system_paasta_config.get_envoy_admin_port(),
                    envoy_admin_endpoint_format=settings.system_paasta_config.get_envoy_admin_endpoint_format(),
                    registration=registration,
                    pods=pods,
                    location=location,
                    should_return_individual_backends=should_return_individual_backends,
                )
            )
    return status
375
+
376
+
377
def _build_envoy_location_dict(
    envoy_host: str,
    envoy_admin_port: int,
    envoy_admin_endpoint_format: str,
    registration: str,
    pods: Iterable[V1Pod],
    location: str,
    should_return_individual_backends: bool,
) -> MutableMapping[str, Any]:
    """Query one envoy host and build the location dict for ``location``.

    Backends for ``registration`` are fetched from the given envoy admin
    endpoint, ordered by their ``"eds_health_status"``, matched against
    ``pods`` and handed to ``envoy_tools.build_envoy_location_dict`` together
    with the set of (address, port) pairs flagged as casper-proxied.
    """
    backends_by_service = envoy_tools.get_backends(
        registration,
        envoy_host=envoy_host,
        envoy_admin_port=envoy_admin_port,
        envoy_admin_endpoint_format=envoy_admin_endpoint_format,
    )

    # Every entry of a service's backend list is a
    # (backend, is_casper_proxied) pair; only the backend is sorted.
    envoy_backends = [
        pair[0]
        for service_backends in backends_by_service.values()
        for pair in service_backends
    ]
    envoy_backends.sort(key=lambda backend: backend["eds_health_status"])

    casper_proxied_backends = set()
    for service_backends in backends_by_service.values():
        for backend, is_casper_proxied in service_backends:
            if is_casper_proxied:
                casper_proxied_backends.add(
                    (backend["address"], backend["port_value"])
                )

    matched_backends_and_pods = envoy_tools.match_backends_and_pods(
        envoy_backends,
        pods,
    )
    return envoy_tools.build_envoy_location_dict(
        location,
        matched_backends_and_pods,
        should_return_individual_backends,
        casper_proxied_backends,
    )
418
+
419
+
420
def _build_smartstack_location_dict(
    synapse_host: str,
    synapse_port: int,
    synapse_haproxy_url_format: str,
    registration: str,
    pods: Iterable[V1Pod],
    location: str,
    should_return_individual_backends: bool,
) -> MutableMapping[str, Any]:
    """Query one synapse host and build the location dict for ``location``.

    Backends for ``registration`` are fetched from the given synapse/haproxy
    endpoint, ordered by descending ``"status"``, matched against ``pods``
    and passed to ``smartstack_tools.build_smartstack_location_dict``.
    """
    backends = smartstack_tools.get_backends(
        registration,
        synapse_host=synapse_host,
        synapse_port=synapse_port,
        synapse_haproxy_url_format=synapse_haproxy_url_format,
    )
    # Descending order puts 'UP' backends above 'MAINT' backends.
    ordered_backends = sorted(
        backends, key=lambda backend: backend["status"], reverse=True
    )
    matched_backends_and_pods = match_backends_and_pods(ordered_backends, pods)
    return smartstack_tools.build_smartstack_location_dict(
        location, matched_backends_and_pods, should_return_individual_backends
    )
445
+
446
+
447
def cr_status(
    service: str,
    instance: str,
    verbose: int,
    instance_type: str,
    kube_client: Any,
) -> Mapping[str, Any]:
    """Return the ``status`` and ``metadata`` sections of a custom resource.

    The resource is looked up through ``kubernetes_tools.get_cr`` using the
    id produced by ``cr_id(service, instance, instance_type)``. Sections that
    are absent (or a lookup that returns nothing) are simply left out of the
    result. ``verbose`` is accepted but not used here.
    """
    custom_resource = kubernetes_tools.get_cr(
        kube_client=kube_client, cr_id=cr_id(service, instance, instance_type)
    )
    if not custom_resource:
        custom_resource = {}

    status: MutableMapping[str, Any] = {}
    for section in ("status", "metadata"):
        value = custom_resource.get(section)
        if value is not None:
            status[section] = value
    return status
468
+
469
+
470
def filter_actually_running_replicasets(
    replicaset_list: Sequence[V1ReplicaSet],
) -> List[V1ReplicaSet]:
    """Drop replicasets that are at 0 desired and 0 ready replicas.

    A replicaset is kept when either its ``spec.replicas`` or its ready
    replica count is non-zero; input order is preserved.
    """
    running: List[V1ReplicaSet] = []
    for replicaset in replicaset_list:
        if (
            replicaset.spec.replicas != 0
            or ready_replicas_from_replicaset(replicaset) != 0
        ):
            running.append(replicaset)
    return running
478
+
479
+
480
def bounce_status(
    service: str, instance: str, settings: Any, is_eks: bool = False
) -> Dict[str, Any]:
    """Summarise the bounce state of a Kubernetes or EKS service instance.

    The returned dict carries ``expected_instance_count``, ``desired_state``,
    ``running_instance_count``, ``deploy_status``, ``active_shas``,
    ``active_versions`` and ``app_count``.

    :param settings: object exposing ``cluster``, ``soa_dir`` and
        ``kubernetes_client``
    :param is_eks: load the config through ``eks_tools`` instead of
        ``kubernetes_tools``
    :raises RuntimeError: if ``settings.kubernetes_client`` is None
    """
    # this should be the only place where it matters that we use eks_tools.
    # apart from loading config files, we should be using kubernetes_tools
    # everywhere.
    job_config: Union[KubernetesDeploymentConfig, eks_tools.EksDeploymentConfig]
    if is_eks:
        job_config = eks_tools.load_eks_service_config(
            service=service,
            instance=instance,
            cluster=settings.cluster,
            soa_dir=settings.soa_dir,
            load_deployments=True,
        )
    else:
        job_config = kubernetes_tools.load_kubernetes_service_config(
            service=service,
            instance=instance,
            cluster=settings.cluster,
            soa_dir=settings.soa_dir,
            load_deployments=True,
        )

    expected_instance_count = job_config.get_instances()
    desired_state = job_config.get_desired_state()
    status: Dict[str, Any] = {
        "expected_instance_count": expected_instance_count,
        "desired_state": desired_state,
    }

    kube_client = settings.kubernetes_client
    if kube_client is None:
        raise RuntimeError("Could not load Kubernetes client!")

    app = kubernetes_tools.get_kubernetes_app_by_name(
        name=job_config.get_sanitised_deployment_name(),
        kube_client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
    )
    status["running_instance_count"] = app.status.ready_replicas or 0

    # A stopped instance is expected to converge on zero replicas.
    desired_instances = 0 if desired_state == "stop" else expected_instance_count
    deploy_status, _ = kubernetes_tools.get_kubernetes_app_deploy_status(
        app=app,
        desired_instances=desired_instances,
    )
    status["deploy_status"] = kubernetes_tools.KubernetesDeployStatus.tostring(
        deploy_status
    )

    # Instances with persistent volumes are versioned by controller
    # revisions; everything else by replicasets (ignoring those at 0/0).
    if job_config.get_persistent_volumes():
        version_objects = a_sync.block(
            kubernetes_tools.controller_revisions_for_service_instance,
            service=job_config.service,
            instance=job_config.instance,
            kube_client=kube_client,
            namespace=job_config.get_kubernetes_namespace(),
        )
    else:
        version_objects = filter_actually_running_replicasets(
            a_sync.block(
                kubernetes_tools.replicasets_for_service_instance,
                service=job_config.service,
                instance=job_config.instance,
                kube_client=kube_client,
                namespace=job_config.get_kubernetes_namespace(),
            )
        )

    active_versions = kubernetes_tools.get_active_versions_for_service(
        [app, *version_objects],
    )
    active_shas = []
    active_version_tuples = []
    for deployment_version, config_sha in active_versions:
        active_shas.append((deployment_version.sha, config_sha))
        active_version_tuples.append(
            (deployment_version.sha, deployment_version.image_version, config_sha)
        )
    status["active_shas"] = active_shas
    status["active_versions"] = active_version_tuples
    status["app_count"] = len(active_versions)
    return status
561
+
562
+
563
async def get_pods_for_service_instance_multiple_namespaces(
    service: str,
    instance: str,
    kube_client: kubernetes_tools.KubeClient,
    namespaces: Iterable[str],
) -> Sequence[V1Pod]:
    """Fetch the pods of a service instance from several namespaces.

    One ``kubernetes_tools.pods_for_service_instance`` lookup is started per
    namespace; results are appended in the order the lookups complete.
    """
    lookups = [
        kubernetes_tools.pods_for_service_instance(
            service=service,
            instance=instance,
            kube_client=kube_client,
            namespace=namespace,
        )
        for namespace in namespaces
    ]

    pods: List[V1Pod] = []
    for next_finished in asyncio.as_completed(lookups):
        pods += await next_finished
    return pods
585
+
586
+
587
def find_all_relevant_namespaces(
    service: str,
    instance: str,
    kube_client: kubernetes_tools.KubeClient,
    job_config: LongRunningServiceConfig,
) -> Set[str]:
    """Return every namespace that may hold this service instance.

    The result is the namespace configured on ``job_config`` plus the
    namespace of each deployment returned by
    ``kubernetes_tools.list_deployments_in_managed_namespaces`` for the
    service/instance label selector.
    """
    namespaces = {job_config.get_kubernetes_namespace()}
    selector = f"{paasta_prefixed('service')}={service},{paasta_prefixed('instance')}={instance}"
    for deployment in kubernetes_tools.list_deployments_in_managed_namespaces(
        kube_client=kube_client,
        label_selector=selector,
    ):
        namespaces.add(deployment.namespace)
    return namespaces
600
+
601
+
602
@a_sync.to_blocking
async def kubernetes_status_v2(
    service: str,
    instance: str,
    verbose: int,
    include_envoy: bool,
    instance_type: str,
    settings: Any,
    all_namespaces: bool = False,
) -> Dict[str, Any]:
    """Assemble the v2 status dict for a long-running Kubernetes instance.

    Pod, mesh, version and (at ``verbose > 1``) autoscaling lookups are all
    started as asyncio tasks and awaited together; their results are then
    folded into the returned dict. An empty dict is returned when
    ``settings.kubernetes_client`` is None.

    ``include_envoy`` is not consulted here: mesh status is gathered whenever
    the service namespace config contains ``"proxy_port"``.

    :param all_namespaces: also look in every namespace reported by
        ``find_all_relevant_namespaces`` rather than only the configured one
    """
    status: Dict[str, Any] = {}
    load_config = LONG_RUNNING_INSTANCE_TYPE_HANDLERS[instance_type].loader
    job_config = load_config(
        service=service,
        instance=instance,
        cluster=settings.cluster,
        soa_dir=settings.soa_dir,
        load_deployments=True,
    )
    kube_client = settings.kubernetes_client
    if kube_client is None:
        return status

    if all_namespaces:
        relevant_namespaces = await a_sync.to_async(find_all_relevant_namespaces)(
            service, instance, kube_client, job_config
        )
    else:
        relevant_namespaces = {job_config.get_kubernetes_namespace()}

    tasks: List["asyncio.Future[Dict[str, Any]]"] = []

    autoscaling_task = None
    if (
        verbose > 1
        and job_config.is_autoscaling_enabled()
        and job_config.get_autoscaling_params().get("decision_policy", "") != "bespoke"  # type: ignore
    ):
        autoscaling_task = asyncio.create_task(
            autoscaling_status(
                kube_client, job_config, job_config.get_kubernetes_namespace()
            )
        )
        tasks.append(autoscaling_task)

    pods_task = asyncio.create_task(
        get_pods_for_service_instance_multiple_namespaces(
            service=service,
            instance=instance,
            kube_client=kube_client,
            namespaces=relevant_namespaces,
        )
    )
    tasks.append(pods_task)

    service_namespace_config = kubernetes_tools.load_service_namespace_config(
        service=service,
        namespace=job_config.get_nerve_namespace(),
        soa_dir=settings.soa_dir,
    )
    mesh_status_task = None
    backends_task = None
    if "proxy_port" in service_namespace_config:
        mesh_status_task = asyncio.create_task(
            mesh_status(
                service=service,
                service_mesh=ServiceMesh.ENVOY,
                instance=job_config.get_nerve_namespace(),
                job_config=job_config,
                service_namespace_config=service_namespace_config,
                pods_task=pods_task,
                should_return_individual_backends=True,
                settings=settings,
            )
        )
        backends_task = asyncio.create_task(
            get_backends_from_mesh_status(mesh_status_task)
        )
        tasks += [mesh_status_task, backends_task]

    # Instances with persistent volumes are versioned by controller
    # revisions; all others by replicasets.
    if job_config.get_persistent_volumes():
        pod_status_by_sha_and_readiness_task = asyncio.create_task(
            get_pod_status_tasks_by_sha_and_readiness(
                pods_task,
                backends_task,
                kube_client,
                verbose,
            )
        )
        versions_task = asyncio.create_task(
            get_versions_for_controller_revisions(
                kube_client=kube_client,
                service=service,
                instance=instance,
                namespaces=relevant_namespaces,
                pod_status_by_sha_and_readiness_task=pod_status_by_sha_and_readiness_task,
            )
        )
        tasks += [pod_status_by_sha_and_readiness_task, versions_task]
    else:
        pod_status_by_replicaset_task = asyncio.create_task(
            get_pod_status_tasks_by_replicaset(
                pods_task,
                backends_task,
                kube_client,
                verbose,
            )
        )
        versions_task = asyncio.create_task(
            get_versions_for_replicasets(
                kube_client=kube_client,
                service=service,
                instance=instance,
                namespaces=relevant_namespaces,
                pod_status_by_replicaset_task=pod_status_by_replicaset_task,
            )
        )
        tasks += [pod_status_by_replicaset_task, versions_task]

    # Exceptions stay attached to their tasks and surface on .result() below.
    await asyncio.gather(*tasks, return_exceptions=True)

    desired_state = job_config.get_desired_state()
    status["app_name"] = job_config.get_sanitised_deployment_name()
    status["desired_state"] = desired_state
    status["desired_instances"] = (
        0 if desired_state == "stop" else job_config.get_instances()
    )
    status["bounce_method"] = job_config.get_bounce_method()

    try:
        pods_task.result()  # just verifies we have a valid result
        # These tasks also depend on pods_task, so we cannot populate them without pods
        status["versions"] = versions_task.result()
        if mesh_status_task is not None:
            status["envoy"] = mesh_status_task.result()
    except asyncio.TimeoutError:
        status["versions"] = []
        status["error_message"] = (
            "Could not fetch instance data. "
            "This is usually a temporary problem. Please try again or contact #compute-infra for help if you continue to see this message\n"
        )

    if autoscaling_task is None:
        return status

    try:
        status["autoscaling_status"] = autoscaling_task.result()
    except Exception as e:
        if "error_message" in status:
            # Append to the message already recorded above.
            status[
                "error_message"
            ] += f"Unknown error occurred while fetching autoscaling status: {e}"
        else:
            status["error_message"] = (
                f"Unknown error occurred while fetching autoscaling status. "
                f"Please contact #compute-infra for help: {e}"
            )
    return status
760
+
761
+
762
async def get_pod_status_tasks_by_replicaset(
    pods_task: "asyncio.Future[V1Pod]",
    backends_task: "asyncio.Future[Dict[str, Any]]",
    client: kubernetes_tools.KubeClient,
    verbose: int,
) -> Dict[str, List["asyncio.Future[Dict[str, Any]]"]]:
    """Start a ``get_pod_status`` task per pod, grouped by owning ReplicaSet.

    :param pods_task: awaitable resolving to the pods to inspect
    :param backends_task: passed through to ``get_pod_status``
    :param client: Kubernetes client passed through to ``get_pod_status``
    :param verbose: verbosity level, converted to a log tail length via
        ``calculate_tail_lines``
    :returns: mapping of ReplicaSet name to the status tasks of its pods;
        pods without a ReplicaSet owner are not included
    """
    num_tail_lines = calculate_tail_lines(verbose)
    pods = await pods_task
    tasks_by_replicaset: DefaultDict[
        str, List["asyncio.Future[Dict[str, Any]]"]
    ] = defaultdict(list)
    for pod in pods:
        # owner_references is None rather than an empty list for a pod
        # without owners; treat that as "no owners" instead of raising.
        for owner_reference in pod.metadata.owner_references or []:
            if owner_reference.kind == "ReplicaSet":
                pod_status_task = asyncio.create_task(
                    get_pod_status(pod, backends_task, client, num_tail_lines)
                )
                tasks_by_replicaset[owner_reference.name].append(pod_status_task)

    return tasks_by_replicaset
782
+
783
+
784
async def get_versions_for_replicasets(
    kube_client: kubernetes_tools.KubeClient,
    service: str,
    instance: str,
    namespaces: Iterable[str],
    pod_status_by_replicaset_task: "asyncio.Future[Mapping[str, Sequence[asyncio.Future[Dict[str, Any]]]]]",
) -> List[KubernetesVersionDict]:
    """Build one version dict per running replicaset of a service instance.

    Replicasets are fetched from every namespace in ``namespaces``, those at
    0/0 replicas are discarded, and each remaining one is turned into a
    version dict by ``get_replicaset_status`` using the pod status tasks
    recorded under its name.
    """
    lookups = [
        kubernetes_tools.replicasets_for_service_instance(
            service=service,
            instance=instance,
            kube_client=kube_client,
            namespace=namespace,
        )
        for namespace in namespaces
    ]
    replicaset_list: List[V1ReplicaSet] = []
    for next_finished in asyncio.as_completed(lookups):
        replicaset_list += await next_finished

    # For the purpose of active_versions/app_count, don't count replicasets that
    # are at 0/0.
    actually_running_replicasets = filter_actually_running_replicasets(replicaset_list)

    pod_status_by_replicaset = await pod_status_by_replicaset_task
    status_coros = [
        get_replicaset_status(
            replicaset,
            kube_client,
            pod_status_by_replicaset.get(replicaset.metadata.name),
        )
        for replicaset in actually_running_replicasets
    ]
    return await asyncio.gather(*status_coros)
822
+
823
+
824
async def get_replicaset_status(
    replicaset: V1ReplicaSet,
    client: kubernetes_tools.KubeClient,
    pod_status_tasks: Sequence["asyncio.Future[Dict[str, Any]]"],
) -> KubernetesVersionDict:
    """Describe a replicaset as a version dict, including its pod statuses.

    ``pod_status_tasks`` may be empty or None, in which case ``"pods"`` is an
    empty list. ``client`` is accepted but not used here.
    """
    metadata = replicaset.metadata
    labels = metadata.labels

    if pod_status_tasks:
        pods = await asyncio.gather(*pod_status_tasks)
    else:
        pods = []

    return {
        "name": metadata.name,
        "type": "ReplicaSet",
        "replicas": replicaset.spec.replicas,
        "ready_replicas": ready_replicas_from_replicaset(replicaset),
        "create_timestamp": metadata.creation_timestamp.timestamp(),
        "git_sha": labels.get("paasta.yelp.com/git_sha"),
        "image_version": labels.get("paasta.yelp.com/image_version"),
        "config_sha": labels.get("paasta.yelp.com/config_sha"),
        "pods": pods,
        "namespace": metadata.namespace,
    }
843
+
844
+
845
async def get_pod_status(
    pod: V1Pod,
    backends_task: "asyncio.Future[Dict[str, Any]]",
    client: Any,
    num_tail_lines: int,
) -> Dict[str, Any]:
    """Build the status dict for a single pod.

    Recent events (at most 900 seconds old) and per-container details are
    fetched concurrently. A timeout while fetching events is reported inside
    the ``"events"`` entry; any failure of the container lookup is re-raised
    when its result is read.

    :param backends_task: awaitable resolving to the mesh backend addresses,
        or None when mesh readiness should not be computed
    :returns: dict with pod identity, phase, scheduling/readiness flags,
        containers, timestamps and events
    """
    events_task = asyncio.create_task(
        get_pod_event_messages(client, pod, max_age_in_seconds=900)
    )
    containers_task = asyncio.create_task(
        get_pod_containers(pod, client, num_tail_lines)
    )
    # Failures are kept on the tasks and handled when .result() is called.
    await asyncio.gather(events_task, containers_task, return_exceptions=True)

    reason = pod.status.reason
    message = pod.status.message
    scheduled = kubernetes_tools.is_pod_scheduled(pod)
    ready = kubernetes_tools.is_pod_ready(pod)

    deletion_timestamp = pod.metadata.deletion_timestamp
    delete_timestamp = deletion_timestamp.timestamp() if deletion_timestamp else None

    try:
        # Events were already limited to the last 15m when requested above.
        pod_event_messages = events_task.result()
    except asyncio.TimeoutError:
        pod_event_messages = [{"error": "Could not retrieve events. Please try again."}]

    if reason != "Evicted" and not scheduled:
        sched_condition = kubernetes_tools.get_pod_condition(pod, "PodScheduled")
        # If the condition is not yet available (e.g. pod not fully created yet), defer to Status messages
        if sched_condition:
            reason = sched_condition.reason
            message = sched_condition.message

    # TODO: Remove this once k8s readiness reflects mesh readiness, PAASTA-17266
    mesh_ready = (
        None if backends_task is None else pod.status.pod_ip in (await backends_task)
    )

    return {
        "name": pod.metadata.name,
        "ip": pod.status.pod_ip,
        "host": pod.status.host_ip,
        "phase": pod.status.phase,
        "reason": reason,
        "message": message,
        "scheduled": scheduled,
        "ready": ready,
        "mesh_ready": mesh_ready,
        "containers": containers_task.result(),
        "create_timestamp": pod.metadata.creation_timestamp.timestamp(),
        "delete_timestamp": delete_timestamp,
        "events": pod_event_messages,
    }
903
+
904
+
905
def get_container_healthcheck(pod_ip: str, probe: V1Probe) -> Dict[str, Any]:
    """Describe how a container probe checks health.

    :param pod_ip: IP of the pod, used as the host of an HTTP(S) probe URL
    :param probe: the probe to describe; the first populated handler among
        ``http_get``, ``tcp_socket`` and ``_exec`` wins
    :returns: ``{"http_url": ...}``, ``{"tcp_port": ...}`` or ``{"cmd": ...}``
        for the respective handler, or an empty dict when none is set
    """
    http_get = getattr(probe, "http_get", None)
    if http_get:
        # Honour the probe's scheme (e.g. "HTTPS") instead of always
        # reporting http://; fall back to http when it is unset or not a
        # string.
        scheme = getattr(http_get, "scheme", None)
        scheme = scheme.lower() if isinstance(scheme, str) and scheme else "http"
        return {"http_url": f"{scheme}://{pod_ip}:{http_get.port}{http_get.path}"}

    tcp_socket = getattr(probe, "tcp_socket", None)
    if tcp_socket:
        return {"tcp_port": f"{tcp_socket.port}"}

    exec_action = getattr(probe, "_exec", None)
    if exec_action:
        return {"cmd": " ".join(exec_action.command)}

    return {}
915
+
916
+
917
async def get_pod_containers(
    pod: V1Pod, client: Any, num_tail_lines: int
) -> List[Dict[str, Any]]:
    """Describe each container status reported on ``pod``.

    For every entry of ``pod.status.container_statuses`` the result contains
    its current and previous state details, liveness-probe information from
    the matching container spec, and log tail lines. Logs from the previous
    run are fetched only when the container is running and
    ``kubernetes_tools.recent_container_restart`` reports a recent restart.
    A timeout while fetching logs is reported as an ``"error_message"``
    entry instead of raising.
    """
    containers = []
    container_specs = pod.spec.containers
    for cs in pod.status.container_statuses or []:
        healthcheck_grace_period = 0
        healthcheck = None
        # There should be only one matching spec
        spec = next((c for c in container_specs if c.name == cs.name), None)
        if spec is not None and spec.liveness_probe:
            healthcheck_grace_period = spec.liveness_probe.initial_delay_seconds or 0
            healthcheck = get_container_healthcheck(
                pod.status.pod_ip, spec.liveness_probe
            )

        state = None
        reason = None
        message = None
        start_timestamp = None
        for state_name, this_state in cs.state.to_dict().items():
            # Each container has only one populated state at a time
            if not this_state:
                continue
            state = state_name
            if "reason" in this_state:
                reason = this_state["reason"]
            if "message" in this_state:
                message = this_state["message"]
            if this_state.get("started_at"):
                start_timestamp = this_state["started_at"].timestamp()

        last_state = None
        last_reason = None
        last_message = None
        last_duration = None
        last_timestamp = None
        for state_name, this_state in cs.last_state.to_dict().items():
            if not this_state:
                continue
            last_state = state_name
            if "reason" in this_state:
                last_reason = this_state["reason"]
            if "message" in this_state:
                last_message = this_state["message"]
            if this_state.get("started_at"):
                if this_state.get("finished_at"):
                    elapsed = this_state["finished_at"] - this_state["started_at"]
                    last_duration = elapsed.total_seconds()
                last_timestamp = this_state["started_at"].timestamp()

        async def get_tail_lines() -> MutableMapping[str, Any]:
            try:
                return await get_tail_lines_for_kubernetes_container(
                    client,
                    pod,
                    cs,
                    num_tail_lines,
                    previous=False,
                )
            except asyncio.TimeoutError:
                return {"error_message": f"Could not fetch logs for {cs.name}"}

        # get previous log lines as well if this container restarted recently
        async def get_previous_tail_lines() -> MutableMapping[str, Any]:
            restarted_recently = (
                state == "running"
                and kubernetes_tools.recent_container_restart(
                    cs.restart_count, last_state, last_timestamp
                )
            )
            if not restarted_recently:
                return None
            try:
                return await get_tail_lines_for_kubernetes_container(
                    client,
                    pod,
                    cs,
                    num_tail_lines,
                    previous=True,
                )
            except asyncio.TimeoutError:
                return {
                    "error_message": f"Could not fetch previous logs for {cs.name}"
                }

        # Both closures are awaited within this iteration, so the loop
        # variables they capture still refer to the current container.
        tail_lines, previous_tail_lines = await asyncio.gather(
            asyncio.ensure_future(get_tail_lines()),
            asyncio.ensure_future(get_previous_tail_lines()),
        )

        containers.append(
            {
                "name": cs.name,
                "restart_count": cs.restart_count,
                "state": state,
                "reason": reason,
                "message": message,
                "last_state": last_state,
                "last_reason": last_reason,
                "last_message": last_message,
                "last_duration": last_duration,
                "last_timestamp": last_timestamp,
                "previous_tail_lines": previous_tail_lines,
                "timestamp": start_timestamp,
                "healthcheck_grace_period": healthcheck_grace_period,
                "healthcheck_cmd": healthcheck,
                "tail_lines": tail_lines,
            }
        )
    return containers
1031
+
1032
+
1033
async def get_pod_status_tasks_by_sha_and_readiness(
    pods_task: "asyncio.Future[V1Pod]",
    backends_task: "asyncio.Future[Dict[str, Any]]",
    client: kubernetes_tools.KubeClient,
    verbose: int,
) -> DefaultDict[
    Tuple[str, str], DefaultDict[bool, List["asyncio.Future[Dict[str, Any]]"]]
]:
    """Start a ``get_pod_status`` task per pod, grouped by shas and readiness.

    :returns: nested defaultdict keyed first by the pod's
        ``(git_sha, config_sha)`` labels and then by whether
        ``kubernetes_tools.is_pod_ready`` considers the pod ready
    """
    num_tail_lines = calculate_tail_lines(verbose)
    tasks_by_sha_and_readiness: DefaultDict[
        Tuple[str, str], DefaultDict[bool, List["asyncio.Future[Dict[str, Any]]"]]
    ] = defaultdict(lambda: defaultdict(list))

    pods = await pods_task
    for pod in pods:
        labels = pod.metadata.labels
        shas = (
            labels["paasta.yelp.com/git_sha"],
            labels["paasta.yelp.com/config_sha"],
        )
        is_ready = kubernetes_tools.is_pod_ready(pod)
        status_task = asyncio.create_task(
            get_pod_status(pod, backends_task, client, num_tail_lines)
        )
        tasks_by_sha_and_readiness[shas][is_ready].append(status_task)

    return tasks_by_sha_and_readiness
1057
+
1058
+
1059
async def get_versions_for_controller_revisions(
    kube_client: kubernetes_tools.KubeClient,
    service: str,
    instance: str,
    namespaces: Iterable[str],
    pod_status_by_sha_and_readiness_task: "asyncio.Future[Mapping[Tuple[str, str], Mapping[bool, Sequence[asyncio.Future[Mapping[str, Any]]]]]]",
) -> List[KubernetesVersionDict]:
    """Build one version dict per (git_sha, config_sha) controller revision.

    Controller revisions are fetched from every namespace in ``namespaces``.
    When several revisions share the same sha pair, the one seen last wins.
    Each remaining revision is turned into a version dict by
    ``get_version_for_controller_revision``.
    """
    lookups = [
        kubernetes_tools.controller_revisions_for_service_instance(
            service=service,
            instance=instance,
            kube_client=kube_client,
            namespace=namespace,
        )
        for namespace in namespaces
    ]
    controller_revision_list: List[V1ControllerRevision] = []
    for next_finished in asyncio.as_completed(lookups):
        controller_revision_list += await next_finished

    cr_by_shas: Dict[Tuple[str, str], V1ControllerRevision] = {}
    for cr in controller_revision_list:
        labels = cr.metadata.labels
        shas = (
            labels["paasta.yelp.com/git_sha"],
            labels["paasta.yelp.com/config_sha"],
        )
        cr_by_shas[shas] = cr

    pod_status_by_sha_and_readiness = await pod_status_by_sha_and_readiness_task
    version_coros = [
        get_version_for_controller_revision(
            cr,
            kube_client,
            pod_status_by_sha_and_readiness[shas],
        )
        for shas, cr in cr_by_shas.items()
    ]
    return await asyncio.gather(*version_coros)
1100
+
1101
+
1102
async def get_version_for_controller_revision(
    cr: V1ControllerRevision,
    client: Any,
    pod_status_tasks_by_readiness: Mapping[
        bool, Sequence["asyncio.Future[Mapping[str, Any]]"]
    ],
) -> KubernetesVersionDict:
    """Describe a controller revision as a version dict, with pod statuses.

    :param cr: the controller revision to describe
    :param client: accepted but not used here
    :param pod_status_tasks_by_readiness: pod status tasks keyed by whether
        the pod is ready; either key may be absent
    :returns: version dict whose ``"replicas"`` is the total number of pod
        status tasks and ``"ready_replicas"`` the number filed under ``True``
    """
    all_pod_status_tasks = [
        task for tasks in pod_status_tasks_by_readiness.values() for task in tasks
    ]
    pods = await asyncio.gather(*all_pod_status_tasks)
    labels = cr.metadata.labels
    return {
        "name": cr.metadata.name,
        "type": "ControllerRevision",
        "replicas": len(all_pod_status_tasks),
        # .get() rather than [True]: a plain mapping with no ready pods must
        # not raise KeyError, and a defaultdict must not gain a new key.
        "ready_replicas": len(pod_status_tasks_by_readiness.get(True, ())),
        "create_timestamp": cr.metadata.creation_timestamp.timestamp(),
        "git_sha": labels.get("paasta.yelp.com/git_sha"),
        "image_version": labels.get("paasta.yelp.com/image_version", None),
        "config_sha": labels.get("paasta.yelp.com/config_sha"),
        "pods": pods,
        "namespace": cr.metadata.namespace,
    }
1125
+
1126
+
1127
+ # TODO: Cleanup old kubernetes status
1128
@a_sync.to_blocking
async def kubernetes_status(
    service: str,
    instance: str,
    verbose: int,
    include_envoy: bool,
    instance_type: str,
    settings: Any,
) -> Mapping[str, Any]:
    """Assemble the original (v1) status dict for a Kubernetes instance.

    The result covers active versions, the fields filled in by
    ``job_status``, autoscaling status (when enabled and not using the
    ``bespoke`` decision policy), the number of evicted pods and, if
    ``include_envoy`` is set and the namespace has a ``proxy_port``, the
    envoy mesh status. An empty dict is returned when
    ``settings.kubernetes_client`` is None.
    """
    kstatus: Dict[str, Any] = {}
    load_config = LONG_RUNNING_INSTANCE_TYPE_HANDLERS[instance_type].loader
    job_config = load_config(
        service=service,
        instance=instance,
        cluster=settings.cluster,
        soa_dir=settings.soa_dir,
        load_deployments=True,
    )
    kube_client = settings.kubernetes_client
    if kube_client is None:
        return kstatus

    app = kubernetes_tools.get_kubernetes_app_by_name(
        name=job_config.get_sanitised_deployment_name(),
        kube_client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
    )
    # bouncing status can be inferred from app_count, ref get_bouncing_status

    # this task is necessary for mesh_status, but most other use cases want
    # just the list of pods
    pods_task = asyncio.create_task(
        kubernetes_tools.pods_for_service_instance(
            service=job_config.service,
            instance=job_config.instance,
            kube_client=kube_client,
            namespace=job_config.get_kubernetes_namespace(),
        )
    )
    pod_list = await pods_task
    replicaset_list = await kubernetes_tools.replicasets_for_service_instance(
        service=job_config.service,
        instance=job_config.instance,
        kube_client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
    )
    # For the purpose of active_versions/app_count, don't count replicasets that are at 0/0.
    actually_running_replicasets = filter_actually_running_replicasets(replicaset_list)
    active_versions = kubernetes_tools.get_active_versions_for_service(
        [app, *pod_list, *actually_running_replicasets]
    )
    kstatus["app_count"] = len(active_versions)
    kstatus["desired_state"] = job_config.get_desired_state()
    kstatus["bounce_method"] = job_config.get_bounce_method()

    active_shas = []
    active_version_tuples = []
    for deployment_version, config_sha in active_versions:
        active_shas.append((deployment_version.sha, config_sha))
        active_version_tuples.append(
            (deployment_version.sha, deployment_version.image_version, config_sha)
        )
    kstatus["active_shas"] = active_shas
    kstatus["active_versions"] = active_version_tuples

    await job_status(
        kstatus=kstatus,
        client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
        job_config=job_config,
        verbose=verbose,
        pod_list=pod_list,
        replicaset_list=replicaset_list,
    )

    if (
        job_config.is_autoscaling_enabled() is True
        and job_config.get_autoscaling_params().get("decision_policy", "") != "bespoke"  # type: ignore
    ):
        try:
            kstatus["autoscaling_status"] = await autoscaling_status(
                kube_client, job_config, job_config.get_kubernetes_namespace()
            )
        except Exception as e:
            kstatus[
                "error_message"
            ] = f"Unknown error occurred while fetching autoscaling status. Please contact #compute-infra for help: {e}"

    kstatus["evicted_count"] = sum(
        1 for pod in pod_list if pod.status.reason == "Evicted"
    )

    if not include_envoy:
        return kstatus

    service_namespace_config = kubernetes_tools.load_service_namespace_config(
        service=service,
        namespace=job_config.get_nerve_namespace(),
        soa_dir=settings.soa_dir,
    )
    if "proxy_port" in service_namespace_config:
        kstatus["envoy"] = await mesh_status(
            service=service,
            service_mesh=ServiceMesh.ENVOY,
            instance=job_config.get_nerve_namespace(),
            job_config=job_config,
            service_namespace_config=service_namespace_config,
            pods_task=pods_task,
            should_return_individual_backends=verbose > 0,
            settings=settings,
        )
    return kstatus
1238
+
1239
+
1240
def instance_status(
    service: str,
    instance: str,
    verbose: int,
    include_envoy: bool,
    use_new: bool,
    instance_type: str,
    settings: Any,
    all_namespaces: bool,
) -> Mapping[str, Any]:
    """Dispatch to the status function(s) matching ``instance_type``.

    Custom-resource types get a ``cr_status`` entry keyed by the instance
    type; Kubernetes types get either a ``"kubernetes_v2"`` entry (when
    ``use_new`` is set) or a ``"kubernetes"`` entry.

    :raises RuntimeError: if ``can_handle(instance_type)`` is false
    """
    if not can_handle(instance_type):
        raise RuntimeError(
            f"Unknown instance type: {instance_type!r}, "
            f"can handle: {INSTANCE_TYPES}"
        )

    status = {}

    if instance_type in INSTANCE_TYPES_CR:
        status[instance_type] = cr_status(
            service=service,
            instance=instance,
            instance_type=instance_type,
            verbose=verbose,
            kube_client=settings.kubernetes_client,
        )

    if instance_type in INSTANCE_TYPES_K8S:
        # Arguments shared by both generations of the status function.
        shared_kwargs = dict(
            service=service,
            instance=instance,
            instance_type=instance_type,
            verbose=verbose,
            include_envoy=include_envoy,
            settings=settings,
        )
        if use_new:
            status["kubernetes_v2"] = kubernetes_status_v2(
                **shared_kwargs,
                all_namespaces=all_namespaces,
            )
        else:
            status["kubernetes"] = kubernetes_status(**shared_kwargs)

    return status
1289
+
1290
+
1291
def ready_replicas_from_replicaset(replicaset: V1ReplicaSet) -> int:
    """Return the ready-replica count reported in ``replicaset.status``.

    Falls back to 0 when the count is ``None`` or when reading
    ``replicaset.status.ready_replicas`` raises ``AttributeError``
    (for example because ``status`` is ``None``).
    """
    try:
        count = replicaset.status.ready_replicas
    except AttributeError:
        return 0

    return 0 if count is None else count
1300
+
1301
+
1302
@a_sync.to_blocking
async def kubernetes_mesh_status(
    service: str,
    instance: str,
    instance_type: str,
    settings: Any,
    include_envoy: bool = True,
) -> Mapping[str, Any]:
    """Fetch the Envoy mesh status for a long-running service instance.

    The job config is loaded through the loader registered for
    ``instance_type`` in ``LONG_RUNNING_INSTANCE_TYPE_HANDLERS``, and the
    service namespace config is loaded for the job's nerve namespace.

    :returns: a mapping whose ``"envoy"`` key holds the ``mesh_status`` result
        (individual backends are always requested).
    :raises RuntimeError: if ``include_envoy`` is false, if ``instance_type``
        has no long-running handler, or if the namespace config has no
        ``"proxy_port"`` entry.
    """
    if not include_envoy:
        raise RuntimeError("No mesh types specified when requesting mesh status")
    if instance_type not in LONG_RUNNING_INSTANCE_TYPE_HANDLERS:
        raise RuntimeError(
            f"Getting mesh status for {instance_type} instances is not supported"
        )

    load_job_config = LONG_RUNNING_INSTANCE_TYPE_HANDLERS[instance_type].loader
    job_config = load_job_config(
        service=service,
        instance=instance,
        cluster=settings.cluster,
        soa_dir=settings.soa_dir,
        load_deployments=True,
    )
    namespace_config = kubernetes_tools.load_service_namespace_config(
        service=service,
        namespace=job_config.get_nerve_namespace(),
        soa_dir=settings.soa_dir,
    )
    if "proxy_port" not in namespace_config:
        raise RuntimeError(
            f"Instance '{service}.{instance}' is not configured for the mesh"
        )

    # Schedule the pod lookup as a task; mesh_status receives the task itself.
    pods_task = asyncio.create_task(
        kubernetes_tools.pods_for_service_instance(
            kube_client=settings.kubernetes_client,
            service=job_config.service,
            instance=job_config.instance,
            namespace=job_config.get_kubernetes_namespace(),
        )
    )

    # The guard at the top guarantees Envoy was requested by this point.
    envoy_status = await mesh_status(
        service=service,
        instance=job_config.get_nerve_namespace(),
        job_config=job_config,
        service_namespace_config=namespace_config,
        pods_task=pods_task,
        should_return_individual_backends=True,
        settings=settings,
        service_mesh=ServiceMesh.ENVOY,
    )
    return {"envoy": envoy_status}