paasta-tools 1.21.3__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (348)
  1. k8s_itests/__init__.py +0 -0
  2. k8s_itests/test_autoscaling.py +23 -0
  3. k8s_itests/utils.py +38 -0
  4. paasta_tools/__init__.py +20 -0
  5. paasta_tools/adhoc_tools.py +142 -0
  6. paasta_tools/api/__init__.py +13 -0
  7. paasta_tools/api/api.py +330 -0
  8. paasta_tools/api/api_docs/swagger.json +2323 -0
  9. paasta_tools/api/client.py +106 -0
  10. paasta_tools/api/settings.py +33 -0
  11. paasta_tools/api/tweens/__init__.py +6 -0
  12. paasta_tools/api/tweens/auth.py +125 -0
  13. paasta_tools/api/tweens/profiling.py +108 -0
  14. paasta_tools/api/tweens/request_logger.py +124 -0
  15. paasta_tools/api/views/__init__.py +13 -0
  16. paasta_tools/api/views/autoscaler.py +100 -0
  17. paasta_tools/api/views/exception.py +45 -0
  18. paasta_tools/api/views/flink.py +73 -0
  19. paasta_tools/api/views/instance.py +395 -0
  20. paasta_tools/api/views/pause_autoscaler.py +71 -0
  21. paasta_tools/api/views/remote_run.py +113 -0
  22. paasta_tools/api/views/resources.py +76 -0
  23. paasta_tools/api/views/service.py +35 -0
  24. paasta_tools/api/views/version.py +25 -0
  25. paasta_tools/apply_external_resources.py +79 -0
  26. paasta_tools/async_utils.py +109 -0
  27. paasta_tools/autoscaling/__init__.py +0 -0
  28. paasta_tools/autoscaling/autoscaling_service_lib.py +57 -0
  29. paasta_tools/autoscaling/forecasting.py +106 -0
  30. paasta_tools/autoscaling/max_all_k8s_services.py +41 -0
  31. paasta_tools/autoscaling/pause_service_autoscaler.py +77 -0
  32. paasta_tools/autoscaling/utils.py +52 -0
  33. paasta_tools/bounce_lib.py +184 -0
  34. paasta_tools/broadcast_log_to_services.py +62 -0
  35. paasta_tools/cassandracluster_tools.py +210 -0
  36. paasta_tools/check_autoscaler_max_instances.py +212 -0
  37. paasta_tools/check_cassandracluster_services_replication.py +35 -0
  38. paasta_tools/check_flink_services_health.py +203 -0
  39. paasta_tools/check_kubernetes_api.py +57 -0
  40. paasta_tools/check_kubernetes_services_replication.py +141 -0
  41. paasta_tools/check_oom_events.py +244 -0
  42. paasta_tools/check_services_replication_tools.py +324 -0
  43. paasta_tools/check_spark_jobs.py +234 -0
  44. paasta_tools/cleanup_kubernetes_cr.py +138 -0
  45. paasta_tools/cleanup_kubernetes_crd.py +145 -0
  46. paasta_tools/cleanup_kubernetes_jobs.py +344 -0
  47. paasta_tools/cleanup_tron_namespaces.py +96 -0
  48. paasta_tools/cli/__init__.py +13 -0
  49. paasta_tools/cli/authentication.py +85 -0
  50. paasta_tools/cli/cli.py +260 -0
  51. paasta_tools/cli/cmds/__init__.py +13 -0
  52. paasta_tools/cli/cmds/autoscale.py +143 -0
  53. paasta_tools/cli/cmds/check.py +334 -0
  54. paasta_tools/cli/cmds/cook_image.py +147 -0
  55. paasta_tools/cli/cmds/get_docker_image.py +76 -0
  56. paasta_tools/cli/cmds/get_image_version.py +172 -0
  57. paasta_tools/cli/cmds/get_latest_deployment.py +93 -0
  58. paasta_tools/cli/cmds/info.py +155 -0
  59. paasta_tools/cli/cmds/itest.py +117 -0
  60. paasta_tools/cli/cmds/list.py +66 -0
  61. paasta_tools/cli/cmds/list_clusters.py +42 -0
  62. paasta_tools/cli/cmds/list_deploy_queue.py +171 -0
  63. paasta_tools/cli/cmds/list_namespaces.py +84 -0
  64. paasta_tools/cli/cmds/local_run.py +1396 -0
  65. paasta_tools/cli/cmds/logs.py +1601 -0
  66. paasta_tools/cli/cmds/mark_for_deployment.py +1988 -0
  67. paasta_tools/cli/cmds/mesh_status.py +174 -0
  68. paasta_tools/cli/cmds/pause_service_autoscaler.py +107 -0
  69. paasta_tools/cli/cmds/push_to_registry.py +275 -0
  70. paasta_tools/cli/cmds/remote_run.py +252 -0
  71. paasta_tools/cli/cmds/rollback.py +347 -0
  72. paasta_tools/cli/cmds/secret.py +549 -0
  73. paasta_tools/cli/cmds/security_check.py +59 -0
  74. paasta_tools/cli/cmds/spark_run.py +1400 -0
  75. paasta_tools/cli/cmds/start_stop_restart.py +401 -0
  76. paasta_tools/cli/cmds/status.py +2302 -0
  77. paasta_tools/cli/cmds/validate.py +1012 -0
  78. paasta_tools/cli/cmds/wait_for_deployment.py +275 -0
  79. paasta_tools/cli/fsm/__init__.py +13 -0
  80. paasta_tools/cli/fsm/autosuggest.py +82 -0
  81. paasta_tools/cli/fsm/template/README.md +8 -0
  82. paasta_tools/cli/fsm/template/cookiecutter.json +7 -0
  83. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/kubernetes-PROD.yaml +91 -0
  84. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/monitoring.yaml +20 -0
  85. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/service.yaml +8 -0
  86. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/smartstack.yaml +6 -0
  87. paasta_tools/cli/fsm_cmd.py +121 -0
  88. paasta_tools/cli/paasta_tabcomplete.sh +23 -0
  89. paasta_tools/cli/schemas/adhoc_schema.json +199 -0
  90. paasta_tools/cli/schemas/autoscaling_schema.json +91 -0
  91. paasta_tools/cli/schemas/autotuned_defaults/cassandracluster_schema.json +37 -0
  92. paasta_tools/cli/schemas/autotuned_defaults/kubernetes_schema.json +89 -0
  93. paasta_tools/cli/schemas/deploy_schema.json +173 -0
  94. paasta_tools/cli/schemas/eks_schema.json +970 -0
  95. paasta_tools/cli/schemas/kubernetes_schema.json +970 -0
  96. paasta_tools/cli/schemas/rollback_schema.json +160 -0
  97. paasta_tools/cli/schemas/service_schema.json +25 -0
  98. paasta_tools/cli/schemas/smartstack_schema.json +322 -0
  99. paasta_tools/cli/schemas/tron_schema.json +699 -0
  100. paasta_tools/cli/utils.py +1118 -0
  101. paasta_tools/clusterman.py +21 -0
  102. paasta_tools/config_utils.py +385 -0
  103. paasta_tools/contrib/__init__.py +0 -0
  104. paasta_tools/contrib/bounce_log_latency_parser.py +68 -0
  105. paasta_tools/contrib/check_manual_oapi_changes.sh +24 -0
  106. paasta_tools/contrib/check_orphans.py +306 -0
  107. paasta_tools/contrib/create_dynamodb_table.py +35 -0
  108. paasta_tools/contrib/create_paasta_playground.py +105 -0
  109. paasta_tools/contrib/emit_allocated_cpu_metrics.py +50 -0
  110. paasta_tools/contrib/get_running_task_allocation.py +346 -0
  111. paasta_tools/contrib/habitat_fixer.py +86 -0
  112. paasta_tools/contrib/ide_helper.py +316 -0
  113. paasta_tools/contrib/is_pod_healthy_in_proxy.py +139 -0
  114. paasta_tools/contrib/is_pod_healthy_in_smartstack.py +50 -0
  115. paasta_tools/contrib/kill_bad_containers.py +109 -0
  116. paasta_tools/contrib/mass-deploy-tag.sh +44 -0
  117. paasta_tools/contrib/mock_patch_checker.py +86 -0
  118. paasta_tools/contrib/paasta_update_soa_memcpu.py +520 -0
  119. paasta_tools/contrib/render_template.py +129 -0
  120. paasta_tools/contrib/rightsizer_soaconfigs_update.py +348 -0
  121. paasta_tools/contrib/service_shard_remove.py +157 -0
  122. paasta_tools/contrib/service_shard_update.py +373 -0
  123. paasta_tools/contrib/shared_ip_check.py +77 -0
  124. paasta_tools/contrib/timeouts_metrics_prom.py +64 -0
  125. paasta_tools/delete_kubernetes_deployments.py +89 -0
  126. paasta_tools/deployment_utils.py +44 -0
  127. paasta_tools/docker_wrapper.py +234 -0
  128. paasta_tools/docker_wrapper_imports.py +13 -0
  129. paasta_tools/drain_lib.py +351 -0
  130. paasta_tools/dump_locally_running_services.py +71 -0
  131. paasta_tools/eks_tools.py +119 -0
  132. paasta_tools/envoy_tools.py +373 -0
  133. paasta_tools/firewall.py +504 -0
  134. paasta_tools/firewall_logging.py +154 -0
  135. paasta_tools/firewall_update.py +172 -0
  136. paasta_tools/flink_tools.py +345 -0
  137. paasta_tools/flinkeks_tools.py +90 -0
  138. paasta_tools/frameworks/__init__.py +0 -0
  139. paasta_tools/frameworks/adhoc_scheduler.py +71 -0
  140. paasta_tools/frameworks/constraints.py +87 -0
  141. paasta_tools/frameworks/native_scheduler.py +652 -0
  142. paasta_tools/frameworks/native_service_config.py +301 -0
  143. paasta_tools/frameworks/task_store.py +245 -0
  144. paasta_tools/generate_all_deployments +9 -0
  145. paasta_tools/generate_authenticating_services.py +94 -0
  146. paasta_tools/generate_deployments_for_service.py +255 -0
  147. paasta_tools/generate_services_file.py +114 -0
  148. paasta_tools/generate_services_yaml.py +30 -0
  149. paasta_tools/hacheck.py +76 -0
  150. paasta_tools/instance/__init__.py +0 -0
  151. paasta_tools/instance/hpa_metrics_parser.py +122 -0
  152. paasta_tools/instance/kubernetes.py +1362 -0
  153. paasta_tools/iptables.py +240 -0
  154. paasta_tools/kafkacluster_tools.py +143 -0
  155. paasta_tools/kubernetes/__init__.py +0 -0
  156. paasta_tools/kubernetes/application/__init__.py +0 -0
  157. paasta_tools/kubernetes/application/controller_wrappers.py +476 -0
  158. paasta_tools/kubernetes/application/tools.py +90 -0
  159. paasta_tools/kubernetes/bin/__init__.py +0 -0
  160. paasta_tools/kubernetes/bin/kubernetes_remove_evicted_pods.py +164 -0
  161. paasta_tools/kubernetes/bin/paasta_cleanup_remote_run_resources.py +135 -0
  162. paasta_tools/kubernetes/bin/paasta_cleanup_stale_nodes.py +181 -0
  163. paasta_tools/kubernetes/bin/paasta_secrets_sync.py +758 -0
  164. paasta_tools/kubernetes/remote_run.py +558 -0
  165. paasta_tools/kubernetes_tools.py +4679 -0
  166. paasta_tools/list_kubernetes_service_instances.py +128 -0
  167. paasta_tools/list_tron_namespaces.py +60 -0
  168. paasta_tools/long_running_service_tools.py +678 -0
  169. paasta_tools/mac_address.py +44 -0
  170. paasta_tools/marathon_dashboard.py +0 -0
  171. paasta_tools/mesos/__init__.py +0 -0
  172. paasta_tools/mesos/cfg.py +46 -0
  173. paasta_tools/mesos/cluster.py +60 -0
  174. paasta_tools/mesos/exceptions.py +59 -0
  175. paasta_tools/mesos/framework.py +77 -0
  176. paasta_tools/mesos/log.py +48 -0
  177. paasta_tools/mesos/master.py +306 -0
  178. paasta_tools/mesos/mesos_file.py +169 -0
  179. paasta_tools/mesos/parallel.py +52 -0
  180. paasta_tools/mesos/slave.py +115 -0
  181. paasta_tools/mesos/task.py +94 -0
  182. paasta_tools/mesos/util.py +69 -0
  183. paasta_tools/mesos/zookeeper.py +37 -0
  184. paasta_tools/mesos_maintenance.py +848 -0
  185. paasta_tools/mesos_tools.py +1051 -0
  186. paasta_tools/metrics/__init__.py +0 -0
  187. paasta_tools/metrics/metastatus_lib.py +1110 -0
  188. paasta_tools/metrics/metrics_lib.py +217 -0
  189. paasta_tools/monitoring/__init__.py +13 -0
  190. paasta_tools/monitoring/check_k8s_api_performance.py +110 -0
  191. paasta_tools/monitoring_tools.py +652 -0
  192. paasta_tools/monkrelaycluster_tools.py +146 -0
  193. paasta_tools/nrtsearchservice_tools.py +143 -0
  194. paasta_tools/nrtsearchserviceeks_tools.py +68 -0
  195. paasta_tools/oom_logger.py +321 -0
  196. paasta_tools/paasta_deploy_tron_jobs +3 -0
  197. paasta_tools/paasta_execute_docker_command.py +123 -0
  198. paasta_tools/paasta_native_serviceinit.py +21 -0
  199. paasta_tools/paasta_service_config_loader.py +201 -0
  200. paasta_tools/paastaapi/__init__.py +29 -0
  201. paasta_tools/paastaapi/api/__init__.py +3 -0
  202. paasta_tools/paastaapi/api/autoscaler_api.py +302 -0
  203. paasta_tools/paastaapi/api/default_api.py +569 -0
  204. paasta_tools/paastaapi/api/remote_run_api.py +604 -0
  205. paasta_tools/paastaapi/api/resources_api.py +157 -0
  206. paasta_tools/paastaapi/api/service_api.py +1736 -0
  207. paasta_tools/paastaapi/api_client.py +818 -0
  208. paasta_tools/paastaapi/apis/__init__.py +22 -0
  209. paasta_tools/paastaapi/configuration.py +455 -0
  210. paasta_tools/paastaapi/exceptions.py +137 -0
  211. paasta_tools/paastaapi/model/__init__.py +5 -0
  212. paasta_tools/paastaapi/model/adhoc_launch_history.py +176 -0
  213. paasta_tools/paastaapi/model/autoscaler_count_msg.py +176 -0
  214. paasta_tools/paastaapi/model/deploy_queue.py +178 -0
  215. paasta_tools/paastaapi/model/deploy_queue_service_instance.py +194 -0
  216. paasta_tools/paastaapi/model/envoy_backend.py +185 -0
  217. paasta_tools/paastaapi/model/envoy_location.py +184 -0
  218. paasta_tools/paastaapi/model/envoy_status.py +181 -0
  219. paasta_tools/paastaapi/model/flink_cluster_overview.py +188 -0
  220. paasta_tools/paastaapi/model/flink_config.py +173 -0
  221. paasta_tools/paastaapi/model/flink_job.py +186 -0
  222. paasta_tools/paastaapi/model/flink_job_details.py +192 -0
  223. paasta_tools/paastaapi/model/flink_jobs.py +175 -0
  224. paasta_tools/paastaapi/model/float_and_error.py +173 -0
  225. paasta_tools/paastaapi/model/hpa_metric.py +176 -0
  226. paasta_tools/paastaapi/model/inline_object.py +170 -0
  227. paasta_tools/paastaapi/model/inline_response200.py +170 -0
  228. paasta_tools/paastaapi/model/inline_response2001.py +170 -0
  229. paasta_tools/paastaapi/model/instance_bounce_status.py +200 -0
  230. paasta_tools/paastaapi/model/instance_mesh_status.py +186 -0
  231. paasta_tools/paastaapi/model/instance_status.py +220 -0
  232. paasta_tools/paastaapi/model/instance_status_adhoc.py +187 -0
  233. paasta_tools/paastaapi/model/instance_status_cassandracluster.py +173 -0
  234. paasta_tools/paastaapi/model/instance_status_flink.py +173 -0
  235. paasta_tools/paastaapi/model/instance_status_kafkacluster.py +173 -0
  236. paasta_tools/paastaapi/model/instance_status_kubernetes.py +263 -0
  237. paasta_tools/paastaapi/model/instance_status_kubernetes_autoscaling_status.py +187 -0
  238. paasta_tools/paastaapi/model/instance_status_kubernetes_v2.py +197 -0
  239. paasta_tools/paastaapi/model/instance_status_tron.py +204 -0
  240. paasta_tools/paastaapi/model/instance_tasks.py +182 -0
  241. paasta_tools/paastaapi/model/integer_and_error.py +173 -0
  242. paasta_tools/paastaapi/model/kubernetes_container.py +178 -0
  243. paasta_tools/paastaapi/model/kubernetes_container_v2.py +219 -0
  244. paasta_tools/paastaapi/model/kubernetes_healthcheck.py +176 -0
  245. paasta_tools/paastaapi/model/kubernetes_pod.py +201 -0
  246. paasta_tools/paastaapi/model/kubernetes_pod_event.py +176 -0
  247. paasta_tools/paastaapi/model/kubernetes_pod_v2.py +213 -0
  248. paasta_tools/paastaapi/model/kubernetes_replica_set.py +185 -0
  249. paasta_tools/paastaapi/model/kubernetes_version.py +202 -0
  250. paasta_tools/paastaapi/model/remote_run_outcome.py +189 -0
  251. paasta_tools/paastaapi/model/remote_run_start.py +185 -0
  252. paasta_tools/paastaapi/model/remote_run_stop.py +176 -0
  253. paasta_tools/paastaapi/model/remote_run_token.py +173 -0
  254. paasta_tools/paastaapi/model/resource.py +187 -0
  255. paasta_tools/paastaapi/model/resource_item.py +187 -0
  256. paasta_tools/paastaapi/model/resource_value.py +176 -0
  257. paasta_tools/paastaapi/model/smartstack_backend.py +191 -0
  258. paasta_tools/paastaapi/model/smartstack_location.py +181 -0
  259. paasta_tools/paastaapi/model/smartstack_status.py +181 -0
  260. paasta_tools/paastaapi/model/task_tail_lines.py +176 -0
  261. paasta_tools/paastaapi/model_utils.py +1879 -0
  262. paasta_tools/paastaapi/models/__init__.py +62 -0
  263. paasta_tools/paastaapi/rest.py +287 -0
  264. paasta_tools/prune_completed_pods.py +220 -0
  265. paasta_tools/puppet_service_tools.py +59 -0
  266. paasta_tools/py.typed +1 -0
  267. paasta_tools/remote_git.py +127 -0
  268. paasta_tools/run-paasta-api-in-dev-mode.py +57 -0
  269. paasta_tools/run-paasta-api-playground.py +51 -0
  270. paasta_tools/secret_providers/__init__.py +66 -0
  271. paasta_tools/secret_providers/vault.py +214 -0
  272. paasta_tools/secret_tools.py +277 -0
  273. paasta_tools/setup_istio_mesh.py +353 -0
  274. paasta_tools/setup_kubernetes_cr.py +412 -0
  275. paasta_tools/setup_kubernetes_crd.py +138 -0
  276. paasta_tools/setup_kubernetes_internal_crd.py +154 -0
  277. paasta_tools/setup_kubernetes_job.py +353 -0
  278. paasta_tools/setup_prometheus_adapter_config.py +1028 -0
  279. paasta_tools/setup_tron_namespace.py +248 -0
  280. paasta_tools/slack.py +75 -0
  281. paasta_tools/smartstack_tools.py +676 -0
  282. paasta_tools/spark_tools.py +283 -0
  283. paasta_tools/synapse_srv_namespaces_fact.py +42 -0
  284. paasta_tools/tron/__init__.py +0 -0
  285. paasta_tools/tron/client.py +158 -0
  286. paasta_tools/tron/tron_command_context.py +194 -0
  287. paasta_tools/tron/tron_timeutils.py +101 -0
  288. paasta_tools/tron_tools.py +1448 -0
  289. paasta_tools/utils.py +4307 -0
  290. paasta_tools/yaml_tools.py +44 -0
  291. paasta_tools-1.21.3.data/scripts/apply_external_resources.py +79 -0
  292. paasta_tools-1.21.3.data/scripts/bounce_log_latency_parser.py +68 -0
  293. paasta_tools-1.21.3.data/scripts/check_autoscaler_max_instances.py +212 -0
  294. paasta_tools-1.21.3.data/scripts/check_cassandracluster_services_replication.py +35 -0
  295. paasta_tools-1.21.3.data/scripts/check_flink_services_health.py +203 -0
  296. paasta_tools-1.21.3.data/scripts/check_kubernetes_api.py +57 -0
  297. paasta_tools-1.21.3.data/scripts/check_kubernetes_services_replication.py +141 -0
  298. paasta_tools-1.21.3.data/scripts/check_manual_oapi_changes.sh +24 -0
  299. paasta_tools-1.21.3.data/scripts/check_oom_events.py +244 -0
  300. paasta_tools-1.21.3.data/scripts/check_orphans.py +306 -0
  301. paasta_tools-1.21.3.data/scripts/check_spark_jobs.py +234 -0
  302. paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_cr.py +138 -0
  303. paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_crd.py +145 -0
  304. paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_jobs.py +344 -0
  305. paasta_tools-1.21.3.data/scripts/create_dynamodb_table.py +35 -0
  306. paasta_tools-1.21.3.data/scripts/create_paasta_playground.py +105 -0
  307. paasta_tools-1.21.3.data/scripts/delete_kubernetes_deployments.py +89 -0
  308. paasta_tools-1.21.3.data/scripts/emit_allocated_cpu_metrics.py +50 -0
  309. paasta_tools-1.21.3.data/scripts/generate_all_deployments +9 -0
  310. paasta_tools-1.21.3.data/scripts/generate_authenticating_services.py +94 -0
  311. paasta_tools-1.21.3.data/scripts/generate_deployments_for_service.py +255 -0
  312. paasta_tools-1.21.3.data/scripts/generate_services_file.py +114 -0
  313. paasta_tools-1.21.3.data/scripts/generate_services_yaml.py +30 -0
  314. paasta_tools-1.21.3.data/scripts/get_running_task_allocation.py +346 -0
  315. paasta_tools-1.21.3.data/scripts/habitat_fixer.py +86 -0
  316. paasta_tools-1.21.3.data/scripts/ide_helper.py +316 -0
  317. paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_proxy.py +139 -0
  318. paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_smartstack.py +50 -0
  319. paasta_tools-1.21.3.data/scripts/kill_bad_containers.py +109 -0
  320. paasta_tools-1.21.3.data/scripts/kubernetes_remove_evicted_pods.py +164 -0
  321. paasta_tools-1.21.3.data/scripts/mass-deploy-tag.sh +44 -0
  322. paasta_tools-1.21.3.data/scripts/mock_patch_checker.py +86 -0
  323. paasta_tools-1.21.3.data/scripts/paasta_cleanup_remote_run_resources.py +135 -0
  324. paasta_tools-1.21.3.data/scripts/paasta_cleanup_stale_nodes.py +181 -0
  325. paasta_tools-1.21.3.data/scripts/paasta_deploy_tron_jobs +3 -0
  326. paasta_tools-1.21.3.data/scripts/paasta_execute_docker_command.py +123 -0
  327. paasta_tools-1.21.3.data/scripts/paasta_secrets_sync.py +758 -0
  328. paasta_tools-1.21.3.data/scripts/paasta_tabcomplete.sh +23 -0
  329. paasta_tools-1.21.3.data/scripts/paasta_update_soa_memcpu.py +520 -0
  330. paasta_tools-1.21.3.data/scripts/render_template.py +129 -0
  331. paasta_tools-1.21.3.data/scripts/rightsizer_soaconfigs_update.py +348 -0
  332. paasta_tools-1.21.3.data/scripts/service_shard_remove.py +157 -0
  333. paasta_tools-1.21.3.data/scripts/service_shard_update.py +373 -0
  334. paasta_tools-1.21.3.data/scripts/setup_istio_mesh.py +353 -0
  335. paasta_tools-1.21.3.data/scripts/setup_kubernetes_cr.py +412 -0
  336. paasta_tools-1.21.3.data/scripts/setup_kubernetes_crd.py +138 -0
  337. paasta_tools-1.21.3.data/scripts/setup_kubernetes_internal_crd.py +154 -0
  338. paasta_tools-1.21.3.data/scripts/setup_kubernetes_job.py +353 -0
  339. paasta_tools-1.21.3.data/scripts/setup_prometheus_adapter_config.py +1028 -0
  340. paasta_tools-1.21.3.data/scripts/shared_ip_check.py +77 -0
  341. paasta_tools-1.21.3.data/scripts/synapse_srv_namespaces_fact.py +42 -0
  342. paasta_tools-1.21.3.data/scripts/timeouts_metrics_prom.py +64 -0
  343. paasta_tools-1.21.3.dist-info/LICENSE +201 -0
  344. paasta_tools-1.21.3.dist-info/METADATA +74 -0
  345. paasta_tools-1.21.3.dist-info/RECORD +348 -0
  346. paasta_tools-1.21.3.dist-info/WHEEL +5 -0
  347. paasta_tools-1.21.3.dist-info/entry_points.txt +20 -0
  348. paasta_tools-1.21.3.dist-info/top_level.txt +2 -0
paasta_tools/kubernetes_tools.py
@@ -0,0 +1,4679 @@
+ # Copyright 2015-2018 Yelp Inc.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ import base64
+ import functools
+ import hashlib
+ import itertools
+ import json
+ import logging
+ import math
+ import os
+ import re
+ from datetime import datetime
+ from datetime import timezone
+ from enum import Enum
+ from functools import lru_cache
+ from inspect import currentframe
+ from pathlib import Path
+ from typing import Any
+ from typing import cast
+ from typing import Collection
+ from typing import Container
+ from typing import Dict
+ from typing import Iterable
+ from typing import List
+ from typing import Literal
+ from typing import Mapping
+ from typing import MutableMapping
+ from typing import NamedTuple
+ from typing import Optional
+ from typing import Sequence
+ from typing import Set
+ from typing import Tuple
+ from typing import Union
+
+ import a_sync
+ import requests
+ import service_configuration_lib
+ from humanfriendly import parse_size
+ from kubernetes import client as kube_client
+ from kubernetes import config as kube_config
+ from kubernetes.client import CoreV1Event
+ from kubernetes.client import models
+ from kubernetes.client import V1Affinity
+ from kubernetes.client import V1AWSElasticBlockStoreVolumeSource
+ from kubernetes.client import V1Capabilities
+ from kubernetes.client import V1ConfigMap
+ from kubernetes.client import V1Container
+ from kubernetes.client import V1ContainerPort
+ from kubernetes.client import V1ContainerStatus
+ from kubernetes.client import V1ControllerRevision
+ from kubernetes.client import V1CustomResourceDefinition
+ from kubernetes.client import V1CustomResourceDefinitionList
+ from kubernetes.client import V1DeleteOptions
+ from kubernetes.client import V1Deployment
+ from kubernetes.client import V1DeploymentSpec
+ from kubernetes.client import V1DeploymentStrategy
+ from kubernetes.client import V1EnvVar
+ from kubernetes.client import V1EnvVarSource
+ from kubernetes.client import V1ExecAction
+ from kubernetes.client import V1HostPathVolumeSource
+ from kubernetes.client import V1HTTPGetAction
+ from kubernetes.client import V1Job
+ from kubernetes.client import V1JobSpec
+ from kubernetes.client import V1KeyToPath
+ from kubernetes.client import V1LabelSelector
+ from kubernetes.client import V1Lifecycle
+ from kubernetes.client import V1LifecycleHandler
+ from kubernetes.client import V1LimitRange
+ from kubernetes.client import V1LimitRangeItem
+ from kubernetes.client import V1LimitRangeSpec
+ from kubernetes.client import V1Namespace
+ from kubernetes.client import V1Node
+ from kubernetes.client import V1NodeAffinity
+ from kubernetes.client import V1NodeSelector
+ from kubernetes.client import V1NodeSelectorRequirement
+ from kubernetes.client import V1NodeSelectorTerm
+ from kubernetes.client import V1ObjectFieldSelector
+ from kubernetes.client import V1ObjectMeta
+ from kubernetes.client import V1PersistentVolumeClaim
+ from kubernetes.client import V1PersistentVolumeClaimSpec
+ from kubernetes.client import V1Pod
+ from kubernetes.client import V1PodAffinityTerm
+ from kubernetes.client import V1PodAntiAffinity
+ from kubernetes.client import V1PodCondition
+ from kubernetes.client import V1PodDisruptionBudget
+ from kubernetes.client import V1PodDisruptionBudgetSpec
+ from kubernetes.client import V1PodSecurityContext
+ from kubernetes.client import V1PodSpec
+ from kubernetes.client import V1PodTemplateSpec
+ from kubernetes.client import V1PreferredSchedulingTerm
+ from kubernetes.client import V1Probe
+ from kubernetes.client import V1ProjectedVolumeSource
+ from kubernetes.client import V1ReplicaSet
+ from kubernetes.client import V1ResourceRequirements
+ from kubernetes.client import V1RoleBinding
+ from kubernetes.client import V1RoleRef
+ from kubernetes.client import V1RollingUpdateDeployment
+ from kubernetes.client import V1Secret
+ from kubernetes.client import V1SecretKeySelector
+ from kubernetes.client import V1SecretVolumeSource
+ from kubernetes.client import V1SecurityContext
+ from kubernetes.client import V1ServiceAccount
+ from kubernetes.client import V1ServiceAccountTokenProjection
+ from kubernetes.client import V1StatefulSet
+ from kubernetes.client import V1StatefulSetSpec
+ from kubernetes.client import V1Subject
+ from kubernetes.client import V1TCPSocketAction
+ from kubernetes.client import V1TopologySpreadConstraint
+ from kubernetes.client import V1Volume
+ from kubernetes.client import V1VolumeMount
+ from kubernetes.client import V1VolumeProjection
+ from kubernetes.client import V1WeightedPodAffinityTerm
+ from kubernetes.client import V2CrossVersionObjectReference
+ from kubernetes.client import V2HorizontalPodAutoscaler
+ from kubernetes.client import V2HorizontalPodAutoscalerCondition
+ from kubernetes.client import V2HorizontalPodAutoscalerSpec
+ from kubernetes.client import V2MetricIdentifier
+ from kubernetes.client import V2MetricSpec
+ from kubernetes.client import V2MetricTarget
+ from kubernetes.client import V2ObjectMetricSource
+ from kubernetes.client import V2ResourceMetricSource
+ from kubernetes.client.models import V2HorizontalPodAutoscalerStatus
+ from kubernetes.client.rest import ApiException
+ from mypy_extensions import TypedDict
+ from service_configuration_lib import read_soa_metadata
+
+ from paasta_tools import __version__
+ from paasta_tools.async_utils import async_timeout
+ from paasta_tools.autoscaling.utils import AutoscalingParamsDict
+ from paasta_tools.autoscaling.utils import MetricsProviderDict
+ from paasta_tools.long_running_service_tools import host_passes_blacklist
+ from paasta_tools.long_running_service_tools import host_passes_whitelist
+ from paasta_tools.long_running_service_tools import InvalidHealthcheckMode
+ from paasta_tools.long_running_service_tools import load_service_namespace_config
+ from paasta_tools.long_running_service_tools import LongRunningServiceConfig
+ from paasta_tools.long_running_service_tools import LongRunningServiceConfigDict
+ from paasta_tools.long_running_service_tools import METRICS_PROVIDER_ACTIVE_REQUESTS
+ from paasta_tools.long_running_service_tools import METRICS_PROVIDER_CPU
+ from paasta_tools.long_running_service_tools import METRICS_PROVIDER_GUNICORN
+ from paasta_tools.long_running_service_tools import METRICS_PROVIDER_PISCINA
+ from paasta_tools.long_running_service_tools import METRICS_PROVIDER_PROMQL
+ from paasta_tools.long_running_service_tools import METRICS_PROVIDER_UWSGI
+ from paasta_tools.long_running_service_tools import METRICS_PROVIDER_UWSGI_V2
+ from paasta_tools.long_running_service_tools import ServiceNamespaceConfig
+ from paasta_tools.secret_tools import get_secret_name_from_ref
+ from paasta_tools.secret_tools import is_secret_ref
+ from paasta_tools.secret_tools import is_shared_secret
+ from paasta_tools.secret_tools import SHARED_SECRET_SERVICE
+ from paasta_tools.utils import AwsEbsVolume
+ from paasta_tools.utils import BranchDictV2
+ from paasta_tools.utils import CAPS_DROP
+ from paasta_tools.utils import decompose_job_id
+ from paasta_tools.utils import deep_merge_dictionaries
+ from paasta_tools.utils import DEFAULT_SOA_DIR
+ from paasta_tools.utils import DeployBlacklist
+ from paasta_tools.utils import DeploymentVersion
+ from paasta_tools.utils import DeployWhitelist
+ from paasta_tools.utils import DockerVolume
+ from paasta_tools.utils import get_config_hash
+ from paasta_tools.utils import get_git_sha_from_dockerurl
+ from paasta_tools.utils import KubeContainerResourceRequest
+ from paasta_tools.utils import load_service_instance_config
+ from paasta_tools.utils import load_system_paasta_config
+ from paasta_tools.utils import load_v2_deployments_json
+ from paasta_tools.utils import PaastaColors
+ from paasta_tools.utils import PaastaNotConfiguredError
+ from paasta_tools.utils import PersistentVolume
+ from paasta_tools.utils import ProjectedSAVolume
+ from paasta_tools.utils import SecretVolume
+ from paasta_tools.utils import SystemPaastaConfig
+ from paasta_tools.utils import time_cache
+ from paasta_tools.utils import TopologySpreadConstraintDict
+ from paasta_tools.utils import VolumeWithMode
+
+
+ log = logging.getLogger(__name__)
+
+ KUBE_CONFIG_PATH = "/etc/kubernetes/admin.conf"
+ KUBE_CONFIG_USER_PATH = "/etc/kubernetes/paasta.conf"
+ YELP_ATTRIBUTE_PREFIX = "yelp.com/"
+ PAASTA_ATTRIBUTE_PREFIX = "paasta.yelp.com/"
+ KUBE_DEPLOY_STATEGY_MAP = {
+ "crossover": "RollingUpdate",
+ "downthenup": "Recreate",
+ "brutal": "RollingUpdate",
+ }
+ HACHECK_POD_NAME = "hacheck"
+ GUNICORN_EXPORTER_POD_NAME = "gunicorn--exporter"
+ SIDECAR_CONTAINER_NAMES = [
+ HACHECK_POD_NAME,
+ GUNICORN_EXPORTER_POD_NAME,
+ ]
+ KUBERNETES_NAMESPACE = "paasta"
+ PAASTA_WORKLOAD_OWNER = "compute_infra_platform_experience"
+ MAX_EVENTS_TO_RETRIEVE = 200
+ DISCOVERY_ATTRIBUTES = {
+ "region",
+ "superregion",
+ "ecosystem",
+ "habitat",
+ "pool",
+ "hostname",
+ "owner",
+ }
+ ZONE_LABELS = (
+ "topology.kubernetes.io/zone",
+ "yelp.com/habitat",
+ "yelp.com/eni_config",
+ "karpenter.sh/nodepool",
+ "topology.ebs.csi.aws.com/zone",
+ )
+ JOB_TYPE_LABEL_NAME = "job_type"
+
+ GPU_RESOURCE_NAME = "nvidia.com/gpu"
+ DEFAULT_STORAGE_CLASS_NAME = "ebs"
+
+ DEFAULT_SIDECAR_REQUEST: KubeContainerResourceRequest = {
+ "cpu": 0.1,
+ "memory": "1024Mi",
+ "ephemeral-storage": "256Mi",
+ }
+
+ DEFAULT_PROJECTED_SA_EXPIRATION_SECONDS = 3600
+ PROJECTED_SA_TOKEN_PATH = "token"
+
+
+ # conditions is None when creating a new HPA, but the client raises an error in that case.
+ # For detail, https://github.com/kubernetes-client/python/issues/553
+ # This hack should be removed when the issue got fixed.
+ # This is no better way to work around rn.
+ class MonkeyPatchAutoScalingConditions(V2HorizontalPodAutoscalerStatus):
+ @property
+ def conditions(self) -> Sequence[V2HorizontalPodAutoscalerCondition]:
+ return super().conditions()
+
+ @conditions.setter
+ def conditions(
+ self, conditions: Optional[Sequence[V2HorizontalPodAutoscalerCondition]]
+ ) -> None:
+ self._conditions = list() if conditions is None else conditions
+
+
+ models.V2HorizontalPodAutoscalerStatus = MonkeyPatchAutoScalingConditions
+
+
+ class KubeKind(NamedTuple):
+ singular: str
+ plural: str
+
+
+ class KubeDeployment(NamedTuple):
+ service: str
+ instance: str
+ git_sha: str
+ image_version: Optional[str]
+ config_sha: str
+ namespace: str
+ replicas: Optional[int]
+
+
+ class KubeCustomResource(NamedTuple):
+ service: str
+ instance: str
+ config_sha: str
+ git_sha: str
+ kind: str
+ namespace: str
+ name: str
+
+
+ class KubeContainerResources(NamedTuple):
+ cpus: float
+ mem: float # mb
+ disk: float # mb
+
+
+ class KubernetesServiceRegistration(NamedTuple):
+ name: str
+ instance: str
+ port: int
+ pod_ip: str
+ registrations: Sequence[str]
+ weight: int
+
+
+ class CustomResourceDefinition(NamedTuple):
+ file_prefix: str
+ version: str
+ kube_kind: KubeKind
+ group: str
+
+
+ class KubeLifecycleDict(TypedDict, total=False):
+ termination_grace_period_seconds: int
+ pre_stop_command: Union[str, List[str]]
+ pre_stop_drain_seconds: int
+ pre_stop_wait_for_connections_to_complete: bool
+
+
+ class KubeAffinityCondition(TypedDict, total=False):
+ service: str
+ instance: str
+
+
+ class KubeWeightedAffinityCondition(KubeAffinityCondition):
+ weight: int
+
+
+ class DatastoreCredentialsConfig(TypedDict, total=False):
+ mysql: List[str]
+
+
+ def _set_disrupted_pods(self: Any, disrupted_pods: Mapping[str, datetime]) -> None:
+ """Private function used to patch the setter for V1PodDisruptionBudgetStatus.
+ Can be removed once https://github.com/kubernetes-client/python/issues/466 is resolved
+ """
+ self._disrupted_pods = disrupted_pods
+
+
+ SidecarResourceRequirements = TypedDict(
+ "SidecarResourceRequirements",
+ {
+ "requests": KubeContainerResourceRequest,
+ "limits": KubeContainerResourceRequest,
+ },
+ total=False,
+ )
+
+
+ KubePodAnnotations = TypedDict(
+ "KubePodAnnotations",
+ {
+ "autoscaling": str,
+ "iam.amazonaws.com/role": str,
+ "paasta.yelp.com/prometheus_path": str,
+ "paasta.yelp.com/prometheus_port": str,
+ "paasta.yelp.com/routable_ip": str,
+ "smartstack_registrations": str,
+ },
+ total=False,
+ )
+
+ KubePodLabels = TypedDict(
+ "KubePodLabels",
+ {
+ # NOTE: we can't use the paasta_prefixed() helper here
+ # since mypy expects TypedDict keys to be string literals
+ "paasta.yelp.com/deploy_group": str,
+ "paasta.yelp.com/git_sha": str,
+ "paasta.yelp.com/image_version": str,
+ "paasta.yelp.com/instance": str,
+ "paasta.yelp.com/prometheus_shard": str,
+ "paasta.yelp.com/scrape_piscina_prometheus": str,
+ "paasta.yelp.com/scrape_gunicorn_prometheus": str,
+ "paasta.yelp.com/service": str,
+ "paasta.yelp.com/autoscaled": str,
+ "yelp.com/paasta_git_sha": str,
+ "yelp.com/paasta_instance": str,
+ "yelp.com/paasta_service": str,
+ "sidecar.istio.io/inject": str,
+ "paasta.yelp.com/cluster": str,
+ "paasta.yelp.com/pool": str,
+ "paasta.yelp.com/weight": str,
+ "yelp.com/owner": str,
+ "paasta.yelp.com/managed": str,
+ "elbv2.k8s.aws/pod-readiness-gate-inject": str,
+ },
+ total=False,
+ )
+
+
+ class CryptoKeyConfig(TypedDict):
+ encrypt: List[str]
+ decrypt: List[str]
+
+
+ class NodeSelectorInNotIn(TypedDict):
+ operator: Literal["In", "NotIn"]
+ values: List[str]
+
+
+ class NodeSelectorExistsDoesNotExist(TypedDict):
+ operator: Literal["Exists", "DoesNotExist"]
+
+
+ class NodeSelectorGtLt(TypedDict):
+ operator: Literal["Gt", "Lt"]
+ value: int
+
+
+ NodeSelectorOperator = Union[
+ NodeSelectorInNotIn,
+ NodeSelectorExistsDoesNotExist,
+ NodeSelectorGtLt,
+ ]
+
+
+ NodeSelectorConfig = Union[
+ str,
+ List[str],
+ List[NodeSelectorOperator],
+ ]
+
+
+ class NodeSelectorsPreferredConfigDict(TypedDict):
+ weight: int
+ preferences: Dict[str, NodeSelectorConfig]
+
+
+ class KubernetesDeploymentConfigDict(LongRunningServiceConfigDict, total=False):
+ bounce_method: str
+ bounce_health_params: Dict[str, Any]
+ service_account_name: str
+ node_selectors: Dict[str, NodeSelectorConfig]
+ node_selectors_preferred: List[NodeSelectorsPreferredConfigDict]
+ sidecar_resource_requirements: Dict[str, SidecarResourceRequirements]
+ lifecycle: KubeLifecycleDict
+ anti_affinity: Union[KubeAffinityCondition, List[KubeAffinityCondition]]
+ anti_affinity_preferred: Union[
+ KubeWeightedAffinityCondition, List[KubeWeightedAffinityCondition]
+ ]
+ prometheus_shard: str
+ prometheus_path: str
+ prometheus_port: int
+ routable_ip: bool
+ pod_management_policy: str
+ is_istio_sidecar_injection_enabled: bool
+ boto_keys: List[str]
+ crypto_keys: CryptoKeyConfig
+ datastore_credentials: DatastoreCredentialsConfig
+ topology_spread_constraints: List[TopologySpreadConstraintDict]
+ enable_aws_lb_readiness_gate: bool
+
+
+ def load_kubernetes_service_config_no_cache(
+ service: str,
+ instance: str,
+ cluster: str,
+ load_deployments: bool = True,
+ soa_dir: str = DEFAULT_SOA_DIR,
+ ) -> "KubernetesDeploymentConfig":
+ """Read a service instance's configuration for kubernetes.
+
+ If a branch isn't specified for a config, the 'branch' key defaults to
+ paasta-${cluster}.${instance}.
+
+ :param name: The service name
+ :param instance: The instance of the service to retrieve
+ :param cluster: The cluster to read the configuration for
+ :param load_deployments: A boolean indicating if the corresponding deployments.json for this service
+ should also be loaded
+ :param soa_dir: The SOA configuration directory to read from
+ :returns: A dictionary of whatever was in the config for the service instance"""
+ general_config = service_configuration_lib.read_service_configuration(
+ service, soa_dir=soa_dir
+ )
+ instance_config = load_service_instance_config(
+ service, instance, "kubernetes", cluster, soa_dir=soa_dir
+ )
+ general_config = deep_merge_dictionaries(
+ overrides=instance_config, defaults=general_config
+ )
+
+ branch_dict: Optional[BranchDictV2] = None
+ if load_deployments:
+ deployments_json = load_v2_deployments_json(service, soa_dir=soa_dir)
+ temp_instance_config = KubernetesDeploymentConfig(
+ service=service,
+ cluster=cluster,
+ instance=instance,
+ config_dict=general_config,
+ branch_dict=None,
+ soa_dir=soa_dir,
+ )
+ branch = temp_instance_config.get_branch()
+ deploy_group = temp_instance_config.get_deploy_group()
+ branch_dict = deployments_json.get_branch_dict(service, branch, deploy_group)
+
+ return KubernetesDeploymentConfig(
+ service=service,
+ cluster=cluster,
+ instance=instance,
+ config_dict=general_config,
+ branch_dict=branch_dict,
+ soa_dir=soa_dir,
+ )
+
+
+ @time_cache(ttl=5)
+ def load_kubernetes_service_config(
+ service: str,
+ instance: str,
+ cluster: str,
+ load_deployments: bool = True,
+ soa_dir: str = DEFAULT_SOA_DIR,
+ ) -> "KubernetesDeploymentConfig":
+ """Read a service instance's configuration for kubernetes.
+
+ If a branch isn't specified for a config, the 'branch' key defaults to
+ paasta-${cluster}.${instance}.
+
+ :param name: The service name
+ :param instance: The instance of the service to retrieve
+ :param cluster: The cluster to read the configuration for
+ :param load_deployments: A boolean indicating if the corresponding deployments.json for this service
+ should also be loaded
+ :param soa_dir: The SOA configuration directory to read from
+ :returns: A dictionary of whatever was in the config for the service instance"""
+ return load_kubernetes_service_config_no_cache(
+ service=service,
+ instance=instance,
+ cluster=cluster,
+ load_deployments=load_deployments,
+ soa_dir=soa_dir,
+ )
+
+
+ def limit_size_with_hash(name: str, limit: int = 63, suffix: int = 4) -> str:
+ """Returns `name` unchanged if it's length does not exceed the `limit`.
+ Otherwise, returns truncated `name` with it's hash of size `suffix`
+ appended.
+
+ base32 encoding is chosen as it satisfies the common requirement in
+ various k8s names to be alphanumeric.
+ """
+ if len(name) > limit:
+ digest = hashlib.md5(name.encode()).digest()
+ hashed = base64.b32encode(digest).decode().replace("=", "").lower()
+ return f"{name[:(limit-suffix-1)]}-{hashed[:suffix]}"
+ else:
+ return name
+
+
+ def get_vault_key_secret_name(vault_key: str) -> str:
+ """
+ Vault path may contain `/` slashes which is invalid as secret name
+ V1Secret's data key must match regexp [a-zA-Z0-9._-],
+ which is enforced with schema https://github.com/Yelp/paasta/blob/master/paasta_tools/cli/schemas/adhoc_schema.json#L80
+ Source: https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1Secret.md
+ """
+ return vault_key.replace("/", "-")
+
+
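# --- Editor's illustration (not part of the packaged file) ---
# A minimal sketch of how the two name helpers above behave, assuming
# paasta_tools is installed; the input strings are made up for the example.
from paasta_tools.kubernetes_tools import get_vault_key_secret_name, limit_size_with_hash

assert len(limit_size_with_hash("a" * 70)) == 63  # truncated to 58 chars + "-" + 4-char base32 hash
assert limit_size_with_hash("short-name") == "short-name"  # under the 63-char limit, returned unchanged
assert get_vault_key_secret_name("secrets/foo/bar") == "secrets-foo-bar"  # slashes become dashes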
+ class InvalidKubernetesConfig(Exception):
+ def __init__(self, exception: Exception, service: str, instance: str) -> None:
+ super().__init__(
+ f"Couldn't generate config for kubernetes service: {service}.{instance}: {exception}"
+ )
+
+
+ class KubeClient:
+ @functools.lru_cache() # type: ignore
+ def __new__(
+ cls,
+ component: Optional[str] = None,
+ config_file: Optional[str] = None,
+ context: Optional[str] = None,
+ ) -> "KubeClient":
+ """By @lru_cache'ing this function, repeated instantiations of KubeClient with the same arguments will return the
+ exact same object. This makes it possible to effectively cache function calls that take a KubeClient as an
+ argument."""
+ return super().__new__(cls)
+
+ @functools.lru_cache() # type: ignore
+ def __init__(
+ self,
+ component: Optional[str] = None,
+ config_file: Optional[str] = None,
+ context: Optional[str] = None,
+ ) -> None:
+ if not config_file:
+ config_file = os.environ.get("KUBECONFIG", KUBE_CONFIG_PATH)
+ if not context:
+ context = os.environ.get("KUBECONTEXT")
+ kube_config.load_kube_config(
+ config_file=config_file,
+ context=context,
+ )
+
+ models.V1PodDisruptionBudgetStatus.disrupted_pods = property(
+ fget=lambda *args, **kwargs: models.V1PodDisruptionBudgetStatus.disrupted_pods(
+ *args, **kwargs
+ ),
+ fset=_set_disrupted_pods,
+ )
+ if not component:
+ # If we don't get an explicit component set via constructor,
+ # try to find it by looking back in the stack, and getting `__file__` from
+ # the context calling this constructor
+ # Normally, `__module__` would make more sense, but since we have a lot of
+ # single scripts we directly call, that would be set to `__main__` most of the time.
+ current = currentframe()
+ parent = current.f_back
+ component = parent.f_globals.get("__file__", "unknown")
+
+ self.api_client = kube_client.ApiClient()
+ self.api_client.user_agent = f"paasta/{component}/v{__version__}"
+
+ self.deployments = kube_client.AppsV1Api(self.api_client)
+ self.core = kube_client.CoreV1Api(self.api_client)
+ self.policy = kube_client.PolicyV1Api(self.api_client)
+ self.apiextensions = kube_client.ApiextensionsV1Api(self.api_client)
+ self.batches = kube_client.BatchV1Api(self.api_client)
+
+ self.custom = kube_client.CustomObjectsApi(self.api_client)
+ self.autoscaling = kube_client.AutoscalingV2Api(self.api_client)
+ self.rbac = kube_client.RbacAuthorizationV1Api(self.api_client)
+
+ self.request = self.api_client.request
+ # This function is used by the k8s client to serialize OpenAPI objects
+ # into JSON before posting to the api. The JSON output can be used
+ # in place of OpenAPI objects in client function calls. This allows us
+ # to monkey-patch the JSON data with configs the api supports, but the
+ # Python client lib may not yet.
+ self.jsonify = self.api_client.sanitize_for_serialization
+
+
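# --- Editor's illustration (not part of the packaged file) ---
# Because both __new__ and __init__ above are wrapped in functools.lru_cache,
# constructing KubeClient twice with identical arguments yields the same object,
# so callers can treat it as a per-argument singleton (a working kubeconfig at
# KUBE_CONFIG_PATH or $KUBECONFIG is assumed to exist):
#
#   client_a = KubeClient(component="example-script")
#   client_b = KubeClient(component="example-script")
#   assert client_a is client_b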
+ def allowlist_denylist_to_requirements(
+ allowlist: DeployWhitelist, denylist: DeployBlacklist
+ ) -> List[Tuple[str, str, List[str]]]:
+ """Converts deploy_whitelist and deploy_blacklist to a list of
+ requirements, which can be converted to node affinities.
+ """
+ requirements = []
+ # convert whitelist into a node selector req
+ if allowlist:
+ location_type, alloweds = allowlist
+ requirements.append((to_node_label(location_type), "In", alloweds))
+ # convert blacklist into multiple node selector reqs
+ if denylist:
+ # not going to prune for duplicates, or group blacklist items for
+ # same location_type. makes testing easier and k8s can handle it.
+ for location_type, not_allowed in denylist:
+ requirements.append((to_node_label(location_type), "NotIn", [not_allowed]))
+ return requirements
+
+
+ def raw_selectors_to_requirements(
+ raw_selectors: Mapping[str, NodeSelectorConfig]
+ ) -> List[Tuple[str, str, List[str]]]:
+ """Converts certain node_selectors into requirements, which can be
+ converted to node affinities.
+ """
+ requirements: List[Tuple[str, str, List[str]]] = []
+
+ for label, configs in raw_selectors.items():
+ operator_configs: List[NodeSelectorOperator] = []
+
+ if type(configs) is not list or len(configs) == 0:
+ continue
+ elif type(configs[0]) is str:
+ # specifying an array/list of strings for a label is shorthand
+ # for the "In" operator
+ operator_configs = [
+ NodeSelectorInNotIn(
+ {"operator": "In", "values": cast(List[str], configs)}
+ )
+ ]
+ else:
+ # configs should already be a List[NodeSelectorOperator]
+ operator_configs = cast(List[NodeSelectorOperator], configs)
+
+ label = to_node_label(label)
+ for config in operator_configs:
+ if config["operator"] in {"In", "NotIn"}:
+ config = cast(NodeSelectorInNotIn, config)
+ values = config["values"]
+ elif config["operator"] in {"Exists", "DoesNotExist"}:
+ config = cast(NodeSelectorExistsDoesNotExist, config)
+ values = []
+ elif config["operator"] in {"Gt", "Lt"}:
+ config = cast(NodeSelectorGtLt, config)
+ # config["value"] is validated by jsonschema to be an int. but,
+ # k8s expects singleton list of the int represented as a str
+ # for these operators.
+ values = [str(config["value"])]
+ else:
+ raise ValueError(
+ f"Unknown k8s node affinity operator: {config['operator']}"
+ )
+ requirements.append((label, config["operator"], values))
+
+ return requirements
+
+
+ def registration_label(namespace: str) -> str:
+ """Returns namespace prefixed with registrations.{paasta_prefix}/, with
+ name part of label key truncated to 63 characters with hash as suffix
+ if needed.
+ """
+ limited_namespace = limit_size_with_hash(namespace, limit=63, suffix=4)
+ return f"registrations.{PAASTA_ATTRIBUTE_PREFIX}{limited_namespace}"
+
+
+ def contains_zone_label(node_selectors: Dict[str, NodeSelectorConfig]) -> bool:
+ return any(k in node_selectors for k in ZONE_LABELS)
+
+
+ class KubernetesDeploymentConfig(LongRunningServiceConfig):
+ config_dict: KubernetesDeploymentConfigDict
+
+ config_filename_prefix = "kubernetes"
+
+ def __init__(
+ self,
+ service: str,
+ cluster: str,
+ instance: str,
+ config_dict: KubernetesDeploymentConfigDict,
+ branch_dict: Optional[BranchDictV2],
+ soa_dir: str = DEFAULT_SOA_DIR,
+ ) -> None:
+ super().__init__(
+ cluster=cluster,
+ instance=instance,
+ service=service,
+ config_dict=config_dict,
+ branch_dict=branch_dict,
+ soa_dir=soa_dir,
+ )
+
+ def copy(self) -> "KubernetesDeploymentConfig":
+ return self.__class__(
+ service=self.service,
+ instance=self.instance,
+ cluster=self.cluster,
+ config_dict=self.config_dict.copy(),
+ branch_dict=(
+ self.branch_dict.copy() if self.branch_dict is not None else None
+ ),
+ soa_dir=self.soa_dir,
+ )
+
+ def get_kubernetes_namespace(self) -> str:
+ return self.get_namespace()
+
+ def get_cmd(self) -> Optional[List[str]]:
+ cmd = super(LongRunningServiceConfig, self).get_cmd()
+ if cmd:
+ if isinstance(cmd, str):
+ return ["sh", "-c", cmd]
+ elif isinstance(cmd, list):
+ return cmd
+ else:
+ raise ValueError("cmd should be str or list")
+ else:
+ return None
+
+ def get_bounce_method(self) -> str:
+ """Get the bounce method specified in the service's kubernetes configuration."""
+ # map existing bounce methods to k8s equivalents.
+ # but if there's an EBS volume we must downthenup to free up the volume.
+ # in the future we may support stateful sets to dynamically create the volumes
+ bounce_method = self.config_dict.get("bounce_method", "crossover")
+ if self.get_aws_ebs_volumes() and not bounce_method == "downthenup":
+ raise Exception(
+ "If service instance defines an EBS volume it must use a downthenup bounce_method"
+ )
+ return bounce_method
+
+ # TODO: move the default scaling policy to system paasta configs
+ def get_autoscaling_scaling_policy(
+ self,
+ max_replicas: int,
+ autoscaling_params: AutoscalingParamsDict,
+ ) -> Dict:
+ """Returns the k8s HPA scaling policy in raw JSON. Requires k8s v1.18
+ to work.
+ """
+ # The HPA scaling algorithm is as follows. Every sync period (default:
+ # 15 seconds), the HPA will:
+ # 1. determine what the desired capacity is from metrics
+ # 2. apply min/max replica scaling limits
+ # 3. rate-limit the scaling magnitude (e.g. scale down by no more than
+ # 30% of current replicas)
+ # 4. constrain the scaling magnitude by the period seconds (e.g. scale
+ # down by no more than 30% of current replicas per 60 seconds)
+ # 5. record the desired capacity, then pick the highest capacity from
+ # the stabilization window (default: last 300 seconds) as the final
+ # desired capacity.
+ # - the idea is to stabilize scaling against (heavily) fluctuating
+ # metrics
+ policy = {
+ "scaleDown": {
+ "stabilizationWindowSeconds": 300,
+ # the policy in a human-readable way: scale down every 60s by
+ # at most 30% of current replicas.
+ "selectPolicy": "Max",
+ "policies": [{"type": "Percent", "value": 30, "periodSeconds": 60}],
+ }
+ }
+ policy["scaleDown"].update(autoscaling_params.get("scaledown_policies", {}))
+ return policy
+
+ def namespace_external_metric_name(self, metric_name: str) -> str:
+ return f"{self.get_sanitised_deployment_name()}-{metric_name}"
+
+ def get_autoscaling_provider_spec(
+ self, name: str, namespace: str, provider: MetricsProviderDict
+ ) -> Optional[V2MetricSpec]:
+ target = provider["setpoint"]
+ prometheus_hpa_metric_name = (
+ f"{self.namespace_external_metric_name(provider['type'])}-prom"
+ )
+
+ if provider["type"] == METRICS_PROVIDER_CPU:
+ return V2MetricSpec(
+ type="Resource",
+ resource=V2ResourceMetricSource(
+ name="cpu",
+ target=V2MetricTarget(
+ type="Utilization",
+ average_utilization=int(target * 100),
+ ),
+ ),
+ )
+ elif provider["type"] in {
+ METRICS_PROVIDER_UWSGI,
+ METRICS_PROVIDER_PISCINA,
+ METRICS_PROVIDER_GUNICORN,
+ METRICS_PROVIDER_ACTIVE_REQUESTS,
+ }:
+ return V2MetricSpec(
+ type="Object",
+ object=V2ObjectMetricSource(
+ metric=V2MetricIdentifier(name=prometheus_hpa_metric_name),
+ described_object=V2CrossVersionObjectReference(
+ api_version="apps/v1", kind="Deployment", name=name
+ ),
+ target=V2MetricTarget(
+ type="Value",
+ # we average the number of instances needed to handle the current (or
+ # averaged) load instead of the load itself as this leads to more
+ # stable behavior. we return the percentage by which we want to
+ # scale, so the target in the HPA should always be 1.
+ # PAASTA-16756 for details
+ value=1,
+ ),
+ ),
+ )
+ elif provider["type"] == METRICS_PROVIDER_PROMQL:
+ return V2MetricSpec(
+ type="Object",
+ object=V2ObjectMetricSource(
+ metric=V2MetricIdentifier(name=prometheus_hpa_metric_name),
+ described_object=V2CrossVersionObjectReference(
+ api_version="apps/v1", kind="Deployment", name=name
+ ),
+ target=V2MetricTarget(
+ # Use the setpoint specified by the user.
+ type="Value",
+ value=target,
+ ),
+ ),
+ )
+ elif provider["type"] == METRICS_PROVIDER_UWSGI_V2:
+ return V2MetricSpec(
+ type="Object",
+ object=V2ObjectMetricSource(
+ metric=V2MetricIdentifier(name=prometheus_hpa_metric_name),
+ described_object=V2CrossVersionObjectReference(
+ api_version="apps/v1", kind="Deployment", name=name
+ ),
+ target=V2MetricTarget(
+ type="AverageValue",
+ average_value=target,
+ ),
+ ),
+ )
+
+ log.error(
+ f"Unknown metrics_provider specified: {provider['type']} for\
+ {name}/name in namespace{namespace}"
+ )
+ return None
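# --- Editor's illustration (not part of the packaged file) ---
# A minimal sketch of how the "scaledown_policies" override consumed by
# get_autoscaling_scaling_policy() above merges into the default scaleDown
# block via dict.update(): overridden keys replace the defaults, the rest stay.
default_scale_down = {
    "stabilizationWindowSeconds": 300,
    "selectPolicy": "Max",
    "policies": [{"type": "Percent", "value": 30, "periodSeconds": 60}],
}
default_scale_down.update({"stabilizationWindowSeconds": 60})  # hypothetical soa-config override
assert default_scale_down["stabilizationWindowSeconds"] == 60
assert default_scale_down["selectPolicy"] == "Max"  # untouched defaults remain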
+
+ def get_autoscaling_metric_spec(
+ self,
+ name: str,
+ cluster: str,
+ kube_client: KubeClient,
+ namespace: str,
+ ) -> Optional[V2HorizontalPodAutoscaler]:
+ # Returns None if an HPA should not be attached based on the config,
+ # or the config is invalid.
+
+ if self.get_desired_state() == "stop":
+ return None
+
+ if not self.is_autoscaling_enabled():
+ return None
+
+ autoscaling_params = self.get_autoscaling_params()
+ if autoscaling_params["metrics_providers"][0]["decision_policy"] == "bespoke":
+ return None
+
+ min_replicas = self.get_min_instances()
+ max_replicas = self.get_max_instances()
+ if min_replicas == 0 or max_replicas == 0:
+ log.error(
+ f"Invalid value for min or max_instances on {name}: {min_replicas}, {max_replicas}"
+ )
+ return None
+
+ metrics = []
+ for provider in autoscaling_params["metrics_providers"]:
+ spec = self.get_autoscaling_provider_spec(name, namespace, provider)
+ if spec is not None:
+ metrics.append(spec)
+ scaling_policy = self.get_autoscaling_scaling_policy(
+ max_replicas,
+ autoscaling_params,
+ )
+
+ labels = {
+ paasta_prefixed("service"): self.service,
+ paasta_prefixed("instance"): self.instance,
+ paasta_prefixed("pool"): self.get_pool(),
+ paasta_prefixed("managed"): "true",
+ }
+
+ hpa = V2HorizontalPodAutoscaler(
+ kind="HorizontalPodAutoscaler",
+ metadata=V1ObjectMeta(
+ name=name, namespace=namespace, annotations=dict(), labels=labels
+ ),
+ spec=V2HorizontalPodAutoscalerSpec(
+ behavior=scaling_policy,
+ max_replicas=max_replicas,
+ min_replicas=min_replicas,
+ metrics=metrics,
+ scale_target_ref=V2CrossVersionObjectReference(
+ api_version="apps/v1", kind="Deployment", name=name
+ ),
+ ),
+ )
+
+ return hpa
949
+
950
+ def get_deployment_strategy_config(self) -> V1DeploymentStrategy:
951
+ # get soa defined bounce_method
952
+ bounce_method = self.get_bounce_method()
953
+ # get k8s equivalent
954
+ strategy_type = KUBE_DEPLOY_STATEGY_MAP[bounce_method]
955
+
956
+ if strategy_type == "RollingUpdate":
957
+ max_surge = "100%"
958
+ if bounce_method == "crossover":
959
+ max_unavailable = "{}%".format(
960
+ int((1 - self.get_bounce_margin_factor()) * 100)
961
+ )
962
+ elif bounce_method == "brutal":
963
+ # `brutal` bounce method means a bounce margin factor of 0, do not call get_bounce_margin_factor
964
+ max_unavailable = "100%"
965
+ else:
966
+ raise Exception("Unknown bounce method for RollingUpdate.")
967
+
969
+ # this translates bounce_margin to k8s speak maxUnavailable
970
+ # for now we keep max_surge 100% but we could customise later
971
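+ # e.g. (illustrative): with the crossover bounce method, a bounce_margin_factor of
+ # 0.95 yields max_unavailable="5%"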
+ rolling_update = V1RollingUpdateDeployment(
972
+ max_surge=max_surge, max_unavailable=max_unavailable
973
+ )
974
+ else:
975
+ rolling_update = None
976
+
977
+ return V1DeploymentStrategy(type=strategy_type, rolling_update=rolling_update)
978
+
979
+ def get_sanitised_volume_name(self, volume_name: str, length_limit: int = 0) -> str:
980
+ """I know but we really aren't allowed many characters..."""
981
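+ # Illustrative example (not in the original source; assumes sanitise_kubernetes_name
+ # leaves this value unchanged): get_sanitised_volume_name("/nail/srv/configs")
+ # -> "slash-nailslash-srvslash-configs"; only names longer than length_limit are
+ # truncated and suffixed with "--" plus the first 4 hex chars of their md5 digest.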
+ volume_name = volume_name.rstrip("/")
982
+ sanitised = volume_name.replace("/", "slash-").replace(".", "dot-")
983
+ sanitised_name = sanitise_kubernetes_name(sanitised)
984
+ if length_limit and len(sanitised_name) > length_limit:
985
+ sanitised_name = (
986
+ sanitised_name[0 : length_limit - 6]
987
+ + "--"
988
+ + hashlib.md5(sanitised_name.encode("ascii")).hexdigest()[:4]
989
+ )
990
+ return sanitised_name
991
+
992
+ def get_docker_volume_name(self, docker_volume: DockerVolume) -> str:
993
+ return self.get_sanitised_volume_name(
994
+ "host--{name}".format(name=docker_volume["hostPath"]), length_limit=63
995
+ )
996
+
997
+ def get_persistent_volume_name(self, docker_volume: PersistentVolume) -> str:
998
+ return self.get_sanitised_volume_name(
999
+ "pv--{name}".format(name=docker_volume["container_path"]), length_limit=253
1000
+ )
1001
+
1002
+ def get_aws_ebs_volume_name(self, aws_ebs_volume: AwsEbsVolume) -> str:
1003
+ return self.get_sanitised_volume_name(
1004
+ "aws-ebs--{name}{partition}".format(
1005
+ name=aws_ebs_volume["volume_id"],
1006
+ partition=aws_ebs_volume.get("partition", ""),
1007
+ )
1008
+ )
1009
+
1010
+ def get_secret_volume_name(self, secret_volume: SecretVolume) -> str:
1011
+ return self.get_sanitised_volume_name(
1012
+ "secret--{name}".format(name=secret_volume["secret_name"]), length_limit=63
1013
+ )
1014
+
1015
+ def get_projected_sa_volume_name(
1016
+ self, projected_sa_volume: ProjectedSAVolume
1017
+ ) -> str:
1018
+ return self.get_sanitised_volume_name(
1019
+ "projected-sa--{audience}".format(audience=projected_sa_volume["audience"]),
1020
+ length_limit=63,
1021
+ )
1022
+
1023
+ def get_boto_secret_volume_name(self, service_name: str) -> str:
1024
+ return self.get_sanitised_volume_name(
1025
+ f"secret-boto-key-{service_name}", length_limit=63
1026
+ )
1027
+
1028
+ def get_crypto_secret_volume_name(self, service_name: str) -> str:
1029
+ return self.get_sanitised_volume_name(
1030
+ f"secret-crypto-key-{service_name}", length_limit=63
1031
+ )
1032
+
1033
+ def read_only_mode(self, d: VolumeWithMode) -> bool:
1034
+ return d.get("mode", "RO") == "RO"
1035
+
1036
+ def get_readiness_check_script(
1037
+ self, system_paasta_config: SystemPaastaConfig
1038
+ ) -> List[str]:
1039
+ """Script to check if a service is up in smartstack / envoy"""
1040
+ enable_envoy_check = self.get_enable_envoy_readiness_check(system_paasta_config)
1041
+ enable_nerve_check = self.get_enable_nerve_readiness_check(system_paasta_config)
1042
+ if enable_nerve_check and enable_envoy_check:
1043
+ return system_paasta_config.get_envoy_nerve_readiness_check_script()
1044
+ elif enable_envoy_check:
1045
+ return system_paasta_config.get_envoy_readiness_check_script()
1046
+ else:
1047
+ return system_paasta_config.get_nerve_readiness_check_script()
1048
+
1049
+ def get_sidecar_containers(
1050
+ self,
1051
+ system_paasta_config: SystemPaastaConfig,
1052
+ service_namespace_config: ServiceNamespaceConfig,
1053
+ hacheck_sidecar_volumes: Sequence[DockerVolume],
1054
+ ) -> Sequence[V1Container]:
1055
+ hacheck_container = self.get_hacheck_sidecar_container(
1056
+ system_paasta_config,
1057
+ service_namespace_config,
1058
+ hacheck_sidecar_volumes,
1059
+ )
1060
+ gunicorn_exporter_container = self.get_gunicorn_exporter_sidecar_container(
1061
+ system_paasta_config
1062
+ )
1063
+
1064
+ sidecars = []
1065
+ if hacheck_container:
1066
+ sidecars.append(hacheck_container)
1067
+ if gunicorn_exporter_container:
1068
+ sidecars.append(gunicorn_exporter_container)
1069
+ return sidecars
1070
+
1071
+ def get_readiness_check_prefix(
1072
+ self,
1073
+ system_paasta_config: SystemPaastaConfig,
1074
+ initial_delay: float,
1075
+ period_seconds: float,
1076
+ ) -> List[str]:
1077
+ return [
1078
+ x.format(initial_delay=initial_delay, period_seconds=period_seconds)
1079
+ for x in system_paasta_config.get_readiness_check_prefix_template()
1080
+ ]
1081
+
1082
+ def get_hacheck_sidecar_container(
1083
+ self,
1084
+ system_paasta_config: SystemPaastaConfig,
1085
+ service_namespace_config: ServiceNamespaceConfig,
1086
+ hacheck_sidecar_volumes: Sequence[DockerVolume],
1087
+ ) -> Optional[V1Container]:
1088
+ registrations = " ".join(self.get_registrations())
1089
+ # s_m_j currently asserts that services are healthy in smartstack before
1090
+ # continuing a bounce. this readiness check lets us achieve the same thing
1091
+ readiness_probe: Optional[V1Probe]
1092
+ if service_namespace_config.is_in_smartstack() and (
1093
+ self.get_enable_nerve_readiness_check(system_paasta_config)
1094
+ or self.get_enable_envoy_readiness_check(system_paasta_config)
1095
+ ):
1096
+ initial_delay = self.get_healthcheck_grace_period_seconds()
1097
+ # COMPINFRA-989: this used to be hardcoded to always be 10 seconds;
1098
+ # to avoid causing rolling updates on everything at once, this is a config option for now
1099
+ if not system_paasta_config.get_hacheck_match_initial_delay():
1100
+ initial_delay = 10
1101
+ period_seconds = 10
1102
+ readiness_probe = V1Probe(
1103
+ _exec=V1ExecAction(
1104
+ command=self.get_readiness_check_prefix(
1105
+ system_paasta_config=system_paasta_config,
1106
+ initial_delay=initial_delay,
1107
+ period_seconds=period_seconds,
1108
+ )
1109
+ + self.get_readiness_check_script(system_paasta_config)
1110
+ + [str(self.get_container_port())]
1111
+ + self.get_registrations()
1112
+ ),
1113
+ initial_delay_seconds=initial_delay,
1114
+ period_seconds=period_seconds,
1115
+ )
1116
+ else:
1117
+ readiness_probe = None
1118
+
1119
+ hacheck_registrations_env = V1EnvVar(
1120
+ name="MESH_REGISTRATIONS",
1121
+ value=" ".join(self.get_registrations()),
1122
+ )
1123
+
1124
+ if service_namespace_config.is_in_smartstack():
1125
+ return V1Container(
1126
+ image=system_paasta_config.get_hacheck_sidecar_image_url(),
1127
+ lifecycle=V1Lifecycle(
1128
+ pre_stop=V1LifecycleHandler(
1129
+ _exec=V1ExecAction(
1130
+ command=[
1131
+ "/bin/sh",
1132
+ "-c",
1133
+ f"/usr/bin/hadown {registrations}; sleep {self.get_hacheck_prestop_sleep_seconds()}",
1134
+ ]
1135
+ )
1136
+ )
1137
+ ),
1138
+ resources=self.get_sidecar_resource_requirements(
1139
+ "hacheck",
1140
+ system_paasta_config,
1141
+ ),
1142
+ name=HACHECK_POD_NAME,
1143
+ env=self.get_kubernetes_environment() + [hacheck_registrations_env],
1144
+ ports=[V1ContainerPort(container_port=6666)],
1145
+ readiness_probe=readiness_probe,
1146
+ volume_mounts=self.get_volume_mounts(
1147
+ docker_volumes=hacheck_sidecar_volumes,
1148
+ aws_ebs_volumes=[],
1149
+ persistent_volumes=[],
1150
+ secret_volumes=[],
1151
+ projected_sa_volumes=[],
1152
+ ),
1153
+ )
1154
+ return None
1155
+
1156
+ def get_gunicorn_exporter_sidecar_container(
1157
+ self,
1158
+ system_paasta_config: SystemPaastaConfig,
1159
+ ) -> Optional[V1Container]:
1160
+
1161
+ if self.should_use_metrics_provider(METRICS_PROVIDER_GUNICORN):
1162
+ return V1Container(
1163
+ image=system_paasta_config.get_gunicorn_exporter_sidecar_image_url(),
1164
+ resources=self.get_sidecar_resource_requirements(
1165
+ "gunicorn_exporter", system_paasta_config
1166
+ ),
1167
+ name=GUNICORN_EXPORTER_POD_NAME,
1168
+ env=self.get_kubernetes_environment(),
1169
+ ports=[V1ContainerPort(container_port=9117)],
1170
+ lifecycle=V1Lifecycle(
1171
+ pre_stop=V1LifecycleHandler(
1172
+ _exec=V1ExecAction(
1173
+ command=[
1174
+ "/bin/sh",
1175
+ "-c",
1176
+ # we sleep for the same amount of time as we do after a hadown to ensure that we have accurate
1177
+ # metrics up until our Pod dies
1178
+ f"sleep {self.get_hacheck_prestop_sleep_seconds()}",
1179
+ ]
1180
+ )
1181
+ )
1182
+ ),
1183
+ )
1184
+
1185
+ return None
1186
+
1187
+ def get_env(
1188
+ self, system_paasta_config: Optional["SystemPaastaConfig"] = None
1189
+ ) -> Dict[str, str]:
1190
+ env = super().get_env(system_paasta_config=system_paasta_config)
1191
+ # see CLIENTOBS-64 and PAASTA-17558
1192
+ # this is deliberately set here to make sure it is only available for
1193
+ # k8s long-running services. putting this in `InstanceConfig.get_env` will
1194
+ # make it available for all workloads, which will cause big bounces and
1195
+ # continuous reconfiguring every time soa-configs is updated unless the
1196
+ # env var is deliberately excluded from config hashing for those workloads
1197
+ # as well.
1198
+ env["PAASTA_SOA_CONFIGS_SHA"] = read_soa_metadata(soa_dir=self.soa_dir).get(
1199
+ "git_sha", ""
1200
+ )
1201
+
1202
+ # We drop PAASTA_CLUSTER here because it will be added via `get_kubernetes_environment()`
1203
+ env.pop("PAASTA_CLUSTER", None)
1204
+
1205
+ return env
1206
+
1207
+ def get_env_vars_that_use_secrets(self) -> Tuple[Dict[str, str], Dict[str, str]]:
1208
+ """Returns two dictionaries of environment variable name->value; the first is vars that use non-shared
1209
+ secrets, and the second is vars that use shared secrets.
1210
+
1211
+ The values of the dictionaries are the secret refs as formatted in yelpsoa-configs, e.g. "SECRET(foo)"
1212
+ or "SHARED_SECRET(bar)". These can be decoded with get_secret_name_from_ref.
1213
+ """
1214
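+ # e.g. (illustrative): an env of {"DB_PASS": "SECRET(db_pass)",
+ # "API_KEY": "SHARED_SECRET(api_key)", "LOG_LEVEL": "info"} returns
+ # ({"DB_PASS": "SECRET(db_pass)"}, {"API_KEY": "SHARED_SECRET(api_key)"}).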
+ secret_env_vars = {}
1215
+ shared_secret_env_vars = {}
1216
+ for k, v in self.get_env().items():
1217
+ if is_secret_ref(v):
1218
+ if is_shared_secret(v):
1219
+ shared_secret_env_vars[k] = v
1220
+ else:
1221
+ secret_env_vars[k] = v
1222
+ return secret_env_vars, shared_secret_env_vars
1223
+
1224
+ def get_container_env(self) -> Sequence[V1EnvVar]:
1225
+ secret_env_vars, shared_secret_env_vars = self.get_env_vars_that_use_secrets()
1226
+
1227
+ user_env = [
1228
+ V1EnvVar(name=name, value=value)
1229
+ for name, value in self.get_env().items()
1230
+ if name
1231
+ not in list(secret_env_vars.keys()) + list(shared_secret_env_vars.keys())
1232
+ ]
1233
+ user_env += self.get_kubernetes_secret_env_vars(
1234
+ secret_env_vars=secret_env_vars,
1235
+ shared_secret_env_vars=shared_secret_env_vars,
1236
+ )
1237
+ return user_env + self.get_kubernetes_environment() # type: ignore
1238
+
1239
+ def get_kubernetes_secret_env_vars(
1240
+ self,
1241
+ secret_env_vars: Mapping[str, str],
1242
+ shared_secret_env_vars: Mapping[str, str],
1243
+ ) -> Sequence[V1EnvVar]:
1244
+ ret = []
1245
+ for k, v in secret_env_vars.items():
1246
+ secret = get_secret_name_from_ref(v)
1247
+ ret.append(
1248
+ V1EnvVar(
1249
+ name=k,
1250
+ value_from=V1EnvVarSource(
1251
+ secret_key_ref=V1SecretKeySelector(
1252
+ name=get_paasta_secret_name(
1253
+ self.get_namespace(), self.get_service(), secret
1254
+ ),
1255
+ key=secret,
1256
+ optional=False,
1257
+ )
1258
+ ),
1259
+ )
1260
+ )
1261
+ for k, v in shared_secret_env_vars.items():
1262
+ secret = get_secret_name_from_ref(v)
1263
+ ret.append(
1264
+ V1EnvVar(
1265
+ name=k,
1266
+ value_from=V1EnvVarSource(
1267
+ secret_key_ref=V1SecretKeySelector(
1268
+ name=get_paasta_secret_name(
1269
+ self.get_namespace(), SHARED_SECRET_SERVICE, secret
1270
+ ),
1271
+ key=secret,
1272
+ optional=False,
1273
+ )
1274
+ ),
1275
+ )
1276
+ )
1277
+ return ret
1278
+
1279
+ def get_kubernetes_environment(self) -> List[V1EnvVar]:
1280
+ kubernetes_env = [
1281
+ V1EnvVar(
1282
+ name="PAASTA_POD_IP",
1283
+ value_from=V1EnvVarSource(
1284
+ field_ref=V1ObjectFieldSelector(field_path="status.podIP")
1285
+ ),
1286
+ ),
1287
+ V1EnvVar(
1288
+ # this is used by some functions of operator-sdk
1289
+ # it uses this environment variable to get the pods
1290
+ name="POD_NAME",
1291
+ value_from=V1EnvVarSource(
1292
+ field_ref=V1ObjectFieldSelector(field_path="metadata.name")
1293
+ ),
1294
+ ),
1295
+ V1EnvVar(
1296
+ name="PAASTA_HOST",
1297
+ value_from=V1EnvVarSource(
1298
+ field_ref=V1ObjectFieldSelector(field_path="spec.nodeName")
1299
+ ),
1300
+ ),
1301
+ V1EnvVar(
1302
+ name="PAASTA_CLUSTER",
1303
+ value_from=V1EnvVarSource(
1304
+ field_ref=V1ObjectFieldSelector(
1305
+ field_path="metadata.labels['"
1306
+ + paasta_prefixed("cluster")
1307
+ + "']"
1308
+ )
1309
+ ),
1310
+ ),
1311
+ ]
1312
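+ # Illustrative note: these values are resolved by the downward API at pod start, so a
+ # pod scheduled on a (hypothetical) node "k8s-node-1" in cluster "norcal-devc" would
+ # see PAASTA_HOST=k8s-node-1 and PAASTA_CLUSTER=norcal-devc.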
+ return kubernetes_env
1313
+
1314
+ def get_resource_requirements(self) -> V1ResourceRequirements:
1315
+ limits = {
1316
+ "cpu": self.get_cpus() + self.get_cpu_burst_add(),
1317
+ "memory": f"{self.get_mem()}Mi",
1318
+ "ephemeral-storage": f"{self.get_disk()}Mi",
1319
+ }
1320
+ requests = {
1321
+ "cpu": self.get_cpus(),
1322
+ "memory": f"{self.get_mem()}Mi",
1323
+ "ephemeral-storage": f"{self.get_disk()}Mi",
1324
+ }
1325
+ if self.get_gpus():
1326
+ limits[GPU_RESOURCE_NAME] = self.get_gpus()
1327
+ requests[GPU_RESOURCE_NAME] = self.get_gpus()
1328
+ return V1ResourceRequirements(limits=limits, requests=requests)
1329
+
1330
+ def get_sidecar_resource_requirements(
1331
+ self,
1332
+ sidecar_name: str,
1333
+ system_paasta_config: SystemPaastaConfig,
1334
+ ) -> V1ResourceRequirements:
1335
+ """
1336
+ Sidecar request/limits are set with varying levels of priority, with
1337
+ elements further down the list taking precedence:
1338
+ * hard-coded paasta default
1339
+ * SystemPaastaConfig
1340
+ * per-service soaconfig overrides
1341
+
1342
+ Additionally, for the time being we do not expose a way to set
1343
+ limits separately from requests - these values will always mirror
1344
+ each other
1345
+
1346
+ NOTE: changing any of these will cause a bounce of all services that
1347
+ run the sidecars affected by the resource change
1348
+ """
1349
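+ # Illustrative sketch (hypothetical values): if SystemPaastaConfig supplies
+ # {"cpu": 0.1, "memory": "1024Mi", "ephemeral-storage": "256Mi"} for "hacheck" and the
+ # service sets sidecar_resource_requirements.hacheck.requests.cpu to 0.5, the merged
+ # requests are cpu=0.5, memory=1024Mi, ephemeral-storage=256Mi, with limits mirroring
+ # requests unless explicitly overridden.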
+ config = self.config_dict.get("sidecar_resource_requirements", {}).get(
1350
+ sidecar_name, {}
1351
+ )
1352
+ sidecar_requirements_config = (
1353
+ system_paasta_config.get_sidecar_requirements_config().get(
1354
+ sidecar_name, DEFAULT_SIDECAR_REQUEST
1355
+ )
1356
+ )
1357
+ requests: KubeContainerResourceRequest = {
1358
+ "cpu": sidecar_requirements_config.get("cpu"),
1359
+ "memory": sidecar_requirements_config.get("memory"),
1360
+ "ephemeral-storage": sidecar_requirements_config.get("ephemeral-storage"),
1361
+ }
1362
+ requests.update(config.get("requests", {}))
1363
+
1364
+ limits: KubeContainerResourceRequest = {
1365
+ "cpu": requests["cpu"],
1366
+ "memory": requests["memory"],
1367
+ "ephemeral-storage": requests["ephemeral-storage"],
1368
+ }
1369
+ limits.update(config.get("limits", {}))
1370
+
1371
+ return V1ResourceRequirements(
1372
+ limits=limits,
1373
+ requests=requests,
1374
+ )
1375
+
1376
+ def get_liveness_probe(
1377
+ self, service_namespace_config: ServiceNamespaceConfig
1378
+ ) -> Optional[V1Probe]:
1379
+ mode = self.get_healthcheck_mode(service_namespace_config)
1380
+ if mode is None:
1381
+ return None
1382
+
1383
+ initial_delay_seconds = self.get_healthcheck_grace_period_seconds()
1384
+ period_seconds = self.get_healthcheck_interval_seconds()
1385
+ timeout_seconds = self.get_healthcheck_timeout_seconds()
1386
+ failure_threshold = self.get_healthcheck_max_consecutive_failures()
1387
+ probe = V1Probe(
1388
+ failure_threshold=failure_threshold,
1389
+ initial_delay_seconds=initial_delay_seconds,
1390
+ period_seconds=period_seconds,
1391
+ timeout_seconds=timeout_seconds,
1392
+ )
1393
+
1394
+ if mode == "http" or mode == "https":
1395
+ path = self.get_healthcheck_uri(service_namespace_config)
1396
+ probe.http_get = V1HTTPGetAction(
1397
+ path=path, port=self.get_container_port(), scheme=mode.upper()
1398
+ )
1399
+ elif mode == "tcp":
1400
+ probe.tcp_socket = V1TCPSocketAction(port=self.get_container_port())
1401
+ elif mode == "cmd":
1402
+ probe._exec = V1ExecAction(
1403
+ command=["/bin/sh", "-c", self.get_healthcheck_cmd()]
1404
+ )
1405
+ else:
1406
+ raise InvalidHealthcheckMode(
1407
+ "Unknown mode: %s. Only acceptable healthcheck modes are http/https/tcp/cmd"
1408
+ % mode
1409
+ )
1410
+
1411
+ return probe
1412
+
1413
+ def get_security_context(self) -> Optional[V1SecurityContext]:
1414
+ cap_add = self.config_dict.get("cap_add", None)
1415
+ context_kwargs = (
1416
+ # pass the parameter like this to avoid bouncing all services
1417
+ # when this change is released
1418
+ {"privileged": self.config_dict["privileged"]}
1419
+ if "privileged" in self.config_dict
1420
+ else {}
1421
+ )
1422
+ if cap_add is None:
1423
+ return V1SecurityContext(
1424
+ capabilities=V1Capabilities(drop=CAPS_DROP),
1425
+ **context_kwargs,
1426
+ )
1427
+ else:
1428
+ return V1SecurityContext(
1429
+ # XXX: we should probably generally work in sets, but V1Capabilities is typed as accepting
1430
+ # lists of string only
1431
+ capabilities=V1Capabilities(
1432
+ add=cap_add,
1433
+ # NOTE: this is necessary as containerd differs in behavior from dockershim: in dockershim
1434
+ # dropped capabilities were overridden if the same capability was added - but in containerd
1435
+ # the dropped capabilities appear to have higher priority.
1436
+ # WARNING: this must be sorted - otherwise the order of the capabilities will be different
1437
+ # on every setup_kubernetes_job run and cause unnecessary redeployments
1438
+ drop=sorted(list(set(CAPS_DROP) - set(cap_add))),
1439
+ ),
1440
+ **context_kwargs,
1441
+ )
1442
+
1443
+ def get_kubernetes_containers(
1444
+ self,
1445
+ docker_volumes: Sequence[DockerVolume],
1446
+ hacheck_sidecar_volumes: Sequence[DockerVolume],
1447
+ system_paasta_config: SystemPaastaConfig,
1448
+ aws_ebs_volumes: Sequence[AwsEbsVolume],
1449
+ secret_volumes: Sequence[SecretVolume],
1450
+ service_namespace_config: ServiceNamespaceConfig,
1451
+ include_sidecars: bool = True,
1452
+ ) -> Sequence[V1Container]:
1453
+ ports = [self.get_container_port()]
1454
+ # MONK-1130
1455
+ # The prometheus_port is used for scraping metrics from the main
1456
+ # container in the pod. Prometheus discovers ports using the kubernetes
1457
+ # API and creates scrape targets for all the exported container ports.
1458
+ # A better way of doing this would be to export the prometheus port as pod
1459
+ # annotations but this is not currently supported.
1460
+ # https://github.com/prometheus/prometheus/issues/3756
1461
+ prometheus_port = self.get_prometheus_port()
1462
+ if prometheus_port and prometheus_port not in ports:
1463
+ ports.append(prometheus_port)
1464
+
1465
+ service_container = V1Container(
1466
+ image=self.get_docker_url(),
1467
+ command=self.get_cmd(),
1468
+ args=self.get_args(),
1469
+ env=self.get_container_env(),
1470
+ resources=self.get_resource_requirements(),
1471
+ lifecycle=V1Lifecycle(
1472
+ pre_stop=self.get_kubernetes_container_termination_action(
1473
+ service_namespace_config
1474
+ )
1475
+ ),
1476
+ name=self.get_sanitised_instance_name(),
1477
+ liveness_probe=self.get_liveness_probe(service_namespace_config),
1478
+ readiness_probe=self.get_readiness_probe(service_namespace_config),
1479
+ ports=[V1ContainerPort(container_port=port) for port in ports],
1480
+ security_context=self.get_security_context(),
1481
+ volume_mounts=self.get_volume_mounts(
1482
+ docker_volumes=docker_volumes,
1483
+ aws_ebs_volumes=aws_ebs_volumes,
1484
+ persistent_volumes=self.get_persistent_volumes(),
1485
+ secret_volumes=secret_volumes,
1486
+ projected_sa_volumes=self.get_projected_sa_volumes(),
1487
+ ),
1488
+ )
1489
+ containers = [service_container]
1490
+ if include_sidecars:
1491
+ containers += self.get_sidecar_containers( # type: ignore
1492
+ system_paasta_config=system_paasta_config,
1493
+ service_namespace_config=service_namespace_config,
1494
+ hacheck_sidecar_volumes=hacheck_sidecar_volumes,
1495
+ )
1496
+ return containers
1497
+
1498
+ def get_readiness_probe(
1499
+ self, service_namespace_config: ServiceNamespaceConfig
1500
+ ) -> Optional[V1Probe]:
1501
+ if service_namespace_config.is_in_smartstack():
1502
+ return None
1503
+ else:
1504
+ return self.get_liveness_probe(service_namespace_config)
1505
+
1506
+ def get_lifecycle_dict(self) -> KubeLifecycleDict:
1507
+ return self.config_dict.get("lifecycle", KubeLifecycleDict({}))
1508
+
1509
+ def get_prestop_sleep_seconds(self, is_in_smartstack: bool) -> int:
1510
+ if is_in_smartstack:
1511
+ default = 30
1512
+ else:
1513
+ default = 0
1514
+ return self.get_lifecycle_dict().get("pre_stop_drain_seconds", default)
1515
+
1516
+ def get_hacheck_prestop_sleep_seconds(self) -> int:
1517
+ """The number of seconds to sleep between hadown and terminating the hacheck container. We want hacheck to be
1518
+ up for slightly longer than the main container is, so we default to pre_stop_drain_seconds + 1.
1519
+
1520
+ It doesn't super matter if hacheck goes down before the main container -- if it's down, healthchecks will fail
1521
+ and the service will be removed from smartstack, which is the same effect we get after running hadown.
1522
+ """
1523
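+ # e.g. (illustrative): with the smartstack default pre_stop_drain_seconds of 30,
+ # this returns 31.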
+
1524
+ # Everywhere this value is currently used (hacheck sidecar or gunicorn sidecar), we can pretty safely
1525
+ # assume that the service is in smartstack.
1526
+ return self.get_prestop_sleep_seconds(is_in_smartstack=True) + 1
1527
+
1528
+ def get_pre_stop_wait_for_connections_to_complete(
1529
+ self, service_namespace_config: ServiceNamespaceConfig
1530
+ ) -> bool:
1531
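+ # e.g. (illustrative): a smartstack service whose longest configured timeout is
+ # 30000ms defaults to waiting for in-flight connections to drain; other services
+ # default to a plain sleep in the pre-stop hook.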
+ return self.get_lifecycle_dict().get(
1532
+ "pre_stop_wait_for_connections_to_complete",
1533
+ service_namespace_config.is_in_smartstack()
1534
+ and service_namespace_config.get_longest_timeout_ms() >= 20000,
1535
+ )
1536
+
1537
+ def get_kubernetes_container_termination_action(
1538
+ self,
1539
+ service_namespace_config: ServiceNamespaceConfig,
1540
+ ) -> V1LifecycleHandler:
1541
+ command = self.get_lifecycle_dict().get("pre_stop_command", [])
1542
+ # default pre stop hook for the container
1543
+ if not command:
1544
+ pre_stop_sleep_seconds = self.get_prestop_sleep_seconds(
1545
+ service_namespace_config.is_in_smartstack()
1546
+ )
1547
+ if self.get_pre_stop_wait_for_connections_to_complete(
1548
+ service_namespace_config
1549
+ ):
1550
+ # This pre-stop command:
1551
+ # 1. Waits for pre_stop_sleep_seconds seconds (to give hadown time to take effect). This avoids a
1552
+ # potential race condition where step 2 detects no connections in flight and the pod is terminated
1553
+ # immediately, but because the pod is still listed in Envoy somewhere, it receives a new connection
1554
+ # just as the pod is terminated.
1555
+ # 2. Every second, checks if there are any established connections to the pod. It exits when there are no
1556
+ # established connections.
1557
+ # It exits when all connections are closed, which should mean the pod can be safely terminated.
1558
+ # The first four fields of /proc/net/tcp are:
1559
+ # 1. slot number (which is not relevant here, but it's a decimal number left-padded with whitespace)
1560
+ # 2. local address:port (both in hex)
1561
+ # 3. remote address:port (both in hex)
1562
+ # 4. state (in hex)
1563
+ # State 01 means ESTABLISHED.
1564
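+ # Illustrative example (hypothetical addresses): for container port 8888 (hex 22B8),
+ # an ESTABLISHED entry would look roughly like
+ #   42: 0B0B0B0B:22B8 0A0A0A0A:D2A4 01 ...
+ # and the grep below keeps the loop alive until no such entry remains.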
+ hex_port = hex(self.get_container_port()).upper()[2:]
1565
+ command = [
1566
+ "/bin/sh",
1567
+ "-c",
1568
+ f"sleep {pre_stop_sleep_seconds}; while grep '^ *[0-9]*: ........:{hex_port} ........:.... 01 ' /proc/net/tcp; do sleep 1; echo; done",
1569
+ ]
1570
+ else:
1571
+ command = [
1572
+ "/bin/sh",
1573
+ "-c",
1574
+ f"sleep {pre_stop_sleep_seconds}",
1575
+ ]
1576
+
1577
+ if isinstance(command, str):
1578
+ command = [command]
1579
+ return V1LifecycleHandler(_exec=V1ExecAction(command=command))
1580
+
1581
+ def get_pod_volumes(
1582
+ self,
1583
+ docker_volumes: Sequence[DockerVolume],
1584
+ aws_ebs_volumes: Sequence[AwsEbsVolume],
1585
+ secret_volumes: Sequence[SecretVolume],
1586
+ projected_sa_volumes: Sequence[ProjectedSAVolume],
1587
+ ) -> Sequence[V1Volume]:
1588
+ pod_volumes = []
1589
+ unique_docker_volumes = {
1590
+ self.get_docker_volume_name(docker_volume): docker_volume
1591
+ for docker_volume in docker_volumes
1592
+ }
1593
+ for name, docker_volume in unique_docker_volumes.items():
1594
+ pod_volumes.append(
1595
+ V1Volume(
1596
+ host_path=V1HostPathVolumeSource(path=docker_volume["hostPath"]),
1597
+ name=name,
1598
+ )
1599
+ )
1600
+ unique_aws_ebs_volumes = {
1601
+ self.get_aws_ebs_volume_name(aws_ebs_volume): aws_ebs_volume
1602
+ for aws_ebs_volume in aws_ebs_volumes
1603
+ }
1604
+ for name, aws_ebs_volume in unique_aws_ebs_volumes.items():
1605
+ pod_volumes.append(
1606
+ V1Volume(
1607
+ aws_elastic_block_store=V1AWSElasticBlockStoreVolumeSource(
1608
+ volume_id=aws_ebs_volume["volume_id"],
1609
+ fs_type=aws_ebs_volume.get("fs_type"),
1610
+ partition=aws_ebs_volume.get("partition"),
1611
+ # k8s wants RW volume even if it's later mounted RO
1612
+ read_only=False,
1613
+ ),
1614
+ name=name,
1615
+ )
1616
+ )
1617
+ for secret_volume in secret_volumes:
1618
+ if "items" in secret_volume:
1619
+ items = [
1620
+ V1KeyToPath(
1621
+ key=item["key"],
1622
+ mode=mode_to_int(item.get("mode")),
1623
+ path=item["path"],
1624
+ )
1625
+ for item in secret_volume["items"]
1626
+ ]
1627
+ else:
1628
+ items = None
1629
+ pod_volumes.append(
1630
+ V1Volume(
1631
+ name=self.get_secret_volume_name(secret_volume),
1632
+ secret=V1SecretVolumeSource(
1633
+ secret_name=get_paasta_secret_name(
1634
+ self.get_namespace(),
1635
+ self.get_service(),
1636
+ secret_volume["secret_name"],
1637
+ ),
1638
+ default_mode=mode_to_int(secret_volume.get("default_mode")),
1639
+ items=items,
1640
+ optional=False,
1641
+ ),
1642
+ )
1643
+ )
1644
+ for projected_volume in projected_sa_volumes:
1645
+ pod_volumes.append(
1646
+ V1Volume(
1647
+ name=self.get_projected_sa_volume_name(projected_volume),
1648
+ projected=V1ProjectedVolumeSource(
1649
+ sources=[
1650
+ V1VolumeProjection(
1651
+ service_account_token=V1ServiceAccountTokenProjection(
1652
+ audience=projected_volume["audience"],
1653
+ expiration_seconds=projected_volume.get(
1654
+ "expiration_seconds",
1655
+ DEFAULT_PROJECTED_SA_EXPIRATION_SECONDS,
1656
+ ),
1657
+ path=PROJECTED_SA_TOKEN_PATH,
1658
+ )
1659
+ )
1660
+ ],
1661
+ ),
1662
+ ),
1663
+ )
1664
+
1665
+ boto_volume = self.get_boto_volume()
1666
+ if boto_volume:
1667
+ pod_volumes.append(boto_volume)
1668
+
1669
+ crypto_volume = self.get_crypto_volume()
1670
+ if crypto_volume:
1671
+ pod_volumes.append(crypto_volume)
1672
+
1673
+ datastore_credentials_secrets_volume = (
1674
+ self.get_datastore_credentials_secrets_volume()
1675
+ )
1676
+ if datastore_credentials_secrets_volume:
1677
+ pod_volumes.append(datastore_credentials_secrets_volume)
1678
+
1679
+ return pod_volumes
1680
+
1681
+ def get_datastore_credentials(self) -> DatastoreCredentialsConfig:
1682
+ datastore_credentials = self.config_dict.get("datastore_credentials", {})
1683
+ return datastore_credentials
1684
+
1685
+ def get_datastore_credentials_secret_name(self) -> str:
1686
+ return _get_secret_name(
1687
+ self.get_namespace(),
1688
+ "datastore-credentials",
1689
+ self.get_service(),
1690
+ self.get_instance(),
1691
+ )
1692
+
1693
+ def get_datastore_secret_volume_name(self) -> str:
1694
+ """
1695
+ Volume names must abide by the 63-character DNS label limit, so we limit the length here and replace _ with --.
1696
+ """
1697
+ return self.get_sanitised_volume_name(
1698
+ f"secret-datastore-creds-{self.get_sanitised_deployment_name()}",
1699
+ length_limit=63,
1700
+ )
1701
+
1702
+ def get_datastore_credentials_secrets_volume(self) -> Optional[V1Volume]:
1703
+ """
1704
+ All credentials are stored in a single Kubernetes Secret, whose keys are mapped via an item->path
1705
+ structure to /datastore/<datastore>/<credential>/<password file>.
1706
+ """
1707
+ datastore_credentials = self.get_datastore_credentials()
1708
+ if not datastore_credentials:
1709
+ return None
1710
+
1711
+ # Assume k8s secret exists if its configmap signature exists
1712
+ secret_hash = self.get_datastore_credentials_secret_hash()
1713
+ if not secret_hash:
1714
+ log.warning(
1715
+ f"Expected to find datastore_credentials secret signature {self.get_datastore_credentials_secret_name()} for {self.get_service()}.{self.get_instance()} on {self.get_namespace()}"
1716
+ )
1717
+ return None
1718
+
1719
+ secrets_with_custom_mountpaths = []
1720
+
1721
+ for datastore, credentials in datastore_credentials.items():
1722
+ # mypy loses type hints on '.items' and throws false positives. unfortunately have to type: ignore
1723
+ # https://github.com/python/mypy/issues/7178
1724
+ for credential in credentials: # type: ignore
1725
+ secrets_with_custom_mountpaths.append(
1726
+ {
1727
+ "key": get_vault_key_secret_name(
1728
+ f"secrets/datastore/{datastore}/{credential}"
1729
+ ),
1730
+ "mode": mode_to_int("0444"),
1731
+ "path": f"{datastore}/{credential}/credentials",
1732
+ }
1733
+ )
1734
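+ # e.g. (illustrative): datastore_credentials of {"mysql": ["svc_rw"]} produces an item
+ # with path "mysql/svc_rw/credentials", which get_volume_mounts() exposes under
+ # /datastore/mysql/svc_rw/credentials.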
+
1735
+ return V1Volume(
1736
+ name=self.get_datastore_secret_volume_name(),
1737
+ secret=V1SecretVolumeSource(
1738
+ secret_name=self.get_datastore_credentials_secret_name(),
1739
+ default_mode=mode_to_int("0444"),
1740
+ items=secrets_with_custom_mountpaths,
1741
+ optional=False,
1742
+ ),
1743
+ )
1744
+
1745
+ def get_boto_volume(self) -> Optional[V1Volume]:
1746
+ required_boto_keys = self.config_dict.get("boto_keys", [])
1747
+ service_name = self.get_sanitised_deployment_name()
1748
+ if not required_boto_keys:
1749
+ return None
1750
+ items = []
1751
+ for boto_key in required_boto_keys:
1752
+ for filetype in ["sh", "yaml", "cfg", "json"]:
1753
+ this_key = boto_key + "." + filetype
1754
+ secret_name = this_key.replace(".", "-").replace("_", "--")
1755
+ item = V1KeyToPath(
1756
+ key=secret_name,
1757
+ mode=mode_to_int("0444"),
1758
+ path=this_key,
1759
+ )
1760
+ items.append(item)
1761
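+ # e.g. (illustrative): boto_key "scribereader_rw" with filetype "yaml" produces
+ # secret key "scribereader--rw-yaml", mounted at path "scribereader_rw.yaml".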
+ # Assume k8s secret exists if its configmap signature exists
1762
+ secret_hash = self.get_boto_secret_hash()
1763
+ if not secret_hash:
1764
+ log.warning(
1765
+ f"Expected to find boto_cfg secret signature {self.get_boto_secret_signature_name()} for {self.get_service()}.{self.get_instance()} on {self.get_namespace()}"
1766
+ )
1767
+ return None
1768
+
1769
+ volume = V1Volume(
1770
+ name=self.get_boto_secret_volume_name(service_name),
1771
+ secret=V1SecretVolumeSource(
1772
+ secret_name=self.get_boto_secret_name(),
1773
+ default_mode=mode_to_int("0444"),
1774
+ items=items,
1775
+ ),
1776
+ )
1777
+ return volume
1778
+
1779
+ def get_crypto_keys_from_config(self) -> List[str]:
1780
+ crypto_keys = self.config_dict.get("crypto_keys", {})
1781
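+ # e.g. (illustrative): crypto_keys of {"encrypt": ["foo"], "decrypt": ["bar"]}
+ # yields ["public/foo", "private/bar"].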
+ return [
1782
+ *(f"public/{key}" for key in crypto_keys.get("encrypt", [])),
1783
+ *(f"private/{key}" for key in crypto_keys.get("decrypt", [])),
1784
+ ]
1785
+
1786
+ def get_crypto_volume(self) -> Optional[V1Volume]:
1787
+ required_crypto_keys = self.get_crypto_keys_from_config()
1788
+ if not required_crypto_keys:
1789
+ return None
1790
+
1791
+ if not self.get_crypto_secret_hash():
1792
+ log.warning(
1793
+ f"Expected to find crypto_keys secret signature {self.get_crypto_secret_name()} {self.get_boto_secret_signature_name()} for {self.get_service()}.{self.get_instance()} on {self.get_namespace()}"
1794
+ )
1795
+ return None
1796
+
1797
+ return V1Volume(
1798
+ name=self.get_crypto_secret_volume_name(
1799
+ self.get_sanitised_deployment_name()
1800
+ ),
1801
+ secret=V1SecretVolumeSource(
1802
+ secret_name=self.get_crypto_secret_name(),
1803
+ default_mode=mode_to_int("0444"),
1804
+ items=[
1805
+ V1KeyToPath(
1806
+ # key should exist in data section of k8s secret
1807
+ key=get_vault_key_secret_name(crypto_key),
1808
+ # path is equivalent to Vault key directory structure
1809
+ # e.g. private/foo will create /etc/crypto_keys/private/foo.json
1810
+ path=f"{crypto_key}.json",
1811
+ mode=mode_to_int("0444"),
1812
+ )
1813
+ for crypto_key in required_crypto_keys
1814
+ ],
1815
+ optional=True,
1816
+ ),
1817
+ )
1818
+
1819
+ def get_volume_mounts(
1820
+ self,
1821
+ docker_volumes: Sequence[DockerVolume],
1822
+ aws_ebs_volumes: Sequence[AwsEbsVolume],
1823
+ persistent_volumes: Sequence[PersistentVolume],
1824
+ secret_volumes: Sequence[SecretVolume],
1825
+ projected_sa_volumes: Sequence[ProjectedSAVolume],
1826
+ ) -> Sequence[V1VolumeMount]:
1827
+ volume_mounts = (
1828
+ [
1829
+ V1VolumeMount(
1830
+ mount_path=docker_volume["containerPath"],
1831
+ name=self.get_docker_volume_name(docker_volume),
1832
+ read_only=self.read_only_mode(docker_volume),
1833
+ )
1834
+ for docker_volume in docker_volumes
1835
+ ]
1836
+ + [
1837
+ V1VolumeMount(
1838
+ mount_path=aws_ebs_volume["container_path"],
1839
+ name=self.get_aws_ebs_volume_name(aws_ebs_volume),
1840
+ read_only=self.read_only_mode(aws_ebs_volume),
1841
+ )
1842
+ for aws_ebs_volume in aws_ebs_volumes
1843
+ ]
1844
+ + [
1845
+ V1VolumeMount(
1846
+ mount_path=volume["container_path"],
1847
+ name=self.get_persistent_volume_name(volume),
1848
+ read_only=self.read_only_mode(volume),
1849
+ )
1850
+ for volume in persistent_volumes
1851
+ ]
1852
+ + [
1853
+ V1VolumeMount(
1854
+ mount_path=volume["container_path"],
1855
+ name=self.get_secret_volume_name(volume),
1856
+ read_only=True,
1857
+ )
1858
+ for volume in secret_volumes
1859
+ ]
1860
+ + [
1861
+ V1VolumeMount(
1862
+ mount_path=volume["container_path"],
1863
+ name=self.get_projected_sa_volume_name(volume),
1864
+ read_only=True,
1865
+ )
1866
+ for volume in projected_sa_volumes
1867
+ ]
1868
+ )
1869
+ if self.config_dict.get("boto_keys", []):
1870
+ secret_hash = self.get_boto_secret_hash()
1871
+ service_name = self.get_sanitised_deployment_name()
1872
+ if secret_hash:
1873
+ mount = V1VolumeMount(
1874
+ mount_path="/etc/boto_cfg",
1875
+ name=self.get_boto_secret_volume_name(service_name),
1876
+ read_only=True,
1877
+ )
1878
+ for existing_mount in volume_mounts:
1879
+ if existing_mount.mount_path == "/etc/boto_cfg":
1880
+ volume_mounts.remove(existing_mount)
1881
+ break
1882
+ volume_mounts.append(mount)
1883
+
1884
+ if self.config_dict.get("crypto_keys", []):
1885
+ if self.get_crypto_secret_hash():
1886
+ mount = V1VolumeMount(
1887
+ mount_path="/etc/crypto_keys",
1888
+ name=self.get_crypto_secret_volume_name(
1889
+ self.get_sanitised_deployment_name()
1890
+ ),
1891
+ read_only=True,
1892
+ )
1893
+ for existing_mount in volume_mounts:
1894
+ if existing_mount.mount_path == "/etc/crypto_keys":
1895
+ volume_mounts.remove(existing_mount)
1896
+ break
1897
+ volume_mounts.append(mount)
1898
+
1899
+ datastore_credentials = self.get_datastore_credentials()
1900
+ if datastore_credentials:
1901
+ if self.get_datastore_credentials_secret_hash():
1902
+ volume_mounts.append(
1903
+ V1VolumeMount(
1904
+ mount_path=f"/datastore",
1905
+ name=self.get_datastore_secret_volume_name(),
1906
+ read_only=True,
1907
+ )
1908
+ )
1909
+
1910
+ return volume_mounts
1911
+
1912
+ def get_boto_secret_name(self) -> str:
1913
+ """
1914
+ Namespace is ignored so that there are no bounces with existing boto_keys secrets
1915
+ """
1916
+ return limit_size_with_hash(
1917
+ f"paasta-boto-key-{self.get_sanitised_deployment_name()}"
1918
+ )
1919
+
1920
+ def get_crypto_secret_name(self) -> str:
1921
+ return _get_secret_name(
1922
+ self.get_namespace(), "crypto-key", self.get_service(), self.get_instance()
1923
+ )
1924
+
1925
+ def get_boto_secret_signature_name(self) -> str:
1926
+ """
1927
+ Keep the following signature naming convention so that bounces do not happen because boto_keys configmap signatures already exist, see PAASTA-17910
1928
+
1929
+ Note: Since hashing is done only on a portion of the secret name, the resulting name may still be too long if service or instance names are very long
1930
+ """
1931
+ secret_instance = limit_size_with_hash(
1932
+ f"paasta-boto-key-{self.get_sanitised_deployment_name()}"
1933
+ )
1934
+ return f"{self.get_namespace()}-secret-{self.get_sanitised_service_name()}-{secret_instance}-signature"
1935
+
1936
+ def get_crypto_secret_signature_name(self) -> str:
1937
+ return _get_secret_signature_name(
1938
+ self.get_namespace(), "crypto-key", self.get_service(), self.get_instance()
1939
+ )
1940
+
1941
+ def get_datastore_credentials_signature_name(self) -> str:
1942
+ """
1943
+ All datastore credentials are stored in a single Kubernetes secret, so they share a name
1944
+ """
1945
+ return _get_secret_signature_name(
1946
+ self.get_namespace(),
1947
+ "datastore-credentials",
1948
+ self.get_service(),
1949
+ # key is on instances, which get their own configurations
1950
+ key_name=self.get_instance(),
1951
+ )
1952
+
1953
+ def get_boto_secret_hash(self) -> Optional[str]:
1954
+ return get_secret_signature(
1955
+ kube_client=KubeClient(),
1956
+ signature_name=self.get_boto_secret_signature_name(),
1957
+ namespace=self.get_namespace(),
1958
+ )
1959
+
1960
+ def get_crypto_secret_hash(self) -> Optional[str]:
1961
+ return get_secret_signature(
1962
+ kube_client=KubeClient(),
1963
+ signature_name=self.get_crypto_secret_signature_name(),
1964
+ namespace=self.get_namespace(),
1965
+ )
1966
+
1967
+ def get_datastore_credentials_secret_hash(self) -> Optional[str]:
1968
+ return get_secret_signature(
1969
+ kube_client=KubeClient(),
1970
+ signature_name=self.get_datastore_credentials_signature_name(),
1971
+ namespace=self.get_namespace(),
1972
+ )
1973
+
1974
+ def get_sanitised_service_name(self) -> str:
1975
+ return sanitise_kubernetes_name(self.get_service())
1976
+
1977
+ def get_sanitised_instance_name(self) -> str:
1978
+ return sanitise_kubernetes_name(self.get_instance())
1979
+
1980
+ def get_autoscaled_instances(self) -> Optional[int]:
1981
+ try:
1982
+ if self.get_persistent_volumes():
1983
+ return (
1984
+ KubeClient()
1985
+ .deployments.read_namespaced_stateful_set(
1986
+ name=self.get_sanitised_deployment_name(),
1987
+ namespace=self.get_namespace(),
1988
+ )
1989
+ .spec.replicas
1990
+ )
1991
+ else:
1992
+ return (
1993
+ KubeClient()
1994
+ .deployments.read_namespaced_deployment(
1995
+ name=self.get_sanitised_deployment_name(),
1996
+ namespace=self.get_namespace(),
1997
+ )
1998
+ .spec.replicas
1999
+ )
2000
+ except ApiException as e:
2001
+ log.error(e)
2002
+ log.debug(
2003
+ "Error occured when trying to connect to Kubernetes API, \
2004
+ returning max_instances (%d)"
2005
+ % self.get_max_instances()
2006
+ )
2007
+ return None
2008
+
2009
+ def get_min_instances(self) -> Optional[int]:
2010
+ return self.config_dict.get(
2011
+ "min_instances",
2012
+ 1,
2013
+ )
2014
+
2015
+ def get_max_instances(self) -> Optional[int]:
2016
+ return self.config_dict.get(
2017
+ "max_instances",
2018
+ None,
2019
+ )
2020
+
2021
+ def set_autoscaled_instances(
2022
+ self, instance_count: int, kube_client: KubeClient
2023
+ ) -> None:
2024
+ """Set the number of instances in the same way that the autoscaler does."""
2025
+ set_instances_for_kubernetes_service(
2026
+ kube_client=kube_client, service_config=self, instance_count=instance_count
2027
+ )
2028
+
2029
+ def get_desired_instances(self) -> int:
2030
+ """For now if we have an EBS instance it means we can only have 1 instance
2031
+ since we can't attach to multiple instances. In the future we might support
2032
+ statefulsets which are clever enough to manage EBS for you"""
2033
+ instances = super().get_desired_instances()
2034
+ if self.get_aws_ebs_volumes() and instances not in [1, 0]:
2035
+ raise Exception(
2036
+ "Number of instances must be 1 or 0 if an EBS volume is defined."
2037
+ )
2038
+ return instances
2039
+
2040
+ def get_volume_claim_templates(self) -> Sequence[V1PersistentVolumeClaim]:
2041
+ return [
2042
+ V1PersistentVolumeClaim(
2043
+ metadata=V1ObjectMeta(name=self.get_persistent_volume_name(volume)),
2044
+ spec=V1PersistentVolumeClaimSpec(
2045
+ # must be ReadWriteOnce for EBS
2046
+ access_modes=["ReadWriteOnce"],
2047
+ storage_class_name=self.get_storage_class_name(volume),
2048
+ resources=V1ResourceRequirements(
2049
+ requests={"storage": f"{volume['size']}Gi"}
2050
+ ),
2051
+ ),
2052
+ )
2053
+ for volume in self.get_persistent_volumes()
2054
+ ]
2055
+
2056
+ def get_storage_class_name(self, volume: PersistentVolume) -> str:
2057
+ try:
2058
+ system_paasta_config = load_system_paasta_config()
2059
+ supported_storage_classes = (
2060
+ system_paasta_config.get_supported_storage_classes()
2061
+ )
2062
+ except PaastaNotConfiguredError:
2063
+ log.warning("No PaaSTA configuration was found, returning default value")
2064
+ supported_storage_classes = []
2065
+ storage_class_name = volume.get("storage_class_name", "ebs")
2066
+ if storage_class_name not in supported_storage_classes:
2067
+ log.warning(f"storage class {storage_class_name} is not supported")
2068
+ storage_class_name = DEFAULT_STORAGE_CLASS_NAME
2069
+ return storage_class_name
2070
+
2071
+ def get_kubernetes_metadata(self, git_sha: str) -> V1ObjectMeta:
2072
+ return V1ObjectMeta(
2073
+ name=self.get_sanitised_deployment_name(),
2074
+ namespace=self.get_namespace(),
2075
+ labels={
2076
+ "yelp.com/owner": PAASTA_WORKLOAD_OWNER,
2077
+ "yelp.com/paasta_service": self.get_service(),
2078
+ "yelp.com/paasta_instance": self.get_instance(),
2079
+ "yelp.com/paasta_git_sha": git_sha,
2080
+ paasta_prefixed("service"): self.get_service(),
2081
+ paasta_prefixed("instance"): self.get_instance(),
2082
+ paasta_prefixed("git_sha"): git_sha,
2083
+ paasta_prefixed("cluster"): self.cluster,
2084
+ paasta_prefixed("autoscaled"): str(
2085
+ self.is_autoscaling_enabled()
2086
+ ).lower(),
2087
+ paasta_prefixed("paasta.yelp.com/pool"): self.get_pool(),
2088
+ paasta_prefixed("managed"): "true",
2089
+ },
2090
+ )
2091
+
2092
+ def get_sanitised_deployment_name(self) -> str:
2093
+ return get_kubernetes_app_name(self.get_service(), self.get_instance())
2094
+
2095
+ def get_min_task_uptime(self) -> int:
2096
+ return self.config_dict.get("bounce_health_params", {}).get(
2097
+ "min_task_uptime", 0
2098
+ )
2099
+
2100
+ def get_enable_nerve_readiness_check(
2101
+ self, system_paasta_config: SystemPaastaConfig
2102
+ ) -> bool:
2103
+ """Enables a k8s readiness check on the Pod to ensure that all registrations
2104
+ are UP on the local synapse haproxy"""
2105
+ return self.config_dict.get("bounce_health_params", {}).get(
2106
+ "check_haproxy", system_paasta_config.get_enable_nerve_readiness_check()
2107
+ )
2108
+
2109
+ def get_enable_envoy_readiness_check(
2110
+ self, system_paasta_config: SystemPaastaConfig
2111
+ ) -> bool:
2112
+ """Enables a k8s readiness check on the Pod to ensure that all registrations
2113
+ are UP on the local Envoy"""
2114
+ return self.config_dict.get("bounce_health_params", {}).get(
2115
+ "check_envoy", system_paasta_config.get_enable_envoy_readiness_check()
2116
+ )
2117
+
2118
+ def get_namespace(self) -> str:
2119
+ """Get namespace from config, default to 'paasta'"""
2120
+ return self.config_dict.get(
2121
+ "namespace", f"paastasvc-{self.get_sanitised_service_name()}"
2122
+ )
2123
+
2124
+ def get_pod_management_policy(self) -> str:
2125
+ """Get sts pod_management_policy from config, default to 'OrderedReady'"""
2126
+ return self.config_dict.get("pod_management_policy", "OrderedReady")
2127
+
2128
+ def format_kubernetes_job(
2129
+ self,
2130
+ job_label: str,
2131
+ deadline_seconds: int = 3600,
2132
+ keep_routable_ip: bool = False,
2133
+ include_sidecars: bool = False,
2134
+ ) -> V1Job:
2135
+ """Create the config for launching the deployment as a Job
2136
+
2137
+ :param str job_label: value to set for the "job type" label
2138
+ :param int deadline_seconds: maximum allowed duration for the job
2139
+ :param bool keep_routable_ip: maintain routable IP annotation in pod template
2140
+ :param bool include_sidecars: do not discard sidecar containers when building pod spec
2141
+ :return: job object
2142
+ """
2143
+ additional_labels = {paasta_prefixed(JOB_TYPE_LABEL_NAME): job_label}
2144
+ try:
2145
+ docker_url = self.get_docker_url()
2146
+ git_sha = get_git_sha_from_dockerurl(docker_url, long=True)
2147
+ system_paasta_config = load_system_paasta_config()
2148
+ image_version = self.get_image_version()
2149
+ if image_version is not None:
2150
+ additional_labels[paasta_prefixed("image_version")] = image_version
2151
+ pod_template = self.get_pod_template_spec(
2152
+ git_sha=git_sha,
2153
+ system_paasta_config=system_paasta_config,
2154
+ restart_on_failure=False,
2155
+ include_sidecars=include_sidecars,
2156
+ force_no_routable_ip=not keep_routable_ip,
2157
+ )
2158
+ pod_template.metadata.labels.update(additional_labels)
2159
+ complete_config = V1Job(
2160
+ api_version="batch/v1",
2161
+ kind="Job",
2162
+ metadata=self.get_kubernetes_metadata(git_sha),
2163
+ spec=V1JobSpec(
2164
+ active_deadline_seconds=deadline_seconds,
2165
+ ttl_seconds_after_finished=0, # remove job resource after completion
2166
+ template=pod_template,
2167
+ ),
2168
+ )
2169
+ complete_config.metadata.labels.update(additional_labels)
2170
+ except Exception as e:
2171
+ raise InvalidKubernetesConfig(e, self.get_service(), self.get_instance())
2172
+ log.debug(
2173
+ f"Complete configuration for job instance is: {complete_config}",
2174
+ )
2175
+ return complete_config
2176
+
2177
+ def format_kubernetes_app(self) -> Union[V1Deployment, V1StatefulSet]:
2178
+ """Create the configuration that will be passed to the Kubernetes REST API."""
2179
+
2180
+ try:
2181
+ system_paasta_config = load_system_paasta_config()
2182
+ docker_url = self.get_docker_url()
2183
+ git_sha = get_git_sha_from_dockerurl(docker_url, long=True)
2184
+ complete_config: Union[V1StatefulSet, V1Deployment]
2185
+ if self.get_persistent_volumes():
2186
+ complete_config = V1StatefulSet(
2187
+ api_version="apps/v1",
2188
+ kind="StatefulSet",
2189
+ metadata=self.get_kubernetes_metadata(git_sha),
2190
+ spec=V1StatefulSetSpec(
2191
+ service_name=self.get_sanitised_deployment_name(),
2192
+ volume_claim_templates=self.get_volume_claim_templates(),
2193
+ replicas=self.get_desired_instances(),
2194
+ revision_history_limit=0,
2195
+ selector=V1LabelSelector(
2196
+ match_labels={
2197
+ "paasta.yelp.com/service": self.get_service(),
2198
+ "paasta.yelp.com/instance": self.get_instance(),
2199
+ }
2200
+ ),
2201
+ template=self.get_pod_template_spec(
2202
+ git_sha=git_sha, system_paasta_config=system_paasta_config
2203
+ ),
2204
+ pod_management_policy=self.get_pod_management_policy(),
2205
+ ),
2206
+ )
2207
+ else:
2208
+ complete_config = V1Deployment(
2209
+ api_version="apps/v1",
2210
+ kind="Deployment",
2211
+ metadata=self.get_kubernetes_metadata(git_sha),
2212
+ spec=V1DeploymentSpec(
2213
+ replicas=self.get_desired_instances(),
2214
+ min_ready_seconds=self.get_min_task_uptime(),
2215
+ selector=V1LabelSelector(
2216
+ match_labels={
2217
+ "paasta.yelp.com/service": self.get_service(),
2218
+ "paasta.yelp.com/instance": self.get_instance(),
2219
+ }
2220
+ ),
2221
+ revision_history_limit=0,
2222
+ template=self.get_pod_template_spec(
2223
+ git_sha=git_sha, system_paasta_config=system_paasta_config
2224
+ ),
2225
+ strategy=self.get_deployment_strategy_config(),
2226
+ ),
2227
+ )
2228
+
2229
+ prometheus_shard = self.get_prometheus_shard()
2230
+ if prometheus_shard:
2231
+ complete_config.metadata.labels[
2232
+ "paasta.yelp.com/prometheus_shard"
2233
+ ] = prometheus_shard
2234
+
2235
+ image_version = self.get_image_version()
2236
+ if image_version is not None:
2237
+ complete_config.metadata.labels[
2238
+ "paasta.yelp.com/image_version"
2239
+ ] = image_version
2240
+
2241
+ # DO NOT ADD LABELS AFTER THIS LINE
2242
+ config_hash = get_config_hash(
2243
+ self.sanitize_for_config_hash(complete_config),
2244
+ force_bounce=self.get_force_bounce(),
2245
+ )
2246
+ complete_config.metadata.labels["yelp.com/paasta_config_sha"] = config_hash
2247
+ complete_config.metadata.labels["paasta.yelp.com/config_sha"] = config_hash
2248
+
2249
+ complete_config.spec.template.metadata.labels[
2250
+ "yelp.com/paasta_config_sha"
2251
+ ] = config_hash
2252
+ complete_config.spec.template.metadata.labels[
2253
+ "paasta.yelp.com/config_sha"
2254
+ ] = config_hash
2255
+ except Exception as e:
2256
+ raise InvalidKubernetesConfig(e, self.get_service(), self.get_instance())
2257
+ log.debug("Complete configuration for instance is: %s", complete_config)
2258
+ return complete_config
2259
+
2260
+ def get_kubernetes_service_account_name(self) -> Optional[str]:
2261
+ return self.config_dict.get("service_account_name", None)
2262
+
2263
+ def is_istio_sidecar_injection_enabled(self) -> bool:
2264
+ return self.config_dict.get("is_istio_sidecar_injection_enabled", False)
2265
+
2266
+ def has_routable_ip(
2267
+ self,
2268
+ service_namespace_config: ServiceNamespaceConfig,
2269
+ system_paasta_config: SystemPaastaConfig,
2270
+ ) -> str:
2271
+ """Return whether the routable_ip label should be true or false.
2272
+
2273
+ Services with a `prometheus_port` defined or that use certain sidecars must have a routable IP
2274
+ address to allow Prometheus shards to scrape metrics.
2275
+ """
2276
+ if (
2277
+ self.config_dict.get("routable_ip", False)
2278
+ or service_namespace_config.is_in_smartstack()
2279
+ or self.get_prometheus_port() is not None
2280
+ or self.should_use_metrics_provider(METRICS_PROVIDER_UWSGI)
2281
+ or self.should_use_metrics_provider(METRICS_PROVIDER_GUNICORN)
2282
+ ):
2283
+ return "true"
2284
+ return "false"
2285
+
2286
+ def should_enable_aws_lb_readiness_gate(self) -> bool:
2287
+ return self.config_dict.get("enable_aws_lb_readiness_gate", False)
2288
+
2289
+ def get_pod_template_spec(
2290
+ self,
2291
+ git_sha: str,
2292
+ system_paasta_config: SystemPaastaConfig,
2293
+ restart_on_failure: bool = True,
2294
+ include_sidecars: bool = True,
2295
+ force_no_routable_ip: bool = False,
2296
+ ) -> V1PodTemplateSpec:
2297
+ service_namespace_config = load_service_namespace_config(
2298
+ service=self.service, namespace=self.get_nerve_namespace()
2299
+ )
2300
+ docker_volumes = self.get_volumes(
2301
+ system_volumes=system_paasta_config.get_volumes(),
2302
+ )
2303
+
2304
+ hacheck_sidecar_volumes = system_paasta_config.get_hacheck_sidecar_volumes()
2305
+ has_routable_ip = (
2306
+ "false"
2307
+ if force_no_routable_ip
2308
+ else self.has_routable_ip(service_namespace_config, system_paasta_config)
2309
+ )
2310
+ annotations: KubePodAnnotations = {
2311
+ "smartstack_registrations": json.dumps(self.get_registrations()),
2312
+ "paasta.yelp.com/routable_ip": has_routable_ip,
2313
+ }
2314
+
2315
+ # The HPAMetrics collector needs these annotations to tell it to pull
2316
+ # metrics from these pods
2317
+ # TODO: see if we can remove this as we're no longer using sfx data to scale
2318
+ if self.get_autoscaling_metrics_provider(METRICS_PROVIDER_UWSGI) is not None:
2319
+ annotations["autoscaling"] = METRICS_PROVIDER_UWSGI
2320
+
2321
+ pod_spec_kwargs = {}
2322
+ pod_spec_kwargs.update(system_paasta_config.get_pod_defaults())
2323
+ pod_spec_kwargs.update(
2324
+ service_account_name=self.get_kubernetes_service_account_name(),
2325
+ containers=self.get_kubernetes_containers(
2326
+ docker_volumes=docker_volumes,
2327
+ hacheck_sidecar_volumes=hacheck_sidecar_volumes,
2328
+ aws_ebs_volumes=self.get_aws_ebs_volumes(),
2329
+ secret_volumes=self.get_secret_volumes(),
2330
+ system_paasta_config=system_paasta_config,
2331
+ service_namespace_config=service_namespace_config,
2332
+ include_sidecars=include_sidecars,
2333
+ ),
2334
+ share_process_namespace=True,
2335
+ node_selector=self.get_node_selector(),
2336
+ restart_policy="Always" if restart_on_failure else "Never",
2337
+ volumes=self.get_pod_volumes(
2338
+ docker_volumes=docker_volumes + hacheck_sidecar_volumes,
2339
+ aws_ebs_volumes=self.get_aws_ebs_volumes(),
2340
+ secret_volumes=self.get_secret_volumes(),
2341
+ projected_sa_volumes=self.get_projected_sa_volumes(),
2342
+ ),
2343
+ )
2344
+ # need to check if there are node selectors/affinities. if there are none
2345
+ # and we create an empty affinity object, k8s will deselect all nodes.
2346
+ node_affinity = self.get_node_affinity(
2347
+ system_paasta_config.get_pool_node_affinities()
2348
+ )
2349
+ if node_affinity is not None:
2350
+ pod_spec_kwargs["affinity"] = V1Affinity(node_affinity=node_affinity)
2351
+
2352
+ pod_anti_affinity = self.get_pod_anti_affinity()
2353
+ if pod_anti_affinity is not None:
2354
+ affinity = pod_spec_kwargs.get("affinity", V1Affinity())
2355
+ affinity.pod_anti_affinity = pod_anti_affinity
2356
+ pod_spec_kwargs["affinity"] = affinity
2357
+
2358
+ # PAASTA-17941: Allow configuring topology spread constraints per cluster
2359
+ pod_topology_spread_constraints = create_pod_topology_spread_constraints(
2360
+ service=self.get_service(),
2361
+ instance=self.get_instance(),
2362
+ topology_spread_constraints=self.get_topology_spread_constraints(
2363
+ system_paasta_config.get_topology_spread_constraints()
2364
+ ),
2365
+ )
2366
+ if pod_topology_spread_constraints:
2367
+ constraints = pod_spec_kwargs.get("topology_spread_constraints", [])
2368
+ constraints += pod_topology_spread_constraints
2369
+ pod_spec_kwargs["topology_spread_constraints"] = constraints
2370
+
2371
+ termination_grace_period = self.get_termination_grace_period(
2372
+ service_namespace_config
2373
+ )
2374
+ if termination_grace_period is not None:
2375
+ pod_spec_kwargs[
2376
+ "termination_grace_period_seconds"
2377
+ ] = termination_grace_period
2378
+
2379
+ fs_group = self.get_fs_group()
2380
+
2381
+ if self.get_iam_role_provider() == "aws":
2382
+ annotations["iam.amazonaws.com/role"] = ""
2383
+ iam_role = self.get_iam_role()
2384
+ if iam_role:
2385
+ pod_spec_kwargs["service_account_name"] = get_service_account_name(
2386
+ iam_role
2387
+ )
2388
+ if fs_group is None:
2389
+ # We need some reasonable default for group id of a process
2390
+ # running inside the container. Seems like most of such
2391
+ # programs run as `nobody`, let's use that as a default.
2392
+ #
2393
+ # PAASTA-16919: This should be removed when
2394
+ # https://github.com/aws/amazon-eks-pod-identity-webhook/issues/8
2395
+ # is fixed.
2396
+ fs_group = 65534
2397
+ else:
2398
+ annotations["iam.amazonaws.com/role"] = self.get_iam_role()
2399
+
2400
+ if fs_group is not None:
2401
+ pod_spec_kwargs["security_context"] = V1PodSecurityContext(
2402
+ fs_group=fs_group
2403
+ )
2404
+
2405
+ # prometheus_path is used to override the default scrape path in Prometheus
2406
+ prometheus_path = self.get_prometheus_path()
2407
+ if prometheus_path:
2408
+ annotations["paasta.yelp.com/prometheus_path"] = prometheus_path
2409
+
2410
+ # prometheus_port is used to override the default scrape port in Prometheus
2411
+ prometheus_port = self.get_prometheus_port()
2412
+ if prometheus_port:
2413
+ annotations["paasta.yelp.com/prometheus_port"] = str(prometheus_port)
2414
+
2415
+ # Default Pod labels
2416
+ labels: KubePodLabels = {
2417
+ "yelp.com/paasta_service": self.get_service(),
2418
+ "yelp.com/paasta_instance": self.get_instance(),
2419
+ "yelp.com/paasta_git_sha": git_sha,
2420
+ # NOTE: we can't use the paasta_prefixed() helper here
2421
+ # since mypy expects TypedDict keys to be string literals
2422
+ "paasta.yelp.com/service": self.get_service(),
2423
+ "paasta.yelp.com/instance": self.get_instance(),
2424
+ "paasta.yelp.com/git_sha": git_sha,
2425
+ "paasta.yelp.com/autoscaled": str(self.is_autoscaling_enabled()).lower(),
2426
+ "paasta.yelp.com/pool": self.get_pool(),
2427
+ "paasta.yelp.com/cluster": self.cluster,
2428
+ "yelp.com/owner": "compute_infra_platform_experience",
2429
+ "paasta.yelp.com/managed": "true",
2430
+ }
2431
+ if service_namespace_config.is_in_smartstack():
2432
+ labels["paasta.yelp.com/weight"] = str(self.get_weight())
2433
+
2434
+ # Allow the Prometheus Operator's Pod Service Monitor for specified
2435
+ # shard to find this pod
2436
+ prometheus_shard = self.get_prometheus_shard()
2437
+ if prometheus_shard:
2438
+ labels["paasta.yelp.com/prometheus_shard"] = prometheus_shard
2439
+
2440
+ image_version = self.get_image_version()
2441
+ if image_version is not None:
2442
+ labels["paasta.yelp.com/image_version"] = image_version
2443
+
2444
+ if system_paasta_config.get_kubernetes_add_registration_labels():
2445
+ # Allow Kubernetes Services to easily find
2446
+ # pods belonging to a certain smartstack namespace
2447
+ for registration in self.get_registrations():
2448
+ labels[registration_label(registration)] = "true" # type: ignore
2449
+
2450
+ if self.is_istio_sidecar_injection_enabled():
2451
+ labels["sidecar.istio.io/inject"] = "true"
2452
+
2453
+ # not all services use autoscaling, so we label those that do in order to have
2454
+ # prometheus selectively discover/scrape them
2455
+ if self.should_use_metrics_provider(METRICS_PROVIDER_UWSGI):
2456
+ # UWSGI no longer needs a label to indicate it needs to be scraped, as all pods are checked for the uwsgi stats port by our centralized uwsgi-exporter
2457
+ # But we do still need deploy_group for relabeling properly
2458
+ # this should probably eventually be made into a default label,
2459
+ # but for now we're fine with it being behind these feature toggles.
2460
+ # ideally, we'd also have the docker image here for ease-of-use
2461
+ # in Prometheus relabeling, but that information is over the
2462
+ # character limit for k8s labels (63 chars)
2463
+ labels["paasta.yelp.com/deploy_group"] = self.get_deploy_group()
2464
+
2465
+ elif self.should_use_metrics_provider(METRICS_PROVIDER_PISCINA):
2466
+ labels["paasta.yelp.com/deploy_group"] = self.get_deploy_group()
2467
+ labels["paasta.yelp.com/scrape_piscina_prometheus"] = "true"
2468
+
2469
+ elif self.should_use_metrics_provider(METRICS_PROVIDER_GUNICORN):
2470
+ labels["paasta.yelp.com/deploy_group"] = self.get_deploy_group()
2471
+ labels["paasta.yelp.com/scrape_gunicorn_prometheus"] = "true"
2472
+
2473
+ # the default AWS LB Controller behavior is to enable this by-namespace
2474
+ # ...but that's kinda annoying to do in a toggleable way - so let's instead
2475
+ # toggle based on pod labels (which of course, will require changing the controller
2476
+ # settings :p)
2477
+ if self.should_enable_aws_lb_readiness_gate():
2478
+ labels["elbv2.k8s.aws/pod-readiness-gate-inject"] = "enabled"
2479
+
2480
+ return V1PodTemplateSpec(
2481
+ metadata=V1ObjectMeta(
2482
+ labels=labels,
2483
+ annotations=annotations,
2484
+ ),
2485
+ spec=V1PodSpec(**pod_spec_kwargs),
2486
+ )
2487
+
2488
+ def get_node_selector(self) -> Mapping[str, str]:
2489
+ """Converts simple node restrictions into node selectors. Unlike node
2490
+ affinities, selectors will show up in `kubectl describe`.
2491
+ """
2492
+ raw_selectors: Mapping[str, Any] = self.config_dict.get("node_selectors", {})
2493
+ node_selectors = {
2494
+ to_node_label(label): value
2495
+ for label, value in raw_selectors.items()
2496
+ if type(value) is str
2497
+ }
2498
+ node_selectors["yelp.com/pool"] = self.get_pool()
2499
+ return node_selectors
2500
+
2501
+ def get_node_affinity(
2502
+ self, pool_node_affinities: Dict[str, Dict[str, List[str]]] = None
2503
+ ) -> Optional[V1NodeAffinity]:
2504
+ """Converts deploy_whitelist and deploy_blacklist in node affinities.
2505
+
2506
+ note: At the time of writing, `kubectl describe` does not show affinities,
2507
+ only selectors. To see affinities, use `kubectl get pod -o json` instead.
2508
+ """
2509
+ requirements = allowlist_denylist_to_requirements(
2510
+ allowlist=self.get_deploy_whitelist(),
2511
+ denylist=self.get_deploy_blacklist(),
2512
+ )
2513
+ node_selectors = self.config_dict.get("node_selectors", {})
2514
+ requirements.extend(
2515
+ raw_selectors_to_requirements(
2516
+ raw_selectors=node_selectors,
2517
+ )
2518
+ )
2519
+
2520
+ # PAASTA-18198: To improve AZ balance with Karpenter, we temporarily allow specifying zone affinities per pool
2521
+ if pool_node_affinities and self.get_pool() in pool_node_affinities:
2522
+ current_pool_node_affinities = pool_node_affinities[self.get_pool()]
2523
+ # If the service already has a node selector for a zone, we don't want to override it
2524
+ if current_pool_node_affinities and not contains_zone_label(node_selectors):
2525
+ requirements.extend(
2526
+ raw_selectors_to_requirements(
2527
+ raw_selectors=current_pool_node_affinities,
2528
+ )
2529
+ )
2530
+
2531
+ preferred_terms = []
2532
+ for node_selectors_prefered_config_dict in self.config_dict.get(
2533
+ "node_selectors_preferred", []
2534
+ ):
2535
+ preferred_terms.append(
2536
+ V1PreferredSchedulingTerm(
2537
+ weight=node_selectors_prefered_config_dict["weight"],
2538
+ preference=V1NodeSelectorTerm(
2539
+ match_expressions=[
2540
+ V1NodeSelectorRequirement(
2541
+ key=key,
2542
+ operator=op,
2543
+ values=vs,
2544
+ )
2545
+ for key, op, vs in raw_selectors_to_requirements(
2546
+ raw_selectors=node_selectors_prefered_config_dict[
2547
+ "preferences"
2548
+ ]
2549
+ )
2550
+ ]
2551
+ ),
2552
+ )
2553
+ )
2554
+
2555
+ # package everything into a node affinity - lots of layers :P
2556
+ if len(requirements) == 0 and len(preferred_terms) == 0:
2557
+ return None
2558
+
2559
+ required_term = (
2560
+ V1NodeSelectorTerm(
2561
+ match_expressions=[
2562
+ V1NodeSelectorRequirement(
2563
+ key=key,
2564
+ operator=op,
2565
+ values=vs,
2566
+ )
2567
+ for key, op, vs in requirements
2568
+ ]
2569
+ )
2570
+ if requirements
2571
+ else None
2572
+ )
2573
+
2574
+ if not preferred_terms:
2575
+ preferred_terms = None
2576
+
2577
+ return V1NodeAffinity(
2578
+ required_during_scheduling_ignored_during_execution=(
2579
+ V1NodeSelector(node_selector_terms=[required_term])
2580
+ if required_term
2581
+ else None
2582
+ ),
2583
+ preferred_during_scheduling_ignored_during_execution=preferred_terms,
2584
+ )
2585
+
2586
+ def get_pod_required_anti_affinity_terms(
2587
+ self,
2588
+ ) -> Optional[List[V1PodAffinityTerm]]:
2589
+ conditions = self.config_dict.get("anti_affinity", [])
2590
+ if not conditions:
2591
+ return None
2592
+
2593
+ if not isinstance(conditions, list):
2594
+ conditions = [conditions]
2595
+
2596
+ affinity_terms = []
2597
+ for condition in conditions:
2598
+ label_selector = self._kube_affinity_condition_to_label_selector(condition)
2599
+ if label_selector:
2600
+ affinity_terms.append(
2601
+ V1PodAffinityTerm(
2602
+ # A topology key of hostname means a pod of this service
2603
+ # cannot be scheduled on a host containing another pod
2604
+ # matching the label_selector
2605
+ topology_key="kubernetes.io/hostname",
2606
+ label_selector=label_selector,
2607
+ )
2608
+ )
2609
+ return affinity_terms
2610
+
2611
+ def get_pod_preferred_anti_affinity_terms(
2612
+ self,
2613
+ ) -> Optional[List[V1WeightedPodAffinityTerm]]:
2614
+ conditions = self.config_dict.get("anti_affinity_preferred", [])
2615
+ if not conditions:
2616
+ return None
2617
+
2618
+ if not isinstance(conditions, list):
2619
+ conditions = [conditions]
2620
+
2621
+ affinity_terms = []
2622
+ for condition in conditions:
2623
+ label_selector = self._kube_affinity_condition_to_label_selector(condition)
2624
+ if label_selector:
2625
+ affinity_terms.append(
2626
+ V1WeightedPodAffinityTerm(
2627
+ # A topology key of hostname means a pod of this service
2628
+ # cannot be scheduled on a host containing another pod
2629
+ # matching the label_selector
2630
+ topology_key="kubernetes.io/hostname",
2631
+ label_selector=label_selector,
2632
+ weight=condition["weight"],
2633
+ )
2634
+ )
2635
+ return affinity_terms
2636
+
2637
+ def get_pod_anti_affinity(self) -> Optional[V1PodAntiAffinity]:
2638
+ """
2639
+ Converts the configured anti-affinity on service and instance into pod
2640
+ anti-affinities with "paasta.yelp.com"-prefixed label selectors.
2641
+ :return:
2642
+ """
2643
+
2644
+ required_terms = self.get_pod_required_anti_affinity_terms()
2645
+ preferred_terms = self.get_pod_preferred_anti_affinity_terms()
2646
+
2647
+ if required_terms is None and preferred_terms is None:
2648
+ return None
2649
+
2650
+ return V1PodAntiAffinity(
2651
+ required_during_scheduling_ignored_during_execution=required_terms,
2652
+ preferred_during_scheduling_ignored_during_execution=preferred_terms,
2653
+ )
2654
+
2655
+ def _kube_affinity_condition_to_label_selector(
2656
+ self, condition: KubeAffinityCondition
2657
+ ) -> Optional[V1LabelSelector]:
2658
+ """Converts the given condition to label selectors with paasta prefix"""
2659
+ labels = {}
2660
+ if "service" in condition:
2661
+ labels[PAASTA_ATTRIBUTE_PREFIX + "service"] = condition.get("service")
2662
+ if "instance" in condition:
2663
+ labels[PAASTA_ATTRIBUTE_PREFIX + "instance"] = condition.get("instance")
2664
+ return V1LabelSelector(match_labels=labels) if labels else None
2665
+
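As a rough illustration of the conversion above, here is a standalone sketch of the same mapping, assuming the paasta label prefix is "paasta.yelp.com/" (consistent with the labels used elsewhere in this file):

# Standalone sketch, not the real helper; assumes PAASTA_ATTRIBUTE_PREFIX == "paasta.yelp.com/".
PREFIX = "paasta.yelp.com/"

def condition_to_match_labels(condition: dict) -> dict:
    labels = {}
    if "service" in condition:
        labels[PREFIX + "service"] = condition["service"]
    if "instance" in condition:
        labels[PREFIX + "instance"] = condition["instance"]
    return labels

print(condition_to_match_labels({"service": "other_service", "instance": "main"}))
# {'paasta.yelp.com/service': 'other_service', 'paasta.yelp.com/instance': 'main'}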
2666
+ def sanitize_for_config_hash(
2667
+ self, config: Union[V1Deployment, V1StatefulSet]
2668
+ ) -> Mapping[str, Any]:
2669
+ """Removes some data from config to make it suitable for
2670
+ calculation of config hash.
2671
+
2672
+ :param config: complete_config hash to sanitise
2673
+ :returns: sanitised copy of complete_config hash
2674
+ """
2675
+ ahash = config.to_dict() # deep convert to dict
2676
+ ahash["paasta_secrets"] = get_kubernetes_secret_hashes(
2677
+ service=self.get_service(),
2678
+ environment_variables=self.get_env(),
2679
+ namespace=self.get_namespace(),
2680
+ )
2681
+
2682
+ # remove data we don't want used to hash configs
2683
+ # replica count
2684
+ if ahash["spec"] is not None:
2685
+ ahash["spec"].pop("replicas", None)
2686
+
2687
+ if ahash["metadata"] is not None:
2688
+ ahash["metadata"]["namespace"] = None
2689
+
2690
+ # soa-configs SHA
2691
+ try:
2692
+ for container in ahash["spec"]["template"]["spec"]["containers"]:
2693
+ container["env"] = [
2694
+ e
2695
+ for e in container["env"]
2696
+ if e.get("name", "") != "PAASTA_SOA_CONFIGS_SHA"
2697
+ ]
2698
+ except TypeError: # any of the values can be None
2699
+ pass
2700
+
2701
+ return ahash
2702
+
2703
+ def get_termination_grace_period(
2704
+ self, service_namespace_config: ServiceNamespaceConfig
2705
+ ) -> Optional[int]:
2706
+ """Return the number of seconds that kubernetes should wait for pre-stop hooks to finish (or for the main
2707
+ process to exit after signaling) before forcefully terminating the pod.
2708
+
2709
+ For smartstack services, defaults to a value long enough to allow the default pre-stop hook to finish.
2710
+ For non-smartstack services, defaults to None (kubernetes default of 30s).
2711
+ """
2712
+
2713
+ if service_namespace_config.is_in_smartstack():
2714
+ default = self.get_hacheck_prestop_sleep_seconds() + 1
2715
+ if self.get_pre_stop_wait_for_connections_to_complete(
2716
+ service_namespace_config
2717
+ ):
2718
+ # If the max timeout is more than 30 minutes, cap it to 30 minutes.
2719
+ # Most services with ultra-long timeouts are probably able to handle SIGTERM gracefully anyway.
2720
+ default += int(
2721
+ math.ceil(
2722
+ min(
2723
+ 1800,
2724
+ service_namespace_config.get_longest_timeout_ms() / 1000,
2725
+ )
2726
+ )
2727
+ )
2728
+ else:
2729
+ default = None
2730
+
2731
+ return self.get_lifecycle_dict().get(
2732
+ "termination_grace_period_seconds", default
2733
+ )
2734
+
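To make the smartstack default concrete, here is a small sketch of the arithmetic above; the 3-second hacheck pre-stop sleep and the 45-second longest timeout are illustrative assumptions, not values from this file:

import math

hacheck_prestop_sleep_seconds = 3  # assumed for illustration
longest_timeout_ms = 45_000        # assumed longest smartstack timeout

grace_period = hacheck_prestop_sleep_seconds + 1
# When waiting for in-flight connections to complete, add the longest
# upstream timeout in seconds, capped at 30 minutes (1800s).
grace_period += int(math.ceil(min(1800, longest_timeout_ms / 1000)))
print(grace_period)  # 49 seconds before kubernetes force-terminates the pod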
2735
+ def get_prometheus_shard(self) -> Optional[str]:
2736
+ return self.config_dict.get("prometheus_shard")
2737
+
2738
+ def get_prometheus_path(self) -> Optional[str]:
2739
+ return self.config_dict.get("prometheus_path")
2740
+
2741
+ def get_prometheus_port(self) -> Optional[int]:
2742
+ return self.config_dict.get("prometheus_port")
2743
+
2744
+ def get_topology_spread_constraints(
2745
+ self,
2746
+ default_pod_topology_spread_constraints: List[TopologySpreadConstraintDict],
2747
+ ) -> List[TopologySpreadConstraintDict]:
2748
+ return self.config_dict.get(
2749
+ "topology_spread_constraints", default_pod_topology_spread_constraints
2750
+ )
2751
+
2752
+ def get_projected_sa_volumes(self) -> List[ProjectedSAVolume]:
2753
+ return add_volumes_for_authenticating_services(
2754
+ service_name=self.service,
2755
+ config_volumes=super().get_projected_sa_volumes(),
2756
+ soa_dir=self.soa_dir,
2757
+ )
2758
+
2759
+
2760
+ def get_kubernetes_secret_hashes(
2761
+ environment_variables: Mapping[str, str], service: str, namespace: str
2762
+ ) -> Mapping[str, str]:
2763
+ hashes = {}
2764
+ to_get_hash = []
2765
+ for v in environment_variables.values():
2766
+ if is_secret_ref(v):
2767
+ to_get_hash.append(v)
2768
+ if to_get_hash:
2769
+ kube_client = KubeClient()
2770
+ for value in to_get_hash:
2771
+ hashes[value] = get_secret_signature(
2772
+ kube_client=kube_client,
2773
+ signature_name=get_paasta_secret_signature_name(
2774
+ namespace,
2775
+ SHARED_SECRET_SERVICE if is_shared_secret(value) else service,
2776
+ get_secret_name_from_ref(value),
2777
+ ),
2778
+ namespace=namespace,
2779
+ )
2780
+ return hashes
2781
+
2782
+
2783
+ def get_k8s_pods() -> Mapping[str, Any]:
2784
+ return requests.get("http://127.0.0.1:10255/pods").json()
2785
+
2786
+
2787
+ def get_all_kubernetes_services_running_here() -> List[Tuple[str, str, int]]:
2788
+ """Returns all k8s paasta services, even if not in smartstack. Returns a service, instance, port
2789
+ tuple to match the return value of other similar functions"""
2790
+ services = []
2791
+ try:
2792
+ pods = get_k8s_pods()
2793
+ except requests.exceptions.ConnectionError:
2794
+ log.debug("Failed to connect to the kublet when trying to get pods")
2795
+ return []
2796
+ for pod in pods["items"]:
2797
+ try:
2798
+ service = pod["metadata"]["labels"]["paasta.yelp.com/service"]
2799
+ instance = pod["metadata"]["labels"]["paasta.yelp.com/instance"]
2800
+ services.append((service, instance, 0))
2801
+ except KeyError:
2802
+ log.debug(f"Skipping listing what looks like a non-paasta pod: {pod}")
2803
+ return services
2804
+
2805
+
2806
+ def get_kubernetes_services_running_here(
2807
+ exclude_terminating: bool = False,
2808
+ ) -> Sequence[KubernetesServiceRegistration]:
2809
+ services = []
2810
+ pods = get_k8s_pods()
2811
+ for pod in pods["items"]:
2812
+ if (
2813
+ pod["status"]["phase"] != "Running"
2814
+ or "smartstack_registrations" not in pod["metadata"].get("annotations", {})
2815
+ or (exclude_terminating and pod["metadata"].get("deletionTimestamp"))
2816
+ ):
2817
+ continue
2818
+ try:
2819
+ port = None
2820
+ for container in pod["spec"]["containers"]:
2821
+ if container["name"] != HACHECK_POD_NAME:
2822
+ port = container["ports"][0]["containerPort"]
2823
+ break
2824
+
2825
+ try:
2826
+ weight = int(pod["metadata"]["labels"]["paasta.yelp.com/weight"])
2827
+ except (KeyError, ValueError):
2828
+ weight = 10
2829
+
2830
+ services.append(
2831
+ KubernetesServiceRegistration(
2832
+ name=pod["metadata"]["labels"]["paasta.yelp.com/service"],
2833
+ instance=pod["metadata"]["labels"]["paasta.yelp.com/instance"],
2834
+ port=port,
2835
+ pod_ip=pod["status"]["podIP"],
2836
+ registrations=json.loads(
2837
+ pod["metadata"]["annotations"]["smartstack_registrations"]
2838
+ ),
2839
+ weight=weight,
2840
+ )
2841
+ )
2842
+ except KeyError as e:
2843
+ log.warning(
2844
+ f"Found running paasta pod but missing {e} key so not registering with nerve"
2845
+ )
2846
+ return services
2847
+
2848
+
2849
+ def get_kubernetes_services_running_here_for_nerve(
2850
+ cluster: Optional[str], soa_dir: str
2851
+ ) -> List[Tuple[str, ServiceNamespaceConfig]]:
2852
+ try:
2853
+ system_paasta_config = load_system_paasta_config()
2854
+ if not cluster:
2855
+ cluster = system_paasta_config.get_cluster()
2856
+ # If there is *no* cluster configured, or there is no Paasta
2857
+ # configuration file at *all*, then there can be no kubernetes
2858
+ # services running here, so we catch these custom exceptions
2859
+ # and return [].
2860
+ if not system_paasta_config.get_register_k8s_pods():
2861
+ return []
2862
+ exclude_terminating = (
2863
+ not system_paasta_config.get_nerve_register_k8s_terminating()
2864
+ )
2865
+
2866
+ except PaastaNotConfiguredError:
2867
+ log.warning("No PaaSTA config so skipping registering k8s pods in nerve")
2868
+ return []
2869
+ kubernetes_services = get_kubernetes_services_running_here(
2870
+ exclude_terminating=exclude_terminating
2871
+ )
2872
+ nerve_list = []
2873
+ for kubernetes_service in kubernetes_services:
2874
+ try:
2875
+ for registration in kubernetes_service.registrations:
2876
+ reg_service, reg_namespace, _, __ = decompose_job_id(registration)
2877
+ try:
2878
+ nerve_dict = load_service_namespace_config(
2879
+ service=reg_service, namespace=reg_namespace, soa_dir=soa_dir
2880
+ )
2881
+ except Exception as e:
2882
+ log.warning(str(e))
2883
+ log.warning(
2884
+ f"Could not get smartstack config for {reg_service}.{reg_namespace}, skipping"
2885
+ )
2886
+ # but the show must go on!
2887
+ continue
2888
+ if not nerve_dict.is_in_smartstack():
2889
+ continue
2890
+ nerve_dict["port"] = kubernetes_service.port
2891
+ nerve_dict["service_ip"] = kubernetes_service.pod_ip
2892
+ if system_paasta_config.get_kubernetes_use_hacheck_sidecar():
2893
+ nerve_dict["hacheck_ip"] = kubernetes_service.pod_ip
2894
+ else:
2895
+ nerve_dict["extra_healthcheck_headers"] = {
2896
+ "X-Nerve-Check-IP": kubernetes_service.pod_ip
2897
+ }
2898
+ nerve_dict["weight"] = kubernetes_service.weight
2899
+ nerve_list.append((registration, nerve_dict))
2900
+ except KeyError:
2901
+ continue # SOA configs got deleted for this app, it'll get cleaned up
2902
+
2903
+ return nerve_list
2904
+
2905
+
2906
+ def force_delete_pods(
2907
+ service: str,
2908
+ paasta_service: str,
2909
+ instance: str,
2910
+ namespace: str,
2911
+ kube_client: KubeClient,
2912
+ ) -> None:
2913
+ # Note that KubeClient.deployments.delete_namespaced_deployment must be called prior to this method.
2914
+ pods_to_delete = a_sync.block(
2915
+ pods_for_service_instance,
2916
+ paasta_service,
2917
+ instance,
2918
+ kube_client,
2919
+ namespace=namespace,
2920
+ )
2921
+ delete_options = V1DeleteOptions()
2922
+ for pod in pods_to_delete:
2923
+ kube_client.core.delete_namespaced_pod(
2924
+ pod.metadata.name, namespace, body=delete_options, grace_period_seconds=0
2925
+ )
2926
+
2927
+
2928
+ @time_cache(ttl=60)
2929
+ def get_all_namespaces(
2930
+ kube_client: KubeClient, label_selector: Optional[str] = None
2931
+ ) -> List[str]:
2932
+ namespaces = kube_client.core.list_namespace(label_selector=label_selector)
2933
+ return [item.metadata.name for item in namespaces.items]
2934
+
2935
+
2936
+ def get_all_managed_namespaces(kube_client: KubeClient) -> List[str]:
2937
+ return get_all_namespaces(
2938
+ kube_client=kube_client, label_selector=f"{paasta_prefixed('managed')}=true"
2939
+ )
2940
+
2941
+
2942
+ def get_matching_namespaces(
2943
+ all_namespaces: Iterable[str],
2944
+ namespace_prefix: Optional[str],
2945
+ additional_namespaces: Container[str],
2946
+ ) -> List[str]:
2947
+ return [
2948
+ n
2949
+ for n in all_namespaces
2950
+ if (namespace_prefix is not None and n.startswith(namespace_prefix))
2951
+ or n in additional_namespaces
2952
+ ]
2953
+
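A quick sketch of the filter above, using made-up namespace names:

def matching(all_namespaces, namespace_prefix, additional_namespaces):
    # Same predicate as get_matching_namespaces above.
    return [
        n
        for n in all_namespaces
        if (namespace_prefix is not None and n.startswith(namespace_prefix))
        or n in additional_namespaces
    ]

print(
    matching(
        ["paastasvc-foo", "kube-system", "tron"],
        namespace_prefix="paastasvc-",
        additional_namespaces={"tron"},
    )
)
# ['paastasvc-foo', 'tron']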
2954
+
2955
+ @functools.lru_cache()
2956
+ def ensure_namespace(kube_client: KubeClient, namespace: str) -> None:
2957
+ paasta_namespace = V1Namespace(
2958
+ metadata=V1ObjectMeta(
2959
+ name=namespace,
2960
+ labels={
2961
+ "name": namespace,
2962
+ paasta_prefixed("owner"): "compute_infra_platform_experience",
2963
+ paasta_prefixed("managed"): "true",
2964
+ },
2965
+ )
2966
+ )
2967
+ namespace_names = get_all_namespaces(kube_client)
2968
+ if namespace not in namespace_names:
2969
+ log.warning(f"Creating namespace: {namespace} as it does not exist")
2970
+ try:
2971
+ kube_client.core.create_namespace(body=paasta_namespace)
2972
+ except ApiException as e:
2973
+ if e.status == 409:
2974
+ log.warning(
2975
+ "Got HTTP 409 when creating namespace; it must already exist. Continuing."
2976
+ )
2977
+ else:
2978
+ raise
2979
+
2980
+ ensure_paasta_api_rolebinding(kube_client, namespace)
2981
+ ensure_paasta_namespace_limits(kube_client, namespace)
2982
+
2983
+
2984
+ def ensure_paasta_api_rolebinding(kube_client: KubeClient, namespace: str) -> None:
2985
+ rolebindings = get_all_role_bindings(kube_client, namespace=namespace)
2986
+ rolebinding_names = [item.metadata.name for item in rolebindings]
2987
+ if "paasta-api-server-per-namespace" not in rolebinding_names:
2988
+ log.warning(
2989
+ f"Creating rolebinding paasta-api-server-per-namespace on {namespace} namespace as it does not exist"
2990
+ )
2991
+ role_binding = V1RoleBinding(
2992
+ metadata=V1ObjectMeta(
2993
+ name="paasta-api-server-per-namespace",
2994
+ namespace=namespace,
2995
+ ),
2996
+ role_ref=V1RoleRef(
2997
+ api_group="rbac.authorization.k8s.io",
2998
+ kind="ClusterRole",
2999
+ name="paasta-api-server-per-namespace",
3000
+ ),
3001
+ subjects=[
3002
+ V1Subject(
3003
+ kind="User",
3004
+ name="yelp.com/paasta-api-server",
3005
+ ),
3006
+ ],
3007
+ )
3008
+ kube_client.rbac.create_namespaced_role_binding(
3009
+ namespace=namespace, body=role_binding
3010
+ )
3011
+
3012
+
3013
+ def ensure_paasta_namespace_limits(kube_client: KubeClient, namespace: str) -> None:
3014
+ if not namespace.startswith("paastasvc-"):
3015
+ log.debug(
3016
+ f"Not creating LimitRange because {namespace} does not start with paastasvc-"
3017
+ )
3018
+ return
3019
+
3020
+ limits = get_all_limit_ranges(kube_client, namespace=namespace)
3021
+ limits_names = {item.metadata.name for item in limits}
3022
+ if "limit-mem-cpu-disk-per-container" not in limits_names:
3023
+ log.warning(
3024
+ f"Creating limit: limit-mem-cpu-disk-per-container on {namespace} namespace as it does not exist"
3025
+ )
3026
+ limit = V1LimitRange(
3027
+ metadata=V1ObjectMeta(
3028
+ name="limit-mem-cpu-disk-per-container",
3029
+ namespace=namespace,
3030
+ ),
3031
+ spec=V1LimitRangeSpec(
3032
+ limits=[
3033
+ V1LimitRangeItem(
3034
+ type="Container",
3035
+ default={
3036
+ "cpu": "1",
3037
+ "memory": "1024Mi",
3038
+ "ephemeral-storage": "1Gi",
3039
+ },
3040
+ default_request={
3041
+ "cpu": "1",
3042
+ "memory": "1024Mi",
3043
+ "ephemeral-storage": "1Gi",
3044
+ },
3045
+ )
3046
+ ]
3047
+ ),
3048
+ )
3049
+ kube_client.core.create_namespaced_limit_range(namespace=namespace, body=limit)
3050
+
3051
+
3052
+ def list_deployments_in_all_namespaces(
3053
+ kube_client: KubeClient, label_selector: str
3054
+ ) -> List[KubeDeployment]:
3055
+ deployments = kube_client.deployments.list_deployment_for_all_namespaces(
3056
+ label_selector=label_selector
3057
+ )
3058
+ stateful_sets = kube_client.deployments.list_stateful_set_for_all_namespaces(
3059
+ label_selector=label_selector
3060
+ )
3061
+ return [
3062
+ KubeDeployment(
3063
+ service=item.metadata.labels["paasta.yelp.com/service"],
3064
+ instance=item.metadata.labels["paasta.yelp.com/instance"],
3065
+ git_sha=item.metadata.labels.get("paasta.yelp.com/git_sha", ""),
3066
+ image_version=item.metadata.labels.get(
3067
+ "paasta.yelp.com/image_version", None
3068
+ ),
3069
+ namespace=item.metadata.namespace,
3070
+ config_sha=item.metadata.labels.get("paasta.yelp.com/config_sha", ""),
3071
+ replicas=(
3072
+ item.spec.replicas
3073
+ if item.metadata.labels.get(paasta_prefixed("autoscaled"), "false")
3074
+ == "false"
3075
+ else None
3076
+ ),
3077
+ )
3078
+ for item in deployments.items + stateful_sets.items
3079
+ ]
3080
+
3081
+
3082
+ def list_deployments(
3083
+ kube_client: KubeClient,
3084
+ *,
3085
+ namespace: str,
3086
+ label_selector: str = "",
3087
+ ) -> Sequence[KubeDeployment]:
3088
+
3089
+ deployments = kube_client.deployments.list_namespaced_deployment(
3090
+ namespace=namespace, label_selector=label_selector
3091
+ )
3092
+ stateful_sets = kube_client.deployments.list_namespaced_stateful_set(
3093
+ namespace=namespace, label_selector=label_selector
3094
+ )
3095
+ return [
3096
+ KubeDeployment(
3097
+ service=item.metadata.labels["paasta.yelp.com/service"],
3098
+ instance=item.metadata.labels["paasta.yelp.com/instance"],
3099
+ git_sha=item.metadata.labels.get("paasta.yelp.com/git_sha", ""),
3100
+ image_version=item.metadata.labels.get(
3101
+ "paasta.yelp.com/image_version", None
3102
+ ),
3103
+ namespace=item.metadata.namespace,
3104
+ config_sha=item.metadata.labels["paasta.yelp.com/config_sha"],
3105
+ replicas=(
3106
+ item.spec.replicas
3107
+ if item.metadata.labels.get(paasta_prefixed("autoscaled"), "false")
3108
+ == "false"
3109
+ else None
3110
+ ),
3111
+ )
3112
+ for item in deployments.items + stateful_sets.items
3113
+ ]
3114
+
3115
+
3116
+ def list_deployments_in_managed_namespaces(
3117
+ kube_client: KubeClient,
3118
+ label_selector: str,
3119
+ ) -> List[KubeDeployment]:
3120
+ ret: List[KubeDeployment] = []
3121
+ for namespace in get_all_managed_namespaces(kube_client):
3122
+ try:
3123
+ ret.extend(
3124
+ list_deployments(
3125
+ kube_client=kube_client,
3126
+ label_selector=label_selector,
3127
+ namespace=namespace,
3128
+ )
3129
+ )
3130
+ except ApiException as exc:
3131
+ log.error(
3132
+ f"Error fetching deployments from namespace {namespace}: "
3133
+ f"status: {exc.status}, reason: {exc.reason}."
3134
+ )
3135
+ return ret
3136
+
3137
+
3138
+ def recent_container_restart(
3139
+ restart_count: int,
3140
+ last_state: Optional[str],
3141
+ last_timestamp: Optional[int],
3142
+ time_window_s: int = 900, # 15 mins
3143
+ ) -> bool:
3144
+ min_timestamp = datetime.now(timezone.utc).timestamp() - time_window_s
3145
+ return (
3146
+ restart_count > 0
3147
+ and last_state == "terminated"
3148
+ and last_timestamp is not None
3149
+ and last_timestamp > min_timestamp
3150
+ )
3151
+
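For example, a container that terminated five minutes ago counts as a recent restart under the default 15-minute window; a standalone sketch of the same check:

from datetime import datetime, timedelta, timezone

def restarted_recently(restart_count, last_state, last_timestamp, time_window_s=900):
    # Mirrors recent_container_restart above.
    min_timestamp = datetime.now(timezone.utc).timestamp() - time_window_s
    return (
        restart_count > 0
        and last_state == "terminated"
        and last_timestamp is not None
        and last_timestamp > min_timestamp
    )

five_minutes_ago = (datetime.now(timezone.utc) - timedelta(minutes=5)).timestamp()
print(restarted_recently(1, "terminated", five_minutes_ago))  # True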
3152
+
3153
+ @async_timeout()
3154
+ async def get_tail_lines_for_kubernetes_container(
3155
+ kube_client: KubeClient,
3156
+ pod: V1Pod,
3157
+ container: V1ContainerStatus,
3158
+ num_tail_lines: int,
3159
+ previous: bool = False,
3160
+ ) -> MutableMapping[str, Any]:
3161
+ tail_lines: MutableMapping[str, Any] = {
3162
+ "stdout": [],
3163
+ "stderr": [],
3164
+ "error_message": "",
3165
+ }
3166
+
3167
+ if container.name != HACHECK_POD_NAME:
3168
+ error = ""
3169
+ if container.state.waiting:
3170
+ error = container.state.waiting.message or ""
3171
+ elif container.state.terminated:
3172
+ error = container.state.terminated.message or ""
3173
+ tail_lines["error_message"] = error
3174
+
3175
+ try:
3176
+ if num_tail_lines > 0:
3177
+ log = kube_client.core.read_namespaced_pod_log(
3178
+ name=pod.metadata.name,
3179
+ namespace=pod.metadata.namespace,
3180
+ container=container.name,
3181
+ tail_lines=num_tail_lines,
3182
+ previous=previous,
3183
+ )
3184
+ tail_lines["stdout"].extend(log.split("\n"))
3185
+ except ApiException as e:
3186
+ # there is a potential race condition in which a pod's containers
3187
+ # have not failed yet when we check, but have by the time we fetch the logs. In this
3188
+ # case, use the error from the exception, though it is less accurate.
3189
+ if error == "":
3190
+ body = json.loads(e.body)
3191
+ error = body.get("message", "")
3192
+ tail_lines["error_message"] = f"couldn't read stdout/stderr: '{error}'"
3193
+
3194
+ return tail_lines
3195
+
3196
+
3197
+ async def get_pod_event_messages(
3198
+ kube_client: KubeClient, pod: V1Pod, max_age_in_seconds: Optional[int] = None
3199
+ ) -> List[Dict]:
3200
+ pod_events = await get_events_for_object(
3201
+ kube_client, pod, "Pod", max_age_in_seconds
3202
+ )
3203
+ pod_event_messages = []
3204
+ if pod_events:
3205
+ for event in pod_events:
3206
+ message = {
3207
+ "message": event.message,
3208
+ "timeStamp": str(event.last_timestamp),
3209
+ }
3210
+ pod_event_messages.append(message)
3211
+ return pod_event_messages
3212
+
3213
+
3214
+ def format_pod_event_messages(
3215
+ pod_event_messages: List[Dict], pod_name: str
3216
+ ) -> List[str]:
3217
+ rows: List[str] = list()
3218
+ rows.append(PaastaColors.blue(f" Pod Events for {pod_name}"))
3219
+ for message in pod_event_messages:
3220
+ if "error" in message:
3221
+ rows.append(PaastaColors.yellow(f' Error: {message["error"]}'))
3222
+ else:
3223
+ timestamp = message.get("time_stamp", "unknown time")
3224
+ message_text = message.get("message", "")
3225
+ rows.append(f" Event at {timestamp}: {message_text}")
3226
+ return rows
3227
+
3228
+
3229
+ def format_tail_lines_for_kubernetes_pod(
3230
+ pod_containers: Sequence,
3231
+ pod_name: str,
3232
+ ) -> List[str]:
3233
+ errors: List[str] = []
3234
+ lines: List[str] = []
3235
+ tail_line_prefixes = (
3236
+ ("tail_lines", "current"),
3237
+ ("previous_tail_lines", "previous (pre-restart)"),
3238
+ )
3239
+
3240
+ for container in pod_containers:
3241
+ for tail_line_key, stream_prefix in tail_line_prefixes:
3242
+ tail_lines = getattr(container, tail_line_key, None)
3243
+ if tail_lines is None:
3244
+ break
3245
+ if tail_lines.error_message:
3246
+ errors.append(PaastaColors.red(f" {tail_lines.error_message}"))
3247
+
3248
+ for stream_name in ("stdout", "stderr"):
3249
+ stream_lines = getattr(tail_lines, stream_name, [])
3250
+ if len(stream_lines) > 0:
3251
+ lines.append(
3252
+ PaastaColors.blue(
3253
+ f" {stream_prefix} {stream_name} tail for {container.name} "
3254
+ f"in pod {pod_name}"
3255
+ )
3256
+ )
3257
+ lines.extend(f" {line}" for line in stream_lines)
3258
+
3259
+ rows: List[str] = []
3260
+ if errors:
3261
+ rows.append(
3262
+ PaastaColors.blue(
3263
+ f" errors for container {container.name} in pod {pod_name}"
3264
+ )
3265
+ )
3266
+ rows.extend(errors)
3267
+ rows.append("")
3268
+ rows.extend(lines)
3269
+ return rows
3270
+
3271
+
3272
+ def create_custom_resource(
3273
+ kube_client: KubeClient,
3274
+ formatted_resource: Mapping[str, Any],
3275
+ version: str,
3276
+ kind: KubeKind,
3277
+ group: str,
3278
+ ) -> None:
3279
+ return kube_client.custom.create_namespaced_custom_object(
3280
+ group=group,
3281
+ version=version,
3282
+ namespace=f"paasta-{kind.plural}",
3283
+ plural=kind.plural,
3284
+ body=formatted_resource,
3285
+ )
3286
+
3287
+
3288
+ def update_custom_resource(
3289
+ kube_client: KubeClient,
3290
+ formatted_resource: Mapping[str, Any],
3291
+ version: str,
3292
+ name: str,
3293
+ kind: KubeKind,
3294
+ group: str,
3295
+ ) -> None:
3296
+ co = kube_client.custom.get_namespaced_custom_object(
3297
+ name=name,
3298
+ group=group,
3299
+ version=version,
3300
+ namespace=f"paasta-{kind.plural}",
3301
+ plural=kind.plural,
3302
+ )
3303
+ formatted_resource["metadata"]["resourceVersion"] = co["metadata"][
3304
+ "resourceVersion"
3305
+ ]
3306
+ return kube_client.custom.replace_namespaced_custom_object(
3307
+ name=name,
3308
+ group=group,
3309
+ version=version,
3310
+ namespace=f"paasta-{kind.plural}",
3311
+ plural=kind.plural,
3312
+ body=formatted_resource,
3313
+ )
3314
+
3315
+
3316
+ def list_custom_resources(
3317
+ kind: KubeKind,
3318
+ version: str,
3319
+ kube_client: KubeClient,
3320
+ group: str,
3321
+ label_selector: str = "",
3322
+ ) -> Sequence[KubeCustomResource]:
3323
+ crs = kube_client.custom.list_namespaced_custom_object(
3324
+ group=group,
3325
+ version=version,
3326
+ label_selector=label_selector,
3327
+ plural=kind.plural,
3328
+ namespace=f"paasta-{kind.plural}",
3329
+ )
3330
+ kube_custom_resources = []
3331
+ for cr in crs["items"]:
3332
+ try:
3333
+ kube_custom_resources.append(
3334
+ KubeCustomResource(
3335
+ service=cr["metadata"]["labels"]["paasta.yelp.com/service"],
3336
+ instance=cr["metadata"]["labels"]["paasta.yelp.com/instance"],
3337
+ config_sha=cr["metadata"]["labels"]["paasta.yelp.com/config_sha"],
3338
+ git_sha=cr["metadata"]["labels"].get("paasta.yelp.com/git_sha", ""),
3339
+ kind=cr["kind"],
3340
+ namespace=cr["metadata"]["namespace"],
3341
+ name=cr["metadata"]["name"],
3342
+ )
3343
+ )
3344
+ except KeyError as e:
3345
+ log.debug(
3346
+ f"Ignoring custom resource that is missing paasta label {e}: {cr}"
3347
+ )
3348
+ continue
3349
+ return kube_custom_resources
3350
+
3351
+
3352
+ def delete_custom_resource(
3353
+ kube_client: KubeClient,
3354
+ name: str,
3355
+ namespace: str,
3356
+ group: str,
3357
+ version: str,
3358
+ plural: str,
3359
+ ) -> None:
3360
+ return kube_client.custom.delete_namespaced_custom_object(
3361
+ name=name,
3362
+ namespace=namespace,
3363
+ group=group,
3364
+ version=version,
3365
+ plural=plural,
3366
+ body=V1DeleteOptions(),
3367
+ )
3368
+
3369
+
3370
+ def max_unavailable(instance_count: int, bounce_margin_factor: float) -> int:
3371
+ if instance_count == 0:
3372
+ return 0
3373
+ else:
3374
+ return max(
3375
+ instance_count - int(math.ceil(instance_count * bounce_margin_factor)), 1
3376
+ )
3377
+
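As a worked example of the arithmetic above, with a bounce_margin_factor of 0.95 (an assumed, illustrative value) at least 95% of instances stay available during a bounce, and at least one pod may always be taken down:

import math

def max_unavailable_sketch(instance_count, bounce_margin_factor):
    # Same arithmetic as max_unavailable above.
    if instance_count == 0:
        return 0
    return max(
        instance_count - int(math.ceil(instance_count * bounce_margin_factor)), 1
    )

print(max_unavailable_sketch(10, 0.95))   # 1 (10 - ceil(9.5) = 0, floor of 1 applies)
print(max_unavailable_sketch(100, 0.95))  # 5
print(max_unavailable_sketch(1, 0.95))    # 1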
3378
+
3379
+ def pod_disruption_budget_for_service_instance(
3380
+ service: str,
3381
+ instance: str,
3382
+ max_unavailable: Union[str, int],
3383
+ namespace: str,
3384
+ ) -> V1PodDisruptionBudget:
3385
+ return V1PodDisruptionBudget(
3386
+ metadata=V1ObjectMeta(
3387
+ name=get_kubernetes_app_name(service, instance),
3388
+ namespace=namespace,
3389
+ ),
3390
+ spec=V1PodDisruptionBudgetSpec(
3391
+ max_unavailable=max_unavailable,
3392
+ selector=V1LabelSelector(
3393
+ match_labels={
3394
+ "paasta.yelp.com/service": service,
3395
+ "paasta.yelp.com/instance": instance,
3396
+ }
3397
+ ),
3398
+ ),
3399
+ )
3400
+
3401
+
3402
+ def create_pod_disruption_budget(
3403
+ kube_client: KubeClient,
3404
+ pod_disruption_budget: V1PodDisruptionBudget,
3405
+ namespace: str,
3406
+ ) -> None:
3407
+ return kube_client.policy.create_namespaced_pod_disruption_budget(
3408
+ namespace=namespace, body=pod_disruption_budget
3409
+ )
3410
+
3411
+
3412
+ def set_instances_for_kubernetes_service(
3413
+ kube_client: KubeClient,
3414
+ service_config: KubernetesDeploymentConfig,
3415
+ instance_count: int,
3416
+ ) -> None:
3417
+ name = service_config.get_sanitised_deployment_name()
3418
+ formatted_application = service_config.format_kubernetes_app()
3419
+ formatted_application.spec.replicas = instance_count
3420
+ if service_config.get_persistent_volumes():
3421
+ kube_client.deployments.patch_namespaced_stateful_set_scale(
3422
+ name=name,
3423
+ namespace=service_config.get_namespace(),
3424
+ body=formatted_application,
3425
+ )
3426
+ else:
3427
+ kube_client.deployments.patch_namespaced_deployment_scale(
3428
+ name=name,
3429
+ namespace=service_config.get_namespace(),
3430
+ body=formatted_application,
3431
+ )
3432
+
3433
+
3434
+ def get_annotations_for_kubernetes_service(
3435
+ kube_client: KubeClient, service_config: KubernetesDeploymentConfig
3436
+ ) -> Dict:
3437
+ name = service_config.get_sanitised_deployment_name()
3438
+ if service_config.get_persistent_volumes():
3439
+ k8s_service = kube_client.deployments.read_namespaced_stateful_set(
3440
+ name=name, namespace=service_config.get_namespace()
3441
+ )
3442
+ else:
3443
+ k8s_service = kube_client.deployments.read_namespaced_deployment(
3444
+ name=name, namespace=service_config.get_namespace()
3445
+ )
3446
+ return k8s_service.metadata.annotations if k8s_service.metadata.annotations else {}
3447
+
3448
+
3449
+ def write_annotation_for_kubernetes_service(
3450
+ kube_client: KubeClient,
3451
+ service_config: KubernetesDeploymentConfig,
3452
+ formatted_application: Union[V1Deployment, V1StatefulSet],
3453
+ annotation: Dict,
3454
+ ) -> None:
3455
+ name = formatted_application.metadata.name
3456
+ formatted_application.metadata.annotations = annotation
3457
+ if service_config.get_persistent_volumes():
3458
+ kube_client.deployments.patch_namespaced_stateful_set(
3459
+ name=name,
3460
+ namespace=service_config.get_namespace(),
3461
+ body=formatted_application,
3462
+ )
3463
+ else:
3464
+ kube_client.deployments.patch_namespaced_deployment(
3465
+ name=name,
3466
+ namespace=service_config.get_namespace(),
3467
+ body=formatted_application,
3468
+ )
3469
+
3470
+
3471
+ def list_all_paasta_deployments(kube_client: KubeClient) -> Sequence[KubeDeployment]:
3472
+ """Gets deployments in all namespaces by passing the service label selector"""
3473
+ label_selectors = "paasta.yelp.com/service"
3474
+ return list_deployments_in_all_namespaces(
3475
+ kube_client=kube_client, label_selector=label_selectors
3476
+ )
3477
+
3478
+
3479
+ def list_all_deployments(
3480
+ kube_client: KubeClient, namespace: str
3481
+ ) -> Sequence[KubeDeployment]:
3482
+ return list_deployments(kube_client=kube_client, namespace=namespace)
3483
+
3484
+
3485
+ def list_matching_deployments(
3486
+ service: str,
3487
+ instance: str,
3488
+ *,
3489
+ namespace: str,
3490
+ kube_client: KubeClient,
3491
+ ) -> Sequence[KubeDeployment]:
3492
+ return list_deployments(
3493
+ kube_client,
3494
+ label_selector=f"paasta.yelp.com/service={service},paasta.yelp.com/instance={instance}",
3495
+ namespace=namespace,
3496
+ )
3497
+
3498
+
3499
+ def list_matching_deployments_in_all_namespaces(
3500
+ service: str,
3501
+ instance: str,
3502
+ kube_client: KubeClient,
3503
+ ) -> List[KubeDeployment]:
3504
+ return list_deployments_in_all_namespaces(
3505
+ kube_client,
3506
+ f"paasta.yelp.com/service={service},paasta.yelp.com/instance={instance}",
3507
+ )
3508
+
3509
+
3510
+ @async_timeout()
3511
+ async def replicasets_for_service_instance(
3512
+ service: str, instance: str, kube_client: KubeClient, namespace: str
3513
+ ) -> Sequence[V1ReplicaSet]:
3514
+ async_list_replica_set = a_sync.to_async(
3515
+ kube_client.deployments.list_namespaced_replica_set
3516
+ )
3517
+ response = await async_list_replica_set(
3518
+ label_selector=f"paasta.yelp.com/service={service},paasta.yelp.com/instance={instance}",
3519
+ namespace=namespace,
3520
+ )
3521
+ return response.items
3522
+
3523
+
3524
+ @async_timeout()
3525
+ async def controller_revisions_for_service_instance(
3526
+ service: str, instance: str, kube_client: KubeClient, namespace: str
3527
+ ) -> Sequence[V1ControllerRevision]:
3528
+ async_list_controller_revisions = a_sync.to_async(
3529
+ kube_client.deployments.list_namespaced_controller_revision
3530
+ )
3531
+ response = await async_list_controller_revisions(
3532
+ label_selector=f"paasta.yelp.com/service={service},paasta.yelp.com/instance={instance}",
3533
+ namespace=namespace,
3534
+ )
3535
+ return response.items
3536
+
3537
+
3538
+ @async_timeout(15)
3539
+ async def pods_for_service_instance(
3540
+ service: str, instance: str, kube_client: KubeClient, namespace: str
3541
+ ) -> Sequence[V1Pod]:
3542
+ async_list_pods = a_sync.to_async(kube_client.core.list_namespaced_pod)
3543
+ response = await async_list_pods(
3544
+ label_selector=f"paasta.yelp.com/service={service},paasta.yelp.com/instance={instance}",
3545
+ namespace=namespace,
3546
+ )
3547
+ return response.items
3548
+
3549
+
3550
+ def get_pods_by_node(kube_client: KubeClient, node: V1Node) -> Sequence[V1Pod]:
3551
+ return kube_client.core.list_pod_for_all_namespaces(
3552
+ field_selector=f"spec.nodeName={node.metadata.name}"
3553
+ ).items
3554
+
3555
+
3556
+ def get_all_pods(kube_client: KubeClient, namespace: Optional[str]) -> List[V1Pod]:
3557
+ if namespace:
3558
+ return kube_client.core.list_namespaced_pod(namespace=namespace).items
3559
+ else:
3560
+ return kube_client.core.list_pod_for_all_namespaces().items
3561
+
3562
+
3563
+ @time_cache(ttl=300)
3564
+ def get_all_pods_cached(kube_client: KubeClient, namespace: str) -> Sequence[V1Pod]:
3565
+ pods: Sequence[V1Pod] = get_all_pods(kube_client, namespace)
3566
+ return pods
3567
+
3568
+
3569
+ def filter_pods_by_service_instance(
3570
+ pod_list: Sequence[V1Pod], service: str, instance: str
3571
+ ) -> Sequence[V1Pod]:
3572
+ return [
3573
+ pod
3574
+ for pod in pod_list
3575
+ if pod.metadata.labels is not None
3576
+ and pod.metadata.labels.get("paasta.yelp.com/service", "") == service
3577
+ and pod.metadata.labels.get("paasta.yelp.com/instance", "") == instance
3578
+ ]
3579
+
3580
+
3581
+ def group_pods_by_service_instance(
3582
+ pods: Sequence[V1Pod],
3583
+ ) -> Dict[str, Dict[str, List[V1Pod]]]:
3584
+ pods_by_service_instance: Dict[str, Dict[str, List[V1Pod]]] = {}
3585
+ for pod in pods:
3586
+ if pod.metadata.labels is not None:
3587
+ service = pod.metadata.labels.get("paasta.yelp.com/service")
3588
+ instance = pod.metadata.labels.get("paasta.yelp.com/instance")
3589
+
3590
+ if service and instance:
3591
+ if service not in pods_by_service_instance:
3592
+ pods_by_service_instance[service] = {}
3593
+ if instance not in pods_by_service_instance[service]:
3594
+ pods_by_service_instance[service][instance] = []
3595
+
3596
+ pods_by_service_instance[service][instance].append(pod)
3597
+
3598
+ return pods_by_service_instance
3599
+
3600
+
3601
+ def _is_it_ready(
3602
+ it: Union[V1Pod, V1Node],
3603
+ ) -> bool:
3604
+ ready_conditions = [
3605
+ cond.status == "True"
3606
+ for cond in it.status.conditions or []
3607
+ if cond.type == "Ready"
3608
+ ]
3609
+ return all(ready_conditions) if ready_conditions else False
3610
+
3611
+
3612
+ is_pod_ready = _is_it_ready
3613
+ is_node_ready = _is_it_ready
3614
+
3615
+
3616
+ def is_pod_completed(pod: V1Pod) -> bool:
3617
+ condition = get_pod_condition(pod, "ContainersReady")
3618
+ return condition.reason == "PodCompleted" if condition else False
3619
+
3620
+
3621
+ def is_pod_scheduled(pod: V1Pod) -> bool:
3622
+ scheduled_condition = get_pod_condition(pod, "PodScheduled")
3623
+ return scheduled_condition.status == "True" if scheduled_condition else False
3624
+
3625
+
3626
+ def get_pod_condition(pod: V1Pod, condition: str) -> V1PodCondition:
3627
+ conditions = [
3628
+ cond for cond in pod.status.conditions or [] if cond.type == condition
3629
+ ]
3630
+ if conditions:
3631
+ return conditions[0]
3632
+ return None
3633
+
3634
+
3635
+ class PodStatus(Enum):
3636
+ PENDING = (1,)
3637
+ RUNNING = (2,)
3638
+ SUCCEEDED = (3,)
3639
+ FAILED = (4,)
3640
+ UNKNOWN = (5,)
3641
+
3642
+
3643
+ _POD_STATUS_NAME_TO_STATUS = {s.name.upper(): s for s in PodStatus}
3644
+
3645
+
3646
+ def get_pod_status(
3647
+ pod: V1Pod,
3648
+ ) -> PodStatus:
3649
+ # TODO: we probably also need to deduce extended statuses here, like
3650
+ # `CrashLoopBackOff`, `ContainerCreating` timeout, and etc.
3651
+ return _POD_STATUS_NAME_TO_STATUS[pod.status.phase.upper()]
3652
+
3653
+
3654
+ def parse_container_resources(resources: Mapping[str, str]) -> KubeContainerResources:
3655
+ cpu_str = resources.get("cpu")
3656
+ if not cpu_str:
3657
+ cpus = None
3658
+ elif cpu_str[-1] == "m":
3659
+ cpus = float(cpu_str[:-1]) / 1000
3660
+ else:
3661
+ cpus = float(cpu_str)
3662
+
3663
+ mem_str = resources.get("memory")
3664
+ if not mem_str:
3665
+ mem_mb = None
3666
+ else:
3667
+ mem_mb = parse_size(mem_str) / 1000000
3668
+
3669
+ disk_str = resources.get("ephemeral-storage")
3670
+ if not disk_str:
3671
+ disk_mb = None
3672
+ else:
3673
+ disk_mb = parse_size(disk_str) / 1000000
3674
+
3675
+ return KubeContainerResources(cpus=cpus, mem=mem_mb, disk=disk_mb)
3676
+
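A sketch of the unit handling above; the memory conversion assumes parse_size returns a byte count for strings like "1024Mi":

def parse_cpu(cpu_str):
    # Millicore values such as "500m" become fractional CPUs, as above.
    if not cpu_str:
        return None
    if cpu_str[-1] == "m":
        return float(cpu_str[:-1]) / 1000
    return float(cpu_str)

print(parse_cpu("500m"))  # 0.5
print(parse_cpu("2"))     # 2.0
# Memory and ephemeral-storage go through parse_size() and are divided by
# 1,000,000, i.e. reported in decimal megabytes: assuming
# parse_size("1024Mi") == 1073741824 bytes, the result is ~1073.7 MB.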
3677
+
3678
+ def get_active_versions_for_service(
3679
+ obj_list: Sequence[Union[V1Pod, V1ReplicaSet, V1Deployment, V1StatefulSet]],
3680
+ ) -> Set[Tuple[DeploymentVersion, str]]:
3681
+ ret = set()
3682
+
3683
+ for obj in obj_list:
3684
+ config_sha = obj.metadata.labels.get("paasta.yelp.com/config_sha")
3685
+ if config_sha and config_sha.startswith("config"):
3686
+ config_sha = config_sha[len("config") :]
3687
+
3688
+ git_sha = obj.metadata.labels.get("paasta.yelp.com/git_sha")
3689
+ if git_sha and git_sha.startswith("git"):
3690
+ git_sha = git_sha[len("git") :]
3691
+
3692
+ image_version = obj.metadata.labels.get("paasta.yelp.com/image_version")
3693
+
3694
+ # Suppress entries where we have no clue what's running.
3695
+ if git_sha or config_sha:
3696
+ ret.add(
3697
+ (
3698
+ DeploymentVersion(sha=git_sha, image_version=image_version),
3699
+ config_sha,
3700
+ )
3701
+ )
3702
+ return ret
3703
+
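The config_sha and git_sha labels may carry "config"/"git" prefixes, which are stripped before being reported; a short sketch with hypothetical SHAs:

def strip_prefix(value, prefix):
    # Mirrors the prefix stripping above.
    if value and value.startswith(prefix):
        return value[len(prefix):]
    return value

print(strip_prefix("configdeadbeef", "config"))  # 'deadbeef'
print(strip_prefix("gitabc12345", "git"))        # 'abc12345'
print(strip_prefix("abc12345", "git"))           # 'abc12345' (already bare)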
3704
+
3705
+ def get_all_nodes(
3706
+ kube_client: KubeClient,
3707
+ ) -> List[V1Node]:
3708
+ return kube_client.core.list_node().items
3709
+
3710
+
3711
+ @time_cache(ttl=60)
3712
+ def get_all_nodes_cached(kube_client: KubeClient) -> Sequence[V1Node]:
3713
+ nodes: Sequence[V1Node] = get_all_nodes(kube_client)
3714
+ return nodes
3715
+
3716
+
3717
+ def filter_nodes_by_blacklist(
3718
+ nodes: Sequence[V1Node], blacklist: DeployBlacklist, whitelist: DeployWhitelist
3719
+ ) -> Sequence[V1Node]:
3720
+ """Takes an input list of nodes and filters them based on the given blacklist.
3721
+ The blacklist is in the form of:
3722
+
3723
+ [["location_type", "location]]
3724
+
3725
+ Where the list inside is something like ["region", "uswest1-prod"]
3726
+
3727
+ :returns: The list of nodes after the filter
3728
+ """
3729
+ if whitelist:
3730
+ whitelist = (paasta_prefixed(whitelist[0]), whitelist[1])
3731
+ blacklist = [(paasta_prefixed(entry[0]), entry[1]) for entry in blacklist]
3732
+ return [
3733
+ node
3734
+ for node in nodes
3735
+ if host_passes_whitelist(node.metadata.labels, whitelist)
3736
+ and host_passes_blacklist(node.metadata.labels, blacklist)
3737
+ ]
3738
+
3739
+
3740
+ def paasta_prefixed(
3741
+ attribute: str,
3742
+ ) -> str:
3743
+ # discovery attributes are exempt for now
3744
+ if attribute in DISCOVERY_ATTRIBUTES:
3745
+ return YELP_ATTRIBUTE_PREFIX + attribute
3746
+ elif "/" in attribute:
3747
+ return attribute
3748
+ else:
3749
+ return PAASTA_ATTRIBUTE_PREFIX + attribute
3750
+
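The prefixing rules, illustrated with a standalone sketch; the prefix constants and the contents of DISCOVERY_ATTRIBUTES are assumptions inferred from the labels used elsewhere in this file:

# Assumed constants, for illustration only.
PAASTA_ATTRIBUTE_PREFIX = "paasta.yelp.com/"
YELP_ATTRIBUTE_PREFIX = "yelp.com/"
DISCOVERY_ATTRIBUTES = {"region", "habitat", "ecosystem"}  # assumed contents

def prefixed(attribute):
    # Same branching as paasta_prefixed above.
    if attribute in DISCOVERY_ATTRIBUTES:
        return YELP_ATTRIBUTE_PREFIX + attribute
    if "/" in attribute:
        return attribute
    return PAASTA_ATTRIBUTE_PREFIX + attribute

print(prefixed("managed"))                 # 'paasta.yelp.com/managed'
print(prefixed("habitat"))                 # 'yelp.com/habitat' (assumed discovery attribute)
print(prefixed("kubernetes.io/hostname"))  # already namespaced, left as-is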
3751
+
3752
+ def get_nodes_grouped_by_attribute(
3753
+ nodes: Sequence[V1Node], attribute: str
3754
+ ) -> Mapping[str, Sequence[V1Node]]:
3755
+ attribute = paasta_prefixed(attribute)
3756
+ sorted_nodes = sorted(
3757
+ nodes, key=lambda node: node.metadata.labels.get(attribute, "")
3758
+ )
3759
+ return {
3760
+ key: list(group)
3761
+ for key, group in itertools.groupby(
3762
+ sorted_nodes, key=lambda node: node.metadata.labels.get(attribute, "")
3763
+ )
3764
+ if key
3765
+ }
3766
+
3767
+
3768
+ def get_kubernetes_app_name(service: str, instance: str) -> str:
3769
+ return "{service}-{instance}".format(
3770
+ service=sanitise_kubernetes_name(service),
3771
+ instance=sanitise_kubernetes_name(instance),
3772
+ )
3773
+
3774
+
3775
+ def get_kubernetes_app_by_name(
3776
+ name: str, kube_client: KubeClient, namespace: str
3777
+ ) -> Union[V1Deployment, V1StatefulSet]:
3778
+ try:
3779
+ app = kube_client.deployments.read_namespaced_deployment_status(
3780
+ name=name, namespace=namespace
3781
+ )
3782
+ return app
3783
+ except ApiException as e:
3784
+ if e.status == 404:
3785
+ pass
3786
+ else:
3787
+ raise
3788
+ return kube_client.deployments.read_namespaced_stateful_set_status(
3789
+ name=name, namespace=namespace
3790
+ )
3791
+
3792
+
3793
+ def create_deployment(
3794
+ kube_client: KubeClient,
3795
+ formatted_deployment: V1Deployment,
3796
+ namespace: str,
3797
+ ) -> None:
3798
+ return kube_client.deployments.create_namespaced_deployment(
3799
+ namespace=namespace, body=formatted_deployment
3800
+ )
3801
+
3802
+
3803
+ def update_deployment(
3804
+ kube_client: KubeClient,
3805
+ formatted_deployment: V1Deployment,
3806
+ namespace: str,
3807
+ ) -> None:
3808
+ return kube_client.deployments.replace_namespaced_deployment(
3809
+ name=formatted_deployment.metadata.name,
3810
+ namespace=namespace,
3811
+ body=formatted_deployment,
3812
+ )
3813
+
3814
+
3815
+ def patch_deployment(
3816
+ kube_client: KubeClient,
3817
+ formatted_deployment: V1Deployment,
3818
+ namespace: str,
3819
+ ) -> None:
3820
+ return kube_client.deployments.patch_namespaced_deployment(
3821
+ name=formatted_deployment.metadata.name,
3822
+ namespace=namespace,
3823
+ body=formatted_deployment,
3824
+ )
3825
+
3826
+
3827
+ def delete_deployment(
3828
+ kube_client: KubeClient,
3829
+ deployment_name: str,
3830
+ namespace: str,
3831
+ ) -> None:
3832
+ return kube_client.deployments.delete_namespaced_deployment(
3833
+ name=deployment_name,
3834
+ namespace=namespace,
3835
+ )
3836
+
3837
+
3838
+ def create_stateful_set(
3839
+ kube_client: KubeClient,
3840
+ formatted_stateful_set: V1StatefulSet,
3841
+ namespace: str,
3842
+ ) -> None:
3843
+ return kube_client.deployments.create_namespaced_stateful_set(
3844
+ namespace=namespace, body=formatted_stateful_set
3845
+ )
3846
+
3847
+
3848
+ def update_stateful_set(
3849
+ kube_client: KubeClient,
3850
+ formatted_stateful_set: V1StatefulSet,
3851
+ namespace: str,
3852
+ ) -> None:
3853
+ return kube_client.deployments.replace_namespaced_stateful_set(
3854
+ name=formatted_stateful_set.metadata.name,
3855
+ namespace=namespace,
3856
+ body=formatted_stateful_set,
3857
+ )
3858
+
3859
+
3860
+ def create_job(
3861
+ kube_client: KubeClient,
3862
+ formatted_job: V1Job,
3863
+ namespace: str,
3864
+ ) -> None:
3865
+ return kube_client.batches.create_namespaced_job(
3866
+ namespace=namespace,
3867
+ body=formatted_job,
3868
+ )
3869
+
3870
+
3871
+ def get_event_timestamp(event: CoreV1Event) -> Optional[float]:
3872
+ # Cycle through timestamp attributes in order of preference
3873
+ for ts_attr in ["last_timestamp", "event_time", "first_timestamp"]:
3874
+ ts = getattr(event, ts_attr)
3875
+ if ts:
3876
+ return ts.timestamp()
3877
+ return None
3878
+
3879
+
3880
+ @async_timeout()
3881
+ async def get_events_for_object(
3882
+ kube_client: KubeClient,
3883
+ obj: Union[V1Pod, V1Deployment, V1StatefulSet, V1ReplicaSet],
3884
+ kind: str, # for some reason, obj.kind isn't populated when this function is called so we pass it in by hand
3885
+ max_age_in_seconds: Optional[int] = None,
3886
+ ) -> List[CoreV1Event]:
3887
+
3888
+ try:
3889
+ # this is a blocking call since it does network I/O and can end up significantly blocking the
3890
+ # asyncio event loop when doing things like getting events for all the Pods for a service with
3891
+ # a large amount of replicas. therefore, we need to wrap the kubernetes client into something
3892
+ # that's awaitable so that we can actually do things concurrently and not serially
3893
+ events = await a_sync.to_async(kube_client.core.list_namespaced_event)(
3894
+ namespace=obj.metadata.namespace,
3895
+ field_selector=f"involvedObject.name={obj.metadata.name},involvedObject.kind={kind}",
3896
+ limit=MAX_EVENTS_TO_RETRIEVE,
3897
+ )
3898
+ events = events.items if events else []
3899
+ if max_age_in_seconds and max_age_in_seconds > 0:
3900
+ # NOTE: the k8s API returns timestamps in UTC, so we make sure to always work in UTC
3901
+ min_timestamp = datetime.now(timezone.utc).timestamp() - max_age_in_seconds
3902
+ events = [
3903
+ evt
3904
+ for evt in events
3905
+ if get_event_timestamp(evt) is None
3906
+ or get_event_timestamp(evt) > min_timestamp
3907
+ ]
3908
+ return events
3909
+ except ApiException:
3910
+ return []
3911
+
3912
+
3913
+ @async_timeout()
3914
+ async def get_hpa(
3915
+ kube_client: KubeClient,
3916
+ name: str,
3917
+ namespace: str,
3918
+ ) -> V2HorizontalPodAutoscaler:
3919
+ async_get_hpa = a_sync.to_async(
3920
+ kube_client.autoscaling.read_namespaced_horizontal_pod_autoscaler
3921
+ )
3922
+ try:
3923
+ return await async_get_hpa(name, namespace)
3924
+ except ApiException as e:
3925
+ if e.status == 404:
3926
+ return None
3927
+ else:
3928
+ raise
3929
+
3930
+
3931
+ def get_kubernetes_app_deploy_status(
3932
+ app: Union[V1Deployment, V1StatefulSet],
3933
+ desired_instances: int,
3934
+ ) -> Tuple[int, str]:
3935
+ if app.status.ready_replicas is None:
3936
+ if desired_instances == 0:
3937
+ deploy_status = KubernetesDeployStatus.Stopped
3938
+ else:
3939
+ deploy_status = KubernetesDeployStatus.Waiting
3940
+ elif app.status.ready_replicas != desired_instances:
3941
+ deploy_status = KubernetesDeployStatus.Waiting
3942
+ # updated_replicas can currently be None for stateful sets so we may not correctly detect status for now
3943
+ # when https://github.com/kubernetes/kubernetes/pull/62943 lands in a release this should work for both:
3944
+ elif app.status.updated_replicas is not None and (
3945
+ app.status.updated_replicas < desired_instances
3946
+ ):
3947
+ deploy_status = KubernetesDeployStatus.Deploying
3948
+ elif app.status.replicas == 0 and desired_instances == 0:
3949
+ deploy_status = KubernetesDeployStatus.Stopped
3950
+ else:
3951
+ deploy_status = KubernetesDeployStatus.Running
3952
+ # Temporarily removing the message because the events query it used was overloading etcd
3953
+ # TODO: change the implementation or remove the deploy message entirely
3954
+ deploy_message = ""
3955
+ return deploy_status, deploy_message
3956
+
3957
+
3958
+ class KubernetesDeployStatus:
3959
+ """An enum to represent Kubernetes app deploy status.
3960
+ Changing the names of the keys will affect both the paasta CLI and API.
3961
+ """
3962
+
3963
+ Running, Deploying, Waiting, Stopped = range(0, 4)
3964
+
3965
+ @classmethod
3966
+ def tostring(cls, val: int) -> str:
3967
+ for k, v in vars(cls).items():
3968
+ if v == val:
3969
+ return k
3970
+ raise ValueError("Unknown Kubernetes deploy status %d" % val)
3971
+
3972
+ @classmethod
3973
+ def fromstring(cls, _str: str) -> int:
3974
+ return getattr(cls, _str, None)
3975
+
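A few concrete cases of the status decision in get_kubernetes_app_deploy_status above, using a minimal stand-in object; the replica counts are illustrative:

from types import SimpleNamespace

def fake_app(ready, updated, replicas):
    # Minimal stand-in exposing only the status fields the function reads.
    return SimpleNamespace(
        status=SimpleNamespace(
            ready_replicas=ready, updated_replicas=updated, replicas=replicas
        )
    )

# Expected results when passed to get_kubernetes_app_deploy_status:
#   fake_app(None, None, 0), desired_instances=0  -> Stopped
#   fake_app(None, None, 0), desired_instances=5  -> Waiting
#   fake_app(3, 3, 5),       desired_instances=5  -> Waiting   (not all replicas ready)
#   fake_app(5, 3, 5),       desired_instances=5  -> Deploying (old replicas still present)
#   fake_app(5, 5, 5),       desired_instances=5  -> Running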
3976
+
3977
+ def is_kubernetes_available() -> bool:
3978
+ return Path(os.environ.get("KUBECONFIG", KUBE_CONFIG_PATH)).exists()
3979
+
3980
+
3981
+ def create_secret(
3982
+ kube_client: KubeClient,
3983
+ service_name: str,
3984
+ secret_name: str,
3985
+ secret_data: Dict[str, str],
3986
+ namespace: str,
3987
+ ) -> None:
3988
+ """
3989
+ See restrictions on kubernetes secret at https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1Secret.md
3990
+ :param secret_name: Expect properly formatted kubernetes secret name, see _get_secret_name()
3991
+ :param secret_data: Expect a mapping of string-to-string where values are base64-encoded
3992
+ :param service_name: Expect unsanitised service name; since it's used as a label, it is subject to the 63-character limit
3993
+ :param namespace: Unsanitized namespace of a service that will use the secret
3994
+ :raises ApiException:
3995
+ """
3996
+ kube_client.core.create_namespaced_secret(
3997
+ namespace=namespace,
3998
+ body=V1Secret(
3999
+ metadata=V1ObjectMeta(
4000
+ name=secret_name,
4001
+ labels={
4002
+ "yelp.com/paasta_service": sanitise_label_value(service_name),
4003
+ "paasta.yelp.com/service": sanitise_label_value(service_name),
4004
+ },
4005
+ ),
4006
+ data=secret_data,
4007
+ ),
4008
+ )
4009
+
4010
+
4011
+ def update_secret(
4012
+ kube_client: KubeClient,
4013
+ service_name: str,
4014
+ secret_name: str,
4015
+ secret_data: Dict[str, str],
4016
+ namespace: str,
4017
+ ) -> None:
4018
+ """
4019
+ Expect secret_name to exist, e.g. kubectl get secret
4020
+ :param service_name: Expect unsanitised service name
4021
+ :param secret_data: Expect a mapping of string-to-string where values are base64-encoded
4022
+ :param namespace: Unsanitized namespace of a service that will use the secret
4023
+ :raises ApiException:
4024
+ """
4025
+ kube_client.core.replace_namespaced_secret(
4026
+ name=secret_name,
4027
+ namespace=namespace,
4028
+ body=V1Secret(
4029
+ metadata=V1ObjectMeta(
4030
+ name=secret_name,
4031
+ labels={
4032
+ "yelp.com/paasta_service": sanitise_label_value(service_name),
4033
+ "paasta.yelp.com/service": sanitise_label_value(service_name),
4034
+ },
4035
+ ),
4036
+ data=secret_data,
4037
+ ),
4038
+ )
4039
+
4040
+
4041
+ @time_cache(ttl=300)
4042
+ def get_secret_signature(
4043
+ kube_client: KubeClient,
4044
+ signature_name: str,
4045
+ namespace: str,
4046
+ ) -> Optional[str]:
4047
+ """
4048
+ :param signature_name: Expect the signature to exist in kubernetes configmap
4049
+ :return: The signature stored in the kubernetes configmap, or None if not found
4050
+ :raises ApiException:
4051
+ """
4052
+ try:
4053
+ signature = kube_client.core.read_namespaced_config_map(
4054
+ name=signature_name,
4055
+ namespace=namespace,
4056
+ )
4057
+ except ApiException as e:
4058
+ if e.status == 404:
4059
+ return None
4060
+ else:
4061
+ raise
4062
+ if not signature:
4063
+ return None
4064
+ else:
4065
+ return signature.data["signature"]
4066
+
4067
+
4068
+ def update_secret_signature(
4069
+ kube_client: KubeClient,
4070
+ service_name: str,
4071
+ signature_name: str,
4072
+ secret_signature: str,
4073
+ namespace: str,
4074
+ ) -> None:
4075
+ """
4076
+ :param service_name: Expect unsanitised service_name
4077
+ :param signature_name: Expect signature_name to exist in kubernetes configmap
4078
+ :param secret_signature: Signature to replace with
4079
+ :raises ApiException:
4080
+ """
4081
+ kube_client.core.replace_namespaced_config_map(
4082
+ name=signature_name,
4083
+ namespace=namespace,
4084
+ body=V1ConfigMap(
4085
+ metadata=V1ObjectMeta(
4086
+ name=signature_name,
4087
+ labels={
4088
+ "yelp.com/paasta_service": sanitise_label_value(service_name),
4089
+ "paasta.yelp.com/service": sanitise_label_value(service_name),
4090
+ },
4091
+ ),
4092
+ data={"signature": secret_signature},
4093
+ ),
4094
+ )
4095
+
4096
+
4097
+ def create_secret_signature(
4098
+ kube_client: KubeClient,
4099
+ service_name: str,
4100
+ signature_name: str,
4101
+ secret_signature: str,
4102
+ namespace: str,
4103
+ ) -> None:
4104
+ """
4105
+ :param service_name: Expect unsanitised service_name
4106
+ :param signature_name: Expect a properly formatted signature name, see _get_secret_signature_name()
4107
+ :param secret_signature: Signature value
4108
+ :param namespace: Unsanitized namespace of a service that will use the signature
4109
+ """
4110
+ kube_client.core.create_namespaced_config_map(
4111
+ namespace=namespace,
4112
+ body=V1ConfigMap(
4113
+ metadata=V1ObjectMeta(
4114
+ name=signature_name,
4115
+ labels={
4116
+ "yelp.com/paasta_service": sanitise_label_value(service_name),
4117
+ "paasta.yelp.com/service": sanitise_label_value(service_name),
4118
+ },
4119
+ ),
4120
+ data={"signature": secret_signature},
4121
+ ),
4122
+ )
4123
+
4124
+
4125
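An illustrative sketch of the intended read/compare/write cycle for signatures; the signature value and names are hypothetical and a KubeClient is assumed to be configured:

    new_signature = "abc123"  # e.g. a hash of the secret contents
    existing = get_secret_signature(
        kube_client, signature_name="example-signature", namespace="paasta"
    )
    if existing is None:
        create_secret_signature(
            kube_client,
            service_name="example_service",
            signature_name="example-signature",
            secret_signature=new_signature,
            namespace="paasta",
        )
    elif existing != new_signature:
        update_secret_signature(
            kube_client,
            service_name="example_service",
            signature_name="example-signature",
            secret_signature=new_signature,
            namespace="paasta",
        )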
+ def sanitise_kubernetes_name(
+     service: str,
+ ) -> str:
+     """
+     Sanitises a kubernetes name so that hyphen (-) can be used as a delimiter
+     """
+     name = service.replace("_", "--")
+     if name.startswith("--"):
+         name = name.replace("--", "underscore-", 1)
+     return name.lower()
+
+
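For example, under this scheme (the values shown are what the function above produces):

    sanitise_kubernetes_name("example_service")   # "example--service"
    sanitise_kubernetes_name("_private_batch")    # "underscore-private--batch"
    sanitise_kubernetes_name("MyService")         # "myservice"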
+ def load_custom_resource_definitions(
+     system_paasta_config: SystemPaastaConfig,
+ ) -> Sequence[CustomResourceDefinition]:
+     custom_resources = []
+     for custom_resource_dict in system_paasta_config.get_kubernetes_custom_resources():
+         kube_kind = KubeKind(**custom_resource_dict.pop("kube_kind"))  # type: ignore
+         custom_resources.append(
+             CustomResourceDefinition(  # type: ignore
+                 kube_kind=kube_kind, **custom_resource_dict  # type: ignore
+             )
+         )
+     return custom_resources
+
+
+ def create_pod_topology_spread_constraints(
+     service: str,
+     instance: str,
+     topology_spread_constraints: List[TopologySpreadConstraintDict],
+ ) -> List[V1TopologySpreadConstraint]:
+     """
+     Applies cluster-level topology spread constraints to every Pod template.
+     This allows us to configure default topology spread constraints on EKS where we cannot configure the scheduler.
+     """
+     if not topology_spread_constraints:
+         return []
+
+     selector = V1LabelSelector(
+         match_labels={
+             "paasta.yelp.com/service": service,
+             "paasta.yelp.com/instance": instance,
+         }
+     )
+
+     pod_topology_spread_constraints = []
+     for constraint in topology_spread_constraints:
+         pod_topology_spread_constraints.append(
+             V1TopologySpreadConstraint(
+                 label_selector=selector,
+                 topology_key=constraint.get(
+                     "topology_key", None
+                 ),  # ValueError will be raised if unset
+                 max_skew=constraint.get("max_skew", 1),
+                 when_unsatisfiable=constraint.get(
+                     "when_unsatisfiable", "ScheduleAnyway"
+                 ),
+             )
+         )
+
+     return pod_topology_spread_constraints
+
+
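A minimal sketch of the expected input shape; the topology keys and values below are illustrative, and the defaults noted in the comments match the .get() fallbacks in the code above:

    constraints = create_pod_topology_spread_constraints(
        service="example_service",
        instance="main",
        topology_spread_constraints=[
            # defaults apply: max_skew=1, when_unsatisfiable="ScheduleAnyway"
            {"topology_key": "topology.kubernetes.io/zone"},
            {
                "topology_key": "kubernetes.io/hostname",
                "max_skew": 2,
                "when_unsatisfiable": "DoNotSchedule",
            },
        ],
    )
    # len(constraints) == 2; each V1TopologySpreadConstraint selects this service/instance's pods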
+ def sanitised_cr_name(service: str, instance: str) -> str:
+     sanitised_service = sanitise_kubernetes_name(service)
+     sanitised_instance = sanitise_kubernetes_name(instance)
+     return f"{sanitised_service}-{sanitised_instance}"
+
+
+ def get_cr(
+     kube_client: KubeClient, cr_id: Mapping[str, str]
+ ) -> Optional[Mapping[str, Any]]:
+     try:
+         return kube_client.custom.get_namespaced_custom_object(**cr_id)
+     except ApiException as e:
+         if e.status == 404:
+             return None
+         else:
+             raise
+
+
+ def set_cr_desired_state(
+     kube_client: KubeClient, cr_id: Mapping[str, str], desired_state: str
+ ) -> str:
+     cr = kube_client.custom.get_namespaced_custom_object(**cr_id)
+     if cr.get("status", {}).get("state") == desired_state:
+         return cr["status"]
+
+     if "metadata" not in cr:
+         cr["metadata"] = {}
+     if "annotations" not in cr["metadata"]:
+         cr["metadata"]["annotations"] = {}
+     cr["metadata"]["annotations"]["yelp.com/desired_state"] = desired_state
+     cr["metadata"]["annotations"]["paasta.yelp.com/desired_state"] = desired_state
+     kube_client.custom.replace_namespaced_custom_object(**cr_id, body=cr)
+     status = cr.get("status")
+     return status
+
+
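The cr_id mapping is splatted directly into the CustomObjectsApi calls, so it is expected to carry the standard get/replace_namespaced_custom_object keyword arguments; a hypothetical example (group, plural, and namespace are illustrative):

    cr_id = {
        "group": "yelp.com",
        "version": "v1alpha1",
        "namespace": "paasta-flinks",
        "plural": "flinks",
        "name": sanitised_cr_name("example_service", "main"),
    }
    if get_cr(kube_client, cr_id) is not None:
        set_cr_desired_state(kube_client, cr_id, desired_state="stop")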
+ def get_pod_hostname(kube_client: KubeClient, pod: V1Pod) -> str:
+     """Gets the hostname of a pod's node from labels"""
+     if not pod.spec.node_name:  # can be None if the pod is not yet scheduled
+         return "NotScheduled"
+     try:
+         node = kube_client.core.read_node(name=pod.spec.node_name)
+     except ApiException:
+         # fall back to the node name (which has the IP) if the node somehow doesn't exist
+         return pod.spec.node_name
+     # if the label has disappeared (say we changed it), default to the node name
+     return node.metadata.labels.get("yelp.com/hostname", pod.spec.node_name)
+
+
+ def get_pod_node(
+     kube_client: KubeClient, pod: V1Pod, cache_nodes: bool = False
+ ) -> Optional[V1Node]:
+     if cache_nodes:
+         nodes = get_all_nodes_cached(kube_client)
+     else:
+         nodes = get_all_nodes(kube_client)
+     running_node = [node for node in nodes if node.metadata.name == pod.spec.node_name]
+     return running_node[0] if running_node else None
+
+
+ def to_node_label(label: str) -> str:
+     """k8s-ifies certain special node labels"""
+     if label in {"instance_type", "instance-type"}:
+         return "node.kubernetes.io/instance-type"
+     elif label in {
+         "datacenter",
+         "ecosystem",
+         "habitat",
+         "hostname",
+         "region",
+         "superregion",
+     }:
+         return f"yelp.com/{label}"
+     return label
+
+
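For instance, the mapping above yields:

    to_node_label("instance_type")  # "node.kubernetes.io/instance-type"
    to_node_label("habitat")        # "yelp.com/habitat"
    to_node_label("custom-label")   # "custom-label" (passed through unchanged)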
+ def get_all_service_accounts(
+     kube_client: KubeClient,
+     namespace: str,
+     label_selector: Optional[str] = None,
+ ) -> Sequence[V1ServiceAccount]:
+     return kube_client.core.list_namespaced_service_account(
+         namespace=namespace, label_selector=label_selector
+     ).items
+
+
+ def get_all_role_bindings(
+     kube_client: KubeClient,
+     namespace: str,
+ ) -> Sequence[V1RoleBinding]:
+     return kube_client.rbac.list_namespaced_role_binding(namespace=namespace).items
+
+
+ def get_all_limit_ranges(
+     kube_client: KubeClient,
+     namespace: str,
+ ) -> Sequence[V1LimitRange]:
+     return kube_client.core.list_namespaced_limit_range(namespace).items
+
+
+ _RE_NORMALIZE_IAM_ROLE = re.compile(r"[^0-9a-zA-Z]+")
+
+
+ def get_service_account_name(
+     iam_role: str,
+     k8s_role: Optional[str] = None,
+ ) -> str:
+     # the service account is expected to always be prefixed with paasta- as using the actual namespace
+     # potentially wastes a lot of characters (e.g., paasta-nrtsearchservices) that could be used for
+     # the actual name
+     if iam_role:  # this is either an empty string or a real role
+         # it's possible for an IAM role to be used for multiple purposes. Some usages may require a
+         # Kubernetes Role attached to the Service Account (e.g., Spark drivers may access S3 but also
+         # need to manage Spark executor Pods), while "normal" services/batches need a Service Account
+         # with only an IAM role attached.
+         # to support these two usecases, we'll suffix the name of a Service Account with the
+         # Kubernetes Role name to disambiguate between the two.
+         if k8s_role:
+             sa_name = f"paasta--{_RE_NORMALIZE_IAM_ROLE.sub('-', iam_role.lower())}--{k8s_role}"
+         else:
+             sa_name = f"paasta--{_RE_NORMALIZE_IAM_ROLE.sub('-', iam_role.lower())}"
+     # until Core ML migrates Spark to use Pod Identity, we need to support starting Spark drivers with a Service Account
+     # that only has k8s access
+     elif not iam_role and k8s_role:
+         sa_name = f"paasta--{k8s_role}"
+     # we should never get here in normal usage, but just in case we make a mistake in the future :)
+     else:
+         raise ValueError(
+             "Expected at least one of iam_role or k8s_role to be passed in!"
+         )
+
+     return sa_name
+
+
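Given the normalisation regex above, the resulting names look like this (the role values are illustrative):

    get_service_account_name("arn:aws:iam::123456789012:role/example-role")
    # "paasta--arn-aws-iam-123456789012-role-example-role"
    get_service_account_name("arn:aws:iam::123456789012:role/example-role", k8s_role="spark-driver")
    # "paasta--arn-aws-iam-123456789012-role-example-role--spark-driver"
    get_service_account_name("", k8s_role="spark-driver")
    # "paasta--spark-driver"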
+ def ensure_service_account(
+     iam_role: str,
+     namespace: str,
+     kube_client: KubeClient,
+     k8s_role: Optional[str] = None,
+ ) -> None:
+     sa_name = get_service_account_name(iam_role, k8s_role)
+
+     if not any(
+         sa.metadata and sa.metadata.name == sa_name
+         for sa in get_all_service_accounts(kube_client, namespace)
+     ):
+         sa = V1ServiceAccount(
+             kind="ServiceAccount",
+             metadata=V1ObjectMeta(
+                 name=sa_name,
+                 namespace=namespace,
+                 annotations={"eks.amazonaws.com/role-arn": iam_role},
+             ),
+         )
+         kube_client.core.create_namespaced_service_account(namespace=namespace, body=sa)
+
+     # we're expecting that any Role dynamically associated with a Service Account already exists.
+     # at Yelp, this means that we have a version-controlled resource for the Role in Puppet.
+     # and since the Role already exists, we just need to associate it with the Service Account through
+     # a Role Binding
+     if k8s_role:
+         # that said, we still check that there's a RoleBinding every time this function is called so that
+         # we can self-heal if we somehow create a Service Account and then fail to create a Role Binding
+         # due to a transient issue
+         if not any(
+             rb.metadata and rb.metadata.name == sa_name
+             for rb in get_all_role_bindings(kube_client, namespace)
+         ):
+             role_binding = V1RoleBinding(
+                 metadata=V1ObjectMeta(
+                     name=sa_name,
+                     namespace=namespace,
+                 ),
+                 role_ref=V1RoleRef(
+                     api_group="rbac.authorization.k8s.io",
+                     kind="Role",
+                     name=k8s_role,
+                 ),
+                 subjects=[
+                     V1Subject(
+                         kind="ServiceAccount",
+                         namespace=namespace,
+                         name=sa_name,
+                     ),
+                 ],
+             )
+             kube_client.rbac.create_namespaced_role_binding(
+                 namespace=namespace, body=role_binding
+             )
+
+
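A hedged usage sketch, assuming the hypothetical role names below and that the namespace already contains the referenced Kubernetes Role:

    ensure_service_account(
        iam_role="arn:aws:iam::123456789012:role/example-role",
        namespace="paasta-spark",
        kube_client=kube_client,
        k8s_role="spark-driver",  # omit for IAM-only service accounts
    )
    # idempotent: the ServiceAccount/RoleBinding are only created if they are missing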
+ def mode_to_int(mode: Optional[Union[str, int]]) -> Optional[int]:
+     if mode is not None:
+         if isinstance(mode, str):
+             if len(mode) < 2 or mode[0] != "0":
+                 raise ValueError(f"Invalid mode: {mode}")
+             mode = int(mode[1:], 8)
+     return mode
+
+
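String modes must be octal literals with a leading zero; for example:

    mode_to_int("0755")  # 493 (i.e. 0o755)
    mode_to_int(0o644)   # 420; ints are passed through unchanged
    mode_to_int(None)    # None
    mode_to_int("755")   # raises ValueError: Invalid mode: 755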
+ def update_crds(
+     kube_client: KubeClient,
+     desired_crds: Collection[Union[V1CustomResourceDefinition]],
+     existing_crds: Union[V1CustomResourceDefinitionList],
+ ) -> bool:
+     for desired_crd in desired_crds:
+         existing_crd = None
+         for crd in existing_crds.items:
+             if crd.metadata.name == desired_crd.metadata["name"]:
+                 existing_crd = crd
+                 break
+         try:
+             apiextensions = kube_client.apiextensions
+
+             if existing_crd:
+                 desired_crd.metadata[
+                     "resourceVersion"
+                 ] = existing_crd.metadata.resource_version
+
+                 apiextensions.replace_custom_resource_definition(
+                     name=desired_crd.metadata["name"], body=desired_crd
+                 )
+             else:
+                 try:
+                     apiextensions.create_custom_resource_definition(body=desired_crd)
+                 except ValueError as err:
+                     # TODO: kubernetes server will sometimes reply with conditions:null,
+                     # figure out how to deal with this correctly, for more details:
+                     # https://github.com/kubernetes/kubernetes/pull/64996
+                     if "`conditions`, must not be `None`" in str(err):
+                         pass
+                     else:
+                         raise err
+             log.info(f"deployed internal crd {desired_crd.metadata['name']}")
+         except ApiException as exc:
+             log.error(
+                 f"error deploying crd {desired_crd.metadata['name']}, "
+                 f"status: {exc.status}, reason: {exc.reason}"
+             )
+             log.debug(exc.body)
+             return False
+
+     return True
+
+
+ def sanitise_label_value(value: str) -> str:
+     """
+     :param value: Value to be sanitised; limited to 63 characters due to a kubernetes restriction
+     :return: Sanitised label value of at most 63 characters
+     """
+     return limit_size_with_hash(
+         sanitise_kubernetes_name(value),
+         limit=63,
+     )
+
+
+ def _get_secret_name(
+     namespace: str, secret_identifier: str, service_name: str, key_name: str
+ ) -> str:
+     """
+     Use to generate kubernetes secret names.
+     Secret names have a limit of 253 characters due to https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names
+     However, if you are also storing the secret name as a label value, it has a lower limit of 63 characters.
+     Hyphen (-) is used as a delimiter between values.
+
+     :param namespace: Unsanitised namespace of the service that will use the secret
+     :param secret_identifier: Identifies the type of secret
+     :param service_name: Unsanitised service_name
+     :param key_name: Name of the actual secret, typically specified in a configuration file
+     :return: Sanitised kubernetes secret name of at most 253 characters
+     """
+     return limit_size_with_hash(
+         "-".join(
+             [
+                 namespace,
+                 secret_identifier,
+                 sanitise_kubernetes_name(service_name),
+                 sanitise_kubernetes_name(key_name),
+             ]
+         ),
+         limit=253,
+     )
+
+
+ def _get_secret_signature_name(
+     namespace: str, secret_identifier: str, service_name: str, key_name: str
+ ) -> str:
+     """
+     :param namespace: Unsanitised namespace of the service that will use the signature
+     :param secret_identifier: Identifies the type of secret
+     :param service_name: Unsanitised service_name
+     :param key_name: Name of the actual secret, typically specified in a configuration file
+     :return: Sanitised signature name, used as a kubernetes configmap name, of at most 253 characters
+     """
+     return limit_size_with_hash(
+         "-".join(
+             [
+                 namespace,
+                 secret_identifier,
+                 sanitise_kubernetes_name(service_name),
+                 sanitise_kubernetes_name(key_name),
+                 "signature",
+             ]
+         ),
+         limit=253,
+     )
+
+
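Before any truncation by limit_size_with_hash, the joined names look like this (illustrative values):

    _get_secret_name("paasta", "secret", "example_service", "db_password")
    # "paasta-secret-example--service-db--password" (then capped at 253 characters)
    _get_secret_signature_name("paasta", "secret", "example_service", "db_password")
    # "paasta-secret-example--service-db--password-signature"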
+ def get_paasta_secret_name(namespace: str, service_name: str, key_name: str) -> str:
+     """
+     Use whenever creating or referencing a PaaSTA secret
+
+     :param namespace: Unsanitised namespace of the service that will use the secret
+     :param service_name: Unsanitised service_name
+     :param key_name: Name of the actual secret, typically specified in a configuration file
+     :return: Sanitised PaaSTA secret name
+     """
+     return _get_secret_name(
+         namespace=namespace,
+         secret_identifier="secret",
+         service_name=service_name,
+         key_name=key_name,
+     )
+
+
+ def get_paasta_secret_signature_name(
+     namespace: str, service_name: str, key_name: str
+ ) -> str:
+     """
+     Get the PaaSTA signature name stored as a kubernetes configmap
+
+     :param namespace: Unsanitised namespace of the service that will use the signature
+     :param service_name: Unsanitised service_name
+     :param key_name: Name of the actual secret, typically specified in a configuration file
+     :return: Sanitised PaaSTA signature name
+     """
+     return _get_secret_signature_name(
+         namespace=namespace,
+         secret_identifier="secret",
+         service_name=service_name,
+         key_name=key_name,
+     )
+
+
+ def get_secret(
+     kube_client: KubeClient,
+     secret_name: str,
+     key_name: str,
+     *,
+     namespace: str,
+     decode: bool = True,
+ ) -> Union[str, bytes]:
+     """
+     :param secret_name: Expect a properly formatted kubernetes secret name that already exists
+     :param key_name: Expect key_name to be a key in the secret's data section
+     :raises ApiException:
+     :raises KeyError: if key_name does not exist in the kubernetes secret's data section
+     """
+     secret_data = kube_client.core.read_namespaced_secret(
+         name=secret_name, namespace=namespace
+     ).data[key_name]
+     # String secrets (e.g. yaml config files) need to be decoded
+     # Binary secrets (e.g. TLS keystores or binary certificate files) cannot be decoded
+     if decode:
+         return base64.b64decode(secret_data).decode("utf-8")
+     return base64.b64decode(secret_data)
+
+
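A brief sketch of both decode modes, assuming the hypothetical secrets and keys below exist:

    config_yaml = get_secret(
        kube_client,
        secret_name=get_paasta_secret_name("paasta", "example_service", "app_config"),
        key_name="app_config",
        namespace="paasta",
    )  # str: base64-decoded, then utf-8 decoded
    keystore = get_secret(
        kube_client,
        secret_name=get_paasta_secret_name("paasta", "example_service", "tls_keystore"),
        key_name="tls_keystore",
        namespace="paasta",
        decode=False,
    )  # bytes: base64-decoded only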
+ def get_kubernetes_secret_env_variables(
+     kube_client: KubeClient,
+     environment: Dict[str, str],
+     service_name: str,
+     namespace: str,
+ ) -> Dict[str, str]:
+     decrypted_secrets = {}
+     for k, v in environment.items():
+         if is_secret_ref(v):
+             secret = get_secret_name_from_ref(v)
+             # decode=True because environment variables need to be strings and not binary
+             # Cast to string to make mypy / type-hints happy
+             decrypted_secrets[k] = str(
+                 get_secret(
+                     kube_client,
+                     secret_name=get_paasta_secret_name(
+                         namespace,
+                         SHARED_SECRET_SERVICE if is_shared_secret(v) else service_name,
+                         secret,
+                     ),
+                     key_name=secret,
+                     decode=True,
+                     namespace=namespace,
+                 )
+             )
+     return decrypted_secrets
+
+
+ def get_kubernetes_secret_volumes(
+     kube_client: KubeClient,
+     secret_volumes_config: Sequence[SecretVolume],
+     service_name: str,
+     namespace: str,
+ ) -> Dict[str, Union[str, bytes]]:
+     secret_volumes = {}
+     # The config might look one of two ways:
+     # Implicit full path consisting of the container path and the secret name:
+     # secret_volumes:
+     # - container_path: /nail/foo
+     #   secret_name: the_secret_1
+     # - container_path: /nail/bar
+     #   secret_name: the_secret_2
+     #
+     # This ^ should result in two files (/nail/foo/the_secret_1, /nail/bar/the_secret_2)
+     #
+     # OR
+     #
+     # Multiple files within a folder with explicit path names:
+     # secret_volumes:
+     # - container_path: /nail/foo
+     #   items:
+     #   - key: the_secret_1
+     #     path: bar.yaml
+     #   - key: the_secret_2
+     #     path: baz.yaml
+     #
+     # This ^ should result in two files (/nail/foo/bar.yaml, /nail/foo/baz.yaml)
+     # We need to support both cases
+     for secret_volume in secret_volumes_config:
+         if "items" not in secret_volume:
+             secret_contents = get_secret(
+                 kube_client,
+                 secret_name=get_paasta_secret_name(
+                     namespace, service_name, secret_volume["secret_name"]
+                 ),
+                 key_name=secret_volume["secret_name"],
+                 decode=False,
+                 namespace=namespace,
+             )
+             # Index by container path => the actual secret contents, to be used downstream to create local files and mount into the container
+             secret_volumes[
+                 os.path.join(
+                     secret_volume["container_path"], secret_volume["secret_name"]
+                 )
+             ] = secret_contents
+         else:
+             for item in secret_volume["items"]:
+                 secret_contents = get_secret(
+                     kube_client,
+                     secret_name=get_paasta_secret_name(
+                         namespace, service_name, item["key"]
+                     ),
+                     key_name=item["key"],
+                     decode=False,
+                     namespace=namespace,
+                 )
+                 secret_volumes[
+                     os.path.join(secret_volume["container_path"], item["path"])
+                 ] = secret_contents
+
+     return secret_volumes
+
+
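For the two config shapes described in the comments above, the returned mapping would be keyed like this (paths and secret names are illustrative):

    volumes = get_kubernetes_secret_volumes(
        kube_client,
        secret_volumes_config=[
            {"container_path": "/nail/foo", "secret_name": "the_secret_1"},
            {"container_path": "/nail/bar", "items": [{"key": "the_secret_2", "path": "baz.yaml"}]},
        ],
        service_name="example_service",
        namespace="paasta",
    )
    # {"/nail/foo/the_secret_1": b"...", "/nail/bar/baz.yaml": b"..."}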
+ @lru_cache()
+ def get_authenticating_services(soa_dir: str = DEFAULT_SOA_DIR) -> Set[str]:
+     """Load list of services participating in authenticated traffic"""
+     authenticating_services_conf_path = os.path.join(soa_dir, "authenticating.yaml")
+     config = service_configuration_lib.read_yaml_file(authenticating_services_conf_path)
+     return set(config.get("services", []))
+
+
+ def add_volumes_for_authenticating_services(
+     service_name: str,
+     config_volumes: List[ProjectedSAVolume],
+     soa_dir: str = DEFAULT_SOA_DIR,
+ ) -> List[ProjectedSAVolume]:
+     """Add a projected service account volume to the list of volumes if the service
+     participates in authenticated traffic. In case of changes, a new list is returned;
+     there are no in-place updates.
+
+     :param str service_name: name of the service
+     :param List[ProjectedSAVolume] config_volumes: existing projected volumes from the service config
+     :param str soa_dir: path to the SOA configuration directory
+     :return: updated list of projected service account volumes
+     """
+     token_config = load_system_paasta_config().get_service_auth_token_volume_config()
+     if (
+         token_config
+         and service_name in get_authenticating_services(soa_dir)
+         and not any(volume == token_config for volume in config_volumes)
+     ):
+         config_volumes = [token_config, *config_volumes]
+     return config_volumes
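A final usage sketch, assuming the hypothetical service is listed in authenticating.yaml and the system config defines a token volume:

    volumes = add_volumes_for_authenticating_services(
        service_name="example_service",
        config_volumes=[],  # any projected volumes already present in the service's config
    )
    # the token volume (if configured) is prepended exactly once; existing volumes are preserved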