paasta-tools 1.21.3__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (348)
  1. k8s_itests/__init__.py +0 -0
  2. k8s_itests/test_autoscaling.py +23 -0
  3. k8s_itests/utils.py +38 -0
  4. paasta_tools/__init__.py +20 -0
  5. paasta_tools/adhoc_tools.py +142 -0
  6. paasta_tools/api/__init__.py +13 -0
  7. paasta_tools/api/api.py +330 -0
  8. paasta_tools/api/api_docs/swagger.json +2323 -0
  9. paasta_tools/api/client.py +106 -0
  10. paasta_tools/api/settings.py +33 -0
  11. paasta_tools/api/tweens/__init__.py +6 -0
  12. paasta_tools/api/tweens/auth.py +125 -0
  13. paasta_tools/api/tweens/profiling.py +108 -0
  14. paasta_tools/api/tweens/request_logger.py +124 -0
  15. paasta_tools/api/views/__init__.py +13 -0
  16. paasta_tools/api/views/autoscaler.py +100 -0
  17. paasta_tools/api/views/exception.py +45 -0
  18. paasta_tools/api/views/flink.py +73 -0
  19. paasta_tools/api/views/instance.py +395 -0
  20. paasta_tools/api/views/pause_autoscaler.py +71 -0
  21. paasta_tools/api/views/remote_run.py +113 -0
  22. paasta_tools/api/views/resources.py +76 -0
  23. paasta_tools/api/views/service.py +35 -0
  24. paasta_tools/api/views/version.py +25 -0
  25. paasta_tools/apply_external_resources.py +79 -0
  26. paasta_tools/async_utils.py +109 -0
  27. paasta_tools/autoscaling/__init__.py +0 -0
  28. paasta_tools/autoscaling/autoscaling_service_lib.py +57 -0
  29. paasta_tools/autoscaling/forecasting.py +106 -0
  30. paasta_tools/autoscaling/max_all_k8s_services.py +41 -0
  31. paasta_tools/autoscaling/pause_service_autoscaler.py +77 -0
  32. paasta_tools/autoscaling/utils.py +52 -0
  33. paasta_tools/bounce_lib.py +184 -0
  34. paasta_tools/broadcast_log_to_services.py +62 -0
  35. paasta_tools/cassandracluster_tools.py +210 -0
  36. paasta_tools/check_autoscaler_max_instances.py +212 -0
  37. paasta_tools/check_cassandracluster_services_replication.py +35 -0
  38. paasta_tools/check_flink_services_health.py +203 -0
  39. paasta_tools/check_kubernetes_api.py +57 -0
  40. paasta_tools/check_kubernetes_services_replication.py +141 -0
  41. paasta_tools/check_oom_events.py +244 -0
  42. paasta_tools/check_services_replication_tools.py +324 -0
  43. paasta_tools/check_spark_jobs.py +234 -0
  44. paasta_tools/cleanup_kubernetes_cr.py +138 -0
  45. paasta_tools/cleanup_kubernetes_crd.py +145 -0
  46. paasta_tools/cleanup_kubernetes_jobs.py +344 -0
  47. paasta_tools/cleanup_tron_namespaces.py +96 -0
  48. paasta_tools/cli/__init__.py +13 -0
  49. paasta_tools/cli/authentication.py +85 -0
  50. paasta_tools/cli/cli.py +260 -0
  51. paasta_tools/cli/cmds/__init__.py +13 -0
  52. paasta_tools/cli/cmds/autoscale.py +143 -0
  53. paasta_tools/cli/cmds/check.py +334 -0
  54. paasta_tools/cli/cmds/cook_image.py +147 -0
  55. paasta_tools/cli/cmds/get_docker_image.py +76 -0
  56. paasta_tools/cli/cmds/get_image_version.py +172 -0
  57. paasta_tools/cli/cmds/get_latest_deployment.py +93 -0
  58. paasta_tools/cli/cmds/info.py +155 -0
  59. paasta_tools/cli/cmds/itest.py +117 -0
  60. paasta_tools/cli/cmds/list.py +66 -0
  61. paasta_tools/cli/cmds/list_clusters.py +42 -0
  62. paasta_tools/cli/cmds/list_deploy_queue.py +171 -0
  63. paasta_tools/cli/cmds/list_namespaces.py +84 -0
  64. paasta_tools/cli/cmds/local_run.py +1396 -0
  65. paasta_tools/cli/cmds/logs.py +1601 -0
  66. paasta_tools/cli/cmds/mark_for_deployment.py +1988 -0
  67. paasta_tools/cli/cmds/mesh_status.py +174 -0
  68. paasta_tools/cli/cmds/pause_service_autoscaler.py +107 -0
  69. paasta_tools/cli/cmds/push_to_registry.py +275 -0
  70. paasta_tools/cli/cmds/remote_run.py +252 -0
  71. paasta_tools/cli/cmds/rollback.py +347 -0
  72. paasta_tools/cli/cmds/secret.py +549 -0
  73. paasta_tools/cli/cmds/security_check.py +59 -0
  74. paasta_tools/cli/cmds/spark_run.py +1400 -0
  75. paasta_tools/cli/cmds/start_stop_restart.py +401 -0
  76. paasta_tools/cli/cmds/status.py +2302 -0
  77. paasta_tools/cli/cmds/validate.py +1012 -0
  78. paasta_tools/cli/cmds/wait_for_deployment.py +275 -0
  79. paasta_tools/cli/fsm/__init__.py +13 -0
  80. paasta_tools/cli/fsm/autosuggest.py +82 -0
  81. paasta_tools/cli/fsm/template/README.md +8 -0
  82. paasta_tools/cli/fsm/template/cookiecutter.json +7 -0
  83. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/kubernetes-PROD.yaml +91 -0
  84. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/monitoring.yaml +20 -0
  85. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/service.yaml +8 -0
  86. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/smartstack.yaml +6 -0
  87. paasta_tools/cli/fsm_cmd.py +121 -0
  88. paasta_tools/cli/paasta_tabcomplete.sh +23 -0
  89. paasta_tools/cli/schemas/adhoc_schema.json +199 -0
  90. paasta_tools/cli/schemas/autoscaling_schema.json +91 -0
  91. paasta_tools/cli/schemas/autotuned_defaults/cassandracluster_schema.json +37 -0
  92. paasta_tools/cli/schemas/autotuned_defaults/kubernetes_schema.json +89 -0
  93. paasta_tools/cli/schemas/deploy_schema.json +173 -0
  94. paasta_tools/cli/schemas/eks_schema.json +970 -0
  95. paasta_tools/cli/schemas/kubernetes_schema.json +970 -0
  96. paasta_tools/cli/schemas/rollback_schema.json +160 -0
  97. paasta_tools/cli/schemas/service_schema.json +25 -0
  98. paasta_tools/cli/schemas/smartstack_schema.json +322 -0
  99. paasta_tools/cli/schemas/tron_schema.json +699 -0
  100. paasta_tools/cli/utils.py +1118 -0
  101. paasta_tools/clusterman.py +21 -0
  102. paasta_tools/config_utils.py +385 -0
  103. paasta_tools/contrib/__init__.py +0 -0
  104. paasta_tools/contrib/bounce_log_latency_parser.py +68 -0
  105. paasta_tools/contrib/check_manual_oapi_changes.sh +24 -0
  106. paasta_tools/contrib/check_orphans.py +306 -0
  107. paasta_tools/contrib/create_dynamodb_table.py +35 -0
  108. paasta_tools/contrib/create_paasta_playground.py +105 -0
  109. paasta_tools/contrib/emit_allocated_cpu_metrics.py +50 -0
  110. paasta_tools/contrib/get_running_task_allocation.py +346 -0
  111. paasta_tools/contrib/habitat_fixer.py +86 -0
  112. paasta_tools/contrib/ide_helper.py +316 -0
  113. paasta_tools/contrib/is_pod_healthy_in_proxy.py +139 -0
  114. paasta_tools/contrib/is_pod_healthy_in_smartstack.py +50 -0
  115. paasta_tools/contrib/kill_bad_containers.py +109 -0
  116. paasta_tools/contrib/mass-deploy-tag.sh +44 -0
  117. paasta_tools/contrib/mock_patch_checker.py +86 -0
  118. paasta_tools/contrib/paasta_update_soa_memcpu.py +520 -0
  119. paasta_tools/contrib/render_template.py +129 -0
  120. paasta_tools/contrib/rightsizer_soaconfigs_update.py +348 -0
  121. paasta_tools/contrib/service_shard_remove.py +157 -0
  122. paasta_tools/contrib/service_shard_update.py +373 -0
  123. paasta_tools/contrib/shared_ip_check.py +77 -0
  124. paasta_tools/contrib/timeouts_metrics_prom.py +64 -0
  125. paasta_tools/delete_kubernetes_deployments.py +89 -0
  126. paasta_tools/deployment_utils.py +44 -0
  127. paasta_tools/docker_wrapper.py +234 -0
  128. paasta_tools/docker_wrapper_imports.py +13 -0
  129. paasta_tools/drain_lib.py +351 -0
  130. paasta_tools/dump_locally_running_services.py +71 -0
  131. paasta_tools/eks_tools.py +119 -0
  132. paasta_tools/envoy_tools.py +373 -0
  133. paasta_tools/firewall.py +504 -0
  134. paasta_tools/firewall_logging.py +154 -0
  135. paasta_tools/firewall_update.py +172 -0
  136. paasta_tools/flink_tools.py +345 -0
  137. paasta_tools/flinkeks_tools.py +90 -0
  138. paasta_tools/frameworks/__init__.py +0 -0
  139. paasta_tools/frameworks/adhoc_scheduler.py +71 -0
  140. paasta_tools/frameworks/constraints.py +87 -0
  141. paasta_tools/frameworks/native_scheduler.py +652 -0
  142. paasta_tools/frameworks/native_service_config.py +301 -0
  143. paasta_tools/frameworks/task_store.py +245 -0
  144. paasta_tools/generate_all_deployments +9 -0
  145. paasta_tools/generate_authenticating_services.py +94 -0
  146. paasta_tools/generate_deployments_for_service.py +255 -0
  147. paasta_tools/generate_services_file.py +114 -0
  148. paasta_tools/generate_services_yaml.py +30 -0
  149. paasta_tools/hacheck.py +76 -0
  150. paasta_tools/instance/__init__.py +0 -0
  151. paasta_tools/instance/hpa_metrics_parser.py +122 -0
  152. paasta_tools/instance/kubernetes.py +1362 -0
  153. paasta_tools/iptables.py +240 -0
  154. paasta_tools/kafkacluster_tools.py +143 -0
  155. paasta_tools/kubernetes/__init__.py +0 -0
  156. paasta_tools/kubernetes/application/__init__.py +0 -0
  157. paasta_tools/kubernetes/application/controller_wrappers.py +476 -0
  158. paasta_tools/kubernetes/application/tools.py +90 -0
  159. paasta_tools/kubernetes/bin/__init__.py +0 -0
  160. paasta_tools/kubernetes/bin/kubernetes_remove_evicted_pods.py +164 -0
  161. paasta_tools/kubernetes/bin/paasta_cleanup_remote_run_resources.py +135 -0
  162. paasta_tools/kubernetes/bin/paasta_cleanup_stale_nodes.py +181 -0
  163. paasta_tools/kubernetes/bin/paasta_secrets_sync.py +758 -0
  164. paasta_tools/kubernetes/remote_run.py +558 -0
  165. paasta_tools/kubernetes_tools.py +4679 -0
  166. paasta_tools/list_kubernetes_service_instances.py +128 -0
  167. paasta_tools/list_tron_namespaces.py +60 -0
  168. paasta_tools/long_running_service_tools.py +678 -0
  169. paasta_tools/mac_address.py +44 -0
  170. paasta_tools/marathon_dashboard.py +0 -0
  171. paasta_tools/mesos/__init__.py +0 -0
  172. paasta_tools/mesos/cfg.py +46 -0
  173. paasta_tools/mesos/cluster.py +60 -0
  174. paasta_tools/mesos/exceptions.py +59 -0
  175. paasta_tools/mesos/framework.py +77 -0
  176. paasta_tools/mesos/log.py +48 -0
  177. paasta_tools/mesos/master.py +306 -0
  178. paasta_tools/mesos/mesos_file.py +169 -0
  179. paasta_tools/mesos/parallel.py +52 -0
  180. paasta_tools/mesos/slave.py +115 -0
  181. paasta_tools/mesos/task.py +94 -0
  182. paasta_tools/mesos/util.py +69 -0
  183. paasta_tools/mesos/zookeeper.py +37 -0
  184. paasta_tools/mesos_maintenance.py +848 -0
  185. paasta_tools/mesos_tools.py +1051 -0
  186. paasta_tools/metrics/__init__.py +0 -0
  187. paasta_tools/metrics/metastatus_lib.py +1110 -0
  188. paasta_tools/metrics/metrics_lib.py +217 -0
  189. paasta_tools/monitoring/__init__.py +13 -0
  190. paasta_tools/monitoring/check_k8s_api_performance.py +110 -0
  191. paasta_tools/monitoring_tools.py +652 -0
  192. paasta_tools/monkrelaycluster_tools.py +146 -0
  193. paasta_tools/nrtsearchservice_tools.py +143 -0
  194. paasta_tools/nrtsearchserviceeks_tools.py +68 -0
  195. paasta_tools/oom_logger.py +321 -0
  196. paasta_tools/paasta_deploy_tron_jobs +3 -0
  197. paasta_tools/paasta_execute_docker_command.py +123 -0
  198. paasta_tools/paasta_native_serviceinit.py +21 -0
  199. paasta_tools/paasta_service_config_loader.py +201 -0
  200. paasta_tools/paastaapi/__init__.py +29 -0
  201. paasta_tools/paastaapi/api/__init__.py +3 -0
  202. paasta_tools/paastaapi/api/autoscaler_api.py +302 -0
  203. paasta_tools/paastaapi/api/default_api.py +569 -0
  204. paasta_tools/paastaapi/api/remote_run_api.py +604 -0
  205. paasta_tools/paastaapi/api/resources_api.py +157 -0
  206. paasta_tools/paastaapi/api/service_api.py +1736 -0
  207. paasta_tools/paastaapi/api_client.py +818 -0
  208. paasta_tools/paastaapi/apis/__init__.py +22 -0
  209. paasta_tools/paastaapi/configuration.py +455 -0
  210. paasta_tools/paastaapi/exceptions.py +137 -0
  211. paasta_tools/paastaapi/model/__init__.py +5 -0
  212. paasta_tools/paastaapi/model/adhoc_launch_history.py +176 -0
  213. paasta_tools/paastaapi/model/autoscaler_count_msg.py +176 -0
  214. paasta_tools/paastaapi/model/deploy_queue.py +178 -0
  215. paasta_tools/paastaapi/model/deploy_queue_service_instance.py +194 -0
  216. paasta_tools/paastaapi/model/envoy_backend.py +185 -0
  217. paasta_tools/paastaapi/model/envoy_location.py +184 -0
  218. paasta_tools/paastaapi/model/envoy_status.py +181 -0
  219. paasta_tools/paastaapi/model/flink_cluster_overview.py +188 -0
  220. paasta_tools/paastaapi/model/flink_config.py +173 -0
  221. paasta_tools/paastaapi/model/flink_job.py +186 -0
  222. paasta_tools/paastaapi/model/flink_job_details.py +192 -0
  223. paasta_tools/paastaapi/model/flink_jobs.py +175 -0
  224. paasta_tools/paastaapi/model/float_and_error.py +173 -0
  225. paasta_tools/paastaapi/model/hpa_metric.py +176 -0
  226. paasta_tools/paastaapi/model/inline_object.py +170 -0
  227. paasta_tools/paastaapi/model/inline_response200.py +170 -0
  228. paasta_tools/paastaapi/model/inline_response2001.py +170 -0
  229. paasta_tools/paastaapi/model/instance_bounce_status.py +200 -0
  230. paasta_tools/paastaapi/model/instance_mesh_status.py +186 -0
  231. paasta_tools/paastaapi/model/instance_status.py +220 -0
  232. paasta_tools/paastaapi/model/instance_status_adhoc.py +187 -0
  233. paasta_tools/paastaapi/model/instance_status_cassandracluster.py +173 -0
  234. paasta_tools/paastaapi/model/instance_status_flink.py +173 -0
  235. paasta_tools/paastaapi/model/instance_status_kafkacluster.py +173 -0
  236. paasta_tools/paastaapi/model/instance_status_kubernetes.py +263 -0
  237. paasta_tools/paastaapi/model/instance_status_kubernetes_autoscaling_status.py +187 -0
  238. paasta_tools/paastaapi/model/instance_status_kubernetes_v2.py +197 -0
  239. paasta_tools/paastaapi/model/instance_status_tron.py +204 -0
  240. paasta_tools/paastaapi/model/instance_tasks.py +182 -0
  241. paasta_tools/paastaapi/model/integer_and_error.py +173 -0
  242. paasta_tools/paastaapi/model/kubernetes_container.py +178 -0
  243. paasta_tools/paastaapi/model/kubernetes_container_v2.py +219 -0
  244. paasta_tools/paastaapi/model/kubernetes_healthcheck.py +176 -0
  245. paasta_tools/paastaapi/model/kubernetes_pod.py +201 -0
  246. paasta_tools/paastaapi/model/kubernetes_pod_event.py +176 -0
  247. paasta_tools/paastaapi/model/kubernetes_pod_v2.py +213 -0
  248. paasta_tools/paastaapi/model/kubernetes_replica_set.py +185 -0
  249. paasta_tools/paastaapi/model/kubernetes_version.py +202 -0
  250. paasta_tools/paastaapi/model/remote_run_outcome.py +189 -0
  251. paasta_tools/paastaapi/model/remote_run_start.py +185 -0
  252. paasta_tools/paastaapi/model/remote_run_stop.py +176 -0
  253. paasta_tools/paastaapi/model/remote_run_token.py +173 -0
  254. paasta_tools/paastaapi/model/resource.py +187 -0
  255. paasta_tools/paastaapi/model/resource_item.py +187 -0
  256. paasta_tools/paastaapi/model/resource_value.py +176 -0
  257. paasta_tools/paastaapi/model/smartstack_backend.py +191 -0
  258. paasta_tools/paastaapi/model/smartstack_location.py +181 -0
  259. paasta_tools/paastaapi/model/smartstack_status.py +181 -0
  260. paasta_tools/paastaapi/model/task_tail_lines.py +176 -0
  261. paasta_tools/paastaapi/model_utils.py +1879 -0
  262. paasta_tools/paastaapi/models/__init__.py +62 -0
  263. paasta_tools/paastaapi/rest.py +287 -0
  264. paasta_tools/prune_completed_pods.py +220 -0
  265. paasta_tools/puppet_service_tools.py +59 -0
  266. paasta_tools/py.typed +1 -0
  267. paasta_tools/remote_git.py +127 -0
  268. paasta_tools/run-paasta-api-in-dev-mode.py +57 -0
  269. paasta_tools/run-paasta-api-playground.py +51 -0
  270. paasta_tools/secret_providers/__init__.py +66 -0
  271. paasta_tools/secret_providers/vault.py +214 -0
  272. paasta_tools/secret_tools.py +277 -0
  273. paasta_tools/setup_istio_mesh.py +353 -0
  274. paasta_tools/setup_kubernetes_cr.py +412 -0
  275. paasta_tools/setup_kubernetes_crd.py +138 -0
  276. paasta_tools/setup_kubernetes_internal_crd.py +154 -0
  277. paasta_tools/setup_kubernetes_job.py +353 -0
  278. paasta_tools/setup_prometheus_adapter_config.py +1028 -0
  279. paasta_tools/setup_tron_namespace.py +248 -0
  280. paasta_tools/slack.py +75 -0
  281. paasta_tools/smartstack_tools.py +676 -0
  282. paasta_tools/spark_tools.py +283 -0
  283. paasta_tools/synapse_srv_namespaces_fact.py +42 -0
  284. paasta_tools/tron/__init__.py +0 -0
  285. paasta_tools/tron/client.py +158 -0
  286. paasta_tools/tron/tron_command_context.py +194 -0
  287. paasta_tools/tron/tron_timeutils.py +101 -0
  288. paasta_tools/tron_tools.py +1448 -0
  289. paasta_tools/utils.py +4307 -0
  290. paasta_tools/yaml_tools.py +44 -0
  291. paasta_tools-1.21.3.data/scripts/apply_external_resources.py +79 -0
  292. paasta_tools-1.21.3.data/scripts/bounce_log_latency_parser.py +68 -0
  293. paasta_tools-1.21.3.data/scripts/check_autoscaler_max_instances.py +212 -0
  294. paasta_tools-1.21.3.data/scripts/check_cassandracluster_services_replication.py +35 -0
  295. paasta_tools-1.21.3.data/scripts/check_flink_services_health.py +203 -0
  296. paasta_tools-1.21.3.data/scripts/check_kubernetes_api.py +57 -0
  297. paasta_tools-1.21.3.data/scripts/check_kubernetes_services_replication.py +141 -0
  298. paasta_tools-1.21.3.data/scripts/check_manual_oapi_changes.sh +24 -0
  299. paasta_tools-1.21.3.data/scripts/check_oom_events.py +244 -0
  300. paasta_tools-1.21.3.data/scripts/check_orphans.py +306 -0
  301. paasta_tools-1.21.3.data/scripts/check_spark_jobs.py +234 -0
  302. paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_cr.py +138 -0
  303. paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_crd.py +145 -0
  304. paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_jobs.py +344 -0
  305. paasta_tools-1.21.3.data/scripts/create_dynamodb_table.py +35 -0
  306. paasta_tools-1.21.3.data/scripts/create_paasta_playground.py +105 -0
  307. paasta_tools-1.21.3.data/scripts/delete_kubernetes_deployments.py +89 -0
  308. paasta_tools-1.21.3.data/scripts/emit_allocated_cpu_metrics.py +50 -0
  309. paasta_tools-1.21.3.data/scripts/generate_all_deployments +9 -0
  310. paasta_tools-1.21.3.data/scripts/generate_authenticating_services.py +94 -0
  311. paasta_tools-1.21.3.data/scripts/generate_deployments_for_service.py +255 -0
  312. paasta_tools-1.21.3.data/scripts/generate_services_file.py +114 -0
  313. paasta_tools-1.21.3.data/scripts/generate_services_yaml.py +30 -0
  314. paasta_tools-1.21.3.data/scripts/get_running_task_allocation.py +346 -0
  315. paasta_tools-1.21.3.data/scripts/habitat_fixer.py +86 -0
  316. paasta_tools-1.21.3.data/scripts/ide_helper.py +316 -0
  317. paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_proxy.py +139 -0
  318. paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_smartstack.py +50 -0
  319. paasta_tools-1.21.3.data/scripts/kill_bad_containers.py +109 -0
  320. paasta_tools-1.21.3.data/scripts/kubernetes_remove_evicted_pods.py +164 -0
  321. paasta_tools-1.21.3.data/scripts/mass-deploy-tag.sh +44 -0
  322. paasta_tools-1.21.3.data/scripts/mock_patch_checker.py +86 -0
  323. paasta_tools-1.21.3.data/scripts/paasta_cleanup_remote_run_resources.py +135 -0
  324. paasta_tools-1.21.3.data/scripts/paasta_cleanup_stale_nodes.py +181 -0
  325. paasta_tools-1.21.3.data/scripts/paasta_deploy_tron_jobs +3 -0
  326. paasta_tools-1.21.3.data/scripts/paasta_execute_docker_command.py +123 -0
  327. paasta_tools-1.21.3.data/scripts/paasta_secrets_sync.py +758 -0
  328. paasta_tools-1.21.3.data/scripts/paasta_tabcomplete.sh +23 -0
  329. paasta_tools-1.21.3.data/scripts/paasta_update_soa_memcpu.py +520 -0
  330. paasta_tools-1.21.3.data/scripts/render_template.py +129 -0
  331. paasta_tools-1.21.3.data/scripts/rightsizer_soaconfigs_update.py +348 -0
  332. paasta_tools-1.21.3.data/scripts/service_shard_remove.py +157 -0
  333. paasta_tools-1.21.3.data/scripts/service_shard_update.py +373 -0
  334. paasta_tools-1.21.3.data/scripts/setup_istio_mesh.py +353 -0
  335. paasta_tools-1.21.3.data/scripts/setup_kubernetes_cr.py +412 -0
  336. paasta_tools-1.21.3.data/scripts/setup_kubernetes_crd.py +138 -0
  337. paasta_tools-1.21.3.data/scripts/setup_kubernetes_internal_crd.py +154 -0
  338. paasta_tools-1.21.3.data/scripts/setup_kubernetes_job.py +353 -0
  339. paasta_tools-1.21.3.data/scripts/setup_prometheus_adapter_config.py +1028 -0
  340. paasta_tools-1.21.3.data/scripts/shared_ip_check.py +77 -0
  341. paasta_tools-1.21.3.data/scripts/synapse_srv_namespaces_fact.py +42 -0
  342. paasta_tools-1.21.3.data/scripts/timeouts_metrics_prom.py +64 -0
  343. paasta_tools-1.21.3.dist-info/LICENSE +201 -0
  344. paasta_tools-1.21.3.dist-info/METADATA +74 -0
  345. paasta_tools-1.21.3.dist-info/RECORD +348 -0
  346. paasta_tools-1.21.3.dist-info/WHEEL +5 -0
  347. paasta_tools-1.21.3.dist-info/entry_points.txt +20 -0
  348. paasta_tools-1.21.3.dist-info/top_level.txt +2 -0
paasta_tools/setup_prometheus_adapter_config.py (new file)
@@ -0,0 +1,1028 @@
#!/usr/bin/env python
# Copyright 2015-2021 Yelp Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Small utility to update the Prometheus adapter's config to match soaconfigs.
"""
import argparse
import logging
import sys
from pathlib import Path
from typing import cast
from typing import Dict
from typing import List
from typing import Optional

import ruamel.yaml as yaml
from kubernetes.client import V1ConfigMap
from kubernetes.client import V1DeleteOptions
from kubernetes.client import V1ObjectMeta
from kubernetes.client.rest import ApiException
from mypy_extensions import TypedDict

from paasta_tools.autoscaling.utils import MetricsProviderDict
from paasta_tools.eks_tools import EksDeploymentConfig
from paasta_tools.kubernetes_tools import ensure_namespace
from paasta_tools.kubernetes_tools import get_kubernetes_app_name
from paasta_tools.kubernetes_tools import KubeClient
from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig
from paasta_tools.kubernetes_tools import V1Pod
from paasta_tools.long_running_service_tools import ALL_METRICS_PROVIDERS
from paasta_tools.long_running_service_tools import (
    DEFAULT_ACTIVE_REQUESTS_AUTOSCALING_MOVING_AVERAGE_WINDOW,
)
from paasta_tools.long_running_service_tools import (
    DEFAULT_DESIRED_ACTIVE_REQUESTS_PER_REPLICA,
)
from paasta_tools.long_running_service_tools import (
    DEFAULT_GUNICORN_AUTOSCALING_MOVING_AVERAGE_WINDOW,
)
from paasta_tools.long_running_service_tools import (
    DEFAULT_PISCINA_AUTOSCALING_MOVING_AVERAGE_WINDOW,
)
from paasta_tools.long_running_service_tools import (
    DEFAULT_UWSGI_AUTOSCALING_MOVING_AVERAGE_WINDOW,
)
from paasta_tools.long_running_service_tools import METRICS_PROVIDER_ACTIVE_REQUESTS
from paasta_tools.long_running_service_tools import METRICS_PROVIDER_CPU
from paasta_tools.long_running_service_tools import METRICS_PROVIDER_GUNICORN
from paasta_tools.long_running_service_tools import METRICS_PROVIDER_PISCINA
from paasta_tools.long_running_service_tools import METRICS_PROVIDER_PROMQL
from paasta_tools.long_running_service_tools import METRICS_PROVIDER_UWSGI
from paasta_tools.long_running_service_tools import METRICS_PROVIDER_UWSGI_V2
from paasta_tools.paasta_service_config_loader import PaastaServiceConfigLoader
from paasta_tools.utils import DEFAULT_SOA_DIR
from paasta_tools.utils import get_services_for_cluster

log = logging.getLogger(__name__)

PROMETHEUS_ADAPTER_CONFIGMAP_NAMESPACE = "custom-metrics"
PROMETHEUS_ADAPTER_POD_NAMESPACE = "custom-metrics"
PROMETHEUS_ADAPTER_CONFIGMAP_NAME = "adapter-config"
PROMETHEUS_ADAPTER_CONFIGMAP_FILENAME = "config.yaml"
PROMETHEUS_ADAPTER_POD_NAME_PREFIX = "custom-metrics-apiserver"
PROMETHEUS_ADAPTER_POD_PHASES_TO_REMOVE = (
    "Running",
    "Pending",
)

DEFAULT_SCRAPE_PERIOD_S = 10
DEFAULT_EXTRAPOLATION_PERIODS = 10
DEFAULT_EXTRAPOLATION_TIME = DEFAULT_SCRAPE_PERIOD_S * DEFAULT_EXTRAPOLATION_PERIODS

K8S_INSTANCE_TYPE_CLASSES = (
    KubernetesDeploymentConfig,
    EksDeploymentConfig,
)


class PrometheusAdapterResourceConfig(TypedDict, total=False):
    """
    Configuration for resource association in the Prometheus adapter.

    NOTE: this dict is not total as there's no existing way in mypy to annotate
    that only one of these keys needs to be populated (and that both can be
    populated if so desired).

    For more information, see:
    https://github.com/kubernetes-sigs/prometheus-adapter/blob/master/docs/config.md#association
    """

    # this should be a Go template string (e.g., "kube_<<.Resource>>") and will be used to
    # extract k8s resources from a label
    template: str
    # if your labels don't have a common prefix (or if you only want to inspect certain labels)
    # you'd want to use an override - these are of the form:
    # {
    #     "$SOME_PROMETHEUS_LABEL": {
    #         "group": "$SOME_K8S_GROUP",
    #         "resource": "$SOME_K8S_RESOURCE",
    #     }
    # }
    overrides: Dict[str, Dict[str, str]]


class PrometheusAdapterRule(TypedDict):
    """
    Typed version of the (minimal) set of Prometheus adapter rule configuration options that we use.

    For more information, see:
    https://github.com/kubernetes-sigs/prometheus-adapter/blob/master/docs/config.md
    """

    # used for discovering what resources should be scaled
    seriesQuery: str
    # configuration for how to expose this rule to the HPA
    name: Dict[str, str]
    # used to associate metrics with k8s resources
    resources: PrometheusAdapterResourceConfig
    # the actual query we want to send to Prometheus to use for scaling
    metricsQuery: str


class PrometheusAdapterConfig(TypedDict):
    """
    Typed version of the Prometheus adapter configuration dictionary.
    """

    rules: List[PrometheusAdapterRule]


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(
        description="Syncs the Prometheus metric adapter config with soaconfigs.",
    )

    parser.add_argument(
        "-d",
        "--soa-dir",
        dest="soa_dir",
        metavar="SOA_DIR",
        default=Path(DEFAULT_SOA_DIR),
        help="Directory to read service configs from. Default is %(default)s.",
        type=Path,
    )
    # TODO: do we need to be able to pass multiple clusters in?
    parser.add_argument(
        "-c",
        "--cluster",
        dest="cluster",
        help="PaaSTA cluster to generate configs for.",
        required=True,
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        dest="verbose",
        default=False,
        help="Enable verbose logging.",
    )
    parser.add_argument(
        "--dry-run",
        dest="dry_run",
        action="store_true",
        default=False,
        help="Print the generated config without updating anything in the cluster.",
    )

    return parser.parse_args()


def _minify_promql(query: str) -> str:
    """
    Given a PromQL query, return the same query with most whitespace collapsed.

    This is useful for allowing us to nicely format queries in code, but minimize the size of our
    queries when they're actually sent to Prometheus by the adapter.
    """
    trimmed_query = []
    # while we could potentially do some regex magic, we want to ensure
    # that we don't mess up any labels (even though they really shouldn't
    # have any whitespace in them in the first place) - thus we just
    # strip any leading/trailing whitespace and leave everything else alone
    for line in query.split("\n"):
        trimmed_query.append(line.strip())

    return (" ".join(trimmed_query)).strip()
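

# For example (hypothetical input): _minify_promql("sum(\n    up\n) by (job)")
# returns "sum( up ) by (job)" - each line is stripped and the lines are joined
# with single spaces, which PromQL parses identically to the indented original.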


def create_instance_scaling_rule(
    service: str,
    instance_config: KubernetesDeploymentConfig,
    metrics_provider_config: MetricsProviderDict,
    paasta_cluster: str,
) -> Optional[PrometheusAdapterRule]:
    if metrics_provider_config["type"] == METRICS_PROVIDER_CPU:
        log.debug(f"[{service}] prometheus-based CPU scaling is not supported")
        return None
    if metrics_provider_config["type"] == METRICS_PROVIDER_UWSGI:
        return create_instance_uwsgi_scaling_rule(
            service, instance_config, metrics_provider_config, paasta_cluster
        )
    if metrics_provider_config["type"] == METRICS_PROVIDER_UWSGI_V2:
        return create_instance_uwsgi_v2_scaling_rule(
            service, instance_config, metrics_provider_config, paasta_cluster
        )
    if metrics_provider_config["type"] == METRICS_PROVIDER_PISCINA:
        return create_instance_piscina_scaling_rule(
            service, instance_config, metrics_provider_config, paasta_cluster
        )
    if metrics_provider_config["type"] == METRICS_PROVIDER_GUNICORN:
        return create_instance_gunicorn_scaling_rule(
            service, instance_config, metrics_provider_config, paasta_cluster
        )
    if metrics_provider_config["type"] == METRICS_PROVIDER_ACTIVE_REQUESTS:
        return create_instance_active_requests_scaling_rule(
            service, instance_config, metrics_provider_config, paasta_cluster
        )
    if metrics_provider_config["type"] == METRICS_PROVIDER_PROMQL:
        return create_instance_arbitrary_promql_scaling_rule(
            service, instance_config, metrics_provider_config, paasta_cluster
        )

    raise ValueError(
        f"unknown metrics provider type: {metrics_provider_config['type']}"
    )


def create_instance_active_requests_scaling_rule(
    service: str,
    instance_config: KubernetesDeploymentConfig,
    metrics_provider_config: MetricsProviderDict,
    paasta_cluster: str,
) -> PrometheusAdapterRule:
    """
    Creates a Prometheus adapter rule config for a given service instance.
    """
    instance = instance_config.instance
    namespace = instance_config.get_namespace()
    desired_active_requests_per_replica = metrics_provider_config.get(
        "desired_active_requests_per_replica",
        DEFAULT_DESIRED_ACTIVE_REQUESTS_PER_REPLICA,
    )
    moving_average_window = metrics_provider_config.get(
        "moving_average_window_seconds",
        DEFAULT_ACTIVE_REQUESTS_AUTOSCALING_MOVING_AVERAGE_WINDOW,
    )
    deployment_name = get_kubernetes_app_name(service=service, instance=instance)

    # In order for autoscaling to work safely while a service migrates from one namespace to another, the HPA needs to
    # make sure that the deployment in the new namespace is scaled up enough to handle _all_ the load.
    # This is because once the new deployment is 100% healthy, cleanup_kubernetes_job will delete the deployment out of
    # the old namespace all at once, suddenly putting all the load onto the deployment in the new namespace.
    # To ensure this, we must:
    #   - DO NOT filter on namespace in worker_filter_terms (which is used when calculating desired_instances).
    #   - DO filter on namespace in replica_filter_terms (which is used to calculate current_replicas).
    # This makes sure that desired_instances includes load from all namespaces, but that the scaling ratio calculated
    # by (desired_instances / current_replicas) is meaningful for each namespace.
    worker_filter_terms = f"paasta_cluster='{paasta_cluster}',paasta_service='{service}',paasta_instance='{instance}'"
    replica_filter_terms = f"paasta_cluster='{paasta_cluster}',deployment='{deployment_name}',namespace='{namespace}'"

    current_replicas = f"""
        sum(
            label_join(
                (
                    kube_deployment_spec_replicas{{{replica_filter_terms}}} >= 0
                    or
                    max_over_time(
                        kube_deployment_spec_replicas{{{replica_filter_terms}}}[{DEFAULT_EXTRAPOLATION_TIME}s]
                    )
                ),
                "kube_deployment", "", "deployment"
            )
        ) by (kube_deployment)
    """

    # Envoy tracks metrics at the smartstack namespace level. In most cases the paasta instance name matches the
    # smartstack namespace. In rare cases, custom registrations are added to instance configs.
    # If there is no custom registration, the envoy and instance names match and there is no need to update
    # worker_filter_terms.
    # If there is a single custom registration for an instance, we process the registration value and extract the
    # value to be used. Registrations usually follow the format {service_name}.{smartstack_name}, hence we split the
    # string on dots and take the last token.
    # More than one custom registration is not supported; config validation takes care of rejecting such configs.
    registrations = instance_config.get_registrations()

    mesh_instance = registrations[0].split(".")[-1] if len(registrations) == 1 else None
    envoy_filter_terms = f"paasta_cluster='{paasta_cluster}',paasta_service='{service}',paasta_instance='{mesh_instance or instance}'"

    # envoy-based metrics have no labels corresponding to the k8s resources that they
    # front, but we can trivially add one in since our deployment names are of the form
    # {service_name}-{instance_name} - which are both things in `worker_filter_terms` so
    # it's safe to unconditionally add.
    # This is necessary as otherwise the HPA/prometheus adapter does not know what these
    # metrics are for.
    total_load = f"""
        (
            sum(
                label_replace(
                    paasta_instance:envoy_cluster__egress_cluster_upstream_rq_active{{{envoy_filter_terms}}},
                    "kube_deployment", "{deployment_name}", "", ""
                )
            ) by (kube_deployment)
        )
    """
    desired_instances_at_each_point_in_time = f"""
        {total_load} / {desired_active_requests_per_replica}
    """
    desired_instances = f"""
        avg_over_time(
            (
                {desired_instances_at_each_point_in_time}
            )[{moving_average_window}s:]
        )
    """

    # The prometheus HPA adapter needs kube_deployment and kube_namespace labels attached to the metrics it's scaling
    # on. The envoy-based metrics have no labels corresponding to the k8s resources, so we can add them in.
    metrics_query = f"""
        label_replace(
            label_replace(
                {desired_instances} / {current_replicas},
                "kube_deployment", "{deployment_name}", "", ""
            ),
            "kube_namespace", "{namespace}", "", ""
        )
    """
    series_query = f"""
        k8s:deployment:pods_status_ready{{{worker_filter_terms}}}
    """

    metric_name = f"{deployment_name}-active-requests-prom"

    return {
        "name": {"as": metric_name},
        "seriesQuery": _minify_promql(series_query),
        "resources": {"template": "kube_<<.Resource>>"},
        "metricsQuery": _minify_promql(metrics_query),
    }
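

# For example (hypothetical names): if instance "canary" of service
# "exampleservice" declares the single registration "exampleservice.main",
# mesh_instance above is "main", so the canary deployment scales on the envoy
# metrics of the shared "main" mesh namespace.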


def create_instance_uwsgi_scaling_rule(
    service: str,
    instance_config: KubernetesDeploymentConfig,
    metrics_provider_config: MetricsProviderDict,
    paasta_cluster: str,
) -> PrometheusAdapterRule:
    """
    Creates a Prometheus adapter rule config for a given service instance.
    """
    instance = instance_config.instance
    namespace = instance_config.get_namespace()
    setpoint = metrics_provider_config["setpoint"]
    moving_average_window = metrics_provider_config.get(
        "moving_average_window_seconds", DEFAULT_UWSGI_AUTOSCALING_MOVING_AVERAGE_WINDOW
    )
    deployment_name = get_kubernetes_app_name(service=service, instance=instance)

    # In order for autoscaling to work safely while a service migrates from one namespace to another, the HPA needs to
    # make sure that the deployment in the new namespace is scaled up enough to handle _all_ the load.
    # This is because once the new deployment is 100% healthy, cleanup_kubernetes_job will delete the deployment out of
    # the old namespace all at once, suddenly putting all the load onto the deployment in the new namespace.
    # To ensure this, we must:
    #   - DO NOT filter on namespace in worker_filter_terms (which is used when calculating desired_instances).
    #   - DO filter on namespace in replica_filter_terms (which is used to calculate current_replicas).
    # This makes sure that desired_instances includes load from all namespaces, but that the scaling ratio calculated
    # by (desired_instances / current_replicas) is meaningful for each namespace.
    worker_filter_terms = f"paasta_cluster='{paasta_cluster}',paasta_service='{service}',paasta_instance='{instance}'"
    replica_filter_terms = f"paasta_cluster='{paasta_cluster}',kube_deployment='{deployment_name}',namespace='{namespace}'"

    # k8s:deployment:pods_status_ready is a metric created by summing kube_pod_status_ready
    # over paasta service/instance/cluster. it counts the number of ready pods in a paasta
    # deployment.
    ready_pods = f"""
        (sum(
            k8s:deployment:pods_status_ready{{{worker_filter_terms}}} >= 0
            or
            max_over_time(
                k8s:deployment:pods_status_ready{{{worker_filter_terms}}}[{DEFAULT_EXTRAPOLATION_TIME}s]
            )
        ) by (kube_deployment))
    """
    # as mentioned above: we want to get the overall load by counting load across namespaces - but we need
    # to divide by the ready pods in the target namespace - which is done by using a namespace filter here
    ready_pods_namespaced = f"""
        (sum(
            k8s:deployment:pods_status_ready{{{replica_filter_terms}}} >= 0
            or
            max_over_time(
                k8s:deployment:pods_status_ready{{{replica_filter_terms}}}[{DEFAULT_EXTRAPOLATION_TIME}s]
            )
        ) by (kube_deployment))
    """
    load_per_instance = f"""
        avg(
            uwsgi_worker_busy{{{worker_filter_terms}}}
        ) by (kube_pod, kube_deployment)
    """
    missing_instances = f"""
        clamp_min(
            {ready_pods} - count({load_per_instance}) by (kube_deployment),
            0
        )
    """
    total_load = f"""
        (
            sum(
                {load_per_instance}
            ) by (kube_deployment)
            +
            {missing_instances}
        )
    """
    desired_instances_at_each_point_in_time = f"""
        {total_load} / {setpoint}
    """
    desired_instances = f"""
        avg_over_time(
            (
                {desired_instances_at_each_point_in_time}
            )[{moving_average_window}s:]
        )
    """

    # our Prometheus query is calculating a desired number of replicas, and then k8s wants that expressed as an
    # average utilization - so as long as we divide by the number that k8s ends up multiplying by, we should be able
    # to convince k8s to run any arbitrary number of replicas.
    # k8s happens to multiply by the # of ready pods - so we divide by that rather than by the number of current
    # replicas (which may include non-ready pods)
    # ref: https://github.com/kubernetes/kubernetes/blob/7ec1a89a509906dad9fd6a4635d7bfc157b47790/pkg/controller/podautoscaler/replica_calculator.go#L278
    metrics_query = f"""
        {desired_instances} / {ready_pods_namespaced}
    """

    metric_name = f"{deployment_name}-uwsgi-prom"

    return {
        "name": {"as": metric_name},
        "seriesQuery": f"uwsgi_worker_busy{{{worker_filter_terms}}}",
        "resources": {"template": "kube_<<.Resource>>"},
        "metricsQuery": _minify_promql(metrics_query),
    }
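

# A worked example of the arithmetic above (hypothetical numbers): with a
# setpoint of 0.5 and 10 ready pods each reporting uwsgi_worker_busy of 0.6
# (and no missing pods), total_load is 6, so desired_instances is 6 / 0.5 = 12.
# The metric served to the HPA is 12 / 10 = 1.2; since k8s multiplies the
# reported value by the number of ready pods, the HPA targets
# ceil(1.2 * 10) = 12 replicas.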


def create_instance_uwsgi_v2_scaling_rule(
    service: str,
    instance_config: KubernetesDeploymentConfig,
    metrics_provider_config: MetricsProviderDict,
    paasta_cluster: str,
) -> PrometheusAdapterRule:
    """
    Creates a Prometheus adapter rule config for a given service instance.
    """
    instance = instance_config.instance
    moving_average_window = metrics_provider_config.get(
        "moving_average_window_seconds", DEFAULT_UWSGI_AUTOSCALING_MOVING_AVERAGE_WINDOW
    )
    deployment_name = get_kubernetes_app_name(service=service, instance=instance)

    # In order for autoscaling to work safely while a service migrates from one namespace to another, the HPA needs to
    # make sure that the deployment in the new namespace is scaled up enough to handle _all_ the load.
    # This is because once the new deployment is 100% healthy, cleanup_kubernetes_job will delete the deployment out of
    # the old namespace all at once, suddenly putting all the load onto the deployment in the new namespace.
    # To ensure this, we must NOT filter on namespace in worker_filter_terms (which is used when calculating
    # total_load). This makes sure that desired_instances includes load from all namespaces.
    worker_filter_terms = f"paasta_cluster='{paasta_cluster}',paasta_service='{service}',paasta_instance='{instance}'"

    # k8s:deployment:pods_status_ready is a metric created by summing kube_pod_status_ready
    # over paasta service/instance/cluster. it counts the number of ready pods in a paasta
    # deployment.
    ready_pods = f"""
        (sum(
            k8s:deployment:pods_status_ready{{{worker_filter_terms}}} >= 0
            or
            max_over_time(
                k8s:deployment:pods_status_ready{{{worker_filter_terms}}}[{DEFAULT_EXTRAPOLATION_TIME}s]
            )
        ) by (kube_deployment))
    """
    load_per_instance = f"""
        avg(
            uwsgi_worker_busy{{{worker_filter_terms}}}
        ) by (kube_pod, kube_deployment)
    """
    missing_instances = f"""
        clamp_min(
            {ready_pods} - count({load_per_instance}) by (kube_deployment),
            0
        )
    """
    total_load = f"""
        (
            sum(
                {load_per_instance}
            ) by (kube_deployment)
            +
            {missing_instances}
        )
    """
    total_load_smoothed = f"""
        avg_over_time(
            (
                {total_load}
            )[{moving_average_window}s:]
        )
    """

    metric_name = f"{deployment_name}-uwsgi-v2-prom"

    return {
        "name": {"as": metric_name},
        "seriesQuery": f"uwsgi_worker_busy{{{worker_filter_terms}}}",
        "resources": {"template": "kube_<<.Resource>>"},
        "metricsQuery": _minify_promql(total_load_smoothed),
    }


def create_instance_piscina_scaling_rule(
    service: str,
    instance_config: KubernetesDeploymentConfig,
    metrics_provider_config: MetricsProviderDict,
    paasta_cluster: str,
) -> PrometheusAdapterRule:
    """
    Creates a Prometheus adapter rule config for a given service instance.
    """
    instance = instance_config.instance
    namespace = instance_config.get_namespace()
    setpoint = metrics_provider_config["setpoint"]
    moving_average_window = metrics_provider_config.get(
        "moving_average_window_seconds",
        DEFAULT_PISCINA_AUTOSCALING_MOVING_AVERAGE_WINDOW,
    )
    deployment_name = get_kubernetes_app_name(service=service, instance=instance)

    # In order for autoscaling to work safely while a service migrates from one namespace to another, the HPA needs to
    # make sure that the deployment in the new namespace is scaled up enough to handle _all_ the load.
    # This is because once the new deployment is 100% healthy, cleanup_kubernetes_job will delete the deployment out of
    # the old namespace all at once, suddenly putting all the load onto the deployment in the new namespace.
    # To ensure this, we must:
    #   - DO NOT filter on namespace in worker_filter_terms (which is used when calculating desired_instances).
    #   - DO filter on namespace in replica_filter_terms (which is used to calculate current_replicas).
    # This makes sure that desired_instances includes load from all namespaces, but that the scaling ratio calculated
    # by (desired_instances / current_replicas) is meaningful for each namespace.
    worker_filter_terms = f"paasta_cluster='{paasta_cluster}',paasta_service='{service}',paasta_instance='{instance}'"
    replica_filter_terms = f"paasta_cluster='{paasta_cluster}',deployment='{deployment_name}',namespace='{namespace}'"

    current_replicas = f"""
        sum(
            label_join(
                (
                    kube_deployment_spec_replicas{{{replica_filter_terms}}} >= 0
                    or
                    max_over_time(
                        kube_deployment_spec_replicas{{{replica_filter_terms}}}[{DEFAULT_EXTRAPOLATION_TIME}s]
                    )
                ),
                "kube_deployment", "", "deployment"
            )
        ) by (kube_deployment)
    """
    # k8s:deployment:pods_status_ready is a metric created by summing kube_pod_status_ready
    # over paasta service/instance/cluster. it counts the number of ready pods in a paasta
    # deployment.
    ready_pods = f"""
        (sum(
            k8s:deployment:pods_status_ready{{{worker_filter_terms}}} >= 0
            or
            max_over_time(
                k8s:deployment:pods_status_ready{{{worker_filter_terms}}}[{DEFAULT_EXTRAPOLATION_TIME}s]
            )
        ) by (kube_deployment))
    """
    load_per_instance = f"""
        (piscina_pool_utilization{{{worker_filter_terms}}})
    """
    missing_instances = f"""
        clamp_min(
            {ready_pods} - count({load_per_instance}) by (kube_deployment),
            0
        )
    """
    total_load = f"""
        (
            sum(
                {load_per_instance}
            ) by (kube_deployment)
            +
            {missing_instances}
        )
    """
    desired_instances_at_each_point_in_time = f"""
        {total_load} / {setpoint}
    """
    desired_instances = f"""
        avg_over_time(
            (
                {desired_instances_at_each_point_in_time}
            )[{moving_average_window}s:]
        )
    """
    metrics_query = f"""
        {desired_instances} / {current_replicas}
    """

    return {
        "name": {"as": f"{deployment_name}-piscina-prom"},
        "seriesQuery": f"piscina_pool_utilization{{{worker_filter_terms}}}",
        "resources": {"template": "kube_<<.Resource>>"},
        "metricsQuery": _minify_promql(metrics_query),
    }


def create_instance_gunicorn_scaling_rule(
    service: str,
    instance_config: KubernetesDeploymentConfig,
    metrics_provider_config: MetricsProviderDict,
    paasta_cluster: str,
) -> PrometheusAdapterRule:
    """
    Creates a Prometheus adapter rule config for a given service instance.
    """
    instance = instance_config.instance
    namespace = instance_config.get_namespace()
    setpoint = metrics_provider_config["setpoint"]
    moving_average_window = metrics_provider_config.get(
        "moving_average_window_seconds",
        DEFAULT_GUNICORN_AUTOSCALING_MOVING_AVERAGE_WINDOW,
    )

    deployment_name = get_kubernetes_app_name(service=service, instance=instance)

    # In order for autoscaling to work safely while a service migrates from one namespace to another, the HPA needs to
    # make sure that the deployment in the new namespace is scaled up enough to handle _all_ the load.
    # This is because once the new deployment is 100% healthy, cleanup_kubernetes_job will delete the deployment out of
    # the old namespace all at once, suddenly putting all the load onto the deployment in the new namespace.
    # To ensure this, we must:
    #   - DO NOT filter on namespace in worker_filter_terms (which is used when calculating desired_instances).
    #   - DO filter on namespace in replica_filter_terms (which is used to calculate current_replicas).
    # This makes sure that desired_instances includes load from all namespaces, but that the scaling ratio calculated
    # by (desired_instances / current_replicas) is meaningful for each namespace.
    worker_filter_terms = f"paasta_cluster='{paasta_cluster}',paasta_service='{service}',paasta_instance='{instance}'"
    replica_filter_terms = f"paasta_cluster='{paasta_cluster}',deployment='{deployment_name}',namespace='{namespace}'"

    current_replicas = f"""
        sum(
            label_join(
                (
                    kube_deployment_spec_replicas{{{replica_filter_terms}}} >= 0
                    or
                    max_over_time(
                        kube_deployment_spec_replicas{{{replica_filter_terms}}}[{DEFAULT_EXTRAPOLATION_TIME}s]
                    )
                ),
                "kube_deployment", "", "deployment"
            )
        ) by (kube_deployment)
    """
    # k8s:deployment:pods_status_ready is a metric created by summing kube_pod_status_ready
    # over paasta service/instance/cluster. it counts the number of ready pods in a paasta
    # deployment.
    ready_pods = f"""
        (sum(
            k8s:deployment:pods_status_ready{{{worker_filter_terms}}} >= 0
            or
            max_over_time(
                k8s:deployment:pods_status_ready{{{worker_filter_terms}}}[{DEFAULT_EXTRAPOLATION_TIME}s]
            )
        ) by (kube_deployment))
    """
    load_per_instance = f"""
        avg(
            gunicorn_worker_busy{{{worker_filter_terms}}}
        ) by (kube_pod, kube_deployment)
    """
    missing_instances = f"""
        clamp_min(
            {ready_pods} - count({load_per_instance}) by (kube_deployment),
            0
        )
    """
    total_load = f"""
        (
            sum(
                {load_per_instance}
            ) by (kube_deployment)
            +
            {missing_instances}
        )
    """
    desired_instances_at_each_point_in_time = f"""
        {total_load} / {setpoint}
    """
    desired_instances = f"""
        avg_over_time(
            (
                {desired_instances_at_each_point_in_time}
            )[{moving_average_window}s:]
        )
    """
    metrics_query = f"""
        {desired_instances} / {current_replicas}
    """

    metric_name = f"{deployment_name}-gunicorn-prom"

    return {
        "name": {"as": metric_name},
        "seriesQuery": f"gunicorn_worker_busy{{{worker_filter_terms}}}",
        "resources": {"template": "kube_<<.Resource>>"},
        "metricsQuery": _minify_promql(metrics_query),
    }


def create_instance_arbitrary_promql_scaling_rule(
    service: str,
    instance_config: KubernetesDeploymentConfig,
    metrics_provider_config: MetricsProviderDict,
    paasta_cluster: str,
) -> PrometheusAdapterRule:
    instance = instance_config.instance
    namespace = instance_config.get_namespace()
    prometheus_adapter_config = metrics_provider_config["prometheus_adapter_config"]
    deployment_name = get_kubernetes_app_name(service=service, instance=instance)

    if "seriesQuery" in prometheus_adapter_config:
        # If the user specifies seriesQuery, don't wrap their metricsQuery, under the assumption that they may not want
        # us to mess with their labels.
        series_query = prometheus_adapter_config["seriesQuery"]
        metrics_query = prometheus_adapter_config["metricsQuery"]
    else:
        # If the user doesn't specify seriesQuery, assume they want to just write some promql that returns a number.
        # Set up series_query to match the default `resources`.
        series_query = f"""
            kube_deployment_labels{{
                deployment='{deployment_name}',
                paasta_cluster='{paasta_cluster}',
                namespace='{namespace}'
            }}
        """
        # Wrap their promql with label_replace() calls that add `deployment` / `namespace` labels which match the
        # default `resources`.
        metrics_query = f"""
            label_replace(
                label_replace(
                    {prometheus_adapter_config["metricsQuery"]},
                    'deployment',
                    '{deployment_name}',
                    '',
                    ''
                ),
                'namespace',
                '{namespace}',
                '',
                ''
            )
        """

    return {
        "name": {
            "as": f"{deployment_name}-arbitrary-promql",
        },
        "seriesQuery": _minify_promql(series_query),
        "metricsQuery": _minify_promql(metrics_query),
        "resources": prometheus_adapter_config.get(
            "resources",
            {
                "overrides": {
                    "namespace": {"resource": "namespace"},
                    "deployment": {"group": "apps", "resource": "deployments"},
                },
            },
        ),
    }
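

# For illustration (hypothetical service/instance names), one generated uwsgi
# rule serializes into the adapter configmap roughly as:
#
#   - name:
#       as: exampleservice-canary-uwsgi-prom
#     seriesQuery: uwsgi_worker_busy{paasta_cluster='examplecluster',paasta_service='exampleservice',paasta_instance='canary'}
#     resources:
#       template: kube_<<.Resource>>
#     metricsQuery: avg_over_time(( ... )[<moving_average_window>s:]) / (sum( ... ) by (kube_deployment))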


def get_rules_for_service_instance(
    service_name: str,
    instance_config: KubernetesDeploymentConfig,
    paasta_cluster: str,
) -> List[PrometheusAdapterRule]:
    """
    Returns a list of Prometheus adapter rules for a given service instance - one rule
    per metrics provider configured for that instance.
    """
    rules: List[PrometheusAdapterRule] = []

    for metrics_provider_type in ALL_METRICS_PROVIDERS:
        metrics_provider_config = instance_config.get_autoscaling_metrics_provider(
            metrics_provider_type
        )
        if metrics_provider_config is None:
            log.debug(
                f"Skipping {service_name}.{instance_config.instance} - no Prometheus-based autoscaling configured for {metrics_provider_type}"
            )
            continue

        rule = create_instance_scaling_rule(
            service=service_name,
            instance_config=instance_config,
            metrics_provider_config=metrics_provider_config,
            paasta_cluster=paasta_cluster,
        )
        if rule is not None:
            rules.append(rule)

    return rules


def create_prometheus_adapter_config(
    paasta_cluster: str, soa_dir: Path
) -> PrometheusAdapterConfig:
    """
    Given a paasta cluster and a soaconfigs directory, create the necessary Prometheus adapter
    config to autoscale services.

    Currently supports the following metrics providers:
        * uwsgi (v1 and v2)
        * piscina
        * gunicorn
        * active-requests
        * arbitrary PromQL
    """
    rules: List[PrometheusAdapterRule] = []
    # get_services_for_cluster() returns a list of (service, instance) tuples, but this
    # is not great for us: if we were to iterate over that we'd end up getting duplicates
    # for every service as PaastaServiceConfigLoader does not expose a way to get configs
    # for a single instance by name. instead, we get the unique set of service names and then
    # let PaastaServiceConfigLoader iterate over instances for us later
    services = {
        service_name
        for service_name, _ in get_services_for_cluster(
            cluster=paasta_cluster, instance_type="kubernetes", soa_dir=str(soa_dir)
        )
    }
    services.update(
        {
            service_name
            for service_name, _ in get_services_for_cluster(
                cluster=paasta_cluster, instance_type="eks", soa_dir=str(soa_dir)
            )
        }
    )
    for service_name in services:
        config_loader = PaastaServiceConfigLoader(
            service=service_name, soa_dir=str(soa_dir)
        )
        for instance_type_class in K8S_INSTANCE_TYPE_CLASSES:
            for instance_config in config_loader.instance_configs(
                cluster=paasta_cluster,
                instance_type_class=instance_type_class,
            ):
                rules.extend(
                    get_rules_for_service_instance(
                        service_name=service_name,
                        instance_config=instance_config,
                        paasta_cluster=paasta_cluster,
                    )
                )

    return {
        # we sort our rules so that we can easily compare between two different configmaps
        # as otherwise we'd need to do fancy order-independent comparisons between the two
        # sets of rules later, since we're not iterating in a deterministic way and can
        # add rules in any arbitrary order
        "rules": sorted(rules, key=lambda rule: rule["name"]["as"]),
    }


def update_prometheus_adapter_configmap(
    kube_client: KubeClient, config: PrometheusAdapterConfig
) -> None:
    kube_client.core.replace_namespaced_config_map(
        name=PROMETHEUS_ADAPTER_CONFIGMAP_NAME,
        namespace=PROMETHEUS_ADAPTER_CONFIGMAP_NAMESPACE,
        body=V1ConfigMap(
            metadata=V1ObjectMeta(name=PROMETHEUS_ADAPTER_CONFIGMAP_NAME),
            data={
                PROMETHEUS_ADAPTER_CONFIGMAP_FILENAME: yaml.dump(
                    config,
                    default_flow_style=False,
                    explicit_start=True,
                    width=sys.maxsize,
                )
            },
        ),
    )


def create_prometheus_adapter_configmap(
    kube_client: KubeClient, config: PrometheusAdapterConfig
) -> None:
    kube_client.core.create_namespaced_config_map(
        namespace=PROMETHEUS_ADAPTER_CONFIGMAP_NAMESPACE,
        body=V1ConfigMap(
            metadata=V1ObjectMeta(name=PROMETHEUS_ADAPTER_CONFIGMAP_NAME),
            data={
                PROMETHEUS_ADAPTER_CONFIGMAP_FILENAME: yaml.dump(
                    config, default_flow_style=False, explicit_start=True
                )
            },
        ),
    )


def get_prometheus_adapter_configmap(
    kube_client: KubeClient,
) -> Optional[PrometheusAdapterConfig]:
    try:
        config = cast(
            # we cast since mypy infers the wrong type since the k8s clientlib is untyped
            V1ConfigMap,
            kube_client.core.read_namespaced_config_map(
                name=PROMETHEUS_ADAPTER_CONFIGMAP_NAME,
                namespace=PROMETHEUS_ADAPTER_CONFIGMAP_NAMESPACE,
            ),
        )
    except ApiException as e:
        if e.status == 404:
            return None
        else:
            raise

    if not config:
        return None

    return yaml.safe_load(config.data[PROMETHEUS_ADAPTER_CONFIGMAP_FILENAME])


def restart_prometheus_adapter(kube_client: KubeClient) -> None:
    log.info("Attempting to remove existing adapter pod(s).")
    all_pods = cast(
        # once again, we cast since the kubernetes python api isn't typed
        List[V1Pod],
        kube_client.core.list_namespaced_pod(
            namespace=PROMETHEUS_ADAPTER_POD_NAMESPACE
        ).items,
    )
    # there should only ever be one pod actually up, but we might as well enforce that here
    # just in case there are more
    pods_to_delete = [
        pod
        for pod in all_pods
        if pod.metadata.name.startswith(PROMETHEUS_ADAPTER_POD_NAME_PREFIX)
        and pod.status.phase in PROMETHEUS_ADAPTER_POD_PHASES_TO_REMOVE
    ]
    log.debug("Found the following pods to delete: %s", pods_to_delete)

    for pod in pods_to_delete:
        log.debug("Attempting to remove %s.", pod.metadata.name)
        kube_client.core.delete_namespaced_pod(
            name=pod.metadata.name,
            namespace=pod.metadata.namespace,
            body=V1DeleteOptions(),
            # background propagation with no grace period is equivalent to doing a force-delete from kubectl
            grace_period_seconds=0,
            propagation_policy="Background",
        )
        log.debug("Removed %s.", pod.metadata.name)

    log.info("Adapter restarted successfully")


def main() -> int:
    args = parse_args()
    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    log.info("Generating adapter config from soaconfigs.")
    config = create_prometheus_adapter_config(
        paasta_cluster=args.cluster,
        soa_dir=args.soa_dir,
    )
    log.info("Generated adapter config from soaconfigs.")
    if args.dry_run:
        log.info(
            "Generated the following config:\n%s",
            yaml.dump(
                config, default_flow_style=False, explicit_start=True, width=sys.maxsize
            ),
        )
        return 0  # everything after this point requires creds/updates state
    else:
        log.debug(
            "Generated the following config:\n%s",
            yaml.dump(
                config, default_flow_style=False, explicit_start=True, width=sys.maxsize
            ),
        )

    if not config["rules"]:
        log.error("Got empty rule configuration - refusing to continue.")
        return 0

    kube_client = KubeClient()
    if not args.dry_run:
        ensure_namespace(kube_client, namespace=PROMETHEUS_ADAPTER_CONFIGMAP_NAMESPACE)

    existing_config = get_prometheus_adapter_configmap(kube_client=kube_client)
    if existing_config and existing_config != config:
        log.info("Existing config differs from soaconfigs - updating.")
        log.debug("Existing data: %s", existing_config)
        log.debug("Desired data: %s", config)
        update_prometheus_adapter_configmap(kube_client=kube_client, config=config)
        log.info("Updated adapter config.")
    elif existing_config:
        log.info("Existing config matches soaconfigs - exiting.")
        return 0
    else:
        log.info("No existing config - creating.")
        create_prometheus_adapter_configmap(kube_client=kube_client, config=config)
        log.info("Created adapter config.")

    # the prometheus adapter doesn't currently have a good way to reload on config changes
    # so we do the next best thing: restart the pod so that it picks up the new config.
    # see: https://github.com/DirectXMan12/k8s-prometheus-adapter/issues/104
    restart_prometheus_adapter(kube_client=kube_client)

    return 0


if __name__ == "__main__":
    sys.exit(main())
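
A minimal sketch of driving this module programmatically, mirroring what `--dry-run` does in main() above (the cluster name is hypothetical; DEFAULT_SOA_DIR comes from paasta_tools.utils, as imported at the top of the file):

    from pathlib import Path

    import ruamel.yaml as yaml

    from paasta_tools.setup_prometheus_adapter_config import (
        create_prometheus_adapter_config,
    )
    from paasta_tools.utils import DEFAULT_SOA_DIR

    # Build the adapter config for one cluster without talking to Kubernetes.
    config = create_prometheus_adapter_config(
        paasta_cluster="example-cluster",  # hypothetical cluster name
        soa_dir=Path(DEFAULT_SOA_DIR),
    )
    # Same serialization main() uses for its dry-run output.
    print(yaml.dump(config, default_flow_style=False, explicit_start=True))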