paasta-tools 1.21.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (348) hide show
  1. k8s_itests/__init__.py +0 -0
  2. k8s_itests/test_autoscaling.py +23 -0
  3. k8s_itests/utils.py +38 -0
  4. paasta_tools/__init__.py +20 -0
  5. paasta_tools/adhoc_tools.py +142 -0
  6. paasta_tools/api/__init__.py +13 -0
  7. paasta_tools/api/api.py +330 -0
  8. paasta_tools/api/api_docs/swagger.json +2323 -0
  9. paasta_tools/api/client.py +106 -0
  10. paasta_tools/api/settings.py +33 -0
  11. paasta_tools/api/tweens/__init__.py +6 -0
  12. paasta_tools/api/tweens/auth.py +125 -0
  13. paasta_tools/api/tweens/profiling.py +108 -0
  14. paasta_tools/api/tweens/request_logger.py +124 -0
  15. paasta_tools/api/views/__init__.py +13 -0
  16. paasta_tools/api/views/autoscaler.py +100 -0
  17. paasta_tools/api/views/exception.py +45 -0
  18. paasta_tools/api/views/flink.py +73 -0
  19. paasta_tools/api/views/instance.py +395 -0
  20. paasta_tools/api/views/pause_autoscaler.py +71 -0
  21. paasta_tools/api/views/remote_run.py +113 -0
  22. paasta_tools/api/views/resources.py +76 -0
  23. paasta_tools/api/views/service.py +35 -0
  24. paasta_tools/api/views/version.py +25 -0
  25. paasta_tools/apply_external_resources.py +79 -0
  26. paasta_tools/async_utils.py +109 -0
  27. paasta_tools/autoscaling/__init__.py +0 -0
  28. paasta_tools/autoscaling/autoscaling_service_lib.py +57 -0
  29. paasta_tools/autoscaling/forecasting.py +106 -0
  30. paasta_tools/autoscaling/max_all_k8s_services.py +41 -0
  31. paasta_tools/autoscaling/pause_service_autoscaler.py +77 -0
  32. paasta_tools/autoscaling/utils.py +52 -0
  33. paasta_tools/bounce_lib.py +184 -0
  34. paasta_tools/broadcast_log_to_services.py +62 -0
  35. paasta_tools/cassandracluster_tools.py +210 -0
  36. paasta_tools/check_autoscaler_max_instances.py +212 -0
  37. paasta_tools/check_cassandracluster_services_replication.py +35 -0
  38. paasta_tools/check_flink_services_health.py +203 -0
  39. paasta_tools/check_kubernetes_api.py +57 -0
  40. paasta_tools/check_kubernetes_services_replication.py +141 -0
  41. paasta_tools/check_oom_events.py +244 -0
  42. paasta_tools/check_services_replication_tools.py +324 -0
  43. paasta_tools/check_spark_jobs.py +234 -0
  44. paasta_tools/cleanup_kubernetes_cr.py +138 -0
  45. paasta_tools/cleanup_kubernetes_crd.py +145 -0
  46. paasta_tools/cleanup_kubernetes_jobs.py +344 -0
  47. paasta_tools/cleanup_tron_namespaces.py +96 -0
  48. paasta_tools/cli/__init__.py +13 -0
  49. paasta_tools/cli/authentication.py +85 -0
  50. paasta_tools/cli/cli.py +260 -0
  51. paasta_tools/cli/cmds/__init__.py +13 -0
  52. paasta_tools/cli/cmds/autoscale.py +143 -0
  53. paasta_tools/cli/cmds/check.py +334 -0
  54. paasta_tools/cli/cmds/cook_image.py +147 -0
  55. paasta_tools/cli/cmds/get_docker_image.py +76 -0
  56. paasta_tools/cli/cmds/get_image_version.py +172 -0
  57. paasta_tools/cli/cmds/get_latest_deployment.py +93 -0
  58. paasta_tools/cli/cmds/info.py +155 -0
  59. paasta_tools/cli/cmds/itest.py +117 -0
  60. paasta_tools/cli/cmds/list.py +66 -0
  61. paasta_tools/cli/cmds/list_clusters.py +42 -0
  62. paasta_tools/cli/cmds/list_deploy_queue.py +171 -0
  63. paasta_tools/cli/cmds/list_namespaces.py +84 -0
  64. paasta_tools/cli/cmds/local_run.py +1396 -0
  65. paasta_tools/cli/cmds/logs.py +1601 -0
  66. paasta_tools/cli/cmds/mark_for_deployment.py +1988 -0
  67. paasta_tools/cli/cmds/mesh_status.py +174 -0
  68. paasta_tools/cli/cmds/pause_service_autoscaler.py +107 -0
  69. paasta_tools/cli/cmds/push_to_registry.py +275 -0
  70. paasta_tools/cli/cmds/remote_run.py +252 -0
  71. paasta_tools/cli/cmds/rollback.py +347 -0
  72. paasta_tools/cli/cmds/secret.py +549 -0
  73. paasta_tools/cli/cmds/security_check.py +59 -0
  74. paasta_tools/cli/cmds/spark_run.py +1400 -0
  75. paasta_tools/cli/cmds/start_stop_restart.py +401 -0
  76. paasta_tools/cli/cmds/status.py +2302 -0
  77. paasta_tools/cli/cmds/validate.py +1012 -0
  78. paasta_tools/cli/cmds/wait_for_deployment.py +275 -0
  79. paasta_tools/cli/fsm/__init__.py +13 -0
  80. paasta_tools/cli/fsm/autosuggest.py +82 -0
  81. paasta_tools/cli/fsm/template/README.md +8 -0
  82. paasta_tools/cli/fsm/template/cookiecutter.json +7 -0
  83. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/kubernetes-PROD.yaml +91 -0
  84. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/monitoring.yaml +20 -0
  85. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/service.yaml +8 -0
  86. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/smartstack.yaml +6 -0
  87. paasta_tools/cli/fsm_cmd.py +121 -0
  88. paasta_tools/cli/paasta_tabcomplete.sh +23 -0
  89. paasta_tools/cli/schemas/adhoc_schema.json +199 -0
  90. paasta_tools/cli/schemas/autoscaling_schema.json +91 -0
  91. paasta_tools/cli/schemas/autotuned_defaults/cassandracluster_schema.json +37 -0
  92. paasta_tools/cli/schemas/autotuned_defaults/kubernetes_schema.json +89 -0
  93. paasta_tools/cli/schemas/deploy_schema.json +173 -0
  94. paasta_tools/cli/schemas/eks_schema.json +970 -0
  95. paasta_tools/cli/schemas/kubernetes_schema.json +970 -0
  96. paasta_tools/cli/schemas/rollback_schema.json +160 -0
  97. paasta_tools/cli/schemas/service_schema.json +25 -0
  98. paasta_tools/cli/schemas/smartstack_schema.json +322 -0
  99. paasta_tools/cli/schemas/tron_schema.json +699 -0
  100. paasta_tools/cli/utils.py +1118 -0
  101. paasta_tools/clusterman.py +21 -0
  102. paasta_tools/config_utils.py +385 -0
  103. paasta_tools/contrib/__init__.py +0 -0
  104. paasta_tools/contrib/bounce_log_latency_parser.py +68 -0
  105. paasta_tools/contrib/check_manual_oapi_changes.sh +24 -0
  106. paasta_tools/contrib/check_orphans.py +306 -0
  107. paasta_tools/contrib/create_dynamodb_table.py +35 -0
  108. paasta_tools/contrib/create_paasta_playground.py +105 -0
  109. paasta_tools/contrib/emit_allocated_cpu_metrics.py +50 -0
  110. paasta_tools/contrib/get_running_task_allocation.py +346 -0
  111. paasta_tools/contrib/habitat_fixer.py +86 -0
  112. paasta_tools/contrib/ide_helper.py +316 -0
  113. paasta_tools/contrib/is_pod_healthy_in_proxy.py +139 -0
  114. paasta_tools/contrib/is_pod_healthy_in_smartstack.py +50 -0
  115. paasta_tools/contrib/kill_bad_containers.py +109 -0
  116. paasta_tools/contrib/mass-deploy-tag.sh +44 -0
  117. paasta_tools/contrib/mock_patch_checker.py +86 -0
  118. paasta_tools/contrib/paasta_update_soa_memcpu.py +520 -0
  119. paasta_tools/contrib/render_template.py +129 -0
  120. paasta_tools/contrib/rightsizer_soaconfigs_update.py +348 -0
  121. paasta_tools/contrib/service_shard_remove.py +157 -0
  122. paasta_tools/contrib/service_shard_update.py +373 -0
  123. paasta_tools/contrib/shared_ip_check.py +77 -0
  124. paasta_tools/contrib/timeouts_metrics_prom.py +64 -0
  125. paasta_tools/delete_kubernetes_deployments.py +89 -0
  126. paasta_tools/deployment_utils.py +44 -0
  127. paasta_tools/docker_wrapper.py +234 -0
  128. paasta_tools/docker_wrapper_imports.py +13 -0
  129. paasta_tools/drain_lib.py +351 -0
  130. paasta_tools/dump_locally_running_services.py +71 -0
  131. paasta_tools/eks_tools.py +119 -0
  132. paasta_tools/envoy_tools.py +373 -0
  133. paasta_tools/firewall.py +504 -0
  134. paasta_tools/firewall_logging.py +154 -0
  135. paasta_tools/firewall_update.py +172 -0
  136. paasta_tools/flink_tools.py +345 -0
  137. paasta_tools/flinkeks_tools.py +90 -0
  138. paasta_tools/frameworks/__init__.py +0 -0
  139. paasta_tools/frameworks/adhoc_scheduler.py +71 -0
  140. paasta_tools/frameworks/constraints.py +87 -0
  141. paasta_tools/frameworks/native_scheduler.py +652 -0
  142. paasta_tools/frameworks/native_service_config.py +301 -0
  143. paasta_tools/frameworks/task_store.py +245 -0
  144. paasta_tools/generate_all_deployments +9 -0
  145. paasta_tools/generate_authenticating_services.py +94 -0
  146. paasta_tools/generate_deployments_for_service.py +255 -0
  147. paasta_tools/generate_services_file.py +114 -0
  148. paasta_tools/generate_services_yaml.py +30 -0
  149. paasta_tools/hacheck.py +76 -0
  150. paasta_tools/instance/__init__.py +0 -0
  151. paasta_tools/instance/hpa_metrics_parser.py +122 -0
  152. paasta_tools/instance/kubernetes.py +1362 -0
  153. paasta_tools/iptables.py +240 -0
  154. paasta_tools/kafkacluster_tools.py +143 -0
  155. paasta_tools/kubernetes/__init__.py +0 -0
  156. paasta_tools/kubernetes/application/__init__.py +0 -0
  157. paasta_tools/kubernetes/application/controller_wrappers.py +476 -0
  158. paasta_tools/kubernetes/application/tools.py +90 -0
  159. paasta_tools/kubernetes/bin/__init__.py +0 -0
  160. paasta_tools/kubernetes/bin/kubernetes_remove_evicted_pods.py +164 -0
  161. paasta_tools/kubernetes/bin/paasta_cleanup_remote_run_resources.py +135 -0
  162. paasta_tools/kubernetes/bin/paasta_cleanup_stale_nodes.py +181 -0
  163. paasta_tools/kubernetes/bin/paasta_secrets_sync.py +758 -0
  164. paasta_tools/kubernetes/remote_run.py +558 -0
  165. paasta_tools/kubernetes_tools.py +4679 -0
  166. paasta_tools/list_kubernetes_service_instances.py +128 -0
  167. paasta_tools/list_tron_namespaces.py +60 -0
  168. paasta_tools/long_running_service_tools.py +678 -0
  169. paasta_tools/mac_address.py +44 -0
  170. paasta_tools/marathon_dashboard.py +0 -0
  171. paasta_tools/mesos/__init__.py +0 -0
  172. paasta_tools/mesos/cfg.py +46 -0
  173. paasta_tools/mesos/cluster.py +60 -0
  174. paasta_tools/mesos/exceptions.py +59 -0
  175. paasta_tools/mesos/framework.py +77 -0
  176. paasta_tools/mesos/log.py +48 -0
  177. paasta_tools/mesos/master.py +306 -0
  178. paasta_tools/mesos/mesos_file.py +169 -0
  179. paasta_tools/mesos/parallel.py +52 -0
  180. paasta_tools/mesos/slave.py +115 -0
  181. paasta_tools/mesos/task.py +94 -0
  182. paasta_tools/mesos/util.py +69 -0
  183. paasta_tools/mesos/zookeeper.py +37 -0
  184. paasta_tools/mesos_maintenance.py +848 -0
  185. paasta_tools/mesos_tools.py +1051 -0
  186. paasta_tools/metrics/__init__.py +0 -0
  187. paasta_tools/metrics/metastatus_lib.py +1110 -0
  188. paasta_tools/metrics/metrics_lib.py +217 -0
  189. paasta_tools/monitoring/__init__.py +13 -0
  190. paasta_tools/monitoring/check_k8s_api_performance.py +110 -0
  191. paasta_tools/monitoring_tools.py +652 -0
  192. paasta_tools/monkrelaycluster_tools.py +146 -0
  193. paasta_tools/nrtsearchservice_tools.py +143 -0
  194. paasta_tools/nrtsearchserviceeks_tools.py +68 -0
  195. paasta_tools/oom_logger.py +321 -0
  196. paasta_tools/paasta_deploy_tron_jobs +3 -0
  197. paasta_tools/paasta_execute_docker_command.py +123 -0
  198. paasta_tools/paasta_native_serviceinit.py +21 -0
  199. paasta_tools/paasta_service_config_loader.py +201 -0
  200. paasta_tools/paastaapi/__init__.py +29 -0
  201. paasta_tools/paastaapi/api/__init__.py +3 -0
  202. paasta_tools/paastaapi/api/autoscaler_api.py +302 -0
  203. paasta_tools/paastaapi/api/default_api.py +569 -0
  204. paasta_tools/paastaapi/api/remote_run_api.py +604 -0
  205. paasta_tools/paastaapi/api/resources_api.py +157 -0
  206. paasta_tools/paastaapi/api/service_api.py +1736 -0
  207. paasta_tools/paastaapi/api_client.py +818 -0
  208. paasta_tools/paastaapi/apis/__init__.py +22 -0
  209. paasta_tools/paastaapi/configuration.py +455 -0
  210. paasta_tools/paastaapi/exceptions.py +137 -0
  211. paasta_tools/paastaapi/model/__init__.py +5 -0
  212. paasta_tools/paastaapi/model/adhoc_launch_history.py +176 -0
  213. paasta_tools/paastaapi/model/autoscaler_count_msg.py +176 -0
  214. paasta_tools/paastaapi/model/deploy_queue.py +178 -0
  215. paasta_tools/paastaapi/model/deploy_queue_service_instance.py +194 -0
  216. paasta_tools/paastaapi/model/envoy_backend.py +185 -0
  217. paasta_tools/paastaapi/model/envoy_location.py +184 -0
  218. paasta_tools/paastaapi/model/envoy_status.py +181 -0
  219. paasta_tools/paastaapi/model/flink_cluster_overview.py +188 -0
  220. paasta_tools/paastaapi/model/flink_config.py +173 -0
  221. paasta_tools/paastaapi/model/flink_job.py +186 -0
  222. paasta_tools/paastaapi/model/flink_job_details.py +192 -0
  223. paasta_tools/paastaapi/model/flink_jobs.py +175 -0
  224. paasta_tools/paastaapi/model/float_and_error.py +173 -0
  225. paasta_tools/paastaapi/model/hpa_metric.py +176 -0
  226. paasta_tools/paastaapi/model/inline_object.py +170 -0
  227. paasta_tools/paastaapi/model/inline_response200.py +170 -0
  228. paasta_tools/paastaapi/model/inline_response2001.py +170 -0
  229. paasta_tools/paastaapi/model/instance_bounce_status.py +200 -0
  230. paasta_tools/paastaapi/model/instance_mesh_status.py +186 -0
  231. paasta_tools/paastaapi/model/instance_status.py +220 -0
  232. paasta_tools/paastaapi/model/instance_status_adhoc.py +187 -0
  233. paasta_tools/paastaapi/model/instance_status_cassandracluster.py +173 -0
  234. paasta_tools/paastaapi/model/instance_status_flink.py +173 -0
  235. paasta_tools/paastaapi/model/instance_status_kafkacluster.py +173 -0
  236. paasta_tools/paastaapi/model/instance_status_kubernetes.py +263 -0
  237. paasta_tools/paastaapi/model/instance_status_kubernetes_autoscaling_status.py +187 -0
  238. paasta_tools/paastaapi/model/instance_status_kubernetes_v2.py +197 -0
  239. paasta_tools/paastaapi/model/instance_status_tron.py +204 -0
  240. paasta_tools/paastaapi/model/instance_tasks.py +182 -0
  241. paasta_tools/paastaapi/model/integer_and_error.py +173 -0
  242. paasta_tools/paastaapi/model/kubernetes_container.py +178 -0
  243. paasta_tools/paastaapi/model/kubernetes_container_v2.py +219 -0
  244. paasta_tools/paastaapi/model/kubernetes_healthcheck.py +176 -0
  245. paasta_tools/paastaapi/model/kubernetes_pod.py +201 -0
  246. paasta_tools/paastaapi/model/kubernetes_pod_event.py +176 -0
  247. paasta_tools/paastaapi/model/kubernetes_pod_v2.py +213 -0
  248. paasta_tools/paastaapi/model/kubernetes_replica_set.py +185 -0
  249. paasta_tools/paastaapi/model/kubernetes_version.py +202 -0
  250. paasta_tools/paastaapi/model/remote_run_outcome.py +189 -0
  251. paasta_tools/paastaapi/model/remote_run_start.py +185 -0
  252. paasta_tools/paastaapi/model/remote_run_stop.py +176 -0
  253. paasta_tools/paastaapi/model/remote_run_token.py +173 -0
  254. paasta_tools/paastaapi/model/resource.py +187 -0
  255. paasta_tools/paastaapi/model/resource_item.py +187 -0
  256. paasta_tools/paastaapi/model/resource_value.py +176 -0
  257. paasta_tools/paastaapi/model/smartstack_backend.py +191 -0
  258. paasta_tools/paastaapi/model/smartstack_location.py +181 -0
  259. paasta_tools/paastaapi/model/smartstack_status.py +181 -0
  260. paasta_tools/paastaapi/model/task_tail_lines.py +176 -0
  261. paasta_tools/paastaapi/model_utils.py +1879 -0
  262. paasta_tools/paastaapi/models/__init__.py +62 -0
  263. paasta_tools/paastaapi/rest.py +287 -0
  264. paasta_tools/prune_completed_pods.py +220 -0
  265. paasta_tools/puppet_service_tools.py +59 -0
  266. paasta_tools/py.typed +1 -0
  267. paasta_tools/remote_git.py +127 -0
  268. paasta_tools/run-paasta-api-in-dev-mode.py +57 -0
  269. paasta_tools/run-paasta-api-playground.py +51 -0
  270. paasta_tools/secret_providers/__init__.py +66 -0
  271. paasta_tools/secret_providers/vault.py +214 -0
  272. paasta_tools/secret_tools.py +277 -0
  273. paasta_tools/setup_istio_mesh.py +353 -0
  274. paasta_tools/setup_kubernetes_cr.py +412 -0
  275. paasta_tools/setup_kubernetes_crd.py +138 -0
  276. paasta_tools/setup_kubernetes_internal_crd.py +154 -0
  277. paasta_tools/setup_kubernetes_job.py +353 -0
  278. paasta_tools/setup_prometheus_adapter_config.py +1028 -0
  279. paasta_tools/setup_tron_namespace.py +248 -0
  280. paasta_tools/slack.py +75 -0
  281. paasta_tools/smartstack_tools.py +676 -0
  282. paasta_tools/spark_tools.py +283 -0
  283. paasta_tools/synapse_srv_namespaces_fact.py +42 -0
  284. paasta_tools/tron/__init__.py +0 -0
  285. paasta_tools/tron/client.py +158 -0
  286. paasta_tools/tron/tron_command_context.py +194 -0
  287. paasta_tools/tron/tron_timeutils.py +101 -0
  288. paasta_tools/tron_tools.py +1448 -0
  289. paasta_tools/utils.py +4307 -0
  290. paasta_tools/yaml_tools.py +44 -0
  291. paasta_tools-1.21.3.data/scripts/apply_external_resources.py +79 -0
  292. paasta_tools-1.21.3.data/scripts/bounce_log_latency_parser.py +68 -0
  293. paasta_tools-1.21.3.data/scripts/check_autoscaler_max_instances.py +212 -0
  294. paasta_tools-1.21.3.data/scripts/check_cassandracluster_services_replication.py +35 -0
  295. paasta_tools-1.21.3.data/scripts/check_flink_services_health.py +203 -0
  296. paasta_tools-1.21.3.data/scripts/check_kubernetes_api.py +57 -0
  297. paasta_tools-1.21.3.data/scripts/check_kubernetes_services_replication.py +141 -0
  298. paasta_tools-1.21.3.data/scripts/check_manual_oapi_changes.sh +24 -0
  299. paasta_tools-1.21.3.data/scripts/check_oom_events.py +244 -0
  300. paasta_tools-1.21.3.data/scripts/check_orphans.py +306 -0
  301. paasta_tools-1.21.3.data/scripts/check_spark_jobs.py +234 -0
  302. paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_cr.py +138 -0
  303. paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_crd.py +145 -0
  304. paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_jobs.py +344 -0
  305. paasta_tools-1.21.3.data/scripts/create_dynamodb_table.py +35 -0
  306. paasta_tools-1.21.3.data/scripts/create_paasta_playground.py +105 -0
  307. paasta_tools-1.21.3.data/scripts/delete_kubernetes_deployments.py +89 -0
  308. paasta_tools-1.21.3.data/scripts/emit_allocated_cpu_metrics.py +50 -0
  309. paasta_tools-1.21.3.data/scripts/generate_all_deployments +9 -0
  310. paasta_tools-1.21.3.data/scripts/generate_authenticating_services.py +94 -0
  311. paasta_tools-1.21.3.data/scripts/generate_deployments_for_service.py +255 -0
  312. paasta_tools-1.21.3.data/scripts/generate_services_file.py +114 -0
  313. paasta_tools-1.21.3.data/scripts/generate_services_yaml.py +30 -0
  314. paasta_tools-1.21.3.data/scripts/get_running_task_allocation.py +346 -0
  315. paasta_tools-1.21.3.data/scripts/habitat_fixer.py +86 -0
  316. paasta_tools-1.21.3.data/scripts/ide_helper.py +316 -0
  317. paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_proxy.py +139 -0
  318. paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_smartstack.py +50 -0
  319. paasta_tools-1.21.3.data/scripts/kill_bad_containers.py +109 -0
  320. paasta_tools-1.21.3.data/scripts/kubernetes_remove_evicted_pods.py +164 -0
  321. paasta_tools-1.21.3.data/scripts/mass-deploy-tag.sh +44 -0
  322. paasta_tools-1.21.3.data/scripts/mock_patch_checker.py +86 -0
  323. paasta_tools-1.21.3.data/scripts/paasta_cleanup_remote_run_resources.py +135 -0
  324. paasta_tools-1.21.3.data/scripts/paasta_cleanup_stale_nodes.py +181 -0
  325. paasta_tools-1.21.3.data/scripts/paasta_deploy_tron_jobs +3 -0
  326. paasta_tools-1.21.3.data/scripts/paasta_execute_docker_command.py +123 -0
  327. paasta_tools-1.21.3.data/scripts/paasta_secrets_sync.py +758 -0
  328. paasta_tools-1.21.3.data/scripts/paasta_tabcomplete.sh +23 -0
  329. paasta_tools-1.21.3.data/scripts/paasta_update_soa_memcpu.py +520 -0
  330. paasta_tools-1.21.3.data/scripts/render_template.py +129 -0
  331. paasta_tools-1.21.3.data/scripts/rightsizer_soaconfigs_update.py +348 -0
  332. paasta_tools-1.21.3.data/scripts/service_shard_remove.py +157 -0
  333. paasta_tools-1.21.3.data/scripts/service_shard_update.py +373 -0
  334. paasta_tools-1.21.3.data/scripts/setup_istio_mesh.py +353 -0
  335. paasta_tools-1.21.3.data/scripts/setup_kubernetes_cr.py +412 -0
  336. paasta_tools-1.21.3.data/scripts/setup_kubernetes_crd.py +138 -0
  337. paasta_tools-1.21.3.data/scripts/setup_kubernetes_internal_crd.py +154 -0
  338. paasta_tools-1.21.3.data/scripts/setup_kubernetes_job.py +353 -0
  339. paasta_tools-1.21.3.data/scripts/setup_prometheus_adapter_config.py +1028 -0
  340. paasta_tools-1.21.3.data/scripts/shared_ip_check.py +77 -0
  341. paasta_tools-1.21.3.data/scripts/synapse_srv_namespaces_fact.py +42 -0
  342. paasta_tools-1.21.3.data/scripts/timeouts_metrics_prom.py +64 -0
  343. paasta_tools-1.21.3.dist-info/LICENSE +201 -0
  344. paasta_tools-1.21.3.dist-info/METADATA +74 -0
  345. paasta_tools-1.21.3.dist-info/RECORD +348 -0
  346. paasta_tools-1.21.3.dist-info/WHEEL +5 -0
  347. paasta_tools-1.21.3.dist-info/entry_points.txt +20 -0
  348. paasta_tools-1.21.3.dist-info/top_level.txt +2 -0
@@ -0,0 +1,652 @@
1
+ #!/usr/bin/env python
2
+ # Copyright 2015-2016 Yelp Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """
16
+ Getters for deriving monitoring parameters for mesos-deployed stuff.
17
+ This leaves a place for sane defaults that might change depending
18
+ on the framework that is asking, and still allows you to set your team
19
+ *once* for a service in the general config.
20
+
21
+ Everything in here is private, and you shouldn't worry about it.
22
+ """
23
+ import abc
24
+ import json
25
+ import logging
26
+ import os
27
+ from typing import Dict
28
+ from typing import Mapping
29
+ from typing import Optional
30
+ from typing import Tuple
31
+
32
+ import pysensu_yelp
33
+ import service_configuration_lib
34
+
35
+ from paasta_tools.long_running_service_tools import LongRunningServiceConfig
36
+ from paasta_tools.utils import _log
37
+ from paasta_tools.utils import DEFAULT_SOA_DIR
38
+ from paasta_tools.utils import is_under_replicated
39
+ from paasta_tools.utils import load_system_paasta_config
40
+ from paasta_tools.utils import PaastaNotConfiguredError
41
+ from paasta_tools.utils import time_cache
42
+
43
+
44
class ReplicationChecker(abc.ABC):
    """Abstract interface for objects that report replication of an instance."""

    @abc.abstractmethod
    def get_replication_for_instance(
        self,
        instance_config: LongRunningServiceConfig,
    ) -> Dict[str, Dict[str, Dict[str, int]]]:
        """Return replication data for ``instance_config``.

        Subclasses must override this method. The declared result is a
        three-level nested mapping with string keys and integer leaves; the
        meaning of each key level is defined by the implementations.
        """
50
+
51
+
52
+ try:
53
+ import yelp_meteorite
54
+ except ImportError:
55
+ yelp_meteorite = None
56
+
57
+
58
+ DEFAULT_REPLICATION_RUNBOOK = "y/unhealthy-paasta-instances"
59
+
60
+ log = logging.getLogger(__name__)
61
+
62
+
63
def monitoring_defaults(key):
    """Return the built-in default for the monitoring setting ``key``.

    :param key: Name of a monitoring setting, e.g. ``"runbook"`` or ``"ticket"``
    :returns: The default value, or ``None`` when ``key`` has no built-in default
    """
    docs_url = (
        "https://paasta.readthedocs.io/en/latest/yelpsoa_configs.html#monitoring-yaml"
    )
    # Rebuilt on every call so that callers never share the mutable `tags` list.
    builtin_defaults = {
        "runbook": (
            'Please set a `runbook` field in your monitoring.yaml. Like "y/rb-mesos". Docs: '
            + docs_url
        ),
        "tip": "Please set a `tip` field in your monitoring.yaml. Docs: " + docs_url,
        "ticket": False,
        "project": None,
        "realert_every": -1,
        "tags": [],
    }
    if key in builtin_defaults:
        return builtin_defaults[key]
    return None
75
+
76
+
77
def get_team(overrides, service, soa_dir=DEFAULT_SOA_DIR):
    """Return the resolved ``team`` monitoring setting for ``service``.

    :param overrides: Dictionary of monitoring overrides; wins over on-disk config
    :param service: The service name
    :param soa_dir: The SOA configuration directory to read from
    """
    return __get_monitoring_config_value(
        key="team", overrides=overrides, service=service, soa_dir=soa_dir
    )
79
+
80
+
81
def get_runbook(overrides, service, soa_dir=DEFAULT_SOA_DIR):
    """Return the resolved ``runbook`` monitoring setting for ``service``.

    :param overrides: Dictionary of monitoring overrides; wins over on-disk config
    :param service: The service name
    :param soa_dir: The SOA configuration directory to read from
    """
    return __get_monitoring_config_value(
        key="runbook", overrides=overrides, service=service, soa_dir=soa_dir
    )
83
+
84
+
85
def get_tip(overrides, service, soa_dir=DEFAULT_SOA_DIR):
    """Return the resolved ``tip`` monitoring setting for ``service``.

    :param overrides: Dictionary of monitoring overrides; wins over on-disk config
    :param service: The service name
    :param soa_dir: The SOA configuration directory to read from
    """
    return __get_monitoring_config_value(
        key="tip", overrides=overrides, service=service, soa_dir=soa_dir
    )
87
+
88
+
89
def get_notification_email(overrides, service, soa_dir=DEFAULT_SOA_DIR):
    """Return the resolved ``notification_email`` monitoring setting for ``service``.

    :param overrides: Dictionary of monitoring overrides; wins over on-disk config
    :param service: The service name
    :param soa_dir: The SOA configuration directory to read from
    """
    return __get_monitoring_config_value(
        key="notification_email",
        overrides=overrides,
        service=service,
        soa_dir=soa_dir,
    )
93
+
94
+
95
def get_page(overrides, service, soa_dir=DEFAULT_SOA_DIR):
    """Return the resolved ``page`` monitoring setting for ``service``.

    :param overrides: Dictionary of monitoring overrides; wins over on-disk config
    :param service: The service name
    :param soa_dir: The SOA configuration directory to read from
    """
    return __get_monitoring_config_value(
        key="page", overrides=overrides, service=service, soa_dir=soa_dir
    )
97
+
98
+
99
def get_alert_after(overrides, service, soa_dir=DEFAULT_SOA_DIR):
    """Return the resolved ``alert_after`` monitoring setting for ``service``.

    :param overrides: Dictionary of monitoring overrides; wins over on-disk config
    :param service: The service name
    :param soa_dir: The SOA configuration directory to read from
    """
    return __get_monitoring_config_value(
        key="alert_after", overrides=overrides, service=service, soa_dir=soa_dir
    )
101
+
102
+
103
def get_realert_every(
    overrides, service, soa_dir=DEFAULT_SOA_DIR, monitoring_defaults=monitoring_defaults
):
    """Return the resolved ``realert_every`` monitoring setting for ``service``.

    :param overrides: Dictionary of monitoring overrides; wins over on-disk config
    :param service: The service name
    :param soa_dir: The SOA configuration directory to read from
    :param monitoring_defaults: Callable mapping a setting name to its fallback
        value, used when no configuration source provides the setting
    """
    return __get_monitoring_config_value(
        "realert_every", overrides, service, soa_dir, monitoring_defaults
    )
113
+
114
+
115
def get_check_every(overrides, service, soa_dir=DEFAULT_SOA_DIR):
    """Return the resolved ``check_every`` monitoring setting for ``service``.

    :param overrides: Dictionary of monitoring overrides; wins over on-disk config
    :param service: The service name
    :param soa_dir: The SOA configuration directory to read from
    """
    return __get_monitoring_config_value(
        key="check_every", overrides=overrides, service=service, soa_dir=soa_dir
    )
117
+
118
+
119
def get_irc_channels(overrides, service, soa_dir=DEFAULT_SOA_DIR):
    """Return the resolved ``irc_channels`` monitoring setting for ``service``.

    :param overrides: Dictionary of monitoring overrides; wins over on-disk config
    :param service: The service name
    :param soa_dir: The SOA configuration directory to read from
    """
    return __get_monitoring_config_value(
        key="irc_channels", overrides=overrides, service=service, soa_dir=soa_dir
    )
121
+
122
+
123
def get_slack_channels(overrides, service, soa_dir=DEFAULT_SOA_DIR):
    """Return the resolved ``slack_channels`` monitoring setting for ``service``.

    :param overrides: Dictionary of monitoring overrides; wins over on-disk config
    :param service: The service name
    :param soa_dir: The SOA configuration directory to read from
    """
    return __get_monitoring_config_value(
        key="slack_channels", overrides=overrides, service=service, soa_dir=soa_dir
    )
125
+
126
+
127
def get_dependencies(overrides, service, soa_dir=DEFAULT_SOA_DIR):
    """Return the resolved ``dependencies`` monitoring setting for ``service``.

    :param overrides: Dictionary of monitoring overrides; wins over on-disk config
    :param service: The service name
    :param soa_dir: The SOA configuration directory to read from
    """
    return __get_monitoring_config_value(
        key="dependencies", overrides=overrides, service=service, soa_dir=soa_dir
    )
129
+
130
+
131
def get_ticket(overrides, service, soa_dir=DEFAULT_SOA_DIR):
    """Return the resolved ``ticket`` monitoring setting for ``service``.

    :param overrides: Dictionary of monitoring overrides; wins over on-disk config
    :param service: The service name
    :param soa_dir: The SOA configuration directory to read from
    """
    return __get_monitoring_config_value(
        key="ticket", overrides=overrides, service=service, soa_dir=soa_dir
    )
133
+
134
+
135
def get_project(overrides, service, soa_dir=DEFAULT_SOA_DIR):
    """Return the resolved ``project`` monitoring setting for ``service``.

    :param overrides: Dictionary of monitoring overrides; wins over on-disk config
    :param service: The service name
    :param soa_dir: The SOA configuration directory to read from
    """
    return __get_monitoring_config_value(
        key="project", overrides=overrides, service=service, soa_dir=soa_dir
    )
137
+
138
+
139
def get_priority(overrides, service, soa_dir=DEFAULT_SOA_DIR):
    """Return the resolved ``priority`` monitoring setting for ``service``.

    :param overrides: Dictionary of monitoring overrides; wins over on-disk config
    :param service: The service name
    :param soa_dir: The SOA configuration directory to read from
    """
    return __get_monitoring_config_value(
        key="priority", overrides=overrides, service=service, soa_dir=soa_dir
    )
141
+
142
+
143
def get_tags(overrides, service, soa_dir=DEFAULT_SOA_DIR):
    """Return the resolved ``tags`` monitoring setting for ``service``.

    :param overrides: Dictionary of monitoring overrides; wins over on-disk config
    :param service: The service name
    :param soa_dir: The SOA configuration directory to read from
    """
    return __get_monitoring_config_value(
        key="tags", overrides=overrides, service=service, soa_dir=soa_dir
    )
145
+
146
+
147
def get_component(overrides, service, soa_dir=DEFAULT_SOA_DIR):
    """Return the resolved ``component`` monitoring setting for ``service``.

    :param overrides: Dictionary of monitoring overrides; wins over on-disk config
    :param service: The service name
    :param soa_dir: The SOA configuration directory to read from
    """
    return __get_monitoring_config_value(
        key="component", overrides=overrides, service=service, soa_dir=soa_dir
    )
149
+
150
+
151
def get_description(overrides, service, soa_dir=DEFAULT_SOA_DIR):
    """Return the resolved ``description`` monitoring setting for ``service``.

    :param overrides: Dictionary of monitoring overrides; wins over on-disk config
    :param service: The service name
    :param soa_dir: The SOA configuration directory to read from
    """
    return __get_monitoring_config_value(
        key="description", overrides=overrides, service=service, soa_dir=soa_dir
    )
153
+
154
+
155
# The get_* helpers in this module are usually called one right after another
# for the same service. Wrapping the reader in a short-lived (5 second) cache
# avoids re-parsing service.yaml on each of those back-to-back calls.
_cached_read_service_configuration = time_cache(ttl=5)(
    service_configuration_lib.read_service_configuration
)
160
+
161
+
162
def __get_monitoring_config_value(
    key,
    overrides,
    service,
    soa_dir=DEFAULT_SOA_DIR,
    monitoring_defaults=monitoring_defaults,
):
    """Resolve a single monitoring setting for ``service``.

    Sources are consulted from lowest to highest precedence:

    1. ``monitoring_defaults(key)``
    2. the top level of the service's general configuration
    3. the ``monitoring`` section of the service's general configuration
    4. the service's monitoring.yaml
    5. ``overrides``

    :param key: Name of the monitoring setting to look up
    :param overrides: Dictionary of overrides; a value here always wins
    :param service: The service name
    :param soa_dir: The SOA configuration directory to read from
    :param monitoring_defaults: Callable mapping a setting name to its fallback value
    :returns: The value from the highest-precedence source that defines ``key``
    """
    service_yaml = _cached_read_service_configuration(service, soa_dir=soa_dir)
    monitoring_yaml = read_monitoring_config(service, soa_dir=soa_dir)

    # Start from the weakest source; each later lookup keeps the previous
    # value unless its own source defines `key`.
    resolved = service_yaml.get(key, monitoring_defaults(key))
    resolved = service_yaml.get("monitoring", {}).get(key, resolved)
    resolved = monitoring_yaml.get(key, resolved)
    return overrides.get(key, resolved)
177
+
178
+
179
def get_sensu_team_data(team):
    """Takes a team and returns the dictionary of Sensu configuration
    settings for that team. The data is in this format:
    https://github.com/Yelp/sensu_handlers#teams
    Returns an empty dictionary if there is nothing to return.

    Not all teams specify all the different types of configuration settings.
    for example, a team may not specify a `notification_email`. It is up
    to the caller of this function to handle that case.

    :param team: The team name to look up
    :returns: The team's settings dictionary, or ``{}`` when the team (or the
        whole ``team_data`` section) is absent
    """
    # _load_sensu_team_data() returns {} when the team data file cannot be
    # read, so "team_data" may be missing; use .get() (as list_teams does) to
    # honor the documented "empty dictionary" contract instead of raising
    # KeyError.
    global_team_data = _load_sensu_team_data().get("team_data", {})
    return global_team_data.get(team, {})
191
+
192
+
193
def _load_sensu_team_data():
    """Load and return the parsed contents of /etc/sensu/team_data.json.

    :returns: The decoded JSON document, or an empty dictionary (after logging
        a warning) when reading the file raises ``IOError``
    """
    try:
        with open("/etc/sensu/team_data.json") as team_data_file:
            return json.load(team_data_file)
    except IOError:
        log.warning(
            "No Sensu Team data (/etc/sensu/team_data.json) available. Using empty defaults"
        )
        return {}
203
+
204
+
205
def send_event(
    service,
    check_name,
    overrides,
    status,
    output,
    soa_dir,
    ttl=None,
    cluster=None,
    system_paasta_config=None,
    dry_run=False,
):
    """Emit a Sensu event for ``service`` through pysensu_yelp.

    Nothing is sent when no team can be resolved for the service, or when the
    system configuration provides no Sensu host.

    :param service: The service name the event is about
    :param check_name: The name of the check as it appears in Sensu
    :param overrides: A dictionary containing overrides for monitoring options
                      (e.g. notification_email, ticket, page)
    :param status: The status to emit for this event
    :param output: The output to emit for this event
    :param soa_dir: The service directory to read monitoring information from
    :param ttl: TTL (optional)
    :param cluster: The cluster name (optional); when omitted it is taken from
                    the system config, falling back to "localhost"
    :param system_paasta_config: A SystemPaastaConfig object representing the
                                 system; loaded on demand when omitted
    :param dry_run: Print the Sensu event instead of emitting it
    """
    team = get_team(overrides, service, soa_dir)
    if not team:
        # Without an owning team there is nobody to notify.
        return

    if system_paasta_config is None:
        system_paasta_config = load_system_paasta_config()
    if cluster is None:
        try:
            cluster = system_paasta_config.get_cluster()
        except PaastaNotConfiguredError:
            cluster = "localhost"

    # A bare integer alert_after is sent as a number of seconds ("<n>s").
    alert_after = overrides.get("alert_after", "5m")
    if isinstance(alert_after, int):
        alert_after = f"{alert_after}s"

    result_dict = {
        "name": check_name,
        "runbook": overrides.get("runbook", "http://y/paasta-troubleshooting"),
        "status": status,
        "output": output,
        "team": team,
        "page": get_page(overrides, service, soa_dir),
        "tip": get_tip(overrides, service, soa_dir),
        "notification_email": get_notification_email(overrides, service, soa_dir),
        "check_every": overrides.get("check_every", "1m"),
        "realert_every": overrides.get(
            "realert_every", monitoring_defaults("realert_every")
        ),
        "alert_after": alert_after,
        "irc_channels": get_irc_channels(overrides, service, soa_dir),
        "slack_channels": get_slack_channels(overrides, service, soa_dir),
        "ticket": get_ticket(overrides, service, soa_dir),
        "project": get_project(overrides, service, soa_dir),
        "priority": get_priority(overrides, service, soa_dir),
        "source": "paasta-%s" % cluster,
        "tags": get_tags(overrides, service, soa_dir),
        "ttl": ttl,
        "sensu_host": system_paasta_config.get_sensu_host(),
        "sensu_port": system_paasta_config.get_sensu_port(),
        "component": get_component(overrides, service, soa_dir),
        "description": get_description(overrides, service, soa_dir),
    }

    if not dry_run:
        # Only emit when a Sensu host is configured.
        if result_dict["sensu_host"]:
            pysensu_yelp.send_event(**result_dict)
        return

    if status == pysensu_yelp.Status.OK:
        print(f"Would've sent an OK event for check '{check_name}'")
        return

    from pprint import pprint  # only import during testing

    print(f"Would've sent the following alert for check '{check_name}':")
    pprint(result_dict)
286
+
287
+
288
@time_cache(ttl=5)
def read_monitoring_config(service, soa_dir=DEFAULT_SOA_DIR):
    """Read a service's monitoring.yaml file.

    :param service: The service name
    :param soa_dir: The SOA configuration directory to read from
    :returns: A dictionary of whatever was in soa_dir/name/monitoring.yaml
    """
    monitoring_file = os.path.join(
        os.path.abspath(soa_dir), service, "monitoring.yaml"
    )
    return service_configuration_lib.read_monitoring(monitoring_file)
299
+
300
+
301
def list_teams():
    """Return the set of team names found in the system's Sensu team data.

    The set is empty when the loaded data has no ``team_data`` section.
    """
    # Iterating a dict yields its keys, so set(mapping) collects the team names.
    return set(_load_sensu_team_data().get("team_data", {}))
308
+
309
+
310
def send_replication_event(
    instance_config,
    status,
    output,
    description,
    dry_run=False,
):
    """Report a replication check result through send_event() and _log().

    The monitoring overrides obtained from ``instance_config.get_monitoring()``
    are updated in place before being handed to send_event().

    :param instance_config: an instance of LongRunningServiceConfig
    :param status: The status to emit for this event
    :param output: The output to emit for this event
    :param description: Stored under the "description" key of the overrides
    :param dry_run: Print the event instead of emitting it
    """
    overrides = instance_config.get_monitoring()
    # Only supply an alert_after when the service did not configure one.
    overrides.setdefault("alert_after", "2m")
    overrides["check_every"] = "1m"

    def default_tip(_):
        return (
            f"Check the instance with: `paasta status -s {instance_config.service} "
            f"-i {instance_config.instance} -c {instance_config.cluster} -vv`"
        )

    # Resolved in this order: "runbook" is already stored in the overrides by
    # the time "tip" is looked up.
    fallbacks = (
        ("runbook", lambda _: DEFAULT_REPLICATION_RUNBOOK),
        ("tip", default_tip),
    )
    for key, fallback in fallbacks:
        overrides[key] = __get_monitoring_config_value(
            key,
            overrides,
            instance_config.service,
            soa_dir=instance_config.soa_dir,
            monitoring_defaults=fallback,
        )
    overrides["description"] = description

    check_name = "check_paasta_services_replication.%s" % instance_config.job_id
    send_event(
        service=instance_config.service,
        check_name=check_name,
        overrides=overrides,
        status=status,
        output=output,
        soa_dir=instance_config.soa_dir,
        cluster=instance_config.cluster,
        dry_run=dry_run,
    )
    _log(
        service=instance_config.service,
        line="Replication: %s" % output,
        component="monitoring",
        level="debug",
        cluster=instance_config.cluster,
        instance=instance_config.instance,
    )
367
+
368
+
369
def emit_replication_metrics(
    replication_infos: Mapping[str, Mapping[str, Mapping[str, int]]],
    instance_config: LongRunningServiceConfig,
    expected_count: int,
    dry_run: bool = False,
) -> None:
    """Publish available/critical/expected backend gauges per provider.

    :param replication_infos: provider -> location -> job id -> backend count
    :param instance_config: an instance of LongRunningServiceConfig
    :param expected_count: value reported for the expected_backends metric and
        used to derive the critical_backends value
    :param dry_run: Print each metric value instead of setting a gauge
    """

    def report(metric_name, value, dimensions):
        # Either describe the metric on stdout or actually set the gauge.
        if dry_run:
            print(f"Would've sent value {value} for metric '{metric_name}'")
            return
        yelp_meteorite.create_gauge(metric_name, dimensions).set(value)

    for provider, per_location in replication_infos.items():
        dimensions = {
            "paasta_service": instance_config.service,
            "paasta_cluster": instance_config.cluster,
            "paasta_instance": instance_config.instance,
            "paasta_pool": instance_config.get_pool(),
            "service_discovery_provider": provider,
        }

        # Backends registered for this job, summed over every location.
        total_available = sum(
            backends.get(instance_config.job_id, 0)
            for backends in per_location.values()
        )
        report("paasta.service.available_backends", total_available, dimensions)

        crit_percentage = instance_config.get_replication_crit_percentage()
        report(
            "paasta.service.critical_backends",
            crit_percentage * expected_count / 100.0,
            dimensions,
        )

        report("paasta.service.expected_backends", expected_count, dimensions)
421
+
422
+
423
def check_replication_for_instance(
    instance_config: LongRunningServiceConfig,
    expected_count: int,
    replication_checker: ReplicationChecker,
    dry_run: bool = False,
) -> bool:
    """Check a set of namespaces to see if their number of available backends is too low,
    emitting events to Sensu based on the fraction available and the thresholds defined in
    the corresponding yelpsoa config.

    :param instance_config: an instance of LongRunningServiceConfig
    :param expected_count: total number of expected backends; it is divided
        evenly between the locations each provider reports
    :param replication_checker: an instance of ReplicationChecker
    :param dry_run: Print Sensu event and metrics instead of emitting them
    :returns: True when no provider reported under-replication, else False
    """

    crit_threshold = instance_config.get_replication_crit_percentage()

    log.info(
        "Checking instance %s in service discovery providers", instance_config.job_id
    )
    replication_infos = replication_checker.get_replication_for_instance(
        instance_config
    )

    log.debug(f"Got replication info for {instance_config.job_id}: {replication_infos}")
    if yelp_meteorite is not None:
        emit_replication_metrics(
            replication_infos,
            instance_config,
            expected_count,
            dry_run=dry_run,
        )

    service_is_under_replicated = False
    failed_service_discovery_providers = set()

    # `output` used to be bound only inside the loop below, so an empty
    # provider mapping raised UnboundLocalError when the event was sent.
    # Give it a defined value up front; the status logic is unchanged.
    output = (
        "No service discovery provider returned replication info for %s\n"
        % instance_config.job_id
    )
    if not replication_infos:
        log.warning(output)

    # NOTE(review): `output` is overwritten on every iteration, so the event
    # only carries the text for the last provider even when an earlier one is
    # the failing provider - confirm whether that is intended.
    for service_discovery_provider, replication_info in replication_infos.items():
        if not replication_info:
            output = (
                "Service %s has no %s replication info. Make sure the discover key in the corresponding config (e.g. smartstack.yaml for Smartstack) is valid!\n"
            ) % (instance_config.job_id, service_discovery_provider)
            log.error(output)
            service_is_under_replicated = True
            failed_service_discovery_providers.add(service_discovery_provider)
            continue

        expected_count_per_location = int(expected_count / len(replication_info))
        output_critical = []
        output_ok = []

        for location, available_backends in sorted(replication_info.items()):
            num_available_in_location = available_backends.get(
                instance_config.job_id, 0
            )
            under_replicated, ratio = is_under_replicated(
                num_available_in_location,
                expected_count_per_location,
                crit_threshold,
            )
            line = "{} has {}/{} replicas in {} according to {} ({}: {}%)\n".format(
                instance_config.job_id,
                num_available_in_location,
                expected_count_per_location,
                location,
                service_discovery_provider,
                "CRITICAL" if under_replicated else "OK",
                ratio,
            )
            if under_replicated:
                output_critical.append(line)
                failed_service_discovery_providers.add(service_discovery_provider)
            else:
                output_ok.append(line)

        # Critical locations come first so they survive output truncation.
        output = ", ".join(output_critical)
        if output_critical and output_ok:
            output += ". The following locations are OK: "
        output += ", ".join(output_ok)

        if output_critical:
            service_is_under_replicated = True
            log.error(output)
        else:
            log.info(output)

    if service_is_under_replicated:
        # Sorted so the description does not depend on set iteration order.
        failed_service_discovery_providers_list = ",".join(
            sorted(failed_service_discovery_providers)
        )
        description = (
            "This replication alert means that a {service_discovery_provider} powered loadbalancer\n"
            "doesn't have enough healthy backends. Not having enough healthy backends\n"
            "means that clients of that service will get 503s (http) or connection refused\n"
            "(tcp) when trying to connect to it.\n"
            "\n"
            "Reasons this might be happening:\n"
            "\n"
            " The service may simply not have enough copies or it could simply be\n"
            " unhealthy in that location. There also may not be enough resources\n"
            " in the cluster to support the requested instance count.\n"
            "\n"
            "Things you can do:\n"
            "\n"
            " * You can view the logs for the job with:\n"
            " paasta logs -s {service} -i {instance} -c {cluster}\n"
            "\n"
            " * Fix the cause of the unhealthy service. Try running:\n"
            "\n"
            " paasta status -s {service} -i {instance} -c {cluster} -vv\n"
            "\n"
            " * Widen {service_discovery_provider} discovery settings\n"
            " * Increase the instance count\n"
            "\n"
        ).format(
            service=instance_config.service,
            instance=instance_config.instance,
            cluster=instance_config.cluster,
            service_discovery_provider=failed_service_discovery_providers_list,
        )
        status = pysensu_yelp.Status.CRITICAL
    else:
        description = (
            "{} is well-replicated because it has over {}% of its "
            "expected replicas up."
        ).format(instance_config.job_id, crit_threshold)
        status = pysensu_yelp.Status.OK

    send_replication_event(
        instance_config=instance_config,
        status=status,
        output=output,
        description=description,
        dry_run=dry_run,
    )
    return not service_is_under_replicated
568
+
569
+
570
def check_under_replication(
    instance_config: LongRunningServiceConfig,
    expected_count: int,
    num_available: int,
    sub_component: Optional[str] = None,
) -> Tuple[bool, str, str]:
    """Decide whether a component (or one of its sub_components) is under-replicated.

    :param instance_config: an instance of LongRunningServiceConfig
    :param expected_count: number of replicas that should be available
    :param num_available: number of replicas that are available
    :param sub_component: optional name included in the short output
    :returns: a tuple of (under_replicated, output, description), where output
        is a short summary and description is the longer explanatory text
    """
    threshold = instance_config.get_replication_crit_percentage()

    # The short summary stays short on purpose: Slack-Sensu messages are
    # limited to 400 chars by default (output included), and an over-long
    # output pushes the runbook and tip out of the message. The longer
    # context therefore lives in the description.
    if sub_component is None:
        output = ("{} has {}/{} replicas available (threshold: {}%)").format(
            instance_config.job_id, num_available, expected_count, threshold
        )
    else:
        output = ("{} has {}/{} replicas of {} available (threshold: {}%)").format(
            instance_config.job_id,
            num_available,
            expected_count,
            sub_component,
            threshold,
        )

    under_replicated, _ = is_under_replicated(num_available, expected_count, threshold)

    if not under_replicated:
        healthy_description = (
            "{} is well-replicated because it has over {}% of its "
            "expected replicas up."
        ).format(instance_config.job_id, threshold)
        return under_replicated, output, healthy_description

    alert_description = (
        "This replication alert means that PaaSTA can't keep the\n"
        "requested number of replicas up and healthy in the cluster for "
        "the instance {service}.{instance}.\n"
        "\n"
        "Reasons this might be happening:\n"
        "\n"
        " The service may simply be unhealthy. There also may not be enough resources\n"
        " in the cluster to support the requested instance count.\n"
        "\n"
        "Things you can do:\n"
        "\n"
        " * Increase the instance count\n"
        " * Fix the cause of the unhealthy service. Try running:\n"
        "\n"
        " paasta status -s {service} -i {instance} -c {cluster} -vv\n"
    ).format(
        service=instance_config.service,
        instance=instance_config.instance,
        cluster=instance_config.cluster,
    )
    return under_replicated, output, alert_description
628
+
629
+
630
def send_replication_event_if_under_replication(
    instance_config: LongRunningServiceConfig,
    expected_count: int,
    num_available: int,
    sub_component: Optional[str] = None,
    dry_run: bool = False,
):
    """Run check_under_replication() and forward its result as a replication event.

    An event is sent in both cases: CRITICAL when the check reports
    under-replication, OK otherwise. The short output is logged at error or
    info level accordingly.

    :param instance_config: an instance of LongRunningServiceConfig
    :param expected_count: number of replicas that should be available
    :param num_available: number of replicas that are available
    :param sub_component: optional name passed through to the check
    :param dry_run: Print the event instead of emitting it
    """
    under_replicated, output, description = check_under_replication(
        instance_config, expected_count, num_available, sub_component
    )

    write_log = log.error if under_replicated else log.info
    write_log(output)
    status = (
        pysensu_yelp.Status.CRITICAL if under_replicated else pysensu_yelp.Status.OK
    )

    send_replication_event(
        instance_config=instance_config,
        status=status,
        output=output,
        description=description,
        dry_run=dry_run,
    )