paasta-tools 1.21.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (348)
  1. k8s_itests/__init__.py +0 -0
  2. k8s_itests/test_autoscaling.py +23 -0
  3. k8s_itests/utils.py +38 -0
  4. paasta_tools/__init__.py +20 -0
  5. paasta_tools/adhoc_tools.py +142 -0
  6. paasta_tools/api/__init__.py +13 -0
  7. paasta_tools/api/api.py +330 -0
  8. paasta_tools/api/api_docs/swagger.json +2323 -0
  9. paasta_tools/api/client.py +106 -0
  10. paasta_tools/api/settings.py +33 -0
  11. paasta_tools/api/tweens/__init__.py +6 -0
  12. paasta_tools/api/tweens/auth.py +125 -0
  13. paasta_tools/api/tweens/profiling.py +108 -0
  14. paasta_tools/api/tweens/request_logger.py +124 -0
  15. paasta_tools/api/views/__init__.py +13 -0
  16. paasta_tools/api/views/autoscaler.py +100 -0
  17. paasta_tools/api/views/exception.py +45 -0
  18. paasta_tools/api/views/flink.py +73 -0
  19. paasta_tools/api/views/instance.py +395 -0
  20. paasta_tools/api/views/pause_autoscaler.py +71 -0
  21. paasta_tools/api/views/remote_run.py +113 -0
  22. paasta_tools/api/views/resources.py +76 -0
  23. paasta_tools/api/views/service.py +35 -0
  24. paasta_tools/api/views/version.py +25 -0
  25. paasta_tools/apply_external_resources.py +79 -0
  26. paasta_tools/async_utils.py +109 -0
  27. paasta_tools/autoscaling/__init__.py +0 -0
  28. paasta_tools/autoscaling/autoscaling_service_lib.py +57 -0
  29. paasta_tools/autoscaling/forecasting.py +106 -0
  30. paasta_tools/autoscaling/max_all_k8s_services.py +41 -0
  31. paasta_tools/autoscaling/pause_service_autoscaler.py +77 -0
  32. paasta_tools/autoscaling/utils.py +52 -0
  33. paasta_tools/bounce_lib.py +184 -0
  34. paasta_tools/broadcast_log_to_services.py +62 -0
  35. paasta_tools/cassandracluster_tools.py +210 -0
  36. paasta_tools/check_autoscaler_max_instances.py +212 -0
  37. paasta_tools/check_cassandracluster_services_replication.py +35 -0
  38. paasta_tools/check_flink_services_health.py +203 -0
  39. paasta_tools/check_kubernetes_api.py +57 -0
  40. paasta_tools/check_kubernetes_services_replication.py +141 -0
  41. paasta_tools/check_oom_events.py +244 -0
  42. paasta_tools/check_services_replication_tools.py +324 -0
  43. paasta_tools/check_spark_jobs.py +234 -0
  44. paasta_tools/cleanup_kubernetes_cr.py +138 -0
  45. paasta_tools/cleanup_kubernetes_crd.py +145 -0
  46. paasta_tools/cleanup_kubernetes_jobs.py +344 -0
  47. paasta_tools/cleanup_tron_namespaces.py +96 -0
  48. paasta_tools/cli/__init__.py +13 -0
  49. paasta_tools/cli/authentication.py +85 -0
  50. paasta_tools/cli/cli.py +260 -0
  51. paasta_tools/cli/cmds/__init__.py +13 -0
  52. paasta_tools/cli/cmds/autoscale.py +143 -0
  53. paasta_tools/cli/cmds/check.py +334 -0
  54. paasta_tools/cli/cmds/cook_image.py +147 -0
  55. paasta_tools/cli/cmds/get_docker_image.py +76 -0
  56. paasta_tools/cli/cmds/get_image_version.py +172 -0
  57. paasta_tools/cli/cmds/get_latest_deployment.py +93 -0
  58. paasta_tools/cli/cmds/info.py +155 -0
  59. paasta_tools/cli/cmds/itest.py +117 -0
  60. paasta_tools/cli/cmds/list.py +66 -0
  61. paasta_tools/cli/cmds/list_clusters.py +42 -0
  62. paasta_tools/cli/cmds/list_deploy_queue.py +171 -0
  63. paasta_tools/cli/cmds/list_namespaces.py +84 -0
  64. paasta_tools/cli/cmds/local_run.py +1396 -0
  65. paasta_tools/cli/cmds/logs.py +1601 -0
  66. paasta_tools/cli/cmds/mark_for_deployment.py +1988 -0
  67. paasta_tools/cli/cmds/mesh_status.py +174 -0
  68. paasta_tools/cli/cmds/pause_service_autoscaler.py +107 -0
  69. paasta_tools/cli/cmds/push_to_registry.py +275 -0
  70. paasta_tools/cli/cmds/remote_run.py +252 -0
  71. paasta_tools/cli/cmds/rollback.py +347 -0
  72. paasta_tools/cli/cmds/secret.py +549 -0
  73. paasta_tools/cli/cmds/security_check.py +59 -0
  74. paasta_tools/cli/cmds/spark_run.py +1400 -0
  75. paasta_tools/cli/cmds/start_stop_restart.py +401 -0
  76. paasta_tools/cli/cmds/status.py +2302 -0
  77. paasta_tools/cli/cmds/validate.py +1012 -0
  78. paasta_tools/cli/cmds/wait_for_deployment.py +275 -0
  79. paasta_tools/cli/fsm/__init__.py +13 -0
  80. paasta_tools/cli/fsm/autosuggest.py +82 -0
  81. paasta_tools/cli/fsm/template/README.md +8 -0
  82. paasta_tools/cli/fsm/template/cookiecutter.json +7 -0
  83. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/kubernetes-PROD.yaml +91 -0
  84. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/monitoring.yaml +20 -0
  85. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/service.yaml +8 -0
  86. paasta_tools/cli/fsm/template/{{cookiecutter.service}}/smartstack.yaml +6 -0
  87. paasta_tools/cli/fsm_cmd.py +121 -0
  88. paasta_tools/cli/paasta_tabcomplete.sh +23 -0
  89. paasta_tools/cli/schemas/adhoc_schema.json +199 -0
  90. paasta_tools/cli/schemas/autoscaling_schema.json +91 -0
  91. paasta_tools/cli/schemas/autotuned_defaults/cassandracluster_schema.json +37 -0
  92. paasta_tools/cli/schemas/autotuned_defaults/kubernetes_schema.json +89 -0
  93. paasta_tools/cli/schemas/deploy_schema.json +173 -0
  94. paasta_tools/cli/schemas/eks_schema.json +970 -0
  95. paasta_tools/cli/schemas/kubernetes_schema.json +970 -0
  96. paasta_tools/cli/schemas/rollback_schema.json +160 -0
  97. paasta_tools/cli/schemas/service_schema.json +25 -0
  98. paasta_tools/cli/schemas/smartstack_schema.json +322 -0
  99. paasta_tools/cli/schemas/tron_schema.json +699 -0
  100. paasta_tools/cli/utils.py +1118 -0
  101. paasta_tools/clusterman.py +21 -0
  102. paasta_tools/config_utils.py +385 -0
  103. paasta_tools/contrib/__init__.py +0 -0
  104. paasta_tools/contrib/bounce_log_latency_parser.py +68 -0
  105. paasta_tools/contrib/check_manual_oapi_changes.sh +24 -0
  106. paasta_tools/contrib/check_orphans.py +306 -0
  107. paasta_tools/contrib/create_dynamodb_table.py +35 -0
  108. paasta_tools/contrib/create_paasta_playground.py +105 -0
  109. paasta_tools/contrib/emit_allocated_cpu_metrics.py +50 -0
  110. paasta_tools/contrib/get_running_task_allocation.py +346 -0
  111. paasta_tools/contrib/habitat_fixer.py +86 -0
  112. paasta_tools/contrib/ide_helper.py +316 -0
  113. paasta_tools/contrib/is_pod_healthy_in_proxy.py +139 -0
  114. paasta_tools/contrib/is_pod_healthy_in_smartstack.py +50 -0
  115. paasta_tools/contrib/kill_bad_containers.py +109 -0
  116. paasta_tools/contrib/mass-deploy-tag.sh +44 -0
  117. paasta_tools/contrib/mock_patch_checker.py +86 -0
  118. paasta_tools/contrib/paasta_update_soa_memcpu.py +520 -0
  119. paasta_tools/contrib/render_template.py +129 -0
  120. paasta_tools/contrib/rightsizer_soaconfigs_update.py +348 -0
  121. paasta_tools/contrib/service_shard_remove.py +157 -0
  122. paasta_tools/contrib/service_shard_update.py +373 -0
  123. paasta_tools/contrib/shared_ip_check.py +77 -0
  124. paasta_tools/contrib/timeouts_metrics_prom.py +64 -0
  125. paasta_tools/delete_kubernetes_deployments.py +89 -0
  126. paasta_tools/deployment_utils.py +44 -0
  127. paasta_tools/docker_wrapper.py +234 -0
  128. paasta_tools/docker_wrapper_imports.py +13 -0
  129. paasta_tools/drain_lib.py +351 -0
  130. paasta_tools/dump_locally_running_services.py +71 -0
  131. paasta_tools/eks_tools.py +119 -0
  132. paasta_tools/envoy_tools.py +373 -0
  133. paasta_tools/firewall.py +504 -0
  134. paasta_tools/firewall_logging.py +154 -0
  135. paasta_tools/firewall_update.py +172 -0
  136. paasta_tools/flink_tools.py +345 -0
  137. paasta_tools/flinkeks_tools.py +90 -0
  138. paasta_tools/frameworks/__init__.py +0 -0
  139. paasta_tools/frameworks/adhoc_scheduler.py +71 -0
  140. paasta_tools/frameworks/constraints.py +87 -0
  141. paasta_tools/frameworks/native_scheduler.py +652 -0
  142. paasta_tools/frameworks/native_service_config.py +301 -0
  143. paasta_tools/frameworks/task_store.py +245 -0
  144. paasta_tools/generate_all_deployments +9 -0
  145. paasta_tools/generate_authenticating_services.py +94 -0
  146. paasta_tools/generate_deployments_for_service.py +255 -0
  147. paasta_tools/generate_services_file.py +114 -0
  148. paasta_tools/generate_services_yaml.py +30 -0
  149. paasta_tools/hacheck.py +76 -0
  150. paasta_tools/instance/__init__.py +0 -0
  151. paasta_tools/instance/hpa_metrics_parser.py +122 -0
  152. paasta_tools/instance/kubernetes.py +1362 -0
  153. paasta_tools/iptables.py +240 -0
  154. paasta_tools/kafkacluster_tools.py +143 -0
  155. paasta_tools/kubernetes/__init__.py +0 -0
  156. paasta_tools/kubernetes/application/__init__.py +0 -0
  157. paasta_tools/kubernetes/application/controller_wrappers.py +476 -0
  158. paasta_tools/kubernetes/application/tools.py +90 -0
  159. paasta_tools/kubernetes/bin/__init__.py +0 -0
  160. paasta_tools/kubernetes/bin/kubernetes_remove_evicted_pods.py +164 -0
  161. paasta_tools/kubernetes/bin/paasta_cleanup_remote_run_resources.py +135 -0
  162. paasta_tools/kubernetes/bin/paasta_cleanup_stale_nodes.py +181 -0
  163. paasta_tools/kubernetes/bin/paasta_secrets_sync.py +758 -0
  164. paasta_tools/kubernetes/remote_run.py +558 -0
  165. paasta_tools/kubernetes_tools.py +4679 -0
  166. paasta_tools/list_kubernetes_service_instances.py +128 -0
  167. paasta_tools/list_tron_namespaces.py +60 -0
  168. paasta_tools/long_running_service_tools.py +678 -0
  169. paasta_tools/mac_address.py +44 -0
  170. paasta_tools/marathon_dashboard.py +0 -0
  171. paasta_tools/mesos/__init__.py +0 -0
  172. paasta_tools/mesos/cfg.py +46 -0
  173. paasta_tools/mesos/cluster.py +60 -0
  174. paasta_tools/mesos/exceptions.py +59 -0
  175. paasta_tools/mesos/framework.py +77 -0
  176. paasta_tools/mesos/log.py +48 -0
  177. paasta_tools/mesos/master.py +306 -0
  178. paasta_tools/mesos/mesos_file.py +169 -0
  179. paasta_tools/mesos/parallel.py +52 -0
  180. paasta_tools/mesos/slave.py +115 -0
  181. paasta_tools/mesos/task.py +94 -0
  182. paasta_tools/mesos/util.py +69 -0
  183. paasta_tools/mesos/zookeeper.py +37 -0
  184. paasta_tools/mesos_maintenance.py +848 -0
  185. paasta_tools/mesos_tools.py +1051 -0
  186. paasta_tools/metrics/__init__.py +0 -0
  187. paasta_tools/metrics/metastatus_lib.py +1110 -0
  188. paasta_tools/metrics/metrics_lib.py +217 -0
  189. paasta_tools/monitoring/__init__.py +13 -0
  190. paasta_tools/monitoring/check_k8s_api_performance.py +110 -0
  191. paasta_tools/monitoring_tools.py +652 -0
  192. paasta_tools/monkrelaycluster_tools.py +146 -0
  193. paasta_tools/nrtsearchservice_tools.py +143 -0
  194. paasta_tools/nrtsearchserviceeks_tools.py +68 -0
  195. paasta_tools/oom_logger.py +321 -0
  196. paasta_tools/paasta_deploy_tron_jobs +3 -0
  197. paasta_tools/paasta_execute_docker_command.py +123 -0
  198. paasta_tools/paasta_native_serviceinit.py +21 -0
  199. paasta_tools/paasta_service_config_loader.py +201 -0
  200. paasta_tools/paastaapi/__init__.py +29 -0
  201. paasta_tools/paastaapi/api/__init__.py +3 -0
  202. paasta_tools/paastaapi/api/autoscaler_api.py +302 -0
  203. paasta_tools/paastaapi/api/default_api.py +569 -0
  204. paasta_tools/paastaapi/api/remote_run_api.py +604 -0
  205. paasta_tools/paastaapi/api/resources_api.py +157 -0
  206. paasta_tools/paastaapi/api/service_api.py +1736 -0
  207. paasta_tools/paastaapi/api_client.py +818 -0
  208. paasta_tools/paastaapi/apis/__init__.py +22 -0
  209. paasta_tools/paastaapi/configuration.py +455 -0
  210. paasta_tools/paastaapi/exceptions.py +137 -0
  211. paasta_tools/paastaapi/model/__init__.py +5 -0
  212. paasta_tools/paastaapi/model/adhoc_launch_history.py +176 -0
  213. paasta_tools/paastaapi/model/autoscaler_count_msg.py +176 -0
  214. paasta_tools/paastaapi/model/deploy_queue.py +178 -0
  215. paasta_tools/paastaapi/model/deploy_queue_service_instance.py +194 -0
  216. paasta_tools/paastaapi/model/envoy_backend.py +185 -0
  217. paasta_tools/paastaapi/model/envoy_location.py +184 -0
  218. paasta_tools/paastaapi/model/envoy_status.py +181 -0
  219. paasta_tools/paastaapi/model/flink_cluster_overview.py +188 -0
  220. paasta_tools/paastaapi/model/flink_config.py +173 -0
  221. paasta_tools/paastaapi/model/flink_job.py +186 -0
  222. paasta_tools/paastaapi/model/flink_job_details.py +192 -0
  223. paasta_tools/paastaapi/model/flink_jobs.py +175 -0
  224. paasta_tools/paastaapi/model/float_and_error.py +173 -0
  225. paasta_tools/paastaapi/model/hpa_metric.py +176 -0
  226. paasta_tools/paastaapi/model/inline_object.py +170 -0
  227. paasta_tools/paastaapi/model/inline_response200.py +170 -0
  228. paasta_tools/paastaapi/model/inline_response2001.py +170 -0
  229. paasta_tools/paastaapi/model/instance_bounce_status.py +200 -0
  230. paasta_tools/paastaapi/model/instance_mesh_status.py +186 -0
  231. paasta_tools/paastaapi/model/instance_status.py +220 -0
  232. paasta_tools/paastaapi/model/instance_status_adhoc.py +187 -0
  233. paasta_tools/paastaapi/model/instance_status_cassandracluster.py +173 -0
  234. paasta_tools/paastaapi/model/instance_status_flink.py +173 -0
  235. paasta_tools/paastaapi/model/instance_status_kafkacluster.py +173 -0
  236. paasta_tools/paastaapi/model/instance_status_kubernetes.py +263 -0
  237. paasta_tools/paastaapi/model/instance_status_kubernetes_autoscaling_status.py +187 -0
  238. paasta_tools/paastaapi/model/instance_status_kubernetes_v2.py +197 -0
  239. paasta_tools/paastaapi/model/instance_status_tron.py +204 -0
  240. paasta_tools/paastaapi/model/instance_tasks.py +182 -0
  241. paasta_tools/paastaapi/model/integer_and_error.py +173 -0
  242. paasta_tools/paastaapi/model/kubernetes_container.py +178 -0
  243. paasta_tools/paastaapi/model/kubernetes_container_v2.py +219 -0
  244. paasta_tools/paastaapi/model/kubernetes_healthcheck.py +176 -0
  245. paasta_tools/paastaapi/model/kubernetes_pod.py +201 -0
  246. paasta_tools/paastaapi/model/kubernetes_pod_event.py +176 -0
  247. paasta_tools/paastaapi/model/kubernetes_pod_v2.py +213 -0
  248. paasta_tools/paastaapi/model/kubernetes_replica_set.py +185 -0
  249. paasta_tools/paastaapi/model/kubernetes_version.py +202 -0
  250. paasta_tools/paastaapi/model/remote_run_outcome.py +189 -0
  251. paasta_tools/paastaapi/model/remote_run_start.py +185 -0
  252. paasta_tools/paastaapi/model/remote_run_stop.py +176 -0
  253. paasta_tools/paastaapi/model/remote_run_token.py +173 -0
  254. paasta_tools/paastaapi/model/resource.py +187 -0
  255. paasta_tools/paastaapi/model/resource_item.py +187 -0
  256. paasta_tools/paastaapi/model/resource_value.py +176 -0
  257. paasta_tools/paastaapi/model/smartstack_backend.py +191 -0
  258. paasta_tools/paastaapi/model/smartstack_location.py +181 -0
  259. paasta_tools/paastaapi/model/smartstack_status.py +181 -0
  260. paasta_tools/paastaapi/model/task_tail_lines.py +176 -0
  261. paasta_tools/paastaapi/model_utils.py +1879 -0
  262. paasta_tools/paastaapi/models/__init__.py +62 -0
  263. paasta_tools/paastaapi/rest.py +287 -0
  264. paasta_tools/prune_completed_pods.py +220 -0
  265. paasta_tools/puppet_service_tools.py +59 -0
  266. paasta_tools/py.typed +1 -0
  267. paasta_tools/remote_git.py +127 -0
  268. paasta_tools/run-paasta-api-in-dev-mode.py +57 -0
  269. paasta_tools/run-paasta-api-playground.py +51 -0
  270. paasta_tools/secret_providers/__init__.py +66 -0
  271. paasta_tools/secret_providers/vault.py +214 -0
  272. paasta_tools/secret_tools.py +277 -0
  273. paasta_tools/setup_istio_mesh.py +353 -0
  274. paasta_tools/setup_kubernetes_cr.py +412 -0
  275. paasta_tools/setup_kubernetes_crd.py +138 -0
  276. paasta_tools/setup_kubernetes_internal_crd.py +154 -0
  277. paasta_tools/setup_kubernetes_job.py +353 -0
  278. paasta_tools/setup_prometheus_adapter_config.py +1028 -0
  279. paasta_tools/setup_tron_namespace.py +248 -0
  280. paasta_tools/slack.py +75 -0
  281. paasta_tools/smartstack_tools.py +676 -0
  282. paasta_tools/spark_tools.py +283 -0
  283. paasta_tools/synapse_srv_namespaces_fact.py +42 -0
  284. paasta_tools/tron/__init__.py +0 -0
  285. paasta_tools/tron/client.py +158 -0
  286. paasta_tools/tron/tron_command_context.py +194 -0
  287. paasta_tools/tron/tron_timeutils.py +101 -0
  288. paasta_tools/tron_tools.py +1448 -0
  289. paasta_tools/utils.py +4307 -0
  290. paasta_tools/yaml_tools.py +44 -0
  291. paasta_tools-1.21.3.data/scripts/apply_external_resources.py +79 -0
  292. paasta_tools-1.21.3.data/scripts/bounce_log_latency_parser.py +68 -0
  293. paasta_tools-1.21.3.data/scripts/check_autoscaler_max_instances.py +212 -0
  294. paasta_tools-1.21.3.data/scripts/check_cassandracluster_services_replication.py +35 -0
  295. paasta_tools-1.21.3.data/scripts/check_flink_services_health.py +203 -0
  296. paasta_tools-1.21.3.data/scripts/check_kubernetes_api.py +57 -0
  297. paasta_tools-1.21.3.data/scripts/check_kubernetes_services_replication.py +141 -0
  298. paasta_tools-1.21.3.data/scripts/check_manual_oapi_changes.sh +24 -0
  299. paasta_tools-1.21.3.data/scripts/check_oom_events.py +244 -0
  300. paasta_tools-1.21.3.data/scripts/check_orphans.py +306 -0
  301. paasta_tools-1.21.3.data/scripts/check_spark_jobs.py +234 -0
  302. paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_cr.py +138 -0
  303. paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_crd.py +145 -0
  304. paasta_tools-1.21.3.data/scripts/cleanup_kubernetes_jobs.py +344 -0
  305. paasta_tools-1.21.3.data/scripts/create_dynamodb_table.py +35 -0
  306. paasta_tools-1.21.3.data/scripts/create_paasta_playground.py +105 -0
  307. paasta_tools-1.21.3.data/scripts/delete_kubernetes_deployments.py +89 -0
  308. paasta_tools-1.21.3.data/scripts/emit_allocated_cpu_metrics.py +50 -0
  309. paasta_tools-1.21.3.data/scripts/generate_all_deployments +9 -0
  310. paasta_tools-1.21.3.data/scripts/generate_authenticating_services.py +94 -0
  311. paasta_tools-1.21.3.data/scripts/generate_deployments_for_service.py +255 -0
  312. paasta_tools-1.21.3.data/scripts/generate_services_file.py +114 -0
  313. paasta_tools-1.21.3.data/scripts/generate_services_yaml.py +30 -0
  314. paasta_tools-1.21.3.data/scripts/get_running_task_allocation.py +346 -0
  315. paasta_tools-1.21.3.data/scripts/habitat_fixer.py +86 -0
  316. paasta_tools-1.21.3.data/scripts/ide_helper.py +316 -0
  317. paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_proxy.py +139 -0
  318. paasta_tools-1.21.3.data/scripts/is_pod_healthy_in_smartstack.py +50 -0
  319. paasta_tools-1.21.3.data/scripts/kill_bad_containers.py +109 -0
  320. paasta_tools-1.21.3.data/scripts/kubernetes_remove_evicted_pods.py +164 -0
  321. paasta_tools-1.21.3.data/scripts/mass-deploy-tag.sh +44 -0
  322. paasta_tools-1.21.3.data/scripts/mock_patch_checker.py +86 -0
  323. paasta_tools-1.21.3.data/scripts/paasta_cleanup_remote_run_resources.py +135 -0
  324. paasta_tools-1.21.3.data/scripts/paasta_cleanup_stale_nodes.py +181 -0
  325. paasta_tools-1.21.3.data/scripts/paasta_deploy_tron_jobs +3 -0
  326. paasta_tools-1.21.3.data/scripts/paasta_execute_docker_command.py +123 -0
  327. paasta_tools-1.21.3.data/scripts/paasta_secrets_sync.py +758 -0
  328. paasta_tools-1.21.3.data/scripts/paasta_tabcomplete.sh +23 -0
  329. paasta_tools-1.21.3.data/scripts/paasta_update_soa_memcpu.py +520 -0
  330. paasta_tools-1.21.3.data/scripts/render_template.py +129 -0
  331. paasta_tools-1.21.3.data/scripts/rightsizer_soaconfigs_update.py +348 -0
  332. paasta_tools-1.21.3.data/scripts/service_shard_remove.py +157 -0
  333. paasta_tools-1.21.3.data/scripts/service_shard_update.py +373 -0
  334. paasta_tools-1.21.3.data/scripts/setup_istio_mesh.py +353 -0
  335. paasta_tools-1.21.3.data/scripts/setup_kubernetes_cr.py +412 -0
  336. paasta_tools-1.21.3.data/scripts/setup_kubernetes_crd.py +138 -0
  337. paasta_tools-1.21.3.data/scripts/setup_kubernetes_internal_crd.py +154 -0
  338. paasta_tools-1.21.3.data/scripts/setup_kubernetes_job.py +353 -0
  339. paasta_tools-1.21.3.data/scripts/setup_prometheus_adapter_config.py +1028 -0
  340. paasta_tools-1.21.3.data/scripts/shared_ip_check.py +77 -0
  341. paasta_tools-1.21.3.data/scripts/synapse_srv_namespaces_fact.py +42 -0
  342. paasta_tools-1.21.3.data/scripts/timeouts_metrics_prom.py +64 -0
  343. paasta_tools-1.21.3.dist-info/LICENSE +201 -0
  344. paasta_tools-1.21.3.dist-info/METADATA +74 -0
  345. paasta_tools-1.21.3.dist-info/RECORD +348 -0
  346. paasta_tools-1.21.3.dist-info/WHEEL +5 -0
  347. paasta_tools-1.21.3.dist-info/entry_points.txt +20 -0
  348. paasta_tools-1.21.3.dist-info/top_level.txt +2 -0
paasta_tools/tron_tools.py
@@ -0,0 +1,1448 @@
+ # Copyright 2015-2018 Yelp Inc.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ import datetime
+ import difflib
+ import glob
+ import json
+ import logging
+ import os
+ import pkgutil
+ import re
+ import subprocess
+ from string import Formatter
+ from typing import cast
+ from typing import List
+ from typing import Mapping
+ from typing import Tuple
+ from typing import Union
+
+ from mypy_extensions import TypedDict
+ from service_configuration_lib import read_extra_service_information
+ from service_configuration_lib import read_yaml_file
+ from service_configuration_lib.spark_config import get_total_driver_memory_mb
+ from service_configuration_lib.spark_config import SparkConfBuilder
+
+ from paasta_tools import yaml_tools as yaml
+ from paasta_tools.mesos_tools import mesos_services_running_here
+
+ try:
+     from yaml.cyaml import CSafeDumper as Dumper
+ except ImportError:  # pragma: no cover (no libyaml-dev / pypy)
+     Dumper = yaml.SafeDumper  # type: ignore
+
+ from paasta_tools.clusterman import get_clusterman_metrics
+ from paasta_tools.tron.client import TronClient
+ from paasta_tools.tron import tron_command_context
+ from paasta_tools.utils import DEFAULT_SOA_DIR, InstanceConfigDict
+ from paasta_tools.utils import InstanceConfig
+ from paasta_tools.utils import InvalidInstanceConfig
+ from paasta_tools.utils import load_system_paasta_config
+ from paasta_tools.utils import SystemPaastaConfig
+ from paasta_tools.utils import load_v2_deployments_json
+ from paasta_tools.utils import NoConfigurationForServiceError
+ from paasta_tools.utils import NoDeploymentsAvailable
+ from paasta_tools.utils import time_cache
+ from paasta_tools.utils import filter_templates_from_config
+ from paasta_tools.utils import TronSecretVolume
+ from paasta_tools.utils import get_k8s_url_for_cluster
+ from paasta_tools.utils import validate_pool
+ from paasta_tools.utils import PoolsNotConfiguredError
+ from paasta_tools.utils import DockerVolume
+ from paasta_tools.utils import ProjectedSAVolume
+
+ from paasta_tools import spark_tools
+
+ from paasta_tools.kubernetes_tools import (
+     NodeSelectorConfig,
+     allowlist_denylist_to_requirements,
+     contains_zone_label,
+     get_service_account_name,
+     limit_size_with_hash,
+     raw_selectors_to_requirements,
+     to_node_label,
+ )
+ from paasta_tools.secret_tools import is_secret_ref
+ from paasta_tools.secret_tools import is_shared_secret
+ from paasta_tools.secret_tools import is_shared_secret_from_secret_name
+ from paasta_tools.secret_tools import get_secret_name_from_ref
+ from paasta_tools.kubernetes_tools import get_paasta_secret_name
+ from paasta_tools.kubernetes_tools import add_volumes_for_authenticating_services
+ from paasta_tools.secret_tools import SHARED_SECRET_SERVICE
+
+ from paasta_tools import monitoring_tools
+ from paasta_tools.monitoring_tools import list_teams
+ from typing import Optional
+ from typing import Dict
+ from typing import Any
+
+ log = logging.getLogger(__name__)
+ logging.getLogger("tron").setLevel(logging.WARNING)
+
+ MASTER_NAMESPACE = "MASTER"
+ SPACER = "."
+ VALID_MONITORING_KEYS = set(
+     json.loads(
+         pkgutil.get_data("paasta_tools.cli", "schemas/tron_schema.json").decode()
+     )["definitions"]["job"]["properties"]["monitoring"]["properties"].keys()
+ )
+ MESOS_EXECUTOR_NAMES = ("paasta",)
+ KUBERNETES_EXECUTOR_NAMES = ("paasta", "spark")
+ EXECUTOR_NAME_TO_TRON_EXECUTOR_TYPE = {"paasta": "kubernetes", "spark": "spark"}
+ KUBERNETES_NAMESPACE = "tron"
+ DEFAULT_AWS_REGION = "us-west-2"
+ EXECUTOR_TYPE_TO_NAMESPACE = {
+     "paasta": "tron",
+     "spark": "tron",
+ }
+ DEFAULT_TZ = "US/Pacific"
+ clusterman_metrics, _ = get_clusterman_metrics()
+ EXECUTOR_TYPES = ["paasta", "ssh", "spark"]
+ DEFAULT_SPARK_EXECUTOR_POOL = "batch"
+
+
+ class FieldSelectorConfig(TypedDict):
+     field_path: str
+
+
+ class TronNotConfigured(Exception):
+     pass
+
+
+ class InvalidTronConfig(Exception):
+     pass
+
+
+ class InvalidPoolError(Exception):
+     pass
+
+
+ class TronConfig(dict):
+     """System-level configuration for Tron."""
+
+     def __init__(self, config):
+         super().__init__(config)
+
+     def get_cluster_name(self):
+         """:returns The name of the Tron cluster"""
+         try:
+             return self["cluster_name"]
+         except KeyError:
+             raise TronNotConfigured(
+                 "Could not find name of Tron cluster in system Tron config"
+             )
+
+     def get_url(self):
+         """:returns The URL for the Tron master's API"""
+         try:
+             return self["url"]
+         except KeyError:
+             raise TronNotConfigured(
+                 "Could not find URL of Tron master in system Tron config"
+             )
+
+
+ def get_tronfig_folder(cluster, soa_dir):
+     return os.path.join(soa_dir, "tron", cluster)
+
+
+ def load_tron_config():
+     return TronConfig(load_system_paasta_config().get_tron_config())
+
+
+ def get_tron_client():
+     return TronClient(load_tron_config().get_url())
+
+
+ def compose_instance(job, action):
+     return f"{job}{SPACER}{action}"
+
+
+ def decompose_instance(instance):
+     """Get (job_name, action_name) from an instance."""
+     decomposed = instance.split(SPACER)
+     if len(decomposed) != 2:
+         raise InvalidInstanceConfig("Invalid instance name: %s" % instance)
+     return (decomposed[0], decomposed[1])
+
+
+ def decompose_executor_id(executor_id) -> Tuple[str, str, int, str]:
+     """(service, job, run_number, action)"""
+     service, job, str_run_number, action, _ = executor_id.split(SPACER)
+     return (service, job, int(str_run_number), action)
+
+
+ class StringFormatter(Formatter):
+     def __init__(self, context=None):
+         Formatter.__init__(self)
+         self.context = context
+
+     def get_value(self, key, args, kwds):
+         if isinstance(key, str):
+             try:
+                 return kwds[key]
+             except KeyError:
+                 return self.context[key]
+         else:
+             return Formatter.get_value(key, args, kwds)
+
+
+ def parse_time_variables(command: str, parse_time: datetime.datetime = None) -> str:
+     """Parses an input string and uses the Tron-style dateparsing
+     to replace time variables. Currently supports only the date/time
+     variables listed in the tron documentation:
+     http://tron.readthedocs.io/en/latest/command_context.html#built-in-cc
+
+     :param input_string: input string to be parsed
+     :param parse_time: Reference Datetime object to parse the date and time strings, defaults to now.
+     :returns: A string with the date and time variables replaced
+     """
+     if parse_time is None:
+         parse_time = datetime.datetime.now()
+     # We build up a tron context object that has the right
+     # methods to parse tron-style time syntax
+     job_context = tron_command_context.JobRunContext(
+         tron_command_context.CommandContext()
+     )
+     # The tron context object needs the run_time attribute set so it knows
+     # how to interpret the date strings
+     job_context.job_run.run_time = parse_time
+     return StringFormatter(job_context).format(command)
+
+
+ def _get_tron_k8s_cluster_override(cluster: str) -> Optional[str]:
+     """
+     Return the name of a compute cluster if there's a different compute cluster that should be used to run a Tronjob.
+     Will return None if no override mapping is present
+
+     We have certain Tron masters that are named differently from the compute cluster that should actually be used (
+     e.g., we might have tron-XYZ-test-prod, but instead of scheduling on XYZ-test-prod, we'd like to schedule jobs
+     on test-prod).
+
+     To control this, we have an optional config item that we'll puppet onto Tron masters that need this type of
+     tron master -> compute cluster override which this function will read.
+     """
+     return (
+         load_system_paasta_config()
+         .get_tron_k8s_cluster_overrides()
+         .get(
+             cluster,
+             None,
+         )
+     )
+
+
+ def _spark_k8s_role() -> str:
+     return load_system_paasta_config().get_spark_k8s_role()
+
+
+ class TronActionConfigDict(InstanceConfigDict, total=False):
+     # this is kinda confusing: long-running stuff is currently using cmd
+     # ...but tron are using command - this is going to require a little
+     # maneuvering to unify
+     command: str
+     service_account_name: str
+     node_selectors: Dict[str, NodeSelectorConfig]
+
+     # the values for this dict can be anything since it's whatever
+     # spark accepts
+     spark_args: Dict[str, Any]
+     force_spark_resource_configs: bool
+     # TODO: TRON-2145: use this to implement timeout for non-spark actions in tron
+     max_runtime: str
+     mrjob: bool
+
+
+ class TronActionConfig(InstanceConfig):
+     config_dict: TronActionConfigDict
+     config_filename_prefix = "tron"
+
+     def __init__(
+         self,
+         service,
+         instance,
+         cluster,
+         config_dict,
+         branch_dict,
+         soa_dir=DEFAULT_SOA_DIR,
+         for_validation=False,
+     ):
+         super().__init__(
+             cluster=cluster,
+             instance=instance,
+             service=service,
+             config_dict=config_dict,
+             branch_dict=branch_dict,
+             soa_dir=soa_dir,
+         )
+         self.job, self.action = decompose_instance(instance)
+
+         # Indicate whether this config object is created for validation
+         self.for_validation = for_validation
+
+         self.action_spark_config = None
+         if self.get_executor() == "spark":
+             # build the complete Spark configuration
+             # TODO: add conditional check for Spark specific commands spark-submit, pyspark etc ?
+             self.action_spark_config = self.build_spark_config()
+
+     def get_cpus(self) -> float:
+         # set Spark driver pod CPU if it is specified by Spark arguments
+         if (
+             self.action_spark_config
+             and "spark.driver.cores" in self.action_spark_config
+         ):
+             return float(self.action_spark_config["spark.driver.cores"])
+         # we fall back to this default if there's no spark.driver.cores config
+         return super().get_cpus()
+
+     def get_mem(self) -> float:
+         # get Spark driver pod memory specified by Spark arguments
+         if self.action_spark_config:
+             return get_total_driver_memory_mb(self.action_spark_config)
+         # we fall back to this default if there's no Spark config
+         return super().get_mem()
+
+     def get_disk(self, default: float = 1024) -> float:
+         # increase default threshold for Spark driver pod memory because 1G is too low
+         if self.action_spark_config and "disk" not in self.config_dict:
+             return spark_tools.SPARK_DRIVER_DEFAULT_DISK_MB
+         # we fall back to this default if there's no Spark config
+         return super().get_disk()
+
+     def build_spark_config(self) -> Dict[str, str]:
+         system_paasta_config = load_system_paasta_config()
+         resolved_cluster = system_paasta_config.get_eks_cluster_aliases().get(
+             self.get_cluster(), self.get_cluster()
+         )
+         pool = self.get_spark_executor_pool()
+         try:
+             if not validate_pool(resolved_cluster, pool, system_paasta_config):
+                 raise InvalidPoolError(
+                     f"Job {self.get_service()}.{self.get_instance()}: "
+                     f"pool '{pool}' is invalid for cluster '{resolved_cluster}'"
+                 )
+         except PoolsNotConfiguredError:
+             log.warning(
+                 f"Could not fetch allowed_pools for `{resolved_cluster}`. Skipping pool validation.\n"
+             )
+
+         spark_args = self.config_dict.get("spark_args", {})
+         # most of the service_configuration_lib function expected string values only
+         # so let's go ahead and convert the values now instead of once per-wrapper
+         stringified_spark_args = {
+             k: (str(v) if not isinstance(v, bool) else str(v).lower())
+             for k, v in spark_args.items()
+         }
+
+         spark_app_name = stringified_spark_args.get(
+             "spark.app.name",
+             f"tron_spark_{self.get_service()}_{self.get_instance()}",
+         )
+
+         spark_conf_builder = SparkConfBuilder(is_driver_on_k8s_tron=True)
+         spark_conf = spark_conf_builder.get_spark_conf(
+             cluster_manager="kubernetes",
+             spark_app_base_name=spark_app_name,
+             user_spark_opts=stringified_spark_args,
+             paasta_cluster=resolved_cluster,
+             paasta_pool=self.get_spark_executor_pool(),
+             paasta_service=self.get_service(),
+             paasta_instance=self.get_instance(),
+             docker_img=f"{self.get_docker_registry()}/$PAASTA_DOCKER_IMAGE",
+             extra_volumes=cast(
+                 List[Mapping[str, str]],
+                 self.get_volumes(
+                     system_paasta_config.get_volumes(),
+                 ),
+             ),
+             use_eks=True,
+             k8s_server_address=get_k8s_url_for_cluster(self.get_cluster()),
+             force_spark_resource_configs=self.config_dict.get(
+                 "force_spark_resource_configs", False
+             ),
+             user=spark_tools.SPARK_TRON_JOB_USER,
+         )
+         # delete the dynamically generated spark.app.id to prevent frequent config updates in Tron.
+         # spark.app.id will be generated later by yelp spark-submit wrapper or Spark itself.
+         spark_conf.pop("spark.app.id", None)
+         # use a static spark.app.name to prevent frequent config updates in Tron.
+         # md5 and base64 will always generate the same encoding for a string.
+         # This spark.app.name might be overridden by yelp spark-submit wrapper.
+         if "spark.app.name" in spark_conf:
+             spark_conf["spark.app.name"] = limit_size_with_hash(
+                 f"tron_spark_{self.get_service()}_{self.get_instance()}_{self.get_action_name()}"
+                 if "spark.app.name" not in stringified_spark_args
+                 else stringified_spark_args["spark.app.name"]
+             )
+
+         # TODO(MLCOMPUTE-1220): Remove this once dynamic pod template is generated inside the driver using spark-submit wrapper
+         if "spark.kubernetes.executor.podTemplateFile" in spark_conf:
+             log.info(
+                 f"Replacing spark.kubernetes.executor.podTemplateFile="
+                 f"{spark_conf['spark.kubernetes.executor.podTemplateFile']} with "
+                 f"spark.kubernetes.executor.podTemplateFile={spark_tools.SPARK_DNS_POD_TEMPLATE}"
+             )
+             spark_conf[
+                 "spark.kubernetes.executor.podTemplateFile"
+             ] = spark_tools.SPARK_DNS_POD_TEMPLATE
+
+         spark_conf.update(
+             {
+                 "spark.hadoop.fs.s3a.aws.credentials.provider": spark_tools.SPARK_AWS_CREDS_PROVIDER,
+                 "spark.driver.host": "$PAASTA_POD_IP",
+             }
+         )
+         spark_conf.setdefault(
+             "spark.kubernetes.executor.label.yelp.com/owner", self.get_team()
+         )
+
+         # We are using the Service Account created using the provided or default IAM role.
+         spark_conf[
+             "spark.kubernetes.authenticate.executor.serviceAccountName"
+         ] = get_service_account_name(
+             iam_role=self.get_spark_executor_iam_role(),
+         )
+
+         return spark_conf
+
+     def get_cmd(self):
+         command = self.config_dict.get("command")
+         return command
+
+     def get_job_name(self):
+         return self.job
+
+     def get_action_name(self):
+         return self.action
+
+     # mypy does not like the SecretVolume -> TronSecretVolume conversion, because TypedDict inheritence is broken.
+     # Until this is fixed, let's ignore this issue.
+     def get_secret_volumes(self) -> List[TronSecretVolume]:  # type: ignore
+         """Adds the secret_volume_name to the object so tron/task_processing can load it downstream without replicating code."""
+         secret_volumes = super().get_secret_volumes()
+         tron_secret_volumes = []
+         for secret_volume in secret_volumes:
+             tron_secret_volume = TronSecretVolume(
+                 secret_volume_name=self.get_secret_volume_name(
+                     secret_volume["secret_name"]
+                 ),
+                 secret_name=secret_volume["secret_name"],
+                 container_path=secret_volume["container_path"],
+                 items=secret_volume.get("items", []),
+             )
+             # we have a different place where the default can come from (tron) and we don't want to insert the wrong default here
+             if "default_mode" in secret_volume:
+                 tron_secret_volume["default_mode"] = secret_volume["default_mode"]
+
+             tron_secret_volumes.append(tron_secret_volume)
+         return tron_secret_volumes
+
+     def get_namespace(self) -> str:
+         """Get namespace from config, default to 'paasta'"""
+         return self.config_dict.get("namespace", KUBERNETES_NAMESPACE)
+
+     def get_secret_volume_name(self, secret_name: str) -> str:
+         service = (
+             self.service
+             if not is_shared_secret_from_secret_name(
+                 soa_dir=self.soa_dir, secret_name=secret_name
+             )
+             else SHARED_SECRET_SERVICE
+         )
+         return get_paasta_secret_name(
+             self.get_namespace(),
+             service,
+             secret_name,
+         )
+
+     def get_deploy_group(self) -> Optional[str]:
+         return self.config_dict.get("deploy_group", None)
+
+     def get_docker_url(
+         self, system_paasta_config: Optional[SystemPaastaConfig] = None
+     ) -> str:
+         # It's okay for tronfig to contain things that aren't deployed yet - it's normal for developers to
+         # push tronfig well before the job is scheduled to run, and either they'll deploy the service before
+         # or get notified when the job fails.
+         #
+         # This logic ensures that we can still pass validation and run setup_tron_namespace even if
+         # there's nothing in deployments.json yet.
+         return (
+             ""
+             if not self.get_docker_image()
+             else super().get_docker_url(system_paasta_config=system_paasta_config)
+         )
+
+     def get_env(
+         self,
+         system_paasta_config: Optional["SystemPaastaConfig"] = None,
+     ) -> Dict[str, str]:
+         env = super().get_env(system_paasta_config=system_paasta_config)
+         if self.get_executor() == "spark":
+             # Required by some sdks like boto3 client. Throws NoRegionError otherwise.
+             # AWS_REGION takes precedence if set.
+             env["AWS_DEFAULT_REGION"] = DEFAULT_AWS_REGION
+             env["PAASTA_INSTANCE_TYPE"] = "spark"
+             # XXX: is this actually necessary? every PR that's added this hasn't really mentioned why,
+             # and Chesterton's Fence makes me very wary about removing it
+             env["SPARK_USER"] = "root"
+             # XXX: we were adding the commandline we were starting the Spark driver with to SPARK_OPTS
+             # before, but that doesn't really seem necessary from my testing (driver starts just fine)
+             # if this changes and we do need it - please add a comment about *why* we need it!
+             # XXX: update PAASTA_RESOURCE_* env vars to use the correct value from spark_args and set
+             # these to the correct values for the executors as part of the driver commandline
+
+         return env
+
+     def get_iam_role(self) -> str:
+         iam_role = super().get_iam_role()
+
+         if not iam_role and self.get_executor() == "spark":
+             iam_role = load_system_paasta_config().get_spark_driver_iam_role()
+
+         return iam_role
+
+     def get_spark_executor_iam_role(self) -> str:
+         return (
+             self.get_iam_role()
+             or load_system_paasta_config().get_spark_executor_iam_role()
+         )
+
+     def get_secret_env(self) -> Mapping[str, dict]:
+         base_env = self.config_dict.get("env", {})
+         secret_env = {}
+         for k, v in base_env.items():
+             if is_secret_ref(v):
+                 secret = get_secret_name_from_ref(v)
+                 service = (
+                     self.service if not is_shared_secret(v) else SHARED_SECRET_SERVICE
+                 )
+                 secret_env[k] = {
+                     "secret_name": get_paasta_secret_name(
+                         self.get_namespace(),
+                         service,
+                         secret,
+                     ),
+                     "key": secret,
+                 }
+         return secret_env
+
+     def get_field_selector_env(self) -> Dict[str, FieldSelectorConfig]:
+         # we're not expecting users to need to add any of these themselves, so for now
+         # we'll just hardcode the env vars we want to add by default
+         return {
+             "PAASTA_POD_IP": {
+                 "field_path": "status.podIP",
+             }
+         }
+
+     def get_cpu_burst_add(self) -> float:
+         """For Tron jobs, we don't let them burst by default, because they
+         don't represent "real-time" workloads, and should not impact
+         neighbors"""
+         return self.config_dict.get("cpu_burst_add", 0)
+
+     def get_executor(self):
+         return self.config_dict.get("executor", "paasta")
+
+     def get_healthcheck_mode(self, _) -> None:
+         return None
+
+     def get_node(self):
+         return self.config_dict.get("node")
+
+     def get_retries(self):
+         return self.config_dict.get("retries")
+
+     def get_retries_delay(self):
+         return self.config_dict.get("retries_delay")
+
+     def get_requires(self):
+         return self.config_dict.get("requires")
+
+     def get_expected_runtime(self):
+         return self.config_dict.get("expected_runtime")
+
+     def get_triggered_by(self):
+         return self.config_dict.get("triggered_by", None)
+
+     def get_trigger_downstreams(self):
+         return self.config_dict.get("trigger_downstreams", None)
+
+     def get_on_upstream_rerun(self):
+         return self.config_dict.get("on_upstream_rerun", None)
+
+     def get_trigger_timeout(self):
+         return self.config_dict.get("trigger_timeout", None)
+
+     def get_node_selectors(self) -> Dict[str, str]:
+         raw_selectors: Dict[str, Any] = self.config_dict.get("node_selectors", {})  # type: ignore
+         node_selectors = {
+             to_node_label(label): value
+             for label, value in raw_selectors.items()
+             if isinstance(value, str)
+         }
+         node_selectors["yelp.com/pool"] = self.get_pool()
+         return node_selectors
+
+     def get_node_affinities(self) -> Optional[List[Dict[str, Union[str, List[str]]]]]:
+         """Converts deploy_whitelist and deploy_blacklist in node affinities.
+
+         NOTE: At the time of writing, `kubectl describe` does not show affinities,
+         only selectors. To see affinities, use `kubectl get pod -o json` instead.
+
+         WARNING: At the time of writing, we only used requiredDuringSchedulingIgnoredDuringExecution node affinities in Tron as we currently have
+         no use case for preferredDuringSchedulingIgnoredDuringExecution node affinities.
+         """
+         requirements = allowlist_denylist_to_requirements(
+             allowlist=self.get_deploy_whitelist(),
+             denylist=self.get_deploy_blacklist(),
+         )
+         node_selectors = self.config_dict.get("node_selectors", {})
+         requirements.extend(
+             raw_selectors_to_requirements(
+                 raw_selectors=node_selectors,
+             )
+         )
+
+         system_paasta_config = load_system_paasta_config()
+         if system_paasta_config.get_enable_tron_tsc():
+             # PAASTA-18198: To improve AZ balance with Karpenter, we temporarily allow specifying zone affinities per pool
+             pool_node_affinities = system_paasta_config.get_pool_node_affinities()
+             if pool_node_affinities and self.get_pool() in pool_node_affinities:
+                 current_pool_node_affinities = pool_node_affinities[self.get_pool()]
+                 # If the service already has a node selector for a zone, we don't want to override it
+                 if current_pool_node_affinities and not contains_zone_label(
+                     node_selectors
+                 ):
+                     requirements.extend(
+                         raw_selectors_to_requirements(
+                             raw_selectors=current_pool_node_affinities,
+                         )
+                     )
+
+         if not requirements:
+             return None
+
+         return [
+             {"key": key, "operator": op, "value": value}
+             for key, op, value in requirements
+         ]
+
+     def get_calculated_constraints(self):
+         """Combine all configured Mesos constraints."""
+         constraints = self.get_constraints()
+         if constraints is not None:
+             return constraints
+         else:
+             constraints = self.get_extra_constraints()
+             constraints.extend(
+                 self.get_deploy_constraints(
+                     blacklist=self.get_deploy_blacklist(),
+                     whitelist=self.get_deploy_whitelist(),
+                     # Don't have configs for the paasta cluster
+                     system_deploy_blacklist=[],
+                     system_deploy_whitelist=None,
+                 )
+             )
+             constraints.extend(self.get_pool_constraints())
+             return constraints
+
+     def get_nerve_namespace(self) -> None:
+         return None
+
+     def validate(self):
+         error_msgs = []
+         error_msgs.extend(super().validate())
+         # Tron is a little special, because it can *not* have a deploy group
+         # But only if an action is running via ssh and not via paasta
+         if (
+             self.get_deploy_group() is None
+             and self.get_executor() in MESOS_EXECUTOR_NAMES
+         ):
+             error_msgs.append(
+                 f"{self.get_job_name()}.{self.get_action_name()} must have a deploy_group set"
+             )
+         # We are not allowing users to specify `cpus` and `mem` configuration if the action is a Spark job
+         # with driver running on k8s (executor: spark), because we derive these values from `spark.driver.cores`
+         # and `spark.driver.memory` in order to avoid confusion.
+         if self.get_executor() == "spark":
+             if "cpus" in self.config_dict:
+                 error_msgs.append(
+                     f"{self.get_job_name()}.{self.get_action_name()} is a Spark job. `cpus` config is not allowed. "
+                     f"Please specify the driver cores using `spark.driver.cores`."
+                 )
+             if "mem" in self.config_dict:
+                 error_msgs.append(
+                     f"{self.get_job_name()}.{self.get_action_name()} is a Spark job. `mem` config is not allowed. "
+                     f"Please specify the driver memory using `spark.driver.memory`."
+                 )
+         return error_msgs
+
+     def get_pool(self) -> str:
+         """
+         Returns the default pool override if pool is not defined in the action configuration.
+
+         This is useful for environments like spam to allow us to default the pool to spam but allow users to
+         override this value. To control this, we have an optional config item that we'll puppet onto Tron masters
+         which this function will read.
+         """
+         if self.get_executor() == "spark":
+             pool = load_system_paasta_config().get_default_spark_driver_pool_override()
+         else:
+             pool = self.config_dict.get(
+                 "pool", load_system_paasta_config().get_tron_default_pool_override()
+             )
+
+         return pool
+
+     def get_spark_executor_pool(self) -> str:
+         return self.config_dict.get("pool", DEFAULT_SPARK_EXECUTOR_POOL)
+
+     def get_service_account_name(self) -> Optional[str]:
+         return self.config_dict.get("service_account_name")
+
+     def get_projected_sa_volumes(self) -> Optional[List[ProjectedSAVolume]]:
+         projected_volumes = add_volumes_for_authenticating_services(
+             service_name=self.service,
+             config_volumes=super().get_projected_sa_volumes(),
+             soa_dir=self.soa_dir,
+         )
+         return projected_volumes if projected_volumes else None
+
+
+ class TronJobConfig:
+     """Represents a job in Tron, consisting of action(s) and job-level configuration values."""
+
+     def __init__(
+         self,
+         name: str,
+         config_dict: Dict[str, Any],
+         cluster: str,
+         service: Optional[str] = None,
+         load_deployments: bool = True,
+         soa_dir: str = DEFAULT_SOA_DIR,
+         for_validation: bool = False,
+     ) -> None:
+         self.name = name
+         self.config_dict = config_dict
+         self.cluster = cluster
+         self.service = service
+         self.load_deployments = load_deployments
+         self.soa_dir = soa_dir
+         # Indicate whether this config object is created for validation
+         self.for_validation = for_validation
+
+     def get_name(self):
+         return self.name
+
+     def get_node(self):
+         return self.config_dict.get("node", "paasta")
+
+     def get_schedule(self):
+         return self.config_dict.get("schedule")
+
+     def get_cron_expression(self) -> Optional[str]:
+         schedule = self.config_dict.get("schedule")
+         # TODO(TRON-1746): once we simplify this format, we can clean this code up
+         if (
+             isinstance(schedule, dict)
+             and "type" in schedule
+             and schedule["type"] == "cron"
+         ):
+             return schedule["value"]
+         elif isinstance(schedule, str) and schedule.startswith("cron"):
+             # most cron parsers won't understand our schedule tag, so we need to strip
+             # that off before passing it to anything else
+             return schedule.replace("cron", "")
+
+         return None
+
+     def get_monitoring(self):
+         srv_monitoring = dict(
+             monitoring_tools.read_monitoring_config(self.service, soa_dir=self.soa_dir)
+         )
+         tron_monitoring = self.config_dict.get("monitoring", {})
+         srv_monitoring.update(tron_monitoring)
+         # filter out non-tron monitoring keys
+         srv_monitoring = {
+             k: v for k, v in srv_monitoring.items() if k in VALID_MONITORING_KEYS
+         }
+         return srv_monitoring
+
+     def get_queueing(self):
+         return self.config_dict.get("queueing")
+
+     def get_run_limit(self):
+         return self.config_dict.get("run_limit")
+
+     def get_all_nodes(self):
+         return self.config_dict.get("all_nodes")
+
+     def get_enabled(self):
+         return self.config_dict.get("enabled")
+
+     def get_allow_overlap(self):
+         return self.config_dict.get("allow_overlap")
+
+     def get_max_runtime(self):
+         return self.config_dict.get("max_runtime")
+
+     def get_time_zone(self):
+         return self.config_dict.get("time_zone")
+
+     def get_service(self) -> Optional[str]:
+         return self.service or self.config_dict.get("service")
+
+     def get_deploy_group(self) -> Optional[str]:
+         return self.config_dict.get("deploy_group", None)
+
+     def get_cluster(self):
+         return self.cluster
+
+     def get_expected_runtime(self):
+         return self.config_dict.get("expected_runtime")
+
+     def _get_action_config(self, action_name, action_dict) -> TronActionConfig:
+         action_service = action_dict.setdefault("service", self.get_service())
+         action_deploy_group = action_dict.setdefault(
+             "deploy_group", self.get_deploy_group()
+         )
+         if action_service and action_deploy_group and self.load_deployments:
+             try:
+                 deployments_json = load_v2_deployments_json(
+                     service=action_service, soa_dir=self.soa_dir
+                 )
+                 branch_dict = {
+                     "docker_image": deployments_json.get_docker_image_for_deploy_group(
+                         action_deploy_group
+                     ),
+                     "git_sha": deployments_json.get_git_sha_for_deploy_group(
+                         action_deploy_group
+                     ),
+                     "image_version": deployments_json.get_image_version_for_deploy_group(
+                         action_deploy_group
+                     ),
+                     # TODO: add Tron instances when generating deployments json
+                     "desired_state": "start",
+                     "force_bounce": None,
+                 }
+             except NoDeploymentsAvailable:
+                 log.warning(
+                     f'Docker image unavailable for {action_service}.{self.get_name()}.{action_dict.get("name")}'
+                     " is it deployed yet?"
+                 )
+
+                 if self.soa_dir != DEFAULT_SOA_DIR:
+                     log.warning(
+                         f"Error: No deployments.json found in {self.soa_dir}/{action_service}. "
+                         "You can generate this by running: "
+                         f"generate_deployments_for_service -d {self.soa_dir} -s {action_service}"
+                     )
+
+                 branch_dict = None
+         else:
+             branch_dict = None
+         action_dict["monitoring"] = self.get_monitoring()
+
+         cluster_override = _get_tron_k8s_cluster_override(self.get_cluster())
+         return TronActionConfig(
+             service=action_service,
+             instance=compose_instance(self.get_name(), action_name),
+             cluster=cluster_override or self.get_cluster(),
+             config_dict=action_dict,
+             branch_dict=branch_dict,
+             soa_dir=self.soa_dir,
+             for_validation=self.for_validation,
+         )
+
+     def get_actions(self) -> List[TronActionConfig]:
+         actions = self.config_dict.get("actions")
+         return [
+             self._get_action_config(name, action_dict)
+             for name, action_dict in actions.items()
+         ]
+
+     def get_cleanup_action(self):
+         action_dict = self.config_dict.get("cleanup_action")
+         if not action_dict:
+             return None
+
+         # TODO: we should keep this trickery outside paasta repo
+         return self._get_action_config("cleanup", action_dict)
+
+     def check_monitoring(self) -> Tuple[bool, str]:
+         monitoring = self.get_monitoring()
+         valid_teams = list_teams()
+         if monitoring is not None:
+             team_name = monitoring.get("team", None)
+             if team_name is None:
+                 return False, "Team name is required for monitoring"
+             elif team_name not in valid_teams:
+                 suggest_teams = difflib.get_close_matches(
+                     word=team_name, possibilities=valid_teams
+                 )
+                 return (
+                     False,
+                     f"Invalid team name: {team_name}. Do you mean one of these: {suggest_teams}",
+                 )
+         return True, ""
+
+     def check_actions(self) -> Tuple[bool, List[str]]:
+         actions = self.get_actions()
+         cleanup_action = self.get_cleanup_action()
+         if cleanup_action:
+             actions.append(cleanup_action)
+
+         checks_passed = True
+         msgs: List[str] = []
+         for action in actions:
+             action_msgs = action.validate()
+             if action_msgs:
+                 checks_passed = False
+                 msgs.extend(action_msgs)
+         return checks_passed, msgs
+
+     def validate(self) -> List[str]:
+         _, error_msgs = self.check_actions()
+         checks = ["check_monitoring"]
+         for check in checks:
+             check_passed, check_msg = getattr(self, check)()
+             if not check_passed:
+                 error_msgs.append(check_msg)
+         return error_msgs
+
+     def __eq__(self, other):
+         if isinstance(other, type(self)):
+             return self.config_dict == other.config_dict
+         return False
+
+
+ def format_volumes(paasta_volume_list):
+     return [
+         {
+             "container_path": v["containerPath"],
+             "host_path": v["hostPath"],
+             "mode": v["mode"],
+         }
+         for v in paasta_volume_list
+     ]
940
+
941
+
942
+ def format_master_config(master_config, default_volumes, dockercfg_location):
943
+ mesos_options = master_config.get("mesos_options", {})
944
+ mesos_options.update(
945
+ {
946
+ "default_volumes": format_volumes(default_volumes),
947
+ "dockercfg_location": dockercfg_location,
948
+ }
949
+ )
950
+ master_config["mesos_options"] = mesos_options
951
+
952
+ k8s_options = master_config.get("k8s_options", {})
953
+ if k8s_options:
954
+ # Only add default volumes if we already have k8s_options
955
+ k8s_options.update(
956
+ {
957
+ "default_volumes": format_volumes(default_volumes),
958
+ }
959
+ )
960
+ master_config["k8s_options"] = k8s_options
961
+ return master_config
962
+
963
+
964
+ def format_tron_action_dict(action_config: TronActionConfig):
+     """Generate a dict of tronfig for an action, from the TronActionConfig.
+
+     :param action_config: TronActionConfig
+     """
+     executor = action_config.get_executor()
+     result = {
+         "command": action_config.get_cmd(),
+         "executor": executor,
+         "requires": action_config.get_requires(),
+         "node": action_config.get_node(),
+         "retries": action_config.get_retries(),
+         "retries_delay": action_config.get_retries_delay(),
+         "secret_volumes": action_config.get_secret_volumes(),
+         "expected_runtime": action_config.get_expected_runtime(),
+         "trigger_downstreams": action_config.get_trigger_downstreams(),
+         "triggered_by": action_config.get_triggered_by(),
+         "on_upstream_rerun": action_config.get_on_upstream_rerun(),
+         "trigger_timeout": action_config.get_trigger_timeout(),
+         # outside of Spark use-cases, we also allow users to specify an expected-to-exist Service Account name
+         # in the Tron namespace in case an action needs specific k8s permissions (e.g., a Jolt batch may need
+         # k8s permissions to list Jolt pods in the jolt namespace to do science™ to them).
+         # if the provided Service Account does not exist, Tron should simply fail to create the Podspec and report
+         # a failure
+         # NOTE: this will get overridden if an action specifies Pod Identity configs
+         "service_account_name": action_config.get_service_account_name(),
+     }
+
+     # we need this loaded in several branches, so we'll load it once at the start to simplify things
+     system_paasta_config = load_system_paasta_config()
+
+     if executor in KUBERNETES_EXECUTOR_NAMES:
+         # we'd like Tron to be able to distinguish between spark and normal actions
+         # even though they both run on k8s
+         result["executor"] = EXECUTOR_NAME_TO_TRON_EXECUTOR_TYPE.get(
+             executor, "kubernetes"
+         )
+
+         result["secret_env"] = action_config.get_secret_env()
+         result["field_selector_env"] = action_config.get_field_selector_env()
+         all_env = action_config.get_env()
+         # For k8s, we do not want secret envvars to be duplicated in both `env` and `secret_env`
+         # or for field selector env vars to be overwritten
+         result["env"] = {
+             k: v
+             for k, v in all_env.items()
+             if not is_secret_ref(v) and k not in result["field_selector_env"]
+         }
+         result["env"]["ENABLE_PER_INSTANCE_LOGSPOUT"] = "1"
+         result["node_selectors"] = action_config.get_node_selectors()
+         result["node_affinities"] = action_config.get_node_affinities()
+
+         if system_paasta_config.get_enable_tron_tsc():
+             # XXX: this is currently hardcoded since we should only really need TSC for zone-aware scheduling
+             result["topology_spread_constraints"] = [
+                 {
+                     # try to evenly spread pods across specified topology
+                     "max_skew": 1,
+                     # narrow down what pods to consider when spreading
+                     "label_selector": {
+                         # only consider pods that are managed by tron
+                         "app.kubernetes.io/managed-by": "tron",
+                         # and in the same pool
+                         "paasta.yelp.com/pool": action_config.get_pool(),
+                     },
+                     # now, spread across AZs
+                     "topology_key": "topology.kubernetes.io/zone",
+                     # but if not possible, schedule even with a zonal imbalance
+                     "when_unsatisfiable": "ScheduleAnyway",
+                 },
+             ]
+
+         # XXX: once we're off mesos we can make get_cap_* return just the cap names as a list
+         result["cap_add"] = [cap["value"] for cap in action_config.get_cap_add()]
+         result["cap_drop"] = [cap["value"] for cap in action_config.get_cap_drop()]
+
+         result["labels"] = {
+             "paasta.yelp.com/cluster": action_config.get_cluster(),
+             "paasta.yelp.com/pool": action_config.get_pool(),
+             "paasta.yelp.com/service": action_config.get_service(),
+             "paasta.yelp.com/instance": limit_size_with_hash(
+                 action_config.get_instance(),
+                 limit=63,
+                 suffix=4,
+             ),
+             # XXX: should this be different for Spark drivers launched by Tron?
+             "app.kubernetes.io/managed-by": "tron",
+         }
+
+         result["annotations"] = {
+             # we can hardcode this for now as batches really shouldn't
+             # need routable IPs and we know that Spark does.
+             "paasta.yelp.com/routable_ip": "true" if executor == "spark" else "false",
+             # we have a large amount of tron pods whose instance names are too long for a k8s label
+             # ...so let's toss them into an annotation so that tooling can read them (since the length
+             # limit is much higher (256kb))
+             "paasta.yelp.com/service": action_config.get_service(),
+             "paasta.yelp.com/instance": action_config.get_instance(),
+         }
+
+         result["labels"]["yelp.com/owner"] = "compute_infra_platform_experience"
+
+         if (
+             action_config.get_iam_role_provider() == "aws"
+             and action_config.get_iam_role()
+         ):
+             # this service account will be used for normal Tron batches as well as for Spark drivers
+             result["service_account_name"] = get_service_account_name(
+                 iam_role=action_config.get_iam_role(),
+                 k8s_role=None,
+             )
+
+         # service account token volumes for service authentication
+         result["projected_sa_volumes"] = action_config.get_projected_sa_volumes()
+
+         # XXX: now that we're actually passing through extra_volumes correctly (e.g., using get_volumes()),
+         # we can get rid of the default_volumes from the Tron master config
+         extra_volumes = action_config.get_volumes(
+             system_paasta_config.get_volumes(),
+         )
+         if executor == "spark":
+             is_mrjob = action_config.config_dict.get("mrjob", False)
+             # inject additional Spark configs in case of Spark commands
+             result["command"] = spark_tools.build_spark_command(
+                 result["command"],
+                 action_config.action_spark_config,
+                 is_mrjob,
+                 action_config.config_dict.get(
+                     "max_runtime", spark_tools.DEFAULT_SPARK_RUNTIME_TIMEOUT
+                 ),
+                 silent=True,
+             )
+             # point to the KUBECONFIG needed by Spark driver
+             result["env"]["KUBECONFIG"] = system_paasta_config.get_spark_kubeconfig()
+
+             # spark, unlike normal batches, needs to expose several ports for things like the spark
+             # ui and for executor->driver communication
+             result["ports"] = list(
+                 set(
+                     spark_tools.get_spark_ports_from_config(
+                         action_config.action_spark_config
+                     )
+                 )
+             )
+             # mount KUBECONFIG file for Spark drivers to communicate with EKS cluster
+             extra_volumes.append(
+                 DockerVolume(
+                     {
+                         "containerPath": system_paasta_config.get_spark_kubeconfig(),
+                         "hostPath": system_paasta_config.get_spark_kubeconfig(),
+                         "mode": "RO",
+                     }
+                 )
+             )
+             # Add pod annotations and labels for Spark monitoring metrics
+             monitoring_annotations = (
+                 spark_tools.get_spark_driver_monitoring_annotations(
+                     action_config.action_spark_config
+                 )
+             )
+             monitoring_labels = spark_tools.get_spark_driver_monitoring_labels(
+                 action_config.action_spark_config,
+                 user=spark_tools.SPARK_TRON_JOB_USER,
+             )
+             result["annotations"].update(monitoring_annotations)
+             result["labels"].update(monitoring_labels)
+
+     elif executor in MESOS_EXECUTOR_NAMES:
+         result["executor"] = "mesos"
+         constraint_labels = ["attribute", "operator", "value"]
+         result["constraints"] = [
+             dict(zip(constraint_labels, constraint))
+             for constraint in action_config.get_calculated_constraints()
+         ]
+         result["docker_parameters"] = [
+             {"key": param["key"], "value": param["value"]}
+             for param in action_config.format_docker_parameters()
+         ]
+         result["env"] = action_config.get_env()
+
+     # the following config is only valid for k8s/Mesos since we're not running SSH actions
+     # in a containerized fashion
+     if executor in (KUBERNETES_EXECUTOR_NAMES + MESOS_EXECUTOR_NAMES):
+         result["cpus"] = action_config.get_cpus()
+         result["mem"] = action_config.get_mem()
+         result["disk"] = action_config.get_disk()
+         result["extra_volumes"] = format_volumes(extra_volumes)
+         result["docker_image"] = action_config.get_docker_url()
+
+     # Only pass non-None values, so Tron will use defaults for others
+     return {key: val for key, val in result.items() if val is not None}
+
+
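The trailing dict comprehension is what lets Tron apply its own defaults: any key whose value is None is dropped before the tronfig is emitted. The same idiom in isolation, with hypothetical values:

    raw = {"command": "echo hi", "retries": None, "node": "paasta"}
    {key: val for key, val in raw.items() if val is not None}
    # -> {"command": "echo hi", "node": "paasta"}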
+ def format_tron_job_dict(job_config: TronJobConfig, k8s_enabled: bool = False):
+     """Generate a dict of tronfig for a job, from the TronJobConfig.
+
+     :param job_config: TronJobConfig
+     """
+     action_dict = {
+         action_config.get_action_name(): format_tron_action_dict(
+             action_config=action_config,
+         )
+         for action_config in job_config.get_actions()
+     }
+
+     result = {
+         "node": job_config.get_node(),
+         "schedule": job_config.get_schedule(),
+         "actions": action_dict,
+         "monitoring": job_config.get_monitoring(),
+         "queueing": job_config.get_queueing(),
+         "run_limit": job_config.get_run_limit(),
+         "all_nodes": job_config.get_all_nodes(),
+         "enabled": job_config.get_enabled(),
+         "allow_overlap": job_config.get_allow_overlap(),
+         "max_runtime": job_config.get_max_runtime(),
+         "time_zone": job_config.get_time_zone(),
+         "expected_runtime": job_config.get_expected_runtime(),
+     }
+
+     cleanup_config = job_config.get_cleanup_action()
+     if cleanup_config:
+         cleanup_action = format_tron_action_dict(
+             action_config=cleanup_config,
+         )
+         result["cleanup_action"] = cleanup_action
+
+     # Only pass non-None values, so Tron will use defaults for others
+     return {key: val for key, val in result.items() if val is not None}
+
+
+ def load_tron_instance_config(
+     service: str,
+     instance: str,
+     cluster: str,
+     load_deployments: bool = True,
+     soa_dir: str = DEFAULT_SOA_DIR,
+ ) -> TronActionConfig:
+     for action in load_tron_instance_configs(
+         service=service,
+         cluster=cluster,
+         load_deployments=load_deployments,
+         soa_dir=soa_dir,
+     ):
+         if action.get_instance() == instance:
+             return action
+     raise NoConfigurationForServiceError(
+         f"No tron configuration found for {service} {instance}"
+     )
+
+
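A usage sketch for load_tron_instance_config, assuming the "<job>.<action>" instance naming that parse_service_instance_from_executor_id (further down) also reflects; the service, cluster, and instance names are hypothetical:

    action = load_tron_instance_config(
        service="example_service",
        instance="nightly_batch.run",
        cluster="example-cluster",
    )
    # raises NoConfigurationForServiceError if no matching action exists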
+ @time_cache(ttl=5)
+ def load_tron_instance_configs(
+     service: str,
+     cluster: str,
+     load_deployments: bool = True,
+     soa_dir: str = DEFAULT_SOA_DIR,
+ ) -> Tuple[TronActionConfig, ...]:
+     ret: List[TronActionConfig] = []
+
+     jobs = load_tron_service_config(
+         service=service,
+         cluster=cluster,
+         load_deployments=load_deployments,
+         soa_dir=soa_dir,
+     )
+
+     for job in jobs:
+         ret.extend(job.get_actions())
+
+     return tuple(ret)
+
+
+ @time_cache(ttl=5)
+ def load_tron_service_config(
+     service,
+     cluster,
+     load_deployments=True,
+     soa_dir=DEFAULT_SOA_DIR,
+     for_validation=False,
+ ):
+     return load_tron_service_config_no_cache(
+         service,
+         cluster,
+         load_deployments,
+         soa_dir,
+         for_validation,
+     )
+
+
+ def load_tron_service_config_no_cache(
+     service,
+     cluster,
+     load_deployments=True,
+     soa_dir=DEFAULT_SOA_DIR,
+     for_validation=False,
+ ):
+     """Load all configured jobs for a service, and any additional config values."""
+     config = read_extra_service_information(
+         service_name=service, extra_info=f"tron-{cluster}", soa_dir=soa_dir
+     )
+     jobs = filter_templates_from_config(config)
+     job_configs = [
+         TronJobConfig(
+             name=name,
+             service=service,
+             cluster=cluster,
+             config_dict=job,
+             load_deployments=load_deployments,
+             soa_dir=soa_dir,
+             for_validation=for_validation,
+         )
+         for name, job in jobs.items()
+     ]
+     return job_configs
+
+
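A minimal sketch of walking the loaded configs (hypothetical service and cluster names); each TronJobConfig exposes its actions via get_actions(), as used above:

    for job_config in load_tron_service_config(service="example_service", cluster="example-cluster"):
        for action_config in job_config.get_actions():
            print(job_config.get_name(), action_config.get_action_name())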
+ def create_complete_master_config(cluster, soa_dir=DEFAULT_SOA_DIR):
+     system_paasta_config = load_system_paasta_config()
+     tronfig_folder = get_tronfig_folder(soa_dir=soa_dir, cluster=cluster)
+     config = read_yaml_file(os.path.join(tronfig_folder, f"MASTER.yaml"))
+     master_config = format_master_config(
+         config,
+         system_paasta_config.get_volumes(),
+         system_paasta_config.get_dockercfg_location(),
+     )
+     return yaml.dump(master_config, Dumper=Dumper, default_flow_style=False)
+
+
+ def create_complete_config(
+     service: str,
+     cluster: str,
+     soa_dir: str = DEFAULT_SOA_DIR,
+     k8s_enabled: bool = False,
+     dry_run: bool = False,
+ ):
+     """Generate a namespace configuration file for Tron, for a service."""
+     job_configs = load_tron_service_config(
+         service=service,
+         cluster=cluster,
+         load_deployments=True,
+         soa_dir=soa_dir,
+         for_validation=dry_run,
+     )
+     preproccessed_config = {}
+     preproccessed_config["jobs"] = {
+         job_config.get_name(): format_tron_job_dict(
+             job_config=job_config, k8s_enabled=k8s_enabled
+         )
+         for job_config in job_configs
+     }
+     return yaml.dump(preproccessed_config, Dumper=Dumper, default_flow_style=False)
+
+
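A hedged usage sketch (hypothetical names): this renders the complete Tron namespace YAML for one service, with dry_run=True forwarded as for_validation to the job configs:

    print(create_complete_config(service="example_service", cluster="example-cluster", dry_run=True))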
+ def validate_complete_config(
+     service: str, cluster: str, soa_dir: str = DEFAULT_SOA_DIR
+ ) -> List[str]:
+     job_configs = load_tron_service_config(
+         service=service,
+         cluster=cluster,
+         load_deployments=False,
+         soa_dir=soa_dir,
+         for_validation=True,
+     )
+
+     # PaaSTA-specific validation
+     for job_config in job_configs:
+         check_msgs = job_config.validate()
+         if check_msgs:
+             return check_msgs
+
+     master_config_path = os.path.join(
+         os.path.abspath(soa_dir), "tron", cluster, MASTER_NAMESPACE + ".yaml"
+     )
+
+     # TODO: remove creating the master config here once we're fully off of mesos
+     # since we only have it here to verify that the generated tronfig will be valid
+     # given that the kill-switch will affect PaaSTA's setup_tron_namespace script (we're
+     # not reading the kill-switch in Tron since it's not easily accessible at the point
+     # at which we'd like to fallback to Mesos if toggled)
+     master_config = yaml.safe_load(
+         create_complete_master_config(cluster=cluster, soa_dir=soa_dir)
+     )
+     k8s_enabled_for_cluster = master_config.get("k8s_options", {}).get("enabled", False)
+
+     preproccessed_config = {}
+     # Use Tronfig on generated config from PaaSTA to validate the rest
+     preproccessed_config["jobs"] = {
+         job_config.get_name(): format_tron_job_dict(
+             job_config=job_config, k8s_enabled=k8s_enabled_for_cluster
+         )
+         for job_config in job_configs
+     }
+
+     complete_config = yaml.dump(preproccessed_config, Dumper=Dumper)
+
+     proc = subprocess.run(
+         ["tronfig", "-", "-V", "-n", service, "-m", master_config_path],
+         input=complete_config,
+         stdout=subprocess.PIPE,
+         stderr=subprocess.PIPE,
+         encoding="utf-8",
+     )
+
+     if proc.returncode != 0:
+         process_errors = proc.stderr.strip()
+         if process_errors:  # Error running tronfig
+             print(proc.stderr)
+         return [proc.stdout.strip()]
+
+     return []
+
+
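A short sketch of how the validation result is typically consumed (hypothetical names); an empty list means both the PaaSTA-side checks and the tronfig run passed:

    errors = validate_complete_config(service="example_service", cluster="example-cluster")
    if errors:
        print("\n".join(errors))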
+ def _is_valid_namespace(job: Any, tron_executors: List[str]) -> bool:
+     for action_info in job.get("actions", {}).values():
+         if action_info.get("executor", "paasta") in tron_executors:
+             return True
+     return False
+
+
+ def get_tron_namespaces(
+     cluster: str,
+     soa_dir: str,
+     tron_executors: List[str] = EXECUTOR_TYPES,
+ ) -> List[str]:
+     tron_config_file = f"tron-{cluster}.yaml"
+     config_dirs = [
+         _dir[0]
+         for _dir in os.walk(os.path.abspath(soa_dir))
+         if tron_config_file in _dir[2]
+     ]
+     namespaces = [os.path.split(config_dir)[1] for config_dir in config_dirs]
+     tron_namespaces = set()
+     for namespace in namespaces:
+         config = filter_templates_from_config(
+             read_extra_service_information(
+                 namespace,
+                 extra_info=f"tron-{cluster}",
+                 soa_dir=soa_dir,
+                 deepcopy=False,
+             )
+         )
+         for job in config.values():
+             if _is_valid_namespace(job, tron_executors):
+                 tron_namespaces.add(namespace)
+                 break
+     return list(tron_namespaces)
+
+
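A small illustration of _is_valid_namespace on a hypothetical job dict: a namespace counts as soon as any action's executor (defaulting to "paasta") appears in the allowed executor list:

    job = {"actions": {"run": {"executor": "paasta"}}}
    _is_valid_namespace(job, ["paasta", "spark"])  # True
    _is_valid_namespace(job, ["ssh"])              # False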
+ def list_tron_clusters(service: str, soa_dir: str = DEFAULT_SOA_DIR) -> List[str]:
+     """Returns the Tron clusters a service is configured to deploy to."""
+     search_re = r"/tron-([0-9a-z-_]*)\.yaml$"
+     service_dir = os.path.join(soa_dir, service)
+     clusters = []
+     for filename in glob.glob(f"{service_dir}/*.yaml"):
+         cluster_re_match = re.search(search_re, filename)
+         if cluster_re_match is not None:
+             clusters.append(cluster_re_match.group(1))
+     return clusters
+
+
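A quick check of the cluster-extraction regex used above, against a hypothetical soa-configs path:

    import re

    m = re.search(r"/tron-([0-9a-z-_]*)\.yaml$", "/nail/etc/services/example_service/tron-norcal-devc.yaml")
    m.group(1)  # -> "norcal-devc"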
+ def get_tron_dashboard_for_cluster(cluster: str):
+     dashboards = load_system_paasta_config().get_dashboard_links()[cluster]
+     if "Tron" not in dashboards:
+         raise Exception(f"tron api endpoint is not defined for cluster {cluster}")
+     return dashboards["Tron"]
+
+
+ def tron_jobs_running_here() -> List[Tuple[str, str, int]]:
+     return mesos_services_running_here(
+         framework_filter=lambda fw: fw["name"].startswith("tron"),
+         parse_service_instance_from_executor_id=parse_service_instance_from_executor_id,
+     )
+
+
+ def parse_service_instance_from_executor_id(task_id: str) -> Tuple[str, str]:
+     """Parses tron mesos task ids, like schematizer.traffic_generator.28414.turnstyle.46da87d7-6092-4ed4-b926-ffa7b21c7785"""
+     try:
+         service, job, job_run, action, uuid = task_id.split(".")
+     except Exception as e:
+         log.warning(
+             f"Couldn't parse the mesos task id into a valid tron job: {task_id}: {e}"
+         )
+         service, job, action = "unknown_service", "unknown_job", "unknown_action"
+     return service, f"{job}.{action}"