qontract-reconcile 0.9.1rc298__py3-none-any.whl → 0.10.1.dev1203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (843)
  1. qontract_reconcile-0.10.1.dev1203.dist-info/METADATA +500 -0
  2. qontract_reconcile-0.10.1.dev1203.dist-info/RECORD +771 -0
  3. {qontract_reconcile-0.9.1rc298.dist-info → qontract_reconcile-0.10.1.dev1203.dist-info}/WHEEL +1 -2
  4. {qontract_reconcile-0.9.1rc298.dist-info → qontract_reconcile-0.10.1.dev1203.dist-info}/entry_points.txt +4 -2
  5. reconcile/acs_notifiers.py +126 -0
  6. reconcile/acs_policies.py +243 -0
  7. reconcile/acs_rbac.py +596 -0
  8. reconcile/aus/advanced_upgrade_service.py +621 -8
  9. reconcile/aus/aus_label_source.py +115 -0
  10. reconcile/aus/base.py +1053 -353
  11. reconcile/{utils → aus}/cluster_version_data.py +27 -12
  12. reconcile/aus/healthchecks.py +77 -0
  13. reconcile/aus/metrics.py +158 -0
  14. reconcile/aus/models.py +245 -5
  15. reconcile/aus/node_pool_spec.py +35 -0
  16. reconcile/aus/ocm_addons_upgrade_scheduler_org.py +225 -110
  17. reconcile/aus/ocm_upgrade_scheduler.py +76 -71
  18. reconcile/aus/ocm_upgrade_scheduler_org.py +81 -23
  19. reconcile/aus/version_gate_approver.py +204 -0
  20. reconcile/aus/version_gates/__init__.py +12 -0
  21. reconcile/aus/version_gates/handler.py +33 -0
  22. reconcile/aus/version_gates/ingress_gate_handler.py +32 -0
  23. reconcile/aus/version_gates/ocp_gate_handler.py +26 -0
  24. reconcile/aus/version_gates/sts_version_gate_handler.py +100 -0
  25. reconcile/aws_account_manager/README.md +5 -0
  26. reconcile/aws_account_manager/integration.py +373 -0
  27. reconcile/aws_account_manager/merge_request_manager.py +114 -0
  28. reconcile/aws_account_manager/metrics.py +39 -0
  29. reconcile/aws_account_manager/reconciler.py +403 -0
  30. reconcile/aws_account_manager/utils.py +41 -0
  31. reconcile/aws_ami_cleanup/integration.py +273 -0
  32. reconcile/aws_ami_share.py +18 -14
  33. reconcile/aws_cloudwatch_log_retention/integration.py +253 -0
  34. reconcile/aws_iam_keys.py +1 -1
  35. reconcile/aws_iam_password_reset.py +56 -20
  36. reconcile/aws_saml_idp/integration.py +204 -0
  37. reconcile/aws_saml_roles/integration.py +322 -0
  38. reconcile/aws_support_cases_sos.py +2 -2
  39. reconcile/aws_version_sync/integration.py +430 -0
  40. reconcile/aws_version_sync/merge_request_manager/merge_request.py +156 -0
  41. reconcile/aws_version_sync/merge_request_manager/merge_request_manager.py +160 -0
  42. reconcile/aws_version_sync/utils.py +64 -0
  43. reconcile/blackbox_exporter_endpoint_monitoring.py +2 -5
  44. reconcile/change_owners/README.md +34 -0
  45. reconcile/change_owners/approver.py +7 -9
  46. reconcile/change_owners/bundle.py +134 -9
  47. reconcile/change_owners/change_log_tracking.py +236 -0
  48. reconcile/change_owners/change_owners.py +204 -194
  49. reconcile/change_owners/change_types.py +183 -265
  50. reconcile/change_owners/changes.py +488 -0
  51. reconcile/change_owners/decision.py +120 -41
  52. reconcile/change_owners/diff.py +63 -92
  53. reconcile/change_owners/implicit_ownership.py +19 -16
  54. reconcile/change_owners/self_service_roles.py +158 -35
  55. reconcile/change_owners/tester.py +20 -18
  56. reconcile/checkpoint.py +4 -6
  57. reconcile/cli.py +1523 -242
  58. reconcile/closedbox_endpoint_monitoring_base.py +10 -17
  59. reconcile/cluster_auth_rhidp/integration.py +257 -0
  60. reconcile/cluster_deployment_mapper.py +2 -5
  61. reconcile/cna/assets/asset.py +4 -7
  62. reconcile/cna/assets/null.py +2 -5
  63. reconcile/cna/integration.py +2 -3
  64. reconcile/cna/state.py +6 -9
  65. reconcile/dashdotdb_base.py +31 -10
  66. reconcile/dashdotdb_cso.py +3 -6
  67. reconcile/dashdotdb_dora.py +530 -0
  68. reconcile/dashdotdb_dvo.py +10 -13
  69. reconcile/dashdotdb_slo.py +75 -19
  70. reconcile/database_access_manager.py +753 -0
  71. reconcile/deadmanssnitch.py +207 -0
  72. reconcile/dynatrace_token_provider/dependencies.py +69 -0
  73. reconcile/dynatrace_token_provider/integration.py +656 -0
  74. reconcile/dynatrace_token_provider/metrics.py +62 -0
  75. reconcile/dynatrace_token_provider/model.py +14 -0
  76. reconcile/dynatrace_token_provider/ocm.py +140 -0
  77. reconcile/dynatrace_token_provider/validate.py +48 -0
  78. reconcile/endpoints_discovery/integration.py +348 -0
  79. reconcile/endpoints_discovery/merge_request.py +96 -0
  80. reconcile/endpoints_discovery/merge_request_manager.py +178 -0
  81. reconcile/external_resources/aws.py +204 -0
  82. reconcile/external_resources/factories.py +163 -0
  83. reconcile/external_resources/integration.py +194 -0
  84. reconcile/external_resources/integration_secrets_sync.py +47 -0
  85. reconcile/external_resources/manager.py +405 -0
  86. reconcile/external_resources/meta.py +17 -0
  87. reconcile/external_resources/metrics.py +95 -0
  88. reconcile/external_resources/model.py +350 -0
  89. reconcile/external_resources/reconciler.py +265 -0
  90. reconcile/external_resources/secrets_sync.py +465 -0
  91. reconcile/external_resources/state.py +258 -0
  92. reconcile/gabi_authorized_users.py +19 -11
  93. reconcile/gcr_mirror.py +43 -34
  94. reconcile/github_org.py +4 -6
  95. reconcile/github_owners.py +1 -1
  96. reconcile/github_repo_invites.py +2 -5
  97. reconcile/gitlab_fork_compliance.py +14 -13
  98. reconcile/gitlab_housekeeping.py +185 -91
  99. reconcile/gitlab_labeler.py +15 -14
  100. reconcile/gitlab_members.py +126 -120
  101. reconcile/gitlab_owners.py +53 -66
  102. reconcile/gitlab_permissions.py +167 -6
  103. reconcile/glitchtip/README.md +150 -0
  104. reconcile/glitchtip/integration.py +99 -51
  105. reconcile/glitchtip/reconciler.py +99 -70
  106. reconcile/glitchtip_project_alerts/__init__.py +0 -0
  107. reconcile/glitchtip_project_alerts/integration.py +333 -0
  108. reconcile/glitchtip_project_dsn/integration.py +43 -43
  109. reconcile/gql_definitions/acs/__init__.py +0 -0
  110. reconcile/gql_definitions/acs/acs_instances.py +83 -0
  111. reconcile/gql_definitions/acs/acs_policies.py +239 -0
  112. reconcile/gql_definitions/acs/acs_rbac.py +111 -0
  113. reconcile/gql_definitions/advanced_upgrade_service/aus_clusters.py +46 -8
  114. reconcile/gql_definitions/advanced_upgrade_service/aus_organization.py +38 -8
  115. reconcile/gql_definitions/app_interface_metrics_exporter/__init__.py +0 -0
  116. reconcile/gql_definitions/app_interface_metrics_exporter/onboarding_status.py +61 -0
  117. reconcile/gql_definitions/aws_account_manager/__init__.py +0 -0
  118. reconcile/gql_definitions/aws_account_manager/aws_accounts.py +177 -0
  119. reconcile/gql_definitions/aws_ami_cleanup/__init__.py +0 -0
  120. reconcile/gql_definitions/aws_ami_cleanup/aws_accounts.py +161 -0
  121. reconcile/gql_definitions/aws_saml_idp/__init__.py +0 -0
  122. reconcile/gql_definitions/aws_saml_idp/aws_accounts.py +117 -0
  123. reconcile/gql_definitions/aws_saml_roles/__init__.py +0 -0
  124. reconcile/gql_definitions/aws_saml_roles/aws_accounts.py +117 -0
  125. reconcile/gql_definitions/aws_saml_roles/roles.py +97 -0
  126. reconcile/gql_definitions/aws_version_sync/__init__.py +0 -0
  127. reconcile/gql_definitions/aws_version_sync/clusters.py +83 -0
  128. reconcile/gql_definitions/aws_version_sync/namespaces.py +143 -0
  129. reconcile/gql_definitions/change_owners/queries/change_types.py +16 -29
  130. reconcile/gql_definitions/change_owners/queries/self_service_roles.py +45 -11
  131. reconcile/gql_definitions/cluster_auth_rhidp/__init__.py +0 -0
  132. reconcile/gql_definitions/cluster_auth_rhidp/clusters.py +128 -0
  133. reconcile/gql_definitions/cna/queries/cna_provisioners.py +6 -8
  134. reconcile/gql_definitions/cna/queries/cna_resources.py +3 -5
  135. reconcile/gql_definitions/common/alerting_services_settings.py +2 -2
  136. reconcile/gql_definitions/common/app_code_component_repos.py +9 -5
  137. reconcile/gql_definitions/{glitchtip/glitchtip_settings.py → common/app_interface_custom_messages.py} +14 -16
  138. reconcile/gql_definitions/common/app_interface_dms_settings.py +86 -0
  139. reconcile/gql_definitions/common/app_interface_repo_settings.py +2 -2
  140. reconcile/gql_definitions/common/app_interface_state_settings.py +3 -5
  141. reconcile/gql_definitions/common/app_interface_vault_settings.py +3 -5
  142. reconcile/gql_definitions/common/app_quay_repos_escalation_policies.py +120 -0
  143. reconcile/gql_definitions/common/apps.py +72 -0
  144. reconcile/gql_definitions/common/aws_vpc_requests.py +109 -0
  145. reconcile/gql_definitions/common/aws_vpcs.py +84 -0
  146. reconcile/gql_definitions/common/clusters.py +120 -254
  147. reconcile/gql_definitions/common/clusters_minimal.py +11 -35
  148. reconcile/gql_definitions/common/clusters_with_dms.py +72 -0
  149. reconcile/gql_definitions/common/clusters_with_peering.py +70 -98
  150. reconcile/gql_definitions/common/github_orgs.py +2 -2
  151. reconcile/gql_definitions/common/jira_settings.py +68 -0
  152. reconcile/gql_definitions/common/jiralert_settings.py +68 -0
  153. reconcile/gql_definitions/common/namespaces.py +74 -32
  154. reconcile/gql_definitions/common/namespaces_minimal.py +4 -10
  155. reconcile/gql_definitions/common/ocm_env_telemeter.py +95 -0
  156. reconcile/gql_definitions/common/ocm_environments.py +4 -2
  157. reconcile/gql_definitions/common/pagerduty_instances.py +5 -5
  158. reconcile/gql_definitions/common/pgp_reencryption_settings.py +5 -11
  159. reconcile/gql_definitions/common/pipeline_providers.py +45 -90
  160. reconcile/gql_definitions/common/quay_instances.py +64 -0
  161. reconcile/gql_definitions/common/quay_orgs.py +68 -0
  162. reconcile/gql_definitions/common/reserved_networks.py +94 -0
  163. reconcile/gql_definitions/common/saas_files.py +133 -95
  164. reconcile/gql_definitions/common/saas_target_namespaces.py +41 -26
  165. reconcile/gql_definitions/common/saasherder_settings.py +2 -2
  166. reconcile/gql_definitions/common/slack_workspaces.py +62 -0
  167. reconcile/gql_definitions/common/smtp_client_settings.py +2 -2
  168. reconcile/gql_definitions/common/state_aws_account.py +77 -0
  169. reconcile/gql_definitions/common/users.py +3 -2
  170. reconcile/gql_definitions/cost_report/__init__.py +0 -0
  171. reconcile/gql_definitions/cost_report/app_names.py +68 -0
  172. reconcile/gql_definitions/cost_report/cost_namespaces.py +86 -0
  173. reconcile/gql_definitions/cost_report/settings.py +77 -0
  174. reconcile/gql_definitions/dashdotdb_slo/slo_documents_query.py +42 -12
  175. reconcile/gql_definitions/dynatrace_token_provider/__init__.py +0 -0
  176. reconcile/gql_definitions/dynatrace_token_provider/dynatrace_bootstrap_tokens.py +79 -0
  177. reconcile/gql_definitions/dynatrace_token_provider/token_specs.py +84 -0
  178. reconcile/gql_definitions/endpoints_discovery/__init__.py +0 -0
  179. reconcile/gql_definitions/endpoints_discovery/namespaces.py +127 -0
  180. reconcile/gql_definitions/external_resources/__init__.py +0 -0
  181. reconcile/gql_definitions/external_resources/aws_accounts.py +73 -0
  182. reconcile/gql_definitions/external_resources/external_resources_modules.py +78 -0
  183. reconcile/gql_definitions/external_resources/external_resources_namespaces.py +1111 -0
  184. reconcile/gql_definitions/external_resources/external_resources_settings.py +98 -0
  185. reconcile/gql_definitions/fragments/aus_organization.py +34 -39
  186. reconcile/gql_definitions/fragments/aws_account_common.py +62 -0
  187. reconcile/gql_definitions/fragments/aws_account_managed.py +57 -0
  188. reconcile/gql_definitions/fragments/aws_account_sso.py +35 -0
  189. reconcile/gql_definitions/fragments/aws_infra_management_account.py +2 -2
  190. reconcile/gql_definitions/fragments/aws_vpc.py +47 -0
  191. reconcile/gql_definitions/fragments/aws_vpc_request.py +65 -0
  192. reconcile/gql_definitions/fragments/aws_vpc_request_subnet.py +29 -0
  193. reconcile/gql_definitions/fragments/deplopy_resources.py +7 -7
  194. reconcile/gql_definitions/fragments/disable.py +28 -0
  195. reconcile/gql_definitions/fragments/jumphost_common_fields.py +2 -2
  196. reconcile/gql_definitions/fragments/membership_source.py +47 -0
  197. reconcile/gql_definitions/fragments/minimal_ocm_organization.py +29 -0
  198. reconcile/gql_definitions/fragments/oc_connection_cluster.py +4 -9
  199. reconcile/gql_definitions/fragments/ocm_environment.py +5 -5
  200. reconcile/gql_definitions/fragments/pipeline_provider_retention.py +30 -0
  201. reconcile/gql_definitions/fragments/prometheus_instance.py +48 -0
  202. reconcile/gql_definitions/fragments/resource_limits_requirements.py +29 -0
  203. reconcile/gql_definitions/fragments/{resource_requirements.py → resource_requests_requirements.py} +3 -3
  204. reconcile/gql_definitions/fragments/resource_values.py +2 -2
  205. reconcile/gql_definitions/fragments/saas_target_namespace.py +55 -12
  206. reconcile/gql_definitions/fragments/serviceaccount_token.py +38 -0
  207. reconcile/gql_definitions/fragments/terraform_state.py +36 -0
  208. reconcile/gql_definitions/fragments/upgrade_policy.py +5 -3
  209. reconcile/gql_definitions/fragments/user.py +3 -2
  210. reconcile/gql_definitions/fragments/vault_secret.py +2 -2
  211. reconcile/gql_definitions/gitlab_members/gitlab_instances.py +6 -2
  212. reconcile/gql_definitions/gitlab_members/permissions.py +3 -5
  213. reconcile/gql_definitions/glitchtip/glitchtip_instance.py +16 -2
  214. reconcile/gql_definitions/glitchtip/glitchtip_project.py +22 -23
  215. reconcile/gql_definitions/glitchtip_project_alerts/__init__.py +0 -0
  216. reconcile/gql_definitions/glitchtip_project_alerts/glitchtip_project.py +173 -0
  217. reconcile/gql_definitions/integrations/integrations.py +62 -45
  218. reconcile/gql_definitions/introspection.json +51176 -0
  219. reconcile/gql_definitions/jenkins_configs/jenkins_configs.py +13 -5
  220. reconcile/gql_definitions/jenkins_configs/jenkins_instances.py +79 -0
  221. reconcile/gql_definitions/jira/__init__.py +0 -0
  222. reconcile/gql_definitions/jira/jira_servers.py +80 -0
  223. reconcile/gql_definitions/jira_permissions_validator/__init__.py +0 -0
  224. reconcile/gql_definitions/jira_permissions_validator/jira_boards_for_permissions_validator.py +131 -0
  225. reconcile/gql_definitions/jumphosts/jumphosts.py +3 -5
  226. reconcile/gql_definitions/ldap_groups/__init__.py +0 -0
  227. reconcile/gql_definitions/ldap_groups/roles.py +111 -0
  228. reconcile/gql_definitions/ldap_groups/settings.py +79 -0
  229. reconcile/gql_definitions/maintenance/__init__.py +0 -0
  230. reconcile/gql_definitions/maintenance/maintenances.py +101 -0
  231. reconcile/gql_definitions/membershipsources/__init__.py +0 -0
  232. reconcile/gql_definitions/membershipsources/roles.py +112 -0
  233. reconcile/gql_definitions/ocm_labels/__init__.py +0 -0
  234. reconcile/gql_definitions/ocm_labels/clusters.py +112 -0
  235. reconcile/gql_definitions/ocm_labels/organizations.py +78 -0
  236. reconcile/gql_definitions/ocm_subscription_labels/__init__.py +0 -0
  237. reconcile/gql_definitions/openshift_cluster_bots/__init__.py +0 -0
  238. reconcile/gql_definitions/openshift_cluster_bots/clusters.py +126 -0
  239. reconcile/gql_definitions/openshift_groups/managed_groups.py +2 -2
  240. reconcile/gql_definitions/openshift_groups/managed_roles.py +3 -2
  241. reconcile/gql_definitions/openshift_serviceaccount_tokens/__init__.py +0 -0
  242. reconcile/gql_definitions/openshift_serviceaccount_tokens/tokens.py +132 -0
  243. reconcile/gql_definitions/quay_membership/quay_membership.py +3 -5
  244. reconcile/gql_definitions/rhidp/__init__.py +0 -0
  245. reconcile/gql_definitions/rhidp/organizations.py +96 -0
  246. reconcile/gql_definitions/service_dependencies/jenkins_instance_fragment.py +2 -2
  247. reconcile/gql_definitions/service_dependencies/service_dependencies.py +9 -31
  248. reconcile/gql_definitions/sharding/aws_accounts.py +2 -2
  249. reconcile/gql_definitions/sharding/ocm_organization.py +63 -0
  250. reconcile/gql_definitions/skupper_network/site_controller_template.py +2 -2
  251. reconcile/gql_definitions/skupper_network/skupper_networks.py +12 -38
  252. reconcile/gql_definitions/slack_usergroups/clusters.py +2 -2
  253. reconcile/gql_definitions/slack_usergroups/permissions.py +8 -15
  254. reconcile/gql_definitions/slack_usergroups/users.py +3 -2
  255. reconcile/gql_definitions/slo_documents/__init__.py +0 -0
  256. reconcile/gql_definitions/slo_documents/slo_documents.py +142 -0
  257. reconcile/gql_definitions/status_board/__init__.py +0 -0
  258. reconcile/gql_definitions/status_board/status_board.py +163 -0
  259. reconcile/gql_definitions/statuspage/statuspages.py +56 -7
  260. reconcile/gql_definitions/templating/__init__.py +0 -0
  261. reconcile/gql_definitions/templating/template_collection.py +130 -0
  262. reconcile/gql_definitions/templating/templates.py +108 -0
  263. reconcile/gql_definitions/terraform_cloudflare_dns/app_interface_cloudflare_dns_settings.py +4 -8
  264. reconcile/gql_definitions/terraform_cloudflare_dns/terraform_cloudflare_zones.py +8 -8
  265. reconcile/gql_definitions/terraform_cloudflare_resources/terraform_cloudflare_accounts.py +6 -8
  266. reconcile/gql_definitions/terraform_cloudflare_resources/terraform_cloudflare_resources.py +45 -56
  267. reconcile/gql_definitions/terraform_cloudflare_users/app_interface_setting_cloudflare_and_vault.py +4 -8
  268. reconcile/gql_definitions/terraform_cloudflare_users/terraform_cloudflare_roles.py +4 -8
  269. reconcile/gql_definitions/terraform_init/__init__.py +0 -0
  270. reconcile/gql_definitions/terraform_init/aws_accounts.py +93 -0
  271. reconcile/gql_definitions/terraform_repo/__init__.py +0 -0
  272. reconcile/gql_definitions/terraform_repo/terraform_repo.py +141 -0
  273. reconcile/gql_definitions/terraform_resources/database_access_manager.py +158 -0
  274. reconcile/gql_definitions/terraform_resources/terraform_resources_namespaces.py +153 -162
  275. reconcile/gql_definitions/terraform_tgw_attachments/__init__.py +0 -0
  276. reconcile/gql_definitions/terraform_tgw_attachments/aws_accounts.py +119 -0
  277. reconcile/gql_definitions/unleash_feature_toggles/__init__.py +0 -0
  278. reconcile/gql_definitions/unleash_feature_toggles/feature_toggles.py +113 -0
  279. reconcile/gql_definitions/vault_instances/vault_instances.py +17 -50
  280. reconcile/gql_definitions/vault_policies/vault_policies.py +2 -2
  281. reconcile/gql_definitions/vpc_peerings_validator/vpc_peerings_validator.py +49 -12
  282. reconcile/gql_definitions/vpc_peerings_validator/vpc_peerings_validator_peered_cluster_fragment.py +7 -2
  283. reconcile/integrations_manager.py +25 -13
  284. reconcile/jenkins/types.py +5 -1
  285. reconcile/jenkins_base.py +36 -0
  286. reconcile/jenkins_job_builder.py +10 -48
  287. reconcile/jenkins_job_builds_cleaner.py +40 -25
  288. reconcile/jenkins_job_cleaner.py +1 -3
  289. reconcile/jenkins_roles.py +22 -26
  290. reconcile/jenkins_webhooks.py +9 -6
  291. reconcile/jenkins_worker_fleets.py +11 -6
  292. reconcile/jira_permissions_validator.py +340 -0
  293. reconcile/jira_watcher.py +3 -5
  294. reconcile/ldap_groups/__init__.py +0 -0
  295. reconcile/ldap_groups/integration.py +279 -0
  296. reconcile/ldap_users.py +3 -0
  297. reconcile/ocm/types.py +39 -59
  298. reconcile/ocm_additional_routers.py +0 -1
  299. reconcile/ocm_addons_upgrade_tests_trigger.py +10 -15
  300. reconcile/ocm_aws_infrastructure_access.py +30 -32
  301. reconcile/ocm_clusters.py +217 -130
  302. reconcile/ocm_external_configuration_labels.py +15 -0
  303. reconcile/ocm_github_idp.py +1 -1
  304. reconcile/ocm_groups.py +25 -5
  305. reconcile/ocm_internal_notifications/__init__.py +0 -0
  306. reconcile/ocm_internal_notifications/integration.py +119 -0
  307. reconcile/ocm_labels/__init__.py +0 -0
  308. reconcile/ocm_labels/integration.py +409 -0
  309. reconcile/ocm_machine_pools.py +517 -108
  310. reconcile/ocm_upgrade_scheduler_org_updater.py +15 -11
  311. reconcile/openshift_base.py +609 -207
  312. reconcile/openshift_cluster_bots.py +344 -0
  313. reconcile/openshift_clusterrolebindings.py +15 -15
  314. reconcile/openshift_groups.py +42 -45
  315. reconcile/openshift_limitranges.py +1 -0
  316. reconcile/openshift_namespace_labels.py +22 -28
  317. reconcile/openshift_namespaces.py +22 -22
  318. reconcile/openshift_network_policies.py +4 -8
  319. reconcile/openshift_prometheus_rules.py +43 -0
  320. reconcile/openshift_resourcequotas.py +2 -16
  321. reconcile/openshift_resources.py +12 -10
  322. reconcile/openshift_resources_base.py +304 -328
  323. reconcile/openshift_rolebindings.py +18 -20
  324. reconcile/openshift_saas_deploy.py +105 -21
  325. reconcile/openshift_saas_deploy_change_tester.py +30 -35
  326. reconcile/openshift_saas_deploy_trigger_base.py +39 -36
  327. reconcile/openshift_saas_deploy_trigger_cleaner.py +41 -27
  328. reconcile/openshift_saas_deploy_trigger_configs.py +1 -2
  329. reconcile/openshift_saas_deploy_trigger_images.py +1 -2
  330. reconcile/openshift_saas_deploy_trigger_moving_commits.py +1 -2
  331. reconcile/openshift_saas_deploy_trigger_upstream_jobs.py +1 -2
  332. reconcile/openshift_serviceaccount_tokens.py +138 -74
  333. reconcile/openshift_tekton_resources.py +89 -24
  334. reconcile/openshift_upgrade_watcher.py +110 -62
  335. reconcile/openshift_users.py +16 -15
  336. reconcile/openshift_vault_secrets.py +11 -6
  337. reconcile/oum/__init__.py +0 -0
  338. reconcile/oum/base.py +387 -0
  339. reconcile/oum/labelset.py +55 -0
  340. reconcile/oum/metrics.py +71 -0
  341. reconcile/oum/models.py +69 -0
  342. reconcile/oum/providers.py +59 -0
  343. reconcile/oum/standalone.py +196 -0
  344. reconcile/prometheus_rules_tester/integration.py +31 -23
  345. reconcile/quay_base.py +4 -1
  346. reconcile/quay_membership.py +1 -2
  347. reconcile/quay_mirror.py +111 -61
  348. reconcile/quay_mirror_org.py +34 -21
  349. reconcile/quay_permissions.py +7 -3
  350. reconcile/quay_repos.py +24 -32
  351. reconcile/queries.py +263 -198
  352. reconcile/query_validator.py +3 -5
  353. reconcile/resource_scraper.py +3 -4
  354. reconcile/{template_tester.py → resource_template_tester.py} +3 -3
  355. reconcile/rhidp/__init__.py +0 -0
  356. reconcile/rhidp/common.py +214 -0
  357. reconcile/rhidp/metrics.py +20 -0
  358. reconcile/rhidp/ocm_oidc_idp/__init__.py +0 -0
  359. reconcile/rhidp/ocm_oidc_idp/base.py +221 -0
  360. reconcile/rhidp/ocm_oidc_idp/integration.py +56 -0
  361. reconcile/rhidp/ocm_oidc_idp/metrics.py +22 -0
  362. reconcile/rhidp/sso_client/__init__.py +0 -0
  363. reconcile/rhidp/sso_client/base.py +266 -0
  364. reconcile/rhidp/sso_client/integration.py +60 -0
  365. reconcile/rhidp/sso_client/metrics.py +39 -0
  366. reconcile/run_integration.py +293 -0
  367. reconcile/saas_auto_promotions_manager/integration.py +69 -24
  368. reconcile/saas_auto_promotions_manager/merge_request_manager/batcher.py +208 -0
  369. reconcile/saas_auto_promotions_manager/merge_request_manager/desired_state.py +28 -0
  370. reconcile/saas_auto_promotions_manager/merge_request_manager/merge_request.py +3 -4
  371. reconcile/saas_auto_promotions_manager/merge_request_manager/merge_request_manager_v2.py +172 -0
  372. reconcile/saas_auto_promotions_manager/merge_request_manager/metrics.py +42 -0
  373. reconcile/saas_auto_promotions_manager/merge_request_manager/mr_parser.py +226 -0
  374. reconcile/saas_auto_promotions_manager/merge_request_manager/open_merge_requests.py +23 -0
  375. reconcile/saas_auto_promotions_manager/merge_request_manager/renderer.py +108 -32
  376. reconcile/saas_auto_promotions_manager/meta.py +4 -0
  377. reconcile/saas_auto_promotions_manager/publisher.py +32 -4
  378. reconcile/saas_auto_promotions_manager/s3_exporter.py +77 -0
  379. reconcile/saas_auto_promotions_manager/subscriber.py +110 -23
  380. reconcile/saas_auto_promotions_manager/utils/saas_files_inventory.py +48 -41
  381. reconcile/saas_file_validator.py +16 -6
  382. reconcile/sendgrid_teammates.py +27 -12
  383. reconcile/service_dependencies.py +0 -3
  384. reconcile/signalfx_endpoint_monitoring.py +2 -5
  385. reconcile/skupper_network/integration.py +10 -11
  386. reconcile/skupper_network/models.py +3 -5
  387. reconcile/skupper_network/reconciler.py +28 -35
  388. reconcile/skupper_network/site_controller.py +8 -8
  389. reconcile/slack_base.py +4 -7
  390. reconcile/slack_usergroups.py +249 -171
  391. reconcile/sql_query.py +324 -171
  392. reconcile/status.py +0 -1
  393. reconcile/status_board.py +275 -0
  394. reconcile/statuspage/__init__.py +0 -5
  395. reconcile/statuspage/atlassian.py +219 -80
  396. reconcile/statuspage/integration.py +9 -97
  397. reconcile/statuspage/integrations/__init__.py +0 -0
  398. reconcile/statuspage/integrations/components.py +77 -0
  399. reconcile/statuspage/integrations/maintenances.py +111 -0
  400. reconcile/statuspage/page.py +107 -72
  401. reconcile/statuspage/state.py +6 -11
  402. reconcile/statuspage/status.py +8 -12
  403. reconcile/templates/rosa-classic-cluster-creation.sh.j2 +60 -0
  404. reconcile/templates/rosa-hcp-cluster-creation.sh.j2 +61 -0
  405. reconcile/templating/__init__.py +0 -0
  406. reconcile/templating/lib/__init__.py +0 -0
  407. reconcile/templating/lib/merge_request_manager.py +180 -0
  408. reconcile/templating/lib/model.py +20 -0
  409. reconcile/templating/lib/rendering.py +191 -0
  410. reconcile/templating/renderer.py +410 -0
  411. reconcile/templating/validator.py +153 -0
  412. reconcile/terraform_aws_route53.py +13 -10
  413. reconcile/terraform_cloudflare_dns.py +92 -122
  414. reconcile/terraform_cloudflare_resources.py +15 -13
  415. reconcile/terraform_cloudflare_users.py +27 -27
  416. reconcile/terraform_init/__init__.py +0 -0
  417. reconcile/terraform_init/integration.py +165 -0
  418. reconcile/terraform_init/merge_request.py +57 -0
  419. reconcile/terraform_init/merge_request_manager.py +102 -0
  420. reconcile/terraform_repo.py +403 -0
  421. reconcile/terraform_resources.py +266 -168
  422. reconcile/terraform_tgw_attachments.py +417 -167
  423. reconcile/terraform_users.py +40 -17
  424. reconcile/terraform_vpc_peerings.py +310 -142
  425. reconcile/terraform_vpc_resources/__init__.py +0 -0
  426. reconcile/terraform_vpc_resources/integration.py +220 -0
  427. reconcile/terraform_vpc_resources/merge_request.py +57 -0
  428. reconcile/terraform_vpc_resources/merge_request_manager.py +107 -0
  429. reconcile/typed_queries/alerting_services_settings.py +1 -2
  430. reconcile/typed_queries/app_interface_custom_messages.py +24 -0
  431. reconcile/typed_queries/app_interface_deadmanssnitch_settings.py +17 -0
  432. reconcile/typed_queries/app_interface_metrics_exporter/__init__.py +0 -0
  433. reconcile/typed_queries/app_interface_metrics_exporter/onboarding_status.py +13 -0
  434. reconcile/typed_queries/app_interface_repo_url.py +1 -2
  435. reconcile/typed_queries/app_interface_state_settings.py +1 -3
  436. reconcile/typed_queries/app_interface_vault_settings.py +1 -2
  437. reconcile/typed_queries/app_quay_repos_escalation_policies.py +14 -0
  438. reconcile/typed_queries/apps.py +11 -0
  439. reconcile/typed_queries/aws_vpc_requests.py +9 -0
  440. reconcile/typed_queries/aws_vpcs.py +12 -0
  441. reconcile/typed_queries/cloudflare.py +10 -0
  442. reconcile/typed_queries/clusters.py +7 -5
  443. reconcile/typed_queries/clusters_minimal.py +6 -5
  444. reconcile/typed_queries/clusters_with_dms.py +16 -0
  445. reconcile/typed_queries/cost_report/__init__.py +0 -0
  446. reconcile/typed_queries/cost_report/app_names.py +22 -0
  447. reconcile/typed_queries/cost_report/cost_namespaces.py +43 -0
  448. reconcile/typed_queries/cost_report/settings.py +15 -0
  449. reconcile/typed_queries/dynatrace.py +10 -0
  450. reconcile/typed_queries/dynatrace_environments.py +14 -0
  451. reconcile/typed_queries/dynatrace_token_provider_token_specs.py +14 -0
  452. reconcile/typed_queries/external_resources.py +46 -0
  453. reconcile/typed_queries/get_state_aws_account.py +20 -0
  454. reconcile/typed_queries/glitchtip.py +10 -0
  455. reconcile/typed_queries/jenkins.py +25 -0
  456. reconcile/typed_queries/jira.py +7 -0
  457. reconcile/typed_queries/jira_settings.py +16 -0
  458. reconcile/typed_queries/jiralert_settings.py +22 -0
  459. reconcile/typed_queries/ocm.py +8 -0
  460. reconcile/typed_queries/pagerduty_instances.py +2 -7
  461. reconcile/typed_queries/quay.py +23 -0
  462. reconcile/typed_queries/repos.py +20 -8
  463. reconcile/typed_queries/reserved_networks.py +12 -0
  464. reconcile/typed_queries/saas_files.py +221 -167
  465. reconcile/typed_queries/slack.py +7 -0
  466. reconcile/typed_queries/slo_documents.py +12 -0
  467. reconcile/typed_queries/status_board.py +58 -0
  468. reconcile/typed_queries/tekton_pipeline_providers.py +1 -2
  469. reconcile/typed_queries/terraform_namespaces.py +1 -2
  470. reconcile/typed_queries/terraform_tgw_attachments/__init__.py +0 -0
  471. reconcile/typed_queries/terraform_tgw_attachments/aws_accounts.py +16 -0
  472. reconcile/typed_queries/unleash.py +10 -0
  473. reconcile/typed_queries/users.py +11 -0
  474. reconcile/typed_queries/vault.py +10 -0
  475. reconcile/unleash_feature_toggles/__init__.py +0 -0
  476. reconcile/unleash_feature_toggles/integration.py +287 -0
  477. reconcile/utils/acs/__init__.py +0 -0
  478. reconcile/utils/acs/base.py +81 -0
  479. reconcile/utils/acs/notifiers.py +143 -0
  480. reconcile/utils/acs/policies.py +163 -0
  481. reconcile/utils/acs/rbac.py +277 -0
  482. reconcile/utils/aggregated_list.py +11 -9
  483. reconcile/utils/amtool.py +6 -4
  484. reconcile/utils/aws_api.py +279 -66
  485. reconcile/utils/aws_api_typed/__init__.py +0 -0
  486. reconcile/utils/aws_api_typed/account.py +23 -0
  487. reconcile/utils/aws_api_typed/api.py +273 -0
  488. reconcile/utils/aws_api_typed/dynamodb.py +16 -0
  489. reconcile/utils/aws_api_typed/iam.py +67 -0
  490. reconcile/utils/aws_api_typed/organization.py +152 -0
  491. reconcile/utils/aws_api_typed/s3.py +26 -0
  492. reconcile/utils/aws_api_typed/service_quotas.py +79 -0
  493. reconcile/utils/aws_api_typed/sts.py +36 -0
  494. reconcile/utils/aws_api_typed/support.py +79 -0
  495. reconcile/utils/aws_helper.py +42 -3
  496. reconcile/utils/batches.py +11 -0
  497. reconcile/utils/binary.py +7 -9
  498. reconcile/utils/cloud_resource_best_practice/__init__.py +0 -0
  499. reconcile/utils/cloud_resource_best_practice/aws_rds.py +66 -0
  500. reconcile/utils/clusterhealth/__init__.py +0 -0
  501. reconcile/utils/clusterhealth/providerbase.py +39 -0
  502. reconcile/utils/clusterhealth/telemeter.py +39 -0
  503. reconcile/utils/config.py +3 -4
  504. reconcile/utils/deadmanssnitch_api.py +86 -0
  505. reconcile/utils/differ.py +205 -0
  506. reconcile/utils/disabled_integrations.py +4 -6
  507. reconcile/utils/dynatrace/__init__.py +0 -0
  508. reconcile/utils/dynatrace/client.py +93 -0
  509. reconcile/utils/early_exit_cache.py +289 -0
  510. reconcile/utils/elasticsearch_exceptions.py +5 -0
  511. reconcile/utils/environ.py +2 -2
  512. reconcile/utils/exceptions.py +4 -0
  513. reconcile/utils/expiration.py +4 -8
  514. reconcile/utils/extended_early_exit.py +210 -0
  515. reconcile/utils/external_resource_spec.py +34 -12
  516. reconcile/utils/external_resources.py +48 -20
  517. reconcile/utils/filtering.py +16 -0
  518. reconcile/utils/git.py +49 -16
  519. reconcile/utils/github_api.py +10 -9
  520. reconcile/utils/gitlab_api.py +333 -190
  521. reconcile/utils/glitchtip/client.py +97 -100
  522. reconcile/utils/glitchtip/models.py +89 -11
  523. reconcile/utils/gql.py +157 -58
  524. reconcile/utils/grouping.py +17 -0
  525. reconcile/utils/helm.py +89 -18
  526. reconcile/utils/helpers.py +51 -0
  527. reconcile/utils/imap_client.py +5 -6
  528. reconcile/utils/internal_groups/__init__.py +0 -0
  529. reconcile/utils/internal_groups/client.py +160 -0
  530. reconcile/utils/internal_groups/models.py +71 -0
  531. reconcile/utils/jenkins_api.py +10 -34
  532. reconcile/utils/jinja2/__init__.py +0 -0
  533. reconcile/utils/{jinja2_ext.py → jinja2/extensions.py} +6 -4
  534. reconcile/utils/jinja2/filters.py +142 -0
  535. reconcile/utils/jinja2/utils.py +278 -0
  536. reconcile/utils/jira_client.py +165 -8
  537. reconcile/utils/jjb_client.py +47 -35
  538. reconcile/utils/jobcontroller/__init__.py +0 -0
  539. reconcile/utils/jobcontroller/controller.py +413 -0
  540. reconcile/utils/jobcontroller/models.py +195 -0
  541. reconcile/utils/jsonpath.py +4 -5
  542. reconcile/utils/jump_host.py +13 -12
  543. reconcile/utils/keycloak.py +106 -0
  544. reconcile/utils/ldap_client.py +35 -6
  545. reconcile/utils/lean_terraform_client.py +115 -6
  546. reconcile/utils/membershipsources/__init__.py +0 -0
  547. reconcile/utils/membershipsources/app_interface_resolver.py +60 -0
  548. reconcile/utils/membershipsources/models.py +91 -0
  549. reconcile/utils/membershipsources/resolver.py +110 -0
  550. reconcile/utils/merge_request_manager/__init__.py +0 -0
  551. reconcile/utils/merge_request_manager/merge_request_manager.py +99 -0
  552. reconcile/utils/merge_request_manager/parser.py +67 -0
  553. reconcile/utils/metrics.py +511 -1
  554. reconcile/utils/models.py +123 -0
  555. reconcile/utils/mr/README.md +198 -0
  556. reconcile/utils/mr/__init__.py +14 -10
  557. reconcile/utils/mr/app_interface_reporter.py +2 -2
  558. reconcile/utils/mr/aws_access.py +4 -4
  559. reconcile/utils/mr/base.py +51 -31
  560. reconcile/utils/mr/clusters_updates.py +10 -7
  561. reconcile/utils/mr/glitchtip_access_reporter.py +2 -4
  562. reconcile/utils/mr/labels.py +14 -1
  563. reconcile/utils/mr/notificator.py +1 -3
  564. reconcile/utils/mr/ocm_update_recommended_version.py +1 -2
  565. reconcile/utils/mr/ocm_upgrade_scheduler_org_updates.py +7 -3
  566. reconcile/utils/mr/promote_qontract.py +203 -0
  567. reconcile/utils/mr/user_maintenance.py +24 -4
  568. reconcile/utils/oauth2_backend_application_session.py +132 -0
  569. reconcile/utils/oc.py +194 -170
  570. reconcile/utils/oc_connection_parameters.py +40 -51
  571. reconcile/utils/oc_filters.py +11 -13
  572. reconcile/utils/oc_map.py +14 -35
  573. reconcile/utils/ocm/__init__.py +30 -1
  574. reconcile/utils/ocm/addons.py +228 -0
  575. reconcile/utils/ocm/base.py +618 -5
  576. reconcile/utils/ocm/cluster_groups.py +5 -56
  577. reconcile/utils/ocm/clusters.py +111 -99
  578. reconcile/utils/ocm/identity_providers.py +66 -0
  579. reconcile/utils/ocm/label_sources.py +75 -0
  580. reconcile/utils/ocm/labels.py +139 -54
  581. reconcile/utils/ocm/manifests.py +39 -0
  582. reconcile/utils/ocm/ocm.py +182 -928
  583. reconcile/utils/ocm/products.py +758 -0
  584. reconcile/utils/ocm/search_filters.py +20 -28
  585. reconcile/utils/ocm/service_log.py +32 -79
  586. reconcile/utils/ocm/sre_capability_labels.py +51 -0
  587. reconcile/utils/ocm/status_board.py +66 -0
  588. reconcile/utils/ocm/subscriptions.py +49 -59
  589. reconcile/utils/ocm/syncsets.py +39 -0
  590. reconcile/utils/ocm/upgrades.py +181 -0
  591. reconcile/utils/ocm_base_client.py +71 -36
  592. reconcile/utils/openshift_resource.py +113 -67
  593. reconcile/utils/output.py +18 -11
  594. reconcile/utils/pagerduty_api.py +16 -10
  595. reconcile/utils/parse_dhms_duration.py +13 -1
  596. reconcile/utils/prometheus.py +123 -0
  597. reconcile/utils/promotion_state.py +56 -19
  598. reconcile/utils/promtool.py +5 -8
  599. reconcile/utils/quay_api.py +13 -25
  600. reconcile/utils/raw_github_api.py +3 -5
  601. reconcile/utils/repo_owners.py +2 -8
  602. reconcile/utils/rest_api_base.py +126 -0
  603. reconcile/utils/rosa/__init__.py +0 -0
  604. reconcile/utils/rosa/rosa_cli.py +310 -0
  605. reconcile/utils/rosa/session.py +201 -0
  606. reconcile/utils/ruamel.py +16 -0
  607. reconcile/utils/runtime/__init__.py +0 -1
  608. reconcile/utils/runtime/desired_state_diff.py +9 -20
  609. reconcile/utils/runtime/environment.py +33 -8
  610. reconcile/utils/runtime/integration.py +28 -12
  611. reconcile/utils/runtime/meta.py +1 -3
  612. reconcile/utils/runtime/runner.py +8 -11
  613. reconcile/utils/runtime/sharding.py +93 -36
  614. reconcile/utils/saasherder/__init__.py +1 -1
  615. reconcile/utils/saasherder/interfaces.py +143 -138
  616. reconcile/utils/saasherder/models.py +201 -43
  617. reconcile/utils/saasherder/saasherder.py +508 -378
  618. reconcile/utils/secret_reader.py +22 -27
  619. reconcile/utils/semver_helper.py +15 -1
  620. reconcile/utils/slack_api.py +124 -36
  621. reconcile/utils/smtp_client.py +1 -2
  622. reconcile/utils/sqs_gateway.py +10 -6
  623. reconcile/utils/state.py +276 -127
  624. reconcile/utils/terraform/config_client.py +6 -7
  625. reconcile/utils/terraform_client.py +284 -125
  626. reconcile/utils/terrascript/cloudflare_client.py +38 -17
  627. reconcile/utils/terrascript/cloudflare_resources.py +67 -18
  628. reconcile/utils/terrascript/models.py +2 -3
  629. reconcile/utils/terrascript/resources.py +1 -2
  630. reconcile/utils/terrascript_aws_client.py +1292 -540
  631. reconcile/utils/three_way_diff_strategy.py +157 -0
  632. reconcile/utils/unleash/__init__.py +11 -0
  633. reconcile/utils/{unleash.py → unleash/client.py} +35 -29
  634. reconcile/utils/unleash/server.py +145 -0
  635. reconcile/utils/vault.py +42 -32
  636. reconcile/utils/vaultsecretref.py +2 -4
  637. reconcile/utils/vcs.py +250 -0
  638. reconcile/vault_replication.py +38 -31
  639. reconcile/vpc_peerings_validator.py +82 -13
  640. tools/app_interface_metrics_exporter.py +70 -0
  641. tools/app_interface_reporter.py +44 -157
  642. tools/cli_commands/container_images_report.py +154 -0
  643. tools/cli_commands/cost_report/__init__.py +0 -0
  644. tools/cli_commands/cost_report/aws.py +137 -0
  645. tools/cli_commands/cost_report/cost_management_api.py +155 -0
  646. tools/cli_commands/cost_report/model.py +49 -0
  647. tools/cli_commands/cost_report/openshift.py +166 -0
  648. tools/cli_commands/cost_report/openshift_cost_optimization.py +187 -0
  649. tools/cli_commands/cost_report/response.py +124 -0
  650. tools/cli_commands/cost_report/util.py +72 -0
  651. tools/cli_commands/cost_report/view.py +524 -0
  652. tools/cli_commands/erv2.py +620 -0
  653. tools/cli_commands/gpg_encrypt.py +5 -8
  654. tools/cli_commands/systems_and_tools.py +489 -0
  655. tools/glitchtip_access_revalidation.py +1 -1
  656. tools/qontract_cli.py +2301 -673
  657. tools/saas_metrics_exporter/__init__.py +0 -0
  658. tools/saas_metrics_exporter/commit_distance/__init__.py +0 -0
  659. tools/saas_metrics_exporter/commit_distance/channel.py +63 -0
  660. tools/saas_metrics_exporter/commit_distance/commit_distance.py +103 -0
  661. tools/saas_metrics_exporter/commit_distance/metrics.py +19 -0
  662. tools/saas_metrics_exporter/main.py +99 -0
  663. tools/saas_promotion_state/__init__.py +0 -0
  664. tools/saas_promotion_state/saas_promotion_state.py +105 -0
  665. tools/sd_app_sre_alert_report.py +145 -0
  666. tools/template_validation.py +107 -0
  667. e2e_tests/cli.py +0 -83
  668. e2e_tests/create_namespace.py +0 -43
  669. e2e_tests/dedicated_admin_rolebindings.py +0 -44
  670. e2e_tests/dedicated_admin_test_base.py +0 -39
  671. e2e_tests/default_network_policies.py +0 -47
  672. e2e_tests/default_project_labels.py +0 -52
  673. e2e_tests/network_policy_test_base.py +0 -17
  674. e2e_tests/test_base.py +0 -56
  675. qontract_reconcile-0.9.1rc298.dist-info/METADATA +0 -63
  676. qontract_reconcile-0.9.1rc298.dist-info/RECORD +0 -585
  677. qontract_reconcile-0.9.1rc298.dist-info/top_level.txt +0 -4
  678. reconcile/ecr_mirror.py +0 -152
  679. reconcile/github_scanner.py +0 -74
  680. reconcile/gitlab_integrations.py +0 -63
  681. reconcile/gql_definitions/ocm_oidc_idp/clusters.py +0 -195
  682. reconcile/gql_definitions/ocp_release_mirror/ocp_release_mirror.py +0 -287
  683. reconcile/integrations_validator.py +0 -18
  684. reconcile/jenkins_plugins.py +0 -129
  685. reconcile/kafka_clusters.py +0 -208
  686. reconcile/ocm_cluster_admin.py +0 -42
  687. reconcile/ocm_oidc_idp.py +0 -198
  688. reconcile/ocp_release_mirror.py +0 -373
  689. reconcile/prometheus_rules_tester_old.py +0 -436
  690. reconcile/saas_auto_promotions_manager/merge_request_manager/merge_request_manager.py +0 -279
  691. reconcile/saas_auto_promotions_manager/utils/vcs.py +0 -141
  692. reconcile/sentry_config.py +0 -613
  693. reconcile/sentry_helper.py +0 -69
  694. reconcile/test/conftest.py +0 -187
  695. reconcile/test/fixtures.py +0 -24
  696. reconcile/test/saas_auto_promotions_manager/conftest.py +0 -69
  697. reconcile/test/saas_auto_promotions_manager/merge_request_manager/merge_request_manager/conftest.py +0 -110
  698. reconcile/test/saas_auto_promotions_manager/merge_request_manager/merge_request_manager/data_keys.py +0 -10
  699. reconcile/test/saas_auto_promotions_manager/merge_request_manager/merge_request_manager/test_housekeeping.py +0 -200
  700. reconcile/test/saas_auto_promotions_manager/merge_request_manager/merge_request_manager/test_merge_request_manager.py +0 -151
  701. reconcile/test/saas_auto_promotions_manager/merge_request_manager/renderer/conftest.py +0 -63
  702. reconcile/test/saas_auto_promotions_manager/merge_request_manager/renderer/data_keys.py +0 -4
  703. reconcile/test/saas_auto_promotions_manager/merge_request_manager/renderer/test_content_multiple_namespaces.py +0 -46
  704. reconcile/test/saas_auto_promotions_manager/merge_request_manager/renderer/test_content_single_namespace.py +0 -94
  705. reconcile/test/saas_auto_promotions_manager/merge_request_manager/renderer/test_content_single_target.py +0 -44
  706. reconcile/test/saas_auto_promotions_manager/subscriber/conftest.py +0 -74
  707. reconcile/test/saas_auto_promotions_manager/subscriber/data_keys.py +0 -11
  708. reconcile/test/saas_auto_promotions_manager/subscriber/test_content_hash.py +0 -155
  709. reconcile/test/saas_auto_promotions_manager/subscriber/test_diff.py +0 -173
  710. reconcile/test/saas_auto_promotions_manager/subscriber/test_multiple_channels_config_hash.py +0 -226
  711. reconcile/test/saas_auto_promotions_manager/subscriber/test_multiple_channels_moving_ref.py +0 -224
  712. reconcile/test/saas_auto_promotions_manager/subscriber/test_single_channel_with_single_publisher.py +0 -350
  713. reconcile/test/saas_auto_promotions_manager/test_integration_test.py +0 -129
  714. reconcile/test/saas_auto_promotions_manager/utils/saas_files_inventory/test_multiple_publishers_for_single_channel.py +0 -70
  715. reconcile/test/saas_auto_promotions_manager/utils/saas_files_inventory/test_saas_files_use_target_config_hash.py +0 -63
  716. reconcile/test/saas_auto_promotions_manager/utils/saas_files_inventory/test_saas_files_with_auto_promote.py +0 -74
  717. reconcile/test/saas_auto_promotions_manager/utils/saas_files_inventory/test_saas_files_without_auto_promote.py +0 -65
  718. reconcile/test/test_aggregated_list.py +0 -237
  719. reconcile/test/test_amtool.py +0 -37
  720. reconcile/test/test_auto_promoter.py +0 -295
  721. reconcile/test/test_aws_ami_share.py +0 -68
  722. reconcile/test/test_aws_iam_keys.py +0 -70
  723. reconcile/test/test_aws_iam_password_reset.py +0 -35
  724. reconcile/test/test_aws_support_cases_sos.py +0 -23
  725. reconcile/test/test_checkpoint.py +0 -178
  726. reconcile/test/test_cli.py +0 -41
  727. reconcile/test/test_closedbox_endpoint_monitoring.py +0 -207
  728. reconcile/test/test_gabi_authorized_users.py +0 -72
  729. reconcile/test/test_github_org.py +0 -154
  730. reconcile/test/test_github_repo_invites.py +0 -123
  731. reconcile/test/test_gitlab_housekeeping.py +0 -88
  732. reconcile/test/test_gitlab_labeler.py +0 -129
  733. reconcile/test/test_gitlab_members.py +0 -283
  734. reconcile/test/test_instrumented_wrappers.py +0 -18
  735. reconcile/test/test_integrations_manager.py +0 -995
  736. reconcile/test/test_jenkins_worker_fleets.py +0 -55
  737. reconcile/test/test_jump_host.py +0 -117
  738. reconcile/test/test_ldap_users.py +0 -123
  739. reconcile/test/test_make.py +0 -28
  740. reconcile/test/test_ocm_additional_routers.py +0 -134
  741. reconcile/test/test_ocm_addons_upgrade_scheduler_org.py +0 -149
  742. reconcile/test/test_ocm_clusters.py +0 -598
  743. reconcile/test/test_ocm_clusters_manifest_updates.py +0 -89
  744. reconcile/test/test_ocm_oidc_idp.py +0 -315
  745. reconcile/test/test_ocm_update_recommended_version.py +0 -145
  746. reconcile/test/test_ocm_upgrade_scheduler.py +0 -614
  747. reconcile/test/test_ocm_upgrade_scheduler_org_updater.py +0 -129
  748. reconcile/test/test_openshift_base.py +0 -730
  749. reconcile/test/test_openshift_namespace_labels.py +0 -345
  750. reconcile/test/test_openshift_namespaces.py +0 -256
  751. reconcile/test/test_openshift_resource.py +0 -415
  752. reconcile/test/test_openshift_resources_base.py +0 -440
  753. reconcile/test/test_openshift_saas_deploy_change_tester.py +0 -310
  754. reconcile/test/test_openshift_tekton_resources.py +0 -253
  755. reconcile/test/test_openshift_upgrade_watcher.py +0 -146
  756. reconcile/test/test_prometheus_rules_tester.py +0 -151
  757. reconcile/test/test_prometheus_rules_tester_old.py +0 -77
  758. reconcile/test/test_quay_membership.py +0 -86
  759. reconcile/test/test_quay_mirror.py +0 -109
  760. reconcile/test/test_quay_mirror_org.py +0 -70
  761. reconcile/test/test_quay_repos.py +0 -59
  762. reconcile/test/test_queries.py +0 -53
  763. reconcile/test/test_repo_owners.py +0 -47
  764. reconcile/test/test_requests_sender.py +0 -139
  765. reconcile/test/test_saasherder.py +0 -1074
  766. reconcile/test/test_saasherder_allowed_secret_paths.py +0 -127
  767. reconcile/test/test_secret_reader.py +0 -153
  768. reconcile/test/test_slack_base.py +0 -185
  769. reconcile/test/test_slack_usergroups.py +0 -744
  770. reconcile/test/test_sql_query.py +0 -19
  771. reconcile/test/test_terraform_cloudflare_dns.py +0 -117
  772. reconcile/test/test_terraform_cloudflare_resources.py +0 -106
  773. reconcile/test/test_terraform_cloudflare_users.py +0 -749
  774. reconcile/test/test_terraform_resources.py +0 -257
  775. reconcile/test/test_terraform_tgw_attachments.py +0 -631
  776. reconcile/test/test_terraform_users.py +0 -57
  777. reconcile/test/test_terraform_vpc_peerings.py +0 -499
  778. reconcile/test/test_terraform_vpc_peerings_build_desired_state.py +0 -1061
  779. reconcile/test/test_unleash.py +0 -138
  780. reconcile/test/test_utils_aws_api.py +0 -240
  781. reconcile/test/test_utils_aws_helper.py +0 -80
  782. reconcile/test/test_utils_cluster_version_data.py +0 -177
  783. reconcile/test/test_utils_data_structures.py +0 -13
  784. reconcile/test/test_utils_disabled_integrations.py +0 -86
  785. reconcile/test/test_utils_expiration.py +0 -109
  786. reconcile/test/test_utils_external_resource_spec.py +0 -383
  787. reconcile/test/test_utils_external_resources.py +0 -247
  788. reconcile/test/test_utils_github_api.py +0 -73
  789. reconcile/test/test_utils_gitlab_api.py +0 -20
  790. reconcile/test/test_utils_gpg.py +0 -69
  791. reconcile/test/test_utils_gql.py +0 -81
  792. reconcile/test/test_utils_helm.py +0 -306
  793. reconcile/test/test_utils_helpers.py +0 -55
  794. reconcile/test/test_utils_imap_client.py +0 -65
  795. reconcile/test/test_utils_jjb_client.py +0 -52
  796. reconcile/test/test_utils_jsonpath.py +0 -286
  797. reconcile/test/test_utils_ldap_client.py +0 -51
  798. reconcile/test/test_utils_mr.py +0 -226
  799. reconcile/test/test_utils_mr_clusters_updates.py +0 -77
  800. reconcile/test/test_utils_oc.py +0 -984
  801. reconcile/test/test_utils_ocm.py +0 -110
  802. reconcile/test/test_utils_pagerduty_api.py +0 -251
  803. reconcile/test/test_utils_parse_dhms_duration.py +0 -34
  804. reconcile/test/test_utils_password_validator.py +0 -155
  805. reconcile/test/test_utils_quay_api.py +0 -86
  806. reconcile/test/test_utils_semver_helper.py +0 -19
  807. reconcile/test/test_utils_sharding.py +0 -56
  808. reconcile/test/test_utils_slack_api.py +0 -439
  809. reconcile/test/test_utils_smtp_client.py +0 -73
  810. reconcile/test/test_utils_state.py +0 -256
  811. reconcile/test/test_utils_terraform.py +0 -13
  812. reconcile/test/test_utils_terraform_client.py +0 -585
  813. reconcile/test/test_utils_terraform_config_client.py +0 -219
  814. reconcile/test/test_utils_terrascript_aws_client.py +0 -277
  815. reconcile/test/test_utils_terrascript_cloudflare_client.py +0 -597
  816. reconcile/test/test_utils_terrascript_cloudflare_resources.py +0 -26
  817. reconcile/test/test_vault_replication.py +0 -515
  818. reconcile/test/test_vault_utils.py +0 -47
  819. reconcile/test/test_version_bump.py +0 -18
  820. reconcile/test/test_vpc_peerings_validator.py +0 -103
  821. reconcile/test/test_wrong_region.py +0 -78
  822. reconcile/typed_queries/glitchtip_settings.py +0 -18
  823. reconcile/typed_queries/ocp_release_mirror.py +0 -11
  824. reconcile/unleash_watcher.py +0 -120
  825. reconcile/utils/git_secrets.py +0 -63
  826. reconcile/utils/mr/auto_promoter.py +0 -218
  827. reconcile/utils/sentry_client.py +0 -383
  828. release/test_version.py +0 -50
  829. release/version.py +0 -100
  830. tools/test/test_qontract_cli.py +0 -60
  831. tools/test/test_sre_checkpoints.py +0 -79
  832. /e2e_tests/__init__.py → /reconcile/aus/upgrades.py +0 -0
  833. /reconcile/{gql_definitions/ocp_release_mirror → aws_account_manager}/__init__.py +0 -0
  834. /reconcile/{test → aws_ami_cleanup}/__init__.py +0 -0
  835. /reconcile/{test/saas_auto_promotions_manager → aws_cloudwatch_log_retention}/__init__.py +0 -0
  836. /reconcile/{test/saas_auto_promotions_manager/merge_request_manager → aws_saml_idp}/__init__.py +0 -0
  837. /reconcile/{test/saas_auto_promotions_manager/merge_request_manager/merge_request_manager → aws_saml_roles}/__init__.py +0 -0
  838. /reconcile/{test/saas_auto_promotions_manager/merge_request_manager/renderer → aws_version_sync}/__init__.py +0 -0
  839. /reconcile/{test/saas_auto_promotions_manager/subscriber → aws_version_sync/merge_request_manager}/__init__.py +0 -0
  840. /reconcile/{test/saas_auto_promotions_manager/utils → cluster_auth_rhidp}/__init__.py +0 -0
  841. /reconcile/{test/saas_auto_promotions_manager/utils/saas_files_inventory → dynatrace_token_provider}/__init__.py +0 -0
  842. {release → reconcile/endpoints_discovery}/__init__.py +0 -0
  843. {tools/test → reconcile/external_resources}/__init__.py +0 -0
reconcile/aus/base.py CHANGED
@@ -1,166 +1,637 @@
1
- import copy
1
+ import datetime as dt
2
2
  import logging
3
3
  import sys
4
- from abc import abstractmethod
5
- from collections.abc import (
6
- Iterable,
7
- Mapping,
4
+ from abc import (
5
+ ABC,
6
+ abstractmethod,
7
+ )
8
+ from collections.abc import Callable, Sequence
9
+ from datetime import (
10
+ datetime,
11
+ timedelta,
8
12
  )
9
- from datetime import datetime
10
13
  from typing import (
11
- Any,
12
- Callable,
13
- Optional,
14
+ Protocol,
15
+ cast,
14
16
  )
15
17
 
16
18
  from croniter import croniter
19
+ from pydantic import BaseModel, Extra
17
20
  from semver import VersionInfo
18
21
 
19
- from reconcile.aus.models import OrganizationUpgradeSpec
22
+ from reconcile.aus.cluster_version_data import (
23
+ VersionData,
24
+ VersionDataMap,
25
+ WorkloadHistory,
26
+ get_version_data,
27
+ )
28
+ from reconcile.aus.metrics import (
29
+ CLUSTER_HEALTH_HEALTHY_METRIC_VALUE,
30
+ CLUSTER_HEALTH_UNHEALTHY_METRIC_VALUE,
31
+ UPGRADE_BLOCKED_METRIC_VALUE,
32
+ UPGRADE_LONG_RUNNING_METRIC_VALUE,
33
+ UPGRADE_SCHEDULED_METRIC_VALUE,
34
+ UPGRADE_STARTED_METRIC_VALUE,
35
+ AUSClusterHealthStateGauge,
36
+ AUSClusterUpgradePolicyInfoMetric,
37
+ AUSOCMEnvironmentError,
38
+ AUSOrganizationErrorRate,
39
+ AUSOrganizationValidationErrorsGauge,
40
+ )
41
+ from reconcile.aus.models import (
42
+ ClusterAddonUpgradeSpec,
43
+ ClusterUpgradeSpec,
44
+ OrganizationUpgradeSpec,
45
+ Sector,
46
+ )
47
+ from reconcile.aus.version_gates import HANDLERS
48
+ from reconcile.gql_definitions.advanced_upgrade_service.aus_organization import (
49
+ query as aus_organizations_query,
50
+ )
51
+ from reconcile.gql_definitions.common.ocm_env_telemeter import (
52
+ query as ocm_env_telemeter_query,
53
+ )
20
54
  from reconcile.gql_definitions.common.ocm_environments import (
21
55
  query as ocm_environment_query,
22
56
  )
57
+ from reconcile.gql_definitions.fragments.aus_organization import AUSOCMOrganization
23
58
  from reconcile.gql_definitions.fragments.ocm_environment import OCMEnvironment
24
- from reconcile.utils import gql
25
- from reconcile.utils.cluster_version_data import (
26
- VersionData,
27
- WorkloadHistory,
28
- get_version_data,
59
+ from reconcile.gql_definitions.fragments.upgrade_policy import ClusterUpgradePolicyV1
60
+ from reconcile.utils import (
61
+ gql,
62
+ metrics,
63
+ )
64
+ from reconcile.utils.clusterhealth.providerbase import (
65
+ ClusterHealthProvider,
66
+ )
67
+ from reconcile.utils.clusterhealth.telemeter import (
68
+ TELEMETER_SOURCE,
69
+ TelemeterClusterHealthProvider,
29
70
  )
30
71
  from reconcile.utils.defer import defer
31
- from reconcile.utils.ocm import (
32
- OCM,
33
- OCMMap,
34
- Sector,
72
+ from reconcile.utils.disabled_integrations import integration_is_enabled
73
+ from reconcile.utils.filtering import remove_none_values_from_dict
74
+ from reconcile.utils.ocm.addons import AddonService, AddonServiceV1, AddonServiceV2
75
+ from reconcile.utils.ocm.clusters import (
76
+ OCMCluster,
77
+ )
78
+ from reconcile.utils.ocm.upgrades import (
79
+ OCMVersionGate,
80
+ create_control_plane_upgrade_policy,
81
+ create_node_pool_upgrade_policy,
82
+ create_upgrade_policy,
83
+ delete_control_plane_upgrade_policy,
84
+ delete_upgrade_policy,
85
+ get_control_plane_upgrade_policies,
86
+ get_node_pool_upgrade_policies,
87
+ get_upgrade_policies,
88
+ get_version_agreement,
89
+ get_version_gates,
90
+ )
91
+ from reconcile.utils.ocm_base_client import OCMBaseClient
92
+ from reconcile.utils.prometheus import (
93
+ init_prometheus_http_querier_from_prometheus_instance,
35
94
  )
36
95
  from reconcile.utils.runtime.integration import (
37
96
  PydanticRunParams,
38
97
  QontractReconcileIntegration,
39
98
  )
40
99
  from reconcile.utils.semver_helper import (
100
+ get_version_prefix,
41
101
  parse_semver,
42
102
  sort_versions,
43
103
  )
44
104
  from reconcile.utils.state import init_state
45
105
 
106
+ MIN_DELTA_MINUTES = 6
107
+
46
108
 
47
109
  class AdvancedUpgradeSchedulerBaseIntegrationParams(PydanticRunParams):
110
+ ocm_environment: str | None = None
111
+ ocm_organization_ids: set[str] | None = None
112
+ excluded_ocm_organization_ids: set[str] | None = None
113
+ ignore_sts_clusters: bool = False
48
114
 
49
- ocm_environment: Optional[str] = None
50
- ocm_organization: Optional[str] = None
115
+
116
+ class ReconcileErrorSummary(Exception):
117
+ def __init__(self, exceptions: list[str]) -> None:
118
+ self.exceptions = exceptions
119
+
120
+ def __str__(self) -> str:
121
+ formatted_exceptions = "\n".join([f"- {e}" for e in self.exceptions])
122
+ return f"Reconcile exceptions:\n{formatted_exceptions}"
51
123
 
52
124
 
53
125
  class AdvancedUpgradeSchedulerBaseIntegration(
54
126
  QontractReconcileIntegration[AdvancedUpgradeSchedulerBaseIntegrationParams]
55
127
  ):
56
128
  def run(self, dry_run: bool) -> None:
57
- upgrade_specs = self.get_upgrade_specs()
58
- for ocm_env, env_upgrade_specs in upgrade_specs.items():
59
- for org_name, org_upgrade_spec in env_upgrade_specs.items():
60
- if org_upgrade_spec.specs:
61
- self.process_upgrade_policies_in_org(dry_run, org_upgrade_spec)
62
- else:
63
- logging.debug(
64
- f"Skip org {org_name} in {ocm_env} because it defines no upgrade policies"
65
- )
129
+ with metrics.transactional_metrics(self.name):
130
+ upgrade_specs = self.get_upgrade_specs()
131
+ unhandled_exceptions = []
132
+ for ocm_env, env_upgrade_specs in upgrade_specs.items():
133
+ for org_upgrade_spec in env_upgrade_specs.values():
134
+ try:
135
+ with AUSOrganizationErrorRate(
136
+ integration=self.name,
137
+ ocm_env=ocm_env,
138
+ org_id=org_upgrade_spec.org.org_id,
139
+ ):
140
+ self.process_org(dry_run, ocm_env, org_upgrade_spec)
141
+ except Exception as e:
142
+ if not self.signal_reconcile_issues(
143
+ dry_run, org_upgrade_spec, e
144
+ ):
145
+ unhandled_exceptions.append(
146
+ f"{ocm_env}/{org_upgrade_spec.org.name}: {e}"
147
+ )
148
+
149
+ if unhandled_exceptions:
150
+ raise ReconcileErrorSummary(unhandled_exceptions)
66
151
  sys.exit(0)
67
152
 
68
- def get_upgrade_specs(self) -> dict[str, dict[str, OrganizationUpgradeSpec]]:
69
- return {
70
- ocm_env.name: self.get_ocm_env_upgrade_specs(
71
- ocm_env,
72
- self.params.ocm_organization,
153
+ def get_orgs_for_environment(
154
+ self, ocm_env: OCMEnvironment, only_addon_managed_upgrades: bool = False
155
+ ) -> list[AUSOCMOrganization]:
156
+ return get_orgs_for_environment(
157
+ integration=self.name,
158
+ ocm_env_name=ocm_env.name,
159
+ query_func=gql.get_api().query,
160
+ ocm_organization_ids=self.params.ocm_organization_ids,
161
+ excluded_ocm_organization_ids=self.params.excluded_ocm_organization_ids,
162
+ only_addon_managed_upgrades=only_addon_managed_upgrades,
163
+ )
164
+
165
+ def process_org(
166
+ self, dry_run: bool, ocm_env: str, org_upgrade_spec: OrganizationUpgradeSpec
167
+ ) -> None:
168
+ org_name = org_upgrade_spec.org.name
169
+ self.expose_org_upgrade_spec_metrics(ocm_env, org_upgrade_spec)
170
+ if org_upgrade_spec.has_validation_errors:
171
+ self.signal_validation_issues(dry_run, org_upgrade_spec)
172
+ elif org_upgrade_spec.specs:
173
+ self.process_upgrade_policies_in_org(dry_run, org_upgrade_spec)
174
+ else:
175
+ logging.debug(
176
+ f"Skip org {org_upgrade_spec.org.org_id}/{org_name} in {ocm_env} because it defines no upgrade policies"
73
177
  )
74
- for ocm_env in self.get_ocm_environments()
75
- }
76
178
 
77
- def get_ocm_environments(self) -> list[OCMEnvironment]:
179
+ def get_upgrade_specs(self) -> dict[str, dict[str, OrganizationUpgradeSpec]]:
180
+ envs_org_upgrade_specs: dict[str, dict[str, OrganizationUpgradeSpec]] = {}
181
+ for ocm_env in self.get_ocm_environments():
182
+ try:
183
+ envs_org_upgrade_specs[ocm_env.name] = self.get_ocm_env_upgrade_specs(
184
+ ocm_env=ocm_env
185
+ )
186
+ except Exception as e:
187
+ logging.exception(
188
+ "Failed to get org upgrade specs for OCM environment %s. Skipping. %s",
189
+ ocm_env.name,
190
+ e,
191
+ )
192
+ metrics.inc_counter(
193
+ AUSOCMEnvironmentError(
194
+ integration=self.name,
195
+ ocm_env=ocm_env.name,
196
+ )
197
+ )
198
+ return envs_org_upgrade_specs
199
+
200
+ def get_ocm_environments(self, filter: bool = True) -> list[OCMEnvironment]:
78
201
  return ocm_environment_query(
79
202
  gql.get_api().query,
80
203
  variables={"name": self.params.ocm_environment}
81
- if self.params.ocm_environment
204
+ if self.params.ocm_environment and filter
82
205
  else None,
83
206
  ).environments
84
207
 
208
+ def expose_remaining_soak_day_metrics(
209
+ self,
210
+ org_upgrade_spec: OrganizationUpgradeSpec,
211
+ version_data: VersionData,
212
+ current_state: Sequence["AbstractUpgradePolicy"],
213
+ metrics_builder: "RemainingSoakDayMetricsBuilder",
214
+ ) -> None:
215
+ current_cluster_upgrade_policies = {
216
+ p.cluster.external_id: p for p in current_state
217
+ }
218
+ for spec in org_upgrade_spec.specs:
219
+ upgrades = spec.get_available_upgrades()
220
+ if not upgrades:
221
+ continue
222
+
223
+ # calculate the amount every version has soaked. if a version has soaked for
224
+ # multiple workloads, we will pick the minimum soak day value of all workloads
225
+ # relevant on the cluster.
226
+ soaked_versions: dict[str, float] = {}
227
+ for workload in spec.upgrade_policy.workloads:
228
+ for version, soak_days in soaking_days(
229
+ version_data, upgrades, workload, False
230
+ ).items():
231
+ soaked_versions[version] = min(
232
+ soak_days, soaked_versions.get(version, soak_days)
233
+ )
234
+
235
+ current_upgrade = current_cluster_upgrade_policies.get(spec.cluster_uuid)
236
+ for version, metric_value in remaining_soak_day_metric_values_for_cluster(
237
+ spec, soaked_versions, current_upgrade
238
+ ).items():
239
+ metrics.set_gauge(
240
+ metrics_builder(
241
+ cluster_uuid=spec.cluster.external_id, soaking_version=version
242
+ ),
243
+ metric_value,
244
+ )
245
+
85
246
  @abstractmethod
86
247
  def process_upgrade_policies_in_org(
87
248
  self, dry_run: bool, org_upgrade_spec: OrganizationUpgradeSpec
88
- ) -> None:
89
- ...
249
+ ) -> None: ...
90
250
 
91
251
  @abstractmethod
92
252
  def get_ocm_env_upgrade_specs(
93
- self, ocm_env: OCMEnvironment, org_name: Optional[str] = None
94
- ) -> dict[str, OrganizationUpgradeSpec]:
95
- ...
253
+ self, ocm_env: OCMEnvironment
254
+ ) -> dict[str, OrganizationUpgradeSpec]: ...
96
255
 
256
+ def signal_validation_issues(
257
+ self, dry_run: bool, org_upgrade_spec: OrganizationUpgradeSpec
258
+ ) -> None: ...
259
+
260
+ def signal_reconcile_issues(
261
+ self,
262
+ dry_run: bool,
263
+ org_upgrade_spec: OrganizationUpgradeSpec,
264
+ exception: Exception,
265
+ ) -> bool:
266
+ """
267
+ The bool return value is used to indicate if the exception was properly handled.
268
+
269
+ The default behaviour returns False, indicating that the exception was not
270
+ handled so that it can bubble up and potentially fail the integration.
271
+
272
+ This function can be overridden to handle exceptions in a custom way.
273
+ """
274
+ return False
275
+
276
+ def expose_org_upgrade_spec_metrics(
277
+ self, ocm_env: str, org_upgrade_spec: OrganizationUpgradeSpec
278
+ ) -> None:
279
+ metrics.set_gauge(
280
+ AUSOrganizationValidationErrorsGauge(
281
+ integration=self.name,
282
+ ocm_env=ocm_env,
283
+ org_id=org_upgrade_spec.org.org_id,
284
+ ),
285
+ org_upgrade_spec.nr_of_validation_errors,
286
+ )
287
+ for cluster_upgrade_spec in org_upgrade_spec.specs:
288
+ mutexes = cluster_upgrade_spec.upgrade_policy.conditions.mutexes
289
+ metrics.set_info(
290
+ AUSClusterUpgradePolicyInfoMetric(
291
+ integration=self.name,
292
+ ocm_env=ocm_env,
293
+ cluster_uuid=cluster_upgrade_spec.cluster_uuid,
294
+ org_id=cluster_upgrade_spec.org.org_id,
295
+ org_name=org_upgrade_spec.org.name,
296
+ channel=cluster_upgrade_spec.cluster.version.channel_group,
297
+ current_version=cluster_upgrade_spec.oldest_current_version,
298
+ cluster_name=cluster_upgrade_spec.name,
299
+ schedule=cluster_upgrade_spec.upgrade_policy.schedule,
300
+ sector=cluster_upgrade_spec.upgrade_policy.conditions.sector or "",
301
+ mutexes=",".join(mutexes) if mutexes else "",
302
+ soak_days=str(
303
+ cluster_upgrade_spec.upgrade_policy.conditions.soak_days or 0
304
+ ),
305
+ workloads=",".join(cluster_upgrade_spec.upgrade_policy.workloads),
306
+ product=cluster_upgrade_spec.cluster.product.id,
307
+ hypershift=cluster_upgrade_spec.cluster.hypershift.enabled,
308
+ ),
309
+ )
310
+ for (
311
+ source,
312
+ has_health_error,
313
+ ) in cluster_upgrade_spec.health.health_errors_by_source().items():
314
+ metrics.set_gauge(
315
+ AUSClusterHealthStateGauge(
316
+ integration=self.name,
317
+ ocm_env=ocm_env,
318
+ health_source=source,
319
+ cluster_uuid=cluster_upgrade_spec.cluster_uuid,
320
+ ),
321
+ CLUSTER_HEALTH_UNHEALTHY_METRIC_VALUE
322
+ if has_health_error
323
+ else CLUSTER_HEALTH_HEALTHY_METRIC_VALUE,
324
+ )
97
325
 
98
- # consider first lower versions and lower soakdays (when versions are equal)
99
- def sort_key(d: dict) -> tuple:
100
- return (
101
- parse_semver(d["current_version"]),
102
- d["conditions"].get("soakDays") or 0,
103
- )
326
+ def _health_check_providers_for_env(
327
+ self, ocm_env_name: str
328
+ ) -> dict[str, ClusterHealthProvider]:
329
+ providers: dict[str, ClusterHealthProvider] = {}
330
+ telemeter_provider = self._build_telemeter_health_check_provider_for_env(
331
+ ocm_env_name
332
+ )
333
+ if telemeter_provider:
334
+ providers[TELEMETER_SOURCE] = telemeter_provider
335
+ return providers
336
+
337
+ def _build_telemeter_health_check_provider_for_env(
338
+ self,
339
+ ocm_env_name: str,
340
+ ) -> TelemeterClusterHealthProvider | None:
341
+ ocm_env = next(
342
+ iter(
343
+ ocm_env_telemeter_query(
344
+ gql.get_api().query, variables={"name": ocm_env_name}
345
+ ).ocm_envs
346
+ ),
347
+ None,
348
+ )
349
+
350
+ if ocm_env and ocm_env.telemeter:
351
+ return TelemeterClusterHealthProvider(
352
+ querier=init_prometheus_http_querier_from_prometheus_instance(
353
+ prometheus=ocm_env.telemeter,
354
+ secret_reader=self.secret_reader,
355
+ )
356
+ )
104
357
 
358
+ return None
105
359
 
106
- def fetch_current_state(
107
- clusters: list[dict[str, Any]], ocm_map: OCMMap, addons: bool = False
108
- ) -> list[dict[str, Any]]:
109
- current_state = []
110
- for cluster in clusters:
111
- cluster_name = cluster["name"]
112
- ocm = ocm_map.get(cluster_name)
113
- if addons:
114
- upgrade_policies = ocm.get_addon_upgrade_policies(cluster_name)
115
- else:
116
- upgrade_policies = ocm.get_upgrade_policies(cluster_name)
117
- for upgrade_policy in upgrade_policies:
118
- upgrade_policy["cluster"] = cluster_name
119
- current_state.append(upgrade_policy)
120
360
 
121
- return current_state
361
+ def init_addon_service(ocm_env: OCMEnvironment) -> AddonService:
362
+ """
363
+ Initialize the right version of addon-service for an OCM environment.
364
+ Since this is just temporary until all OCM environments are on v2, we
365
+ use a label on the OCM environment schema to determine which version to use.
366
+ """
367
+ addon_service_version = (ocm_env.labels or {}).get(
368
+ "feature_flag_addon_service_version"
369
+ ) or "v2"
370
+ return init_addon_service_version(addon_service_version)
122
371
 
123
372
 
124
- def fetch_desired_state(
125
- clusters: list[dict[str, Any]], ocm_map: OCMMap, addons: bool = False
126
- ) -> list[dict[str, Any]]:
127
- desired_state = []
128
- for cluster in clusters:
129
- cluster_name = cluster["name"]
130
- upgrade_policy = cluster["upgradePolicy"]
131
- upgrade_policy["cluster"] = cluster_name
132
- ocm: OCM = ocm_map.get(cluster_name)
133
- if not ocm.is_ready(cluster_name):
134
- # cluster has been deleted in OCM or is not ready yet
135
- continue
136
- # Replace sector names by their related OCM Sector object, including dependencies
137
- sector_name = upgrade_policy["conditions"].get("sector")
138
- if sector_name:
139
- upgrade_policy["conditions"]["sector"] = ocm.sectors[sector_name]
140
-
141
- if addons:
142
- cluster_addons = ocm.get_cluster_addons(cluster_name, with_version=True)
143
- for addon in cluster_addons:
144
- policy = copy.deepcopy(upgrade_policy)
145
- policy["addon_id"] = addon["id"]
146
- policy["current_version"] = addon["version"]
147
- desired_state.append(policy)
148
- else:
149
- spec = ocm.clusters[cluster_name].spec
150
- upgrade_policy["current_version"] = spec.version
151
- upgrade_policy["channel"] = spec.channel
152
- upgrade_policy["available_upgrades"] = ocm.available_cluster_upgrades.get(
153
- cluster_name
373
+ def init_addon_service_version(addon_service_version: str) -> AddonService:
374
+ """
375
+ Initialize the right version of addon-service based on the version string.
376
+ Supported versions are:
377
+ - v1: part of CS
378
+ - v2: standalone service using upgrade-plans instead of upgrade-policies
379
+ """
380
+ match addon_service_version:
381
+ case "v1":
382
+ return AddonServiceV1()
383
+ case "v2":
384
+ return AddonServiceV2()
385
+ case _:
386
+ raise ValueError(f"Unknown addon service version: {addon_service_version}")
387
+
388
+
389
+ class RemainingSoakDayMetricsBuilder(Protocol):
390
+ def __call__(
391
+ self, cluster_uuid: str, soaking_version: str
392
+ ) -> metrics.GaugeMetric: ...
393
+
394
+
395
+ class AbstractUpgradePolicy(ABC, BaseModel):
396
+ """Abstract class for upgrade policies
397
+ Used to create and delete upgrade policies in OCM."""
398
+
399
+ cluster: OCMCluster
400
+
401
+ id: str | None
402
+ next_run: str | None
403
+ schedule: str | None
404
+ schedule_type: str
405
+ version: str
406
+ state: str | None
407
+
408
+ @abstractmethod
409
+ def create(self, ocm_api: OCMBaseClient) -> None:
410
+ pass
411
+
412
+ @abstractmethod
413
+ def delete(self, ocm_api: OCMBaseClient) -> None:
414
+ pass
415
+
416
+ @abstractmethod
417
+ def summarize(self) -> str:
418
+ pass
419
+
420
+
421
+ def addon_upgrade_policy_soonest_next_run() -> str:
422
+ now = datetime.now(tz=dt.UTC)
423
+ next_run = now + timedelta(minutes=MIN_DELTA_MINUTES)
424
+ return next_run.strftime("%Y-%m-%dT%H:%M:%SZ")
425
+
426
+
427
+ class AddonUpgradePolicy(AbstractUpgradePolicy):
428
+ """Class to create and delete Addon upgrade policies in OCM"""
429
+
430
+ addon_id: str
431
+ addon_service: AddonService
432
+
433
+ class Config:
434
+ arbitrary_types_allowed = True
435
+
436
+ def create(self, ocm_api: OCMBaseClient) -> None:
437
+ self.addon_service.create_addon_upgrade_policy(
438
+ ocm_api=ocm_api,
439
+ cluster_id=self.cluster.id,
440
+ addon_id=self.addon_id,
441
+ schedule_type="manual",
442
+ version=self.version,
443
+ next_run=self.next_run or addon_upgrade_policy_soonest_next_run(),
444
+ )
445
+
446
+ def delete(self, ocm_api: OCMBaseClient) -> None:
447
+ if not self.id:
448
+ raise ValueError(
449
+ "Cannot delete addon upgrade policy without id (not created yet)"
154
450
  )
155
- desired_state.append(upgrade_policy)
451
+ self.addon_service.delete_addon_upgrade_policy(
452
+ ocm_api=ocm_api, cluster_id=self.cluster.id, policy_id=self.id
453
+ )
454
+
455
+ def summarize(self) -> str:
456
+ details = {
457
+ "cluster": self.cluster.name,
458
+ "cluster_id": self.cluster.id,
459
+ "version": self.version,
460
+ "next_run": self.next_run,
461
+ "addon_id": self.addon_id,
462
+ }
463
+ return f"addon upgrade policy - {remove_none_values_from_dict(details)}"
464
+
156
465
 
157
- sorted_desired_state = sorted(desired_state, key=sort_key)
466
+ class ClusterUpgradePolicy(AbstractUpgradePolicy):
467
+ """Class to create and delete ClusterUpgradePolicies in OCM"""
158
468
 
159
- return sorted_desired_state
469
+ def create(self, ocm_api: OCMBaseClient) -> None:
470
+ policy = {
471
+ "version": self.version,
472
+ "schedule_type": "manual",
473
+ "next_run": self.next_run,
474
+ }
475
+ create_upgrade_policy(ocm_api, self.cluster.id, policy)
476
+
477
+ def delete(self, ocm_api: OCMBaseClient) -> None:
478
+ if not self.id:
479
+ raise ValueError(
480
+ "Cannot delete cluster upgrade policy without id (not created yet)"
481
+ )
482
+ delete_upgrade_policy(ocm_api, self.cluster.id, self.id)
483
+
484
+ def summarize(self) -> str:
485
+ details = {
486
+ "cluster": self.cluster.name,
487
+ "cluster_id": self.cluster.id,
488
+ "from_version": self.cluster.version.raw_id,
489
+ "to_version": self.version,
490
+ "next_run": self.next_run,
491
+ }
492
+ return f"cluster upgrade policy - {remove_none_values_from_dict(details)}"
493
+
494
+
495
+ class ControlPlaneUpgradePolicy(AbstractUpgradePolicy):
496
+ """Class to create and delete ControlPlaneUpgradePolicies in OCM"""
497
+
498
+ def create(self, ocm_api: OCMBaseClient) -> None:
499
+ policy = {
500
+ "version": self.version,
501
+ "schedule_type": "manual",
502
+ "upgrade_type": "ControlPlane",
503
+ "cluster_id": self.cluster.id,
504
+ "next_run": self.next_run,
505
+ }
506
+ create_control_plane_upgrade_policy(ocm_api, self.cluster.id, policy)
507
+
508
+ def delete(self, ocm_api: OCMBaseClient) -> None:
509
+ if not self.id:
510
+ raise ValueError(
511
+ "Cannot delete controlplane upgrade policy without id (not created yet)"
512
+ )
513
+ delete_control_plane_upgrade_policy(ocm_api, self.cluster.id, self.id)
514
+
515
+ def summarize(self) -> str:
516
+ details = {
517
+ "cluster": self.cluster.name,
518
+ "cluster_id": self.cluster.id,
519
+ "version": self.version,
520
+ "next_run": self.next_run,
521
+ }
522
+ return f"cluster upgrade policy - {remove_none_values_from_dict(details)}"
523
+
524
+
525
+ class NodePoolUpgradePolicy(AbstractUpgradePolicy):
526
+ node_pool: str
527
+ """Class to create and delete NodePoolUpgradePolicies in OCM"""
528
+
529
+ def create(self, ocm_api: OCMBaseClient) -> None:
530
+ policy = {
531
+ "version": self.version,
532
+ "schedule_type": "manual",
533
+ "upgrade_type": "NodePool",
534
+ "cluster_id": self.cluster.id,
535
+ "next_run": self.next_run,
536
+ }
537
+ create_node_pool_upgrade_policy(
538
+ ocm_api, self.cluster.id, self.node_pool, policy
539
+ )
540
+
541
+ def delete(self, ocm_api: OCMBaseClient) -> None:
542
+ raise NotImplementedError("NodePoolUpgradePolicy.delete() not implemented")
543
+
544
+ def summarize(self) -> str:
545
+ details = {
546
+ "cluster": self.cluster.name,
547
+ "cluster_id": self.cluster.id,
548
+ "node_pool": self.node_pool,
549
+ "version": self.version,
550
+ "next_run": self.next_run,
551
+ }
552
+ return f"node pool upgrade policy - {remove_none_values_from_dict(details)}"
553
+
554
+
555
+ class UpgradePolicyHandler(BaseModel, extra=Extra.forbid):
556
+ """Class to handle upgrade policy actions"""
557
+
558
+ action: str
559
+ policy: AbstractUpgradePolicy
560
+
561
+ def act(self, dry_run: bool, ocm_api: OCMBaseClient) -> None:
562
+ logging.info(f"{self.action} {self.policy.summarize()}")
563
+ if dry_run:
564
+ return
565
+
566
+ if not self.action:
567
+ pass
568
+ elif self.action == "delete":
569
+ self.policy.delete(ocm_api)
570
+ elif self.action == "create":
571
+ self.policy.create(ocm_api)
572
+
573
+
574
+ def fetch_current_state(
575
+ ocm_api: OCMBaseClient,
576
+ org_upgrade_spec: OrganizationUpgradeSpec,
577
+ addons: bool = False,
578
+ ) -> list[AbstractUpgradePolicy]:
579
+ current_state: list[AbstractUpgradePolicy] = []
580
+ addon_service = init_addon_service(org_upgrade_spec.org.environment)
581
+ for spec in org_upgrade_spec.specs:
582
+ if addons and isinstance(spec, ClusterAddonUpgradeSpec):
583
+ addon_spec = cast(ClusterAddonUpgradeSpec, spec)
584
+ addon_upgrade_policies = addon_service.get_addon_upgrade_policies(
585
+ ocm_api, spec.cluster.id, addon_id=addon_spec.addon.addon.id
586
+ )
587
+ for addon_upgrade_policy in addon_upgrade_policies:
588
+ current_state.append(
589
+ AddonUpgradePolicy(
590
+ id=addon_upgrade_policy.id,
591
+ addon_id=addon_spec.addon.addon.id,
592
+ cluster=spec.cluster,
593
+ next_run=addon_upgrade_policy.next_run,
594
+ schedule=addon_upgrade_policy.schedule,
595
+ schedule_type=addon_upgrade_policy.schedule_type,
596
+ version=addon_upgrade_policy.version,
597
+ state=addon_upgrade_policy.state,
598
+ addon_service=addon_service,
599
+ )
600
+ )
601
+ elif spec.cluster.is_rosa_hypershift():
602
+ upgrade_policies = get_control_plane_upgrade_policies(
603
+ ocm_api, spec.cluster.id
604
+ )
605
+ for upgrade_policy in upgrade_policies:
606
+ upgrade_policy["cluster"] = spec.cluster
607
+ current_state.append(ControlPlaneUpgradePolicy(**upgrade_policy))
608
+ for node_pool in spec.node_pools:
609
+ node_upgrade_policies = get_node_pool_upgrade_policies(
610
+ ocm_api, spec.cluster.id, node_pool.id
611
+ )
612
+ for upgrade_policy in node_upgrade_policies:
613
+ upgrade_policy["cluster"] = spec.cluster
614
+ upgrade_policy["node_pool"] = node_pool.id
615
+ current_state.append(NodePoolUpgradePolicy(**upgrade_policy))
616
+ else:
617
+ upgrade_policies = get_upgrade_policies(ocm_api, spec.cluster.id)
618
+ for upgrade_policy in upgrade_policies:
619
+ upgrade_policy["cluster"] = spec.cluster
620
+ current_state.append(ClusterUpgradePolicy(**upgrade_policy))
621
+
622
+ return current_state
623
+
624
+
625
+ # consider first lower versions and lower soakdays (when versions are equal)
626
+ def sort_key(spec: ClusterUpgradeSpec) -> tuple:
627
+ return (
628
+ parse_semver(spec.cluster.version.raw_id),
629
+ spec.upgrade_policy.conditions.soak_days or 0,
630
+ )
160
631
 
161
632
 
162
633
  def update_history(
163
- version_data: VersionData, upgrade_policies: list[dict[str, Any]]
634
+ version_data: VersionData, org_upgrade_spec: OrganizationUpgradeSpec
164
635
  ) -> None:
165
636
  """Update history with information from clusters with upgrade policies.
166
637
 
@@ -172,10 +643,21 @@ def update_history(
172
643
  check_in = version_data.check_in or now
173
644
 
174
645
  # we iterate over clusters upgrade policies and update the version history
175
- for item in upgrade_policies:
176
- current_version = item["current_version"]
177
- cluster = item["cluster"]
178
- workloads = item["workloads"]
646
+ for spec in org_upgrade_spec.specs:
647
+ # ... but we only care about healthy clusters
648
+ errors = spec.health.get_errors(only_enforced=True)
649
+ if errors:
650
+ logging.debug(
651
+ f"unhealthy cluster {spec.cluster.name} "
652
+ f"(id={spec.cluster.id}, org_id={spec.org.org_id}, org_name={spec.org.name}) "
653
+ f"will not contribute to soak days for {spec.cluster.version.raw_id} "
654
+ f"and workloads {spec.upgrade_policy.workloads}: "
655
+ f"{', '.join([e.error for e in errors])}"
656
+ )
657
+ continue
658
+ current_version = spec.current_version
659
+ cluster = spec.cluster.name
660
+ workloads = spec.upgrade_policy.workloads
179
661
  # we keep the version history per workload
180
662
  for w in workloads:
181
663
  workload_history = version_data.workload_history(
@@ -191,67 +673,83 @@ def update_history(
191
673
  else:
192
674
  workload_history.reporting.append(cluster)
193
675
 
194
- version_data.update_stats(upgrade_policies)
676
+ version_data.update_stats(org_upgrade_spec)
195
677
 
196
678
  version_data.check_in = now
197
679
 
198
680
 
681
+ def version_data_state_key(ocm_env: str, org_id: str, addon_id: str | None) -> str:
682
+ return f"{ocm_env}/{org_id}/{addon_id}" if addon_id else f"{ocm_env}/{org_id}"
683
+
684
+
199
685
  @defer
200
686
  def get_version_data_map(
201
687
  dry_run: bool,
202
- upgrade_policies: list[dict[str, Any]],
203
- ocm_map: OCMMap,
688
+ org_upgrade_spec: OrganizationUpgradeSpec,
204
689
  integration: str,
205
690
  addon_id: str = "",
206
- defer: Optional[Callable] = None,
207
- ) -> dict[str, VersionData]:
691
+ inherit_version_data: bool = True,
692
+ defer: Callable | None = None,
693
+ ) -> VersionDataMap:
208
694
  """Get a summary of versions history per OCM instance
209
695
 
210
696
  Args:
211
697
  dry_run (bool): save updated history to remote state
212
- upgrade_policies (list): query results of clusters upgrade policies
213
- ocm_map (OCMMap): OCM clients per OCM instance
698
+ org_upgrade_spec (OrganizationUpgradeSpec): organization upgrade spec
214
699
  addon_id (str): optional addon id to get & store the addon specific state,
215
700
  additionally to the ocm org name
701
+ inherit_version_data: whether to inherit version data from other OCM orgs
216
702
  defer (Optional<Callable>): defer function
217
703
 
218
704
  Returns:
219
- dict: version data per OCM instance
705
+ VersionDataMap: version data per OCM organization, keyed by OCM environment name and organization ID
220
706
  """
221
707
  state = init_state(integration=integration)
222
708
  if defer:
223
709
  defer(state.cleanup)
224
- results: dict[str, VersionData] = {}
225
- # we keep a remote state per OCM instance
226
- for ocm_name in ocm_map.instances():
227
- state_key = f"{ocm_name}/{addon_id}" if addon_id else ocm_name
228
- version_data = get_version_data(state, state_key)
229
- update_history(version_data, upgrade_policies)
230
- results[ocm_name] = version_data
231
- if not dry_run:
232
- version_data.save(state, state_key)
710
+ result = VersionDataMap()
711
+
712
+ # we keep a remote state per OCM org
713
+ state_key = version_data_state_key(
714
+ org_upgrade_spec.org.environment.name, org_upgrade_spec.org.org_id, addon_id
715
+ )
716
+ version_data = get_version_data(state, state_key)
717
+ update_history(version_data, org_upgrade_spec)
718
+ result.add(
719
+ org_upgrade_spec.org.environment.name, org_upgrade_spec.org.org_id, version_data
720
+ )
721
+ if not dry_run:
722
+ version_data.save(state, state_key)
233
723
 
234
724
  # aggregate data from other ocm orgs
235
725
  # this is done *after* saving the state: we do not store the other orgs data in our state.
236
- for ocm_name in ocm_map.instances():
237
- ocm = ocm_map[ocm_name]
238
- for other_ocm in ocm.inheritVersionData:
239
- other_ocm_name = other_ocm["name"]
240
- if ocm_name == other_ocm_name:
726
+ if inherit_version_data:
727
+ for other_ocm in org_upgrade_spec.org.inherit_version_data or []:
728
+ if org_upgrade_spec.org.org_id == other_ocm.org_id:
241
729
  raise ValueError(
242
- f"[{ocm_name}] OCM organization inherits version data from itself"
730
+ f"[{org_upgrade_spec.org.name} - {org_upgrade_spec.org.org_id}] OCM organization inherits version data from itself"
243
731
  )
244
- if ocm.name not in [
245
- o["name"] for o in other_ocm.get("publishVersionData") or []
732
+ if org_upgrade_spec.org.org_id not in [
733
+ o.org_id for o in other_ocm.publish_version_data or []
246
734
  ]:
247
735
  raise ValueError(
248
- f"[{ocm_name}] OCM organization inherits version data from {other_ocm_name}, but this data is not published to it: missing publishVersionData in {other_ocm_name}"
736
+ f"[{org_upgrade_spec.org.name} - {org_upgrade_spec.org.org_id}] OCM organization inherits version data from "
737
+ f"{other_ocm.org_id}, but this data is not published to it: "
738
+ f"missing publishVersionData in {other_ocm.org_id}"
249
739
  )
250
- state_key = f"{other_ocm_name}/{addon_id}" if addon_id else other_ocm_name
251
- other_ocm_data = get_version_data(state, state_key)
252
- results[ocm_name].aggregate(other_ocm_data, other_ocm_name)
740
+ other_ocm_data = get_version_data(
741
+ state,
742
+ version_data_state_key(
743
+ other_ocm.environment.name, other_ocm.org_id, addon_id
744
+ ),
745
+ )
746
+ result.get(
747
+ org_upgrade_spec.org.environment.name, org_upgrade_spec.org.org_id
748
+ ).aggregate(
749
+ other_ocm_data, f"{other_ocm.environment.name}/{other_ocm.org_id}"
750
+ )
253
751
 
254
- return results
752
+ return result
255
753
 
256
754
 
257
755
  def workload_sector_versions(sector: Sector, workload: str) -> list[VersionInfo]:
@@ -259,13 +757,11 @@ def workload_sector_versions(sector: Sector, workload: str) -> list[VersionInfo]
259
757
  get all versions of clusters running the specified workload in that sector
260
758
  """
261
759
  versions = []
262
- for cluster_info in sector.cluster_infos:
760
+ for spec in sector.specs:
263
761
  # clusters within a sector always have workloads (mandatory in schema)
264
- workloads = cluster_info["upgradePolicy"]["workloads"]
762
+ workloads = spec.upgrade_policy.workloads
265
763
  if workload in workloads:
266
- versions.append(
267
- parse_semver(sector.ocmspec(cluster_info["name"]).spec.version)
268
- )
764
+ versions.append(parse_semver(spec.cluster.version.raw_id))
269
765
  return versions
270
766
 
271
767
 
@@ -285,34 +781,33 @@ def workload_sector_dependencies(sector: Sector, workload: str) -> set[Sector]:
285
781
 
286
782
  def version_conditions_met(
287
783
  version: str,
288
- version_data_map: dict[str, VersionData],
289
- ocm_name: str,
290
- workloads: list[str],
291
- upgrade_conditions: dict[str, Any],
784
+ version_data: VersionData,
785
+ upgrade_policy: ClusterUpgradePolicyV1,
786
+ sector: Sector | None,
292
787
  ) -> bool:
293
788
  """Check that upgrade conditions are met for a version
294
789
 
295
790
  Args:
296
791
  version (string): version to check
297
- history (dict): history of versions per OCM instance
298
- ocm_name (string): name of OCM instance
299
- upgrade_conditions (dict): query results of upgrade conditions
792
+ version_data (VersionData): history of versions of an OCM organization
300
793
  workloads (list): strings representing types of workloads
794
+ upgrade_policy (ClusterUpgradePolicyV1): the upgrade policy to validate
795
+
301
796
 
302
797
  Returns:
303
798
  bool: are version upgrade conditions met
304
799
  """
305
- sector = upgrade_conditions.get("sector")
306
800
  if sector:
307
- version_data = version_data_map[ocm_name]
308
801
  # check that inherited orgs run at least that version for our workloads
309
- if not version_data.validate_against_inherited(version, workloads):
802
+ if not version_data.validate_against_inherited(
803
+ version, upgrade_policy.workloads
804
+ ):
310
805
  return False
311
806
 
312
807
  # check if previous sectors run at least this version for that workload
313
808
  # we will check dependencies recursively until there are versions for the given workload
314
809
  # or no more dependencies to check
315
- for w in workloads:
810
+ for w in upgrade_policy.workloads:
316
811
  for dep in workload_sector_dependencies(sector, w):
317
812
  dep_versions = workload_sector_versions(dep, w)
318
813
  if not dep_versions:
@@ -321,10 +816,9 @@ def version_conditions_met(
321
816
  return False
322
817
 
323
818
  # check soak days condition is met for this version
324
- soak_days = upgrade_conditions.get("soakDays", None)
819
+ soak_days = upgrade_policy.conditions.soak_days
325
820
  if soak_days is not None:
326
- version_data = version_data_map[ocm_name]
327
- for w in workloads:
821
+ for w in upgrade_policy.workloads:
328
822
  workload_history = version_data.workload_history(version, w)
329
823
  if soak_days > workload_history.soak_days:
330
824
  return False
@@ -332,261 +826,467 @@ def version_conditions_met(
332
826
  return True
333
827
 
334
828
 
829
+ def gates_for_minor_version(
830
+ gates: list[OCMVersionGate],
831
+ target_version_prefix: str,
832
+ ) -> list[OCMVersionGate]:
833
+ return [g for g in gates if g.version_raw_id_prefix == target_version_prefix]
834
+
835
+
836
+ def is_gate_applicable_to_cluster(gate: OCMVersionGate, cluster: OCMCluster) -> bool:
837
+ # check that the cluster has an upgrade path that crosses the gate version
838
+ minor_version_upgrade_paths = {
839
+ get_version_prefix(version) for version in cluster.available_upgrades()
840
+ }
841
+ if gate.version_raw_id_prefix not in minor_version_upgrade_paths:
842
+ return False
843
+
844
+ # consider only gates after the cluster's current minor version
845
+ # OCM only supports creating gate agreements for later minor versions than the
846
+ # current cluster version
847
+ if not parse_semver(f"{cluster.minor_version()}.0").match(
848
+ f"<{gate.version_raw_id_prefix}.0"
849
+ ):
850
+ return False
851
+
852
+ # check the handler for the gate type if it is responsible for this kind
853
+ # of cluster
854
+ handler = HANDLERS.get(gate.label)
855
+ if handler:
856
+ return handler.gate_applicable_to_cluster(cluster)
857
+ return False
858
+
859
+
335
860
  def gates_to_agree(
336
- version_prefix: str, cluster: str, cluster_version: str, ocm: OCM
337
- ) -> list[str]:
861
+ gates: list[OCMVersionGate],
862
+ cluster: OCMCluster,
863
+ acked_gate_ids: set[str],
864
+ ) -> list[OCMVersionGate]:
338
865
  """Check via OCM if a version is agreed
339
866
 
340
867
  Args:
341
- version_prefix (string): major.minor version prefix
342
- cluster (string)
343
- cluster_version (string): current version of the cluster
344
- ocm (OCM): used to fetch infos from OCM
868
+ gates (list[OCMVersionGate]): list of OCMVersionGate objects to check for agreements
869
+ cluster (OCMCluster): the cluster that needs gate agreements
870
+ acked_gate_ids (set[str]): IDs of the gates that are excluded from the result as already agreed
345
871
 
346
872
  Returns:
347
- bool: true on missing agreement
873
+ list[OCMVersionGate]: list of gates a cluster has not agreed on yet
348
874
  """
349
- agreements = {
350
- agreement["version_gate"]["id"]
351
- for agreement in ocm.get_version_agreement(cluster)
352
- }
353
- semver_cluster = parse_semver(f"{cluster_version}")
354
-
355
- return [
356
- gate["id"]
357
- for gate in ocm.get_version_gates(version_prefix)
358
- if gate["id"] not in agreements and semver_cluster.match(f"<{version_prefix}.0")
359
- ]
360
-
875
+ applicable_gates = [g for g in gates if is_gate_applicable_to_cluster(g, cluster)]
361
876
 
362
- def get_version_prefix(version: str) -> str:
363
- semver = parse_semver(version)
364
- return f"{semver.major}.{semver.minor}"
877
+ if applicable_gates:
878
+ return [gate for gate in applicable_gates if gate.id not in acked_gate_ids]
879
+ return []
365
880
 
366
881
 
367
882
  def upgradeable_version(
368
- policy: Mapping,
369
- version_data_map: dict[str, VersionData],
370
- ocm: OCM,
371
- upgrades: Iterable[str],
372
- addon_id: str = "",
373
- ) -> Optional[str]:
883
+ spec: ClusterUpgradeSpec,
884
+ version_data: VersionData,
885
+ sector: Sector | None,
886
+ ) -> str | None:
374
887
  """Get the highest next version we can upgrade to, fulfilling all conditions"""
375
- for version in reversed(sort_versions(upgrades)):
376
- if addon_id and ocm.addon_version_blocked(version, addon_id):
377
- continue
378
- if not addon_id and ocm.version_blocked(version):
888
+ for version in reversed(sort_versions(spec.get_available_upgrades())):
889
+ if spec.version_blocked(version):
379
890
  continue
380
891
  if version_conditions_met(
381
892
  version,
382
- version_data_map,
383
- ocm.name,
384
- policy["workloads"],
385
- policy["conditions"],
893
+ version_data,
894
+ spec.upgrade_policy,
895
+ sector,
386
896
  ):
387
897
  return version
388
898
  return None
389
899
 
390
900
 
391
- def cluster_mutexes(policy: dict) -> list[str]:
392
- """List all mutex locks for the given cluster"""
393
- return (policy.get("conditions") or {}).get("mutexes") or []
901
+ def verify_current_should_skip(
902
+ current_state: Sequence[AbstractUpgradePolicy],
903
+ desired: ClusterUpgradeSpec,
904
+ now: datetime,
905
+ addon_id: str = "",
906
+ ) -> tuple[bool, UpgradePolicyHandler | None]:
907
+ current_policies = [c for c in current_state if c.cluster.id == desired.cluster.id]
908
+ if not current_policies:
909
+ return False, None
910
+
911
+ # there can only be one upgrade policy per cluster
912
+ if len(current_policies) != 1:
913
+ raise ValueError(
914
+ f"[{desired.org.org_id}/{desired.cluster.name}] expected only one upgrade policy"
915
+ )
916
+ current = current_policies[0]
917
+ version = current.version # may not exist in automatic upgrades
918
+ if version and not addon_id and desired.version_blocked(version):
919
+ next_run = current.next_run
920
+ if next_run and datetime.strptime(next_run, "%Y-%m-%dT%H:%M:%SZ") < now:
921
+ logging.warning(
922
+ f"[{desired.org.org_id}/{desired.org.name}/{desired.cluster.name}] currently upgrading to blocked version '{version}'"
923
+ )
924
+ return True, None
925
+ logging.debug(
926
+ f"[{desired.org.org_id}/{desired.org.name}/{desired.cluster.name}] found planned upgrade policy "
927
+ + f"with blocked version {version}"
928
+ )
929
+ return False, UpgradePolicyHandler(action="delete", policy=current)
930
+
931
+ # else
932
+ logging.debug(
933
+ f"[{desired.org.org_id}/{desired.org.name}/{desired.cluster.name}] skipping cluster with existing upgrade policy"
934
+ )
935
+ return True, None
936
+
937
+
938
+ def verify_schedule_should_skip(
939
+ desired: ClusterUpgradeSpec,
940
+ now: datetime,
941
+ addon_id: str = "",
942
+ ) -> str | None:
943
+ schedule = desired.upgrade_policy.schedule
944
+ iter = croniter(schedule, day_or=False)
945
+ # ClusterService refuses scheduling upgrades less than 5m in advance
946
+ # Let's find the next schedule that is at least 5m ahead.
947
+ # We do not need that much delay for addon upgrades since they run
948
+ # immediately
949
+ delay_minutes = 1 if addon_id else MIN_DELTA_MINUTES
950
+ next_schedule = iter.get_next(
951
+ dt.datetime, start_time=now + timedelta(minutes=delay_minutes)
952
+ )
953
+ next_schedule_in_seconds = (next_schedule - now).total_seconds()
954
+ next_schedule_in_hours = next_schedule_in_seconds / 3600 # seconds in hour
955
+
956
+ # ignore clusters with an upgrade schedule not within the next 2 hours
957
+ within_upgrade_timeframe = next_schedule_in_hours <= 2
958
+ if addon_id:
959
+ # addons upgrade cannot be scheduled in advance as the "next_run" field
960
+ # is not supported. So we run this only 10min before schedule to be somewhat
961
+ # correct
962
+ within_upgrade_timeframe = next_schedule_in_seconds / 60 <= 10
963
+ if not within_upgrade_timeframe:
964
+ logging.debug(
965
+ f"[{desired.org.org_id}/{desired.org.name}/{desired.cluster.name}] skipping cluster with no upcoming upgrade"
966
+ )
967
+ return None
968
+ return next_schedule.strftime("%Y-%m-%dT%H:%M:%SZ")
969
+
970
+
971
+ def verify_lock_should_skip(
972
+ desired: ClusterUpgradeSpec, locked: dict[str, str]
973
+ ) -> bool:
974
+ mutexes = desired.effective_mutexes
975
+ if any(lock in locked for lock in mutexes):
976
+ locking = {lock: locked[lock] for lock in mutexes if lock in locked}
977
+ logging.debug(
978
+ f"[{desired.org.org_id}/{desired.org.name}/{desired.cluster.name}] skipping cluster: locked out by {locking}"
979
+ )
980
+ return True
981
+ return False
982
+
983
+
984
+ def _create_upgrade_policy(
985
+ next_schedule: str, spec: ClusterUpgradeSpec, version: str
986
+ ) -> AbstractUpgradePolicy:
987
+ if spec.cluster.is_rosa_hypershift():
988
+ return ControlPlaneUpgradePolicy(
989
+ cluster=spec.cluster,
990
+ version=version,
991
+ schedule_type="manual",
992
+ next_run=next_schedule,
993
+ )
994
+ return ClusterUpgradePolicy(
995
+ cluster=spec.cluster,
996
+ version=version,
997
+ schedule_type="manual",
998
+ next_run=next_schedule,
999
+ )
1000
+
1001
+
1002
+ def _calculate_node_pool_diffs(
1003
+ spec: ClusterUpgradeSpec, now: datetime
1004
+ ) -> UpgradePolicyHandler | None:
1005
+ for pool in spec.node_pools:
1006
+ if parse_semver(pool.version).match(f"<{spec.current_version}"):
1007
+ next_schedule = (now + timedelta(minutes=MIN_DELTA_MINUTES)).strftime(
1008
+ "%Y-%m-%dT%H:%M:%SZ"
1009
+ )
1010
+ return UpgradePolicyHandler(
1011
+ action="create",
1012
+ policy=NodePoolUpgradePolicy(
1013
+ cluster=spec.cluster,
1014
+ version=spec.current_version,
1015
+ schedule_type="manual",
1016
+ next_run=next_schedule,
1017
+ node_pool=pool.id,
1018
+ ),
1019
+ )
1020
+ return None
394
1021
 
395
1022
 
396
1023
def calculate_diff(
    current_state: Sequence[AbstractUpgradePolicy],
    desired_state: OrganizationUpgradeSpec,
    ocm_api: OCMBaseClient,
    version_data: VersionData,
    addon_id: str = "",
) -> list[UpgradePolicyHandler]:
    """Check available upgrades for each cluster in the desired state
    according to upgrade conditions

    Args:
        current_state (list): currently existing upgrade policies
        desired_state (OrganizationUpgradeSpec): organization upgrade spec
        ocm_api (OCMBaseClient): OCM API client
        version_data (VersionData): version data history of the org
        addon_id (str): optional addonid to calculate diffs for

    Returns:
        list: upgrade policies to be applied
    """

    def set_mutex(
        locked: dict[str, str], cluster_id: str, mutexes: set[str] | None = None
    ) -> None:
        # record the cluster as the holder of each of its mutexes
        for mutex in mutexes or set():
            locked[mutex] = cluster_id

    diffs: list[UpgradePolicyHandler] = []

    # all clusters IDs with a current upgradePolicy are considered locked.
    # the IDs are collected once into a set so the membership test below
    # does not rebuild a list for every desired spec.
    clusters_with_policy = {policy.cluster.id for policy in current_state}
    locked: dict[str, str] = {}
    for spec in desired_state.specs:
        if spec.cluster.id in clusters_with_policy:
            set_mutex(locked, spec.cluster.id, spec.effective_mutexes)

    addon_service = init_addon_service(desired_state.org.environment)
    now = datetime.utcnow()
    gates = get_version_gates(ocm_api)
    for spec in desired_state.specs:
        # Upgrading node pools, only required for Hypershift clusters
        # do this in the same loop, to skip cluster on node pool upgrade
        if spec.cluster.is_rosa_hypershift():
            if verify_lock_should_skip(spec, locked):
                continue

            node_pool_update = _calculate_node_pool_diffs(spec, now)
            if node_pool_update:  # node pool update policy not yet created
                diffs.append(node_pool_update)
                set_mutex(locked, spec.cluster.id, spec.effective_mutexes)
                continue

        # ignore clusters with an existing upgrade policy
        skip, delete_policy = verify_current_should_skip(
            current_state, spec, now, addon_id
        )
        if skip:
            continue
        if delete_policy:
            diffs.append(delete_policy)

        next_schedule = verify_schedule_should_skip(spec, now, addon_id)
        if not next_schedule:
            continue

        if verify_lock_should_skip(spec, locked):
            continue

        sector_name = spec.upgrade_policy.conditions.sector
        sector = None
        if sector_name:
            sector = desired_state.sectors[sector_name]
        version = upgradeable_version(spec, version_data, sector)
        if version:
            if addon_id:
                diffs.append(
                    UpgradePolicyHandler(
                        action="create",
                        policy=AddonUpgradePolicy(
                            action="create",
                            cluster=spec.cluster,
                            version=version,
                            schedule_type="manual",
                            addon_id=addon_id,
                            upgrade_type="ADDON",
                            addon_service=addon_service,
                        ),
                    )
                )
            else:
                target_version_prefix = get_version_prefix(version)
                minor_version_gates = gates_for_minor_version(
                    gates=gates,
                    target_version_prefix=target_version_prefix,
                )
                gates_with_missing_agreements = gates_to_agree(
                    gates=minor_version_gates,
                    cluster=spec.cluster,
                    acked_gate_ids={
                        agreement["version_gate"]["id"]
                        for agreement in get_version_agreement(ocm_api, spec.cluster.id)
                    },
                )
                if gates_with_missing_agreements:
                    # no policy is created (and no mutex taken) while gates
                    # are still waiting for an agreement
                    missing_gate_ids = [
                        gate.id for gate in gates_with_missing_agreements
                    ]
                    logging.info(
                        f"[{spec.org.org_id}/{spec.org.name}/{spec.cluster.name}] found gates with missing agreements for {target_version_prefix} - {missing_gate_ids} "
                        "Skip creation of an upgrade policy until all of them have been acked by the version-gate-approver integration or a user."
                    )
                    continue
                diffs.append(
                    UpgradePolicyHandler(
                        action="create",
                        policy=_create_upgrade_policy(next_schedule, spec, version),
                    )
                )
            # a policy was planned for this cluster: block clusters sharing
            # one of its mutexes for the rest of this run
            set_mutex(locked, spec.cluster.id, spec.effective_mutexes)

    return diffs
537
1144
 
538
1145
 
539
def sort_diffs(diff: UpgradePolicyHandler) -> int:
    """Sort key for upgrade policy handlers: "delete" actions rank before all others."""
    return 1 if diff.action == "delete" else 2
543
1150
 
544
1151
 
545
- def action_log(*items: Optional[str]) -> None:
546
- # log all non-empty, non-null items
547
- logging.info([item for item in items if item])
548
-
549
-
550
def act(
    dry_run: bool,
    diffs: list[UpgradePolicyHandler],
    ocm_api: OCMBaseClient,
    addon_id: str | None = None,
) -> None:
    """Apply the given upgrade policy handlers.

    ``diffs`` is sorted in place with ``sort_diffs`` before anything is
    applied. When ``addon_id`` is set, handlers whose policy is an
    ``AddonUpgradePolicy`` for a different addon are skipped.

    Args:
        dry_run: forwarded to each handler's ``act``.
        diffs: handlers to apply; reordered in place.
        ocm_api: OCM API client forwarded to each handler's ``act``.
        addon_id: optional addon ID to restrict addon policies to.
    """
    diffs.sort(key=sort_diffs)
    for handler in diffs:
        candidate = handler.policy
        belongs_to_other_addon = (
            addon_id
            and isinstance(candidate, AddonUpgradePolicy)
            and addon_id != candidate.addon_id
        )
        if not belongs_to_other_addon:
            handler.act(dry_run, ocm_api)
1168
+
1169
+
1170
def soaking_days(
    version_data: VersionData,
    upgrades: list[str],
    workload: str,
    only_soaking: bool,
) -> dict[str, float]:
    """Return the soak days per upgrade version for a workload.

    Args:
        version_data: version history; ``workload_history(version, workload)``
            is called for every version and its ``soak_days`` is read.
        upgrades: candidate upgrade versions.
        workload: workload name to look up.
        only_soaking: kept for interface compatibility. It has no effect:
            every version in ``upgrades`` always receives an entry, so the
            former ``version not in soaking`` fallback could never trigger.

    Returns:
        Mapping of version to its soak days, rounded to two decimals.
    """
    return {
        version: round(version_data.workload_history(version, workload).soak_days, 2)
        for version in upgrades
    }
1183
+
1184
+
1185
def get_orgs_for_environment(
    integration: str,
    ocm_env_name: str,
    query_func: Callable,
    ocm_organization_ids: set[str] | None = None,
    excluded_ocm_organization_ids: set[str] | None = None,
    only_addon_managed_upgrades: bool = False,
) -> list[AUSOCMOrganization]:
    """
    Returns the organizations of the given OCM environment that pass all
    filters derived from the provided arguments.

    Args:
        integration (str): integration name checked via `integration_is_enabled` for each organization
        ocm_env_name (str): OCM environment name to filter
        query_func (Callable): function to query organizations via GQL
        ocm_organization_ids (Optional[set[str]]): if any organization IDs are provided, any other organizations are excluded from the results
        excluded_ocm_organization_ids (Optional[set[str]]): if any organization IDs are provided, these organizations are excluded from the results
        only_addon_managed_upgrades (bool): if True, organizations without enabled addon management are excluded from the results

    Returns:
        list[AUSOCMOrganization]: list of organizations matching the given filters
    """

    def _is_selected(org: AUSOCMOrganization) -> bool:
        # checks run in the same order as the filter chain they replace
        if org.environment.name != ocm_env_name:
            return False
        if not integration_is_enabled(integration, org):
            return False
        if only_addon_managed_upgrades and not org.addon_managed_upgrades:
            return False
        if ocm_organization_ids and org.org_id not in ocm_organization_ids:
            return False
        if (
            excluded_ocm_organization_ids
            and org.org_id in excluded_ocm_organization_ids
        ):
            return False
        return True

    candidates = aus_organizations_query(query_func=query_func).organizations or []
    return [org for org in candidates if _is_selected(org)]
1220
+
1221
+
1222
def remaining_soak_day_metric_values_for_cluster(
    spec: ClusterUpgradeSpec,
    soaked_versions: dict[str, float],
    current_upgrade: AbstractUpgradePolicy | None,
) -> dict[str, float]:
    """
    Work out which versions and values to report for the `AUS*VersionRemainingSoakDaysGauge` metrics.

    Normally the value is the number of soak days a version still needs (0 once it soaked
    enough). Special marker constants replace that value when the version is blocked or when
    the current upgrade policy targets it and is scheduled, started/delayed, or long running.

    Versions that carry no useful signal (an older candidate while a later one is already
    soaked or being upgraded to) are left out to avoid metric clutter.
    """
    candidates = spec.get_available_upgrades()
    if not candidates:
        return {}

    # remaining soak days per candidate: > 0 while still soaking, 0 once soaked enough
    required_soak_days = spec.upgrade_policy.conditions.soak_days or 0
    remaining: list[float] = [
        max(required_soak_days - soaked_versions.get(candidate, 0), 0)
        for candidate in candidates
    ]

    metrics: dict[str, float] = {}
    # walk from the last candidate to the first
    for idx in reversed(range(len(candidates))):
        version = candidates[idx]
        if current_upgrade and current_upgrade.version == version:
            # only `scheduled`, `started` and `delayed` are mapped to marker values;
            # any other policy state keeps the computed soak days
            state = current_upgrade.state
            if state == "scheduled":
                remaining[idx] = UPGRADE_SCHEDULED_METRIC_VALUE
            elif state in {"started", "delayed"}:
                remaining[idx] = UPGRADE_STARTED_METRIC_VALUE
                if current_upgrade.next_run:
                    # an upgrade whose next_run lies 6 or more hours in the past
                    # is reported as long running
                    started_at = datetime.strptime(
                        current_upgrade.next_run, "%Y-%m-%dT%H:%M:%SZ"
                    )
                    elapsed = datetime.utcnow() - started_at
                    if elapsed.total_seconds() / 3600 >= 6:
                        remaining[idx] = UPGRADE_LONG_RUNNING_METRIC_VALUE
        elif spec.version_blocked(version):
            # blocked versions are still reported, with a dedicated marker value
            remaining[idx] = UPGRADE_BLOCKED_METRIC_VALUE

        # a version that still soaks or soaked enough is intentionally not reported when a
        # later version is already soaked or being upgraded to: the later one wins anyway
        later_values = remaining[idx + 1 :]
        if remaining[idx] >= 0 and any(
            later_value
            in {
                0,
                UPGRADE_SCHEDULED_METRIC_VALUE,
                UPGRADE_STARTED_METRIC_VALUE,
                UPGRADE_LONG_RUNNING_METRIC_VALUE,
            }
            for later_value in later_values
        ):
            continue
        metrics[version] = remaining[idx]

    return metrics