qontract-reconcile 0.9.1rc298__py3-none-any.whl → 0.10.1.dev1203__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qontract_reconcile-0.10.1.dev1203.dist-info/METADATA +500 -0
- qontract_reconcile-0.10.1.dev1203.dist-info/RECORD +771 -0
- {qontract_reconcile-0.9.1rc298.dist-info → qontract_reconcile-0.10.1.dev1203.dist-info}/WHEEL +1 -2
- {qontract_reconcile-0.9.1rc298.dist-info → qontract_reconcile-0.10.1.dev1203.dist-info}/entry_points.txt +4 -2
- reconcile/acs_notifiers.py +126 -0
- reconcile/acs_policies.py +243 -0
- reconcile/acs_rbac.py +596 -0
- reconcile/aus/advanced_upgrade_service.py +621 -8
- reconcile/aus/aus_label_source.py +115 -0
- reconcile/aus/base.py +1053 -353
- reconcile/{utils → aus}/cluster_version_data.py +27 -12
- reconcile/aus/healthchecks.py +77 -0
- reconcile/aus/metrics.py +158 -0
- reconcile/aus/models.py +245 -5
- reconcile/aus/node_pool_spec.py +35 -0
- reconcile/aus/ocm_addons_upgrade_scheduler_org.py +225 -110
- reconcile/aus/ocm_upgrade_scheduler.py +76 -71
- reconcile/aus/ocm_upgrade_scheduler_org.py +81 -23
- reconcile/aus/version_gate_approver.py +204 -0
- reconcile/aus/version_gates/__init__.py +12 -0
- reconcile/aus/version_gates/handler.py +33 -0
- reconcile/aus/version_gates/ingress_gate_handler.py +32 -0
- reconcile/aus/version_gates/ocp_gate_handler.py +26 -0
- reconcile/aus/version_gates/sts_version_gate_handler.py +100 -0
- reconcile/aws_account_manager/README.md +5 -0
- reconcile/aws_account_manager/integration.py +373 -0
- reconcile/aws_account_manager/merge_request_manager.py +114 -0
- reconcile/aws_account_manager/metrics.py +39 -0
- reconcile/aws_account_manager/reconciler.py +403 -0
- reconcile/aws_account_manager/utils.py +41 -0
- reconcile/aws_ami_cleanup/integration.py +273 -0
- reconcile/aws_ami_share.py +18 -14
- reconcile/aws_cloudwatch_log_retention/integration.py +253 -0
- reconcile/aws_iam_keys.py +1 -1
- reconcile/aws_iam_password_reset.py +56 -20
- reconcile/aws_saml_idp/integration.py +204 -0
- reconcile/aws_saml_roles/integration.py +322 -0
- reconcile/aws_support_cases_sos.py +2 -2
- reconcile/aws_version_sync/integration.py +430 -0
- reconcile/aws_version_sync/merge_request_manager/merge_request.py +156 -0
- reconcile/aws_version_sync/merge_request_manager/merge_request_manager.py +160 -0
- reconcile/aws_version_sync/utils.py +64 -0
- reconcile/blackbox_exporter_endpoint_monitoring.py +2 -5
- reconcile/change_owners/README.md +34 -0
- reconcile/change_owners/approver.py +7 -9
- reconcile/change_owners/bundle.py +134 -9
- reconcile/change_owners/change_log_tracking.py +236 -0
- reconcile/change_owners/change_owners.py +204 -194
- reconcile/change_owners/change_types.py +183 -265
- reconcile/change_owners/changes.py +488 -0
- reconcile/change_owners/decision.py +120 -41
- reconcile/change_owners/diff.py +63 -92
- reconcile/change_owners/implicit_ownership.py +19 -16
- reconcile/change_owners/self_service_roles.py +158 -35
- reconcile/change_owners/tester.py +20 -18
- reconcile/checkpoint.py +4 -6
- reconcile/cli.py +1523 -242
- reconcile/closedbox_endpoint_monitoring_base.py +10 -17
- reconcile/cluster_auth_rhidp/integration.py +257 -0
- reconcile/cluster_deployment_mapper.py +2 -5
- reconcile/cna/assets/asset.py +4 -7
- reconcile/cna/assets/null.py +2 -5
- reconcile/cna/integration.py +2 -3
- reconcile/cna/state.py +6 -9
- reconcile/dashdotdb_base.py +31 -10
- reconcile/dashdotdb_cso.py +3 -6
- reconcile/dashdotdb_dora.py +530 -0
- reconcile/dashdotdb_dvo.py +10 -13
- reconcile/dashdotdb_slo.py +75 -19
- reconcile/database_access_manager.py +753 -0
- reconcile/deadmanssnitch.py +207 -0
- reconcile/dynatrace_token_provider/dependencies.py +69 -0
- reconcile/dynatrace_token_provider/integration.py +656 -0
- reconcile/dynatrace_token_provider/metrics.py +62 -0
- reconcile/dynatrace_token_provider/model.py +14 -0
- reconcile/dynatrace_token_provider/ocm.py +140 -0
- reconcile/dynatrace_token_provider/validate.py +48 -0
- reconcile/endpoints_discovery/integration.py +348 -0
- reconcile/endpoints_discovery/merge_request.py +96 -0
- reconcile/endpoints_discovery/merge_request_manager.py +178 -0
- reconcile/external_resources/aws.py +204 -0
- reconcile/external_resources/factories.py +163 -0
- reconcile/external_resources/integration.py +194 -0
- reconcile/external_resources/integration_secrets_sync.py +47 -0
- reconcile/external_resources/manager.py +405 -0
- reconcile/external_resources/meta.py +17 -0
- reconcile/external_resources/metrics.py +95 -0
- reconcile/external_resources/model.py +350 -0
- reconcile/external_resources/reconciler.py +265 -0
- reconcile/external_resources/secrets_sync.py +465 -0
- reconcile/external_resources/state.py +258 -0
- reconcile/gabi_authorized_users.py +19 -11
- reconcile/gcr_mirror.py +43 -34
- reconcile/github_org.py +4 -6
- reconcile/github_owners.py +1 -1
- reconcile/github_repo_invites.py +2 -5
- reconcile/gitlab_fork_compliance.py +14 -13
- reconcile/gitlab_housekeeping.py +185 -91
- reconcile/gitlab_labeler.py +15 -14
- reconcile/gitlab_members.py +126 -120
- reconcile/gitlab_owners.py +53 -66
- reconcile/gitlab_permissions.py +167 -6
- reconcile/glitchtip/README.md +150 -0
- reconcile/glitchtip/integration.py +99 -51
- reconcile/glitchtip/reconciler.py +99 -70
- reconcile/glitchtip_project_alerts/__init__.py +0 -0
- reconcile/glitchtip_project_alerts/integration.py +333 -0
- reconcile/glitchtip_project_dsn/integration.py +43 -43
- reconcile/gql_definitions/acs/__init__.py +0 -0
- reconcile/gql_definitions/acs/acs_instances.py +83 -0
- reconcile/gql_definitions/acs/acs_policies.py +239 -0
- reconcile/gql_definitions/acs/acs_rbac.py +111 -0
- reconcile/gql_definitions/advanced_upgrade_service/aus_clusters.py +46 -8
- reconcile/gql_definitions/advanced_upgrade_service/aus_organization.py +38 -8
- reconcile/gql_definitions/app_interface_metrics_exporter/__init__.py +0 -0
- reconcile/gql_definitions/app_interface_metrics_exporter/onboarding_status.py +61 -0
- reconcile/gql_definitions/aws_account_manager/__init__.py +0 -0
- reconcile/gql_definitions/aws_account_manager/aws_accounts.py +177 -0
- reconcile/gql_definitions/aws_ami_cleanup/__init__.py +0 -0
- reconcile/gql_definitions/aws_ami_cleanup/aws_accounts.py +161 -0
- reconcile/gql_definitions/aws_saml_idp/__init__.py +0 -0
- reconcile/gql_definitions/aws_saml_idp/aws_accounts.py +117 -0
- reconcile/gql_definitions/aws_saml_roles/__init__.py +0 -0
- reconcile/gql_definitions/aws_saml_roles/aws_accounts.py +117 -0
- reconcile/gql_definitions/aws_saml_roles/roles.py +97 -0
- reconcile/gql_definitions/aws_version_sync/__init__.py +0 -0
- reconcile/gql_definitions/aws_version_sync/clusters.py +83 -0
- reconcile/gql_definitions/aws_version_sync/namespaces.py +143 -0
- reconcile/gql_definitions/change_owners/queries/change_types.py +16 -29
- reconcile/gql_definitions/change_owners/queries/self_service_roles.py +45 -11
- reconcile/gql_definitions/cluster_auth_rhidp/__init__.py +0 -0
- reconcile/gql_definitions/cluster_auth_rhidp/clusters.py +128 -0
- reconcile/gql_definitions/cna/queries/cna_provisioners.py +6 -8
- reconcile/gql_definitions/cna/queries/cna_resources.py +3 -5
- reconcile/gql_definitions/common/alerting_services_settings.py +2 -2
- reconcile/gql_definitions/common/app_code_component_repos.py +9 -5
- reconcile/gql_definitions/{glitchtip/glitchtip_settings.py → common/app_interface_custom_messages.py} +14 -16
- reconcile/gql_definitions/common/app_interface_dms_settings.py +86 -0
- reconcile/gql_definitions/common/app_interface_repo_settings.py +2 -2
- reconcile/gql_definitions/common/app_interface_state_settings.py +3 -5
- reconcile/gql_definitions/common/app_interface_vault_settings.py +3 -5
- reconcile/gql_definitions/common/app_quay_repos_escalation_policies.py +120 -0
- reconcile/gql_definitions/common/apps.py +72 -0
- reconcile/gql_definitions/common/aws_vpc_requests.py +109 -0
- reconcile/gql_definitions/common/aws_vpcs.py +84 -0
- reconcile/gql_definitions/common/clusters.py +120 -254
- reconcile/gql_definitions/common/clusters_minimal.py +11 -35
- reconcile/gql_definitions/common/clusters_with_dms.py +72 -0
- reconcile/gql_definitions/common/clusters_with_peering.py +70 -98
- reconcile/gql_definitions/common/github_orgs.py +2 -2
- reconcile/gql_definitions/common/jira_settings.py +68 -0
- reconcile/gql_definitions/common/jiralert_settings.py +68 -0
- reconcile/gql_definitions/common/namespaces.py +74 -32
- reconcile/gql_definitions/common/namespaces_minimal.py +4 -10
- reconcile/gql_definitions/common/ocm_env_telemeter.py +95 -0
- reconcile/gql_definitions/common/ocm_environments.py +4 -2
- reconcile/gql_definitions/common/pagerduty_instances.py +5 -5
- reconcile/gql_definitions/common/pgp_reencryption_settings.py +5 -11
- reconcile/gql_definitions/common/pipeline_providers.py +45 -90
- reconcile/gql_definitions/common/quay_instances.py +64 -0
- reconcile/gql_definitions/common/quay_orgs.py +68 -0
- reconcile/gql_definitions/common/reserved_networks.py +94 -0
- reconcile/gql_definitions/common/saas_files.py +133 -95
- reconcile/gql_definitions/common/saas_target_namespaces.py +41 -26
- reconcile/gql_definitions/common/saasherder_settings.py +2 -2
- reconcile/gql_definitions/common/slack_workspaces.py +62 -0
- reconcile/gql_definitions/common/smtp_client_settings.py +2 -2
- reconcile/gql_definitions/common/state_aws_account.py +77 -0
- reconcile/gql_definitions/common/users.py +3 -2
- reconcile/gql_definitions/cost_report/__init__.py +0 -0
- reconcile/gql_definitions/cost_report/app_names.py +68 -0
- reconcile/gql_definitions/cost_report/cost_namespaces.py +86 -0
- reconcile/gql_definitions/cost_report/settings.py +77 -0
- reconcile/gql_definitions/dashdotdb_slo/slo_documents_query.py +42 -12
- reconcile/gql_definitions/dynatrace_token_provider/__init__.py +0 -0
- reconcile/gql_definitions/dynatrace_token_provider/dynatrace_bootstrap_tokens.py +79 -0
- reconcile/gql_definitions/dynatrace_token_provider/token_specs.py +84 -0
- reconcile/gql_definitions/endpoints_discovery/__init__.py +0 -0
- reconcile/gql_definitions/endpoints_discovery/namespaces.py +127 -0
- reconcile/gql_definitions/external_resources/__init__.py +0 -0
- reconcile/gql_definitions/external_resources/aws_accounts.py +73 -0
- reconcile/gql_definitions/external_resources/external_resources_modules.py +78 -0
- reconcile/gql_definitions/external_resources/external_resources_namespaces.py +1111 -0
- reconcile/gql_definitions/external_resources/external_resources_settings.py +98 -0
- reconcile/gql_definitions/fragments/aus_organization.py +34 -39
- reconcile/gql_definitions/fragments/aws_account_common.py +62 -0
- reconcile/gql_definitions/fragments/aws_account_managed.py +57 -0
- reconcile/gql_definitions/fragments/aws_account_sso.py +35 -0
- reconcile/gql_definitions/fragments/aws_infra_management_account.py +2 -2
- reconcile/gql_definitions/fragments/aws_vpc.py +47 -0
- reconcile/gql_definitions/fragments/aws_vpc_request.py +65 -0
- reconcile/gql_definitions/fragments/aws_vpc_request_subnet.py +29 -0
- reconcile/gql_definitions/fragments/deplopy_resources.py +7 -7
- reconcile/gql_definitions/fragments/disable.py +28 -0
- reconcile/gql_definitions/fragments/jumphost_common_fields.py +2 -2
- reconcile/gql_definitions/fragments/membership_source.py +47 -0
- reconcile/gql_definitions/fragments/minimal_ocm_organization.py +29 -0
- reconcile/gql_definitions/fragments/oc_connection_cluster.py +4 -9
- reconcile/gql_definitions/fragments/ocm_environment.py +5 -5
- reconcile/gql_definitions/fragments/pipeline_provider_retention.py +30 -0
- reconcile/gql_definitions/fragments/prometheus_instance.py +48 -0
- reconcile/gql_definitions/fragments/resource_limits_requirements.py +29 -0
- reconcile/gql_definitions/fragments/{resource_requirements.py → resource_requests_requirements.py} +3 -3
- reconcile/gql_definitions/fragments/resource_values.py +2 -2
- reconcile/gql_definitions/fragments/saas_target_namespace.py +55 -12
- reconcile/gql_definitions/fragments/serviceaccount_token.py +38 -0
- reconcile/gql_definitions/fragments/terraform_state.py +36 -0
- reconcile/gql_definitions/fragments/upgrade_policy.py +5 -3
- reconcile/gql_definitions/fragments/user.py +3 -2
- reconcile/gql_definitions/fragments/vault_secret.py +2 -2
- reconcile/gql_definitions/gitlab_members/gitlab_instances.py +6 -2
- reconcile/gql_definitions/gitlab_members/permissions.py +3 -5
- reconcile/gql_definitions/glitchtip/glitchtip_instance.py +16 -2
- reconcile/gql_definitions/glitchtip/glitchtip_project.py +22 -23
- reconcile/gql_definitions/glitchtip_project_alerts/__init__.py +0 -0
- reconcile/gql_definitions/glitchtip_project_alerts/glitchtip_project.py +173 -0
- reconcile/gql_definitions/integrations/integrations.py +62 -45
- reconcile/gql_definitions/introspection.json +51176 -0
- reconcile/gql_definitions/jenkins_configs/jenkins_configs.py +13 -5
- reconcile/gql_definitions/jenkins_configs/jenkins_instances.py +79 -0
- reconcile/gql_definitions/jira/__init__.py +0 -0
- reconcile/gql_definitions/jira/jira_servers.py +80 -0
- reconcile/gql_definitions/jira_permissions_validator/__init__.py +0 -0
- reconcile/gql_definitions/jira_permissions_validator/jira_boards_for_permissions_validator.py +131 -0
- reconcile/gql_definitions/jumphosts/jumphosts.py +3 -5
- reconcile/gql_definitions/ldap_groups/__init__.py +0 -0
- reconcile/gql_definitions/ldap_groups/roles.py +111 -0
- reconcile/gql_definitions/ldap_groups/settings.py +79 -0
- reconcile/gql_definitions/maintenance/__init__.py +0 -0
- reconcile/gql_definitions/maintenance/maintenances.py +101 -0
- reconcile/gql_definitions/membershipsources/__init__.py +0 -0
- reconcile/gql_definitions/membershipsources/roles.py +112 -0
- reconcile/gql_definitions/ocm_labels/__init__.py +0 -0
- reconcile/gql_definitions/ocm_labels/clusters.py +112 -0
- reconcile/gql_definitions/ocm_labels/organizations.py +78 -0
- reconcile/gql_definitions/ocm_subscription_labels/__init__.py +0 -0
- reconcile/gql_definitions/openshift_cluster_bots/__init__.py +0 -0
- reconcile/gql_definitions/openshift_cluster_bots/clusters.py +126 -0
- reconcile/gql_definitions/openshift_groups/managed_groups.py +2 -2
- reconcile/gql_definitions/openshift_groups/managed_roles.py +3 -2
- reconcile/gql_definitions/openshift_serviceaccount_tokens/__init__.py +0 -0
- reconcile/gql_definitions/openshift_serviceaccount_tokens/tokens.py +132 -0
- reconcile/gql_definitions/quay_membership/quay_membership.py +3 -5
- reconcile/gql_definitions/rhidp/__init__.py +0 -0
- reconcile/gql_definitions/rhidp/organizations.py +96 -0
- reconcile/gql_definitions/service_dependencies/jenkins_instance_fragment.py +2 -2
- reconcile/gql_definitions/service_dependencies/service_dependencies.py +9 -31
- reconcile/gql_definitions/sharding/aws_accounts.py +2 -2
- reconcile/gql_definitions/sharding/ocm_organization.py +63 -0
- reconcile/gql_definitions/skupper_network/site_controller_template.py +2 -2
- reconcile/gql_definitions/skupper_network/skupper_networks.py +12 -38
- reconcile/gql_definitions/slack_usergroups/clusters.py +2 -2
- reconcile/gql_definitions/slack_usergroups/permissions.py +8 -15
- reconcile/gql_definitions/slack_usergroups/users.py +3 -2
- reconcile/gql_definitions/slo_documents/__init__.py +0 -0
- reconcile/gql_definitions/slo_documents/slo_documents.py +142 -0
- reconcile/gql_definitions/status_board/__init__.py +0 -0
- reconcile/gql_definitions/status_board/status_board.py +163 -0
- reconcile/gql_definitions/statuspage/statuspages.py +56 -7
- reconcile/gql_definitions/templating/__init__.py +0 -0
- reconcile/gql_definitions/templating/template_collection.py +130 -0
- reconcile/gql_definitions/templating/templates.py +108 -0
- reconcile/gql_definitions/terraform_cloudflare_dns/app_interface_cloudflare_dns_settings.py +4 -8
- reconcile/gql_definitions/terraform_cloudflare_dns/terraform_cloudflare_zones.py +8 -8
- reconcile/gql_definitions/terraform_cloudflare_resources/terraform_cloudflare_accounts.py +6 -8
- reconcile/gql_definitions/terraform_cloudflare_resources/terraform_cloudflare_resources.py +45 -56
- reconcile/gql_definitions/terraform_cloudflare_users/app_interface_setting_cloudflare_and_vault.py +4 -8
- reconcile/gql_definitions/terraform_cloudflare_users/terraform_cloudflare_roles.py +4 -8
- reconcile/gql_definitions/terraform_init/__init__.py +0 -0
- reconcile/gql_definitions/terraform_init/aws_accounts.py +93 -0
- reconcile/gql_definitions/terraform_repo/__init__.py +0 -0
- reconcile/gql_definitions/terraform_repo/terraform_repo.py +141 -0
- reconcile/gql_definitions/terraform_resources/database_access_manager.py +158 -0
- reconcile/gql_definitions/terraform_resources/terraform_resources_namespaces.py +153 -162
- reconcile/gql_definitions/terraform_tgw_attachments/__init__.py +0 -0
- reconcile/gql_definitions/terraform_tgw_attachments/aws_accounts.py +119 -0
- reconcile/gql_definitions/unleash_feature_toggles/__init__.py +0 -0
- reconcile/gql_definitions/unleash_feature_toggles/feature_toggles.py +113 -0
- reconcile/gql_definitions/vault_instances/vault_instances.py +17 -50
- reconcile/gql_definitions/vault_policies/vault_policies.py +2 -2
- reconcile/gql_definitions/vpc_peerings_validator/vpc_peerings_validator.py +49 -12
- reconcile/gql_definitions/vpc_peerings_validator/vpc_peerings_validator_peered_cluster_fragment.py +7 -2
- reconcile/integrations_manager.py +25 -13
- reconcile/jenkins/types.py +5 -1
- reconcile/jenkins_base.py +36 -0
- reconcile/jenkins_job_builder.py +10 -48
- reconcile/jenkins_job_builds_cleaner.py +40 -25
- reconcile/jenkins_job_cleaner.py +1 -3
- reconcile/jenkins_roles.py +22 -26
- reconcile/jenkins_webhooks.py +9 -6
- reconcile/jenkins_worker_fleets.py +11 -6
- reconcile/jira_permissions_validator.py +340 -0
- reconcile/jira_watcher.py +3 -5
- reconcile/ldap_groups/__init__.py +0 -0
- reconcile/ldap_groups/integration.py +279 -0
- reconcile/ldap_users.py +3 -0
- reconcile/ocm/types.py +39 -59
- reconcile/ocm_additional_routers.py +0 -1
- reconcile/ocm_addons_upgrade_tests_trigger.py +10 -15
- reconcile/ocm_aws_infrastructure_access.py +30 -32
- reconcile/ocm_clusters.py +217 -130
- reconcile/ocm_external_configuration_labels.py +15 -0
- reconcile/ocm_github_idp.py +1 -1
- reconcile/ocm_groups.py +25 -5
- reconcile/ocm_internal_notifications/__init__.py +0 -0
- reconcile/ocm_internal_notifications/integration.py +119 -0
- reconcile/ocm_labels/__init__.py +0 -0
- reconcile/ocm_labels/integration.py +409 -0
- reconcile/ocm_machine_pools.py +517 -108
- reconcile/ocm_upgrade_scheduler_org_updater.py +15 -11
- reconcile/openshift_base.py +609 -207
- reconcile/openshift_cluster_bots.py +344 -0
- reconcile/openshift_clusterrolebindings.py +15 -15
- reconcile/openshift_groups.py +42 -45
- reconcile/openshift_limitranges.py +1 -0
- reconcile/openshift_namespace_labels.py +22 -28
- reconcile/openshift_namespaces.py +22 -22
- reconcile/openshift_network_policies.py +4 -8
- reconcile/openshift_prometheus_rules.py +43 -0
- reconcile/openshift_resourcequotas.py +2 -16
- reconcile/openshift_resources.py +12 -10
- reconcile/openshift_resources_base.py +304 -328
- reconcile/openshift_rolebindings.py +18 -20
- reconcile/openshift_saas_deploy.py +105 -21
- reconcile/openshift_saas_deploy_change_tester.py +30 -35
- reconcile/openshift_saas_deploy_trigger_base.py +39 -36
- reconcile/openshift_saas_deploy_trigger_cleaner.py +41 -27
- reconcile/openshift_saas_deploy_trigger_configs.py +1 -2
- reconcile/openshift_saas_deploy_trigger_images.py +1 -2
- reconcile/openshift_saas_deploy_trigger_moving_commits.py +1 -2
- reconcile/openshift_saas_deploy_trigger_upstream_jobs.py +1 -2
- reconcile/openshift_serviceaccount_tokens.py +138 -74
- reconcile/openshift_tekton_resources.py +89 -24
- reconcile/openshift_upgrade_watcher.py +110 -62
- reconcile/openshift_users.py +16 -15
- reconcile/openshift_vault_secrets.py +11 -6
- reconcile/oum/__init__.py +0 -0
- reconcile/oum/base.py +387 -0
- reconcile/oum/labelset.py +55 -0
- reconcile/oum/metrics.py +71 -0
- reconcile/oum/models.py +69 -0
- reconcile/oum/providers.py +59 -0
- reconcile/oum/standalone.py +196 -0
- reconcile/prometheus_rules_tester/integration.py +31 -23
- reconcile/quay_base.py +4 -1
- reconcile/quay_membership.py +1 -2
- reconcile/quay_mirror.py +111 -61
- reconcile/quay_mirror_org.py +34 -21
- reconcile/quay_permissions.py +7 -3
- reconcile/quay_repos.py +24 -32
- reconcile/queries.py +263 -198
- reconcile/query_validator.py +3 -5
- reconcile/resource_scraper.py +3 -4
- reconcile/{template_tester.py → resource_template_tester.py} +3 -3
- reconcile/rhidp/__init__.py +0 -0
- reconcile/rhidp/common.py +214 -0
- reconcile/rhidp/metrics.py +20 -0
- reconcile/rhidp/ocm_oidc_idp/__init__.py +0 -0
- reconcile/rhidp/ocm_oidc_idp/base.py +221 -0
- reconcile/rhidp/ocm_oidc_idp/integration.py +56 -0
- reconcile/rhidp/ocm_oidc_idp/metrics.py +22 -0
- reconcile/rhidp/sso_client/__init__.py +0 -0
- reconcile/rhidp/sso_client/base.py +266 -0
- reconcile/rhidp/sso_client/integration.py +60 -0
- reconcile/rhidp/sso_client/metrics.py +39 -0
- reconcile/run_integration.py +293 -0
- reconcile/saas_auto_promotions_manager/integration.py +69 -24
- reconcile/saas_auto_promotions_manager/merge_request_manager/batcher.py +208 -0
- reconcile/saas_auto_promotions_manager/merge_request_manager/desired_state.py +28 -0
- reconcile/saas_auto_promotions_manager/merge_request_manager/merge_request.py +3 -4
- reconcile/saas_auto_promotions_manager/merge_request_manager/merge_request_manager_v2.py +172 -0
- reconcile/saas_auto_promotions_manager/merge_request_manager/metrics.py +42 -0
- reconcile/saas_auto_promotions_manager/merge_request_manager/mr_parser.py +226 -0
- reconcile/saas_auto_promotions_manager/merge_request_manager/open_merge_requests.py +23 -0
- reconcile/saas_auto_promotions_manager/merge_request_manager/renderer.py +108 -32
- reconcile/saas_auto_promotions_manager/meta.py +4 -0
- reconcile/saas_auto_promotions_manager/publisher.py +32 -4
- reconcile/saas_auto_promotions_manager/s3_exporter.py +77 -0
- reconcile/saas_auto_promotions_manager/subscriber.py +110 -23
- reconcile/saas_auto_promotions_manager/utils/saas_files_inventory.py +48 -41
- reconcile/saas_file_validator.py +16 -6
- reconcile/sendgrid_teammates.py +27 -12
- reconcile/service_dependencies.py +0 -3
- reconcile/signalfx_endpoint_monitoring.py +2 -5
- reconcile/skupper_network/integration.py +10 -11
- reconcile/skupper_network/models.py +3 -5
- reconcile/skupper_network/reconciler.py +28 -35
- reconcile/skupper_network/site_controller.py +8 -8
- reconcile/slack_base.py +4 -7
- reconcile/slack_usergroups.py +249 -171
- reconcile/sql_query.py +324 -171
- reconcile/status.py +0 -1
- reconcile/status_board.py +275 -0
- reconcile/statuspage/__init__.py +0 -5
- reconcile/statuspage/atlassian.py +219 -80
- reconcile/statuspage/integration.py +9 -97
- reconcile/statuspage/integrations/__init__.py +0 -0
- reconcile/statuspage/integrations/components.py +77 -0
- reconcile/statuspage/integrations/maintenances.py +111 -0
- reconcile/statuspage/page.py +107 -72
- reconcile/statuspage/state.py +6 -11
- reconcile/statuspage/status.py +8 -12
- reconcile/templates/rosa-classic-cluster-creation.sh.j2 +60 -0
- reconcile/templates/rosa-hcp-cluster-creation.sh.j2 +61 -0
- reconcile/templating/__init__.py +0 -0
- reconcile/templating/lib/__init__.py +0 -0
- reconcile/templating/lib/merge_request_manager.py +180 -0
- reconcile/templating/lib/model.py +20 -0
- reconcile/templating/lib/rendering.py +191 -0
- reconcile/templating/renderer.py +410 -0
- reconcile/templating/validator.py +153 -0
- reconcile/terraform_aws_route53.py +13 -10
- reconcile/terraform_cloudflare_dns.py +92 -122
- reconcile/terraform_cloudflare_resources.py +15 -13
- reconcile/terraform_cloudflare_users.py +27 -27
- reconcile/terraform_init/__init__.py +0 -0
- reconcile/terraform_init/integration.py +165 -0
- reconcile/terraform_init/merge_request.py +57 -0
- reconcile/terraform_init/merge_request_manager.py +102 -0
- reconcile/terraform_repo.py +403 -0
- reconcile/terraform_resources.py +266 -168
- reconcile/terraform_tgw_attachments.py +417 -167
- reconcile/terraform_users.py +40 -17
- reconcile/terraform_vpc_peerings.py +310 -142
- reconcile/terraform_vpc_resources/__init__.py +0 -0
- reconcile/terraform_vpc_resources/integration.py +220 -0
- reconcile/terraform_vpc_resources/merge_request.py +57 -0
- reconcile/terraform_vpc_resources/merge_request_manager.py +107 -0
- reconcile/typed_queries/alerting_services_settings.py +1 -2
- reconcile/typed_queries/app_interface_custom_messages.py +24 -0
- reconcile/typed_queries/app_interface_deadmanssnitch_settings.py +17 -0
- reconcile/typed_queries/app_interface_metrics_exporter/__init__.py +0 -0
- reconcile/typed_queries/app_interface_metrics_exporter/onboarding_status.py +13 -0
- reconcile/typed_queries/app_interface_repo_url.py +1 -2
- reconcile/typed_queries/app_interface_state_settings.py +1 -3
- reconcile/typed_queries/app_interface_vault_settings.py +1 -2
- reconcile/typed_queries/app_quay_repos_escalation_policies.py +14 -0
- reconcile/typed_queries/apps.py +11 -0
- reconcile/typed_queries/aws_vpc_requests.py +9 -0
- reconcile/typed_queries/aws_vpcs.py +12 -0
- reconcile/typed_queries/cloudflare.py +10 -0
- reconcile/typed_queries/clusters.py +7 -5
- reconcile/typed_queries/clusters_minimal.py +6 -5
- reconcile/typed_queries/clusters_with_dms.py +16 -0
- reconcile/typed_queries/cost_report/__init__.py +0 -0
- reconcile/typed_queries/cost_report/app_names.py +22 -0
- reconcile/typed_queries/cost_report/cost_namespaces.py +43 -0
- reconcile/typed_queries/cost_report/settings.py +15 -0
- reconcile/typed_queries/dynatrace.py +10 -0
- reconcile/typed_queries/dynatrace_environments.py +14 -0
- reconcile/typed_queries/dynatrace_token_provider_token_specs.py +14 -0
- reconcile/typed_queries/external_resources.py +46 -0
- reconcile/typed_queries/get_state_aws_account.py +20 -0
- reconcile/typed_queries/glitchtip.py +10 -0
- reconcile/typed_queries/jenkins.py +25 -0
- reconcile/typed_queries/jira.py +7 -0
- reconcile/typed_queries/jira_settings.py +16 -0
- reconcile/typed_queries/jiralert_settings.py +22 -0
- reconcile/typed_queries/ocm.py +8 -0
- reconcile/typed_queries/pagerduty_instances.py +2 -7
- reconcile/typed_queries/quay.py +23 -0
- reconcile/typed_queries/repos.py +20 -8
- reconcile/typed_queries/reserved_networks.py +12 -0
- reconcile/typed_queries/saas_files.py +221 -167
- reconcile/typed_queries/slack.py +7 -0
- reconcile/typed_queries/slo_documents.py +12 -0
- reconcile/typed_queries/status_board.py +58 -0
- reconcile/typed_queries/tekton_pipeline_providers.py +1 -2
- reconcile/typed_queries/terraform_namespaces.py +1 -2
- reconcile/typed_queries/terraform_tgw_attachments/__init__.py +0 -0
- reconcile/typed_queries/terraform_tgw_attachments/aws_accounts.py +16 -0
- reconcile/typed_queries/unleash.py +10 -0
- reconcile/typed_queries/users.py +11 -0
- reconcile/typed_queries/vault.py +10 -0
- reconcile/unleash_feature_toggles/__init__.py +0 -0
- reconcile/unleash_feature_toggles/integration.py +287 -0
- reconcile/utils/acs/__init__.py +0 -0
- reconcile/utils/acs/base.py +81 -0
- reconcile/utils/acs/notifiers.py +143 -0
- reconcile/utils/acs/policies.py +163 -0
- reconcile/utils/acs/rbac.py +277 -0
- reconcile/utils/aggregated_list.py +11 -9
- reconcile/utils/amtool.py +6 -4
- reconcile/utils/aws_api.py +279 -66
- reconcile/utils/aws_api_typed/__init__.py +0 -0
- reconcile/utils/aws_api_typed/account.py +23 -0
- reconcile/utils/aws_api_typed/api.py +273 -0
- reconcile/utils/aws_api_typed/dynamodb.py +16 -0
- reconcile/utils/aws_api_typed/iam.py +67 -0
- reconcile/utils/aws_api_typed/organization.py +152 -0
- reconcile/utils/aws_api_typed/s3.py +26 -0
- reconcile/utils/aws_api_typed/service_quotas.py +79 -0
- reconcile/utils/aws_api_typed/sts.py +36 -0
- reconcile/utils/aws_api_typed/support.py +79 -0
- reconcile/utils/aws_helper.py +42 -3
- reconcile/utils/batches.py +11 -0
- reconcile/utils/binary.py +7 -9
- reconcile/utils/cloud_resource_best_practice/__init__.py +0 -0
- reconcile/utils/cloud_resource_best_practice/aws_rds.py +66 -0
- reconcile/utils/clusterhealth/__init__.py +0 -0
- reconcile/utils/clusterhealth/providerbase.py +39 -0
- reconcile/utils/clusterhealth/telemeter.py +39 -0
- reconcile/utils/config.py +3 -4
- reconcile/utils/deadmanssnitch_api.py +86 -0
- reconcile/utils/differ.py +205 -0
- reconcile/utils/disabled_integrations.py +4 -6
- reconcile/utils/dynatrace/__init__.py +0 -0
- reconcile/utils/dynatrace/client.py +93 -0
- reconcile/utils/early_exit_cache.py +289 -0
- reconcile/utils/elasticsearch_exceptions.py +5 -0
- reconcile/utils/environ.py +2 -2
- reconcile/utils/exceptions.py +4 -0
- reconcile/utils/expiration.py +4 -8
- reconcile/utils/extended_early_exit.py +210 -0
- reconcile/utils/external_resource_spec.py +34 -12
- reconcile/utils/external_resources.py +48 -20
- reconcile/utils/filtering.py +16 -0
- reconcile/utils/git.py +49 -16
- reconcile/utils/github_api.py +10 -9
- reconcile/utils/gitlab_api.py +333 -190
- reconcile/utils/glitchtip/client.py +97 -100
- reconcile/utils/glitchtip/models.py +89 -11
- reconcile/utils/gql.py +157 -58
- reconcile/utils/grouping.py +17 -0
- reconcile/utils/helm.py +89 -18
- reconcile/utils/helpers.py +51 -0
- reconcile/utils/imap_client.py +5 -6
- reconcile/utils/internal_groups/__init__.py +0 -0
- reconcile/utils/internal_groups/client.py +160 -0
- reconcile/utils/internal_groups/models.py +71 -0
- reconcile/utils/jenkins_api.py +10 -34
- reconcile/utils/jinja2/__init__.py +0 -0
- reconcile/utils/{jinja2_ext.py → jinja2/extensions.py} +6 -4
- reconcile/utils/jinja2/filters.py +142 -0
- reconcile/utils/jinja2/utils.py +278 -0
- reconcile/utils/jira_client.py +165 -8
- reconcile/utils/jjb_client.py +47 -35
- reconcile/utils/jobcontroller/__init__.py +0 -0
- reconcile/utils/jobcontroller/controller.py +413 -0
- reconcile/utils/jobcontroller/models.py +195 -0
- reconcile/utils/jsonpath.py +4 -5
- reconcile/utils/jump_host.py +13 -12
- reconcile/utils/keycloak.py +106 -0
- reconcile/utils/ldap_client.py +35 -6
- reconcile/utils/lean_terraform_client.py +115 -6
- reconcile/utils/membershipsources/__init__.py +0 -0
- reconcile/utils/membershipsources/app_interface_resolver.py +60 -0
- reconcile/utils/membershipsources/models.py +91 -0
- reconcile/utils/membershipsources/resolver.py +110 -0
- reconcile/utils/merge_request_manager/__init__.py +0 -0
- reconcile/utils/merge_request_manager/merge_request_manager.py +99 -0
- reconcile/utils/merge_request_manager/parser.py +67 -0
- reconcile/utils/metrics.py +511 -1
- reconcile/utils/models.py +123 -0
- reconcile/utils/mr/README.md +198 -0
- reconcile/utils/mr/__init__.py +14 -10
- reconcile/utils/mr/app_interface_reporter.py +2 -2
- reconcile/utils/mr/aws_access.py +4 -4
- reconcile/utils/mr/base.py +51 -31
- reconcile/utils/mr/clusters_updates.py +10 -7
- reconcile/utils/mr/glitchtip_access_reporter.py +2 -4
- reconcile/utils/mr/labels.py +14 -1
- reconcile/utils/mr/notificator.py +1 -3
- reconcile/utils/mr/ocm_update_recommended_version.py +1 -2
- reconcile/utils/mr/ocm_upgrade_scheduler_org_updates.py +7 -3
- reconcile/utils/mr/promote_qontract.py +203 -0
- reconcile/utils/mr/user_maintenance.py +24 -4
- reconcile/utils/oauth2_backend_application_session.py +132 -0
- reconcile/utils/oc.py +194 -170
- reconcile/utils/oc_connection_parameters.py +40 -51
- reconcile/utils/oc_filters.py +11 -13
- reconcile/utils/oc_map.py +14 -35
- reconcile/utils/ocm/__init__.py +30 -1
- reconcile/utils/ocm/addons.py +228 -0
- reconcile/utils/ocm/base.py +618 -5
- reconcile/utils/ocm/cluster_groups.py +5 -56
- reconcile/utils/ocm/clusters.py +111 -99
- reconcile/utils/ocm/identity_providers.py +66 -0
- reconcile/utils/ocm/label_sources.py +75 -0
- reconcile/utils/ocm/labels.py +139 -54
- reconcile/utils/ocm/manifests.py +39 -0
- reconcile/utils/ocm/ocm.py +182 -928
- reconcile/utils/ocm/products.py +758 -0
- reconcile/utils/ocm/search_filters.py +20 -28
- reconcile/utils/ocm/service_log.py +32 -79
- reconcile/utils/ocm/sre_capability_labels.py +51 -0
- reconcile/utils/ocm/status_board.py +66 -0
- reconcile/utils/ocm/subscriptions.py +49 -59
- reconcile/utils/ocm/syncsets.py +39 -0
- reconcile/utils/ocm/upgrades.py +181 -0
- reconcile/utils/ocm_base_client.py +71 -36
- reconcile/utils/openshift_resource.py +113 -67
- reconcile/utils/output.py +18 -11
- reconcile/utils/pagerduty_api.py +16 -10
- reconcile/utils/parse_dhms_duration.py +13 -1
- reconcile/utils/prometheus.py +123 -0
- reconcile/utils/promotion_state.py +56 -19
- reconcile/utils/promtool.py +5 -8
- reconcile/utils/quay_api.py +13 -25
- reconcile/utils/raw_github_api.py +3 -5
- reconcile/utils/repo_owners.py +2 -8
- reconcile/utils/rest_api_base.py +126 -0
- reconcile/utils/rosa/__init__.py +0 -0
- reconcile/utils/rosa/rosa_cli.py +310 -0
- reconcile/utils/rosa/session.py +201 -0
- reconcile/utils/ruamel.py +16 -0
- reconcile/utils/runtime/__init__.py +0 -1
- reconcile/utils/runtime/desired_state_diff.py +9 -20
- reconcile/utils/runtime/environment.py +33 -8
- reconcile/utils/runtime/integration.py +28 -12
- reconcile/utils/runtime/meta.py +1 -3
- reconcile/utils/runtime/runner.py +8 -11
- reconcile/utils/runtime/sharding.py +93 -36
- reconcile/utils/saasherder/__init__.py +1 -1
- reconcile/utils/saasherder/interfaces.py +143 -138
- reconcile/utils/saasherder/models.py +201 -43
- reconcile/utils/saasherder/saasherder.py +508 -378
- reconcile/utils/secret_reader.py +22 -27
- reconcile/utils/semver_helper.py +15 -1
- reconcile/utils/slack_api.py +124 -36
- reconcile/utils/smtp_client.py +1 -2
- reconcile/utils/sqs_gateway.py +10 -6
- reconcile/utils/state.py +276 -127
- reconcile/utils/terraform/config_client.py +6 -7
- reconcile/utils/terraform_client.py +284 -125
- reconcile/utils/terrascript/cloudflare_client.py +38 -17
- reconcile/utils/terrascript/cloudflare_resources.py +67 -18
- reconcile/utils/terrascript/models.py +2 -3
- reconcile/utils/terrascript/resources.py +1 -2
- reconcile/utils/terrascript_aws_client.py +1292 -540
- reconcile/utils/three_way_diff_strategy.py +157 -0
- reconcile/utils/unleash/__init__.py +11 -0
- reconcile/utils/{unleash.py → unleash/client.py} +35 -29
- reconcile/utils/unleash/server.py +145 -0
- reconcile/utils/vault.py +42 -32
- reconcile/utils/vaultsecretref.py +2 -4
- reconcile/utils/vcs.py +250 -0
- reconcile/vault_replication.py +38 -31
- reconcile/vpc_peerings_validator.py +82 -13
- tools/app_interface_metrics_exporter.py +70 -0
- tools/app_interface_reporter.py +44 -157
- tools/cli_commands/container_images_report.py +154 -0
- tools/cli_commands/cost_report/__init__.py +0 -0
- tools/cli_commands/cost_report/aws.py +137 -0
- tools/cli_commands/cost_report/cost_management_api.py +155 -0
- tools/cli_commands/cost_report/model.py +49 -0
- tools/cli_commands/cost_report/openshift.py +166 -0
- tools/cli_commands/cost_report/openshift_cost_optimization.py +187 -0
- tools/cli_commands/cost_report/response.py +124 -0
- tools/cli_commands/cost_report/util.py +72 -0
- tools/cli_commands/cost_report/view.py +524 -0
- tools/cli_commands/erv2.py +620 -0
- tools/cli_commands/gpg_encrypt.py +5 -8
- tools/cli_commands/systems_and_tools.py +489 -0
- tools/glitchtip_access_revalidation.py +1 -1
- tools/qontract_cli.py +2301 -673
- tools/saas_metrics_exporter/__init__.py +0 -0
- tools/saas_metrics_exporter/commit_distance/__init__.py +0 -0
- tools/saas_metrics_exporter/commit_distance/channel.py +63 -0
- tools/saas_metrics_exporter/commit_distance/commit_distance.py +103 -0
- tools/saas_metrics_exporter/commit_distance/metrics.py +19 -0
- tools/saas_metrics_exporter/main.py +99 -0
- tools/saas_promotion_state/__init__.py +0 -0
- tools/saas_promotion_state/saas_promotion_state.py +105 -0
- tools/sd_app_sre_alert_report.py +145 -0
- tools/template_validation.py +107 -0
- e2e_tests/cli.py +0 -83
- e2e_tests/create_namespace.py +0 -43
- e2e_tests/dedicated_admin_rolebindings.py +0 -44
- e2e_tests/dedicated_admin_test_base.py +0 -39
- e2e_tests/default_network_policies.py +0 -47
- e2e_tests/default_project_labels.py +0 -52
- e2e_tests/network_policy_test_base.py +0 -17
- e2e_tests/test_base.py +0 -56
- qontract_reconcile-0.9.1rc298.dist-info/METADATA +0 -63
- qontract_reconcile-0.9.1rc298.dist-info/RECORD +0 -585
- qontract_reconcile-0.9.1rc298.dist-info/top_level.txt +0 -4
- reconcile/ecr_mirror.py +0 -152
- reconcile/github_scanner.py +0 -74
- reconcile/gitlab_integrations.py +0 -63
- reconcile/gql_definitions/ocm_oidc_idp/clusters.py +0 -195
- reconcile/gql_definitions/ocp_release_mirror/ocp_release_mirror.py +0 -287
- reconcile/integrations_validator.py +0 -18
- reconcile/jenkins_plugins.py +0 -129
- reconcile/kafka_clusters.py +0 -208
- reconcile/ocm_cluster_admin.py +0 -42
- reconcile/ocm_oidc_idp.py +0 -198
- reconcile/ocp_release_mirror.py +0 -373
- reconcile/prometheus_rules_tester_old.py +0 -436
- reconcile/saas_auto_promotions_manager/merge_request_manager/merge_request_manager.py +0 -279
- reconcile/saas_auto_promotions_manager/utils/vcs.py +0 -141
- reconcile/sentry_config.py +0 -613
- reconcile/sentry_helper.py +0 -69
- reconcile/test/conftest.py +0 -187
- reconcile/test/fixtures.py +0 -24
- reconcile/test/saas_auto_promotions_manager/conftest.py +0 -69
- reconcile/test/saas_auto_promotions_manager/merge_request_manager/merge_request_manager/conftest.py +0 -110
- reconcile/test/saas_auto_promotions_manager/merge_request_manager/merge_request_manager/data_keys.py +0 -10
- reconcile/test/saas_auto_promotions_manager/merge_request_manager/merge_request_manager/test_housekeeping.py +0 -200
- reconcile/test/saas_auto_promotions_manager/merge_request_manager/merge_request_manager/test_merge_request_manager.py +0 -151
- reconcile/test/saas_auto_promotions_manager/merge_request_manager/renderer/conftest.py +0 -63
- reconcile/test/saas_auto_promotions_manager/merge_request_manager/renderer/data_keys.py +0 -4
- reconcile/test/saas_auto_promotions_manager/merge_request_manager/renderer/test_content_multiple_namespaces.py +0 -46
- reconcile/test/saas_auto_promotions_manager/merge_request_manager/renderer/test_content_single_namespace.py +0 -94
- reconcile/test/saas_auto_promotions_manager/merge_request_manager/renderer/test_content_single_target.py +0 -44
- reconcile/test/saas_auto_promotions_manager/subscriber/conftest.py +0 -74
- reconcile/test/saas_auto_promotions_manager/subscriber/data_keys.py +0 -11
- reconcile/test/saas_auto_promotions_manager/subscriber/test_content_hash.py +0 -155
- reconcile/test/saas_auto_promotions_manager/subscriber/test_diff.py +0 -173
- reconcile/test/saas_auto_promotions_manager/subscriber/test_multiple_channels_config_hash.py +0 -226
- reconcile/test/saas_auto_promotions_manager/subscriber/test_multiple_channels_moving_ref.py +0 -224
- reconcile/test/saas_auto_promotions_manager/subscriber/test_single_channel_with_single_publisher.py +0 -350
- reconcile/test/saas_auto_promotions_manager/test_integration_test.py +0 -129
- reconcile/test/saas_auto_promotions_manager/utils/saas_files_inventory/test_multiple_publishers_for_single_channel.py +0 -70
- reconcile/test/saas_auto_promotions_manager/utils/saas_files_inventory/test_saas_files_use_target_config_hash.py +0 -63
- reconcile/test/saas_auto_promotions_manager/utils/saas_files_inventory/test_saas_files_with_auto_promote.py +0 -74
- reconcile/test/saas_auto_promotions_manager/utils/saas_files_inventory/test_saas_files_without_auto_promote.py +0 -65
- reconcile/test/test_aggregated_list.py +0 -237
- reconcile/test/test_amtool.py +0 -37
- reconcile/test/test_auto_promoter.py +0 -295
- reconcile/test/test_aws_ami_share.py +0 -68
- reconcile/test/test_aws_iam_keys.py +0 -70
- reconcile/test/test_aws_iam_password_reset.py +0 -35
- reconcile/test/test_aws_support_cases_sos.py +0 -23
- reconcile/test/test_checkpoint.py +0 -178
- reconcile/test/test_cli.py +0 -41
- reconcile/test/test_closedbox_endpoint_monitoring.py +0 -207
- reconcile/test/test_gabi_authorized_users.py +0 -72
- reconcile/test/test_github_org.py +0 -154
- reconcile/test/test_github_repo_invites.py +0 -123
- reconcile/test/test_gitlab_housekeeping.py +0 -88
- reconcile/test/test_gitlab_labeler.py +0 -129
- reconcile/test/test_gitlab_members.py +0 -283
- reconcile/test/test_instrumented_wrappers.py +0 -18
- reconcile/test/test_integrations_manager.py +0 -995
- reconcile/test/test_jenkins_worker_fleets.py +0 -55
- reconcile/test/test_jump_host.py +0 -117
- reconcile/test/test_ldap_users.py +0 -123
- reconcile/test/test_make.py +0 -28
- reconcile/test/test_ocm_additional_routers.py +0 -134
- reconcile/test/test_ocm_addons_upgrade_scheduler_org.py +0 -149
- reconcile/test/test_ocm_clusters.py +0 -598
- reconcile/test/test_ocm_clusters_manifest_updates.py +0 -89
- reconcile/test/test_ocm_oidc_idp.py +0 -315
- reconcile/test/test_ocm_update_recommended_version.py +0 -145
- reconcile/test/test_ocm_upgrade_scheduler.py +0 -614
- reconcile/test/test_ocm_upgrade_scheduler_org_updater.py +0 -129
- reconcile/test/test_openshift_base.py +0 -730
- reconcile/test/test_openshift_namespace_labels.py +0 -345
- reconcile/test/test_openshift_namespaces.py +0 -256
- reconcile/test/test_openshift_resource.py +0 -415
- reconcile/test/test_openshift_resources_base.py +0 -440
- reconcile/test/test_openshift_saas_deploy_change_tester.py +0 -310
- reconcile/test/test_openshift_tekton_resources.py +0 -253
- reconcile/test/test_openshift_upgrade_watcher.py +0 -146
- reconcile/test/test_prometheus_rules_tester.py +0 -151
- reconcile/test/test_prometheus_rules_tester_old.py +0 -77
- reconcile/test/test_quay_membership.py +0 -86
- reconcile/test/test_quay_mirror.py +0 -109
- reconcile/test/test_quay_mirror_org.py +0 -70
- reconcile/test/test_quay_repos.py +0 -59
- reconcile/test/test_queries.py +0 -53
- reconcile/test/test_repo_owners.py +0 -47
- reconcile/test/test_requests_sender.py +0 -139
- reconcile/test/test_saasherder.py +0 -1074
- reconcile/test/test_saasherder_allowed_secret_paths.py +0 -127
- reconcile/test/test_secret_reader.py +0 -153
- reconcile/test/test_slack_base.py +0 -185
- reconcile/test/test_slack_usergroups.py +0 -744
- reconcile/test/test_sql_query.py +0 -19
- reconcile/test/test_terraform_cloudflare_dns.py +0 -117
- reconcile/test/test_terraform_cloudflare_resources.py +0 -106
- reconcile/test/test_terraform_cloudflare_users.py +0 -749
- reconcile/test/test_terraform_resources.py +0 -257
- reconcile/test/test_terraform_tgw_attachments.py +0 -631
- reconcile/test/test_terraform_users.py +0 -57
- reconcile/test/test_terraform_vpc_peerings.py +0 -499
- reconcile/test/test_terraform_vpc_peerings_build_desired_state.py +0 -1061
- reconcile/test/test_unleash.py +0 -138
- reconcile/test/test_utils_aws_api.py +0 -240
- reconcile/test/test_utils_aws_helper.py +0 -80
- reconcile/test/test_utils_cluster_version_data.py +0 -177
- reconcile/test/test_utils_data_structures.py +0 -13
- reconcile/test/test_utils_disabled_integrations.py +0 -86
- reconcile/test/test_utils_expiration.py +0 -109
- reconcile/test/test_utils_external_resource_spec.py +0 -383
- reconcile/test/test_utils_external_resources.py +0 -247
- reconcile/test/test_utils_github_api.py +0 -73
- reconcile/test/test_utils_gitlab_api.py +0 -20
- reconcile/test/test_utils_gpg.py +0 -69
- reconcile/test/test_utils_gql.py +0 -81
- reconcile/test/test_utils_helm.py +0 -306
- reconcile/test/test_utils_helpers.py +0 -55
- reconcile/test/test_utils_imap_client.py +0 -65
- reconcile/test/test_utils_jjb_client.py +0 -52
- reconcile/test/test_utils_jsonpath.py +0 -286
- reconcile/test/test_utils_ldap_client.py +0 -51
- reconcile/test/test_utils_mr.py +0 -226
- reconcile/test/test_utils_mr_clusters_updates.py +0 -77
- reconcile/test/test_utils_oc.py +0 -984
- reconcile/test/test_utils_ocm.py +0 -110
- reconcile/test/test_utils_pagerduty_api.py +0 -251
- reconcile/test/test_utils_parse_dhms_duration.py +0 -34
- reconcile/test/test_utils_password_validator.py +0 -155
- reconcile/test/test_utils_quay_api.py +0 -86
- reconcile/test/test_utils_semver_helper.py +0 -19
- reconcile/test/test_utils_sharding.py +0 -56
- reconcile/test/test_utils_slack_api.py +0 -439
- reconcile/test/test_utils_smtp_client.py +0 -73
- reconcile/test/test_utils_state.py +0 -256
- reconcile/test/test_utils_terraform.py +0 -13
- reconcile/test/test_utils_terraform_client.py +0 -585
- reconcile/test/test_utils_terraform_config_client.py +0 -219
- reconcile/test/test_utils_terrascript_aws_client.py +0 -277
- reconcile/test/test_utils_terrascript_cloudflare_client.py +0 -597
- reconcile/test/test_utils_terrascript_cloudflare_resources.py +0 -26
- reconcile/test/test_vault_replication.py +0 -515
- reconcile/test/test_vault_utils.py +0 -47
- reconcile/test/test_version_bump.py +0 -18
- reconcile/test/test_vpc_peerings_validator.py +0 -103
- reconcile/test/test_wrong_region.py +0 -78
- reconcile/typed_queries/glitchtip_settings.py +0 -18
- reconcile/typed_queries/ocp_release_mirror.py +0 -11
- reconcile/unleash_watcher.py +0 -120
- reconcile/utils/git_secrets.py +0 -63
- reconcile/utils/mr/auto_promoter.py +0 -218
- reconcile/utils/sentry_client.py +0 -383
- release/test_version.py +0 -50
- release/version.py +0 -100
- tools/test/test_qontract_cli.py +0 -60
- tools/test/test_sre_checkpoints.py +0 -79
- /e2e_tests/__init__.py → /reconcile/aus/upgrades.py +0 -0
- /reconcile/{gql_definitions/ocp_release_mirror → aws_account_manager}/__init__.py +0 -0
- /reconcile/{test → aws_ami_cleanup}/__init__.py +0 -0
- /reconcile/{test/saas_auto_promotions_manager → aws_cloudwatch_log_retention}/__init__.py +0 -0
- /reconcile/{test/saas_auto_promotions_manager/merge_request_manager → aws_saml_idp}/__init__.py +0 -0
- /reconcile/{test/saas_auto_promotions_manager/merge_request_manager/merge_request_manager → aws_saml_roles}/__init__.py +0 -0
- /reconcile/{test/saas_auto_promotions_manager/merge_request_manager/renderer → aws_version_sync}/__init__.py +0 -0
- /reconcile/{test/saas_auto_promotions_manager/subscriber → aws_version_sync/merge_request_manager}/__init__.py +0 -0
- /reconcile/{test/saas_auto_promotions_manager/utils → cluster_auth_rhidp}/__init__.py +0 -0
- /reconcile/{test/saas_auto_promotions_manager/utils/saas_files_inventory → dynatrace_token_provider}/__init__.py +0 -0
- {release → reconcile/endpoints_discovery}/__init__.py +0 -0
- {tools/test → reconcile/external_resources}/__init__.py +0 -0
reconcile/aus/base.py
CHANGED
@@ -1,166 +1,637 @@
|
|
1
|
-
import
|
1
|
+
import datetime as dt
|
2
2
|
import logging
|
3
3
|
import sys
|
4
|
-
from abc import
|
5
|
-
|
6
|
-
|
7
|
-
|
4
|
+
from abc import (
|
5
|
+
ABC,
|
6
|
+
abstractmethod,
|
7
|
+
)
|
8
|
+
from collections.abc import Callable, Sequence
|
9
|
+
from datetime import (
|
10
|
+
datetime,
|
11
|
+
timedelta,
|
8
12
|
)
|
9
|
-
from datetime import datetime
|
10
13
|
from typing import (
|
11
|
-
|
12
|
-
|
13
|
-
Optional,
|
14
|
+
Protocol,
|
15
|
+
cast,
|
14
16
|
)
|
15
17
|
|
16
18
|
from croniter import croniter
|
19
|
+
from pydantic import BaseModel, Extra
|
17
20
|
from semver import VersionInfo
|
18
21
|
|
19
|
-
from reconcile.aus.
|
22
|
+
from reconcile.aus.cluster_version_data import (
|
23
|
+
VersionData,
|
24
|
+
VersionDataMap,
|
25
|
+
WorkloadHistory,
|
26
|
+
get_version_data,
|
27
|
+
)
|
28
|
+
from reconcile.aus.metrics import (
|
29
|
+
CLUSTER_HEALTH_HEALTHY_METRIC_VALUE,
|
30
|
+
CLUSTER_HEALTH_UNHEALTHY_METRIC_VALUE,
|
31
|
+
UPGRADE_BLOCKED_METRIC_VALUE,
|
32
|
+
UPGRADE_LONG_RUNNING_METRIC_VALUE,
|
33
|
+
UPGRADE_SCHEDULED_METRIC_VALUE,
|
34
|
+
UPGRADE_STARTED_METRIC_VALUE,
|
35
|
+
AUSClusterHealthStateGauge,
|
36
|
+
AUSClusterUpgradePolicyInfoMetric,
|
37
|
+
AUSOCMEnvironmentError,
|
38
|
+
AUSOrganizationErrorRate,
|
39
|
+
AUSOrganizationValidationErrorsGauge,
|
40
|
+
)
|
41
|
+
from reconcile.aus.models import (
|
42
|
+
ClusterAddonUpgradeSpec,
|
43
|
+
ClusterUpgradeSpec,
|
44
|
+
OrganizationUpgradeSpec,
|
45
|
+
Sector,
|
46
|
+
)
|
47
|
+
from reconcile.aus.version_gates import HANDLERS
|
48
|
+
from reconcile.gql_definitions.advanced_upgrade_service.aus_organization import (
|
49
|
+
query as aus_organizations_query,
|
50
|
+
)
|
51
|
+
from reconcile.gql_definitions.common.ocm_env_telemeter import (
|
52
|
+
query as ocm_env_telemeter_query,
|
53
|
+
)
|
20
54
|
from reconcile.gql_definitions.common.ocm_environments import (
|
21
55
|
query as ocm_environment_query,
|
22
56
|
)
|
57
|
+
from reconcile.gql_definitions.fragments.aus_organization import AUSOCMOrganization
|
23
58
|
from reconcile.gql_definitions.fragments.ocm_environment import OCMEnvironment
|
24
|
-
from reconcile.
|
25
|
-
from reconcile.utils
|
26
|
-
|
27
|
-
|
28
|
-
|
59
|
+
from reconcile.gql_definitions.fragments.upgrade_policy import ClusterUpgradePolicyV1
|
60
|
+
from reconcile.utils import (
|
61
|
+
gql,
|
62
|
+
metrics,
|
63
|
+
)
|
64
|
+
from reconcile.utils.clusterhealth.providerbase import (
|
65
|
+
ClusterHealthProvider,
|
66
|
+
)
|
67
|
+
from reconcile.utils.clusterhealth.telemeter import (
|
68
|
+
TELEMETER_SOURCE,
|
69
|
+
TelemeterClusterHealthProvider,
|
29
70
|
)
|
30
71
|
from reconcile.utils.defer import defer
|
31
|
-
from reconcile.utils.
|
32
|
-
|
33
|
-
|
34
|
-
|
72
|
+
from reconcile.utils.disabled_integrations import integration_is_enabled
|
73
|
+
from reconcile.utils.filtering import remove_none_values_from_dict
|
74
|
+
from reconcile.utils.ocm.addons import AddonService, AddonServiceV1, AddonServiceV2
|
75
|
+
from reconcile.utils.ocm.clusters import (
|
76
|
+
OCMCluster,
|
77
|
+
)
|
78
|
+
from reconcile.utils.ocm.upgrades import (
|
79
|
+
OCMVersionGate,
|
80
|
+
create_control_plane_upgrade_policy,
|
81
|
+
create_node_pool_upgrade_policy,
|
82
|
+
create_upgrade_policy,
|
83
|
+
delete_control_plane_upgrade_policy,
|
84
|
+
delete_upgrade_policy,
|
85
|
+
get_control_plane_upgrade_policies,
|
86
|
+
get_node_pool_upgrade_policies,
|
87
|
+
get_upgrade_policies,
|
88
|
+
get_version_agreement,
|
89
|
+
get_version_gates,
|
90
|
+
)
|
91
|
+
from reconcile.utils.ocm_base_client import OCMBaseClient
|
92
|
+
from reconcile.utils.prometheus import (
|
93
|
+
init_prometheus_http_querier_from_prometheus_instance,
|
35
94
|
)
|
36
95
|
from reconcile.utils.runtime.integration import (
|
37
96
|
PydanticRunParams,
|
38
97
|
QontractReconcileIntegration,
|
39
98
|
)
|
40
99
|
from reconcile.utils.semver_helper import (
|
100
|
+
get_version_prefix,
|
41
101
|
parse_semver,
|
42
102
|
sort_versions,
|
43
103
|
)
|
44
104
|
from reconcile.utils.state import init_state
|
45
105
|
|
106
|
+
MIN_DELTA_MINUTES = 6
|
107
|
+
|
46
108
|
|
47
109
|
class AdvancedUpgradeSchedulerBaseIntegrationParams(PydanticRunParams):
|
110
|
+
ocm_environment: str | None = None
|
111
|
+
ocm_organization_ids: set[str] | None = None
|
112
|
+
excluded_ocm_organization_ids: set[str] | None = None
|
113
|
+
ignore_sts_clusters: bool = False
|
48
114
|
|
49
|
-
|
50
|
-
|
115
|
+
|
116
|
+
class ReconcileErrorSummary(Exception):
|
117
|
+
def __init__(self, exceptions: list[str]) -> None:
|
118
|
+
self.exceptions = exceptions
|
119
|
+
|
120
|
+
def __str__(self) -> str:
|
121
|
+
formatted_exceptions = "\n".join([f"- {e}" for e in self.exceptions])
|
122
|
+
return f"Reconcile exceptions:\n{formatted_exceptions}"
|
51
123
|
|
52
124
|
|
53
125
|
class AdvancedUpgradeSchedulerBaseIntegration(
|
54
126
|
QontractReconcileIntegration[AdvancedUpgradeSchedulerBaseIntegrationParams]
|
55
127
|
):
|
56
128
|
def run(self, dry_run: bool) -> None:
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
129
|
+
with metrics.transactional_metrics(self.name):
|
130
|
+
upgrade_specs = self.get_upgrade_specs()
|
131
|
+
unhandled_exceptions = []
|
132
|
+
for ocm_env, env_upgrade_specs in upgrade_specs.items():
|
133
|
+
for org_upgrade_spec in env_upgrade_specs.values():
|
134
|
+
try:
|
135
|
+
with AUSOrganizationErrorRate(
|
136
|
+
integration=self.name,
|
137
|
+
ocm_env=ocm_env,
|
138
|
+
org_id=org_upgrade_spec.org.org_id,
|
139
|
+
):
|
140
|
+
self.process_org(dry_run, ocm_env, org_upgrade_spec)
|
141
|
+
except Exception as e:
|
142
|
+
if not self.signal_reconcile_issues(
|
143
|
+
dry_run, org_upgrade_spec, e
|
144
|
+
):
|
145
|
+
unhandled_exceptions.append(
|
146
|
+
f"{ocm_env}/{org_upgrade_spec.org.name}: {e}"
|
147
|
+
)
|
148
|
+
|
149
|
+
if unhandled_exceptions:
|
150
|
+
raise ReconcileErrorSummary(unhandled_exceptions)
|
66
151
|
sys.exit(0)
|
67
152
|
|
68
|
-
def
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
153
|
+
def get_orgs_for_environment(
|
154
|
+
self, ocm_env: OCMEnvironment, only_addon_managed_upgrades: bool = False
|
155
|
+
) -> list[AUSOCMOrganization]:
|
156
|
+
return get_orgs_for_environment(
|
157
|
+
integration=self.name,
|
158
|
+
ocm_env_name=ocm_env.name,
|
159
|
+
query_func=gql.get_api().query,
|
160
|
+
ocm_organization_ids=self.params.ocm_organization_ids,
|
161
|
+
excluded_ocm_organization_ids=self.params.excluded_ocm_organization_ids,
|
162
|
+
only_addon_managed_upgrades=only_addon_managed_upgrades,
|
163
|
+
)
|
164
|
+
|
165
|
+
def process_org(
|
166
|
+
self, dry_run: bool, ocm_env: str, org_upgrade_spec: OrganizationUpgradeSpec
|
167
|
+
) -> None:
|
168
|
+
org_name = org_upgrade_spec.org.name
|
169
|
+
self.expose_org_upgrade_spec_metrics(ocm_env, org_upgrade_spec)
|
170
|
+
if org_upgrade_spec.has_validation_errors:
|
171
|
+
self.signal_validation_issues(dry_run, org_upgrade_spec)
|
172
|
+
elif org_upgrade_spec.specs:
|
173
|
+
self.process_upgrade_policies_in_org(dry_run, org_upgrade_spec)
|
174
|
+
else:
|
175
|
+
logging.debug(
|
176
|
+
f"Skip org {org_upgrade_spec.org.org_id}/{org_name} in {ocm_env} because it defines no upgrade policies"
|
73
177
|
)
|
74
|
-
for ocm_env in self.get_ocm_environments()
|
75
|
-
}
|
76
178
|
|
77
|
-
def
|
179
|
+
def get_upgrade_specs(self) -> dict[str, dict[str, OrganizationUpgradeSpec]]:
|
180
|
+
envs_org_upgrade_specs: dict[str, dict[str, OrganizationUpgradeSpec]] = {}
|
181
|
+
for ocm_env in self.get_ocm_environments():
|
182
|
+
try:
|
183
|
+
envs_org_upgrade_specs[ocm_env.name] = self.get_ocm_env_upgrade_specs(
|
184
|
+
ocm_env=ocm_env
|
185
|
+
)
|
186
|
+
except Exception as e:
|
187
|
+
logging.exception(
|
188
|
+
"Failed to get org upgrade specs for OCM environment %s. Skipping. %s",
|
189
|
+
ocm_env.name,
|
190
|
+
e,
|
191
|
+
)
|
192
|
+
metrics.inc_counter(
|
193
|
+
AUSOCMEnvironmentError(
|
194
|
+
integration=self.name,
|
195
|
+
ocm_env=ocm_env.name,
|
196
|
+
)
|
197
|
+
)
|
198
|
+
return envs_org_upgrade_specs
|
199
|
+
|
200
|
+
def get_ocm_environments(self, filter: bool = True) -> list[OCMEnvironment]:
|
78
201
|
return ocm_environment_query(
|
79
202
|
gql.get_api().query,
|
80
203
|
variables={"name": self.params.ocm_environment}
|
81
|
-
if self.params.ocm_environment
|
204
|
+
if self.params.ocm_environment and filter
|
82
205
|
else None,
|
83
206
|
).environments
|
84
207
|
|
208
|
+
def expose_remaining_soak_day_metrics(
|
209
|
+
self,
|
210
|
+
org_upgrade_spec: OrganizationUpgradeSpec,
|
211
|
+
version_data: VersionData,
|
212
|
+
current_state: Sequence["AbstractUpgradePolicy"],
|
213
|
+
metrics_builder: "RemainingSoakDayMetricsBuilder",
|
214
|
+
) -> None:
|
215
|
+
current_cluster_upgrade_policies = {
|
216
|
+
p.cluster.external_id: p for p in current_state
|
217
|
+
}
|
218
|
+
for spec in org_upgrade_spec.specs:
|
219
|
+
upgrades = spec.get_available_upgrades()
|
220
|
+
if not upgrades:
|
221
|
+
continue
|
222
|
+
|
223
|
+
# calculate the amount every version has soaked. if a version has soaked for
|
224
|
+
# multiple workloads, we will pick the minimum soak day value of all workloads
|
225
|
+
# relevant on the cluster.
|
226
|
+
soaked_versions: dict[str, float] = {}
|
227
|
+
for workload in spec.upgrade_policy.workloads:
|
228
|
+
for version, soak_days in soaking_days(
|
229
|
+
version_data, upgrades, workload, False
|
230
|
+
).items():
|
231
|
+
soaked_versions[version] = min(
|
232
|
+
soak_days, soaked_versions.get(version, soak_days)
|
233
|
+
)
|
234
|
+
|
235
|
+
current_upgrade = current_cluster_upgrade_policies.get(spec.cluster_uuid)
|
236
|
+
for version, metric_value in remaining_soak_day_metric_values_for_cluster(
|
237
|
+
spec, soaked_versions, current_upgrade
|
238
|
+
).items():
|
239
|
+
metrics.set_gauge(
|
240
|
+
metrics_builder(
|
241
|
+
cluster_uuid=spec.cluster.external_id, soaking_version=version
|
242
|
+
),
|
243
|
+
metric_value,
|
244
|
+
)
|
245
|
+
|
85
246
|
@abstractmethod
|
86
247
|
def process_upgrade_policies_in_org(
|
87
248
|
self, dry_run: bool, org_upgrade_spec: OrganizationUpgradeSpec
|
88
|
-
) -> None:
|
89
|
-
...
|
249
|
+
) -> None: ...
|
90
250
|
|
91
251
|
@abstractmethod
|
92
252
|
def get_ocm_env_upgrade_specs(
|
93
|
-
self, ocm_env: OCMEnvironment
|
94
|
-
) -> dict[str, OrganizationUpgradeSpec]:
|
95
|
-
...
|
253
|
+
self, ocm_env: OCMEnvironment
|
254
|
+
) -> dict[str, OrganizationUpgradeSpec]: ...
|
96
255
|
|
256
|
+
def signal_validation_issues(
|
257
|
+
self, dry_run: bool, org_upgrade_spec: OrganizationUpgradeSpec
|
258
|
+
) -> None: ...
|
259
|
+
|
260
|
+
def signal_reconcile_issues(
|
261
|
+
self,
|
262
|
+
dry_run: bool,
|
263
|
+
org_upgrade_spec: OrganizationUpgradeSpec,
|
264
|
+
exception: Exception,
|
265
|
+
) -> bool:
|
266
|
+
"""
|
267
|
+
The bool return value is used to indicate if the exception was properly handled.
|
268
|
+
|
269
|
+
The default behaviour returns False, indicating that the exception was not
|
270
|
+
handled so that it can bubble up and potentially fail the integration.
|
271
|
+
|
272
|
+
This function can be overridden to handle exceptions in a custom way.
|
273
|
+
"""
|
274
|
+
return False
|
275
|
+
|
276
|
+
def expose_org_upgrade_spec_metrics(
|
277
|
+
self, ocm_env: str, org_upgrade_spec: OrganizationUpgradeSpec
|
278
|
+
) -> None:
|
279
|
+
metrics.set_gauge(
|
280
|
+
AUSOrganizationValidationErrorsGauge(
|
281
|
+
integration=self.name,
|
282
|
+
ocm_env=ocm_env,
|
283
|
+
org_id=org_upgrade_spec.org.org_id,
|
284
|
+
),
|
285
|
+
org_upgrade_spec.nr_of_validation_errors,
|
286
|
+
)
|
287
|
+
for cluster_upgrade_spec in org_upgrade_spec.specs:
|
288
|
+
mutexes = cluster_upgrade_spec.upgrade_policy.conditions.mutexes
|
289
|
+
metrics.set_info(
|
290
|
+
AUSClusterUpgradePolicyInfoMetric(
|
291
|
+
integration=self.name,
|
292
|
+
ocm_env=ocm_env,
|
293
|
+
cluster_uuid=cluster_upgrade_spec.cluster_uuid,
|
294
|
+
org_id=cluster_upgrade_spec.org.org_id,
|
295
|
+
org_name=org_upgrade_spec.org.name,
|
296
|
+
channel=cluster_upgrade_spec.cluster.version.channel_group,
|
297
|
+
current_version=cluster_upgrade_spec.oldest_current_version,
|
298
|
+
cluster_name=cluster_upgrade_spec.name,
|
299
|
+
schedule=cluster_upgrade_spec.upgrade_policy.schedule,
|
300
|
+
sector=cluster_upgrade_spec.upgrade_policy.conditions.sector or "",
|
301
|
+
mutexes=",".join(mutexes) if mutexes else "",
|
302
|
+
soak_days=str(
|
303
|
+
cluster_upgrade_spec.upgrade_policy.conditions.soak_days or 0
|
304
|
+
),
|
305
|
+
workloads=",".join(cluster_upgrade_spec.upgrade_policy.workloads),
|
306
|
+
product=cluster_upgrade_spec.cluster.product.id,
|
307
|
+
hypershift=cluster_upgrade_spec.cluster.hypershift.enabled,
|
308
|
+
),
|
309
|
+
)
|
310
|
+
for (
|
311
|
+
source,
|
312
|
+
has_health_error,
|
313
|
+
) in cluster_upgrade_spec.health.health_errors_by_source().items():
|
314
|
+
metrics.set_gauge(
|
315
|
+
AUSClusterHealthStateGauge(
|
316
|
+
integration=self.name,
|
317
|
+
ocm_env=ocm_env,
|
318
|
+
health_source=source,
|
319
|
+
cluster_uuid=cluster_upgrade_spec.cluster_uuid,
|
320
|
+
),
|
321
|
+
CLUSTER_HEALTH_UNHEALTHY_METRIC_VALUE
|
322
|
+
if has_health_error
|
323
|
+
else CLUSTER_HEALTH_HEALTHY_METRIC_VALUE,
|
324
|
+
)
|
97
325
|
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
326
|
+
def _health_check_providers_for_env(
|
327
|
+
self, ocm_env_name: str
|
328
|
+
) -> dict[str, ClusterHealthProvider]:
|
329
|
+
providers: dict[str, ClusterHealthProvider] = {}
|
330
|
+
telemeter_provider = self._build_telemeter_health_check_provider_for_env(
|
331
|
+
ocm_env_name
|
332
|
+
)
|
333
|
+
if telemeter_provider:
|
334
|
+
providers[TELEMETER_SOURCE] = telemeter_provider
|
335
|
+
return providers
|
336
|
+
|
337
|
+
def _build_telemeter_health_check_provider_for_env(
|
338
|
+
self,
|
339
|
+
ocm_env_name: str,
|
340
|
+
) -> TelemeterClusterHealthProvider | None:
|
341
|
+
ocm_env = next(
|
342
|
+
iter(
|
343
|
+
ocm_env_telemeter_query(
|
344
|
+
gql.get_api().query, variables={"name": ocm_env_name}
|
345
|
+
).ocm_envs
|
346
|
+
),
|
347
|
+
None,
|
348
|
+
)
|
349
|
+
|
350
|
+
if ocm_env and ocm_env.telemeter:
|
351
|
+
return TelemeterClusterHealthProvider(
|
352
|
+
querier=init_prometheus_http_querier_from_prometheus_instance(
|
353
|
+
prometheus=ocm_env.telemeter,
|
354
|
+
secret_reader=self.secret_reader,
|
355
|
+
)
|
356
|
+
)
|
104
357
|
|
358
|
+
return None
|
105
359
|
|
106
|
-
def fetch_current_state(
|
107
|
-
clusters: list[dict[str, Any]], ocm_map: OCMMap, addons: bool = False
|
108
|
-
) -> list[dict[str, Any]]:
|
109
|
-
current_state = []
|
110
|
-
for cluster in clusters:
|
111
|
-
cluster_name = cluster["name"]
|
112
|
-
ocm = ocm_map.get(cluster_name)
|
113
|
-
if addons:
|
114
|
-
upgrade_policies = ocm.get_addon_upgrade_policies(cluster_name)
|
115
|
-
else:
|
116
|
-
upgrade_policies = ocm.get_upgrade_policies(cluster_name)
|
117
|
-
for upgrade_policy in upgrade_policies:
|
118
|
-
upgrade_policy["cluster"] = cluster_name
|
119
|
-
current_state.append(upgrade_policy)
|
120
360
|
|
121
|
-
|
361
|
+
def init_addon_service(ocm_env: OCMEnvironment) -> AddonService:
|
362
|
+
"""
|
363
|
+
Initialize the right version of addon-service for an OCM environment.
|
364
|
+
Since this is just temporary until all OCM environments are on v2, we
|
365
|
+
use a label on the OCM environmentschema to determine which version to use.
|
366
|
+
"""
|
367
|
+
addon_service_version = (ocm_env.labels or {}).get(
|
368
|
+
"feature_flag_addon_service_version"
|
369
|
+
) or "v2"
|
370
|
+
return init_addon_service_version(addon_service_version)
|
122
371
|
|
123
372
|
|
124
|
-
def
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
373
|
+
def init_addon_service_version(addon_service_version: str) -> AddonService:
|
374
|
+
"""
|
375
|
+
Initialize the right version of addon-service based on the version string.
|
376
|
+
Supported versions are:
|
377
|
+
- v1: part of CS
|
378
|
+
- v2: standalone service using upgrade-plans instead of upgrade-policies
|
379
|
+
"""
|
380
|
+
match addon_service_version:
|
381
|
+
case "v1":
|
382
|
+
return AddonServiceV1()
|
383
|
+
case "v2":
|
384
|
+
return AddonServiceV2()
|
385
|
+
case _:
|
386
|
+
raise ValueError(f"Unknown addon service version: {addon_service_version}")
|
387
|
+
|
388
|
+
|
389
|
+
class RemainingSoakDayMetricsBuilder(Protocol):
|
390
|
+
def __call__(
|
391
|
+
self, cluster_uuid: str, soaking_version: str
|
392
|
+
) -> metrics.GaugeMetric: ...
|
393
|
+
|
394
|
+
|
395
|
+
class AbstractUpgradePolicy(ABC, BaseModel):
|
396
|
+
"""Abstract class for upgrade policies
|
397
|
+
Used to create and delete upgrade policies in OCM."""
|
398
|
+
|
399
|
+
cluster: OCMCluster
|
400
|
+
|
401
|
+
id: str | None
|
402
|
+
next_run: str | None
|
403
|
+
schedule: str | None
|
404
|
+
schedule_type: str
|
405
|
+
version: str
|
406
|
+
state: str | None
|
407
|
+
|
408
|
+
@abstractmethod
|
409
|
+
def create(self, ocm_api: OCMBaseClient) -> None:
|
410
|
+
pass
|
411
|
+
|
412
|
+
@abstractmethod
|
413
|
+
def delete(self, ocm_api: OCMBaseClient) -> None:
|
414
|
+
pass
|
415
|
+
|
416
|
+
@abstractmethod
|
417
|
+
def summarize(self) -> str:
|
418
|
+
pass
|
419
|
+
|
420
|
+
|
421
|
+
def addon_upgrade_policy_soonest_next_run() -> str:
|
422
|
+
now = datetime.now(tz=dt.UTC)
|
423
|
+
next_run = now + timedelta(minutes=MIN_DELTA_MINUTES)
|
424
|
+
return next_run.strftime("%Y-%m-%dT%H:%M:%SZ")
|
425
|
+
|
426
|
+
|
427
|
+
class AddonUpgradePolicy(AbstractUpgradePolicy):
|
428
|
+
"""Class to create and delete Addon upgrade policies in OCM"""
|
429
|
+
|
430
|
+
addon_id: str
|
431
|
+
addon_service: AddonService
|
432
|
+
|
433
|
+
class Config:
|
434
|
+
arbitrary_types_allowed = True
|
435
|
+
|
436
|
+
def create(self, ocm_api: OCMBaseClient) -> None:
|
437
|
+
self.addon_service.create_addon_upgrade_policy(
|
438
|
+
ocm_api=ocm_api,
|
439
|
+
cluster_id=self.cluster.id,
|
440
|
+
addon_id=self.addon_id,
|
441
|
+
schedule_type="manual",
|
442
|
+
version=self.version,
|
443
|
+
next_run=self.next_run or addon_upgrade_policy_soonest_next_run(),
|
444
|
+
)
|
445
|
+
|
446
|
+
def delete(self, ocm_api: OCMBaseClient) -> None:
|
447
|
+
if not self.id:
|
448
|
+
raise ValueError(
|
449
|
+
"Cannot delete addon upgrade policy without id (not created yet)"
|
154
450
|
)
|
155
|
-
|
451
|
+
self.addon_service.delete_addon_upgrade_policy(
|
452
|
+
ocm_api=ocm_api, cluster_id=self.cluster.id, policy_id=self.id
|
453
|
+
)
|
454
|
+
|
455
|
+
def summarize(self) -> str:
|
456
|
+
details = {
|
457
|
+
"cluster": self.cluster.name,
|
458
|
+
"cluster_id": self.cluster.id,
|
459
|
+
"version": self.version,
|
460
|
+
"next_run": self.next_run,
|
461
|
+
"addon_id": self.addon_id,
|
462
|
+
}
|
463
|
+
return f"addon upgrade policy - {remove_none_values_from_dict(details)}"
|
464
|
+
|
156
465
|
|
157
|
-
|
466
|
+
class ClusterUpgradePolicy(AbstractUpgradePolicy):
|
467
|
+
"""Class to create and delete ClusterUpgradePolicies in OCM"""
|
158
468
|
|
159
|
-
|
469
|
+
def create(self, ocm_api: OCMBaseClient) -> None:
|
470
|
+
policy = {
|
471
|
+
"version": self.version,
|
472
|
+
"schedule_type": "manual",
|
473
|
+
"next_run": self.next_run,
|
474
|
+
}
|
475
|
+
create_upgrade_policy(ocm_api, self.cluster.id, policy)
|
476
|
+
|
477
|
+
def delete(self, ocm_api: OCMBaseClient) -> None:
|
478
|
+
if not self.id:
|
479
|
+
raise ValueError(
|
480
|
+
"Cannot delete cluster upgrade policy without id (not created yet)"
|
481
|
+
)
|
482
|
+
delete_upgrade_policy(ocm_api, self.cluster.id, self.id)
|
483
|
+
|
484
|
+
def summarize(self) -> str:
|
485
|
+
details = {
|
486
|
+
"cluster": self.cluster.name,
|
487
|
+
"cluster_id": self.cluster.id,
|
488
|
+
"from_version": self.cluster.version.raw_id,
|
489
|
+
"to_version": self.version,
|
490
|
+
"next_run": self.next_run,
|
491
|
+
}
|
492
|
+
return f"cluster upgrade policy - {remove_none_values_from_dict(details)}"
|
493
|
+
|
494
|
+
|
495
|
+
class ControlPlaneUpgradePolicy(AbstractUpgradePolicy):
|
496
|
+
"""Class to create and delete ControlPlanUpgradePolicies in OCM"""
|
497
|
+
|
498
|
+
def create(self, ocm_api: OCMBaseClient) -> None:
|
499
|
+
policy = {
|
500
|
+
"version": self.version,
|
501
|
+
"schedule_type": "manual",
|
502
|
+
"upgrade_type": "ControlPlane",
|
503
|
+
"cluster_id": self.cluster.id,
|
504
|
+
"next_run": self.next_run,
|
505
|
+
}
|
506
|
+
create_control_plane_upgrade_policy(ocm_api, self.cluster.id, policy)
|
507
|
+
|
508
|
+
def delete(self, ocm_api: OCMBaseClient) -> None:
|
509
|
+
if not self.id:
|
510
|
+
raise ValueError(
|
511
|
+
"Cannot delete controlplane upgrade policy without id (not created yet)"
|
512
|
+
)
|
513
|
+
delete_control_plane_upgrade_policy(ocm_api, self.cluster.id, self.id)
|
514
|
+
|
515
|
+
def summarize(self) -> str:
|
516
|
+
details = {
|
517
|
+
"cluster": self.cluster.name,
|
518
|
+
"cluster_id": self.cluster.id,
|
519
|
+
"version": self.version,
|
520
|
+
"next_run": self.next_run,
|
521
|
+
}
|
522
|
+
return f"cluster upgrade policy - {remove_none_values_from_dict(details)}"
|
523
|
+
|
524
|
+
|
525
|
+
class NodePoolUpgradePolicy(AbstractUpgradePolicy):
|
526
|
+
node_pool: str
|
527
|
+
"""Class to create and delete NodePoolUpgradePolicies in OCM"""
|
528
|
+
|
529
|
+
def create(self, ocm_api: OCMBaseClient) -> None:
|
530
|
+
policy = {
|
531
|
+
"version": self.version,
|
532
|
+
"schedule_type": "manual",
|
533
|
+
"upgrade_type": "NodePool",
|
534
|
+
"cluster_id": self.cluster.id,
|
535
|
+
"next_run": self.next_run,
|
536
|
+
}
|
537
|
+
create_node_pool_upgrade_policy(
|
538
|
+
ocm_api, self.cluster.id, self.node_pool, policy
|
539
|
+
)
|
540
|
+
|
541
|
+
def delete(self, ocm_api: OCMBaseClient) -> None:
|
542
|
+
raise NotImplementedError("NodePoolUpgradePolicy.delete() not implemented")
|
543
|
+
|
544
|
+
def summarize(self) -> str:
|
545
|
+
details = {
|
546
|
+
"cluster": self.cluster.name,
|
547
|
+
"cluster_id": self.cluster.id,
|
548
|
+
"node_pool": self.node_pool,
|
549
|
+
"version": self.version,
|
550
|
+
"next_run": self.next_run,
|
551
|
+
}
|
552
|
+
return f"node pool upgrade policy - {remove_none_values_from_dict(details)}"
|
553
|
+
|
554
|
+
|
555
|
+
class UpgradePolicyHandler(BaseModel, extra=Extra.forbid):
|
556
|
+
"""Class to handle upgrade policy actions"""
|
557
|
+
|
558
|
+
action: str
|
559
|
+
policy: AbstractUpgradePolicy
|
560
|
+
|
561
|
+
def act(self, dry_run: bool, ocm_api: OCMBaseClient) -> None:
|
562
|
+
logging.info(f"{self.action} {self.policy.summarize()}")
|
563
|
+
if dry_run:
|
564
|
+
return
|
565
|
+
|
566
|
+
if not self.action:
|
567
|
+
pass
|
568
|
+
elif self.action == "delete":
|
569
|
+
self.policy.delete(ocm_api)
|
570
|
+
elif self.action == "create":
|
571
|
+
self.policy.create(ocm_api)
|
572
|
+
|
573
|
+
|
574
|
+
def fetch_current_state(
|
575
|
+
ocm_api: OCMBaseClient,
|
576
|
+
org_upgrade_spec: OrganizationUpgradeSpec,
|
577
|
+
addons: bool = False,
|
578
|
+
) -> list[AbstractUpgradePolicy]:
|
579
|
+
current_state: list[AbstractUpgradePolicy] = []
|
580
|
+
addon_service = init_addon_service(org_upgrade_spec.org.environment)
|
581
|
+
for spec in org_upgrade_spec.specs:
|
582
|
+
if addons and isinstance(spec, ClusterAddonUpgradeSpec):
|
583
|
+
addon_spec = cast(ClusterAddonUpgradeSpec, spec)
|
584
|
+
addon_upgrade_policies = addon_service.get_addon_upgrade_policies(
|
585
|
+
ocm_api, spec.cluster.id, addon_id=addon_spec.addon.addon.id
|
586
|
+
)
|
587
|
+
for addon_upgrade_policy in addon_upgrade_policies:
|
588
|
+
current_state.append(
|
589
|
+
AddonUpgradePolicy(
|
590
|
+
id=addon_upgrade_policy.id,
|
591
|
+
addon_id=addon_spec.addon.addon.id,
|
592
|
+
cluster=spec.cluster,
|
593
|
+
next_run=addon_upgrade_policy.next_run,
|
594
|
+
schedule=addon_upgrade_policy.schedule,
|
595
|
+
schedule_type=addon_upgrade_policy.schedule_type,
|
596
|
+
version=addon_upgrade_policy.version,
|
597
|
+
state=addon_upgrade_policy.state,
|
598
|
+
addon_service=addon_service,
|
599
|
+
)
|
600
|
+
)
|
601
|
+
elif spec.cluster.is_rosa_hypershift():
|
602
|
+
upgrade_policies = get_control_plane_upgrade_policies(
|
603
|
+
ocm_api, spec.cluster.id
|
604
|
+
)
|
605
|
+
for upgrade_policy in upgrade_policies:
|
606
|
+
upgrade_policy["cluster"] = spec.cluster
|
607
|
+
current_state.append(ControlPlaneUpgradePolicy(**upgrade_policy))
|
608
|
+
for node_pool in spec.node_pools:
|
609
|
+
node_upgrade_policies = get_node_pool_upgrade_policies(
|
610
|
+
ocm_api, spec.cluster.id, node_pool.id
|
611
|
+
)
|
612
|
+
for upgrade_policy in node_upgrade_policies:
|
613
|
+
upgrade_policy["cluster"] = spec.cluster
|
614
|
+
upgrade_policy["node_pool"] = node_pool.id
|
615
|
+
current_state.append(NodePoolUpgradePolicy(**upgrade_policy))
|
616
|
+
else:
|
617
|
+
upgrade_policies = get_upgrade_policies(ocm_api, spec.cluster.id)
|
618
|
+
for upgrade_policy in upgrade_policies:
|
619
|
+
upgrade_policy["cluster"] = spec.cluster
|
620
|
+
current_state.append(ClusterUpgradePolicy(**upgrade_policy))
|
621
|
+
|
622
|
+
return current_state
|
623
|
+
|
624
|
+
|
625
|
+
# consider first lower versions and lower soakdays (when versions are equal)
|
626
|
+
def sort_key(spec: ClusterUpgradeSpec) -> tuple:
|
627
|
+
return (
|
628
|
+
parse_semver(spec.cluster.version.raw_id),
|
629
|
+
spec.upgrade_policy.conditions.soak_days or 0,
|
630
|
+
)
|
160
631
|
|
161
632
|
|
162
633
|
def update_history(
|
163
|
-
version_data: VersionData,
|
634
|
+
version_data: VersionData, org_upgrade_spec: OrganizationUpgradeSpec
|
164
635
|
) -> None:
|
165
636
|
"""Update history with information from clusters with upgrade policies.
|
166
637
|
|
@@ -172,10 +643,21 @@ def update_history(
|
|
172
643
|
check_in = version_data.check_in or now
|
173
644
|
|
174
645
|
# we iterate over clusters upgrade policies and update the version history
|
175
|
-
for
|
176
|
-
|
177
|
-
|
178
|
-
|
646
|
+
for spec in org_upgrade_spec.specs:
|
647
|
+
# ... but we only care about healthy cluster
|
648
|
+
errors = spec.health.get_errors(only_enforced=True)
|
649
|
+
if errors:
|
650
|
+
logging.debug(
|
651
|
+
f"unhealthy cluster {spec.cluster.name} "
|
652
|
+
f"(id={spec.cluster.id}, org_id={spec.org.org_id}, org_name={spec.org.name}) "
|
653
|
+
f"will not contribute to soak days for {spec.cluster.version.raw_id} "
|
654
|
+
f"and workloads {spec.upgrade_policy.workloads}: "
|
655
|
+
f"{', '.join([e.error for e in errors])}"
|
656
|
+
)
|
657
|
+
continue
|
658
|
+
current_version = spec.current_version
|
659
|
+
cluster = spec.cluster.name
|
660
|
+
workloads = spec.upgrade_policy.workloads
|
179
661
|
# we keep the version history per workload
|
180
662
|
for w in workloads:
|
181
663
|
workload_history = version_data.workload_history(
|
@@ -191,67 +673,83 @@ def update_history(
|
|
191
673
|
else:
|
192
674
|
workload_history.reporting.append(cluster)
|
193
675
|
|
194
|
-
version_data.update_stats(
|
676
|
+
version_data.update_stats(org_upgrade_spec)
|
195
677
|
|
196
678
|
version_data.check_in = now
|
197
679
|
|
198
680
|
|
681
|
+
def version_data_state_key(ocm_env: str, org_id: str, addon_id: str | None) -> str:
|
682
|
+
return f"{ocm_env}/{org_id}/{addon_id}" if addon_id else f"{ocm_env}/{org_id}"
|
683
|
+
|
684
|
+
|
199
685
|
@defer
|
200
686
|
def get_version_data_map(
|
201
687
|
dry_run: bool,
|
202
|
-
|
203
|
-
ocm_map: OCMMap,
|
688
|
+
org_upgrade_spec: OrganizationUpgradeSpec,
|
204
689
|
integration: str,
|
205
690
|
addon_id: str = "",
|
206
|
-
|
207
|
-
|
691
|
+
inherit_version_data: bool = True,
|
692
|
+
defer: Callable | None = None,
|
693
|
+
) -> VersionDataMap:
|
208
694
|
"""Get a summary of versions history per OCM instance
|
209
695
|
|
210
696
|
Args:
|
211
697
|
dry_run (bool): save updated history to remote state
|
212
|
-
|
213
|
-
ocm_map (OCMMap): OCM clients per OCM instance
|
698
|
+
org_upgrade_spec (OrganizationUpgradeSpec): organization upgrade spec
|
214
699
|
addon_id (str): optional addon id to get & store the addon specific state,
|
215
700
|
additionally to the ocm org name
|
701
|
+
inherit_version_data: whether to inherit version data from other OCM orgs
|
216
702
|
defer (Optional<Callable>): defer function
|
217
703
|
|
218
704
|
Returns:
|
219
|
-
dict: version data per OCM
|
705
|
+
dict: version data per OCM organization keyed by the organization ID
|
220
706
|
"""
|
221
707
|
state = init_state(integration=integration)
|
222
708
|
if defer:
|
223
709
|
defer(state.cleanup)
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
710
|
+
result = VersionDataMap()
|
711
|
+
|
712
|
+
# we keep a remote state per OCM org
|
713
|
+
state_key = version_data_state_key(
|
714
|
+
org_upgrade_spec.org.environment.name, org_upgrade_spec.org.org_id, addon_id
|
715
|
+
)
|
716
|
+
version_data = get_version_data(state, state_key)
|
717
|
+
update_history(version_data, org_upgrade_spec)
|
718
|
+
result.add(
|
719
|
+
org_upgrade_spec.org.environment.name, org_upgrade_spec.org.org_id, version_data
|
720
|
+
)
|
721
|
+
if not dry_run:
|
722
|
+
version_data.save(state, state_key)
|
233
723
|
|
234
724
|
# aggregate data from other ocm orgs
|
235
725
|
# this is done *after* saving the state: we do not store the other orgs data in our state.
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
other_ocm_name = other_ocm["name"]
|
240
|
-
if ocm_name == other_ocm_name:
|
726
|
+
if inherit_version_data:
|
727
|
+
for other_ocm in org_upgrade_spec.org.inherit_version_data or []:
|
728
|
+
if org_upgrade_spec.org.org_id == other_ocm.org_id:
|
241
729
|
raise ValueError(
|
242
|
-
f"[{
|
730
|
+
f"[{org_upgrade_spec.org.name} - {org_upgrade_spec.org.org_id}] OCM organization inherits version data from itself"
|
243
731
|
)
|
244
|
-
if
|
245
|
-
o
|
732
|
+
if org_upgrade_spec.org.org_id not in [
|
733
|
+
o.org_id for o in other_ocm.publish_version_data or []
|
246
734
|
]:
|
247
735
|
raise ValueError(
|
248
|
-
f"[{
|
736
|
+
f"[{org_upgrade_spec.org.name} - {org_upgrade_spec.org.org_id}] OCM organization inherits version data from "
|
737
|
+
f"{other_ocm.org_id}, but this data is not published to it: "
|
738
|
+
f"missing publishVersionData in {other_ocm.org_id}"
|
249
739
|
)
|
250
|
-
|
251
|
-
|
252
|
-
|
740
|
+
other_ocm_data = get_version_data(
|
741
|
+
state,
|
742
|
+
version_data_state_key(
|
743
|
+
other_ocm.environment.name, other_ocm.org_id, addon_id
|
744
|
+
),
|
745
|
+
)
|
746
|
+
result.get(
|
747
|
+
org_upgrade_spec.org.environment.name, org_upgrade_spec.org.org_id
|
748
|
+
).aggregate(
|
749
|
+
other_ocm_data, f"{other_ocm.environment.name}/{other_ocm.org_id}"
|
750
|
+
)
|
253
751
|
|
254
|
-
return
|
752
|
+
return result
|
255
753
|
|
256
754
|
|
257
755
|
def workload_sector_versions(sector: Sector, workload: str) -> list[VersionInfo]:
|
@@ -259,13 +757,11 @@ def workload_sector_versions(sector: Sector, workload: str) -> list[VersionInfo]
|
|
259
757
|
get all versions of clusters running the specified workload in that sector
|
260
758
|
"""
|
261
759
|
versions = []
|
262
|
-
for
|
760
|
+
for spec in sector.specs:
|
263
761
|
# clusters within a sector always have workloads (mandatory in schema)
|
264
|
-
workloads =
|
762
|
+
workloads = spec.upgrade_policy.workloads
|
265
763
|
if workload in workloads:
|
266
|
-
versions.append(
|
267
|
-
parse_semver(sector.ocmspec(cluster_info["name"]).spec.version)
|
268
|
-
)
|
764
|
+
versions.append(parse_semver(spec.cluster.version.raw_id))
|
269
765
|
return versions
|
270
766
|
|
271
767
|
|
@@ -285,34 +781,33 @@ def workload_sector_dependencies(sector: Sector, workload: str) -> set[Sector]:
|
|
285
781
|
|
286
782
|
def version_conditions_met(
|
287
783
|
version: str,
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
upgrade_conditions: dict[str, Any],
|
784
|
+
version_data: VersionData,
|
785
|
+
upgrade_policy: ClusterUpgradePolicyV1,
|
786
|
+
sector: Sector | None,
|
292
787
|
) -> bool:
|
293
788
|
"""Check that upgrade conditions are met for a version
|
294
789
|
|
295
790
|
Args:
|
296
791
|
version (string): version to check
|
297
|
-
|
298
|
-
ocm_name (string): name of OCM instance
|
299
|
-
upgrade_conditions (dict): query results of upgrade conditions
|
792
|
+
version_data (VersionData): history of versions of an OCM organization
|
300
793
|
workloads (list): strings representing types of workloads
|
794
|
+
upgrade_policy (ClusterUpgradePolicy): the upgrade policy to validate
|
795
|
+
|
301
796
|
|
302
797
|
Returns:
|
303
798
|
bool: are version upgrade conditions met
|
304
799
|
"""
|
305
|
-
sector = upgrade_conditions.get("sector")
|
306
800
|
if sector:
|
307
|
-
version_data = version_data_map[ocm_name]
|
308
801
|
# check that inherited orgs run at least that version for our workloads
|
309
|
-
if not version_data.validate_against_inherited(
|
802
|
+
if not version_data.validate_against_inherited(
|
803
|
+
version, upgrade_policy.workloads
|
804
|
+
):
|
310
805
|
return False
|
311
806
|
|
312
807
|
# check if previous sectors run at least this version for that workload
|
313
808
|
# we will check dependencies recursively until there are versions for the given workload
|
314
809
|
# or no more dependencies to check
|
315
|
-
for w in workloads:
|
810
|
+
for w in upgrade_policy.workloads:
|
316
811
|
for dep in workload_sector_dependencies(sector, w):
|
317
812
|
dep_versions = workload_sector_versions(dep, w)
|
318
813
|
if not dep_versions:
|
@@ -321,10 +816,9 @@ def version_conditions_met(
|
|
321
816
|
return False
|
322
817
|
|
323
818
|
# check soak days condition is met for this version
|
324
|
-
soak_days =
|
819
|
+
soak_days = upgrade_policy.conditions.soak_days
|
325
820
|
if soak_days is not None:
|
326
|
-
|
327
|
-
for w in workloads:
|
821
|
+
for w in upgrade_policy.workloads:
|
328
822
|
workload_history = version_data.workload_history(version, w)
|
329
823
|
if soak_days > workload_history.soak_days:
|
330
824
|
return False
|
@@ -332,261 +826,467 @@ def version_conditions_met(
|
|
332
826
|
return True
|
333
827
|
|
334
828
|
|
829
|
+
def gates_for_minor_version(
|
830
|
+
gates: list[OCMVersionGate],
|
831
|
+
target_version_prefix: str,
|
832
|
+
) -> list[OCMVersionGate]:
|
833
|
+
return [g for g in gates if g.version_raw_id_prefix == target_version_prefix]
|
834
|
+
|
835
|
+
|
836
|
+
def is_gate_applicable_to_cluster(gate: OCMVersionGate, cluster: OCMCluster) -> bool:
|
837
|
+
# check that the cluster has an upgrade path that crosses the gate version
|
838
|
+
minor_version_upgrade_paths = {
|
839
|
+
get_version_prefix(version) for version in cluster.available_upgrades()
|
840
|
+
}
|
841
|
+
if gate.version_raw_id_prefix not in minor_version_upgrade_paths:
|
842
|
+
return False
|
843
|
+
|
844
|
+
# consider only gates after the clusters current minor version
|
845
|
+
# OCM onls supports creating gate agreements for later minor versions than the
|
846
|
+
# current cluster version
|
847
|
+
if not parse_semver(f"{cluster.minor_version()}.0").match(
|
848
|
+
f"<{gate.version_raw_id_prefix}.0"
|
849
|
+
):
|
850
|
+
return False
|
851
|
+
|
852
|
+
# check the handler for the gate type if it is responsible for this kind
|
853
|
+
# of cluster
|
854
|
+
handler = HANDLERS.get(gate.label)
|
855
|
+
if handler:
|
856
|
+
return handler.gate_applicable_to_cluster(cluster)
|
857
|
+
return False
|
858
|
+
|
859
|
+
|
335
860
|
def gates_to_agree(
|
336
|
-
|
337
|
-
|
861
|
+
gates: list[OCMVersionGate],
|
862
|
+
cluster: OCMCluster,
|
863
|
+
acked_gate_ids: set[str],
|
864
|
+
) -> list[OCMVersionGate]:
|
338
865
|
"""Check via OCM if a version is agreed
|
339
866
|
|
340
867
|
Args:
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
ocm (OCM): used to fetch infos from OCM
|
868
|
+
gates (OCMVersionGate): list of OCMVersionGate objects to check for agreements
|
869
|
+
cluster_id (str): the cluster that needs gate agreements
|
870
|
+
ocm_api (OCMBaseClient): used to fetch infos from OCM
|
345
871
|
|
346
872
|
Returns:
|
347
|
-
|
873
|
+
list[OCMVersionGate]: list of gates a cluster has not agreed on yet
|
348
874
|
"""
|
349
|
-
|
350
|
-
agreement["version_gate"]["id"]
|
351
|
-
for agreement in ocm.get_version_agreement(cluster)
|
352
|
-
}
|
353
|
-
semver_cluster = parse_semver(f"{cluster_version}")
|
354
|
-
|
355
|
-
return [
|
356
|
-
gate["id"]
|
357
|
-
for gate in ocm.get_version_gates(version_prefix)
|
358
|
-
if gate["id"] not in agreements and semver_cluster.match(f"<{version_prefix}.0")
|
359
|
-
]
|
360
|
-
|
875
|
+
applicable_gates = [g for g in gates if is_gate_applicable_to_cluster(g, cluster)]
|
361
876
|
|
362
|
-
|
363
|
-
|
364
|
-
return
|
877
|
+
if applicable_gates:
|
878
|
+
return [gate for gate in applicable_gates if gate.id not in acked_gate_ids]
|
879
|
+
return []
|
365
880
|
|
366
881
|
|
367
882
|
def upgradeable_version(
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
addon_id: str = "",
|
373
|
-
) -> Optional[str]:
|
883
|
+
spec: ClusterUpgradeSpec,
|
884
|
+
version_data: VersionData,
|
885
|
+
sector: Sector | None,
|
886
|
+
) -> str | None:
|
374
887
|
"""Get the highest next version we can upgrade to, fulfilling all conditions"""
|
375
|
-
for version in reversed(sort_versions(
|
376
|
-
if
|
377
|
-
continue
|
378
|
-
if not addon_id and ocm.version_blocked(version):
|
888
|
+
for version in reversed(sort_versions(spec.get_available_upgrades())):
|
889
|
+
if spec.version_blocked(version):
|
379
890
|
continue
|
380
891
|
if version_conditions_met(
|
381
892
|
version,
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
policy["conditions"],
|
893
|
+
version_data,
|
894
|
+
spec.upgrade_policy,
|
895
|
+
sector,
|
386
896
|
):
|
387
897
|
return version
|
388
898
|
return None
|
389
899
|
|
390
900
|
|
391
|
-
def
|
392
|
-
|
393
|
-
|
901
|
+
def verify_current_should_skip(
|
902
|
+
current_state: Sequence[AbstractUpgradePolicy],
|
903
|
+
desired: ClusterUpgradeSpec,
|
904
|
+
now: datetime,
|
905
|
+
addon_id: str = "",
|
906
|
+
) -> tuple[bool, UpgradePolicyHandler | None]:
|
907
|
+
current_policies = [c for c in current_state if c.cluster.id == desired.cluster.id]
|
908
|
+
if not current_policies:
|
909
|
+
return False, None
|
910
|
+
|
911
|
+
# there can only be one upgrade policy per cluster
|
912
|
+
if len(current_policies) != 1:
|
913
|
+
raise ValueError(
|
914
|
+
f"[{desired.org.org_id}/{desired.cluster.name}] expected only one upgrade policy"
|
915
|
+
)
|
916
|
+
current = current_policies[0]
|
917
|
+
version = current.version # may not exist in automatic upgrades
|
918
|
+
if version and not addon_id and desired.version_blocked(version):
|
919
|
+
next_run = current.next_run
|
920
|
+
if next_run and datetime.strptime(next_run, "%Y-%m-%dT%H:%M:%SZ") < now:
|
921
|
+
logging.warning(
|
922
|
+
f"[{desired.org.org_id}/{desired.org.name}/{desired.cluster.name}] currently upgrading to blocked version '{version}'"
|
923
|
+
)
|
924
|
+
return True, None
|
925
|
+
logging.debug(
|
926
|
+
f"[{desired.org.org_id}/{desired.org.name}/{desired.cluster.name}] found planned upgrade policy "
|
927
|
+
+ f"with blocked version {version}"
|
928
|
+
)
|
929
|
+
return False, UpgradePolicyHandler(action="delete", policy=current)
|
930
|
+
|
931
|
+
# else
|
932
|
+
logging.debug(
|
933
|
+
f"[{desired.org.org_id}/{desired.org.name}/{desired.cluster.name}] skipping cluster with existing upgrade policy"
|
934
|
+
)
|
935
|
+
return True, None
|
936
|
+
|
937
|
+
|
938
|
+
def verify_schedule_should_skip(
|
939
|
+
desired: ClusterUpgradeSpec,
|
940
|
+
now: datetime,
|
941
|
+
addon_id: str = "",
|
942
|
+
) -> str | None:
|
943
|
+
schedule = desired.upgrade_policy.schedule
|
944
|
+
iter = croniter(schedule, day_or=False)
|
945
|
+
# ClusterService refuses scheduling upgrades less than 5m in advance
|
946
|
+
# Let's find the next schedule that is at least 5m ahead.
|
947
|
+
# We do not need that much delay for addon upgrades since they run
|
948
|
+
# immediately
|
949
|
+
delay_minutes = 1 if addon_id else MIN_DELTA_MINUTES
|
950
|
+
next_schedule = iter.get_next(
|
951
|
+
dt.datetime, start_time=now + timedelta(minutes=delay_minutes)
|
952
|
+
)
|
953
|
+
next_schedule_in_seconds = (next_schedule - now).total_seconds()
|
954
|
+
next_schedule_in_hours = next_schedule_in_seconds / 3600 # seconds in hour
|
955
|
+
|
956
|
+
# ignore clusters with an upgrade schedule not within the next 2 hours
|
957
|
+
within_upgrade_timeframe = next_schedule_in_hours <= 2
|
958
|
+
if addon_id:
|
959
|
+
# addons upgrade cannot be scheduled in advance as the "next_run" field
|
960
|
+
# is not supported. So we run this only 10min before schedule to be somewhat
|
961
|
+
# correct
|
962
|
+
within_upgrade_timeframe = next_schedule_in_seconds / 60 <= 10
|
963
|
+
if not within_upgrade_timeframe:
|
964
|
+
logging.debug(
|
965
|
+
f"[{desired.org.org_id}/{desired.org.name}/{desired.cluster.name}] skipping cluster with no upcoming upgrade"
|
966
|
+
)
|
967
|
+
return None
|
968
|
+
return next_schedule.strftime("%Y-%m-%dT%H:%M:%SZ")
|
969
|
+
|
970
|
+
|
971
|
+
def verify_lock_should_skip(
|
972
|
+
desired: ClusterUpgradeSpec, locked: dict[str, str]
|
973
|
+
) -> bool:
|
974
|
+
mutexes = desired.effective_mutexes
|
975
|
+
if any(lock in locked for lock in mutexes):
|
976
|
+
locking = {lock: locked[lock] for lock in mutexes if lock in locked}
|
977
|
+
logging.debug(
|
978
|
+
f"[{desired.org.org_id}/{desired.org.name}/{desired.cluster.name}] skipping cluster: locked out by {locking}"
|
979
|
+
)
|
980
|
+
return True
|
981
|
+
return False
|
982
|
+
|
983
|
+
|
984
|
+
def _create_upgrade_policy(
|
985
|
+
next_schedule: str, spec: ClusterUpgradeSpec, version: str
|
986
|
+
) -> AbstractUpgradePolicy:
|
987
|
+
if spec.cluster.is_rosa_hypershift():
|
988
|
+
return ControlPlaneUpgradePolicy(
|
989
|
+
cluster=spec.cluster,
|
990
|
+
version=version,
|
991
|
+
schedule_type="manual",
|
992
|
+
next_run=next_schedule,
|
993
|
+
)
|
994
|
+
return ClusterUpgradePolicy(
|
995
|
+
cluster=spec.cluster,
|
996
|
+
version=version,
|
997
|
+
schedule_type="manual",
|
998
|
+
next_run=next_schedule,
|
999
|
+
)
|
1000
|
+
|
1001
|
+
|
1002
|
+
def _calculate_node_pool_diffs(
|
1003
|
+
spec: ClusterUpgradeSpec, now: datetime
|
1004
|
+
) -> UpgradePolicyHandler | None:
|
1005
|
+
for pool in spec.node_pools:
|
1006
|
+
if parse_semver(pool.version).match(f"<{spec.current_version}"):
|
1007
|
+
next_schedule = (now + timedelta(minutes=MIN_DELTA_MINUTES)).strftime(
|
1008
|
+
"%Y-%m-%dT%H:%M:%SZ"
|
1009
|
+
)
|
1010
|
+
return UpgradePolicyHandler(
|
1011
|
+
action="create",
|
1012
|
+
policy=NodePoolUpgradePolicy(
|
1013
|
+
cluster=spec.cluster,
|
1014
|
+
version=spec.current_version,
|
1015
|
+
schedule_type="manual",
|
1016
|
+
next_run=next_schedule,
|
1017
|
+
node_pool=pool.id,
|
1018
|
+
),
|
1019
|
+
)
|
1020
|
+
return None
|
394
1021
|
|
395
1022
|
|
396
1023
|
def calculate_diff(
|
397
|
-
current_state:
|
398
|
-
desired_state:
|
399
|
-
|
400
|
-
|
1024
|
+
current_state: Sequence[AbstractUpgradePolicy],
|
1025
|
+
desired_state: OrganizationUpgradeSpec,
|
1026
|
+
ocm_api: OCMBaseClient,
|
1027
|
+
version_data: VersionData,
|
401
1028
|
addon_id: str = "",
|
402
|
-
) -> list[
|
1029
|
+
) -> list[UpgradePolicyHandler]:
|
403
1030
|
"""Check available upgrades for each cluster in the desired state
|
404
1031
|
according to upgrade conditions
|
405
1032
|
|
406
1033
|
Args:
|
407
|
-
current_state (list):
|
408
|
-
desired_state (
|
409
|
-
|
410
|
-
|
1034
|
+
current_state (list): currently existing upgrade policies
|
1035
|
+
desired_state (OrganizationUpgradeSpec): organization upgrade spec
|
1036
|
+
ocm_api (OCMBaseClient): OCM API client
|
1037
|
+
version_data (VersionData): version data history of the org
|
411
1038
|
addon_id (str): optional addonid to calculate diffs for
|
412
1039
|
|
413
1040
|
Returns:
|
414
1041
|
list: upgrade policies to be applied
|
415
1042
|
"""
|
416
|
-
diffs = []
|
417
1043
|
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
1044
|
+
def set_mutex(
|
1045
|
+
locked: dict[str, str], cluster_id: str, mutexes: set[str] | None = None
|
1046
|
+
) -> None:
|
1047
|
+
for mutex in mutexes or set():
|
1048
|
+
locked[mutex] = cluster_id
|
1049
|
+
|
1050
|
+
diffs: list[UpgradePolicyHandler] = []
|
424
1051
|
|
1052
|
+
# all clusters IDs with a current upgradePolicy are considered locked
|
1053
|
+
locked: dict[str, str] = {}
|
1054
|
+
for spec in desired_state.specs:
|
1055
|
+
if spec.cluster.id in [s.cluster.id for s in current_state]:
|
1056
|
+
for mutex in spec.effective_mutexes:
|
1057
|
+
locked[mutex] = spec.cluster.id
|
1058
|
+
|
1059
|
+
addon_service = init_addon_service(desired_state.org.environment)
|
425
1060
|
now = datetime.utcnow()
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
if c:
|
433
|
-
# there can only be one upgrade policy per cluster
|
434
|
-
if len(c) != 1:
|
435
|
-
raise ValueError(f"[{cluster}] expected only one upgrade policy")
|
436
|
-
current = c[0]
|
437
|
-
version = current.get("version") # may not exist in automatic upgrades
|
438
|
-
if version and not addon_id and ocm.version_blocked(version):
|
439
|
-
next_run = current.get("next_run")
|
440
|
-
if next_run and datetime.strptime(next_run, "%Y-%m-%dT%H:%M:%SZ") < now:
|
441
|
-
logging.warning(
|
442
|
-
f"[{cluster}] currently upgrading to blocked version '{version}'"
|
443
|
-
)
|
444
|
-
continue
|
445
|
-
logging.debug(
|
446
|
-
f"[{ocm.name}/{cluster}] found planned upgrade policy "
|
447
|
-
+ f"with blocked version {version}"
|
448
|
-
)
|
449
|
-
item = {
|
450
|
-
"action": "delete",
|
451
|
-
"cluster": cluster,
|
452
|
-
"version": version,
|
453
|
-
"id": current["id"],
|
454
|
-
}
|
455
|
-
diffs.append(item)
|
456
|
-
else:
|
457
|
-
logging.debug(
|
458
|
-
f"[{ocm.name}/{cluster}] skipping cluster with existing upgrade policy"
|
459
|
-
)
|
1061
|
+
gates = get_version_gates(ocm_api)
|
1062
|
+
for spec in desired_state.specs:
|
1063
|
+
# Upgrading node pools, only required for Hypershift clusters
|
1064
|
+
# do this in the same loop, to skip cluster on node pool upgrade
|
1065
|
+
if spec.cluster.is_rosa_hypershift():
|
1066
|
+
if verify_lock_should_skip(spec, locked):
|
460
1067
|
continue
|
461
1068
|
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
#
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
next_schedule_in_hours = next_schedule_in_seconds / 3600 # seconds in hour
|
474
|
-
|
475
|
-
# ignore clusters with an upgrade schedule not within the next 2 hours
|
476
|
-
within_upgrade_timeframe = next_schedule_in_hours <= 2
|
477
|
-
if addon_id:
|
478
|
-
# addons upgrade cannot be scheduled in advance as the "next_run" field
|
479
|
-
# is not supported. So we run this only 10min before schedule to be somewhat
|
480
|
-
# correct
|
481
|
-
within_upgrade_timeframe = next_schedule_in_seconds / 60 <= 10
|
482
|
-
if not within_upgrade_timeframe:
|
483
|
-
logging.debug(
|
484
|
-
f"[{ocm.name}/{cluster}] skipping cluster with no upcoming upgrade"
|
485
|
-
)
|
1069
|
+
node_pool_update = _calculate_node_pool_diffs(spec, now)
|
1070
|
+
if node_pool_update: # node pool update policy not yet created
|
1071
|
+
diffs.append(node_pool_update)
|
1072
|
+
set_mutex(locked, spec.cluster.id, spec.effective_mutexes)
|
1073
|
+
continue
|
1074
|
+
|
1075
|
+
# ignore clusters with an existing upgrade policy
|
1076
|
+
skip, delete_policy = verify_current_should_skip(
|
1077
|
+
current_state, spec, now, addon_id
|
1078
|
+
)
|
1079
|
+
if skip:
|
486
1080
|
continue
|
1081
|
+
if delete_policy:
|
1082
|
+
diffs.append(delete_policy)
|
487
1083
|
|
488
|
-
|
489
|
-
|
490
|
-
lock: locked[lock] for lock in cluster_mutexes(d) if lock in locked
|
491
|
-
}
|
492
|
-
logging.debug(
|
493
|
-
f"[{ocm.name}/{cluster}] skipping cluster: locked out by {locking}"
|
494
|
-
)
|
1084
|
+
next_schedule = verify_schedule_should_skip(spec, now, addon_id)
|
1085
|
+
if not next_schedule:
|
495
1086
|
continue
|
496
1087
|
|
497
|
-
|
498
|
-
|
499
|
-
# an alternative is to find available upgrades for our current version from
|
500
|
-
# ${API_CLUSTERS_MGMT}/addons/${addon_id}/versions
|
501
|
-
# .items[] | select(.id == {current_version}) | .available_upgrades
|
502
|
-
# but we will always want to get the one that is currently published normally
|
503
|
-
upgrades = [
|
504
|
-
a["version"]["id"]
|
505
|
-
for a in ocm.addons
|
506
|
-
if a["id"] == addon_id and a["version"]["id"] != d["current_version"]
|
507
|
-
]
|
508
|
-
else:
|
509
|
-
upgrades = ocm.get_available_upgrades(d["current_version"], d["channel"])
|
510
|
-
version = upgradeable_version(d, version_data_map, ocm, upgrades, addon_id)
|
1088
|
+
if verify_lock_should_skip(spec, locked):
|
1089
|
+
continue
|
511
1090
|
|
1091
|
+
sector_name = spec.upgrade_policy.conditions.sector
|
1092
|
+
sector = None
|
1093
|
+
if sector_name:
|
1094
|
+
sector = desired_state.sectors[sector_name]
|
1095
|
+
version = upgradeable_version(spec, version_data, sector)
|
512
1096
|
if version:
|
513
|
-
item = {
|
514
|
-
"action": "create",
|
515
|
-
"cluster": cluster,
|
516
|
-
"version": version,
|
517
|
-
"schedule_type": "manual",
|
518
|
-
}
|
519
1097
|
if addon_id:
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
1098
|
+
diffs.append(
|
1099
|
+
UpgradePolicyHandler(
|
1100
|
+
action="create",
|
1101
|
+
policy=AddonUpgradePolicy(
|
1102
|
+
action="create",
|
1103
|
+
cluster=spec.cluster,
|
1104
|
+
version=version,
|
1105
|
+
schedule_type="manual",
|
1106
|
+
addon_id=addon_id,
|
1107
|
+
upgrade_type="ADDON",
|
1108
|
+
addon_service=addon_service,
|
1109
|
+
),
|
1110
|
+
)
|
1111
|
+
)
|
524
1112
|
else:
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
1113
|
+
target_version_prefix = get_version_prefix(version)
|
1114
|
+
minor_version_gates = gates_for_minor_version(
|
1115
|
+
gates=gates,
|
1116
|
+
target_version_prefix=target_version_prefix,
|
1117
|
+
)
|
1118
|
+
gates_with_missing_agreements = gates_to_agree(
|
1119
|
+
gates=minor_version_gates,
|
1120
|
+
cluster=spec.cluster,
|
1121
|
+
acked_gate_ids={
|
1122
|
+
agreement["version_gate"]["id"]
|
1123
|
+
for agreement in get_version_agreement(ocm_api, spec.cluster.id)
|
1124
|
+
},
|
1125
|
+
)
|
1126
|
+
if gates_with_missing_agreements:
|
1127
|
+
missing_gate_ids = [
|
1128
|
+
gate.id for gate in gates_with_missing_agreements
|
1129
|
+
]
|
1130
|
+
logging.info(
|
1131
|
+
f"[{spec.org.org_id}/{spec.org.name}/{spec.cluster.name}] found gates with missing agreements for {target_version_prefix} - {missing_gate_ids} "
|
1132
|
+
"Skip creation of an upgrade policy until all of them have been acked by the version-gate-approver integration or a user."
|
1133
|
+
)
|
1134
|
+
continue
|
1135
|
+
diffs.append(
|
1136
|
+
UpgradePolicyHandler(
|
1137
|
+
action="create",
|
1138
|
+
policy=_create_upgrade_policy(next_schedule, spec, version),
|
1139
|
+
)
|
531
1140
|
)
|
532
|
-
|
533
|
-
locked[mutex] = cluster
|
534
|
-
diffs.append(item)
|
1141
|
+
set_mutex(locked, spec.cluster.id, spec.effective_mutexes)
|
535
1142
|
|
536
1143
|
return diffs
|
537
1144
|
|
538
1145
|
|
539
|
-
def sort_diffs(diff:
|
540
|
-
if diff
|
1146
|
+
def sort_diffs(diff: UpgradePolicyHandler) -> int:
    """
    Sort key for upgrade policy handlers.

    Handlers whose action is "delete" get the lower key (1) so they are
    ordered ahead of every other action (2).
    """
    return 1 if diff.action == "delete" else 2
|
543
1150
|
|
544
1151
|
|
545
|
-
def
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
1152
|
+
def act(
    dry_run: bool,
    diffs: list[UpgradePolicyHandler],
    ocm_api: OCMBaseClient,
    addon_id: str | None = None,
) -> None:
    """
    Execute the given upgrade policy handlers, deletions first.

    Args:
        dry_run (bool): forwarded unchanged to every handler's `act` call
        diffs (list[UpgradePolicyHandler]): handlers to execute; the list is sorted in place with `sort_diffs`
        ocm_api (OCMBaseClient): forwarded unchanged to every handler's `act` call
        addon_id (Optional[str]): if set, handlers carrying an `AddonUpgradePolicy` for a different addon are skipped
    """
    # note: this reorders the list object owned by the caller
    diffs.sort(key=sort_diffs)
    for handler in diffs:
        handler_policy = handler.policy
        belongs_to_other_addon = (
            addon_id
            and isinstance(handler_policy, AddonUpgradePolicy)
            and addon_id != handler_policy.addon_id
        )
        if not belongs_to_other_addon:
            handler.act(dry_run, ocm_api)
|
1168
|
+
|
1169
|
+
|
1170
|
+
def soaking_days(
    version_data: VersionData,
    upgrades: list[str],
    workload: str,
    only_soaking: bool,
) -> dict[str, float]:
    """
    Collect the soak days of every upgrade candidate for a workload.

    Args:
        version_data (VersionData): source of the per-version workload history
        upgrades (list[str]): versions to look up
        workload (str): workload whose history is queried for each version
        only_soaking (bool): accepted for backward compatibility; it has no
            effect on the result, because every version in `upgrades` always
            gets an entry

    Returns:
        dict[str, float]: version -> soak days, rounded to two decimals
    """
    # The former `if not only_soaking and version not in soaking` fallback
    # could never trigger: the version was always assigned right before the
    # check. It has been dropped without changing the result.
    # NOTE(review): confirm with callers whether `only_soaking` was meant to
    # filter the result before giving the flag any behavior.
    return {
        version: round(
            version_data.workload_history(version, workload).soak_days, 2
        )
        for version in upgrades
    }
|
1183
|
+
|
1184
|
+
|
1185
|
+
def get_orgs_for_environment(
    integration: str,
    ocm_env_name: str,
    query_func: Callable,
    ocm_organization_ids: set[str] | None = None,
    excluded_ocm_organization_ids: set[str] | None = None,
    only_addon_managed_upgrades: bool = False,
) -> list[AUSOCMOrganization]:
    """
    Query all AUS organizations and keep the ones that belong to the given
    OCM environment and pass every requested filter.

    Args:
        integration (str): only organizations for which `integration_is_enabled` holds for this integration are kept
        ocm_env_name (str): name of the OCM environment the organization must belong to
        query_func (Callable): function to query organizations via GQL
        ocm_organization_ids (Optional[set[str]]): if non-empty, only organizations with these IDs are kept
        excluded_ocm_organization_ids (Optional[set[str]]): if non-empty, organizations with these IDs are dropped
        only_addon_managed_upgrades (bool): if True, organizations without enabled addon management are dropped

    Returns:
        list[AUSOCMOrganization]: organizations matching all filters, in query order
    """
    queried = aus_organizations_query(query_func=query_func).organizations
    selected: list[AUSOCMOrganization] = []
    for org in queried or []:
        # the checks run in the same order as they short-circuit in a chained `and`
        if org.environment.name != ocm_env_name:
            continue
        if not integration_is_enabled(integration, org):
            continue
        if only_addon_managed_upgrades and not org.addon_managed_upgrades:
            continue
        if ocm_organization_ids and org.org_id not in ocm_organization_ids:
            continue
        if (
            excluded_ocm_organization_ids
            and org.org_id in excluded_ocm_organization_ids
        ):
            continue
        selected.append(org)
    return selected
|
1220
|
+
|
1221
|
+
|
1222
|
+
def remaining_soak_day_metric_values_for_cluster(
    spec: ClusterUpgradeSpec,
    soaked_versions: dict[str, float],
    current_upgrade: AbstractUpgradePolicy | None,
) -> dict[str, float]:
    """
    Determine the versions and values to report for the
    `AUS*VersionRemainingSoakDaysGauge` metrics of a cluster.

    The regular value of a version is its remaining soak days (never below 0).
    Negative marker values replace it when the version is blocked or when the
    current upgrade for exactly that version is scheduled, started or long
    running.

    Versions that would only clutter the metrics are left out: a version with a
    non-negative value is not reported when a later version already soaked
    enough or is being upgraded to.

    Args:
        spec (ClusterUpgradeSpec): provides the available upgrades, the required soak days and the blocked-version check
        soaked_versions (dict[str, float]): version -> days soaked so far; missing versions count as 0
        current_upgrade (Optional[AbstractUpgradePolicy]): the upgrade policy currently present for the cluster, if any

    Returns:
        dict[str, float]: version -> metric value for every reported version
    """
    candidates = spec.get_available_upgrades()
    if not candidates:
        return {}

    # remaining soak days per candidate, index-aligned with `candidates`:
    # > 0 while a version still soaks, 0 once it soaked enough.
    required_soak_days = spec.upgrade_policy.conditions.soak_days or 0
    remaining: list[float] = [
        max(required_soak_days - soaked_versions.get(candidate, 0), 0)
        for candidate in candidates
    ]

    # walk from the latest candidate to the earliest, so the values of all
    # later versions are final (markers applied) when an earlier one is judged
    version_metrics: dict[str, float] = {}
    for idx, version in reversed(list(enumerate(candidates))):
        if current_upgrade and current_upgrade.version == version:
            # only `scheduled`, `started` and `delayed` are mapped to markers;
            # any other state keeps the plain remaining soak days
            state = current_upgrade.state
            if state == "scheduled":
                remaining[idx] = UPGRADE_SCHEDULED_METRIC_VALUE
            elif state in {"started", "delayed"}:
                remaining[idx] = UPGRADE_STARTED_METRIC_VALUE
                if current_upgrade.next_run:
                    # an upgrade that has been running for 6 hours or more
                    # counts as long running
                    started_at = datetime.strptime(
                        current_upgrade.next_run, "%Y-%m-%dT%H:%M:%SZ"
                    )
                    # NOTE(review): datetime.utcnow() is deprecated since
                    # Python 3.12; kept as is to preserve behavior
                    elapsed_hours = (
                        datetime.utcnow() - started_at
                    ).total_seconds() / 3600
                    if elapsed_hours >= 6:
                        remaining[idx] = UPGRADE_LONG_RUNNING_METRIC_VALUE
        elif spec.version_blocked(version):
            # blocked versions are still reported, with their own marker value
            remaining[idx] = UPGRADE_BLOCKED_METRIC_VALUE

        # a version that still soaks or soaked enough is dropped when a later
        # version is already eligible or in progress - the later one wins anyway
        superseded = remaining[idx] >= 0 and any(
            later_value
            in {
                0,
                UPGRADE_SCHEDULED_METRIC_VALUE,
                UPGRADE_STARTED_METRIC_VALUE,
                UPGRADE_LONG_RUNNING_METRIC_VALUE,
            }
            for later_value in remaining[idx + 1 :]
        )
        if not superseded:
            version_metrics[version] = remaining[idx]

    return version_metrics
|