qontract-reconcile 0.10.1rc418__py3-none-any.whl → 0.10.1rc419__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: qontract-reconcile
3
- Version: 0.10.1rc418
3
+ Version: 0.10.1rc419
4
4
  Summary: Collection of tools to reconcile services with their desired state as defined in the app-interface DB.
5
5
  Home-page: https://github.com/app-sre/qontract-reconcile
6
6
  Author: Red Hat App-SRE Team
@@ -68,7 +68,7 @@ reconcile/openshift_namespaces.py,sha256=mcWsMRoHPPS0TNMguGka7Q0fLl0Rj7rzypX98K2
68
68
  reconcile/openshift_network_policies.py,sha256=_9Xt0ERUknGh4F41PnJQ_a9Weam2vA_hOBny9SPwKag,4237
69
69
  reconcile/openshift_resourcequotas.py,sha256=yUi56PiOn3inMMfq_x_FEHmaW-reGipzoorjdar372g,2415
70
70
  reconcile/openshift_resources.py,sha256=kwsY5cko7udEKNlhL2oKiKv_5wzEw9wmmwROE016ng8,1400
71
- reconcile/openshift_resources_base.py,sha256=yYhmRzJpqsl0WQBRook-XJYSC1KYnWKVo5ESMvbq86M,44935
71
+ reconcile/openshift_resources_base.py,sha256=iKS8mpKnJca6MO5dl1a1az2YdEa_6zDRNf_XS_i9FBA,44936
72
72
  reconcile/openshift_rolebindings.py,sha256=K6alhxtnxifnytQKMqIGdVkqGEa28AVwFv4B7SjbgIk,6628
73
73
  reconcile/openshift_routes.py,sha256=fXvuPSjcjVw1X3j2EQvUAdbOepmIFdKk-M3qP8QzPiw,1075
74
74
  reconcile/openshift_saas_deploy.py,sha256=QpQAQTeDZPOtgxV9RoAyu2NeX4Jlc4xAslVdgwD0sgQ,10811
@@ -118,12 +118,12 @@ reconcile/vpc_peerings_validator.py,sha256=oiYwmQ2yYBobFhIixmHNUP1GxzUADocMPJnCB
118
118
  reconcile/aus/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
119
119
  reconcile/aus/advanced_upgrade_service.py,sha256=lKNWrKrHTjgyDVJaR7cx9Mgk5hULkEvKnqnef98xYi4,21794
120
120
  reconcile/aus/aus_label_source.py,sha256=X6FD4NYcX27llMUSmmBcCh-pG7U5FnBd0zl-0zwCj2U,4118
121
- reconcile/aus/base.py,sha256=bLJWyH8JrGx_g4OVLF9Swu2gig_57Fz5kbY3ETDrTH4,37788
121
+ reconcile/aus/base.py,sha256=gg-1DmbNKPC9BOy62RO7TOxvDVKa5Twgqgds1EVEKvA,43303
122
122
  reconcile/aus/cluster_version_data.py,sha256=j4UyEBi5mQuvPq5Lo7a_L_0blxvH790wJV07uAiikFU,7126
123
- reconcile/aus/metrics.py,sha256=-9aYydrUU2IZH0siyck8BciQpDSv6-wMTQDmrgl-4FU,2955
123
+ reconcile/aus/metrics.py,sha256=CI5H3kzWh5VUYjt8NfzcYdJ8L4HdTCcUQTZiLdwdLFc,3506
124
124
  reconcile/aus/models.py,sha256=oBSVZ-3JTngxKg_bH1vAfREpz55t8K-Y3eC9TA4pOTw,6849
125
- reconcile/aus/ocm_addons_upgrade_scheduler_org.py,sha256=60DLXtcP9UdwLStrBJ3kJ8avkrL2onEl3LY2Wj1FYBc,7158
126
- reconcile/aus/ocm_upgrade_scheduler.py,sha256=m77mqE8Bn_t49u3piHDE9Rc55X3K_KwBBCKpZyaASQk,8897
125
+ reconcile/aus/ocm_addons_upgrade_scheduler_org.py,sha256=4f4AOoEUBxYdTIUnyev1ECPvbfVHSESQhXl7fuU-iGk,8811
126
+ reconcile/aus/ocm_upgrade_scheduler.py,sha256=7cK2SakCFkl5EdnqUEAYdUo4pUnnf-SsUR10uytAGyE,3058
127
127
  reconcile/aus/ocm_upgrade_scheduler_org.py,sha256=OBgE5mnVdQQV4tMH0AE2V_PDt9Gy6d-LyuPceqjORts,2331
128
128
  reconcile/aus/upgrades.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
129
129
  reconcile/aws_ami_cleanup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -636,8 +636,8 @@ tools/test/test_app_interface_metrics_exporter.py,sha256=dmEcNwZltP1rd_4DbxIYakO
636
636
  tools/test/test_qontract_cli.py,sha256=awwTHEc2DWlykuqGIYM0WOBoSL0KRnOraCLk3C7izis,1401
637
637
  tools/test/test_sd_app_sre_alert_report.py,sha256=v363r9zM7__0kR5K6mvJoGFcM9BvE33fWAayrqkpojA,2116
638
638
  tools/test/test_sre_checkpoints.py,sha256=SKqPPTl9ua0RFdSSofnoQX-JZE6dFLO3LRhfQzqtfh8,2607
639
- qontract_reconcile-0.10.1rc418.dist-info/METADATA,sha256=bYR73yjW15cE_wXs6HuU9pT1haQ-xRZBBOnXU4whpII,2347
640
- qontract_reconcile-0.10.1rc418.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
641
- qontract_reconcile-0.10.1rc418.dist-info/entry_points.txt,sha256=rTjAv28I_CHLM8ID3OPqMI_suoQ9s7tFbim4aYjn9kk,376
642
- qontract_reconcile-0.10.1rc418.dist-info/top_level.txt,sha256=l5ISPoXzt0SdR4jVdkfa7RPSKNc8zAHYWAnR-Dw8Ey8,24
643
- qontract_reconcile-0.10.1rc418.dist-info/RECORD,,
639
+ qontract_reconcile-0.10.1rc419.dist-info/METADATA,sha256=EtYPYARSLUV9Fzy9jSWtyBZ9EJ0c8JssJw4oNySSVoU,2347
640
+ qontract_reconcile-0.10.1rc419.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
641
+ qontract_reconcile-0.10.1rc419.dist-info/entry_points.txt,sha256=rTjAv28I_CHLM8ID3OPqMI_suoQ9s7tFbim4aYjn9kk,376
642
+ qontract_reconcile-0.10.1rc419.dist-info/top_level.txt,sha256=l5ISPoXzt0SdR4jVdkfa7RPSKNc8zAHYWAnR-Dw8Ey8,24
643
+ qontract_reconcile-0.10.1rc419.dist-info/RECORD,,
reconcile/aus/base.py CHANGED
@@ -12,6 +12,8 @@ from datetime import (
12
12
  from typing import (
13
13
  Callable,
14
14
  Optional,
15
+ Protocol,
16
+ Sequence,
15
17
  cast,
16
18
  )
17
19
 
@@ -27,6 +29,10 @@ from reconcile.aus.cluster_version_data import (
27
29
  get_version_data,
28
30
  )
29
31
  from reconcile.aus.metrics import (
32
+ UPGRADE_BLOCKED_METRIC_VALUE,
33
+ UPGRADE_LONG_RUNNING_METRIC_VALUE,
34
+ UPGRADE_SCHEDULED_METRIC_VALUE,
35
+ UPGRADE_STARTED_METRIC_VALUE,
30
36
  AUSClusterUpgradePolicyInfoMetric,
31
37
  AUSOCMEnvironmentError,
32
38
  AUSOrganizationErrorRate,
@@ -187,6 +193,44 @@ class AdvancedUpgradeSchedulerBaseIntegration(
187
193
  else None,
188
194
  ).environments
189
195
 
196
+ def expose_remaining_soak_day_metrics(
197
+ self,
198
+ org_upgrade_spec: OrganizationUpgradeSpec,
199
+ version_data: VersionData,
200
+ current_state: Sequence["AbstractUpgradePolicy"],
201
+ metrics_builder: "RemainingSoakDayMetricsBuilder",
202
+ ) -> None:
203
+ current_cluster_upgrade_policies = {
204
+ p.cluster.external_id: p for p in current_state
205
+ }
206
+ for spec in org_upgrade_spec.specs:
207
+ upgrades = spec.get_available_upgrades()
208
+ if not upgrades:
209
+ continue
210
+
211
+ # calculate the amount every version has soaked. if a version has soaked for
212
+ # multiple workloads, we will pick the minimum soak day value of all workloads
213
+ # relevant on the cluster.
214
+ soaked_versions: dict[str, float] = {}
215
+ for workload in spec.upgrade_policy.workloads:
216
+ for version, soak_days in soaking_days(
217
+ version_data, upgrades, workload, False
218
+ ).items():
219
+ soaked_versions[version] = min(
220
+ soak_days, soaked_versions.get(version, soak_days)
221
+ )
222
+
223
+ current_upgrade = current_cluster_upgrade_policies.get(spec.cluster_uuid)
224
+ for version, metric_value in remaining_soak_day_metric_values_for_cluster(
225
+ spec, soaked_versions, current_upgrade
226
+ ).items():
227
+ metrics.set_gauge(
228
+ metrics_builder(
229
+ cluster_uuid=spec.cluster.external_id, soaking_version=version
230
+ ),
231
+ metric_value,
232
+ )
233
+
190
234
  @abstractmethod
191
235
  def process_upgrade_policies_in_org(
192
236
  self, dry_run: bool, org_upgrade_spec: OrganizationUpgradeSpec
@@ -269,6 +313,11 @@ class GateAgreement(BaseModel):
269
313
  )
270
314
 
271
315
 
316
+ class RemainingSoakDayMetricsBuilder(Protocol):
317
+ def __call__(self, cluster_uuid: str, soaking_version: str) -> metrics.GaugeMetric:
318
+ ...
319
+
320
+
272
321
  class AbstractUpgradePolicy(ABC, BaseModel):
273
322
  """Abstract class for upgrade policies
274
323
  Used to create and delete upgrade policies in OCM."""
@@ -735,7 +784,7 @@ def upgradeable_version(
735
784
 
736
785
 
737
786
  def verify_current_should_skip(
738
- current_state: list[AbstractUpgradePolicy],
787
+ current_state: Sequence[AbstractUpgradePolicy],
739
788
  desired: ClusterUpgradeSpec,
740
789
  now: datetime,
741
790
  addon_id: str = "",
@@ -861,7 +910,7 @@ def _calculate_node_pool_diffs(
861
910
 
862
911
 
863
912
  def calculate_diff(
864
- current_state: list[AbstractUpgradePolicy],
913
+ current_state: Sequence[AbstractUpgradePolicy],
865
914
  desired_state: OrganizationUpgradeSpec,
866
915
  ocm_api: OCMBaseClient,
867
916
  version_data: VersionData,
@@ -1042,3 +1091,76 @@ def get_orgs_for_environment(
1042
1091
  or org.org_id not in excluded_ocm_organization_ids
1043
1092
  )
1044
1093
  ]
1094
+
1095
+
1096
+ def remaining_soak_day_metric_values_for_cluster(
1097
+ spec: ClusterUpgradeSpec,
1098
+ soaked_versions: dict[str, float],
1099
+ current_upgrade: Optional[AbstractUpgradePolicy],
1100
+ ) -> dict[str, float]:
1101
+ """
1102
+ Calculate what versions and metric values to report for `AUS*VersionRemainingSoakDaysGauge` metrics.
1103
+ Usually, the remaining soak days for a version are reported but there are some special cases
1104
+ where we report negative values to indicate that a version is blocked or an upgrade has been
1105
+ scheduled or started.
1106
+
1107
+ Additionally certain versions are not reported when it is not meaningful (e.g. an upgrade will never happen)
1108
+ to prevent metric clutter.
1109
+ """
1110
+ upgrades = spec.get_available_upgrades()
1111
+ if not upgrades:
1112
+ return {}
1113
+
1114
+ # calculate the remaining soakdays for each upgrade version candidate of the cluster.
1115
+ # when a version is soaking, it has a value > 0 and when it soaked enough, the value is 0.
1116
+ remaining_soakdays: list[float] = [
1117
+ max(
1118
+ (spec.upgrade_policy.conditions.soak_days or 0) - soaked_versions.get(v, 0),
1119
+ 0,
1120
+ )
1121
+ for v in upgrades
1122
+ ]
1123
+
1124
+ # under certain conditions, the remaining soak day value for a version needs to be
1125
+ # replaced with special marker values
1126
+ version_metrics: dict[str, float] = {}
1127
+ for idx, version in reversed(list(enumerate(upgrades))):
1128
+ # if an upgrade is `scheduled` or `started`` for the specific version, their respective negative
1129
+ # marker values will be used instead of their actual soak days. there are other states than `scheduled`
1130
+ # and `started` but the `UpgradePolicy` vanishes too quickly to observe them reliably, when such
1131
+ # states are reached.
1132
+ if current_upgrade and current_upgrade.version == version:
1133
+ if current_upgrade.state == "scheduled":
1134
+ remaining_soakdays[idx] = UPGRADE_SCHEDULED_METRIC_VALUE
1135
+ elif current_upgrade.state in ("started", "delayed"):
1136
+ remaining_soakdays[idx] = UPGRADE_STARTED_METRIC_VALUE
1137
+ if current_upgrade.next_run:
1138
+ # if an upgrade runs for over 6 hours, we mark it as a long running upgrade
1139
+ next_run = datetime.strptime(
1140
+ current_upgrade.next_run, "%Y-%m-%dT%H:%M:%SZ"
1141
+ )
1142
+ now = datetime.utcnow()
1143
+ hours_ago = (now - next_run).total_seconds() / 3600
1144
+ if hours_ago >= 6:
1145
+ remaining_soakdays[idx] = UPGRADE_LONG_RUNNING_METRIC_VALUE
1146
+ elif spec.version_blocked(version):
1147
+ # if a version is blocked, we will still report it but with a dedicated negative marker value
1148
+ remaining_soakdays[idx] = UPGRADE_BLOCKED_METRIC_VALUE
1149
+
1150
+ # we are intentionally not reporting versions that still soak or soaked enough when
1151
+ # there is a later version that also soaked enough. the later one will be picked
1152
+ # for an upgrade over the older one anyways.
1153
+ if remaining_soakdays[idx] >= 0 and any(
1154
+ later_version_remaining_soak_days
1155
+ in (
1156
+ 0,
1157
+ UPGRADE_SCHEDULED_METRIC_VALUE,
1158
+ UPGRADE_STARTED_METRIC_VALUE,
1159
+ UPGRADE_LONG_RUNNING_METRIC_VALUE,
1160
+ )
1161
+ for later_version_remaining_soak_days in remaining_soakdays[idx + 1 :]
1162
+ ):
1163
+ continue
1164
+ version_metrics[version] = remaining_soakdays[idx]
1165
+
1166
+ return version_metrics
reconcile/aus/metrics.py CHANGED
@@ -42,6 +42,16 @@ class AUSClusterVersionRemainingSoakDaysGauge(AUSBaseMetric, GaugeMetric):
42
42
  return "aus_cluster_version_remaining_soak_days"
43
43
 
44
44
 
45
+ class AUSAddonVersionRemainingSoakDaysGauge(AUSClusterVersionRemainingSoakDaysGauge):
46
+ "Remaining days a version needs to soak for an addon on a cluster"
47
+
48
+ addon: str
49
+
50
+ @classmethod
51
+ def name(cls) -> str:
52
+ return "aus_addon_version_remaining_soak_days"
53
+
54
+
45
55
  class AUSClusterUpgradePolicyInfoMetric(AUSBaseMetric, InfoMetric):
46
56
  "Info metric for clusters under AUS upgrade control"
47
57
 
@@ -62,6 +72,18 @@ class AUSClusterUpgradePolicyInfoMetric(AUSBaseMetric, InfoMetric):
62
72
  return "aus_cluster_upgrade_policy_info"
63
73
 
64
74
 
75
+ class AUSAddonUpgradePolicyInfoMetric(
76
+ AUSClusterUpgradePolicyInfoMetric
77
+ ): # pylint: disable=R0901
78
+ "Info metric for cluster addons under AUS upgrade control"
79
+
80
+ addon: str
81
+
82
+ @classmethod
83
+ def name(cls) -> str:
84
+ return "aus_addon_upgrade_policy_info"
85
+
86
+
65
87
  class AUSOrganizationValidationErrorsGauge(AUSBaseMetric, GaugeMetric):
66
88
  "Current validation errors within an OCM organization"
67
89
 
@@ -1,3 +1,5 @@
1
+ import functools
2
+
1
3
  from pydantic import BaseModel
2
4
 
3
5
  from reconcile.aus import base as aus
@@ -6,7 +8,10 @@ from reconcile.aus.base import (
6
8
  AddonUpgradePolicy,
7
9
  )
8
10
  from reconcile.aus.cluster_version_data import VersionData
9
- from reconcile.aus.metrics import AUSOrganizationReconcileCounter
11
+ from reconcile.aus.metrics import (
12
+ AUSAddonUpgradePolicyInfoMetric,
13
+ AUSAddonVersionRemainingSoakDaysGauge,
14
+ )
10
15
  from reconcile.aus.models import (
11
16
  ClusterAddonUpgradeSpec,
12
17
  ClusterUpgradeSpec,
@@ -62,6 +67,7 @@ class OCMAddonsUpgradeSchedulerOrgIntegration(
62
67
  for spec in org_upgrade_spec.specs
63
68
  if isinstance(spec, ClusterAddonUpgradeSpec)
64
69
  }
70
+
65
71
  for addon_id in addons:
66
72
  addon_org_upgrade_spec = OrganizationUpgradeSpec(
67
73
  org=org_upgrade_spec.org,
@@ -79,12 +85,26 @@ class OCMAddonsUpgradeSchedulerOrgIntegration(
79
85
  integration=self.name,
80
86
  ).get(org_upgrade_spec.org.environment.name, org_upgrade_spec.org.org_id)
81
87
 
88
+ addon_current_state: list[AddonUpgradePolicy] = [
89
+ s
90
+ for s in current_state
91
+ if isinstance(s, AddonUpgradePolicy) and s.addon_id == addon_id
92
+ ]
93
+
94
+ self.expose_remaining_soak_day_metrics(
95
+ org_upgrade_spec=org_upgrade_spec,
96
+ version_data=version_data,
97
+ current_state=addon_current_state,
98
+ metrics_builder=functools.partial(
99
+ AUSAddonVersionRemainingSoakDaysGauge,
100
+ integration=self.name,
101
+ ocm_env=org_upgrade_spec.org.environment.name,
102
+ addon=addon_id,
103
+ ),
104
+ )
105
+
82
106
  diffs = calculate_diff(
83
- addon_current_state=[
84
- s
85
- for s in current_state
86
- if isinstance(s, AddonUpgradePolicy) and s.addon_id == addon_id
87
- ],
107
+ addon_current_state=addon_current_state,
88
108
  org_upgrade_spec=addon_org_upgrade_spec,
89
109
  ocm_api=ocm_api,
90
110
  version_data=version_data,
@@ -169,17 +189,34 @@ class OCMAddonsUpgradeSchedulerOrgIntegration(
169
189
  def expose_org_upgrade_spec_metrics(
170
190
  self, ocm_env: str, org_upgrade_spec: OrganizationUpgradeSpec
171
191
  ) -> None:
172
- metrics.inc_counter(
173
- AUSOrganizationReconcileCounter(
174
- integration=self.name,
175
- ocm_env=ocm_env,
176
- org_id=org_upgrade_spec.org.org_id,
192
+ for cluster_upgrade_spec in org_upgrade_spec.specs:
193
+ if not isinstance(cluster_upgrade_spec, ClusterAddonUpgradeSpec):
194
+ continue
195
+ mutexes = cluster_upgrade_spec.upgrade_policy.conditions.mutexes
196
+ metrics.set_info(
197
+ AUSAddonUpgradePolicyInfoMetric(
198
+ integration=self.name,
199
+ ocm_env=ocm_env,
200
+ cluster_uuid=cluster_upgrade_spec.cluster_uuid,
201
+ org_id=cluster_upgrade_spec.org.org_id,
202
+ org_name=org_upgrade_spec.org.name,
203
+ channel=cluster_upgrade_spec.cluster.version.channel_group,
204
+ current_version=cluster_upgrade_spec.current_version,
205
+ cluster_name=cluster_upgrade_spec.name,
206
+ schedule=cluster_upgrade_spec.upgrade_policy.schedule,
207
+ sector=cluster_upgrade_spec.upgrade_policy.conditions.sector or "",
208
+ mutexes=",".join(mutexes) if mutexes else "",
209
+ soak_days=str(
210
+ cluster_upgrade_spec.upgrade_policy.conditions.soak_days or 0
211
+ ),
212
+ workloads=",".join(cluster_upgrade_spec.upgrade_policy.workloads),
213
+ addon=cluster_upgrade_spec.addon.id,
214
+ ),
177
215
  )
178
- )
179
216
 
180
217
 
181
218
  def calculate_diff(
182
- addon_current_state: list[AbstractUpgradePolicy],
219
+ addon_current_state: list[AddonUpgradePolicy],
183
220
  org_upgrade_spec: OrganizationUpgradeSpec,
184
221
  ocm_api: OCMBaseClient,
185
222
  version_data: VersionData,
@@ -193,11 +230,7 @@ def calculate_diff(
193
230
  addon_id,
194
231
  )
195
232
  for current in addon_current_state:
196
- if (
197
- isinstance(current, AddonUpgradePolicy)
198
- and addon_id == current.addon_id
199
- and current.schedule_type == "automatic"
200
- ):
233
+ if addon_id == current.addon_id and current.schedule_type == "automatic":
201
234
  diffs.append(
202
235
  aus.UpgradePolicyHandler(
203
236
  action="delete",
@@ -1,22 +1,13 @@
1
+ import functools
1
2
  from abc import ABC
2
- from datetime import datetime
3
- from typing import Optional
4
3
 
5
4
  from reconcile.aus import base as aus
6
5
  from reconcile.aus.cluster_version_data import VersionData
7
6
  from reconcile.aus.metrics import (
8
- UPGRADE_BLOCKED_METRIC_VALUE,
9
- UPGRADE_LONG_RUNNING_METRIC_VALUE,
10
- UPGRADE_SCHEDULED_METRIC_VALUE,
11
- UPGRADE_STARTED_METRIC_VALUE,
12
7
  AUSClusterVersionRemainingSoakDaysGauge,
13
8
  AUSOrganizationVersionDataGauge,
14
9
  )
15
- from reconcile.aus.models import (
16
- ClusterUpgradeSpec,
17
- OrganizationUpgradeSpec,
18
- )
19
- from reconcile.gql_definitions.fragments.ocm_environment import OCMEnvironment
10
+ from reconcile.aus.models import OrganizationUpgradeSpec
20
11
  from reconcile.utils import metrics
21
12
  from reconcile.utils.ocm import (
22
13
  OCM_PRODUCT_OSD,
@@ -63,10 +54,14 @@ class OCMClusterUpgradeSchedulerIntegration(
63
54
  version_data=version_data,
64
55
  )
65
56
  self.expose_remaining_soak_day_metrics(
66
- ocm_env=org_upgrade_spec.org.environment.name,
67
57
  org_upgrade_spec=org_upgrade_spec,
68
58
  version_data=version_data,
69
59
  current_state=current_state,
60
+ metrics_builder=functools.partial(
61
+ AUSClusterVersionRemainingSoakDaysGauge,
62
+ integration=self.name,
63
+ ocm_env=org_upgrade_spec.org.environment.name,
64
+ ),
70
65
  )
71
66
 
72
67
  diffs = aus.calculate_diff(
@@ -74,15 +69,6 @@ class OCMClusterUpgradeSchedulerIntegration(
74
69
  )
75
70
  aus.act(dry_run, diffs, ocm_api)
76
71
 
77
- def get_ocm_env_upgrade_specs(
78
- self, ocm_env: OCMEnvironment
79
- ) -> dict[str, OrganizationUpgradeSpec]:
80
- raise NotImplementedError(
81
- "Don't use ocm-upgrade-scheduler anymore but use: \n"
82
- "* ocm-label to transfer upgrade policies to OCM subscription labels \n"
83
- "* advanced-upgrade-service to drive upgrade policies based on OCM subscription labels"
84
- )
85
-
86
72
  def expose_version_data_metrics(
87
73
  self,
88
74
  ocm_env: str,
@@ -101,117 +87,3 @@ class OCMClusterUpgradeSchedulerIntegration(
101
87
  ),
102
88
  workload_history.soak_days,
103
89
  )
104
-
105
- def expose_remaining_soak_day_metrics(
106
- self,
107
- ocm_env: str,
108
- org_upgrade_spec: OrganizationUpgradeSpec,
109
- version_data: VersionData,
110
- current_state: list[aus.AbstractUpgradePolicy],
111
- ) -> None:
112
- current_cluster_upgrade_policies = {
113
- p.cluster.external_id: p for p in current_state
114
- }
115
- for spec in org_upgrade_spec.specs:
116
- upgrades = spec.get_available_upgrades()
117
- if not upgrades:
118
- continue
119
-
120
- # calculate the amount every version has soaked. if a version has soaked for
121
- # multiple workloads, we will pick the minimum soak day value of all workloads
122
- # relevant on the cluster.
123
- soaked_versions: dict[str, float] = {}
124
- for workload in spec.upgrade_policy.workloads:
125
- for version, soak_days in aus.soaking_days(
126
- version_data, upgrades, workload, False
127
- ).items():
128
- soaked_versions[version] = min(
129
- soak_days, soaked_versions.get(version, soak_days)
130
- )
131
-
132
- current_upgrade = current_cluster_upgrade_policies.get(spec.cluster_uuid)
133
- for version, metric_value in remaining_soak_day_metric_values_for_cluster(
134
- spec, soaked_versions, current_upgrade
135
- ).items():
136
- metrics.set_gauge(
137
- AUSClusterVersionRemainingSoakDaysGauge(
138
- integration=self.name,
139
- ocm_env=ocm_env,
140
- cluster_uuid=spec.cluster.external_id,
141
- soaking_version=version,
142
- ),
143
- metric_value,
144
- )
145
-
146
-
147
- def remaining_soak_day_metric_values_for_cluster(
148
- spec: ClusterUpgradeSpec,
149
- soaked_versions: dict[str, float],
150
- current_upgrade: Optional[aus.AbstractUpgradePolicy],
151
- ) -> dict[str, float]:
152
- """
153
- Calculate what versions and metric values to report for `AUSClusterVersionRemainingSoakDaysGauge`.
154
- Usually, the remaining soak days for a version are reported but there are some special cases
155
- where we report negative values to indicate that a version is blocked or an upgrade has been
156
- scheduled or started.
157
-
158
- Additionally certain versions are not reported when it is not meaningful (e.g. an upgrade will never happen)
159
- to prevent metric clutter.
160
- """
161
- upgrades = spec.get_available_upgrades()
162
- if not upgrades:
163
- return {}
164
-
165
- # calculate the remaining soakdays for each upgrade version candidate of the cluster.
166
- # when a version is soaking, it has a value > 0 and when it soaked enough, the value is 0.
167
- remaining_soakdays: list[float] = [
168
- max(
169
- (spec.upgrade_policy.conditions.soak_days or 0) - soaked_versions.get(v, 0),
170
- 0,
171
- )
172
- for v in upgrades
173
- ]
174
-
175
- # under certain conditions, the remaining soak day value for a version needs to be
176
- # replaced with special marker values
177
- version_metrics: dict[str, float] = {}
178
- for idx, version in reversed(list(enumerate(upgrades))):
179
- # if an upgrade is `scheduled` or `started`` for the specific version, their respective negative
180
- # marker values will be used instead of their actual soak days. there are other states than `scheduled`
181
- # and `started` but the `UpgradePolicy` vanishes too quickly to observe them reliably, when such
182
- # states are reached.
183
- if current_upgrade and current_upgrade.version == version:
184
- if current_upgrade.state == "scheduled":
185
- remaining_soakdays[idx] = UPGRADE_SCHEDULED_METRIC_VALUE
186
- elif current_upgrade.state in ("started", "delayed"):
187
- remaining_soakdays[idx] = UPGRADE_STARTED_METRIC_VALUE
188
- if current_upgrade.next_run:
189
- # if an upgrade runs for over 6 hours, we mark it as a long running upgrade
190
- next_run = datetime.strptime(
191
- current_upgrade.next_run, "%Y-%m-%dT%H:%M:%SZ"
192
- )
193
- now = datetime.utcnow()
194
- hours_ago = (now - next_run).total_seconds() / 3600
195
- if hours_ago >= 6:
196
- remaining_soakdays[idx] = UPGRADE_LONG_RUNNING_METRIC_VALUE
197
- elif spec.version_blocked(version):
198
- # if a version is blocked, we will still report it but with a dedicated negative marker value
199
- remaining_soakdays[idx] = UPGRADE_BLOCKED_METRIC_VALUE
200
-
201
- # we are intentionally not reporting versions that still soak or soaked enough when
202
- # there is a later version that also soaked enough. the later one will be picked
203
- # for an upgrade over the older one anyways.
204
- if remaining_soakdays[idx] >= 0 and any(
205
- later_version_remaining_soak_days
206
- in (
207
- 0,
208
- UPGRADE_SCHEDULED_METRIC_VALUE,
209
- UPGRADE_STARTED_METRIC_VALUE,
210
- UPGRADE_LONG_RUNNING_METRIC_VALUE,
211
- )
212
- for later_version_remaining_soak_days in remaining_soakdays[idx + 1 :]
213
- ):
214
- continue
215
- version_metrics[version] = remaining_soakdays[idx]
216
-
217
- return version_metrics
@@ -1056,7 +1056,7 @@ def run(
1056
1056
  namespace_name=namespace_name,
1057
1057
  )
1058
1058
  if not namespaces:
1059
- logging.info(
1059
+ logging.debug(
1060
1060
  "No namespaces found when filtering for "
1061
1061
  f"cluster={cluster_name}, namespace={namespace_name}. "
1062
1062
  "Exiting."