qontract-reconcile 0.10.1rc418__py3-none-any.whl → 0.10.1rc419__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {qontract_reconcile-0.10.1rc418.dist-info → qontract_reconcile-0.10.1rc419.dist-info}/METADATA +1 -1
- {qontract_reconcile-0.10.1rc418.dist-info → qontract_reconcile-0.10.1rc419.dist-info}/RECORD +10 -10
- reconcile/aus/base.py +124 -2
- reconcile/aus/metrics.py +22 -0
- reconcile/aus/ocm_addons_upgrade_scheduler_org.py +51 -18
- reconcile/aus/ocm_upgrade_scheduler.py +7 -135
- reconcile/openshift_resources_base.py +1 -1
- {qontract_reconcile-0.10.1rc418.dist-info → qontract_reconcile-0.10.1rc419.dist-info}/WHEEL +0 -0
- {qontract_reconcile-0.10.1rc418.dist-info → qontract_reconcile-0.10.1rc419.dist-info}/entry_points.txt +0 -0
- {qontract_reconcile-0.10.1rc418.dist-info → qontract_reconcile-0.10.1rc419.dist-info}/top_level.txt +0 -0
{qontract_reconcile-0.10.1rc418.dist-info → qontract_reconcile-0.10.1rc419.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: qontract-reconcile
|
3
|
-
Version: 0.10.1rc418
|
3
|
+
Version: 0.10.1rc419
|
4
4
|
Summary: Collection of tools to reconcile services with their desired state as defined in the app-interface DB.
|
5
5
|
Home-page: https://github.com/app-sre/qontract-reconcile
|
6
6
|
Author: Red Hat App-SRE Team
|
{qontract_reconcile-0.10.1rc418.dist-info → qontract_reconcile-0.10.1rc419.dist-info}/RECORD
RENAMED
@@ -68,7 +68,7 @@ reconcile/openshift_namespaces.py,sha256=mcWsMRoHPPS0TNMguGka7Q0fLl0Rj7rzypX98K2
|
|
68
68
|
reconcile/openshift_network_policies.py,sha256=_9Xt0ERUknGh4F41PnJQ_a9Weam2vA_hOBny9SPwKag,4237
|
69
69
|
reconcile/openshift_resourcequotas.py,sha256=yUi56PiOn3inMMfq_x_FEHmaW-reGipzoorjdar372g,2415
|
70
70
|
reconcile/openshift_resources.py,sha256=kwsY5cko7udEKNlhL2oKiKv_5wzEw9wmmwROE016ng8,1400
|
71
|
-
reconcile/openshift_resources_base.py,sha256=
|
71
|
+
reconcile/openshift_resources_base.py,sha256=iKS8mpKnJca6MO5dl1a1az2YdEa_6zDRNf_XS_i9FBA,44936
|
72
72
|
reconcile/openshift_rolebindings.py,sha256=K6alhxtnxifnytQKMqIGdVkqGEa28AVwFv4B7SjbgIk,6628
|
73
73
|
reconcile/openshift_routes.py,sha256=fXvuPSjcjVw1X3j2EQvUAdbOepmIFdKk-M3qP8QzPiw,1075
|
74
74
|
reconcile/openshift_saas_deploy.py,sha256=QpQAQTeDZPOtgxV9RoAyu2NeX4Jlc4xAslVdgwD0sgQ,10811
|
@@ -118,12 +118,12 @@ reconcile/vpc_peerings_validator.py,sha256=oiYwmQ2yYBobFhIixmHNUP1GxzUADocMPJnCB
|
|
118
118
|
reconcile/aus/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
119
119
|
reconcile/aus/advanced_upgrade_service.py,sha256=lKNWrKrHTjgyDVJaR7cx9Mgk5hULkEvKnqnef98xYi4,21794
|
120
120
|
reconcile/aus/aus_label_source.py,sha256=X6FD4NYcX27llMUSmmBcCh-pG7U5FnBd0zl-0zwCj2U,4118
|
121
|
-
reconcile/aus/base.py,sha256=
|
121
|
+
reconcile/aus/base.py,sha256=gg-1DmbNKPC9BOy62RO7TOxvDVKa5Twgqgds1EVEKvA,43303
|
122
122
|
reconcile/aus/cluster_version_data.py,sha256=j4UyEBi5mQuvPq5Lo7a_L_0blxvH790wJV07uAiikFU,7126
|
123
|
-
reconcile/aus/metrics.py,sha256
|
123
|
+
reconcile/aus/metrics.py,sha256=CI5H3kzWh5VUYjt8NfzcYdJ8L4HdTCcUQTZiLdwdLFc,3506
|
124
124
|
reconcile/aus/models.py,sha256=oBSVZ-3JTngxKg_bH1vAfREpz55t8K-Y3eC9TA4pOTw,6849
|
125
|
-
reconcile/aus/ocm_addons_upgrade_scheduler_org.py,sha256=
|
126
|
-
reconcile/aus/ocm_upgrade_scheduler.py,sha256=
|
125
|
+
reconcile/aus/ocm_addons_upgrade_scheduler_org.py,sha256=4f4AOoEUBxYdTIUnyev1ECPvbfVHSESQhXl7fuU-iGk,8811
|
126
|
+
reconcile/aus/ocm_upgrade_scheduler.py,sha256=7cK2SakCFkl5EdnqUEAYdUo4pUnnf-SsUR10uytAGyE,3058
|
127
127
|
reconcile/aus/ocm_upgrade_scheduler_org.py,sha256=OBgE5mnVdQQV4tMH0AE2V_PDt9Gy6d-LyuPceqjORts,2331
|
128
128
|
reconcile/aus/upgrades.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
129
129
|
reconcile/aws_ami_cleanup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -636,8 +636,8 @@ tools/test/test_app_interface_metrics_exporter.py,sha256=dmEcNwZltP1rd_4DbxIYakO
|
|
636
636
|
tools/test/test_qontract_cli.py,sha256=awwTHEc2DWlykuqGIYM0WOBoSL0KRnOraCLk3C7izis,1401
|
637
637
|
tools/test/test_sd_app_sre_alert_report.py,sha256=v363r9zM7__0kR5K6mvJoGFcM9BvE33fWAayrqkpojA,2116
|
638
638
|
tools/test/test_sre_checkpoints.py,sha256=SKqPPTl9ua0RFdSSofnoQX-JZE6dFLO3LRhfQzqtfh8,2607
|
639
|
-
qontract_reconcile-0.10.
|
640
|
-
qontract_reconcile-0.10.1rc418.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
|
641
|
-
qontract_reconcile-0.10.1rc418.dist-info/entry_points.txt,sha256=rTjAv28I_CHLM8ID3OPqMI_suoQ9s7tFbim4aYjn9kk,376
|
642
|
-
qontract_reconcile-0.10.1rc418.dist-info/top_level.txt,sha256=l5ISPoXzt0SdR4jVdkfa7RPSKNc8zAHYWAnR-Dw8Ey8,24
|
643
|
-
qontract_reconcile-0.10.1rc418.dist-info/RECORD,,
|
639
|
+
qontract_reconcile-0.10.1rc419.dist-info/METADATA,sha256=EtYPYARSLUV9Fzy9jSWtyBZ9EJ0c8JssJw4oNySSVoU,2347
|
640
|
+
qontract_reconcile-0.10.1rc419.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
|
641
|
+
qontract_reconcile-0.10.1rc419.dist-info/entry_points.txt,sha256=rTjAv28I_CHLM8ID3OPqMI_suoQ9s7tFbim4aYjn9kk,376
|
642
|
+
qontract_reconcile-0.10.1rc419.dist-info/top_level.txt,sha256=l5ISPoXzt0SdR4jVdkfa7RPSKNc8zAHYWAnR-Dw8Ey8,24
|
643
|
+
qontract_reconcile-0.10.1rc419.dist-info/RECORD,,
|
reconcile/aus/base.py
CHANGED
@@ -12,6 +12,8 @@ from datetime import (
|
|
12
12
|
from typing import (
|
13
13
|
Callable,
|
14
14
|
Optional,
|
15
|
+
Protocol,
|
16
|
+
Sequence,
|
15
17
|
cast,
|
16
18
|
)
|
17
19
|
|
@@ -27,6 +29,10 @@ from reconcile.aus.cluster_version_data import (
|
|
27
29
|
get_version_data,
|
28
30
|
)
|
29
31
|
from reconcile.aus.metrics import (
|
32
|
+
UPGRADE_BLOCKED_METRIC_VALUE,
|
33
|
+
UPGRADE_LONG_RUNNING_METRIC_VALUE,
|
34
|
+
UPGRADE_SCHEDULED_METRIC_VALUE,
|
35
|
+
UPGRADE_STARTED_METRIC_VALUE,
|
30
36
|
AUSClusterUpgradePolicyInfoMetric,
|
31
37
|
AUSOCMEnvironmentError,
|
32
38
|
AUSOrganizationErrorRate,
|
@@ -187,6 +193,44 @@ class AdvancedUpgradeSchedulerBaseIntegration(
|
|
187
193
|
else None,
|
188
194
|
).environments
|
189
195
|
|
196
|
+
def expose_remaining_soak_day_metrics(
|
197
|
+
self,
|
198
|
+
org_upgrade_spec: OrganizationUpgradeSpec,
|
199
|
+
version_data: VersionData,
|
200
|
+
current_state: Sequence["AbstractUpgradePolicy"],
|
201
|
+
metrics_builder: "RemainingSoakDayMetricsBuilder",
|
202
|
+
) -> None:
|
203
|
+
current_cluster_upgrade_policies = {
|
204
|
+
p.cluster.external_id: p for p in current_state
|
205
|
+
}
|
206
|
+
for spec in org_upgrade_spec.specs:
|
207
|
+
upgrades = spec.get_available_upgrades()
|
208
|
+
if not upgrades:
|
209
|
+
continue
|
210
|
+
|
211
|
+
# calculate the amount every version has soaked. if a version has soaked for
|
212
|
+
# multiple workloads, we will pick the minimum soak day value of all workloads
|
213
|
+
# relevant on the cluster.
|
214
|
+
soaked_versions: dict[str, float] = {}
|
215
|
+
for workload in spec.upgrade_policy.workloads:
|
216
|
+
for version, soak_days in soaking_days(
|
217
|
+
version_data, upgrades, workload, False
|
218
|
+
).items():
|
219
|
+
soaked_versions[version] = min(
|
220
|
+
soak_days, soaked_versions.get(version, soak_days)
|
221
|
+
)
|
222
|
+
|
223
|
+
current_upgrade = current_cluster_upgrade_policies.get(spec.cluster_uuid)
|
224
|
+
for version, metric_value in remaining_soak_day_metric_values_for_cluster(
|
225
|
+
spec, soaked_versions, current_upgrade
|
226
|
+
).items():
|
227
|
+
metrics.set_gauge(
|
228
|
+
metrics_builder(
|
229
|
+
cluster_uuid=spec.cluster.external_id, soaking_version=version
|
230
|
+
),
|
231
|
+
metric_value,
|
232
|
+
)
|
233
|
+
|
190
234
|
@abstractmethod
|
191
235
|
def process_upgrade_policies_in_org(
|
192
236
|
self, dry_run: bool, org_upgrade_spec: OrganizationUpgradeSpec
|
@@ -269,6 +313,11 @@ class GateAgreement(BaseModel):
|
|
269
313
|
)
|
270
314
|
|
271
315
|
|
316
|
+
class RemainingSoakDayMetricsBuilder(Protocol):
|
317
|
+
def __call__(self, cluster_uuid: str, soaking_version: str) -> metrics.GaugeMetric:
|
318
|
+
...
|
319
|
+
|
320
|
+
|
272
321
|
class AbstractUpgradePolicy(ABC, BaseModel):
|
273
322
|
"""Abstract class for upgrade policies
|
274
323
|
Used to create and delete upgrade policies in OCM."""
|
@@ -735,7 +784,7 @@ def upgradeable_version(
|
|
735
784
|
|
736
785
|
|
737
786
|
def verify_current_should_skip(
|
738
|
-
current_state:
|
787
|
+
current_state: Sequence[AbstractUpgradePolicy],
|
739
788
|
desired: ClusterUpgradeSpec,
|
740
789
|
now: datetime,
|
741
790
|
addon_id: str = "",
|
@@ -861,7 +910,7 @@ def _calculate_node_pool_diffs(
|
|
861
910
|
|
862
911
|
|
863
912
|
def calculate_diff(
|
864
|
-
current_state:
|
913
|
+
current_state: Sequence[AbstractUpgradePolicy],
|
865
914
|
desired_state: OrganizationUpgradeSpec,
|
866
915
|
ocm_api: OCMBaseClient,
|
867
916
|
version_data: VersionData,
|
@@ -1042,3 +1091,76 @@ def get_orgs_for_environment(
|
|
1042
1091
|
or org.org_id not in excluded_ocm_organization_ids
|
1043
1092
|
)
|
1044
1093
|
]
|
1094
|
+
|
1095
|
+
|
1096
|
+
def remaining_soak_day_metric_values_for_cluster(
|
1097
|
+
spec: ClusterUpgradeSpec,
|
1098
|
+
soaked_versions: dict[str, float],
|
1099
|
+
current_upgrade: Optional[AbstractUpgradePolicy],
|
1100
|
+
) -> dict[str, float]:
|
1101
|
+
"""
|
1102
|
+
Calculate what versions and metric values to report for `AUS*VersionRemainingSoakDaysGauge` metrics.
|
1103
|
+
Usually, the remaining soak days for a version are reported but there are some special cases
|
1104
|
+
where we report negative values to indicate that a version is blocked or an upgrade has been
|
1105
|
+
scheduled or started.
|
1106
|
+
|
1107
|
+
Additionally certain versions are not reported when it is not meaningful (e.g. an upgrade will never happen)
|
1108
|
+
to prevent metric clutter.
|
1109
|
+
"""
|
1110
|
+
upgrades = spec.get_available_upgrades()
|
1111
|
+
if not upgrades:
|
1112
|
+
return {}
|
1113
|
+
|
1114
|
+
# calculate the remaining soakdays for each upgrade version candidate of the cluster.
|
1115
|
+
# when a version is soaking, it has a value > 0 and when it soaked enough, the value is 0.
|
1116
|
+
remaining_soakdays: list[float] = [
|
1117
|
+
max(
|
1118
|
+
(spec.upgrade_policy.conditions.soak_days or 0) - soaked_versions.get(v, 0),
|
1119
|
+
0,
|
1120
|
+
)
|
1121
|
+
for v in upgrades
|
1122
|
+
]
|
1123
|
+
|
1124
|
+
# under certain conditions, the remaining soak day value for a version needs to be
|
1125
|
+
# replaced with special marker values
|
1126
|
+
version_metrics: dict[str, float] = {}
|
1127
|
+
for idx, version in reversed(list(enumerate(upgrades))):
|
1128
|
+
# if an upgrade is `scheduled` or `started`` for the specific version, their respective negative
|
1129
|
+
# marker values will be used instead of their actual soak days. there are other states than `scheduled`
|
1130
|
+
# and `started` but the `UpgradePolicy` vanishes too quickly to observe them reliably, when such
|
1131
|
+
# states are reached.
|
1132
|
+
if current_upgrade and current_upgrade.version == version:
|
1133
|
+
if current_upgrade.state == "scheduled":
|
1134
|
+
remaining_soakdays[idx] = UPGRADE_SCHEDULED_METRIC_VALUE
|
1135
|
+
elif current_upgrade.state in ("started", "delayed"):
|
1136
|
+
remaining_soakdays[idx] = UPGRADE_STARTED_METRIC_VALUE
|
1137
|
+
if current_upgrade.next_run:
|
1138
|
+
# if an upgrade runs for over 6 hours, we mark it as a long running upgrade
|
1139
|
+
next_run = datetime.strptime(
|
1140
|
+
current_upgrade.next_run, "%Y-%m-%dT%H:%M:%SZ"
|
1141
|
+
)
|
1142
|
+
now = datetime.utcnow()
|
1143
|
+
hours_ago = (now - next_run).total_seconds() / 3600
|
1144
|
+
if hours_ago >= 6:
|
1145
|
+
remaining_soakdays[idx] = UPGRADE_LONG_RUNNING_METRIC_VALUE
|
1146
|
+
elif spec.version_blocked(version):
|
1147
|
+
# if a version is blocked, we will still report it but with a dedicated negative marker value
|
1148
|
+
remaining_soakdays[idx] = UPGRADE_BLOCKED_METRIC_VALUE
|
1149
|
+
|
1150
|
+
# we are intentionally not reporting versions that still soak or soaked enough when
|
1151
|
+
# there is a later version that also soaked enough. the later one will be picked
|
1152
|
+
# for an upgrade over the older one anyways.
|
1153
|
+
if remaining_soakdays[idx] >= 0 and any(
|
1154
|
+
later_version_remaining_soak_days
|
1155
|
+
in (
|
1156
|
+
0,
|
1157
|
+
UPGRADE_SCHEDULED_METRIC_VALUE,
|
1158
|
+
UPGRADE_STARTED_METRIC_VALUE,
|
1159
|
+
UPGRADE_LONG_RUNNING_METRIC_VALUE,
|
1160
|
+
)
|
1161
|
+
for later_version_remaining_soak_days in remaining_soakdays[idx + 1 :]
|
1162
|
+
):
|
1163
|
+
continue
|
1164
|
+
version_metrics[version] = remaining_soakdays[idx]
|
1165
|
+
|
1166
|
+
return version_metrics
|
reconcile/aus/metrics.py
CHANGED
@@ -42,6 +42,16 @@ class AUSClusterVersionRemainingSoakDaysGauge(AUSBaseMetric, GaugeMetric):
|
|
42
42
|
return "aus_cluster_version_remaining_soak_days"
|
43
43
|
|
44
44
|
|
45
|
+
class AUSAddonVersionRemainingSoakDaysGauge(AUSClusterVersionRemainingSoakDaysGauge):
|
46
|
+
"Remaining days a version needs to soak for an addon on a cluster"
|
47
|
+
|
48
|
+
addon: str
|
49
|
+
|
50
|
+
@classmethod
|
51
|
+
def name(cls) -> str:
|
52
|
+
return "aus_addon_version_remaining_soak_days"
|
53
|
+
|
54
|
+
|
45
55
|
class AUSClusterUpgradePolicyInfoMetric(AUSBaseMetric, InfoMetric):
|
46
56
|
"Info metric for clusters under AUS upgrade control"
|
47
57
|
|
@@ -62,6 +72,18 @@ class AUSClusterUpgradePolicyInfoMetric(AUSBaseMetric, InfoMetric):
|
|
62
72
|
return "aus_cluster_upgrade_policy_info"
|
63
73
|
|
64
74
|
|
75
|
+
class AUSAddonUpgradePolicyInfoMetric(
|
76
|
+
AUSClusterUpgradePolicyInfoMetric
|
77
|
+
): # pylint: disable=R0901
|
78
|
+
"Info metric for cluster addons under AUS upgrade control"
|
79
|
+
|
80
|
+
addon: str
|
81
|
+
|
82
|
+
@classmethod
|
83
|
+
def name(cls) -> str:
|
84
|
+
return "aus_addon_upgrade_policy_info"
|
85
|
+
|
86
|
+
|
65
87
|
class AUSOrganizationValidationErrorsGauge(AUSBaseMetric, GaugeMetric):
|
66
88
|
"Current validation errors within an OCM organization"
|
67
89
|
|
reconcile/aus/ocm_addons_upgrade_scheduler_org.py
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
import functools
|
2
|
+
|
1
3
|
from pydantic import BaseModel
|
2
4
|
|
3
5
|
from reconcile.aus import base as aus
|
@@ -6,7 +8,10 @@ from reconcile.aus.base import (
|
|
6
8
|
AddonUpgradePolicy,
|
7
9
|
)
|
8
10
|
from reconcile.aus.cluster_version_data import VersionData
|
9
|
-
from reconcile.aus.metrics import
|
11
|
+
from reconcile.aus.metrics import (
|
12
|
+
AUSAddonUpgradePolicyInfoMetric,
|
13
|
+
AUSAddonVersionRemainingSoakDaysGauge,
|
14
|
+
)
|
10
15
|
from reconcile.aus.models import (
|
11
16
|
ClusterAddonUpgradeSpec,
|
12
17
|
ClusterUpgradeSpec,
|
@@ -62,6 +67,7 @@ class OCMAddonsUpgradeSchedulerOrgIntegration(
|
|
62
67
|
for spec in org_upgrade_spec.specs
|
63
68
|
if isinstance(spec, ClusterAddonUpgradeSpec)
|
64
69
|
}
|
70
|
+
|
65
71
|
for addon_id in addons:
|
66
72
|
addon_org_upgrade_spec = OrganizationUpgradeSpec(
|
67
73
|
org=org_upgrade_spec.org,
|
@@ -79,12 +85,26 @@ class OCMAddonsUpgradeSchedulerOrgIntegration(
|
|
79
85
|
integration=self.name,
|
80
86
|
).get(org_upgrade_spec.org.environment.name, org_upgrade_spec.org.org_id)
|
81
87
|
|
88
|
+
addon_current_state: list[AddonUpgradePolicy] = [
|
89
|
+
s
|
90
|
+
for s in current_state
|
91
|
+
if isinstance(s, AddonUpgradePolicy) and s.addon_id == addon_id
|
92
|
+
]
|
93
|
+
|
94
|
+
self.expose_remaining_soak_day_metrics(
|
95
|
+
org_upgrade_spec=org_upgrade_spec,
|
96
|
+
version_data=version_data,
|
97
|
+
current_state=addon_current_state,
|
98
|
+
metrics_builder=functools.partial(
|
99
|
+
AUSAddonVersionRemainingSoakDaysGauge,
|
100
|
+
integration=self.name,
|
101
|
+
ocm_env=org_upgrade_spec.org.environment.name,
|
102
|
+
addon=addon_id,
|
103
|
+
),
|
104
|
+
)
|
105
|
+
|
82
106
|
diffs = calculate_diff(
|
83
|
-
addon_current_state=[
|
84
|
-
s
|
85
|
-
for s in current_state
|
86
|
-
if isinstance(s, AddonUpgradePolicy) and s.addon_id == addon_id
|
87
|
-
],
|
107
|
+
addon_current_state=addon_current_state,
|
88
108
|
org_upgrade_spec=addon_org_upgrade_spec,
|
89
109
|
ocm_api=ocm_api,
|
90
110
|
version_data=version_data,
|
@@ -169,17 +189,34 @@ class OCMAddonsUpgradeSchedulerOrgIntegration(
|
|
169
189
|
def expose_org_upgrade_spec_metrics(
|
170
190
|
self, ocm_env: str, org_upgrade_spec: OrganizationUpgradeSpec
|
171
191
|
) -> None:
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
192
|
+
for cluster_upgrade_spec in org_upgrade_spec.specs:
|
193
|
+
if not isinstance(cluster_upgrade_spec, ClusterAddonUpgradeSpec):
|
194
|
+
continue
|
195
|
+
mutexes = cluster_upgrade_spec.upgrade_policy.conditions.mutexes
|
196
|
+
metrics.set_info(
|
197
|
+
AUSAddonUpgradePolicyInfoMetric(
|
198
|
+
integration=self.name,
|
199
|
+
ocm_env=ocm_env,
|
200
|
+
cluster_uuid=cluster_upgrade_spec.cluster_uuid,
|
201
|
+
org_id=cluster_upgrade_spec.org.org_id,
|
202
|
+
org_name=org_upgrade_spec.org.name,
|
203
|
+
channel=cluster_upgrade_spec.cluster.version.channel_group,
|
204
|
+
current_version=cluster_upgrade_spec.current_version,
|
205
|
+
cluster_name=cluster_upgrade_spec.name,
|
206
|
+
schedule=cluster_upgrade_spec.upgrade_policy.schedule,
|
207
|
+
sector=cluster_upgrade_spec.upgrade_policy.conditions.sector or "",
|
208
|
+
mutexes=",".join(mutexes) if mutexes else "",
|
209
|
+
soak_days=str(
|
210
|
+
cluster_upgrade_spec.upgrade_policy.conditions.soak_days or 0
|
211
|
+
),
|
212
|
+
workloads=",".join(cluster_upgrade_spec.upgrade_policy.workloads),
|
213
|
+
addon=cluster_upgrade_spec.addon.id,
|
214
|
+
),
|
177
215
|
)
|
178
|
-
)
|
179
216
|
|
180
217
|
|
181
218
|
def calculate_diff(
|
182
|
-
addon_current_state: list[
|
219
|
+
addon_current_state: list[AddonUpgradePolicy],
|
183
220
|
org_upgrade_spec: OrganizationUpgradeSpec,
|
184
221
|
ocm_api: OCMBaseClient,
|
185
222
|
version_data: VersionData,
|
@@ -193,11 +230,7 @@ def calculate_diff(
|
|
193
230
|
addon_id,
|
194
231
|
)
|
195
232
|
for current in addon_current_state:
|
196
|
-
if (
|
197
|
-
isinstance(current, AddonUpgradePolicy)
|
198
|
-
and addon_id == current.addon_id
|
199
|
-
and current.schedule_type == "automatic"
|
200
|
-
):
|
233
|
+
if addon_id == current.addon_id and current.schedule_type == "automatic":
|
201
234
|
diffs.append(
|
202
235
|
aus.UpgradePolicyHandler(
|
203
236
|
action="delete",
|
reconcile/aus/ocm_upgrade_scheduler.py
CHANGED
@@ -1,22 +1,13 @@
|
|
1
|
+
import functools
|
1
2
|
from abc import ABC
|
2
|
-
from datetime import datetime
|
3
|
-
from typing import Optional
|
4
3
|
|
5
4
|
from reconcile.aus import base as aus
|
6
5
|
from reconcile.aus.cluster_version_data import VersionData
|
7
6
|
from reconcile.aus.metrics import (
|
8
|
-
UPGRADE_BLOCKED_METRIC_VALUE,
|
9
|
-
UPGRADE_LONG_RUNNING_METRIC_VALUE,
|
10
|
-
UPGRADE_SCHEDULED_METRIC_VALUE,
|
11
|
-
UPGRADE_STARTED_METRIC_VALUE,
|
12
7
|
AUSClusterVersionRemainingSoakDaysGauge,
|
13
8
|
AUSOrganizationVersionDataGauge,
|
14
9
|
)
|
15
|
-
from reconcile.aus.models import (
|
16
|
-
ClusterUpgradeSpec,
|
17
|
-
OrganizationUpgradeSpec,
|
18
|
-
)
|
19
|
-
from reconcile.gql_definitions.fragments.ocm_environment import OCMEnvironment
|
10
|
+
from reconcile.aus.models import OrganizationUpgradeSpec
|
20
11
|
from reconcile.utils import metrics
|
21
12
|
from reconcile.utils.ocm import (
|
22
13
|
OCM_PRODUCT_OSD,
|
@@ -63,10 +54,14 @@ class OCMClusterUpgradeSchedulerIntegration(
|
|
63
54
|
version_data=version_data,
|
64
55
|
)
|
65
56
|
self.expose_remaining_soak_day_metrics(
|
66
|
-
ocm_env=org_upgrade_spec.org.environment.name,
|
67
57
|
org_upgrade_spec=org_upgrade_spec,
|
68
58
|
version_data=version_data,
|
69
59
|
current_state=current_state,
|
60
|
+
metrics_builder=functools.partial(
|
61
|
+
AUSClusterVersionRemainingSoakDaysGauge,
|
62
|
+
integration=self.name,
|
63
|
+
ocm_env=org_upgrade_spec.org.environment.name,
|
64
|
+
),
|
70
65
|
)
|
71
66
|
|
72
67
|
diffs = aus.calculate_diff(
|
@@ -74,15 +69,6 @@ class OCMClusterUpgradeSchedulerIntegration(
|
|
74
69
|
)
|
75
70
|
aus.act(dry_run, diffs, ocm_api)
|
76
71
|
|
77
|
-
def get_ocm_env_upgrade_specs(
|
78
|
-
self, ocm_env: OCMEnvironment
|
79
|
-
) -> dict[str, OrganizationUpgradeSpec]:
|
80
|
-
raise NotImplementedError(
|
81
|
-
"Don't use ocm-upgrade-scheduler anymore but use: \n"
|
82
|
-
"* ocm-label to transfer upgrade policies to OCM subscription labels \n"
|
83
|
-
"* advanced-upgrade-service to drive upgrade policies based on OCM subscription labels"
|
84
|
-
)
|
85
|
-
|
86
72
|
def expose_version_data_metrics(
|
87
73
|
self,
|
88
74
|
ocm_env: str,
|
@@ -101,117 +87,3 @@ class OCMClusterUpgradeSchedulerIntegration(
|
|
101
87
|
),
|
102
88
|
workload_history.soak_days,
|
103
89
|
)
|
104
|
-
|
105
|
-
def expose_remaining_soak_day_metrics(
|
106
|
-
self,
|
107
|
-
ocm_env: str,
|
108
|
-
org_upgrade_spec: OrganizationUpgradeSpec,
|
109
|
-
version_data: VersionData,
|
110
|
-
current_state: list[aus.AbstractUpgradePolicy],
|
111
|
-
) -> None:
|
112
|
-
current_cluster_upgrade_policies = {
|
113
|
-
p.cluster.external_id: p for p in current_state
|
114
|
-
}
|
115
|
-
for spec in org_upgrade_spec.specs:
|
116
|
-
upgrades = spec.get_available_upgrades()
|
117
|
-
if not upgrades:
|
118
|
-
continue
|
119
|
-
|
120
|
-
# calculate the amount every version has soaked. if a version has soaked for
|
121
|
-
# multiple workloads, we will pick the minimum soak day value of all workloads
|
122
|
-
# relevant on the cluster.
|
123
|
-
soaked_versions: dict[str, float] = {}
|
124
|
-
for workload in spec.upgrade_policy.workloads:
|
125
|
-
for version, soak_days in aus.soaking_days(
|
126
|
-
version_data, upgrades, workload, False
|
127
|
-
).items():
|
128
|
-
soaked_versions[version] = min(
|
129
|
-
soak_days, soaked_versions.get(version, soak_days)
|
130
|
-
)
|
131
|
-
|
132
|
-
current_upgrade = current_cluster_upgrade_policies.get(spec.cluster_uuid)
|
133
|
-
for version, metric_value in remaining_soak_day_metric_values_for_cluster(
|
134
|
-
spec, soaked_versions, current_upgrade
|
135
|
-
).items():
|
136
|
-
metrics.set_gauge(
|
137
|
-
AUSClusterVersionRemainingSoakDaysGauge(
|
138
|
-
integration=self.name,
|
139
|
-
ocm_env=ocm_env,
|
140
|
-
cluster_uuid=spec.cluster.external_id,
|
141
|
-
soaking_version=version,
|
142
|
-
),
|
143
|
-
metric_value,
|
144
|
-
)
|
145
|
-
|
146
|
-
|
147
|
-
def remaining_soak_day_metric_values_for_cluster(
|
148
|
-
spec: ClusterUpgradeSpec,
|
149
|
-
soaked_versions: dict[str, float],
|
150
|
-
current_upgrade: Optional[aus.AbstractUpgradePolicy],
|
151
|
-
) -> dict[str, float]:
|
152
|
-
"""
|
153
|
-
Calculate what versions and metric values to report for `AUSClusterVersionRemainingSoakDaysGauge`.
|
154
|
-
Usually, the remaining soak days for a version are reported but there are some special cases
|
155
|
-
where we report negative values to indicate that a version is blocked or an upgrade has been
|
156
|
-
scheduled or started.
|
157
|
-
|
158
|
-
Additionally certain versions are not reported when it is not meaningful (e.g. an upgrade will never happen)
|
159
|
-
to prevent metric clutter.
|
160
|
-
"""
|
161
|
-
upgrades = spec.get_available_upgrades()
|
162
|
-
if not upgrades:
|
163
|
-
return {}
|
164
|
-
|
165
|
-
# calculate the remaining soakdays for each upgrade version candidate of the cluster.
|
166
|
-
# when a version is soaking, it has a value > 0 and when it soaked enough, the value is 0.
|
167
|
-
remaining_soakdays: list[float] = [
|
168
|
-
max(
|
169
|
-
(spec.upgrade_policy.conditions.soak_days or 0) - soaked_versions.get(v, 0),
|
170
|
-
0,
|
171
|
-
)
|
172
|
-
for v in upgrades
|
173
|
-
]
|
174
|
-
|
175
|
-
# under certain conditions, the remaining soak day value for a version needs to be
|
176
|
-
# replaced with special marker values
|
177
|
-
version_metrics: dict[str, float] = {}
|
178
|
-
for idx, version in reversed(list(enumerate(upgrades))):
|
179
|
-
# if an upgrade is `scheduled` or `started`` for the specific version, their respective negative
|
180
|
-
# marker values will be used instead of their actual soak days. there are other states than `scheduled`
|
181
|
-
# and `started` but the `UpgradePolicy` vanishes too quickly to observe them reliably, when such
|
182
|
-
# states are reached.
|
183
|
-
if current_upgrade and current_upgrade.version == version:
|
184
|
-
if current_upgrade.state == "scheduled":
|
185
|
-
remaining_soakdays[idx] = UPGRADE_SCHEDULED_METRIC_VALUE
|
186
|
-
elif current_upgrade.state in ("started", "delayed"):
|
187
|
-
remaining_soakdays[idx] = UPGRADE_STARTED_METRIC_VALUE
|
188
|
-
if current_upgrade.next_run:
|
189
|
-
# if an upgrade runs for over 6 hours, we mark it as a long running upgrade
|
190
|
-
next_run = datetime.strptime(
|
191
|
-
current_upgrade.next_run, "%Y-%m-%dT%H:%M:%SZ"
|
192
|
-
)
|
193
|
-
now = datetime.utcnow()
|
194
|
-
hours_ago = (now - next_run).total_seconds() / 3600
|
195
|
-
if hours_ago >= 6:
|
196
|
-
remaining_soakdays[idx] = UPGRADE_LONG_RUNNING_METRIC_VALUE
|
197
|
-
elif spec.version_blocked(version):
|
198
|
-
# if a version is blocked, we will still report it but with a dedicated negative marker value
|
199
|
-
remaining_soakdays[idx] = UPGRADE_BLOCKED_METRIC_VALUE
|
200
|
-
|
201
|
-
# we are intentionally not reporting versions that still soak or soaked enough when
|
202
|
-
# there is a later version that also soaked enough. the later one will be picked
|
203
|
-
# for an upgrade over the older one anyways.
|
204
|
-
if remaining_soakdays[idx] >= 0 and any(
|
205
|
-
later_version_remaining_soak_days
|
206
|
-
in (
|
207
|
-
0,
|
208
|
-
UPGRADE_SCHEDULED_METRIC_VALUE,
|
209
|
-
UPGRADE_STARTED_METRIC_VALUE,
|
210
|
-
UPGRADE_LONG_RUNNING_METRIC_VALUE,
|
211
|
-
)
|
212
|
-
for later_version_remaining_soak_days in remaining_soakdays[idx + 1 :]
|
213
|
-
):
|
214
|
-
continue
|
215
|
-
version_metrics[version] = remaining_soakdays[idx]
|
216
|
-
|
217
|
-
return version_metrics
|
File without changes
|
File without changes
|
{qontract_reconcile-0.10.1rc418.dist-info → qontract_reconcile-0.10.1rc419.dist-info}/top_level.txt
RENAMED
File without changes
|