qontract-reconcile 0.10.2.dev167__py3-none-any.whl → 0.10.2.dev168__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {qontract_reconcile-0.10.2.dev167.dist-info → qontract_reconcile-0.10.2.dev168.dist-info}/METADATA +1 -1
- {qontract_reconcile-0.10.2.dev167.dist-info → qontract_reconcile-0.10.2.dev168.dist-info}/RECORD +10 -8
- reconcile/dashdotdb_slo.py +45 -156
- reconcile/gql_definitions/common/saas_files.py +49 -0
- reconcile/gql_definitions/dashdotdb_slo/slo_documents_query.py +15 -67
- reconcile/gql_definitions/fragments/saas_slo_document.py +82 -0
- reconcile/gql_definitions/introspection.json +249 -229
- reconcile/utils/slo_document_manager.py +278 -0
- {qontract_reconcile-0.10.2.dev167.dist-info → qontract_reconcile-0.10.2.dev168.dist-info}/WHEEL +0 -0
- {qontract_reconcile-0.10.2.dev167.dist-info → qontract_reconcile-0.10.2.dev168.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,278 @@
|
|
1
|
+
import itertools
|
2
|
+
import logging
|
3
|
+
from dataclasses import dataclass
|
4
|
+
from math import isnan
|
5
|
+
from typing import Any, Self
|
6
|
+
|
7
|
+
import jinja2
|
8
|
+
import requests
|
9
|
+
from sretoolbox.utils import threaded
|
10
|
+
|
11
|
+
from reconcile.gql_definitions.fragments.saas_slo_document import (
|
12
|
+
SLODocument,
|
13
|
+
SLODocumentSLOV1,
|
14
|
+
SLOExternalPrometheusAccessV1,
|
15
|
+
SLONamespacesV1,
|
16
|
+
)
|
17
|
+
from reconcile.utils.rest_api_base import ApiBase, BearerTokenAuth
|
18
|
+
from reconcile.utils.secret_reader import SecretReaderBase
|
19
|
+
|
20
|
+
# Relative path of the Prometheus query endpoint; PrometheusClient passes it
# as ``url`` to ``_get`` when fetching an SLO value.
PROM_QUERY_URL: str = "api/v1/query"

# Defaults for the ``read_timeout`` / ``max_retries`` arguments forwarded to
# every PrometheusClient (timeout unit is presumably seconds - confirm in ApiBase).
DEFAULT_READ_TIMEOUT: int = 30
DEFAULT_RETRIES: int = 3
# Default ``thread_pool_size`` handed to ``threaded.run`` by SLODocumentManager.
DEFAULT_THREAD_POOL_SIZE: int = 10
|
25
|
+
|
26
|
+
|
27
|
+
class EmptySLOResult(Exception):
    """Raised when the ``result`` list of a Prometheus query response is empty."""
|
29
|
+
|
30
|
+
|
31
|
+
class EmptySLOValue(Exception):
    """Raised when the first Prometheus result carries an empty ``value`` entry."""
|
33
|
+
|
34
|
+
|
35
|
+
class InvalidSLOValue(Exception):
    """Raised when the SLO value returned by Prometheus parses to NaN."""
|
37
|
+
|
38
|
+
|
39
|
+
@dataclass
class SLODetails:
    """
    Outcome of evaluating a single SLO against Prometheus.

    Bundles the SLO definition with the identifiers of where it was evaluated
    and the value that the query returned.
    """

    # NOTE: field order defines the positional constructor; do not reorder.
    # Name of the namespace the SLO was evaluated for.
    namespace_name: str
    # Name of the SLO document that declares the SLO.
    slo_document_name: str
    # Name of the cluster hosting the namespace.
    cluster_name: str
    # The SLO definition itself (expression, target, parameters).
    slo: SLODocumentSLOV1
    # Name of the app/service that owns the namespace.
    service_name: str
    # Value extracted from the Prometheus query response.
    current_slo_value: float
|
47
|
+
|
48
|
+
|
49
|
+
@dataclass
class NamespaceSLODocument:
    """
    An SLO document paired with exactly one of the namespaces it lists.
    """

    # Name of the originating SLO document.
    name: str
    # The namespace entry of that document this instance stands for.
    namespace: SLONamespacesV1
    # SLOs declared by the document; may be None.
    slos: list[SLODocumentSLOV1] | None

    def get_host_url(self) -> str:
        """
        Return the Prometheus URL that should be queried for this namespace.

        The URL of the namespace's ``prometheus_access`` section wins when
        that section is set; otherwise the ``prometheus_url`` of the
        namespace's cluster is returned.
        """
        external_access = self.namespace.prometheus_access
        if external_access:
            return external_access.url
        return self.namespace.namespace.cluster.prometheus_url
|
61
|
+
|
62
|
+
|
63
|
+
class PrometheusClient(ApiBase):
    """
    ApiBase-backed client that evaluates SLO expressions against one Prometheus host.
    """

    def get_current_slo_value(
        self,
        slo: SLODocumentSLOV1,
        slo_document_name: str,
        namespace_name: str,
        service_name: str,
        cluster_name: str,
    ) -> SLODetails | None:
        """
        Query Prometheus for the current value of ``slo``.

        ``slo.expr`` is rendered as a Jinja2 template with ``window`` set to
        ``slo.slo_parameters.window`` and the result is sent to the query
        endpoint.

        Returns an SLODetails instance on success. Any failure other than a
        connection error is logged and reported as None.

        Raises:
            requests.exceptions.ConnectionError: logged and re-raised.
            Template rendering happens before the guarded section, so Jinja2
            errors propagate as well.
        """
        prom_query = jinja2.Template(slo.expr).render(
            window=slo.slo_parameters.window
        )
        try:
            response = self._get(url=PROM_QUERY_URL, params={"query": prom_query})
            return SLODetails(
                namespace_name=namespace_name,
                slo_document_name=slo_document_name,
                cluster_name=cluster_name,
                slo=slo,
                service_name=service_name,
                current_slo_value=self._extract_current_slo_value(data=response),
            )
        except requests.exceptions.ConnectionError:
            # Connectivity problems are not swallowed: log and propagate.
            logging.error(
                f"Connection error getting current value for SLO: {slo.name} of document: {slo_document_name} for namespace: {namespace_name}"
            )
            raise
        except Exception as err:
            # Best effort for everything else: the caller sees None.
            logging.error(
                f"Unexpected error getting current value for SLO: {slo.name} of document: {slo_document_name} for namespace: {namespace_name} details: {err}"
            )
        return None

    def _extract_current_slo_value(self, data: dict[str, Any]) -> float:
        """
        Pull the numeric SLO value out of a Prometheus query response.

        Only the first entry of ``data["data"]["result"]`` is considered; its
        ``value`` is indexed at position 1 (presumably the
        ``[timestamp, value]`` pair of an instant query - confirm against the
        Prometheus API) and converted to float.

        Raises:
            EmptySLOResult: the result list is empty.
            EmptySLOValue: the first result has an empty ``value``.
            InvalidSLOValue: the value converts to NaN.
        """
        samples = data["data"]["result"]
        if not samples:
            raise EmptySLOResult("prometheus returned empty result")

        raw_value = samples[0]["value"]
        if not raw_value:
            raise EmptySLOValue("prometheus returned empty SLO value")

        parsed = float(raw_value[1])
        if isnan(parsed):
            raise InvalidSLOValue("slo value should be a number")
        return parsed
|
115
|
+
|
116
|
+
|
117
|
+
class PrometheusClientMap:
    """
    A mapping from Prometheus URLs to PrometheusClient instances.

    One client is built per distinct URL returned by
    ``NamespaceSLODocument.get_host_url()``. The map can be used as a context
    manager; leaving the ``with`` block calls ``cleanup()`` on every client.
    """

    def __init__(
        self,
        secret_reader: SecretReaderBase,
        namespace_slo_documents: list[NamespaceSLODocument],
        read_timeout: int = DEFAULT_READ_TIMEOUT,
        max_retries: int = DEFAULT_RETRIES,
    ):
        """
        Build the clients for all given documents.

        Args:
            secret_reader: used to resolve credentials / automation tokens.
            namespace_slo_documents: documents whose Prometheus hosts need a client.
            read_timeout: forwarded to every PrometheusClient.
            max_retries: forwarded to every PrometheusClient.

        Raises:
            Exception: whatever client construction raises; clients created
                before the failure are cleaned up first.
        """
        self.secret_reader = secret_reader
        self.read_timeout = read_timeout
        self.max_retries = max_retries
        self.pc_map: dict[str, PrometheusClient] = self._build_pc_map(
            namespace_slo_documents
        )

    def __enter__(self) -> Self:
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        self.cleanup()

    def get_prometheus_client(self, prom_url: str) -> PrometheusClient:
        """Return the client registered for ``prom_url`` (KeyError if unknown)."""
        return self.pc_map[prom_url]

    def _build_pc_map(
        self, namespace_slo_documents: list[NamespaceSLODocument]
    ) -> dict[str, PrometheusClient]:
        """
        Create one PrometheusClient per distinct host URL.

        If building any client fails, the clients created so far are cleaned
        up before the error is re-raised: this runs inside ``__init__``, so
        ``__exit__`` would never get the chance to release them.
        """
        pc_map: dict[str, PrometheusClient] = {}
        try:
            for doc in namespace_slo_documents:
                key = doc.get_host_url()
                if key not in pc_map:
                    pc_map[key] = self.build_prom_client_from_namespace(
                        doc.namespace
                    )
        except Exception:
            for prom_client in pc_map.values():
                prom_client.cleanup()
            raise
        return pc_map

    def cleanup(self) -> None:
        """Call ``cleanup()`` on every client held by the map."""
        for prom_client in self.pc_map.values():
            prom_client.cleanup()

    def build_auth_for_prometheus_access(
        self, prometheus_access: SLOExternalPrometheusAccessV1
    ) -> requests.auth.HTTPBasicAuth | None:
        """
        Build authentication for Prometheus endpoint referred in prometheusAccess section.

        Returns HTTP basic auth when both ``username`` and ``password`` are
        set (each is resolved through the secret reader), otherwise None.
        """
        if prometheus_access.username and prometheus_access.password:
            username = self.secret_reader.read_secret(prometheus_access.username)
            password = self.secret_reader.read_secret(prometheus_access.password)
            return requests.auth.HTTPBasicAuth(username, password)
        return None

    def build_prom_client_from_namespace(
        self, namespace: SLONamespacesV1
    ) -> PrometheusClient:
        """
        Create the PrometheusClient matching a namespace's configuration.

        With a ``prometheus_access`` section, its URL and (optional) basic
        auth are used. Otherwise the cluster's ``prometheus_url`` is used with
        a bearer token read from the cluster's automation token secret.

        Raises:
            Exception: no ``prometheus_access`` is set and the cluster has no
                automation token.
        """
        auth: requests.auth.HTTPBasicAuth | BearerTokenAuth | None
        if namespace.prometheus_access:
            host = namespace.prometheus_access.url
            auth = self.build_auth_for_prometheus_access(namespace.prometheus_access)
        else:
            cluster = namespace.namespace.cluster
            if not cluster.automation_token:
                raise Exception(
                    f"cluster {cluster.name} does not have automation token set"
                )
            host = cluster.prometheus_url
            auth = BearerTokenAuth(
                self.secret_reader.read_secret(cluster.automation_token)
            )
        return PrometheusClient(
            host=host,
            read_timeout=self.read_timeout,
            max_retries=self.max_retries,
            auth=auth,
        )
|
198
|
+
|
199
|
+
|
200
|
+
class SLODocumentManager:
    """
    Evaluates the SLOs of a set of SLO documents against Prometheus.

    Documents are expanded into one NamespaceSLODocument per namespace; the
    current value of each SLO is then fetched through a PrometheusClientMap.
    """

    def __init__(
        self,
        slo_documents: list[SLODocument],
        secret_reader: SecretReaderBase,
        thread_pool_size: int = DEFAULT_THREAD_POOL_SIZE,
        read_timeout: int = DEFAULT_READ_TIMEOUT,
        max_retries: int = DEFAULT_RETRIES,
    ):
        self.secret_reader = secret_reader
        self.thread_pool_size = thread_pool_size
        self.read_timeout = read_timeout
        self.max_retries = max_retries
        self.namespace_slo_documents = self._build_namespace_slo_documents(
            slo_documents
        )

    @staticmethod
    def _build_namespace_slo_documents(
        slo_documents: list[SLODocument],
    ) -> list[NamespaceSLODocument]:
        """Expand every document into one entry per namespace it lists."""
        expanded: list[NamespaceSLODocument] = []
        for document in slo_documents:
            for namespace in document.namespaces:
                expanded.append(
                    NamespaceSLODocument(
                        name=document.name,
                        namespace=namespace,
                        slos=document.slos,
                    )
                )
        return expanded

    def get_current_slo_list(self) -> list[SLODetails | None]:
        """
        Fetch the current value of every SLO of every namespace document.

        The per-document work is distributed with ``threaded.run``; the
        per-document lists are flattened into one list. An entry is None when
        the value of that SLO could not be obtained.
        """
        with PrometheusClientMap(
            secret_reader=self.secret_reader,
            namespace_slo_documents=self.namespace_slo_documents,
            read_timeout=self.read_timeout,
            max_retries=self.max_retries,
        ) as pc_map:
            per_document_results = threaded.run(
                func=self._get_current_slo_details_list,
                pc_map=pc_map,
                iterable=self.namespace_slo_documents,
                thread_pool_size=self.thread_pool_size,
            )
            # Flatten while the clients are still open, as the results are consumed here.
            return [
                details
                for document_results in per_document_results
                for details in document_results
            ]

    def get_breached_slos(self) -> list[SLODetails]:
        """
        Return the SLOs whose current value is below their target.

        Raises:
            RuntimeError: at least one SLO value could not be retrieved.
        """
        all_details = self.get_current_slo_list()
        if any(not details for details in all_details):
            raise RuntimeError("slo validation failed due to retrival errors")
        return [
            details
            for details in all_details
            if details and details.current_slo_value < details.slo.slo_target
        ]

    @staticmethod
    def _get_current_slo_details_list(
        slo_document: NamespaceSLODocument,
        pc_map: PrometheusClientMap,
    ) -> list[SLODetails | None]:
        """
        Evaluate all SLOs of one namespace document.

        The client is looked up in ``pc_map`` by the document's host URL. A
        document without SLOs yields an empty list.
        """
        prom_client = pc_map.get_prometheus_client(slo_document.get_host_url())
        namespace = slo_document.namespace.namespace
        results: list[SLODetails | None] = []
        for slo in slo_document.slos or []:
            results.append(
                prom_client.get_current_slo_value(
                    slo=slo,
                    slo_document_name=slo_document.name,
                    namespace_name=namespace.name,
                    service_name=namespace.app.name,
                    cluster_name=namespace.cluster.name,
                )
            )
        return results
|
{qontract_reconcile-0.10.2.dev167.dist-info → qontract_reconcile-0.10.2.dev168.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|