rlwatch 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rlwatch/__init__.py +8 -0
- rlwatch/alerts.py +483 -0
- rlwatch/cli.py +308 -0
- rlwatch/config.py +368 -0
- rlwatch/core.py +380 -0
- rlwatch/dashboard.py +318 -0
- rlwatch/detectors.py +693 -0
- rlwatch/py.typed +0 -0
- rlwatch/storage.py +257 -0
- rlwatch-0.3.0.dist-info/METADATA +293 -0
- rlwatch-0.3.0.dist-info/RECORD +14 -0
- rlwatch-0.3.0.dist-info/WHEEL +5 -0
- rlwatch-0.3.0.dist-info/entry_points.txt +2 -0
- rlwatch-0.3.0.dist-info/top_level.txt +1 -0
rlwatch/__init__.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""rlwatch - Real-time GRPO/PPO training instability detection."""
|
|
2
|
+
|
|
3
|
+
# Must match the released distribution version (this wheel is rlwatch 0.3.0).
__version__ = "0.3.0"
|
|
4
|
+
|
|
5
|
+
from rlwatch.core import attach, log_step, get_monitor, RLWatch
|
|
6
|
+
from rlwatch.config import RLWatchConfig, load_config
|
|
7
|
+
|
|
8
|
+
# Names re-exported as the package's public API; this is also what
# ``from rlwatch import *`` provides.
__all__ = ["attach", "log_step", "get_monitor", "RLWatch", "RLWatchConfig", "load_config"]
|
rlwatch/alerts.py
ADDED
|
@@ -0,0 +1,483 @@
|
|
|
1
|
+
"""Alert delivery channels — console, Slack, email, Discord, generic webhook.
|
|
2
|
+
|
|
3
|
+
This module is the only place in the codebase that's allowed to make network
|
|
4
|
+
calls (CLAUDE.md cardinal rule #4). The CI forbidden-pattern grep enforces
|
|
5
|
+
this — all ``urllib.request`` / ``requests`` / ``httpx`` references must live
|
|
6
|
+
here.
|
|
7
|
+
|
|
8
|
+
Every sender follows the same shape:
|
|
9
|
+
- Constructed with config, holds no global state.
|
|
10
|
+
- ``send(alert, run_id)`` is called from a daemon thread by ``AlertManager``.
|
|
11
|
+
- Catches and logs every exception. **Never raises into the training loop.**
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
import logging
|
|
18
|
+
import smtplib
|
|
19
|
+
import string
|
|
20
|
+
import threading
|
|
21
|
+
from datetime import datetime, timezone
|
|
22
|
+
from email.mime.multipart import MIMEMultipart
|
|
23
|
+
from email.mime.text import MIMEText
|
|
24
|
+
from typing import Optional
|
|
25
|
+
|
|
26
|
+
from rlwatch.config import AlertConfig, DiscordConfig, WebhookConfig
|
|
27
|
+
from rlwatch.detectors import Alert
|
|
28
|
+
|
|
29
|
+
# Module-level logger; the senders below report delivery failures through it.
logger = logging.getLogger("rlwatch.alerts")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class AlertManager:
    """Gate alerts through rate limits and fan them out to delivery channels.

    An alert that passes ``should_send`` is always written to the console and
    then handed to every configured sender (Slack, email, Discord, generic
    webhook), each on its own daemon thread so delivery never blocks the
    caller.
    """

    def __init__(self, config: AlertConfig, run_id: str = ""):
        """Create the senders that ``config`` enables.

        Args:
            config: Alert settings. This class reads ``cooldown_steps``,
                ``max_alerts_per_run`` and the ``slack``, ``email``,
                ``discord`` and ``webhook`` sub-configs.
            run_id: Run identifier forwarded to every sender.
        """
        self.config = config
        self.run_id = run_id
        self._alert_count = 0
        # (detector, severity) -> last step that severity fired. Tracking per
        # severity lets a critical preempt a warning that's still in cooldown.
        self._last_alert_step: dict[tuple[str, str], int] = {}
        # NOTE(review): no method of this class reads or writes this mapping;
        # the cooldown is enforced through ``_last_alert_step`` only. It is
        # kept so the attribute stays available to any external reader.
        self._last_warning_step: dict[str, int] = {}
        self._slack_client: Optional[_SlackSender] = None
        self._email_client: Optional[_EmailSender] = None
        self._discord_client: Optional[_DiscordSender] = None
        self._webhook_client: Optional[_WebhookSender] = None

        if config.slack.enabled and config.slack.webhook_url:
            self._slack_client = _SlackSender(config.slack.webhook_url)

        if config.email.enabled and config.email.to_addrs:
            self._email_client = _EmailSender(
                host=config.email.smtp_host,
                port=config.email.smtp_port,
                user=config.email.smtp_user,
                password=config.email.smtp_password,
                from_addr=config.email.from_addr,
                to_addrs=config.email.to_addrs,
            )

        if config.discord.enabled and config.discord.webhook_url:
            self._discord_client = _DiscordSender(config.discord)

        if config.webhook.enabled and config.webhook.url:
            self._webhook_client = _WebhookSender(config.webhook)

    def should_send(self, alert: Alert) -> bool:
        """Check if an alert should be sent based on cooldown and rate limits.

        Cooldown semantics:
        * Once ``max_alerts_per_run`` alerts have been sent, everything is
          suppressed.
        * A repeat alert at the same (detector, severity) within
          ``cooldown_steps`` of the previous one is suppressed.
        * Cooldowns are tracked per severity, so a *critical* alert is
          allowed through even if a warning from the same detector is still
          inside its cooldown window. The critical still respects its own
          per-severity cooldown.

        Returns:
            True if the alert may be delivered, False if it is suppressed.
        """
        if self._alert_count >= self.config.max_alerts_per_run:
            return False

        key = (alert.detector, alert.severity)
        # The default places "never fired" far enough in the past that the
        # first alert for a key always clears the cooldown check.
        last_step = self._last_alert_step.get(key, -self.config.cooldown_steps - 1)
        if alert.step - last_step < self.config.cooldown_steps:
            return False

        return True

    def send(self, alert: Alert) -> bool:
        """Send an alert via all configured channels (non-blocking).

        Returns True if the alert was actually sent (not suppressed by
        cooldown or the per-run cap), False otherwise.
        """
        if not self.should_send(alert):
            return False

        self._alert_count += 1
        self._last_alert_step[(alert.detector, alert.severity)] = alert.step

        # Log to console always
        _log_alert_console(alert, self.run_id)

        # Send via configured channels in background threads
        for channel, client in self._configured_channels():
            self._dispatch(channel, client, alert)

        return True

    def _configured_channels(self) -> list[tuple[str, object]]:
        """Return ``(label, sender)`` pairs for every sender that was built."""
        candidates = (
            ("Slack", self._slack_client),
            ("email", self._email_client),
            ("Discord", self._discord_client),
            ("webhook", self._webhook_client),
        )
        return [(label, client) for label, client in candidates if client]

    def _dispatch(self, channel: str, client, alert: Alert) -> None:
        """Run ``client.send`` on a daemon thread without raising to the caller."""
        try:
            threading.Thread(
                target=client.send,
                args=(alert, self.run_id),
                daemon=True,
            ).start()
        except RuntimeError as exc:
            # ``Thread.start`` raises RuntimeError when no new thread can be
            # started; losing one notification is preferable to interrupting
            # the code that reported the alert.
            logger.error("Could not start %s alert thread: %s", channel, exc)

    @property
    def total_alerts_sent(self) -> int:
        """Number of alerts that passed ``should_send`` and were dispatched."""
        return self._alert_count
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _log_alert_console(alert: Alert, run_id: str):
    """Print an alert to stderr as a ``rich`` panel, else log it.

    The panel is rendered only when ``rich`` is importable. Alert text is
    escaped before being placed into rich markup, so square brackets in a
    message, detector name or run id are shown literally instead of being
    parsed as style tags. If ``rich`` is missing, or rendering fails for any
    other reason, the alert is written through the module logger instead;
    this function does not propagate rendering errors to its caller.

    Args:
        alert: The alert to display; reads ``severity``, ``detector``,
            ``step``, ``message`` and ``recommendation``.
        run_id: Identifier of the run, shown next to the step.
    """
    try:
        from rich.console import Console
        from rich.markup import escape
        from rich.panel import Panel

        severity_color = "red" if alert.severity == "critical" else "yellow"
        title = (
            f"[bold {severity_color}]rlwatch {alert.severity.upper()}: "
            f"{escape(str(alert.detector))}[/]"
        )
        body = (
            f"[bold]Step {alert.step}[/] | Run: {escape(str(run_id))}\n\n"
            f"{escape(str(alert.message))}\n\n"
            f"[dim]Recommendation:[/] {escape(str(alert.recommendation))}"
        )
        Console(stderr=True).print(
            Panel(body, title=title, border_style=severity_color)
        )
        return
    except ImportError:
        # rich is not installed: use the plain logger below.
        pass
    except Exception as exc:
        # Rendering problems must not escape to the caller; note the cause
        # and still emit the alert through the plain logger below.
        logger.error("Rich console rendering failed: %s", exc)

    # Fallback without rich
    prefix = "CRITICAL" if alert.severity == "critical" else "WARNING"
    logger.warning(
        "[rlwatch %s] %s at step %d: %s | %s",
        prefix, alert.detector, alert.step, alert.message, alert.recommendation,
    )
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class _SlackSender:
|
|
165
|
+
"""Sends alerts to Slack via webhook."""
|
|
166
|
+
|
|
167
|
+
def __init__(self, webhook_url: str):
|
|
168
|
+
self.webhook_url = webhook_url
|
|
169
|
+
|
|
170
|
+
def send(self, alert: Alert, run_id: str):
|
|
171
|
+
try:
|
|
172
|
+
from slack_sdk.webhook import WebhookClient
|
|
173
|
+
|
|
174
|
+
client = WebhookClient(self.webhook_url)
|
|
175
|
+
emoji = ":rotating_light:" if alert.severity == "critical" else ":warning:"
|
|
176
|
+
blocks = [
|
|
177
|
+
{
|
|
178
|
+
"type": "header",
|
|
179
|
+
"text": {
|
|
180
|
+
"type": "plain_text",
|
|
181
|
+
"text": f"{emoji} rlwatch {alert.severity.upper()}: {alert.detector}",
|
|
182
|
+
},
|
|
183
|
+
},
|
|
184
|
+
{
|
|
185
|
+
"type": "section",
|
|
186
|
+
"fields": [
|
|
187
|
+
{"type": "mrkdwn", "text": f"*Run:* `{run_id}`"},
|
|
188
|
+
{"type": "mrkdwn", "text": f"*Step:* {alert.step}"},
|
|
189
|
+
],
|
|
190
|
+
},
|
|
191
|
+
{
|
|
192
|
+
"type": "section",
|
|
193
|
+
"text": {
|
|
194
|
+
"type": "mrkdwn",
|
|
195
|
+
"text": alert.message,
|
|
196
|
+
},
|
|
197
|
+
},
|
|
198
|
+
{
|
|
199
|
+
"type": "section",
|
|
200
|
+
"text": {
|
|
201
|
+
"type": "mrkdwn",
|
|
202
|
+
"text": f"*Recommended action:* {alert.recommendation}",
|
|
203
|
+
},
|
|
204
|
+
},
|
|
205
|
+
]
|
|
206
|
+
|
|
207
|
+
# Add metric values as context
|
|
208
|
+
metric_fields = []
|
|
209
|
+
for k, v in alert.metric_values.items():
|
|
210
|
+
if v is not None:
|
|
211
|
+
formatted = f"{v:.4f}" if isinstance(v, float) else str(v)
|
|
212
|
+
metric_fields.append(
|
|
213
|
+
{"type": "mrkdwn", "text": f"`{k}`: {formatted}"}
|
|
214
|
+
)
|
|
215
|
+
|
|
216
|
+
if metric_fields:
|
|
217
|
+
# Slack limits fields to 10
|
|
218
|
+
blocks.append({
|
|
219
|
+
"type": "section",
|
|
220
|
+
"fields": metric_fields[:10],
|
|
221
|
+
})
|
|
222
|
+
|
|
223
|
+
response = client.send(blocks=blocks)
|
|
224
|
+
if response.status_code != 200:
|
|
225
|
+
logger.error("Slack webhook returned %d: %s", response.status_code, response.body)
|
|
226
|
+
except Exception as e:
|
|
227
|
+
logger.error("Failed to send Slack alert: %s", e)
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
class _EmailSender:
|
|
231
|
+
"""Sends alerts via email."""
|
|
232
|
+
|
|
233
|
+
def __init__(
|
|
234
|
+
self,
|
|
235
|
+
host: str,
|
|
236
|
+
port: int,
|
|
237
|
+
user: str,
|
|
238
|
+
password: str,
|
|
239
|
+
from_addr: str,
|
|
240
|
+
to_addrs: list[str],
|
|
241
|
+
):
|
|
242
|
+
self.host = host
|
|
243
|
+
self.port = port
|
|
244
|
+
self.user = user
|
|
245
|
+
self.password = password
|
|
246
|
+
self.from_addr = from_addr
|
|
247
|
+
self.to_addrs = to_addrs
|
|
248
|
+
|
|
249
|
+
def send(self, alert: Alert, run_id: str):
|
|
250
|
+
try:
|
|
251
|
+
msg = MIMEMultipart("alternative")
|
|
252
|
+
msg["Subject"] = f"[rlwatch {alert.severity.upper()}] {alert.detector} — Run {run_id} Step {alert.step}"
|
|
253
|
+
msg["From"] = self.from_addr
|
|
254
|
+
msg["To"] = ", ".join(self.to_addrs)
|
|
255
|
+
|
|
256
|
+
# Plain text
|
|
257
|
+
text = (
|
|
258
|
+
f"rlwatch {alert.severity.upper()}: {alert.detector}\n\n"
|
|
259
|
+
f"Run: {run_id}\n"
|
|
260
|
+
f"Step: {alert.step}\n\n"
|
|
261
|
+
f"{alert.message}\n\n"
|
|
262
|
+
f"Recommendation: {alert.recommendation}\n\n"
|
|
263
|
+
f"Metrics:\n"
|
|
264
|
+
)
|
|
265
|
+
for k, v in alert.metric_values.items():
|
|
266
|
+
if v is not None:
|
|
267
|
+
formatted = f"{v:.4f}" if isinstance(v, float) else str(v)
|
|
268
|
+
text += f" {k}: {formatted}\n"
|
|
269
|
+
|
|
270
|
+
# HTML
|
|
271
|
+
html = f"""
|
|
272
|
+
<html>
|
|
273
|
+
<body>
|
|
274
|
+
<h2 style="color: {'red' if alert.severity == 'critical' else 'orange'}">
|
|
275
|
+
rlwatch {alert.severity.upper()}: {alert.detector}
|
|
276
|
+
</h2>
|
|
277
|
+
<p><strong>Run:</strong> <code>{run_id}</code> | <strong>Step:</strong> {alert.step}</p>
|
|
278
|
+
<p>{alert.message}</p>
|
|
279
|
+
<p><strong>Recommendation:</strong> {alert.recommendation}</p>
|
|
280
|
+
<h3>Metrics</h3>
|
|
281
|
+
<table border="1" cellpadding="5" cellspacing="0">
|
|
282
|
+
"""
|
|
283
|
+
for k, v in alert.metric_values.items():
|
|
284
|
+
if v is not None:
|
|
285
|
+
formatted = f"{v:.4f}" if isinstance(v, float) else str(v)
|
|
286
|
+
html += f"<tr><td><code>{k}</code></td><td>{formatted}</td></tr>"
|
|
287
|
+
html += "</table></body></html>"
|
|
288
|
+
|
|
289
|
+
msg.attach(MIMEText(text, "plain"))
|
|
290
|
+
msg.attach(MIMEText(html, "html"))
|
|
291
|
+
|
|
292
|
+
with smtplib.SMTP(self.host, self.port) as server:
|
|
293
|
+
server.starttls()
|
|
294
|
+
if self.user and self.password:
|
|
295
|
+
server.login(self.user, self.password)
|
|
296
|
+
server.sendmail(self.from_addr, self.to_addrs, msg.as_string())
|
|
297
|
+
|
|
298
|
+
logger.info("Email alert sent to %s", self.to_addrs)
|
|
299
|
+
except Exception as e:
|
|
300
|
+
logger.error("Failed to send email alert: %s", e)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
# ---------------------------------------------------------------------------
|
|
304
|
+
# Discord webhook sender
|
|
305
|
+
# ---------------------------------------------------------------------------
|
|
306
|
+
class _DiscordSender:
|
|
307
|
+
"""Sends alerts to a Discord channel via the webhook API.
|
|
308
|
+
|
|
309
|
+
Discord webhooks accept JSON at ``https://discord.com/api/webhooks/{id}/{token}``
|
|
310
|
+
with optional ``content`` (plain text), ``embeds`` (rich blocks), ``username``,
|
|
311
|
+
and ``avatar_url`` fields. We use one embed per alert with severity-coded
|
|
312
|
+
color and an emoji-prefixed title.
|
|
313
|
+
"""
|
|
314
|
+
|
|
315
|
+
def __init__(self, config: DiscordConfig):
|
|
316
|
+
self.config = config
|
|
317
|
+
|
|
318
|
+
def send(self, alert: Alert, run_id: str):
|
|
319
|
+
try:
|
|
320
|
+
from urllib.error import HTTPError, URLError
|
|
321
|
+
from urllib.request import Request, urlopen
|
|
322
|
+
|
|
323
|
+
emoji = "🚨" if alert.severity == "critical" else "⚠️"
|
|
324
|
+
color = 0xFF0000 if alert.severity == "critical" else 0xFFA500
|
|
325
|
+
|
|
326
|
+
# Mention configured roles only on critical alerts so warnings
|
|
327
|
+
# don't ping the on-call rotation in the middle of the night.
|
|
328
|
+
mention_content: Optional[str] = None
|
|
329
|
+
if alert.severity == "critical" and self.config.mention_role_ids:
|
|
330
|
+
mention_content = " ".join(
|
|
331
|
+
f"<@&{rid}>" for rid in self.config.mention_role_ids
|
|
332
|
+
)
|
|
333
|
+
|
|
334
|
+
fields = [
|
|
335
|
+
{"name": "Run", "value": f"`{run_id}`", "inline": True},
|
|
336
|
+
{"name": "Step", "value": str(alert.step), "inline": True},
|
|
337
|
+
{
|
|
338
|
+
"name": "Recommended action",
|
|
339
|
+
"value": alert.recommendation,
|
|
340
|
+
"inline": False,
|
|
341
|
+
},
|
|
342
|
+
]
|
|
343
|
+
# Discord caps embed fields at 25; cap our metric overflow at 10
|
|
344
|
+
# to leave headroom and stay readable.
|
|
345
|
+
for k, v in list(alert.metric_values.items())[:10]:
|
|
346
|
+
if v is None:
|
|
347
|
+
continue
|
|
348
|
+
formatted = f"{v:.4f}" if isinstance(v, float) else str(v)
|
|
349
|
+
fields.append(
|
|
350
|
+
{"name": f"`{k}`", "value": formatted, "inline": True}
|
|
351
|
+
)
|
|
352
|
+
|
|
353
|
+
payload: dict = {
|
|
354
|
+
"username": self.config.username,
|
|
355
|
+
"embeds": [
|
|
356
|
+
{
|
|
357
|
+
"title": f"{emoji} rlwatch {alert.severity.upper()}: {alert.detector}",
|
|
358
|
+
"description": alert.message,
|
|
359
|
+
"color": color,
|
|
360
|
+
"fields": fields,
|
|
361
|
+
}
|
|
362
|
+
],
|
|
363
|
+
}
|
|
364
|
+
if self.config.avatar_url:
|
|
365
|
+
payload["avatar_url"] = self.config.avatar_url
|
|
366
|
+
if mention_content:
|
|
367
|
+
payload["content"] = mention_content
|
|
368
|
+
|
|
369
|
+
data = json.dumps(payload).encode("utf-8")
|
|
370
|
+
req = Request(
|
|
371
|
+
self.config.webhook_url,
|
|
372
|
+
data=data,
|
|
373
|
+
headers={"Content-Type": "application/json"},
|
|
374
|
+
method="POST",
|
|
375
|
+
)
|
|
376
|
+
with urlopen(req, timeout=10) as resp:
|
|
377
|
+
# Discord returns 204 No Content on success.
|
|
378
|
+
if resp.status >= 300:
|
|
379
|
+
logger.error("Discord webhook returned %d", resp.status)
|
|
380
|
+
except (HTTPError, URLError) as e:
|
|
381
|
+
logger.error("Failed to send Discord alert: %s", e)
|
|
382
|
+
except Exception as e:
|
|
383
|
+
logger.error("Unexpected Discord send error: %s", e)
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
# ---------------------------------------------------------------------------
|
|
387
|
+
# Generic HTTP webhook sender
|
|
388
|
+
# ---------------------------------------------------------------------------
|
|
389
|
+
_DEFAULT_WEBHOOK_TEMPLATE = """{
|
|
390
|
+
"detector": "${detector}",
|
|
391
|
+
"severity": "${severity}",
|
|
392
|
+
"step": ${step},
|
|
393
|
+
"run_id": "${run_id}",
|
|
394
|
+
"message": "${message}",
|
|
395
|
+
"recommendation": "${recommendation}",
|
|
396
|
+
"metrics": ${metrics_json},
|
|
397
|
+
"timestamp": "${timestamp}"
|
|
398
|
+
}"""
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def _json_escape(s: str) -> str:
|
|
402
|
+
"""Escape a string so it can be safely substituted into a JSON string slot.
|
|
403
|
+
|
|
404
|
+
Uses ``json.dumps`` and strips the surrounding quotes — that's the
|
|
405
|
+
canonical "give me a JSON-safe string body" trick. Handles quotes,
|
|
406
|
+
backslashes, newlines, control chars, and non-ASCII unicode.
|
|
407
|
+
"""
|
|
408
|
+
if s is None:
|
|
409
|
+
return ""
|
|
410
|
+
return json.dumps(s)[1:-1]
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
class _WebhookSender:
|
|
414
|
+
"""Generic HTTP webhook sender with ``string.Template`` substitution.
|
|
415
|
+
|
|
416
|
+
POSTs (or PUTs) a JSON body to a user-supplied URL. The body is built
|
|
417
|
+
from a ``string.Template`` so users can customize the payload shape for
|
|
418
|
+
whatever downstream system they're feeding (incident tracker, internal
|
|
419
|
+
log aggregator, custom Slack-of-record, etc.).
|
|
420
|
+
|
|
421
|
+
Substitutable fields:
|
|
422
|
+
${detector} — alert.detector
|
|
423
|
+
${severity} — "critical" | "warning"
|
|
424
|
+
${severity_upper} — "CRITICAL" | "WARNING"
|
|
425
|
+
${step} — int (unquoted in default template — numeric slot)
|
|
426
|
+
${message} — alert.message (JSON-escaped)
|
|
427
|
+
${recommendation} — alert.recommendation (JSON-escaped)
|
|
428
|
+
${run_id} — manager run_id
|
|
429
|
+
${timestamp} — ISO8601 UTC at send time
|
|
430
|
+
${metrics_json} — json.dumps(alert.metric_values), unquoted (object slot)
|
|
431
|
+
|
|
432
|
+
The substituted body is validated with ``json.loads`` before sending.
|
|
433
|
+
Invalid JSON is logged and dropped — we never POST something that won't
|
|
434
|
+
parse on the other end.
|
|
435
|
+
"""
|
|
436
|
+
|
|
437
|
+
def __init__(self, config: WebhookConfig):
|
|
438
|
+
self.config = config
|
|
439
|
+
|
|
440
|
+
def send(self, alert: Alert, run_id: str):
|
|
441
|
+
try:
|
|
442
|
+
from urllib.error import HTTPError, URLError
|
|
443
|
+
from urllib.request import Request, urlopen
|
|
444
|
+
|
|
445
|
+
tmpl_str = self.config.template_json or _DEFAULT_WEBHOOK_TEMPLATE
|
|
446
|
+
tmpl = string.Template(tmpl_str)
|
|
447
|
+
body = tmpl.safe_substitute(
|
|
448
|
+
detector=alert.detector,
|
|
449
|
+
severity=alert.severity,
|
|
450
|
+
severity_upper=alert.severity.upper(),
|
|
451
|
+
step=alert.step,
|
|
452
|
+
message=_json_escape(alert.message),
|
|
453
|
+
recommendation=_json_escape(alert.recommendation),
|
|
454
|
+
run_id=_json_escape(run_id),
|
|
455
|
+
metrics_json=json.dumps(alert.metric_values),
|
|
456
|
+
timestamp=datetime.now(timezone.utc).isoformat(),
|
|
457
|
+
)
|
|
458
|
+
|
|
459
|
+
# Validate the substituted body is still parseable JSON. A
|
|
460
|
+
# malformed custom template should fail loudly here, not on the
|
|
461
|
+
# receiving server.
|
|
462
|
+
try:
|
|
463
|
+
json.loads(body)
|
|
464
|
+
except json.JSONDecodeError as e:
|
|
465
|
+
logger.error(
|
|
466
|
+
"Webhook template produced invalid JSON after substitution: %s",
|
|
467
|
+
e,
|
|
468
|
+
)
|
|
469
|
+
return
|
|
470
|
+
|
|
471
|
+
req = Request(
|
|
472
|
+
self.config.url,
|
|
473
|
+
data=body.encode("utf-8"),
|
|
474
|
+
headers={"Content-Type": "application/json", **self.config.headers},
|
|
475
|
+
method=self.config.method,
|
|
476
|
+
)
|
|
477
|
+
with urlopen(req, timeout=self.config.timeout_seconds) as resp:
|
|
478
|
+
if resp.status >= 300:
|
|
479
|
+
logger.error("Webhook returned %d", resp.status)
|
|
480
|
+
except (HTTPError, URLError) as e:
|
|
481
|
+
logger.error("Failed to send webhook alert: %s", e)
|
|
482
|
+
except Exception as e:
|
|
483
|
+
logger.error("Unexpected webhook send error: %s", e)
|