detectkit 0.4.1__tar.gz → 0.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {detectkit-0.4.1/detectkit.egg-info → detectkit-0.5.1}/PKG-INFO +4 -3
- {detectkit-0.4.1 → detectkit-0.5.1}/README.md +2 -1
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/__init__.py +1 -1
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/alerting/channels/__init__.py +3 -4
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/alerting/channels/base.py +106 -23
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/alerting/channels/email.py +7 -10
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/alerting/channels/factory.py +9 -14
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/alerting/channels/mattermost.py +4 -4
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/alerting/channels/slack.py +5 -5
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/alerting/channels/telegram.py +3 -5
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/alerting/channels/webhook.py +31 -8
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/alerting/orchestrator/_base.py +6 -8
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/alerting/orchestrator/_cooldown.py +1 -3
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/alerting/orchestrator/_decision.py +61 -14
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/alerting/orchestrator/_dispatch.py +10 -12
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/alerting/orchestrator/_recovery.py +14 -35
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/alerting/orchestrator/_types.py +5 -5
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/cli/commands/init.py +0 -1
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/cli/commands/run.py +55 -26
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/cli/commands/test_alert.py +10 -9
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/config/metric_config.py +61 -73
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/config/profile.py +20 -29
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/config/project_config.py +49 -8
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/config/validator.py +4 -7
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/core/interval.py +16 -17
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/core/models.py +10 -14
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/database/clickhouse_manager.py +29 -58
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/database/internal_tables/_alert_states.py +10 -21
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/database/internal_tables/_base.py +1 -3
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/database/internal_tables/_datapoints.py +49 -35
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/database/internal_tables/_detections.py +18 -34
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/database/internal_tables/_metrics.py +9 -26
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/database/internal_tables/_schema.py +3 -9
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/database/internal_tables/_tasks.py +4 -9
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/database/manager.py +13 -35
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/database/tables.py +4 -18
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/detectors/base.py +14 -15
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/detectors/factory.py +6 -11
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/detectors/seasonality.py +6 -8
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/detectors/statistical/iqr.py +30 -33
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/detectors/statistical/mad.py +37 -30
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/detectors/statistical/manual_bounds.py +6 -9
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/detectors/statistical/zscore.py +27 -28
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/loaders/metric_loader.py +12 -15
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/loaders/query_template.py +17 -14
- detectkit-0.5.1/detectkit/orchestration/error_dispatch.py +151 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/orchestration/task_manager/_alert_step.py +78 -48
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/orchestration/task_manager/_base.py +11 -18
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/orchestration/task_manager/_detect_step.py +30 -43
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/orchestration/task_manager/_load_step.py +15 -27
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/orchestration/task_manager/_types.py +1 -3
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/orchestration/task_manager/manager.py +49 -18
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/utils/datetime_utils.py +2 -3
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/utils/stats.py +4 -17
- {detectkit-0.4.1 → detectkit-0.5.1/detectkit.egg-info}/PKG-INFO +4 -3
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit.egg-info/SOURCES.txt +1 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit.egg-info/requires.txt +1 -1
- {detectkit-0.4.1 → detectkit-0.5.1}/pyproject.toml +5 -3
- {detectkit-0.4.1 → detectkit-0.5.1}/LICENSE +0 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/MANIFEST.in +0 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/alerting/__init__.py +0 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/alerting/orchestrator/__init__.py +0 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/alerting/orchestrator/orchestrator.py +0 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/cli/__init__.py +0 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/cli/commands/__init__.py +0 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/cli/main.py +0 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/config/__init__.py +3 -3
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/core/__init__.py +0 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/database/__init__.py +2 -2
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/database/internal_tables/__init__.py +0 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/database/internal_tables/manager.py +0 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/detectors/__init__.py +0 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/detectors/statistical/__init__.py +0 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/loaders/__init__.py +1 -1
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/orchestration/__init__.py +0 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/orchestration/task_manager/__init__.py +0 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/utils/__init__.py +8 -8
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/utils/env_interpolation.py +0 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit/utils/json_utils.py +0 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit.egg-info/dependency_links.txt +0 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit.egg-info/entry_points.txt +0 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/detectkit.egg-info/top_level.txt +0 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/requirements.txt +0 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/setup.cfg +0 -0
- {detectkit-0.4.1 → detectkit-0.5.1}/setup.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: detectkit
|
|
3
|
-
Version: 0.4.1
|
|
3
|
+
Version: 0.5.1
|
|
4
4
|
Summary: Metric monitoring with automatic anomaly detection
|
|
5
5
|
Author: detectkit team
|
|
6
6
|
License: MIT
|
|
@@ -55,7 +55,7 @@ Requires-Dist: timesfm>=0.1.0; extra == "all"
|
|
|
55
55
|
Provides-Extra: dev
|
|
56
56
|
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
57
57
|
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
58
|
-
Requires-Dist: pytest-
|
|
58
|
+
Requires-Dist: pytest-mock>=3.0; extra == "dev"
|
|
59
59
|
Requires-Dist: requests-mock>=1.12; extra == "dev"
|
|
60
60
|
Requires-Dist: black>=23.0; extra == "dev"
|
|
61
61
|
Requires-Dist: mypy>=1.0; extra == "dev"
|
|
@@ -79,7 +79,8 @@ Dynamic: license-file
|
|
|
79
79
|
- **Statistical detectors** — Z-Score, MAD, IQR, Manual Bounds
|
|
80
80
|
- **Multi-channel alerting** — Mattermost, Slack, Telegram, Email, Webhook
|
|
81
81
|
- **@mentions** — tag users/groups in alerts, each channel formats natively
|
|
82
|
-
- **Alert lifecycle** — consecutive anomalies, cooldown, recovery notifications
|
|
82
|
+
- **Alert lifecycle** — consecutive anomalies, cooldown, recovery notifications, no-data alerts
|
|
83
|
+
- **Project-level error alerts** — catch DB outages and pipeline crashes once per run
|
|
83
84
|
- **Database agnostic** — ClickHouse, PostgreSQL, MySQL
|
|
84
85
|
- **Idempotent** — resume from interruptions, no duplicate processing
|
|
85
86
|
- **CLI** — `dtk init`, `dtk run --select`, tag-based selectors
|
|
@@ -13,7 +13,8 @@
|
|
|
13
13
|
- **Statistical detectors** — Z-Score, MAD, IQR, Manual Bounds
|
|
14
14
|
- **Multi-channel alerting** — Mattermost, Slack, Telegram, Email, Webhook
|
|
15
15
|
- **@mentions** — tag users/groups in alerts, each channel formats natively
|
|
16
|
-
- **Alert lifecycle** — consecutive anomalies, cooldown, recovery notifications
|
|
16
|
+
- **Alert lifecycle** — consecutive anomalies, cooldown, recovery notifications, no-data alerts
|
|
17
|
+
- **Project-level error alerts** — catch DB outages and pipeline crashes once per run
|
|
17
18
|
- **Database agnostic** — ClickHouse, PostgreSQL, MySQL
|
|
18
19
|
- **Idempotent** — resume from interruptions, no duplicate processing
|
|
19
20
|
- **CLI** — `dtk init`, `dtk run --select`, tag-based selectors
|
|
@@ -4,7 +4,7 @@ detectk - Anomaly Detection for Time-Series Metrics
|
|
|
4
4
|
A Python library for data analysts and engineers to monitor metrics with automatic anomaly detection.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
-
__version__ = "0.4.1"
|
|
7
|
+
__version__ = "0.5.1"
|
|
8
8
|
|
|
9
9
|
from detectkit.core.interval import Interval
|
|
10
10
|
from detectkit.core.models import ColumnDefinition, TableModel
|
|
@@ -1,13 +1,12 @@
|
|
|
1
1
|
"""Alert channels for external notifications."""
|
|
2
2
|
|
|
3
3
|
from detectkit.alerting.channels.base import AlertData, BaseAlertChannel
|
|
4
|
+
from detectkit.alerting.channels.email import EmailChannel
|
|
5
|
+
from detectkit.alerting.channels.factory import AlertChannelFactory
|
|
4
6
|
from detectkit.alerting.channels.mattermost import MattermostChannel
|
|
5
7
|
from detectkit.alerting.channels.slack import SlackChannel
|
|
6
|
-
from detectkit.alerting.channels.webhook import WebhookChannel
|
|
7
8
|
from detectkit.alerting.channels.telegram import TelegramChannel
|
|
8
|
-
from detectkit.alerting.channels.email import EmailChannel
|
|
9
|
-
|
|
10
|
-
from detectkit.alerting.channels.factory import AlertChannelFactory
|
|
9
|
+
from detectkit.alerting.channels.webhook import WebhookChannel
|
|
11
10
|
|
|
12
11
|
__all__ = [
|
|
13
12
|
"AlertData",
|
|
@@ -7,9 +7,7 @@ the send() method for delivering alerts to specific destinations.
|
|
|
7
7
|
|
|
8
8
|
from abc import ABC, abstractmethod
|
|
9
9
|
from dataclasses import dataclass, field
|
|
10
|
-
from typing import Any
|
|
11
|
-
|
|
12
|
-
from detectkit.detectors.base import DetectionResult
|
|
10
|
+
from typing import Any
|
|
13
11
|
|
|
14
12
|
|
|
15
13
|
@dataclass
|
|
@@ -23,7 +21,7 @@ class AlertData:
|
|
|
23
21
|
metric_name: Name of the metric
|
|
24
22
|
timestamp: Timestamp of the anomaly (datetime64)
|
|
25
23
|
timezone: Timezone for display (e.g., "Europe/Moscow")
|
|
26
|
-
value: Actual metric value
|
|
24
|
+
value: Actual metric value (None for no-data alerts)
|
|
27
25
|
confidence_lower: Lower confidence bound
|
|
28
26
|
confidence_upper: Upper confidence bound
|
|
29
27
|
detector_name: Name/ID of detector that found the anomaly
|
|
@@ -32,23 +30,29 @@ class AlertData:
|
|
|
32
30
|
severity: Severity score
|
|
33
31
|
detection_metadata: Additional metadata from detector
|
|
34
32
|
consecutive_count: Number of consecutive anomalies
|
|
33
|
+
is_recovery: True for recovery notifications
|
|
34
|
+
is_no_data: True for missing-data alerts (no_data_alert)
|
|
35
35
|
"""
|
|
36
36
|
|
|
37
37
|
metric_name: str
|
|
38
38
|
timestamp: Any # datetime64 or datetime
|
|
39
39
|
timezone: str
|
|
40
|
-
value: float
|
|
41
|
-
confidence_lower:
|
|
42
|
-
confidence_upper:
|
|
40
|
+
value: float | None
|
|
41
|
+
confidence_lower: float | None
|
|
42
|
+
confidence_upper: float | None
|
|
43
43
|
detector_name: str
|
|
44
44
|
detector_params: str
|
|
45
45
|
direction: str
|
|
46
46
|
severity: float
|
|
47
|
-
detection_metadata:
|
|
47
|
+
detection_metadata: dict[str, Any]
|
|
48
48
|
consecutive_count: int = 1
|
|
49
49
|
is_recovery: bool = False
|
|
50
|
-
|
|
51
|
-
|
|
50
|
+
is_no_data: bool = False
|
|
51
|
+
is_error: bool = False
|
|
52
|
+
error_type: str | None = None
|
|
53
|
+
error_message: str | None = None
|
|
54
|
+
description: str | None = None
|
|
55
|
+
mentions: list[str] = field(default_factory=list)
|
|
52
56
|
|
|
53
57
|
|
|
54
58
|
class BaseAlertChannel(ABC):
|
|
@@ -71,7 +75,7 @@ class BaseAlertChannel(ABC):
|
|
|
71
75
|
def send(
|
|
72
76
|
self,
|
|
73
77
|
alert_data: AlertData,
|
|
74
|
-
template:
|
|
78
|
+
template: str | None = None,
|
|
75
79
|
) -> bool:
|
|
76
80
|
"""
|
|
77
81
|
Send alert to this channel.
|
|
@@ -101,8 +105,8 @@ class BaseAlertChannel(ABC):
|
|
|
101
105
|
def format_message(
|
|
102
106
|
self,
|
|
103
107
|
alert_data: AlertData,
|
|
104
|
-
template:
|
|
105
|
-
recovery_template:
|
|
108
|
+
template: str | None = None,
|
|
109
|
+
recovery_template: str | None = None,
|
|
106
110
|
) -> str:
|
|
107
111
|
"""
|
|
108
112
|
Format alert message from template.
|
|
@@ -111,13 +115,14 @@ class BaseAlertChannel(ABC):
|
|
|
111
115
|
- {metric_name}
|
|
112
116
|
- {timestamp}
|
|
113
117
|
- {timezone}
|
|
114
|
-
- {value}
|
|
118
|
+
- {value} / {value_display}
|
|
115
119
|
- {confidence_lower}
|
|
116
120
|
- {confidence_upper}
|
|
117
121
|
- {detector_name}
|
|
118
122
|
- {direction}
|
|
119
123
|
- {severity}
|
|
120
124
|
- {consecutive_count}
|
|
125
|
+
- {status}
|
|
121
126
|
|
|
122
127
|
Args:
|
|
123
128
|
alert_data: Alert data to format
|
|
@@ -131,13 +136,19 @@ class BaseAlertChannel(ABC):
|
|
|
131
136
|
>>> message = channel.format_message(alert_data, template)
|
|
132
137
|
"""
|
|
133
138
|
if template is None:
|
|
134
|
-
if alert_data.is_recovery:
|
|
139
|
+
if alert_data.is_error:
|
|
140
|
+
template = self.get_default_error_template()
|
|
141
|
+
elif alert_data.is_no_data:
|
|
142
|
+
template = self.get_default_no_data_template()
|
|
143
|
+
elif alert_data.is_recovery:
|
|
135
144
|
template = recovery_template or self.get_default_recovery_template()
|
|
136
145
|
else:
|
|
137
146
|
template = self.get_default_template()
|
|
138
147
|
|
|
139
148
|
# Format timestamp to string
|
|
149
|
+
import math
|
|
140
150
|
from datetime import datetime
|
|
151
|
+
|
|
141
152
|
import numpy as np
|
|
142
153
|
|
|
143
154
|
ts = alert_data.timestamp
|
|
@@ -147,6 +158,7 @@ class BaseAlertChannel(ABC):
|
|
|
147
158
|
# Convert naive UTC timestamp to target timezone if specified
|
|
148
159
|
if alert_data.timezone:
|
|
149
160
|
from zoneinfo import ZoneInfo
|
|
161
|
+
|
|
150
162
|
ts = ts.replace(tzinfo=ZoneInfo("UTC")).astimezone(ZoneInfo(alert_data.timezone))
|
|
151
163
|
ts_str = f"{ts.strftime('%Y-%m-%d %H:%M:%S')} ({alert_data.timezone})"
|
|
152
164
|
else:
|
|
@@ -154,10 +166,21 @@ class BaseAlertChannel(ABC):
|
|
|
154
166
|
|
|
155
167
|
# Format confidence interval
|
|
156
168
|
if alert_data.confidence_lower is not None and alert_data.confidence_upper is not None:
|
|
157
|
-
confidence_str = f"[{alert_data.confidence_lower:.2f}, {alert_data.confidence_upper:.2f}]"
|
|
169
|
+
confidence_str = (
|
|
170
|
+
f"[{alert_data.confidence_lower:.2f}, {alert_data.confidence_upper:.2f}]"
|
|
171
|
+
)
|
|
158
172
|
else:
|
|
159
173
|
confidence_str = "N/A"
|
|
160
174
|
|
|
175
|
+
# Display-safe value: stays usable even when value is None/NaN (no-data).
|
|
176
|
+
raw_value = alert_data.value
|
|
177
|
+
if raw_value is None or (isinstance(raw_value, float) and math.isnan(raw_value)):
|
|
178
|
+
value_display = "no data"
|
|
179
|
+
value_for_template: Any = "no data"
|
|
180
|
+
else:
|
|
181
|
+
value_display = f"{raw_value}"
|
|
182
|
+
value_for_template = raw_value
|
|
183
|
+
|
|
161
184
|
# Format description line (empty string if no description)
|
|
162
185
|
description_line = f"{alert_data.description}\n" if alert_data.description else ""
|
|
163
186
|
|
|
@@ -166,14 +189,22 @@ class BaseAlertChannel(ABC):
|
|
|
166
189
|
mentions_line = f"\n{mentions_str}" if mentions_str else ""
|
|
167
190
|
|
|
168
191
|
# Format message
|
|
169
|
-
|
|
192
|
+
if alert_data.is_error:
|
|
193
|
+
status = "ERROR"
|
|
194
|
+
elif alert_data.is_no_data:
|
|
195
|
+
status = "NO_DATA"
|
|
196
|
+
elif alert_data.is_recovery:
|
|
197
|
+
status = "RECOVERED"
|
|
198
|
+
else:
|
|
199
|
+
status = "ANOMALY"
|
|
170
200
|
|
|
171
201
|
try:
|
|
172
202
|
message = template.format(
|
|
173
203
|
metric_name=alert_data.metric_name,
|
|
174
204
|
timestamp=ts_str,
|
|
175
205
|
timezone=alert_data.timezone,
|
|
176
|
-
value=alert_data.value,
|
|
206
|
+
value=value_for_template,
|
|
207
|
+
value_display=value_display,
|
|
177
208
|
confidence_lower=alert_data.confidence_lower,
|
|
178
209
|
confidence_upper=alert_data.confidence_upper,
|
|
179
210
|
confidence_interval=confidence_str,
|
|
@@ -183,18 +214,33 @@ class BaseAlertChannel(ABC):
|
|
|
183
214
|
severity=alert_data.severity,
|
|
184
215
|
consecutive_count=alert_data.consecutive_count,
|
|
185
216
|
status=status,
|
|
217
|
+
error_type=alert_data.error_type or "",
|
|
218
|
+
error_message=alert_data.error_message or "",
|
|
186
219
|
description=alert_data.description or "",
|
|
187
220
|
description_line=description_line,
|
|
188
221
|
mentions=mentions_str,
|
|
189
222
|
mentions_line=mentions_line,
|
|
190
223
|
)
|
|
191
|
-
except KeyError
|
|
192
|
-
#
|
|
193
|
-
|
|
224
|
+
except (KeyError, ValueError, TypeError):
|
|
225
|
+
# Template has an unknown variable or a format spec that doesn't fit
|
|
226
|
+
# the actual value (e.g. ``{value:.2f}`` in a no-data template where
|
|
227
|
+
# value is a string). Fall back to the kind-appropriate default.
|
|
228
|
+
if alert_data.is_error:
|
|
229
|
+
fallback = self.get_default_error_template()
|
|
230
|
+
elif alert_data.is_no_data:
|
|
231
|
+
fallback = self.get_default_no_data_template()
|
|
232
|
+
elif alert_data.is_recovery:
|
|
233
|
+
fallback = self.get_default_recovery_template()
|
|
234
|
+
else:
|
|
235
|
+
fallback = self.get_default_template()
|
|
236
|
+
if template == fallback:
|
|
237
|
+
# Already on the default — re-raise instead of recursing.
|
|
238
|
+
raise
|
|
239
|
+
message = self.format_message(alert_data, fallback)
|
|
194
240
|
|
|
195
241
|
return message
|
|
196
242
|
|
|
197
|
-
def format_mentions(self, mentions:
|
|
243
|
+
def format_mentions(self, mentions: list[str]) -> str:
|
|
198
244
|
"""
|
|
199
245
|
Format mentions list into platform-native syntax.
|
|
200
246
|
|
|
@@ -227,7 +273,11 @@ class BaseAlertChannel(ABC):
|
|
|
227
273
|
Returns:
|
|
228
274
|
Formatted title string
|
|
229
275
|
"""
|
|
230
|
-
if alert_data.is_recovery:
|
|
276
|
+
if alert_data.is_error:
|
|
277
|
+
title_template = self.get_default_error_title_template()
|
|
278
|
+
elif alert_data.is_no_data:
|
|
279
|
+
title_template = self.get_default_no_data_title_template()
|
|
280
|
+
elif alert_data.is_recovery:
|
|
231
281
|
title_template = self.get_default_recovery_title_template()
|
|
232
282
|
else:
|
|
233
283
|
title_template = self.get_default_title_template()
|
|
@@ -289,6 +339,39 @@ class BaseAlertChannel(ABC):
|
|
|
289
339
|
"""
|
|
290
340
|
return "Metric recovered: {metric_name}"
|
|
291
341
|
|
|
342
|
+
def get_default_no_data_template(self) -> str:
|
|
343
|
+
"""
|
|
344
|
+
Get default message template for no-data alerts.
|
|
345
|
+
|
|
346
|
+
Used when ``no_data_alert: true`` and the latest expected interval
|
|
347
|
+
has no datapoint (no row OR row with NULL/NaN value).
|
|
348
|
+
"""
|
|
349
|
+
return (
|
|
350
|
+
"No data for metric: {metric_name}\n"
|
|
351
|
+
"{description_line}"
|
|
352
|
+
"Time: {timestamp}\n"
|
|
353
|
+
"Status: query returned no datapoint for the latest interval"
|
|
354
|
+
"{mentions_line}"
|
|
355
|
+
)
|
|
356
|
+
|
|
357
|
+
def get_default_no_data_title_template(self) -> str:
|
|
358
|
+
"""Get default title template for no-data alerts."""
|
|
359
|
+
return "No data: {metric_name}"
|
|
360
|
+
|
|
361
|
+
def get_default_error_template(self) -> str:
|
|
362
|
+
"""Default body template for project-level error alerts."""
|
|
363
|
+
return (
|
|
364
|
+
"Pipeline failed for metric: {metric_name}\n"
|
|
365
|
+
"{description_line}"
|
|
366
|
+
"Time: {timestamp}\n"
|
|
367
|
+
"Error: {error_type}: {error_message}"
|
|
368
|
+
"{mentions_line}"
|
|
369
|
+
)
|
|
370
|
+
|
|
371
|
+
def get_default_error_title_template(self) -> str:
|
|
372
|
+
"""Default title template for project-level error alerts."""
|
|
373
|
+
return "Pipeline error: {metric_name}"
|
|
374
|
+
|
|
292
375
|
def __repr__(self) -> str:
|
|
293
376
|
"""String representation of channel."""
|
|
294
377
|
return f"{self.__class__.__name__}()"
|
|
@@ -7,7 +7,6 @@ Sends anomaly alerts via SMTP email.
|
|
|
7
7
|
import smtplib
|
|
8
8
|
from email.mime.multipart import MIMEMultipart
|
|
9
9
|
from email.mime.text import MIMEText
|
|
10
|
-
from typing import List, Optional
|
|
11
10
|
|
|
12
11
|
from detectkit.alerting.channels.base import AlertData, BaseAlertChannel
|
|
13
12
|
|
|
@@ -51,12 +50,12 @@ class EmailChannel(BaseAlertChannel):
|
|
|
51
50
|
smtp_host: str,
|
|
52
51
|
smtp_port: int,
|
|
53
52
|
from_email: str,
|
|
54
|
-
to_emails: List[str],
|
|
55
|
-
smtp_username: Optional[str] = None,
|
|
56
|
-
smtp_password: Optional[str] = None,
|
|
53
|
+
to_emails: list[str],
|
|
54
|
+
smtp_username: str | None = None,
|
|
55
|
+
smtp_password: str | None = None,
|
|
57
56
|
use_tls: bool = True,
|
|
58
57
|
subject_template: str = "Anomaly Alert: {metric_name}",
|
|
59
|
-
template: Optional[str] = None,
|
|
58
|
+
template: str | None = None,
|
|
60
59
|
**kwargs,
|
|
61
60
|
):
|
|
62
61
|
"""
|
|
@@ -115,9 +114,7 @@ class EmailChannel(BaseAlertChannel):
|
|
|
115
114
|
msg = MIMEMultipart("alternative")
|
|
116
115
|
msg["From"] = self.from_email
|
|
117
116
|
msg["To"] = ", ".join(self.to_emails)
|
|
118
|
-
msg["Subject"] = self.subject_template.format(
|
|
119
|
-
metric_name=alert_data.metric_name
|
|
120
|
-
)
|
|
117
|
+
msg["Subject"] = self.subject_template.format(metric_name=alert_data.metric_name)
|
|
121
118
|
|
|
122
119
|
# Attach plain text body
|
|
123
120
|
msg.attach(MIMEText(message_body, "plain"))
|
|
@@ -139,9 +136,9 @@ class EmailChannel(BaseAlertChannel):
|
|
|
139
136
|
server.quit()
|
|
140
137
|
|
|
141
138
|
except smtplib.SMTPException as e:
|
|
142
|
-
raise smtplib.SMTPException(f"Failed to send email alert: {e}")
|
|
139
|
+
raise smtplib.SMTPException(f"Failed to send email alert: {e}") from e
|
|
143
140
|
|
|
144
|
-
def format_mentions(self, mentions: List[str]) -> str:
|
|
141
|
+
def format_mentions(self, mentions: list[str]) -> str:
|
|
145
142
|
"""
|
|
146
143
|
Format mentions for email.
|
|
147
144
|
|
|
@@ -2,14 +2,12 @@
|
|
|
2
2
|
Alert channel factory for creating channel instances from configuration.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
from typing import Dict, List
|
|
6
|
-
|
|
7
5
|
from detectkit.alerting.channels.base import BaseAlertChannel
|
|
6
|
+
from detectkit.alerting.channels.email import EmailChannel
|
|
8
7
|
from detectkit.alerting.channels.mattermost import MattermostChannel
|
|
9
8
|
from detectkit.alerting.channels.slack import SlackChannel
|
|
10
|
-
from detectkit.alerting.channels.webhook import WebhookChannel
|
|
11
9
|
from detectkit.alerting.channels.telegram import TelegramChannel
|
|
12
|
-
from detectkit.alerting.channels.email import EmailChannel
|
|
10
|
+
from detectkit.alerting.channels.webhook import WebhookChannel
|
|
13
11
|
from detectkit.utils.env_interpolation import interpolate_env_vars
|
|
14
12
|
|
|
15
13
|
|
|
@@ -36,7 +34,7 @@ class AlertChannelFactory:
|
|
|
36
34
|
}
|
|
37
35
|
|
|
38
36
|
@classmethod
|
|
39
|
-
def create(cls, channel_type: str, params: Dict) -> BaseAlertChannel:
|
|
37
|
+
def create(cls, channel_type: str, params: dict) -> BaseAlertChannel:
|
|
40
38
|
"""
|
|
41
39
|
Create alert channel instance from type and parameters.
|
|
42
40
|
|
|
@@ -64,8 +62,7 @@ class AlertChannelFactory:
|
|
|
64
62
|
if channel_type not in cls.CHANNEL_TYPES:
|
|
65
63
|
available = ", ".join(sorted(cls.CHANNEL_TYPES.keys()))
|
|
66
64
|
raise ValueError(
|
|
67
|
-
f"Unknown channel type: '{channel_type}'. "
|
|
68
|
-
f"Available types: {available}"
|
|
65
|
+
f"Unknown channel type: '{channel_type}'. " f"Available types: {available}"
|
|
69
66
|
)
|
|
70
67
|
|
|
71
68
|
# Interpolate environment variables in params
|
|
@@ -76,12 +73,10 @@ class AlertChannelFactory:
|
|
|
76
73
|
try:
|
|
77
74
|
return channel_class(**interpolated_params)
|
|
78
75
|
except TypeError as e:
|
|
79
|
-
raise ValueError(
|
|
80
|
-
f"Invalid parameters for {channel_type} channel: {e}"
|
|
81
|
-
) from e
|
|
76
|
+
raise ValueError(f"Invalid parameters for {channel_type} channel: {e}") from e
|
|
82
77
|
|
|
83
78
|
@classmethod
|
|
84
|
-
def _interpolate_env_vars(cls, params: Dict) -> Dict:
|
|
79
|
+
def _interpolate_env_vars(cls, params: dict) -> dict:
|
|
85
80
|
"""Interpolate ``${VAR}`` and ``{{ env_var('VAR') }}`` placeholders.
|
|
86
81
|
|
|
87
82
|
Delegates to :func:`detectkit.utils.env_interpolation.interpolate_env_vars`,
|
|
@@ -90,7 +85,7 @@ class AlertChannelFactory:
|
|
|
90
85
|
return interpolate_env_vars(params)
|
|
91
86
|
|
|
92
87
|
@classmethod
|
|
93
|
-
def create_from_config(cls, channel_config: Dict) -> BaseAlertChannel:
|
|
88
|
+
def create_from_config(cls, channel_config: dict) -> BaseAlertChannel:
|
|
94
89
|
"""
|
|
95
90
|
Create channel from configuration dictionary.
|
|
96
91
|
|
|
@@ -122,7 +117,7 @@ class AlertChannelFactory:
|
|
|
122
117
|
return cls.create(channel_type, params)
|
|
123
118
|
|
|
124
119
|
@classmethod
|
|
125
|
-
def create_multiple(cls, channel_configs: List[Dict]) -> List[BaseAlertChannel]:
|
|
120
|
+
def create_multiple(cls, channel_configs: list[dict]) -> list[BaseAlertChannel]:
|
|
126
121
|
"""
|
|
127
122
|
Create multiple channels from list of configurations.
|
|
128
123
|
|
|
@@ -148,7 +143,7 @@ class AlertChannelFactory:
|
|
|
148
143
|
return channels
|
|
149
144
|
|
|
150
145
|
@classmethod
|
|
151
|
-
def list_available_types(cls) -> List[str]:
|
|
146
|
+
def list_available_types(cls) -> list[str]:
|
|
152
147
|
"""
|
|
153
148
|
Get list of available channel types.
|
|
154
149
|
|
|
@@ -4,8 +4,6 @@ Mattermost alert channel.
|
|
|
4
4
|
Convenience wrapper around WebhookChannel for Mattermost.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
-
from typing import Optional
|
|
8
|
-
|
|
9
7
|
from detectkit.alerting.channels.webhook import WebhookChannel
|
|
10
8
|
|
|
11
9
|
|
|
@@ -35,7 +33,7 @@ class MattermostChannel(WebhookChannel):
|
|
|
35
33
|
webhook_url: str,
|
|
36
34
|
username: str = "detectk",
|
|
37
35
|
icon_emoji: str = ":warning:",
|
|
38
|
-
channel: Optional[str] = None,
|
|
36
|
+
channel: str | None = None,
|
|
39
37
|
timeout: int = 10,
|
|
40
38
|
):
|
|
41
39
|
"""Initialize Mattermost channel with webhook URL."""
|
|
@@ -49,5 +47,7 @@ class MattermostChannel(WebhookChannel):
|
|
|
49
47
|
|
|
50
48
|
def __repr__(self) -> str:
|
|
51
49
|
"""String representation."""
|
|
52
|
-
url_preview = self.webhook_url[:30] + "..." if len(self.webhook_url) > 30 else self.webhook_url
|
|
50
|
+
url_preview = (
|
|
51
|
+
self.webhook_url[:30] + "..." if len(self.webhook_url) > 30 else self.webhook_url
|
|
52
|
+
)
|
|
53
53
|
return f"MattermostChannel(url='{url_preview}', username='{self.username}')"
|
|
@@ -4,8 +4,6 @@ Slack alert channel.
|
|
|
4
4
|
Convenience wrapper around WebhookChannel for Slack.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
-
from typing import List, Optional
|
|
8
|
-
|
|
9
7
|
from detectkit.alerting.channels.webhook import WebhookChannel
|
|
10
8
|
|
|
11
9
|
|
|
@@ -36,7 +34,7 @@ class SlackChannel(WebhookChannel):
|
|
|
36
34
|
webhook_url: str,
|
|
37
35
|
username: str = "detectk",
|
|
38
36
|
icon_emoji: str = ":warning:",
|
|
39
|
-
channel: Optional[str] = None,
|
|
37
|
+
channel: str | None = None,
|
|
40
38
|
timeout: int = 10,
|
|
41
39
|
):
|
|
42
40
|
"""Initialize Slack channel with webhook URL."""
|
|
@@ -48,7 +46,7 @@ class SlackChannel(WebhookChannel):
|
|
|
48
46
|
timeout=timeout,
|
|
49
47
|
)
|
|
50
48
|
|
|
51
|
-
def format_mentions(self, mentions: List[str]) -> str:
|
|
49
|
+
def format_mentions(self, mentions: list[str]) -> str:
|
|
52
50
|
"""
|
|
53
51
|
Format mentions for Slack.
|
|
54
52
|
|
|
@@ -78,6 +76,8 @@ class SlackChannel(WebhookChannel):
|
|
|
78
76
|
|
|
79
77
|
def __repr__(self) -> str:
|
|
80
78
|
"""String representation."""
|
|
81
|
-
url_preview = self.webhook_url[:30] + "..." if len(self.webhook_url) > 30 else self.webhook_url
|
|
79
|
+
url_preview = (
|
|
80
|
+
self.webhook_url[:30] + "..." if len(self.webhook_url) > 30 else self.webhook_url
|
|
81
|
+
)
|
|
82
82
|
channel_info = f", channel='{self.channel}'" if self.channel else ""
|
|
83
83
|
return f"SlackChannel(url='{url_preview}', username='{self.username}'{channel_info})"
|
|
@@ -4,8 +4,6 @@ Telegram alert channel implementation.
|
|
|
4
4
|
Sends anomaly alerts via Telegram Bot API.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
-
from typing import Any, Dict, List, Optional
|
|
8
|
-
|
|
9
7
|
import requests
|
|
10
8
|
|
|
11
9
|
from detectkit.alerting.channels.base import AlertData, BaseAlertChannel
|
|
@@ -43,7 +41,7 @@ class TelegramChannel(BaseAlertChannel):
|
|
|
43
41
|
chat_id: str,
|
|
44
42
|
parse_mode: str = "Markdown",
|
|
45
43
|
disable_notification: bool = False,
|
|
46
|
-
template: Optional[str] = None,
|
|
44
|
+
template: str | None = None,
|
|
47
45
|
**kwargs,
|
|
48
46
|
):
|
|
49
47
|
"""
|
|
@@ -103,9 +101,9 @@ class TelegramChannel(BaseAlertChannel):
|
|
|
103
101
|
response = requests.post(url, json=payload, timeout=10)
|
|
104
102
|
response.raise_for_status()
|
|
105
103
|
except requests.RequestException as e:
|
|
106
|
-
raise requests.RequestException(f"Failed to send Telegram alert: {e}")
|
|
104
|
+
raise requests.RequestException(f"Failed to send Telegram alert: {e}") from e
|
|
107
105
|
|
|
108
|
-
def format_mentions(self, mentions: List[str]) -> str:
|
|
106
|
+
def format_mentions(self, mentions: list[str]) -> str:
|
|
109
107
|
"""
|
|
110
108
|
Format mentions for Telegram.
|
|
111
109
|
|
|
@@ -5,8 +5,6 @@ Sends alerts to any webhook endpoint that accepts JSON payload.
|
|
|
5
5
|
Compatible with Mattermost, Slack, and other webhook-based systems.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
from typing import Dict, Optional
|
|
9
|
-
|
|
10
8
|
import requests
|
|
11
9
|
|
|
12
10
|
from detectkit.alerting.channels.base import AlertData, BaseAlertChannel
|
|
@@ -62,9 +60,9 @@ class WebhookChannel(BaseAlertChannel):
|
|
|
62
60
|
webhook_url: str,
|
|
63
61
|
username: str = "detectk",
|
|
64
62
|
icon_emoji: str = ":warning:",
|
|
65
|
-
channel: Optional[str] = None,
|
|
63
|
+
channel: str | None = None,
|
|
66
64
|
timeout: int = 10,
|
|
67
|
-
extra_headers: Optional[Dict[str, str]] = None,
|
|
65
|
+
extra_headers: dict[str, str] | None = None,
|
|
68
66
|
):
|
|
69
67
|
"""Initialize webhook channel."""
|
|
70
68
|
if not webhook_url:
|
|
@@ -80,7 +78,7 @@ class WebhookChannel(BaseAlertChannel):
|
|
|
80
78
|
def send(
|
|
81
79
|
self,
|
|
82
80
|
alert_data: AlertData,
|
|
83
|
-
template: Optional[str] = None,
|
|
81
|
+
template: str | None = None,
|
|
84
82
|
) -> bool:
|
|
85
83
|
"""
|
|
86
84
|
Send alert to webhook.
|
|
@@ -103,8 +101,13 @@ class WebhookChannel(BaseAlertChannel):
|
|
|
103
101
|
title = self.format_title(alert_data)
|
|
104
102
|
body = self.format_message(alert_data, template)
|
|
105
103
|
|
|
106
|
-
# Color: red for anomaly, green for recovery
|
|
107
|
-
|
|
104
|
+
# Color: red for anomaly, green for recovery, amber for no-data.
|
|
105
|
+
if alert_data.is_recovery:
|
|
106
|
+
color = "#36A64F"
|
|
107
|
+
elif alert_data.is_no_data:
|
|
108
|
+
color = "#F0AD4E"
|
|
109
|
+
else:
|
|
110
|
+
color = "#D63232"
|
|
108
111
|
|
|
109
112
|
# Prepare payload using Mattermost/Slack attachments format.
|
|
110
113
|
# Attachments give us: colored left sidebar, separate title, and
|
|
@@ -175,8 +178,28 @@ class WebhookChannel(BaseAlertChannel):
|
|
|
175
178
|
"{mentions_line}"
|
|
176
179
|
)
|
|
177
180
|
|
|
181
|
+
def get_default_no_data_template(self) -> str:
|
|
182
|
+
"""Default no-data body template (metric name lives in the title)."""
|
|
183
|
+
return (
|
|
184
|
+
"{description_line}"
|
|
185
|
+
"Time: {timestamp}\n"
|
|
186
|
+
"Status: query returned no datapoint for the latest interval"
|
|
187
|
+
"{mentions_line}"
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
def get_default_error_template(self) -> str:
|
|
191
|
+
"""Default error body template (metric name lives in the title)."""
|
|
192
|
+
return (
|
|
193
|
+
"{description_line}"
|
|
194
|
+
"Time: {timestamp}\n"
|
|
195
|
+
"Error: {error_type}: {error_message}"
|
|
196
|
+
"{mentions_line}"
|
|
197
|
+
)
|
|
198
|
+
|
|
178
199
|
def __repr__(self) -> str:
|
|
179
200
|
"""String representation."""
|
|
180
|
-
url_preview = self.webhook_url[:30] + "..." if len(self.webhook_url) > 30 else self.webhook_url
|
|
201
|
+
url_preview = (
|
|
202
|
+
self.webhook_url[:30] + "..." if len(self.webhook_url) > 30 else self.webhook_url
|
|
203
|
+
)
|
|
181
204
|
channel_info = f", channel='{self.channel}'" if self.channel else ""
|
|
182
205
|
return f"WebhookChannel(url='{url_preview}', username='{self.username}'{channel_info})"
|
|
@@ -2,8 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from typing import Dict, List, Optional
|
|
6
|
-
|
|
7
5
|
import numpy as np
|
|
8
6
|
|
|
9
7
|
from detectkit.alerting.orchestrator._types import (
|
|
@@ -19,12 +17,12 @@ class _OrchestratorBase:
|
|
|
19
17
|
metric_name: str,
|
|
20
18
|
interval: Interval,
|
|
21
19
|
alert_config_id: str,
|
|
22
|
-
conditions: Optional[AlertConditions] = None,
|
|
20
|
+
conditions: AlertConditions | None = None,
|
|
23
21
|
timezone_display: str = "UTC",
|
|
24
22
|
internal=None, # InternalTablesManager
|
|
25
23
|
alert_config=None, # AlertConfig
|
|
26
|
-
description: Optional[str] = None,
|
|
27
|
-
mentions: Optional[List[str]] = None,
|
|
24
|
+
description: str | None = None,
|
|
25
|
+
mentions: list[str] | None = None,
|
|
28
26
|
):
|
|
29
27
|
self.metric_name = metric_name
|
|
30
28
|
self.interval = interval
|
|
@@ -38,9 +36,9 @@ class _OrchestratorBase:
|
|
|
38
36
|
|
|
39
37
|
@staticmethod
|
|
40
38
|
def _group_by_timestamp(
|
|
41
|
-
detections: List[DetectionRecord],
|
|
42
|
-
) -> Dict[np.datetime64, List[DetectionRecord]]:
|
|
43
|
-
grouped: Dict[np.datetime64, List[DetectionRecord]] = {}
|
|
39
|
+
detections: list[DetectionRecord],
|
|
40
|
+
) -> dict[np.datetime64, list[DetectionRecord]]:
|
|
41
|
+
grouped: dict[np.datetime64, list[DetectionRecord]] = {}
|
|
44
42
|
for d in detections:
|
|
45
43
|
grouped.setdefault(d.timestamp, []).append(d)
|
|
46
44
|
return grouped
|
|
@@ -24,9 +24,7 @@ class _CooldownMixin(_OrchestratorBase):
|
|
|
24
24
|
if not self.internal:
|
|
25
25
|
return False
|
|
26
26
|
|
|
27
|
-
last_sent = self.internal.get_last_alert_timestamp(
|
|
28
|
-
self.metric_name, self.alert_config_id
|
|
29
|
-
)
|
|
27
|
+
last_sent = self.internal.get_last_alert_timestamp(self.metric_name, self.alert_config_id)
|
|
30
28
|
if not last_sent:
|
|
31
29
|
return False
|
|
32
30
|
|