detectkit 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- detectkit/__init__.py +17 -0
- detectkit/alerting/__init__.py +13 -0
- detectkit/alerting/channels/__init__.py +21 -0
- detectkit/alerting/channels/base.py +191 -0
- detectkit/alerting/channels/email.py +146 -0
- detectkit/alerting/channels/factory.py +193 -0
- detectkit/alerting/channels/mattermost.py +53 -0
- detectkit/alerting/channels/slack.py +55 -0
- detectkit/alerting/channels/telegram.py +110 -0
- detectkit/alerting/channels/webhook.py +139 -0
- detectkit/alerting/orchestrator.py +368 -0
- detectkit/cli/__init__.py +1 -0
- detectkit/cli/commands/__init__.py +1 -0
- detectkit/cli/commands/init.py +282 -0
- detectkit/cli/commands/run.py +427 -0
- detectkit/cli/commands/test_alert.py +184 -0
- detectkit/cli/main.py +186 -0
- detectkit/config/__init__.py +30 -0
- detectkit/config/metric_config.py +467 -0
- detectkit/config/profile.py +285 -0
- detectkit/config/project_config.py +164 -0
- detectkit/core/__init__.py +6 -0
- detectkit/core/interval.py +132 -0
- detectkit/core/models.py +106 -0
- detectkit/database/__init__.py +27 -0
- detectkit/database/clickhouse_manager.py +385 -0
- detectkit/database/internal_tables.py +581 -0
- detectkit/database/manager.py +324 -0
- detectkit/database/tables.py +134 -0
- detectkit/detectors/__init__.py +6 -0
- detectkit/detectors/base.py +222 -0
- detectkit/detectors/factory.py +138 -0
- detectkit/detectors/statistical/__init__.py +8 -0
- detectkit/detectors/statistical/iqr.py +230 -0
- detectkit/detectors/statistical/mad.py +423 -0
- detectkit/detectors/statistical/manual_bounds.py +177 -0
- detectkit/detectors/statistical/zscore.py +225 -0
- detectkit/loaders/__init__.py +6 -0
- detectkit/loaders/metric_loader.py +470 -0
- detectkit/loaders/query_template.py +164 -0
- detectkit/orchestration/__init__.py +9 -0
- detectkit/orchestration/task_manager.py +698 -0
- detectkit/utils/__init__.py +1 -0
- detectkit-0.1.0.dist-info/METADATA +231 -0
- detectkit-0.1.0.dist-info/RECORD +49 -0
- detectkit-0.1.0.dist-info/WHEEL +5 -0
- detectkit-0.1.0.dist-info/entry_points.txt +2 -0
- detectkit-0.1.0.dist-info/licenses/LICENSE +21 -0
- detectkit-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Detector factory for creating detector instances from configuration.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Dict, List
|
|
6
|
+
|
|
7
|
+
from detectkit.detectors.base import BaseDetector
|
|
8
|
+
from detectkit.detectors.statistical.iqr import IQRDetector
|
|
9
|
+
from detectkit.detectors.statistical.mad import MADDetector
|
|
10
|
+
from detectkit.detectors.statistical.manual_bounds import ManualBoundsDetector
|
|
11
|
+
from detectkit.detectors.statistical.zscore import ZScoreDetector
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class DetectorFactory:
    """
    Factory for creating detector instances from configuration.

    Detector classes are looked up by type name in ``DETECTOR_TYPES``
    (the lookup is case-insensitive because the name is lower-cased first)
    and instantiated with the supplied parameters as keyword arguments.

    Example:
        >>> factory = DetectorFactory()
        >>> detector = factory.create("zscore", {"threshold": 3.0})
        >>> isinstance(detector, ZScoreDetector)
        True
    """

    # Registry of available detector types (type name -> detector class)
    DETECTOR_TYPES = {
        "zscore": ZScoreDetector,
        "mad": MADDetector,
        "iqr": IQRDetector,
        "manual_bounds": ManualBoundsDetector,
        "manual": ManualBoundsDetector,  # Alias
    }

    @classmethod
    def create(cls, detector_type: str, params: Dict = None) -> BaseDetector:
        """
        Create detector instance from type and parameters.

        Args:
            detector_type: Type of detector (e.g., "zscore", "mad").
                Matched case-insensitively against ``DETECTOR_TYPES``.
            params: Detector parameters passed to the detector constructor
                as keyword arguments (optional; ``None`` means no parameters)

        Returns:
            Detector instance

        Raises:
            ValueError: If detector type is not a string, is unknown, or the
                detector constructor rejects the parameters with a TypeError

        Example:
            >>> detector = DetectorFactory.create("zscore", {"threshold": 3.0, "window_size": 100})
            >>> detector = DetectorFactory.create("mad", {"threshold": 2.5})
        """
        # Config files can yield non-string values (e.g. ``type: 1``); report
        # them as the documented ValueError instead of leaking an
        # AttributeError from ``.lower()``.
        if not isinstance(detector_type, str):
            raise ValueError(
                "Detector type must be a string, "
                f"got {type(detector_type).__name__}"
            )

        params = params or {}

        detector_type = detector_type.lower()

        if detector_type not in cls.DETECTOR_TYPES:
            available = ", ".join(sorted(cls.DETECTOR_TYPES))
            raise ValueError(
                f"Unknown detector type: '{detector_type}'. "
                f"Available types: {available}"
            )

        detector_class = cls.DETECTOR_TYPES[detector_type]

        try:
            return detector_class(**params)
        except TypeError as e:
            # Typically an unexpected keyword argument; re-raise as ValueError
            # so callers only need to handle one exception type.
            raise ValueError(
                f"Invalid parameters for {detector_type} detector: {e}"
            ) from e

    @classmethod
    def create_from_config(cls, detector_config: Dict) -> BaseDetector:
        """
        Create detector from configuration dictionary.

        Args:
            detector_config: Configuration with 'type' and optional 'params'
                Example: {"type": "zscore", "params": {"threshold": 3.0}}

        Returns:
            Detector instance

        Raises:
            ValueError: If 'type' is missing or empty, or if ``create``
                rejects the type or parameters

        Example:
            >>> config = {"type": "zscore", "params": {"threshold": 3.0}}
            >>> detector = DetectorFactory.create_from_config(config)
        """
        detector_type = detector_config.get("type")
        if not detector_type:
            raise ValueError("Detector config must have 'type' field")

        # A present-but-empty 'params' (None) is normalised inside create().
        params = detector_config.get("params", {})

        return cls.create(detector_type, params)

    @classmethod
    def create_multiple(cls, detector_configs: List[Dict]) -> List[BaseDetector]:
        """
        Create multiple detectors from list of configurations.

        Args:
            detector_configs: List of detector configurations, each in the
                format accepted by ``create_from_config``

        Returns:
            List of detector instances, in the same order as the input

        Raises:
            ValueError: If any configuration is rejected by
                ``create_from_config``

        Example:
            >>> configs = [
            ...     {"type": "zscore", "params": {"threshold": 3.0}},
            ...     {"type": "mad", "params": {"threshold": 2.5}},
            ... ]
            >>> detectors = DetectorFactory.create_multiple(configs)
            >>> len(detectors)
            2
        """
        return [cls.create_from_config(config) for config in detector_configs]

    @classmethod
    def list_available_types(cls) -> List[str]:
        """
        Get list of available detector types.

        Returns:
            Alphabetically sorted list of detector type names (aliases
            included)

        Example:
            >>> types = DetectorFactory.list_available_types()
            >>> "zscore" in types
            True
        """
        return sorted(cls.DETECTOR_TYPES)
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""Statistical anomaly detectors."""
|
|
2
|
+
|
|
3
|
+
from detectkit.detectors.statistical.iqr import IQRDetector
|
|
4
|
+
from detectkit.detectors.statistical.mad import MADDetector
|
|
5
|
+
from detectkit.detectors.statistical.manual_bounds import ManualBoundsDetector
|
|
6
|
+
from detectkit.detectors.statistical.zscore import ZScoreDetector
|
|
7
|
+
|
|
8
|
+
__all__ = ["IQRDetector", "MADDetector", "ManualBoundsDetector", "ZScoreDetector"]
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Interquartile Range (IQR) anomaly detector.
|
|
3
|
+
|
|
4
|
+
IQR is a robust statistical method for outlier detection that:
|
|
5
|
+
- Uses quartiles (Q1, Q3) instead of mean
|
|
6
|
+
- Measures spread using IQR = Q3 - Q1
|
|
7
|
+
- Less sensitive to outliers than Z-Score
|
|
8
|
+
- Similar robustness to MAD
|
|
9
|
+
|
|
10
|
+
Formula:
|
|
11
|
+
- Q1 = 25th percentile
|
|
12
|
+
- Q3 = 75th percentile
|
|
13
|
+
- IQR = Q3 - Q1
|
|
14
|
+
- lower_bound = Q1 - threshold × IQR
|
|
15
|
+
- upper_bound = Q3 + threshold × IQR
|
|
16
|
+
|
|
17
|
+
Default threshold = 1.5 (standard Tukey's fences)
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from typing import Any, Dict
|
|
21
|
+
|
|
22
|
+
import numpy as np
|
|
23
|
+
|
|
24
|
+
from detectkit.detectors.base import BaseDetector, DetectionResult
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class IQRDetector(BaseDetector):
    """
    Interquartile Range (IQR) detector for anomaly detection.

    Detects anomalies using Tukey's fences method based on quartiles.
    This is a robust method that works well with skewed distributions.

    Parameters:
        threshold (float): IQR multiplier for bounds (default: 1.5)
            - 1.5 is standard Tukey's fences (identifies outliers)
            - 3.0 identifies extreme outliers
            - Higher = less sensitive (fewer anomalies)
            - Lower = more sensitive (more anomalies)

        window_size (int): Historical window size in points (default: 100)
            - Uses last N points to compute statistics
            - Larger = more stable but less responsive
            - Smaller = more responsive but less stable

        min_samples (int): Minimum samples required for detection (default: 30)
            - Skip detection if window has fewer valid points
            - Ensures statistical reliability

    Example:
        >>> detector = IQRDetector(threshold=1.5, window_size=100)
        >>> results = detector.detect(data)
        >>> for r in results:
        ...     if r.is_anomaly:
        ...         print(f"Anomaly: {r.value} outside [{r.confidence_lower}, {r.confidence_upper}]")
    """

    def __init__(
        self,
        threshold: float = 1.5,
        window_size: int = 100,
        min_samples: int = 30,
    ):
        """Initialize IQR detector with parameters."""
        # NOTE(review): BaseDetector presumably stores these keyword arguments
        # in ``self.params`` and invokes ``_validate_params`` -- confirm
        # against detectkit.detectors.base.
        super().__init__(
            threshold=threshold,
            window_size=window_size,
            min_samples=min_samples,
        )

    def _validate_params(self):
        """
        Validate detector parameters.

        Raises:
            ValueError: If threshold is missing or not positive, window_size
                is missing or below 1, min_samples is missing or below 4,
                or min_samples exceeds window_size.
        """
        threshold = self.params.get("threshold")
        if threshold is None or threshold <= 0:
            raise ValueError("threshold must be positive")

        window_size = self.params.get("window_size")
        if window_size is None or window_size < 1:
            raise ValueError("window_size must be at least 1")

        min_samples = self.params.get("min_samples")
        if min_samples is None or min_samples < 4:
            raise ValueError("min_samples must be at least 4 (for quartiles)")

        # The window never holds more than window_size points, so a larger
        # min_samples could never be satisfied.
        if min_samples > window_size:
            raise ValueError("min_samples cannot exceed window_size")

    def detect(self, data: Dict[str, np.ndarray]) -> list[DetectionResult]:
        """
        Perform IQR-based anomaly detection.

        For each point, uses historical window to compute:
        1. Q1 = 25th percentile of window
        2. Q3 = 75th percentile of window
        3. IQR = Q3 - Q1
        4. lower_bound = Q1 - threshold × IQR
        5. upper_bound = Q3 + threshold × IQR
        6. is_anomaly = value outside [lower_bound, upper_bound]

        Args:
            data: Dictionary with keys:
                - timestamp: np.array of datetime64[ms]
                - value: np.array of float64 (may contain NaN)
                - seasonality_data: np.array of JSON strings (not used yet)
                - seasonality_columns: list of column names (not used yet)

        Returns:
            List of DetectionResult for each point, in input order

        Notes:
            - NaN values are skipped (marked as non-anomalous)
            - The window excludes the current point, so the first
              min_samples points are always skipped (insufficient history)
            - Uses np.percentile's default (linear) interpolation
            - When IQR is 0, the bounds collapse to Q1/Q3 (± 1e-10) and the
              reported severity of an anomaly is infinite
            - Seasonality support will be added in future versions
        """
        timestamps = data["timestamp"]
        values = data["value"]
        threshold = self.params["threshold"]
        window_size = self.params["window_size"]
        min_samples = self.params["min_samples"]

        results = []
        n_points = len(timestamps)

        for i in range(n_points):
            current_val = values[i]
            current_ts = timestamps[i]

            # Skip NaN values
            if np.isnan(current_val):
                results.append(
                    DetectionResult(
                        timestamp=current_ts,
                        value=current_val,
                        is_anomaly=False,
                        detection_metadata={"reason": "missing_data"},
                    )
                )
                continue

            # Get historical window (not including current point)
            window_start = max(0, i - window_size)
            window_values = values[window_start:i]

            # Filter out NaN values from window
            window_valid = window_values[~np.isnan(window_values)]

            # Check if we have enough samples
            if len(window_valid) < min_samples:
                results.append(
                    DetectionResult(
                        timestamp=current_ts,
                        value=current_val,
                        is_anomaly=False,
                        detection_metadata={
                            "reason": "insufficient_data",
                            "window_size": int(len(window_valid)),
                            "min_samples": min_samples,
                        },
                    )
                )
                continue

            # Compute IQR statistics (both quartiles in a single call)
            q1, q3 = np.percentile(window_valid, [25, 75])
            iqr = q3 - q1

            # Handle edge case: IQR = 0 (all values in same range)
            if iqr == 0:
                # Use Q1/Q3 with small epsilon
                # If no spread, any value outside Q1-Q3 is anomalous
                confidence_lower = q1 - 1e-10
                confidence_upper = q3 + 1e-10
            else:
                confidence_lower = q1 - threshold * iqr
                confidence_upper = q3 + threshold * iqr

            # Check if current value is anomalous. Comparing NumPy scalars
            # yields np.bool_; cast to a builtin bool so the result carries
            # plain Python types like the float()/int() casts below.
            is_anomaly = bool(
                (current_val < confidence_lower) or (current_val > confidence_upper)
            )

            metadata = {
                "q1": float(q1),
                "q3": float(q3),
                "iqr": float(iqr),
                "window_size": int(len(window_valid)),
            }

            # Determine direction and severity
            if is_anomaly:
                if current_val < confidence_lower:
                    direction = "below"
                    distance = confidence_lower - current_val
                else:
                    direction = "above"
                    distance = current_val - confidence_upper

                # Severity: how many IQR units away (infinite when there is
                # no spread to measure against)
                severity = distance / iqr if iqr > 0 else float("inf")

                metadata.update({
                    "direction": direction,
                    "severity": float(severity),
                    "distance": float(distance),
                })

            results.append(
                DetectionResult(
                    timestamp=current_ts,
                    value=current_val,
                    is_anomaly=is_anomaly,
                    confidence_lower=float(confidence_lower),
                    confidence_upper=float(confidence_upper),
                    detection_metadata=metadata,
                )
            )

        return results

    def _get_non_default_params(self) -> Dict[str, Any]:
        """
        Get parameters that differ from defaults.

        Returns:
            Mapping of every entry in ``self.params`` whose value differs
            from this detector's default; keys without a known default are
            included unless their value is ``None``.
        """
        defaults = {
            "threshold": 1.5,
            "window_size": 100,
            "min_samples": 30,
        }
        return {
            k: v for k, v in self.params.items()
            if v != defaults.get(k)
        }
|