detectkit 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. detectkit/__init__.py +17 -0
  2. detectkit/alerting/__init__.py +13 -0
  3. detectkit/alerting/channels/__init__.py +21 -0
  4. detectkit/alerting/channels/base.py +191 -0
  5. detectkit/alerting/channels/email.py +146 -0
  6. detectkit/alerting/channels/factory.py +193 -0
  7. detectkit/alerting/channels/mattermost.py +53 -0
  8. detectkit/alerting/channels/slack.py +55 -0
  9. detectkit/alerting/channels/telegram.py +110 -0
  10. detectkit/alerting/channels/webhook.py +139 -0
  11. detectkit/alerting/orchestrator.py +368 -0
  12. detectkit/cli/__init__.py +1 -0
  13. detectkit/cli/commands/__init__.py +1 -0
  14. detectkit/cli/commands/init.py +282 -0
  15. detectkit/cli/commands/run.py +427 -0
  16. detectkit/cli/commands/test_alert.py +184 -0
  17. detectkit/cli/main.py +186 -0
  18. detectkit/config/__init__.py +30 -0
  19. detectkit/config/metric_config.py +467 -0
  20. detectkit/config/profile.py +285 -0
  21. detectkit/config/project_config.py +164 -0
  22. detectkit/core/__init__.py +6 -0
  23. detectkit/core/interval.py +132 -0
  24. detectkit/core/models.py +106 -0
  25. detectkit/database/__init__.py +27 -0
  26. detectkit/database/clickhouse_manager.py +385 -0
  27. detectkit/database/internal_tables.py +581 -0
  28. detectkit/database/manager.py +324 -0
  29. detectkit/database/tables.py +134 -0
  30. detectkit/detectors/__init__.py +6 -0
  31. detectkit/detectors/base.py +222 -0
  32. detectkit/detectors/factory.py +138 -0
  33. detectkit/detectors/statistical/__init__.py +8 -0
  34. detectkit/detectors/statistical/iqr.py +230 -0
  35. detectkit/detectors/statistical/mad.py +423 -0
  36. detectkit/detectors/statistical/manual_bounds.py +177 -0
  37. detectkit/detectors/statistical/zscore.py +225 -0
  38. detectkit/loaders/__init__.py +6 -0
  39. detectkit/loaders/metric_loader.py +470 -0
  40. detectkit/loaders/query_template.py +164 -0
  41. detectkit/orchestration/__init__.py +9 -0
  42. detectkit/orchestration/task_manager.py +698 -0
  43. detectkit/utils/__init__.py +1 -0
  44. detectkit-0.1.0.dist-info/METADATA +231 -0
  45. detectkit-0.1.0.dist-info/RECORD +49 -0
  46. detectkit-0.1.0.dist-info/WHEEL +5 -0
  47. detectkit-0.1.0.dist-info/entry_points.txt +2 -0
  48. detectkit-0.1.0.dist-info/licenses/LICENSE +21 -0
  49. detectkit-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,138 @@
1
+ """
2
+ Detector factory for creating detector instances from configuration.
3
+ """
4
+
5
+ from typing import Dict, List
6
+
7
+ from detectkit.detectors.base import BaseDetector
8
+ from detectkit.detectors.statistical.iqr import IQRDetector
9
+ from detectkit.detectors.statistical.mad import MADDetector
10
+ from detectkit.detectors.statistical.manual_bounds import ManualBoundsDetector
11
+ from detectkit.detectors.statistical.zscore import ZScoreDetector
12
+
13
+
14
class DetectorFactory:
    """
    Factory for creating detector instances from configuration.

    Detector classes are looked up by a case-insensitive type name in
    ``DETECTOR_TYPES`` and instantiated with the supplied keyword parameters.

    Example:
        >>> factory = DetectorFactory()
        >>> detector = factory.create("zscore", {"threshold": 3.0})
        >>> isinstance(detector, ZScoreDetector)
        True
    """

    # Registry of available detector types. Keys are lowercase because
    # ``create`` lowercases the requested name before the lookup.
    DETECTOR_TYPES = {
        "zscore": ZScoreDetector,
        "mad": MADDetector,
        "iqr": IQRDetector,
        "manual_bounds": ManualBoundsDetector,
        "manual": ManualBoundsDetector,  # Alias
    }

    @classmethod
    def create(cls, detector_type: str, params: Dict = None) -> BaseDetector:
        """
        Create detector instance from type and parameters.

        Args:
            detector_type: Type of detector (e.g., "zscore", "mad").
                Matching is case-insensitive.
            params: Keyword arguments forwarded to the detector constructor
                (optional; ``None`` or an empty dict means "use defaults").

        Returns:
            Detector instance

        Raises:
            ValueError: If detector_type is not a string, if it is not a
                registered type, or if the detector constructor raises
                TypeError for the given params (e.g. unexpected keyword).

        Example:
            >>> detector = DetectorFactory.create("zscore", {"threshold": 3.0, "window_size": 100})
            >>> detector = DetectorFactory.create("mad", {"threshold": 2.5})
        """
        # A non-string type (e.g. a number coming from a config file) would
        # otherwise fail on ``.lower()`` with AttributeError instead of the
        # documented ValueError.
        if not isinstance(detector_type, str):
            raise ValueError(
                "Detector type must be a string, "
                f"got {type(detector_type).__name__}"
            )

        params = params or {}
        detector_type = detector_type.lower()

        detector_class = cls.DETECTOR_TYPES.get(detector_type)
        if detector_class is None:
            available = ", ".join(sorted(cls.DETECTOR_TYPES))
            raise ValueError(
                f"Unknown detector type: '{detector_type}'. "
                f"Available types: {available}"
            )

        try:
            return detector_class(**params)
        except TypeError as e:
            # Re-raise as ValueError so callers handle a single exception
            # type for every kind of bad detector configuration.
            raise ValueError(
                f"Invalid parameters for {detector_type} detector: {e}"
            ) from e

    @classmethod
    def create_from_config(cls, detector_config: Dict) -> BaseDetector:
        """
        Create detector from configuration dictionary.

        Args:
            detector_config: Configuration with 'type' and optional 'params'
                Example: {"type": "zscore", "params": {"threshold": 3.0}}

        Returns:
            Detector instance

        Raises:
            ValueError: If 'type' is missing or empty, or for any reason
                ``create`` rejects the type/params.

        Example:
            >>> config = {"type": "zscore", "params": {"threshold": 3.0}}
            >>> detector = DetectorFactory.create_from_config(config)
        """
        detector_type = detector_config.get("type")
        if not detector_type:
            raise ValueError("Detector config must have 'type' field")

        # An explicit ``"params": None`` is passed through; ``create``
        # treats a falsy params value as "no parameters".
        params = detector_config.get("params", {})

        return cls.create(detector_type, params)

    @classmethod
    def create_multiple(cls, detector_configs: List[Dict]) -> List[BaseDetector]:
        """
        Create multiple detectors from list of configurations.

        Args:
            detector_configs: List of detector configurations, each in the
                format accepted by ``create_from_config``

        Returns:
            List of detector instances, in the same order as the configs

        Raises:
            ValueError: Propagated from ``create_from_config`` for the first
                invalid configuration encountered.

        Example:
            >>> configs = [
            ...     {"type": "zscore", "params": {"threshold": 3.0}},
            ...     {"type": "mad", "params": {"threshold": 2.5}},
            ... ]
            >>> detectors = DetectorFactory.create_multiple(configs)
            >>> len(detectors)
            2
        """
        return [cls.create_from_config(config) for config in detector_configs]

    @classmethod
    def list_available_types(cls) -> List[str]:
        """
        Get list of available detector types.

        Returns:
            Alphabetically sorted list of detector type names (aliases
            included)

        Example:
            >>> types = DetectorFactory.list_available_types()
            >>> "zscore" in types
            True
        """
        return sorted(cls.DETECTOR_TYPES)
@@ -0,0 +1,8 @@
1
+ """Statistical anomaly detectors."""
2
+
3
+ from detectkit.detectors.statistical.iqr import IQRDetector
4
+ from detectkit.detectors.statistical.mad import MADDetector
5
+ from detectkit.detectors.statistical.manual_bounds import ManualBoundsDetector
6
+ from detectkit.detectors.statistical.zscore import ZScoreDetector
7
+
8
+ __all__ = ["IQRDetector", "MADDetector", "ManualBoundsDetector", "ZScoreDetector"]
@@ -0,0 +1,230 @@
1
+ """
2
+ Interquartile Range (IQR) anomaly detector.
3
+
4
+ IQR is a robust statistical method for outlier detection that:
5
+ - Uses quartiles (Q1, Q3) instead of mean
6
+ - Measures spread using IQR = Q3 - Q1
7
+ - Less sensitive to outliers than Z-Score
8
+ - Similar robustness to MAD
9
+
10
+ Formula:
11
+ - Q1 = 25th percentile
12
+ - Q3 = 75th percentile
13
+ - IQR = Q3 - Q1
14
+ - lower_bound = Q1 - threshold × IQR
15
+ - upper_bound = Q3 + threshold × IQR
16
+
17
+ Default threshold = 1.5 (standard Tukey's fences)
18
+ """
19
+
20
+ from typing import Any, Dict
21
+
22
+ import numpy as np
23
+
24
+ from detectkit.detectors.base import BaseDetector, DetectionResult
25
+
26
+
27
class IQRDetector(BaseDetector):
    """
    Interquartile Range (IQR) detector for anomaly detection.

    Detects anomalies using Tukey's fences method based on quartiles.
    This is a robust method that works well with skewed distributions.

    Parameters:
        threshold (float): IQR multiplier for bounds (default: 1.5)
            - 1.5 is standard Tukey's fences (identifies outliers)
            - 3.0 identifies extreme outliers
            - Higher = less sensitive (fewer anomalies)
            - Lower = more sensitive (more anomalies)

        window_size (int): Historical window size in points (default: 100)
            - Uses last N points to compute statistics
            - Larger = more stable but less responsive
            - Smaller = more responsive but less stable

        min_samples (int): Minimum samples required for detection (default: 30)
            - Skip detection if window has fewer valid points
            - Ensures statistical reliability

    Example:
        >>> detector = IQRDetector(threshold=1.5, window_size=100)
        >>> results = detector.detect(data)
        >>> for r in results:
        ...     if r.is_anomaly:
        ...         print(f"Anomaly: {r.value} outside [{r.confidence_lower}, {r.confidence_upper}]")
    """

    def __init__(
        self,
        threshold: float = 1.5,
        window_size: int = 100,
        min_samples: int = 30,
    ):
        """Initialize IQR detector with parameters."""
        # Parameters are handed to the base class as keyword arguments; the
        # methods below read them back through ``self.params``
        # (presumably populated by BaseDetector.__init__ - confirm there).
        super().__init__(
            threshold=threshold,
            window_size=window_size,
            min_samples=min_samples,
        )

    def _validate_params(self):
        """
        Validate detector parameters.

        Raises:
            ValueError: If threshold is missing or not positive, window_size
                is missing or below 1, min_samples is missing or below 4, or
                min_samples exceeds window_size.
        """
        threshold = self.params.get("threshold")
        if threshold is None or threshold <= 0:
            raise ValueError("threshold must be positive")

        window_size = self.params.get("window_size")
        if window_size is None or window_size < 1:
            raise ValueError("window_size must be at least 1")

        min_samples = self.params.get("min_samples")
        if min_samples is None or min_samples < 4:
            raise ValueError("min_samples must be at least 4 (for quartiles)")

        # The window can never hold more than window_size points, so a larger
        # min_samples would make every point "insufficient_data".
        if min_samples > window_size:
            raise ValueError("min_samples cannot exceed window_size")

    def detect(self, data: Dict[str, np.ndarray]) -> list[DetectionResult]:
        """
        Perform IQR-based anomaly detection.

        For each point, uses historical window to compute:
        1. Q1 = 25th percentile of window
        2. Q3 = 75th percentile of window
        3. IQR = Q3 - Q1
        4. lower_bound = Q1 - threshold × IQR
        5. upper_bound = Q3 + threshold × IQR
        6. is_anomaly = value outside [lower_bound, upper_bound]

        Args:
            data: Dictionary with keys:
                - timestamp: np.array of datetime64[ms]
                - value: np.array of float64 (may contain NaN)
                - seasonality_data: np.array of JSON strings (not used yet)
                - seasonality_columns: list of column names (not used yet)

        Returns:
            List of DetectionResult for each point

        Notes:
            - NaN values are skipped (marked as non-anomalous with
              reason "missing_data")
            - The window never includes the current point, so point i has at
              most i historical values: at least the first min_samples points
              are skipped (reason "insufficient_data"), more if the window
              contains NaN values
            - Uses linear interpolation for percentile calculation
              (np.percentile default)
            - When IQR is 0 the bounds collapse to Q1/Q3 widened by 1e-10 and
              the severity of any anomaly is reported as infinity
            - Seasonality support will be added in future versions
        """
        timestamps = data["timestamp"]
        values = data["value"]
        threshold = self.params["threshold"]
        window_size = self.params["window_size"]
        min_samples = self.params["min_samples"]

        results = []
        n_points = len(timestamps)

        for i in range(n_points):
            current_val = values[i]
            current_ts = timestamps[i]

            # Skip NaN values
            if np.isnan(current_val):
                results.append(
                    DetectionResult(
                        timestamp=current_ts,
                        value=current_val,
                        is_anomaly=False,
                        detection_metadata={"reason": "missing_data"},
                    )
                )
                continue

            # Get historical window (not including current point)
            window_start = max(0, i - window_size)
            window_values = values[window_start:i]

            # Filter out NaN values from window
            window_valid = window_values[~np.isnan(window_values)]
            n_valid = int(len(window_valid))

            # Check if we have enough samples
            if n_valid < min_samples:
                results.append(
                    DetectionResult(
                        timestamp=current_ts,
                        value=current_val,
                        is_anomaly=False,
                        detection_metadata={
                            "reason": "insufficient_data",
                            "window_size": n_valid,
                            "min_samples": min_samples,
                        },
                    )
                )
                continue

            # Compute both quartiles in a single call so the window is only
            # processed once per point.
            q1, q3 = np.percentile(window_valid, [25, 75])
            iqr = q3 - q1

            # Handle edge case: IQR = 0 (no spread between the quartiles)
            if iqr == 0:
                # Use Q1/Q3 with small epsilon
                # If no spread, any value outside Q1-Q3 is anomalous
                confidence_lower = q1 - 1e-10
                confidence_upper = q3 + 1e-10
            else:
                confidence_lower = q1 - threshold * iqr
                confidence_upper = q3 + threshold * iqr

            # Check if current value is anomalous
            is_below = current_val < confidence_lower
            is_anomaly = is_below or (current_val > confidence_upper)

            metadata = {
                "q1": float(q1),
                "q3": float(q3),
                "iqr": float(iqr),
                "window_size": n_valid,
            }

            # Direction and severity are only reported for anomalies
            if is_anomaly:
                if is_below:
                    direction = "below"
                    distance = confidence_lower - current_val
                else:
                    direction = "above"
                    distance = current_val - confidence_upper

                # Severity: how many IQR units beyond the violated bound.
                # NOTE(review): with IQR == 0 this is infinity; confirm that
                # whatever serializes detection_metadata tolerates non-finite
                # floats.
                severity = distance / iqr if iqr > 0 else float("inf")

                metadata.update({
                    "direction": direction,
                    "severity": float(severity),
                    "distance": float(distance),
                })

            results.append(
                DetectionResult(
                    timestamp=current_ts,
                    value=current_val,
                    is_anomaly=is_anomaly,
                    confidence_lower=float(confidence_lower),
                    confidence_upper=float(confidence_upper),
                    detection_metadata=metadata,
                )
            )

        return results

    def _get_non_default_params(self) -> Dict[str, Any]:
        """
        Get parameters that differ from defaults.

        Returns:
            Mapping of every entry in ``self.params`` whose value differs
            from the constructor default; keys without a known default are
            included unless their value is None.
        """
        defaults = {
            "threshold": 1.5,
            "window_size": 100,
            "min_samples": 30,
        }
        return {
            k: v for k, v in self.params.items()
            if v != defaults.get(k)
        }