detectkit-0.2.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. detectkit/__init__.py +17 -0
  2. detectkit/alerting/__init__.py +13 -0
  3. detectkit/alerting/channels/__init__.py +21 -0
  4. detectkit/alerting/channels/base.py +193 -0
  5. detectkit/alerting/channels/email.py +146 -0
  6. detectkit/alerting/channels/factory.py +193 -0
  7. detectkit/alerting/channels/mattermost.py +53 -0
  8. detectkit/alerting/channels/slack.py +55 -0
  9. detectkit/alerting/channels/telegram.py +110 -0
  10. detectkit/alerting/channels/webhook.py +139 -0
  11. detectkit/alerting/orchestrator.py +369 -0
  12. detectkit/cli/__init__.py +1 -0
  13. detectkit/cli/commands/__init__.py +1 -0
  14. detectkit/cli/commands/init.py +282 -0
  15. detectkit/cli/commands/run.py +486 -0
  16. detectkit/cli/commands/test_alert.py +184 -0
  17. detectkit/cli/main.py +186 -0
  18. detectkit/config/__init__.py +30 -0
  19. detectkit/config/metric_config.py +499 -0
  20. detectkit/config/profile.py +285 -0
  21. detectkit/config/project_config.py +164 -0
  22. detectkit/config/validator.py +124 -0
  23. detectkit/core/__init__.py +6 -0
  24. detectkit/core/interval.py +132 -0
  25. detectkit/core/models.py +106 -0
  26. detectkit/database/__init__.py +27 -0
  27. detectkit/database/clickhouse_manager.py +393 -0
  28. detectkit/database/internal_tables.py +724 -0
  29. detectkit/database/manager.py +324 -0
  30. detectkit/database/tables.py +138 -0
  31. detectkit/detectors/__init__.py +6 -0
  32. detectkit/detectors/base.py +441 -0
  33. detectkit/detectors/factory.py +138 -0
  34. detectkit/detectors/statistical/__init__.py +8 -0
  35. detectkit/detectors/statistical/iqr.py +508 -0
  36. detectkit/detectors/statistical/mad.py +478 -0
  37. detectkit/detectors/statistical/manual_bounds.py +206 -0
  38. detectkit/detectors/statistical/zscore.py +491 -0
  39. detectkit/loaders/__init__.py +6 -0
  40. detectkit/loaders/metric_loader.py +470 -0
  41. detectkit/loaders/query_template.py +164 -0
  42. detectkit/orchestration/__init__.py +9 -0
  43. detectkit/orchestration/task_manager.py +746 -0
  44. detectkit/utils/__init__.py +17 -0
  45. detectkit/utils/stats.py +196 -0
  46. detectkit-0.2.4.dist-info/METADATA +237 -0
  47. detectkit-0.2.4.dist-info/RECORD +51 -0
  48. detectkit-0.2.4.dist-info/WHEEL +5 -0
  49. detectkit-0.2.4.dist-info/entry_points.txt +2 -0
  50. detectkit-0.2.4.dist-info/licenses/LICENSE +21 -0
  51. detectkit-0.2.4.dist-info/top_level.txt +1 -0
detectkit/detectors/statistical/iqr.py
@@ -0,0 +1,508 @@
+ """
+ Interquartile Range (IQR) anomaly detector.
+
+ IQR is a robust statistical method for outlier detection that:
+ - Uses quartiles (Q1, Q3) instead of mean
+ - Measures spread using IQR = Q3 - Q1
+ - Less sensitive to outliers than Z-Score
+ - Similar robustness to MAD
+ - Supports seasonality grouping for adaptive thresholds
+
+ Formula:
+ - Q1 = 25th percentile
+ - Q3 = 75th percentile
+ - IQR = Q3 - Q1
+ - lower_bound = Q1 - threshold × IQR
+ - upper_bound = Q3 + threshold × IQR
+
+ With seasonality:
+ - Computes global statistics (entire window)
+ - Computes group statistics (seasonality subset)
+ - Applies multipliers: adjusted_stat = global_stat × group_multiplier
+
+ Default threshold = 1.5 (standard Tukey's fences)
+ """
+
+ import json
+ from typing import Any, Dict, List, Optional, Union
+
+ import numpy as np
+
+ from detectkit.detectors.base import BaseDetector, DetectionResult
+
+
+ class IQRDetector(BaseDetector):
+     """
+     Interquartile Range (IQR) detector for anomaly detection with seasonality support.
+
+     Detects anomalies using Tukey's fences method based on quartiles.
+     This is a robust method that works well with skewed distributions.
+
+     Parameters:
+         threshold (float): IQR multiplier for bounds (default: 1.5)
+             - 1.5 is standard Tukey's fences (identifies outliers)
+             - 3.0 identifies extreme outliers
+             - Higher = less sensitive (fewer anomalies)
+             - Lower = more sensitive (more anomalies)
+
+         window_size (int): Historical window size in points (default: 100)
+             - Uses last N points to compute statistics
+             - Larger = more stable but less responsive
+             - Smaller = more responsive but less stable
+
+         min_samples (int): Minimum samples required for detection (default: 30)
+             - Skip detection if window has fewer valid points
+             - Ensures statistical reliability
+
+         seasonality_components (list, optional): List of seasonality groupings
+             - Single component: ["hour_of_day"]
+             - Multiple separate: ["hour_of_day", "day_of_week"]
+             - Combined group: [["hour_of_day", "day_of_week"]]
+             - Enables adaptive confidence intervals per seasonality pattern
+
+         min_samples_per_group (int): Minimum samples per seasonality group (default: 4)
+             - Groups with fewer samples use global statistics
+             - Needs at least 4 for quartile calculation
+
+     Example:
+         >>> # Without seasonality
+         >>> detector = IQRDetector(threshold=1.5, window_size=100)
+         >>> results = detector.detect(data)
+
+         >>> # With seasonality
+         >>> detector = IQRDetector(
+         ...     threshold=1.5,
+         ...     window_size=2016,
+         ...     seasonality_components=["hour_of_day", "day_of_week"]
+         ... )
+         >>> results = detector.detect(data)
+     """
+
+     def __init__(
+         self,
+         threshold: float = 1.5,
+         window_size: int = 100,
+         min_samples: int = 30,
+         seasonality_components: Optional[List[Union[str, List[str]]]] = None,
+         min_samples_per_group: int = 4,
+         input_type: str = "values",
+         smoothing: Optional[str] = None,
+         smoothing_alpha: float = 0.3,
+         smoothing_window: int = 10,
+         window_weights: Optional[str] = None,
+         weight_decay: float = 0.95,
+     ):
+         """
+         Initialize IQR detector with parameters.
+
+         Args:
+             threshold: IQR multiplier for bounds
+             window_size: Historical window size in points
+             min_samples: Minimum total samples required
+             seasonality_components: Optional list of seasonality groups
+             min_samples_per_group: Minimum samples per seasonality group
+             input_type: Input transformation type (values, changes, absolute_changes, log_changes)
+             smoothing: Smoothing method (None, ema, sma)
+             smoothing_alpha: EMA smoothing factor (0 < alpha <= 1)
+             smoothing_window: SMA window size
+             window_weights: Weighting method (None, exponential, linear)
+             weight_decay: Decay factor for exponential weights (0 < decay < 1)
+         """
+         super().__init__(
+             threshold=threshold,
+             window_size=window_size,
+             min_samples=min_samples,
+             seasonality_components=seasonality_components,
+             min_samples_per_group=min_samples_per_group,
+             input_type=input_type,
+             smoothing=smoothing,
+             smoothing_alpha=smoothing_alpha,
+             smoothing_window=smoothing_window,
+             window_weights=window_weights,
+             weight_decay=weight_decay,
+         )
+
+     def _validate_params(self):
+         """Validate detector parameters."""
+         threshold = self.params.get("threshold")
+         if threshold is None or threshold <= 0:
+             raise ValueError("threshold must be positive")
+
+         window_size = self.params.get("window_size")
+         if window_size is None or window_size < 1:
+             raise ValueError("window_size must be at least 1")
+
+         min_samples = self.params.get("min_samples")
+         if min_samples is None or min_samples < 4:
+             raise ValueError("min_samples must be at least 4 (for quartiles)")
+
+         if min_samples > window_size:
+             raise ValueError("min_samples cannot exceed window_size")
+
+         min_samples_per_group = self.params.get("min_samples_per_group", 4)
+         if min_samples_per_group < 4:
+             raise ValueError(
+                 "min_samples_per_group must be at least 4 (for quartiles)"
+             )
+
+     def _parse_seasonality_data(
+         self, seasonality_data: np.ndarray, seasonality_columns: List[str]
+     ) -> Dict[str, np.ndarray]:
+         """
+         Parse seasonality JSON strings into structured data.
+
+         Args:
+             seasonality_data: Array of JSON strings
+             seasonality_columns: List of column names
+
+         Returns:
+             Dict with column names as keys, numpy arrays as values
+         """
+         if len(seasonality_data) == 0:
+             return {}
+
+         parsed_data = {col: [] for col in seasonality_columns}
+
+         for json_str in seasonality_data:
+             if json_str is None or json_str == "{}":
+                 for col in seasonality_columns:
+                     parsed_data[col].append(None)
+             else:
+                 try:
+                     data_dict = json.loads(json_str)
+                     for col in seasonality_columns:
+                         parsed_data[col].append(data_dict.get(col))
+                 except (json.JSONDecodeError, TypeError):
+                     for col in seasonality_columns:
+                         parsed_data[col].append(None)
+
+         return {col: np.array(vals) for col, vals in parsed_data.items()}
+
+     def _create_seasonality_mask(
+         self,
+         seasonality_dict: Dict[str, np.ndarray],
+         window_start: int,
+         current_idx: int,
+         group_columns: List[str],
+     ) -> np.ndarray:
+         """
+         Create boolean mask for seasonality group.
+
+         Args:
+             seasonality_dict: Parsed seasonality data
+             window_start: Start index of window
+             current_idx: Current point index
+             group_columns: List of columns to group by
+
+         Returns:
+             Boolean mask for window indices matching current point's seasonality
+         """
+         if not group_columns or not seasonality_dict:
+             window_size = current_idx - window_start
+             return np.ones(window_size, dtype=bool)
+
+         current_values = {}
+         for col in group_columns:
+             if col in seasonality_dict:
+                 current_values[col] = seasonality_dict[col][current_idx]
+             else:
+                 return np.ones(current_idx - window_start, dtype=bool)
+
+         mask = np.ones(current_idx - window_start, dtype=bool)
+
+         for col in group_columns:
+             current_val = current_values[col]
+             window_vals = seasonality_dict[col][window_start:current_idx]
+             mask &= (window_vals == current_val)
+
+         return mask
+
+     def detect(self, data: Dict[str, np.ndarray]) -> list[DetectionResult]:
+         """
+         Perform IQR-based anomaly detection with optional seasonality support.
+
+         For each point, uses historical window to compute:
+         1. Global Q1, Q3, IQR (entire window)
+         2. If seasonality configured: group Q1, Q3, IQR (seasonality subset)
+         3. Apply multipliers: adjusted = global × (group / global)
+         4. Build confidence interval: [Q1 - threshold×IQR, Q3 + threshold×IQR]
+         5. Detect anomaly if value outside interval
+
+         Args:
+             data: Dictionary with keys:
+                 - timestamp: np.array of datetime64[ms]
+                 - value: np.array of float64 (may contain NaN)
+                 - seasonality_data: np.array of JSON strings (optional)
+                 - seasonality_columns: list of column names (optional)
+
+         Returns:
+             List of DetectionResult for each point
+
+         Notes:
+             - NaN values are skipped (marked as non-anomalous)
+             - First min_samples-1 points are skipped (insufficient history)
+             - Uses linear interpolation for percentile calculation
+             - Seasonality grouping creates adaptive confidence intervals
+         """
+         timestamps = data["timestamp"]
+         values = data["value"]  # ORIGINAL values (always kept)
+         threshold = self.params["threshold"]
+         window_size = self.params["window_size"]
+         min_samples = self.params["min_samples"]
+
+         # Seasonality parameters
+         seasonality_components = self.params.get("seasonality_components")
+         min_samples_per_group = self.params.get("min_samples_per_group", 4)
+
+         # STEP 0: Preprocessing (smoothing + input_type transformation)
+         smoothed_values = self._apply_smoothing(values)
+         processed_values = self._preprocess_input(smoothed_values)
+
+         # Parse seasonality data if available
+         seasonality_dict = {}
+         seasonality_columns = data.get("seasonality_columns", [])
+         seasonality_data = data.get("seasonality_data", np.array([]))
+
+         if (
+             seasonality_components
+             and len(seasonality_columns) > 0
+             and len(seasonality_data) > 0
+         ):
+             seasonality_dict = self._parse_seasonality_data(
+                 seasonality_data, seasonality_columns
+             )
+
+         results = []
+         n_points = len(timestamps)
+
+         for i in range(n_points):
+             current_val = values[i]  # ORIGINAL value
+             current_processed = processed_values[i]  # PROCESSED value
+             current_ts = timestamps[i]
+
+             # Skip NaN values (in processed)
+             if np.isnan(current_processed):
+                 results.append(
+                     DetectionResult(
+                         timestamp=current_ts,
+                         value=current_val,
+                         processed_value=current_processed,
+                         is_anomaly=False,
+                         detection_metadata={"reason": "missing_data"},
+                     )
+                 )
+                 continue
+
+             # Get historical window (not including current point)
+             window_start = max(0, i - window_size)
+             window_processed = processed_values[window_start:i]
+
+             # Filter out NaN values from window
+             valid_mask = ~np.isnan(window_processed)
+             window_valid = window_processed[valid_mask]
+
+             # Check if we have enough samples
+             if len(window_valid) < min_samples:
+                 results.append(
+                     DetectionResult(
+                         timestamp=current_ts,
+                         value=current_val,
+                         processed_value=current_processed,
+                         is_anomaly=False,
+                         detection_metadata={
+                             "reason": "insufficient_data",
+                             "window_size": int(len(window_valid)),
+                             "min_samples": min_samples,
+                         },
+                     )
+                 )
+                 continue
+
+             # Compute weights for window (if specified)
+             weights = self._compute_weights(len(window_valid))
+
+             # STEP 1: Compute GLOBAL statistics (entire window)
+             # Use weighted statistics if weights are not uniform
+             from detectkit.utils import weighted_percentile
+
+             global_q1 = weighted_percentile(window_valid, weights, 25)
+             global_q3 = weighted_percentile(window_valid, weights, 75)
+             global_iqr = global_q3 - global_q1
+
+             # Initialize adjusted statistics
+             adjusted_q1 = global_q1
+             adjusted_q3 = global_q3
+             adjusted_iqr = global_iqr
+
+             # STEP 2: Apply seasonality adjustments
+             multipliers_applied = []
+
+             if seasonality_components and seasonality_dict:
+                 for group in seasonality_components:
+                     # Convert single string to list
+                     group_cols = [group] if isinstance(group, str) else group
+
+                     # Create mask for this seasonality group
+                     season_mask = self._create_seasonality_mask(
+                         seasonality_dict, window_start, i, group_cols
+                     )
+
+                     # Apply mask to window (only valid values + seasonality match)
+                     # Both valid_mask and season_mask are same size as window_processed
+                     combined_mask = valid_mask & season_mask
+
+                     group_values = window_processed[combined_mask]
+
+                     # Check if enough samples in group
+                     if len(group_values) < min_samples_per_group:
+                         # Insufficient data - skip this group (multiplier = 1.0)
+                         multipliers_applied.append({
+                             "group": group_cols,
+                             "q1_multiplier": 1.0,
+                             "q3_multiplier": 1.0,
+                             "iqr_multiplier": 1.0,
+                             "reason": "insufficient_group_data",
+                             "group_size": int(len(group_values)),
+                         })
+                         continue
+
+                     # Compute group statistics with weights
+                     group_weights = self._compute_weights(len(group_values))
+                     group_q1 = weighted_percentile(group_values, group_weights, 25)
+                     group_q3 = weighted_percentile(group_values, group_weights, 75)
+                     group_iqr = group_q3 - group_q1
+
+                     # Calculate multipliers (avoid division by zero)
+                     if global_q1 != 0:
+                         q1_multiplier = group_q1 / global_q1
+                     else:
+                         q1_multiplier = 1.0
+
+                     if global_q3 != 0:
+                         q3_multiplier = group_q3 / global_q3
+                     else:
+                         q3_multiplier = 1.0
+
+                     if global_iqr > 0:
+                         iqr_multiplier = group_iqr / global_iqr
+                     else:
+                         iqr_multiplier = 1.0
+
+                     # Apply multipliers
+                     adjusted_q1 *= q1_multiplier
+                     adjusted_q3 *= q3_multiplier
+                     adjusted_iqr *= iqr_multiplier
+
+                     multipliers_applied.append({
+                         "group": group_cols,
+                         "q1_multiplier": float(q1_multiplier),
+                         "q3_multiplier": float(q3_multiplier),
+                         "iqr_multiplier": float(iqr_multiplier),
+                         "group_size": int(len(group_values)),
+                     })
+
+             # STEP 3: Build confidence interval with adjusted statistics
+             if adjusted_iqr == 0:
+                 # No spread - use small epsilon
+                 confidence_lower = adjusted_q1 - 1e-10
+                 confidence_upper = adjusted_q3 + 1e-10
+             else:
+                 confidence_lower = adjusted_q1 - threshold * adjusted_iqr
+                 confidence_upper = adjusted_q3 + threshold * adjusted_iqr
+
+             # STEP 4: Check if current PROCESSED value is anomalous
+             is_anomaly = (current_processed < confidence_lower) or (
+                 current_processed > confidence_upper
+             )
+
+             # STEP 5: Compute metadata
+             metadata = {
+                 "global_q1": float(global_q1),
+                 "global_q3": float(global_q3),
+                 "global_iqr": float(global_iqr),
+                 "adjusted_q1": float(adjusted_q1),
+                 "adjusted_q3": float(adjusted_q3),
+                 "adjusted_iqr": float(adjusted_iqr),
+                 "window_size": int(len(window_valid)),
+             }
+
+             # Add preprocessing info if used
+             if self.params.get("smoothing") or self.params.get("input_type") != "values":
+                 metadata["preprocessing"] = {
+                     "input_type": self.params.get("input_type", "values"),
+                     "smoothing": self.params.get("smoothing"),
+                 }
+                 if self.params.get("smoothing"):
+                     metadata["preprocessing"]["smoothed_value"] = float(smoothed_values[i])
+
+             if seasonality_components and multipliers_applied:
+                 metadata["seasonality_groups"] = multipliers_applied
+
+             if is_anomaly:
+                 if current_processed < confidence_lower:
+                     direction = "below"
+                     distance = confidence_lower - current_processed
+                 else:
+                     direction = "above"
+                     distance = current_processed - confidence_upper
+
+                 # Severity: how many adjusted IQR units away
+                 if adjusted_iqr > 0:
+                     severity = distance / adjusted_iqr
+                 else:
+                     severity = float("inf")
+
+                 metadata.update(
+                     {
+                         "direction": direction,
+                         "severity": float(severity),
+                         "distance": float(distance),
+                     }
+                 )
+
+             results.append(
+                 DetectionResult(
+                     timestamp=current_ts,
+                     value=current_val,  # ORIGINAL value
+                     processed_value=current_processed,  # PROCESSED value
+                     is_anomaly=is_anomaly,
+                     confidence_lower=float(confidence_lower),
+                     confidence_upper=float(confidence_upper),
+                     detection_metadata=metadata,
+                 )
+             )
+
+         return results
+
+     def _get_non_default_params(self) -> Dict[str, Any]:
+         """
+         Get parameters that differ from defaults.
+
+         Excludes execution parameters (seasonality_components, min_samples_per_group)
+         from detector ID hash.
+         """
+         defaults = {
+             "threshold": 1.5,
+             "window_size": 100,
+             "min_samples": 30,
+             "min_samples_per_group": 4,
+             "input_type": "values",
+             "smoothing": None,
+             "smoothing_alpha": 0.3,
+             "smoothing_window": 10,
+             "window_weights": None,
+             "weight_decay": 0.95,
+         }
+         # Execution parameters that don't affect detector ID
+         execution_params = {
+             "seasonality_components",
+             "min_samples_per_group",
+             "smoothing_alpha",  # Only affects smoothing, not algorithm
+             "smoothing_window",  # Only affects smoothing, not algorithm
+             "weight_decay",  # Only affects weighting, not algorithm
+         }
+
+         return {
+             k: v for k, v in self.params.items()
+             if v != defaults.get(k) and k not in execution_params
+         }
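
The module docstring above gives the bound formula (lower_bound = Q1 - threshold × IQR, upper_bound = Q3 + threshold × IQR) and the seasonality multiplier scheme (adjusted_stat = global_stat × group_multiplier). The short sketch below, which is not part of the package, reproduces that arithmetic with plain NumPy so the numbers can be checked independently. It is illustrative only: the detector itself uses detectkit's weighted_percentile plus optional smoothing and window weights, which are replaced here by unweighted np.percentile (linear interpolation, as noted in detect()); the sample series, the "same hour_of_day" subset, and the tukey_bounds helper are invented for the example.

import numpy as np

def tukey_bounds(window, threshold=1.5):
    # Hypothetical helper mirroring the documented formula:
    #   lower = Q1 - threshold * IQR, upper = Q3 + threshold * IQR
    q1, q3 = np.percentile(window, [25, 75])  # unweighted, linear interpolation
    iqr = q3 - q1
    return q1 - threshold * iqr, q3 + threshold * iqr

# Invented history: a stable series around 100 with modest spread.
rng = np.random.default_rng(0)
window = rng.normal(loc=100.0, scale=5.0, size=200)

lower, upper = tukey_bounds(window, threshold=1.5)
print(f"global bounds: [{lower:.1f}, {upper:.1f}]")  # roughly [86, 114] for this distribution
print("value 130 anomalous:", not (lower <= 130.0 <= upper))

# Seasonality adjustment as documented: group statistics rescale the global
# ones via multipliers (group_stat / global_stat), so a quieter seasonality
# group narrows the interval and a noisier one widens it.
global_q1, global_q3 = np.percentile(window, [25, 75])
global_iqr = global_q3 - global_q1
group = window[::24]  # invented "same hour_of_day" subset of the window
group_q1, group_q3 = np.percentile(group, [25, 75])
q1_mult = group_q1 / global_q1 if global_q1 != 0 else 1.0
q3_mult = group_q3 / global_q3 if global_q3 != 0 else 1.0
iqr_mult = (group_q3 - group_q1) / global_iqr if global_iqr > 0 else 1.0
adj_q1 = global_q1 * q1_mult
adj_q3 = global_q3 * q3_mult
adj_iqr = global_iqr * iqr_mult
print(f"adjusted bounds: [{adj_q1 - 1.5 * adj_iqr:.1f}, {adj_q3 + 1.5 * adj_iqr:.1f}]")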