detectkit 0.1.2__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. {detectkit-0.1.2/detectkit.egg-info → detectkit-0.2.1}/PKG-INFO +1 -1
  2. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/alerting/channels/base.py +2 -0
  3. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/alerting/orchestrator.py +4 -3
  4. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/database/internal_tables.py +145 -2
  5. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/database/tables.py +5 -1
  6. detectkit-0.2.1/detectkit/detectors/base.py +441 -0
  7. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/detectors/statistical/iqr.py +124 -34
  8. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/detectors/statistical/mad.py +81 -26
  9. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/detectors/statistical/manual_bounds.py +43 -14
  10. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/detectors/statistical/zscore.py +123 -36
  11. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/orchestration/task_manager.py +41 -53
  12. detectkit-0.2.1/detectkit/utils/__init__.py +17 -0
  13. detectkit-0.2.1/detectkit/utils/stats.py +196 -0
  14. {detectkit-0.1.2 → detectkit-0.2.1/detectkit.egg-info}/PKG-INFO +1 -1
  15. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit.egg-info/SOURCES.txt +2 -1
  16. {detectkit-0.1.2 → detectkit-0.2.1}/pyproject.toml +1 -1
  17. detectkit-0.1.2/detectkit/detectors/base.py +0 -222
  18. detectkit-0.1.2/detectkit/utils/__init__.py +0 -1
  19. {detectkit-0.1.2 → detectkit-0.2.1}/LICENSE +0 -0
  20. {detectkit-0.1.2 → detectkit-0.2.1}/MANIFEST.in +0 -0
  21. {detectkit-0.1.2 → detectkit-0.2.1}/README.md +0 -0
  22. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/__init__.py +0 -0
  23. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/alerting/__init__.py +0 -0
  24. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/alerting/channels/__init__.py +0 -0
  25. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/alerting/channels/email.py +0 -0
  26. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/alerting/channels/factory.py +0 -0
  27. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/alerting/channels/mattermost.py +0 -0
  28. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/alerting/channels/slack.py +0 -0
  29. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/alerting/channels/telegram.py +0 -0
  30. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/alerting/channels/webhook.py +0 -0
  31. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/cli/__init__.py +0 -0
  32. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/cli/commands/__init__.py +0 -0
  33. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/cli/commands/init.py +0 -0
  34. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/cli/commands/run.py +0 -0
  35. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/cli/commands/test_alert.py +0 -0
  36. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/cli/main.py +0 -0
  37. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/config/__init__.py +0 -0
  38. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/config/metric_config.py +0 -0
  39. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/config/profile.py +0 -0
  40. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/config/project_config.py +0 -0
  41. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/config/validator.py +0 -0
  42. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/core/__init__.py +0 -0
  43. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/core/interval.py +0 -0
  44. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/core/models.py +0 -0
  45. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/database/__init__.py +0 -0
  46. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/database/clickhouse_manager.py +0 -0
  47. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/database/manager.py +0 -0
  48. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/detectors/__init__.py +0 -0
  49. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/detectors/factory.py +0 -0
  50. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/detectors/statistical/__init__.py +0 -0
  51. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/loaders/__init__.py +0 -0
  52. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/loaders/metric_loader.py +0 -0
  53. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/loaders/query_template.py +0 -0
  54. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit/orchestration/__init__.py +0 -0
  55. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit.egg-info/dependency_links.txt +0 -0
  56. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit.egg-info/entry_points.txt +0 -0
  57. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit.egg-info/requires.txt +0 -0
  58. {detectkit-0.1.2 → detectkit-0.2.1}/detectkit.egg-info/top_level.txt +0 -0
  59. {detectkit-0.1.2 → detectkit-0.2.1}/requirements.txt +0 -0
  60. {detectkit-0.1.2 → detectkit-0.2.1}/setup.cfg +0 -0
  61. {detectkit-0.1.2 → detectkit-0.2.1}/setup.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: detectkit
-Version: 0.1.2
+Version: 0.2.1
 Summary: Metric monitoring with automatic anomaly detection
 Author: detectkit team
 License: MIT
detectkit/alerting/channels/base.py

@@ -159,6 +159,7 @@ class BaseAlertChannel(ABC):
             confidence_upper=alert_data.confidence_upper,
             confidence_interval=confidence_str,
             detector_name=alert_data.detector_name,
+            detector_params=alert_data.detector_params,
             direction=alert_data.direction,
             severity=alert_data.severity,
             consecutive_count=alert_data.consecutive_count,
@@ -182,6 +183,7 @@ class BaseAlertChannel(ABC):
             "Value: {value}\n"
             "Confidence interval: {confidence_interval}\n"
             "Detector: {detector_name}\n"
+            "Parameters: {detector_params}\n"
             "Direction: {direction}\n"
             "Severity: {severity:.2f}"
         )
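The two hunks above thread the new detector_params field from the alert payload into the default message template. A minimal rendering sketch, assuming field names as shown in the hunks (the surrounding AlertData object and the full template are not part of this diff, so the values below are purely illustrative):

    # Illustrative only: renders the default-message fields shown in the hunks above.
    template = (
        "Value: {value}\n"
        "Confidence interval: {confidence_interval}\n"
        "Detector: {detector_name}\n"
        "Parameters: {detector_params}\n"
        "Direction: {direction}\n"
        "Severity: {severity:.2f}"
    )

    message = template.format(
        value=312.0,
        confidence_interval="[250.0, 290.0]",
        detector_name="MADDetector",
        detector_params='{"threshold": 3.0, "window": 60}',
        direction="up",
        severity=0.82,
    )
    print(message)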
detectkit/alerting/orchestrator.py

@@ -35,6 +35,7 @@ class DetectionRecord:
     timestamp: np.datetime64
     detector_name: str
     detector_id: str
+    detector_params: str  # JSON string with detector parameters
     value: float
     is_anomaly: bool
     confidence_lower: Optional[float]
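The new detector_params field carries the serialized parameters alongside each detection record. A hedged sketch of how such a string is typically produced; json.dumps with sort_keys matches the "sorted detector parameters" wording used later in this diff, but the package's actual serialization helper is not shown here:

    import json

    # Illustrative: serialize detector parameters deterministically so the same
    # configuration always yields the same string (and a stable hash downstream).
    params = {"window": 60, "threshold": 3.0}
    detector_params = json.dumps(params, sort_keys=True)
    # -> '{"threshold": 3.0, "window": 60}'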
@@ -242,9 +243,9 @@ class AlertOrchestrator:
             detector_names = [d.detector_name for d in anomalies]
             detector_name = f"{len(anomalies)} detectors"
             detector_params_list = [
-                f"{d.detector_name}({d.detector_id[:8]})" for d in anomalies
+                f"{d.detector_name}: {d.detector_params}" for d in anomalies
             ]
-            detector_params = ", ".join(detector_params_list)
+            detector_params = "; ".join(detector_params_list)

             # Combine metadata
             combined_metadata = {
@@ -256,7 +257,7 @@ class AlertOrchestrator:
         else:
             max_severity = primary.severity
             detector_name = primary.detector_name
-            detector_params = f"{primary.detector_id[:16]}"
+            detector_params = primary.detector_params
             combined_metadata = primary.detection_metadata

         # Convert numpy timestamp for AlertData
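The two orchestrator hunks replace the truncated detector_id with the human-readable parameter strings: one "name: params" pair per triggering detector, joined with "; " when several detectors fire, or the primary detector's params when only one does. A small sketch of that aggregation using hypothetical records (plain dicts stand in for DetectionRecord instances):

    # Hypothetical detection records; only the fields used by the hunks above.
    anomalies = [
        {"detector_name": "MADDetector", "detector_params": '{"threshold": 3.0}'},
        {"detector_name": "ZScoreDetector", "detector_params": '{"threshold": 2.5}'},
    ]

    if len(anomalies) > 1:
        detector_name = f"{len(anomalies)} detectors"
        detector_params = "; ".join(
            f"{d['detector_name']}: {d['detector_params']}" for d in anomalies
        )
    else:
        primary = anomalies[0]
        detector_name = primary["detector_name"]
        detector_params = primary["detector_params"]

    # detector_params == 'MADDetector: {"threshold": 3.0}; ZScoreDetector: {"threshold": 2.5}'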
detectkit/database/internal_tables.py

@@ -9,7 +9,7 @@ methods underneath. It does NOT duplicate logic - just provides semantic wrapper
 """

 from datetime import datetime, timezone
-from typing import Dict, Optional
+from typing import Dict, List, Optional

 import numpy as np

@@ -142,6 +142,7 @@ class InternalTablesManager:
         self,
         metric_name: str,
         detector_id: str,
+        detector_name: str,
         data: Dict[str, np.ndarray],
         detector_params: str,
     ) -> int:
@@ -151,12 +152,14 @@ class InternalTablesManager:
         Args:
             metric_name: Metric identifier
             detector_id: Detector identifier (hash)
+            detector_name: Detector class name (e.g., "MADDetector")
             data: Dictionary with keys:
                 - timestamp: np.array of datetime64
                 - is_anomaly: np.array of bool
                 - confidence_lower: np.array of float64 (nullable)
                 - confidence_upper: np.array of float64 (nullable)
                 - value: np.array of float64 (nullable)
+                - processed_value: np.array of float64 (nullable)
                 - detection_metadata: np.array of JSON strings
             detector_params: JSON string with sorted detector parameters

@@ -170,10 +173,11 @@ class InternalTablesManager:
             ...     "confidence_lower": np.array([0.4, 0.5]),
             ...     "confidence_upper": np.array([0.6, 0.7]),
             ...     "value": np.array([0.5, 0.9]),
+            ...     "processed_value": np.array([0.5, 0.9]),
             ...     "detection_metadata": np.array(['{"severity": 0.0}', '{"severity": 0.8}']),
             ... }
             >>> rows = internal.save_detections(
-            ...     "cpu_usage", "mad_abc123", data, '{"threshold": 3.0}'
+            ...     "cpu_usage", "mad_abc123", "MADDetector", data, '{"threshold": 3.0}'
             ... )
         """
         num_rows = len(data["timestamp"])
@@ -182,11 +186,13 @@ class InternalTablesManager:
         insert_data = {
             "metric_name": np.full(num_rows, metric_name, dtype=object),
             "detector_id": np.full(num_rows, detector_id, dtype=object),
+            "detector_name": np.full(num_rows, detector_name, dtype=object),
             "timestamp": data["timestamp"],
             "is_anomaly": data["is_anomaly"],
             "confidence_lower": data["confidence_lower"],
             "confidence_upper": data["confidence_upper"],
             "value": data["value"],
+            "processed_value": data["processed_value"],
             "detector_params": np.full(num_rows, detector_params, dtype=object),
             "detection_metadata": data["detection_metadata"],
             "created_at": np.full(
@@ -414,6 +420,143 @@ class InternalTablesManager:
         # ClickHouse ALTER TABLE DELETE is async, return 0
         return 0

+    def get_recent_detections(
+        self,
+        metric_name: str,
+        last_point: datetime,
+        num_points: int,
+    ) -> List[Dict]:
+        """
+        Get recent detection results grouped by timestamp.
+
+        This method is fully database-agnostic - uses simple SELECT
+        and groups data in Python (no GROUP BY, no database-specific functions).
+
+        Args:
+            metric_name: Metric identifier
+            last_point: Last complete timestamp to query up to
+            num_points: Number of recent timestamps to retrieve
+
+        Returns:
+            List of dicts, each containing:
+            - timestamp: Detection timestamp
+            - detector_ids: List of detector IDs for this timestamp
+            - detector_names: List of detector names
+            - detector_params_list: List of detector params (JSON strings)
+            - is_anomaly_flags: List of is_anomaly bools
+            - confidence_lowers: List of lower confidence bounds
+            - confidence_uppers: List of upper confidence bounds
+            - value: Metric value (same for all detectors at this timestamp)
+
+        Example:
+            >>> detections = internal.get_recent_detections(
+            ...     "cpu_usage",
+            ...     datetime(2024, 1, 1, 12, 0, 0),
+            ...     5
+            ... )
+            >>> for det in detections:
+            ...     print(f"{det['timestamp']}: {len(det['detector_ids'])} detectors")
+        """
+        full_table_name = self._manager.get_full_table_name(
+            TABLE_DETECTIONS, use_internal=True
+        )
+
+        # Step 1: Get distinct timestamps (database-agnostic)
+        # Find last N timestamps with detections
+        timestamps_query = f"""
+            SELECT DISTINCT timestamp
+            FROM {full_table_name}
+            WHERE metric_name = %(metric_name)s
+              AND timestamp <= %(last_point)s
+            ORDER BY timestamp DESC
+            LIMIT %(num_points)s
+        """
+
+        timestamp_results = self._manager.execute_query(
+            timestamps_query,
+            params={
+                "metric_name": metric_name,
+                "last_point": last_point,
+                "num_points": num_points,
+            },
+        )
+
+        if not timestamp_results:
+            return []
+
+        # Extract timestamps
+        timestamps = [row["timestamp"] for row in timestamp_results]
+
+        # Step 2: Get all detections for these timestamps (simple SELECT)
+        # Build IN clause with timestamps
+        timestamps_str = ", ".join([
+            f"'{ts.strftime('%Y-%m-%d %H:%M:%S')}'" for ts in timestamps
+        ])
+
+        detections_query = f"""
+            SELECT
+                timestamp,
+                detector_id,
+                detector_name,
+                detector_params,
+                is_anomaly,
+                confidence_lower,
+                confidence_upper,
+                value
+            FROM {full_table_name}
+            WHERE metric_name = %(metric_name)s
+              AND timestamp IN ({timestamps_str})
+            ORDER BY timestamp DESC, detector_id
+        """
+
+        detection_results = self._manager.execute_query(
+            detections_query,
+            params={"metric_name": metric_name},
+        )
+
+        if not detection_results:
+            return []
+
+        # Step 3: Group by timestamp in Python (no pandas, pure Python)
+        # Use timestamp string as key to avoid datetime comparison issues
+        grouped = {}
+        for row in detection_results:
+            ts = row["timestamp"]
+            # Convert timestamp to string key for grouping
+            if isinstance(ts, str):
+                ts_key = ts
+                ts_value = ts
+            else:
+                # datetime object - normalize and convert to string
+                if hasattr(ts, 'tzinfo') and ts.tzinfo is not None:
+                    ts = ts.replace(tzinfo=None)
+                ts_key = ts.isoformat()
+                ts_value = ts
+
+            if ts_key not in grouped:
+                grouped[ts_key] = {
+                    "timestamp": ts_value,
+                    "detector_ids": [],
+                    "detector_names": [],
+                    "detector_params_list": [],
+                    "is_anomaly_flags": [],
+                    "confidence_lowers": [],
+                    "confidence_uppers": [],
+                    "value": row["value"],  # Same for all detectors at this timestamp
+                }
+
+            grouped[ts_key]["detector_ids"].append(row["detector_id"])
+            grouped[ts_key]["detector_names"].append(row["detector_name"])
+            grouped[ts_key]["detector_params_list"].append(row["detector_params"])
+            grouped[ts_key]["is_anomaly_flags"].append(row["is_anomaly"])
+            grouped[ts_key]["confidence_lowers"].append(row["confidence_lower"])
+            grouped[ts_key]["confidence_uppers"].append(row["confidence_upper"])
+
+        # Step 4: Convert to list, sorted by timestamp key (desc)
+        result = [grouped[ts_key] for ts_key in sorted(grouped.keys(), reverse=True)]
+
+        return result
+
     def acquire_lock(
         self,
         metric_name: str,
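Because the new method returns one dict per timestamp with parallel per-detector lists, a consumer can reason about agreement across detectors without further queries. A hedged example continuing the docstring's `internal` manager (the anomaly-counting logic here is illustrative, not part of the package):

    from datetime import datetime

    detections = internal.get_recent_detections(
        "cpu_usage", datetime(2024, 1, 1, 12, 0, 0), 5
    )

    for det in detections:
        # Count how many detectors flagged this timestamp as anomalous.
        flagged = sum(1 for flag in det["is_anomaly_flags"] if flag)
        total = len(det["detector_ids"])
        print(f"{det['timestamp']}: {flagged}/{total} detectors flagged value {det['value']}")

The two-step query (distinct timestamps first, then a plain SELECT filtered by an IN list) keeps the method free of GROUP BY and other dialect-specific SQL, at the cost of doing the grouping in Python.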
detectkit/database/tables.py

@@ -48,11 +48,13 @@ def get_detections_table_model() -> TableModel:
     Schema:
     - metric_name: Metric identifier
     - detector_id: Detector identifier (hash of class + params)
+    - detector_name: Detector class name (e.g., "MADDetector", "ZScoreDetector")
     - timestamp: Detection timestamp (UTC, millisecond precision)
     - is_anomaly: Whether point is anomalous
     - confidence_lower: Lower confidence bound
     - confidence_upper: Upper confidence bound
-    - value: Actual metric value
+    - value: Actual metric value (ALWAYS original value)
+    - processed_value: Value analyzed by detector (may be smoothed/transformed)
     - detector_params: JSON with sorted detector parameters
     - detection_metadata: JSON with missing_ratio, severity, direction, etc.
     - created_at: When detection was performed (UTC, millisecond precision)
@@ -63,11 +65,13 @@ def get_detections_table_model() -> TableModel:
         columns=[
             ColumnDefinition("metric_name", "String"),
             ColumnDefinition("detector_id", "String"),
+            ColumnDefinition("detector_name", "String"),
             ColumnDefinition("timestamp", "DateTime64(3, 'UTC')"),
             ColumnDefinition("is_anomaly", "Bool"),
             ColumnDefinition("confidence_lower", "Nullable(Float64)", nullable=True),
             ColumnDefinition("confidence_upper", "Nullable(Float64)", nullable=True),
             ColumnDefinition("value", "Nullable(Float64)", nullable=True),
+            ColumnDefinition("processed_value", "Nullable(Float64)", nullable=True),
             ColumnDefinition("detector_params", "String"),
             ColumnDefinition("detection_metadata", "String"),
             ColumnDefinition("created_at", "DateTime64(3, 'UTC')"),