detectkit 0.3.17__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. {detectkit-0.3.17/detectkit.egg-info → detectkit-0.4.1}/PKG-INFO +5 -1
  2. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/__init__.py +1 -1
  3. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/alerting/channels/factory.py +5 -35
  4. detectkit-0.4.1/detectkit/alerting/orchestrator/__init__.py +19 -0
  5. detectkit-0.4.1/detectkit/alerting/orchestrator/_base.py +46 -0
  6. detectkit-0.4.1/detectkit/alerting/orchestrator/_cooldown.py +40 -0
  7. detectkit-0.4.1/detectkit/alerting/orchestrator/_decision.py +148 -0
  8. detectkit-0.4.1/detectkit/alerting/orchestrator/_dispatch.py +69 -0
  9. detectkit-0.4.1/detectkit/alerting/orchestrator/_recovery.py +203 -0
  10. detectkit-0.4.1/detectkit/alerting/orchestrator/_types.py +82 -0
  11. detectkit-0.4.1/detectkit/alerting/orchestrator/orchestrator.py +36 -0
  12. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/config/profile.py +5 -0
  13. detectkit-0.4.1/detectkit/database/internal_tables/__init__.py +10 -0
  14. detectkit-0.4.1/detectkit/database/internal_tables/_alert_states.py +169 -0
  15. detectkit-0.4.1/detectkit/database/internal_tables/_base.py +43 -0
  16. detectkit-0.4.1/detectkit/database/internal_tables/_datapoints.py +128 -0
  17. detectkit-0.4.1/detectkit/database/internal_tables/_detections.py +198 -0
  18. detectkit-0.4.1/detectkit/database/internal_tables/_metrics.py +93 -0
  19. detectkit-0.4.1/detectkit/database/internal_tables/_schema.py +25 -0
  20. detectkit-0.4.1/detectkit/database/internal_tables/_tasks.py +92 -0
  21. detectkit-0.4.1/detectkit/database/internal_tables/manager.py +26 -0
  22. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/detectors/base.py +34 -37
  23. detectkit-0.4.1/detectkit/detectors/seasonality.py +95 -0
  24. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/detectors/statistical/iqr.py +6 -75
  25. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/detectors/statistical/mad.py +6 -92
  26. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/detectors/statistical/zscore.py +6 -75
  27. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/loaders/metric_loader.py +2 -16
  28. detectkit-0.4.1/detectkit/orchestration/task_manager/__init__.py +28 -0
  29. detectkit-0.4.1/detectkit/orchestration/task_manager/_alert_step.py +193 -0
  30. detectkit-0.4.1/detectkit/orchestration/task_manager/_base.py +128 -0
  31. detectkit-0.4.1/detectkit/orchestration/task_manager/_detect_step.py +215 -0
  32. detectkit-0.4.1/detectkit/orchestration/task_manager/_load_step.py +138 -0
  33. detectkit-0.4.1/detectkit/orchestration/task_manager/_types.py +46 -0
  34. detectkit-0.4.1/detectkit/orchestration/task_manager/manager.py +135 -0
  35. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/utils/__init__.py +5 -0
  36. detectkit-0.4.1/detectkit/utils/env_interpolation.py +50 -0
  37. detectkit-0.4.1/detectkit/utils/json_utils.py +34 -0
  38. {detectkit-0.3.17 → detectkit-0.4.1/detectkit.egg-info}/PKG-INFO +5 -1
  39. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit.egg-info/SOURCES.txt +27 -3
  40. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit.egg-info/requires.txt +5 -0
  41. {detectkit-0.3.17 → detectkit-0.4.1}/pyproject.toml +18 -4
  42. detectkit-0.3.17/detectkit/alerting/orchestrator.py +0 -777
  43. detectkit-0.3.17/detectkit/database/internal_tables.py +0 -1066
  44. detectkit-0.3.17/detectkit/orchestration/task_manager.py +0 -875
  45. {detectkit-0.3.17 → detectkit-0.4.1}/LICENSE +0 -0
  46. {detectkit-0.3.17 → detectkit-0.4.1}/MANIFEST.in +0 -0
  47. {detectkit-0.3.17 → detectkit-0.4.1}/README.md +0 -0
  48. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/alerting/__init__.py +0 -0
  49. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/alerting/channels/__init__.py +0 -0
  50. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/alerting/channels/base.py +0 -0
  51. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/alerting/channels/email.py +0 -0
  52. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/alerting/channels/mattermost.py +0 -0
  53. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/alerting/channels/slack.py +0 -0
  54. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/alerting/channels/telegram.py +0 -0
  55. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/alerting/channels/webhook.py +0 -0
  56. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/cli/__init__.py +0 -0
  57. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/cli/commands/__init__.py +0 -0
  58. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/cli/commands/init.py +0 -0
  59. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/cli/commands/run.py +0 -0
  60. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/cli/commands/test_alert.py +0 -0
  61. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/cli/main.py +0 -0
  62. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/config/__init__.py +0 -0
  63. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/config/metric_config.py +0 -0
  64. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/config/project_config.py +0 -0
  65. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/config/validator.py +0 -0
  66. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/core/__init__.py +0 -0
  67. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/core/interval.py +0 -0
  68. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/core/models.py +0 -0
  69. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/database/__init__.py +0 -0
  70. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/database/clickhouse_manager.py +0 -0
  71. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/database/manager.py +0 -0
  72. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/database/tables.py +0 -0
  73. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/detectors/__init__.py +0 -0
  74. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/detectors/factory.py +0 -0
  75. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/detectors/statistical/__init__.py +0 -0
  76. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/detectors/statistical/manual_bounds.py +0 -0
  77. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/loaders/__init__.py +0 -0
  78. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/loaders/query_template.py +0 -0
  79. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/orchestration/__init__.py +0 -0
  80. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/utils/datetime_utils.py +0 -0
  81. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit/utils/stats.py +0 -0
  82. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit.egg-info/dependency_links.txt +0 -0
  83. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit.egg-info/entry_points.txt +0 -0
  84. {detectkit-0.3.17 → detectkit-0.4.1}/detectkit.egg-info/top_level.txt +0 -0
  85. {detectkit-0.3.17 → detectkit-0.4.1}/requirements.txt +0 -0
  86. {detectkit-0.3.17 → detectkit-0.4.1}/setup.cfg +0 -0
  87. {detectkit-0.3.17 → detectkit-0.4.1}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: detectkit
3
- Version: 0.3.17
3
+ Version: 0.4.1
4
4
  Summary: Metric monitoring with automatic anomaly detection
5
5
  Author: detectkit team
6
6
  License: MIT
@@ -55,9 +55,13 @@ Requires-Dist: timesfm>=0.1.0; extra == "all"
55
55
  Provides-Extra: dev
56
56
  Requires-Dist: pytest>=7.0; extra == "dev"
57
57
  Requires-Dist: pytest-cov>=4.0; extra == "dev"
58
+ Requires-Dist: pytest-requests-mock>=0.1; extra == "dev"
59
+ Requires-Dist: requests-mock>=1.12; extra == "dev"
58
60
  Requires-Dist: black>=23.0; extra == "dev"
59
61
  Requires-Dist: mypy>=1.0; extra == "dev"
60
62
  Requires-Dist: ruff>=0.1.0; extra == "dev"
63
+ Provides-Extra: integration
64
+ Requires-Dist: testcontainers[clickhouse]>=4.0; extra == "integration"
61
65
  Dynamic: license-file
62
66
 
63
67
  # detectkit
@@ -4,7 +4,7 @@ detectk - Anomaly Detection for Time-Series Metrics
4
4
  A Python library for data analysts and engineers to monitor metrics with automatic anomaly detection.
5
5
  """
6
6
 
7
- __version__ = "0.3.14"
7
+ __version__ = "0.4.1"
8
8
 
9
9
  from detectkit.core.interval import Interval
10
10
  from detectkit.core.models import ColumnDefinition, TableModel
@@ -2,7 +2,6 @@
2
2
  Alert channel factory for creating channel instances from configuration.
3
3
  """
4
4
 
5
- import os
6
5
  from typing import Dict, List
7
6
 
8
7
  from detectkit.alerting.channels.base import BaseAlertChannel
@@ -11,6 +10,7 @@ from detectkit.alerting.channels.slack import SlackChannel
11
10
  from detectkit.alerting.channels.webhook import WebhookChannel
12
11
  from detectkit.alerting.channels.telegram import TelegramChannel
13
12
  from detectkit.alerting.channels.email import EmailChannel
13
+ from detectkit.utils.env_interpolation import interpolate_env_vars
14
14
 
15
15
 
16
16
  class AlertChannelFactory:
@@ -82,42 +82,12 @@ class AlertChannelFactory:
82
82
 
83
83
  @classmethod
84
84
  def _interpolate_env_vars(cls, params: Dict) -> Dict:
85
- """
86
- Interpolate environment variables in parameter values.
87
-
88
- Supports formats:
89
- - ${VAR_NAME}
90
- - {{ env_var('VAR_NAME') }}
85
+ """Interpolate ``${VAR}`` and ``{{ env_var('VAR') }}`` placeholders.
91
86
 
92
- Args:
93
- params: Parameters dictionary
94
-
95
- Returns:
96
- Parameters with interpolated values
87
+ Delegates to :func:`detectkit.utils.env_interpolation.interpolate_env_vars`,
88
+ which walks nested dicts/lists recursively.
97
89
  """
98
- import re
99
-
100
- interpolated = {}
101
-
102
- for key, value in params.items():
103
- if isinstance(value, str):
104
- # Handle ${VAR} format
105
- value = re.sub(
106
- r'\$\{([^}]+)\}',
107
- lambda m: os.environ.get(m.group(1), m.group(0)),
108
- value,
109
- )
110
-
111
- # Handle {{ env_var('VAR') }} format
112
- value = re.sub(
113
- r"\{\{\s*env_var\(['\"]([^'\"]+)['\"]\)\s*\}\}",
114
- lambda m: os.environ.get(m.group(1), m.group(0)),
115
- value,
116
- )
117
-
118
- interpolated[key] = value
119
-
120
- return interpolated
90
+ return interpolate_env_vars(params)
121
91
 
122
92
  @classmethod
123
93
  def create_from_config(cls, channel_config: Dict) -> BaseAlertChannel:
@@ -0,0 +1,19 @@
1
+ """Public surface of the alert-orchestrator package."""
2
+
3
+ from detectkit.alerting.orchestrator._types import (
4
+ AlertConditions,
5
+ DetectionRecord,
6
+ _direction_from_metadata,
7
+ _parse_detection_metadata,
8
+ )
9
+ from detectkit.alerting.orchestrator.orchestrator import AlertOrchestrator
10
+
11
+ __all__ = [
12
+ "AlertOrchestrator",
13
+ "AlertConditions",
14
+ "DetectionRecord",
15
+ # Re-exported for callers (notably TaskManager) that build
16
+ # DetectionRecord rows manually before handing them to the orchestrator.
17
+ "_direction_from_metadata",
18
+ "_parse_detection_metadata",
19
+ ]
@@ -0,0 +1,46 @@
1
+ """Shared state for orchestrator mixins."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Dict, List, Optional
6
+
7
+ import numpy as np
8
+
9
+ from detectkit.alerting.orchestrator._types import (
10
+ AlertConditions,
11
+ DetectionRecord,
12
+ )
13
+ from detectkit.core.interval import Interval
14
+
15
+
16
class _OrchestratorBase:
    """State container shared by every alert-orchestrator mixin.

    Stores the constructor arguments as instance attributes and provides a
    small grouping helper; it defines no alerting behaviour of its own.
    """

    def __init__(
        self,
        metric_name: str,
        interval: Interval,
        alert_config_id: str,
        conditions: Optional[AlertConditions] = None,
        timezone_display: str = "UTC",
        internal=None,  # InternalTablesManager
        alert_config=None,  # AlertConfig
        description: Optional[str] = None,
        mentions: Optional[List[str]] = None,
    ):
        """Record the orchestrator configuration on the instance.

        Args:
            metric_name: Stored as ``self.metric_name``.
            interval: Stored as ``self.interval``.
            alert_config_id: Stored as ``self.alert_config_id``.
            conditions: Alert conditions; a falsy value is replaced by a
                default-constructed ``AlertConditions()``.
            timezone_display: Stored as ``self.timezone_display``.
            internal: Optional internal-tables manager.
            alert_config: Optional alert configuration object.
            description: Optional description, stored as-is.
            mentions: Optional list of mentions; a falsy value becomes ``[]``.
        """
        self.metric_name = metric_name
        self.interval = interval
        self.alert_config_id = alert_config_id
        self.conditions = conditions if conditions else AlertConditions()
        self.timezone_display = timezone_display
        self.internal = internal
        self.alert_config = alert_config
        self.description = description
        self.mentions = mentions if mentions else []

    @staticmethod
    def _group_by_timestamp(
        detections: List[DetectionRecord],
    ) -> Dict[np.datetime64, List[DetectionRecord]]:
        """Bucket *detections* by their ``timestamp`` attribute.

        Buckets appear in first-seen order and each bucket keeps the records
        in their input order.
        """
        buckets: Dict[np.datetime64, List[DetectionRecord]] = {}
        for record in detections:
            stamp = record.timestamp
            if stamp in buckets:
                buckets[stamp].append(record)
            else:
                buckets[stamp] = [record]
        return buckets
@@ -0,0 +1,40 @@
1
+ """Cooldown logic — suppresses repeat alerts within a configured window."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from detectkit.alerting.orchestrator._base import _OrchestratorBase
6
+ from detectkit.core.interval import Interval
7
+ from detectkit.utils.datetime_utils import now_utc_naive
8
+
9
+
10
class _CooldownMixin(_OrchestratorBase):
    """Mixin that answers whether this alert config is still cooling down."""

    def _is_in_cooldown(self) -> bool:
        """Tell whether a new alert must be suppressed by the cooldown window.

        Returns ``False`` (alert allowed) when any of these holds:
          * no alert config, or no ``alert_cooldown`` set on it;
          * no internal manager is wired in, so state cannot be read;
          * no previous alert timestamp is stored;
          * ``cooldown_reset_on_recovery`` is set and a recovery has been
            observed since the last alert.

        Otherwise returns ``True`` while the time elapsed since the last
        alert is shorter than the configured cooldown.
        """
        config = self.alert_config
        if not config or not config.alert_cooldown:
            return False

        store = self.internal
        if not store:
            return False

        previous_alert_at = store.get_last_alert_timestamp(
            self.metric_name, self.alert_config_id
        )
        if not previous_alert_at:
            return False

        window_seconds = Interval(config.alert_cooldown).seconds
        # Sampled before the recovery lookup, exactly as the elapsed time
        # has always been measured.
        seconds_since_alert = (now_utc_naive() - previous_alert_at).total_seconds()

        recovered = (
            config.cooldown_reset_on_recovery
            and self._check_recovery_since_last_alert(previous_alert_at)
        )
        if recovered:
            return False

        return seconds_since_alert < window_seconds
@@ -0,0 +1,148 @@
1
+ """Decision logic: ``should_alert`` and the consecutive-anomaly helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import datetime, timezone
6
+ from typing import Dict, List, Optional, Tuple
7
+
8
+ import numpy as np
9
+
10
+ from detectkit.alerting.channels.base import AlertData
11
+ from detectkit.alerting.orchestrator._base import _OrchestratorBase
12
+ from detectkit.alerting.orchestrator._types import DetectionRecord
13
+ from detectkit.utils.datetime_utils import now_utc, to_aware_utc
14
+
15
+
16
class _DecisionMixin(_OrchestratorBase):
    """Mixin deciding whether the most recent detections warrant an alert."""

    def should_alert(
        self,
        recent_detections: List[DetectionRecord],
    ) -> Tuple[bool, Optional[AlertData]]:
        """Evaluate *recent_detections* and decide whether to alert.

        Checks run in this order:
          1. empty input → no alert;
          2. cooldown active → no alert;
          3. fewer than ``min_detectors`` anomalies on the newest
             timestamp → no alert;
          4. fewer than ``consecutive_anomalies`` qualifying timestamps in a
             row → no alert.

        Returns:
            ``(True, alert_data)`` when every check passes, otherwise
            ``(False, None)``.
        """
        no_alert: Tuple[bool, Optional[AlertData]] = (False, None)

        # The emptiness test short-circuits, so the cooldown lookup is only
        # performed when there is something to evaluate.
        if not recent_detections or self._is_in_cooldown():
            return no_alert

        by_time = self._group_by_timestamp(recent_detections)
        newest_first = sorted(by_time, reverse=True)

        newest_anomalies = [
            rec for rec in by_time[newest_first[0]] if rec.is_anomaly
        ]
        if len(newest_anomalies) < self.conditions.min_detectors:
            return no_alert

        streak = self._count_consecutive_anomalies(by_time, newest_first)
        if streak < self.conditions.consecutive_anomalies:
            return no_alert

        return True, self._build_alert_data(newest_anomalies, streak)

    def _count_consecutive_anomalies(
        self,
        detections_by_time: Dict[np.datetime64, List[DetectionRecord]],
        timestamps_sorted: List[np.datetime64],
    ) -> int:
        """Count qualifying timestamps in a row, walking newest → oldest.

        A timestamp qualifies when at least ``min_detectors`` of its records
        are anomalies and the direction of its first anomaly satisfies the
        configured direction policy. Counting stops at the first timestamp
        that does not qualify.
        """
        policy = self.conditions.direction
        streak = 0
        anchor: Optional[str] = None  # direction fixed by the newest point ("same")

        for ts in timestamps_sorted:
            flagged = [rec for rec in detections_by_time[ts] if rec.is_anomaly]
            if len(flagged) < self.conditions.min_detectors:
                break

            direction = flagged[0].direction

            if policy in ("up", "down"):
                # A fixed-direction policy only counts matching anomalies.
                if direction != policy:
                    break
                streak += 1
            elif policy == "same":
                if anchor is None:
                    streak = 1
                    anchor = direction
                elif direction == anchor:
                    streak += 1
                else:
                    break  # direction flipped → stop counting
            else:
                # "any", and any unknown policy, counts every anomaly.
                streak += 1

        return streak

    def _build_alert_data(
        self,
        anomalies: List[DetectionRecord],
        consecutive_count: int,
    ) -> AlertData:
        """Assemble the ``AlertData`` payload for the anomalous records.

        Timestamp, value, confidence bounds and direction come from the first
        record. With several records the severity is their maximum and the
        detector name, params and metadata are combined summaries.
        """
        lead = anomalies[0]
        total = len(anomalies)

        if total <= 1:
            severity = lead.severity
            reported_name = lead.detector_name
            reported_params = lead.detector_params
            metadata = lead.detection_metadata
        else:
            severity = max(rec.severity for rec in anomalies)
            reported_name = f"{total} detectors"
            reported_params = "; ".join(
                f"{rec.detector_name}: {rec.detector_params}" for rec in anomalies
            )
            metadata = {
                "detectors": [rec.detector_name for rec in anomalies],
                "count": total,
            }
            for position, rec in enumerate(anomalies):
                metadata[f"detector_{position}_metadata"] = rec.detection_metadata

        return AlertData(
            metric_name=self.metric_name,
            timestamp=lead.timestamp,
            timezone=self.timezone_display,
            value=lead.value,
            confidence_lower=lead.confidence_lower,
            confidence_upper=lead.confidence_upper,
            detector_name=reported_name,
            detector_params=reported_params,
            direction=lead.direction,
            severity=severity,
            detection_metadata=metadata,
            consecutive_count=consecutive_count,
            description=self.description,
            mentions=self.mentions,
        )

    def get_last_complete_point(self, now: Optional[datetime] = None) -> datetime:
        """Return the start of the interval preceding the one containing *now*.

        *now* defaults to ``now_utc()`` and is passed through ``to_aware_utc``.
        The result is a timezone-aware UTC ``datetime``.
        """
        moment = to_aware_utc(now_utc() if now is None else now)

        step = self.interval.seconds
        current_boundary = (int(moment.timestamp()) // step) * step
        return datetime.fromtimestamp(current_boundary - step, tz=timezone.utc)
@@ -0,0 +1,69 @@
1
+ """Dispatch mixin — actually sends alerts/recoveries via channels."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Dict, List, Optional
6
+
7
+ from detectkit.alerting.channels.base import AlertData, BaseAlertChannel
8
+ from detectkit.alerting.orchestrator._base import _OrchestratorBase
9
+ from detectkit.utils.datetime_utils import now_utc_naive
10
+
11
+
12
class _DispatchMixin(_OrchestratorBase):
    """Mixin that delivers alert and recovery payloads to channels."""

    def send_alerts(
        self,
        alert_data: AlertData,
        channels: List[BaseAlertChannel],
        template: Optional[str] = None,
    ) -> Dict[str, bool]:
        """Send *alert_data* to every channel; record success per-channel.

        Updates ``last_alert_sent`` (and increments the counter) when at
        least one channel succeeded and an internal manager is available —
        this is what powers cooldown and recovery detection.

        Args:
            alert_data: Payload handed to each channel's ``send``.
            channels: Channels to notify, in order.
            template: Optional template forwarded to each channel.

        Returns:
            Mapping of channel label to delivery success (see ``_dispatch``).
        """
        results = self._dispatch(channels, alert_data, template, "alert")

        if any(results.values()) and self.internal:
            self.internal.update_alert_timestamp(
                metric_name=self.metric_name,
                alert_config_id=self.alert_config_id,
                timestamp=now_utc_naive(),
                increment_count=True,
            )
        return results

    def send_recovery(
        self,
        alert_data: AlertData,
        channels: List[BaseAlertChannel],
        template: Optional[str] = None,
    ) -> Dict[str, bool]:
        """Send a recovery notification and stamp ``last_recovery_sent``.

        The recovery timestamp is only written when at least one channel
        succeeded and an internal manager is available.

        Returns:
            Mapping of channel label to delivery success (see ``_dispatch``).
        """
        results = self._dispatch(channels, alert_data, template, "recovery")

        if any(results.values()) and self.internal:
            self.internal.update_recovery_timestamp(
                metric_name=self.metric_name,
                alert_config_id=self.alert_config_id,
                timestamp=now_utc_naive(),
            )
        return results

    @staticmethod
    def _dispatch(
        channels: List[BaseAlertChannel],
        alert_data: AlertData,
        template: Optional[str],
        kind: str,
    ) -> Dict[str, bool]:
        """Call ``send`` on each channel and collect a per-channel result.

        Results are keyed by the channel's class name. When several channels
        share a class, the first keeps the bare class name and later ones
        get a ``#2``, ``#3``, … suffix, so one channel's outcome can never
        overwrite another's (which previously could hide a successful
        delivery from the caller's ``any(results.values())`` check).

        Args:
            channels: Channels to notify, in order.
            alert_data: Payload passed to ``channel.send``.
            template: Template passed to ``channel.send``.
            kind: Label used in the error message ("alert" / "recovery").

        Returns:
            Mapping of channel label to ``True`` on success, ``False`` on a
            falsy return value or a raised exception.
        """
        results: Dict[str, bool] = {}
        seen_per_class: Dict[str, int] = {}

        for channel in channels:
            class_name = channel.__class__.__name__
            occurrence = seen_per_class.get(class_name, 0) + 1
            seen_per_class[class_name] = occurrence
            channel_name = (
                class_name if occurrence == 1 else f"{class_name}#{occurrence}"
            )

            try:
                results[channel_name] = bool(channel.send(alert_data, template))
            except Exception as exc:
                # One bad channel must not abort the others.
                print(f"Error sending {kind} via {channel_name}: {exc}")
                results[channel_name] = False
        return results
@@ -0,0 +1,203 @@
1
+ """Recovery decision and reconstruction logic."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import datetime
6
+ from typing import List, Optional, Tuple
7
+
8
+ import numpy as np
9
+
10
+ from detectkit.alerting.channels.base import AlertData
11
+ from detectkit.alerting.orchestrator._base import _OrchestratorBase
12
+ from detectkit.alerting.orchestrator._types import (
13
+ DetectionRecord,
14
+ _direction_from_metadata,
15
+ _parse_detection_metadata,
16
+ )
17
+
18
+
19
class _RecoveryMixin(_OrchestratorBase):
    """Mixin deciding when a recovery notification is due and building it."""

    def should_send_recovery(
        self,
        recent_detections: List[DetectionRecord],
    ) -> Tuple[bool, Optional[AlertData]]:
        """Decide whether to send a recovery notification.

        Conditions (all must hold):
          1. An internal manager is available and a previous alert has been
             sent (``last_alert_sent`` exists).
          2. We haven't already notified recovery for this incident.
          3. The metric has actually recovered (no blocking anomalies).
          4. A recovery payload can be built from *recent_detections*.

        Returns:
            ``(True, recovery_data)`` when a recovery should be sent,
            otherwise ``(False, None)``.
        """
        if not self.internal:
            return False, None

        last_alert = self.internal.get_last_alert_timestamp(
            self.metric_name, self.alert_config_id
        )
        if not last_alert:
            return False, None

        last_recovery = self.internal.get_last_recovery_timestamp(
            self.metric_name, self.alert_config_id
        )
        if last_recovery and last_recovery >= last_alert:
            return False, None  # already notified for this incident

        if not self._check_recovery_since_last_alert(last_alert):
            return False, None

        recovery_data = self._build_recovery_data(recent_detections)
        if not recovery_data:
            return False, None
        return True, recovery_data

    def _check_recovery_since_last_alert(
        self, last_alert_timestamp: datetime
    ) -> bool:
        """Return ``True`` when the metric has recovered since *last_alert_timestamp*.

        Direction-aware: a "down"-only alert is not blocked by a fresh
        "up" anomaly, since the alert condition no longer holds.

        Returns ``False`` when no internal manager is available, and ``True``
        when there are no detections (or no detector entries) to inspect.
        """
        if not self.internal:
            return False

        last_point = self.get_last_complete_point()
        # +5 for safety margin so we don't truncate the consecutive window.
        num_points = self.conditions.consecutive_anomalies + 5

        recent_detections = self.internal.get_recent_detections(
            metric_name=self.metric_name,
            last_point=last_point,
            num_points=num_points,
            created_after=last_alert_timestamp,
        )
        if not recent_detections:
            # No fresh detections at all → assume recovery.
            return True

        records: List[DetectionRecord] = []
        for det in recent_detections:
            detector_ids = det["detector_ids"]
            metadata_list = (
                det.get("detection_metadata_list") or [None] * len(detector_ids)
            )
            for i, detector_id in enumerate(detector_ids):
                is_anomaly = det["is_anomaly_flags"][i]
                metadata = _parse_detection_metadata(metadata_list[i])
                records.append(
                    DetectionRecord(
                        timestamp=np.datetime64(det["timestamp"]),
                        detector_name=det["detector_names"][i],
                        detector_id=detector_id,
                        detector_params=det["detector_params_list"][i],
                        value=det["value"],
                        is_anomaly=is_anomaly,
                        confidence_lower=det["confidence_lowers"][i],
                        confidence_upper=det["confidence_uppers"][i],
                        direction=_direction_from_metadata(metadata, is_anomaly),
                        severity=0.0,  # not used for the recovery check
                        detection_metadata=metadata,
                    )
                )

        if not records:
            # Rows came back but none carried a detector entry; there is
            # nothing that could block recovery (and indexing the newest
            # timestamp below would otherwise raise IndexError).
            return True

        detections_by_time = self._group_by_timestamp(records)
        latest_timestamp = max(detections_by_time)
        latest_anomalies = [
            d for d in detections_by_time[latest_timestamp] if d.is_anomaly
        ]

        direction_condition = self.conditions.direction
        if direction_condition == "down":
            blocking = [d for d in latest_anomalies if d.direction == "down"]
        elif direction_condition == "up":
            blocking = [d for d in latest_anomalies if d.direction == "up"]
        elif direction_condition == "same":
            trigger_direction = self._get_alert_trigger_direction(
                last_alert_timestamp
            )
            if trigger_direction is None:
                blocking = latest_anomalies  # conservative fallback
            else:
                blocking = [
                    d for d in latest_anomalies if d.direction == trigger_direction
                ]
        else:  # "any" / unknown — preserve historical behaviour
            blocking = latest_anomalies

        return not blocking

    def _get_alert_trigger_direction(
        self, last_alert_timestamp: datetime
    ) -> Optional[str]:
        """Return the direction of the anomaly that triggered the last alert.

        Looks up the single most recent detection row at
        *last_alert_timestamp* and returns the first "up"/"down" direction
        found among its anomalous detectors, or ``None`` when there is no
        internal manager, no row, or no such direction.
        """
        if not self.internal:
            return None

        trigger_detections = self.internal.get_recent_detections(
            metric_name=self.metric_name,
            last_point=last_alert_timestamp,
            num_points=1,
        )
        if not trigger_detections:
            return None

        det = trigger_detections[0]
        detector_ids = det["detector_ids"]
        metadata_list = (
            det.get("detection_metadata_list") or [None] * len(detector_ids)
        )
        for i, _ in enumerate(detector_ids):
            if not det["is_anomaly_flags"][i]:
                continue
            # Parse the stored metadata first, exactly as
            # _check_recovery_since_last_alert does, so both code paths
            # derive directions from the same representation.
            metadata = _parse_detection_metadata(metadata_list[i])
            direction = _direction_from_metadata(metadata, True)
            if direction in ("up", "down"):
                return direction
        return None

    def _build_recovery_data(
        self,
        detections: List[DetectionRecord],
    ) -> Optional[AlertData]:
        """Construct the AlertData payload sent as a recovery notification.

        Returns ``None`` when *detections* is empty.
        """
        if not detections:
            return None

        # ``detections`` is oldest→newest, so the latest point lives at [-1].
        latest = detections[-1]

        # Prefer the latest CI so the message reflects the *current* interval.
        # Fall back to the last anomalous point only if the latest row has no
        # CI (e.g. missing-data / insufficient-data placeholders).
        recovery_ci_lower = latest.confidence_lower
        recovery_ci_upper = latest.confidence_upper
        recovery_detector_name = latest.detector_name
        recovery_detector_params = latest.detector_params

        if recovery_ci_lower is None or recovery_ci_upper is None:
            last_anomalous = next(
                (d for d in reversed(detections) if d.is_anomaly), None
            )
            if last_anomalous:
                recovery_detector_name = last_anomalous.detector_name
                recovery_detector_params = last_anomalous.detector_params
                recovery_ci_lower = last_anomalous.confidence_lower
                recovery_ci_upper = last_anomalous.confidence_upper

        return AlertData(
            metric_name=self.metric_name,
            timestamp=latest.timestamp,
            timezone=self.timezone_display,
            value=latest.value,
            confidence_lower=recovery_ci_lower,
            confidence_upper=recovery_ci_upper,
            detector_name=recovery_detector_name,
            detector_params=recovery_detector_params,
            direction="none",
            severity=0.0,
            detection_metadata={},
            consecutive_count=0,
            is_recovery=True,
            description=self.description,
            mentions=self.mentions,
        )