detectkit 0.3.5__tar.gz → 0.3.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. {detectkit-0.3.5/detectkit.egg-info → detectkit-0.3.6}/PKG-INFO +8 -1
  2. {detectkit-0.3.5 → detectkit-0.3.6}/README.md +7 -0
  3. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/alerting/channels/base.py +25 -1
  4. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/alerting/orchestrator.py +118 -0
  5. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/config/metric_config.py +11 -0
  6. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/database/internal_tables.py +83 -0
  7. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/database/tables.py +5 -0
  8. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/orchestration/task_manager.py +30 -1
  9. {detectkit-0.3.5 → detectkit-0.3.6/detectkit.egg-info}/PKG-INFO +8 -1
  10. {detectkit-0.3.5 → detectkit-0.3.6}/pyproject.toml +1 -1
  11. {detectkit-0.3.5 → detectkit-0.3.6}/LICENSE +0 -0
  12. {detectkit-0.3.5 → detectkit-0.3.6}/MANIFEST.in +0 -0
  13. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/__init__.py +0 -0
  14. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/alerting/__init__.py +0 -0
  15. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/alerting/channels/__init__.py +0 -0
  16. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/alerting/channels/email.py +0 -0
  17. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/alerting/channels/factory.py +0 -0
  18. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/alerting/channels/mattermost.py +0 -0
  19. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/alerting/channels/slack.py +0 -0
  20. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/alerting/channels/telegram.py +0 -0
  21. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/alerting/channels/webhook.py +0 -0
  22. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/cli/__init__.py +0 -0
  23. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/cli/commands/__init__.py +0 -0
  24. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/cli/commands/init.py +0 -0
  25. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/cli/commands/run.py +0 -0
  26. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/cli/commands/test_alert.py +0 -0
  27. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/cli/main.py +0 -0
  28. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/config/__init__.py +0 -0
  29. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/config/profile.py +0 -0
  30. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/config/project_config.py +0 -0
  31. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/config/validator.py +0 -0
  32. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/core/__init__.py +0 -0
  33. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/core/interval.py +0 -0
  34. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/core/models.py +0 -0
  35. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/database/__init__.py +0 -0
  36. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/database/clickhouse_manager.py +0 -0
  37. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/database/manager.py +0 -0
  38. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/detectors/__init__.py +0 -0
  39. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/detectors/base.py +0 -0
  40. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/detectors/factory.py +0 -0
  41. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/detectors/statistical/__init__.py +0 -0
  42. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/detectors/statistical/iqr.py +0 -0
  43. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/detectors/statistical/mad.py +0 -0
  44. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/detectors/statistical/manual_bounds.py +0 -0
  45. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/detectors/statistical/zscore.py +0 -0
  46. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/loaders/__init__.py +0 -0
  47. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/loaders/metric_loader.py +0 -0
  48. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/loaders/query_template.py +0 -0
  49. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/orchestration/__init__.py +0 -0
  50. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/utils/__init__.py +0 -0
  51. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit/utils/stats.py +0 -0
  52. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit.egg-info/SOURCES.txt +0 -0
  53. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit.egg-info/dependency_links.txt +0 -0
  54. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit.egg-info/entry_points.txt +0 -0
  55. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit.egg-info/requires.txt +0 -0
  56. {detectkit-0.3.5 → detectkit-0.3.6}/detectkit.egg-info/top_level.txt +0 -0
  57. {detectkit-0.3.5 → detectkit-0.3.6}/requirements.txt +0 -0
  58. {detectkit-0.3.5 → detectkit-0.3.6}/setup.cfg +0 -0
  59. {detectkit-0.3.5 → detectkit-0.3.6}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: detectkit
3
- Version: 0.3.5
3
+ Version: 0.3.6
4
4
  Summary: Metric monitoring with automatic anomaly detection
5
5
  Author: detectkit team
6
6
  License: MIT
@@ -74,6 +74,13 @@ Published to PyPI: https://pypi.org/project/detectkit/
74
74
 
75
75
  Complete rewrite with modern architecture and full documentation (2025).
76
76
 
77
+ ### What's New in v0.3.6
78
+
79
+ ✅ **Recovery Notifications** - Know when your metrics stabilize
80
+ - `notify_on_recovery: true` sends a message when anomaly resolves
81
+ - Custom `template_recovery` for recovery message format
82
+ - One notification per incident, no duplicates
83
+
77
84
  ### What's New in v0.3.0
78
85
 
79
86
  🎯 **Alert Cooldown** - Prevent alert spam from persistent anomalies
@@ -12,6 +12,13 @@ Published to PyPI: https://pypi.org/project/detectkit/
12
12
 
13
13
  Complete rewrite with modern architecture and full documentation (2025).
14
14
 
15
+ ### What's New in v0.3.6
16
+
17
+ ✅ **Recovery Notifications** - Know when your metrics stabilize
18
+ - `notify_on_recovery: true` sends a message when anomaly resolves
19
+ - Custom `template_recovery` for recovery message format
20
+ - One notification per incident, no duplicates
21
+
15
22
  ### What's New in v0.3.0
16
23
 
17
24
  🎯 **Alert Cooldown** - Prevent alert spam from persistent anomalies
@@ -46,6 +46,7 @@ class AlertData:
46
46
  severity: float
47
47
  detection_metadata: Dict[str, Any]
48
48
  consecutive_count: int = 1
49
+ is_recovery: bool = False
49
50
 
50
51
 
51
52
  class BaseAlertChannel(ABC):
@@ -99,6 +100,7 @@ class BaseAlertChannel(ABC):
99
100
  self,
100
101
  alert_data: AlertData,
101
102
  template: Optional[str] = None,
103
+ recovery_template: Optional[str] = None,
102
104
  ) -> str:
103
105
  """
104
106
  Format alert message from template.
@@ -127,7 +129,10 @@ class BaseAlertChannel(ABC):
127
129
  >>> message = channel.format_message(alert_data, template)
128
130
  """
129
131
  if template is None:
130
- template = self.get_default_template()
132
+ if alert_data.is_recovery:
133
+ template = recovery_template or self.get_default_recovery_template()
134
+ else:
135
+ template = self.get_default_template()
131
136
 
132
137
  # Format timestamp to string
133
138
  from datetime import datetime
@@ -149,6 +154,8 @@ class BaseAlertChannel(ABC):
149
154
  confidence_str = "N/A"
150
155
 
151
156
  # Format message
157
+ status = "RECOVERED" if alert_data.is_recovery else "ANOMALY"
158
+
152
159
  try:
153
160
  message = template.format(
154
161
  metric_name=alert_data.metric_name,
@@ -163,6 +170,7 @@ class BaseAlertChannel(ABC):
163
170
  direction=alert_data.direction,
164
171
  severity=alert_data.severity,
165
172
  consecutive_count=alert_data.consecutive_count,
173
+ status=status,
166
174
  )
167
175
  except KeyError as e:
168
176
  # If template has unknown variables, fall back to default
@@ -188,6 +196,22 @@ class BaseAlertChannel(ABC):
188
196
  "Severity: {severity:.2f}"
189
197
  )
190
198
 
199
+ def get_default_recovery_template(self) -> str:
200
+ """
201
+ Get default recovery message template.
202
+
203
+ Returns:
204
+ Default recovery template string
205
+ """
206
+ return (
207
+ "Metric recovered: {metric_name}\n"
208
+ "Time: {timestamp}\n"
209
+ "Value: {value}\n"
210
+ "Confidence interval: {confidence_interval}\n"
211
+ "Detector: {detector_name}\n"
212
+ "Status: metric returned to normal"
213
+ )
214
+
191
215
  def __repr__(self) -> str:
192
216
  """String representation of channel."""
193
217
  return f"{self.__class__.__name__}()"
@@ -521,6 +521,124 @@ class AlertOrchestrator:
521
521
  # Recovery = consecutive dropped below threshold
522
522
  return consecutive < self.conditions.consecutive_anomalies
523
523
 
524
+ def should_send_recovery(
525
+ self,
526
+ recent_detections: List[DetectionRecord],
527
+ ) -> tuple[bool, Optional[AlertData]]:
528
+ """
529
+ Determine if recovery notification should be sent.
530
+
531
+ Recovery is sent when:
532
+ 1. A previous alert was sent (last_alert_sent exists)
533
+ 2. Metric has recovered (consecutive anomalies < threshold)
534
+ 3. Recovery hasn't already been sent for this incident
535
+ (last_recovery_sent >= last_alert_sent means recovery was already notified)
536
+
537
+ Args:
538
+ recent_detections: List of recent detection records (sorted by time, newest first)
539
+
540
+ Returns:
541
+ Tuple of (should_send, recovery_alert_data)
542
+ """
543
+ if not self.internal:
544
+ return False, None
545
+
546
+ # Check if there was a previous alert
547
+ last_alert = self.internal.get_last_alert_timestamp(self.metric_name)
548
+ if not last_alert:
549
+ return False, None # Never alerted, nothing to recover from
550
+
551
+ # Check if recovery already sent for this incident
552
+ last_recovery = self.internal.get_last_recovery_timestamp(self.metric_name)
553
+ if last_recovery and last_recovery >= last_alert:
554
+ return False, None # Already sent recovery for this alert
555
+
556
+ # Check if metric actually recovered
557
+ has_recovery = self._check_recovery_since_last_alert(last_alert)
558
+ if not has_recovery:
559
+ return False, None # Still in anomaly state
560
+
561
+ # Build recovery AlertData from latest normal point
562
+ recovery_data = self._build_recovery_data(recent_detections)
563
+ if not recovery_data:
564
+ return False, None
565
+
566
+ return True, recovery_data
567
+
568
+ def _build_recovery_data(
569
+ self,
570
+ detections: List[DetectionRecord],
571
+ ) -> Optional[AlertData]:
572
+ """
573
+ Build AlertData for recovery notification from latest detection.
574
+
575
+ Args:
576
+ detections: Recent detection records
577
+
578
+ Returns:
579
+ AlertData with is_recovery=True, or None if no data
580
+ """
581
+ if not detections:
582
+ return None
583
+
584
+ # Use the latest detection point for recovery info
585
+ latest = detections[0]
586
+
587
+ return AlertData(
588
+ metric_name=self.metric_name,
589
+ timestamp=latest.timestamp,
590
+ timezone=self.timezone_display,
591
+ value=latest.value,
592
+ confidence_lower=latest.confidence_lower,
593
+ confidence_upper=latest.confidence_upper,
594
+ detector_name=latest.detector_name,
595
+ detector_params=latest.detector_params,
596
+ direction="none",
597
+ severity=0.0,
598
+ detection_metadata={},
599
+ consecutive_count=0,
600
+ is_recovery=True,
601
+ )
602
+
603
+ def send_recovery(
604
+ self,
605
+ alert_data: AlertData,
606
+ channels: List[BaseAlertChannel],
607
+ template: Optional[str] = None,
608
+ ) -> Dict[str, bool]:
609
+ """
610
+ Send recovery notifications through all configured channels.
611
+
612
+ Args:
613
+ alert_data: Recovery alert data (is_recovery=True)
614
+ channels: List of alert channels
615
+ template: Optional custom recovery message template
616
+
617
+ Returns:
618
+ Dict mapping channel name to success status
619
+ """
620
+ results = {}
621
+
622
+ for channel in channels:
623
+ try:
624
+ success = channel.send(alert_data, template)
625
+ channel_name = channel.__class__.__name__
626
+ results[channel_name] = success
627
+ except Exception as e:
628
+ channel_name = channel.__class__.__name__
629
+ print(f"Error sending recovery via {channel_name}: {e}")
630
+ results[channel_name] = False
631
+
632
+ # Update recovery timestamp after sending
633
+ if any(results.values()) and self.internal:
634
+ from datetime import timezone as tz
635
+ self.internal.update_recovery_timestamp(
636
+ metric_name=self.metric_name,
637
+ timestamp=datetime.now(tz.utc).replace(tzinfo=None),
638
+ )
639
+
640
+ return results
641
+
524
642
  def __repr__(self) -> str:
525
643
  """String representation."""
526
644
  return (
@@ -181,6 +181,17 @@ class AlertConfig(BaseModel):
181
181
  "Only applies if alert_cooldown is set. "
182
182
  "True = cooldown resets on recovery, False = strict cooldown independent of recovery."
183
183
  )
184
+ notify_on_recovery: bool = Field(
185
+ default=False,
186
+ description="Send notification when metric recovers from anomaly state. "
187
+ "Recovery is detected when consecutive anomalies drop below threshold "
188
+ "after an alert was previously sent."
189
+ )
190
+ template_recovery: Optional[str] = Field(
191
+ default=None,
192
+ description="Custom template for recovery notification message. "
193
+ "Supports same variables as anomaly templates plus {status}."
194
+ )
184
195
 
185
196
  @field_validator("consecutive_anomalies")
186
197
  @classmethod
@@ -976,3 +976,86 @@ class InternalTablesManager:
976
976
 
977
977
  # ClickHouse ALTER TABLE UPDATE is async, return 1 (optimistic)
978
978
  return 1
979
+
980
+ def get_last_recovery_timestamp(
981
+ self,
982
+ metric_name: str
983
+ ) -> Optional[datetime]:
984
+ """
985
+ Get timestamp of last sent recovery notification for a metric.
986
+
987
+ Args:
988
+ metric_name: Metric identifier
989
+
990
+ Returns:
991
+ Timestamp of last sent recovery, or None if never sent
992
+ """
993
+ full_table_name = self._manager.get_full_table_name(
994
+ TABLE_TASKS, use_internal=True
995
+ )
996
+
997
+ query = f"""
998
+ SELECT last_recovery_sent
999
+ FROM {full_table_name}
1000
+ WHERE metric_name = %(metric_name)s
1001
+ AND detector_id = 'pipeline'
1002
+ AND process_type = 'pipeline'
1003
+ LIMIT 1
1004
+ """
1005
+
1006
+ results = self._manager.execute_query(
1007
+ query,
1008
+ params={"metric_name": metric_name}
1009
+ )
1010
+
1011
+ if not results or not results[0].get("last_recovery_sent"):
1012
+ return None
1013
+
1014
+ last_sent = results[0]["last_recovery_sent"]
1015
+
1016
+ if hasattr(last_sent, 'tzinfo') and last_sent.tzinfo is not None:
1017
+ last_sent = last_sent.replace(tzinfo=None)
1018
+
1019
+ return last_sent
1020
+
1021
+ def update_recovery_timestamp(
1022
+ self,
1023
+ metric_name: str,
1024
+ timestamp: datetime,
1025
+ ) -> int:
1026
+ """
1027
+ Update last_recovery_sent timestamp after sending recovery notification.
1028
+
1029
+ Args:
1030
+ metric_name: Metric identifier
1031
+ timestamp: Timestamp when recovery was sent
1032
+
1033
+ Returns:
1034
+ Always 1 (the query result is not inspected, so this is an optimistic count)
1035
+ """
1036
+ full_table_name = self._manager.get_full_table_name(
1037
+ TABLE_TASKS, use_internal=True
1038
+ )
1039
+
1040
+ if hasattr(timestamp, 'tzinfo') and timestamp.tzinfo is not None:
1041
+ timestamp = timestamp.replace(tzinfo=None)
1042
+
1043
+ update_query = f"""
1044
+ ALTER TABLE {full_table_name}
1045
+ UPDATE
1046
+ last_recovery_sent = %(timestamp)s,
1047
+ updated_at = %(timestamp)s
1048
+ WHERE metric_name = %(metric_name)s
1049
+ AND detector_id = 'pipeline'
1050
+ AND process_type = 'pipeline'
1051
+ """
1052
+
1053
+ self._manager.execute_query(
1054
+ update_query,
1055
+ params={
1056
+ "metric_name": metric_name,
1057
+ "timestamp": timestamp
1058
+ }
1059
+ )
1060
+
1061
+ return 1
@@ -128,6 +128,11 @@ def get_tasks_table_model() -> TableModel:
128
128
  nullable=True
129
129
  ),
130
130
  ColumnDefinition("alert_count", "UInt32", default="0"),
131
+ ColumnDefinition(
132
+ "last_recovery_sent",
133
+ "Nullable(DateTime64(3, 'UTC'))",
134
+ nullable=True
135
+ ),
131
136
  ],
132
137
  primary_key=["metric_name", "detector_id", "process_type"],
133
138
  engine="MergeTree",
@@ -643,7 +643,36 @@ class TaskManager:
643
643
  else:
644
644
  click.echo(click.style(" └─ No valid alert channels available", fg="yellow"))
645
645
  else:
646
- click.echo(" └─ No alert needed (conditions not met)")
646
+ # Check recovery notification
647
+ if alerting_config.notify_on_recovery:
648
+ should_recover, recovery_data = orchestrator.should_send_recovery(
649
+ recent_detections
650
+ )
651
+
652
+ if should_recover:
653
+ click.echo(click.style(f" │ ✓ Recovery detected! Sending to {len(alerting_config.channels)} channel(s)...", fg="green", bold=True))
654
+
655
+ channels = self._create_alert_channels(alerting_config.channels)
656
+
657
+ if channels:
658
+ results = orchestrator.send_recovery(
659
+ recovery_data,
660
+ channels,
661
+ template=alerting_config.template_recovery,
662
+ )
663
+ recovery_sent = sum(1 for success in results.values() if success)
664
+
665
+ for channel_name, success in results.items():
666
+ status = click.style("✓", fg="green") if success else click.style("✗", fg="red")
667
+ click.echo(f" │ {status} {channel_name}")
668
+
669
+ click.echo(click.style(f" └─ Sent {recovery_sent}/{len(channels)} recovery notifications", fg="green"))
670
+ else:
671
+ click.echo(click.style(" └─ No valid alert channels available", fg="yellow"))
672
+ else:
673
+ click.echo(" └─ No alert needed (conditions not met)")
674
+ else:
675
+ click.echo(" └─ No alert needed (conditions not met)")
647
676
 
648
677
  return {"alerts_sent": alerts_sent}
649
678
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: detectkit
3
- Version: 0.3.5
3
+ Version: 0.3.6
4
4
  Summary: Metric monitoring with automatic anomaly detection
5
5
  Author: detectkit team
6
6
  License: MIT
@@ -74,6 +74,13 @@ Published to PyPI: https://pypi.org/project/detectkit/
74
74
 
75
75
  Complete rewrite with modern architecture and full documentation (2025).
76
76
 
77
+ ### What's New in v0.3.6
78
+
79
+ ✅ **Recovery Notifications** - Know when your metrics stabilize
80
+ - `notify_on_recovery: true` sends a message when anomaly resolves
81
+ - Custom `template_recovery` for recovery message format
82
+ - One notification per incident, no duplicates
83
+
77
84
  ### What's New in v0.3.0
78
85
 
79
86
  🎯 **Alert Cooldown** - Prevent alert spam from persistent anomalies
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "detectkit"
7
- version = "0.3.5"
7
+ version = "0.3.6"
8
8
  description = "Metric monitoring with automatic anomaly detection"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
File without changes
File without changes
File without changes
File without changes
File without changes