PyPI - detectkit - Versions diffs - 0.16.4__tar.gz → 0.17.0__tar.gz - Mend

detectkit 0.16.4.tar.gz → 0.17.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (105) hide show

{detectkit-0.16.4/detectkit.egg-info → detectkit-0.17.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: detectkit
-Version: 0.16.4
+Version: 0.17.0
 Summary: Metric monitoring with automatic anomaly detection
 Author: detectkit team
 License: MIT

{detectkit-0.16.4 → detectkit-0.17.0}/detectkit/__init__.py RENAMED Viewed

@@ -4,7 +4,7 @@ detectk - Anomaly Detection for Time-Series Metrics
 A Python library for data analysts and engineers to monitor metrics with automatic anomaly detection.
 """
-__version__ = "0.16.4"
+__version__ = "0.17.0"
 from detectkit.core.interval import Interval
 from detectkit.core.models import ColumnDefinition, TableModel

{detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/channels/base.py RENAMED Viewed

@@ -92,6 +92,18 @@ class AlertData:
     direction_policy: str | None = None
     consecutive_required: int | None = None
     detector_count: int = 1
+    # Incident timing — answers "how long has this been going on". The metric
+    # ``interval_seconds`` lets the message express the streak in wall-clock
+    # time; ``onset_timestamp`` is the first timestamp of the current anomalous
+    # run (anomaly) / the just-ended incident (recovery); ``streak_capped`` is
+    # True when the run is at least as long as the orchestrator's lookback
+    # window, so the duration is rendered as a lower bound ("over …"). The
+    # consecutive streak length itself rides on ``consecutive_count`` (the true
+    # run length, resolved at fire time). All default to None/False so
+    # direct-API callers and non-anomaly alerts render unchanged.
+    interval_seconds: int | None = None
+    onset_timestamp: Any | None = None
+    streak_capped: bool = False
 class BaseAlertChannel(ABC):
@@ -165,8 +177,18 @@ class BaseAlertChannel(ABC):
         - {direction} — observed/locked direction of the anomaly
         - {direction_policy} — configured direction rule ("same"/"any"/...)
         - {min_detectors} — configured quorum threshold (the rule)
-        - {consecutive_count} — observed consecutive points
+        - {consecutive_count} — true consecutive streak length (resolved at
+          fire time, not capped at the rule's threshold)
         - {consecutive_required} — configured consecutive threshold (rule)
+        - {interval_display} — metric interval as a string (e.g. "10min")
+        - {duration_display} — how long the streak/incident lasted
+          (e.g. "2h 30m"; "over …" when it predates the lookback window)
+        - {onset_display} / {started_display} — first timestamp of the run
+          ({started_display} adds "or earlier" when the run is capped)
+        - {anomaly_lead} / {recovery_lead} — the ready-made plain-language
+          lead sentence ("Anomalous for …" / "… Incident lasted …")
+        - {window_line} — "Started: … | Latest/Cleared: …" (or a single
+          "Detected at: …" line when the onset is unknown)
         - {severity}
         - {status}
@@ -228,22 +250,27 @@ class BaseAlertChannel(ABC):
         """
         import math
         from datetime import datetime
+        from zoneinfo import ZoneInfo
         import numpy as np
-        # Format timestamp to string
-        ts = alert_data.timestamp
-        if isinstance(ts, np.datetime64):
-            ts = ts.astype(datetime)
-        # Convert naive UTC timestamp to target timezone if specified
-        if alert_data.timezone:
-            from zoneinfo import ZoneInfo
-            ts = ts.replace(tzinfo=ZoneInfo("UTC")).astimezone(ZoneInfo(alert_data.timezone))
-            ts_str = f"{ts.strftime('%Y-%m-%d %H:%M:%S')} ({alert_data.timezone})"
-        else:
-            ts_str = ts.strftime("%Y-%m-%d %H:%M:%S")
+        def _fmt_ts(value: Any) -> str:
+            """Format a timestamp the same way for the main point and the onset:
+            naive UTC → target timezone, with a ``(tz)`` suffix when set."""
+            if value is None:
+                return ""
+            t = value
+            if isinstance(t, np.datetime64):
+                t = t.astype("datetime64[ms]").astype(datetime)
+            if not isinstance(t, datetime):
+                return str(t)
+            if alert_data.timezone:
+                t = t.replace(tzinfo=ZoneInfo("UTC")).astimezone(ZoneInfo(alert_data.timezone))
+                return f"{t.strftime('%Y-%m-%d %H:%M:%S')} ({alert_data.timezone})"
+            return t.strftime("%Y-%m-%d %H:%M:%S")
+        ts_str = _fmt_ts(alert_data.timestamp)
+        onset_str = _fmt_ts(alert_data.onset_timestamp)
         # Format confidence interval
         if alert_data.confidence_lower is not None and alert_data.confidence_upper is not None:
@@ -287,6 +314,66 @@ class BaseAlertChannel(ABC):
         )
         direction_policy = alert_data.direction_policy or alert_data.direction
+        # Incident timing — the "how long has this been going on" story shared by
+        # every channel. ``consecutive_count`` carries the *true* streak length
+        # (resolved at fire time); together with the metric interval it becomes a
+        # wall-clock duration and a plain-language lead. ``streak_capped`` means
+        # the run is at least as long as the orchestrator's lookback window, so
+        # the duration/started values render as lower bounds. Degrades cleanly to
+        # the legacy "Latest X/Y consecutive points met the quorum." lead when no
+        # interval is wired in (direct-API callers).
+        from detectkit.core.interval import Interval
+        from detectkit.utils.datetime_utils import format_duration
+        interval_seconds = alert_data.interval_seconds
+        streak = alert_data.consecutive_count or 0
+        capped = alert_data.streak_capped
+        interval_display = str(Interval(interval_seconds)) if interval_seconds else ""
+        if interval_seconds and streak >= 1:
+            duration_display = format_duration(streak * interval_seconds)
+            if capped:
+                duration_display = f"over {duration_display}"
+            streak_display = f"{streak}+" if capped else f"{streak}"
+            started_display = f"{onset_str} or earlier" if (capped and onset_str) else onset_str
+            intervals_word = "interval" if streak == 1 else "intervals"
+            anomaly_lead = (
+                f"Anomalous for {duration_display} — "
+                f"{streak_display} consecutive {interval_display} {intervals_word}."
+            )
+            recovery_lead = (
+                "The alert condition no longer holds — the metric is back within "
+                f"expected bounds. Incident lasted {duration_display} "
+                f"({streak_display} consecutive {interval_display} {intervals_word})."
+            )
+        else:
+            duration_display = ""
+            streak_display = f"{streak}" if streak else ""
+            started_display = onset_str
+            anomaly_lead = (
+                f"Latest {alert_data.consecutive_count}/{consecutive_required} "
+                "consecutive points met the quorum."
+            )
+            recovery_lead = (
+                "The alert condition no longer holds — the metric is back within "
+                "expected bounds."
+            )
+        # Kind-aware "window" line for the plain-text templates: the anomalous
+        # span (onset → latest/cleared) when known, else the single point.
+        kind = self.status_kind(alert_data)
+        if started_display and kind == "anomaly":
+            window_line = f"Started: {started_display} | Latest: {ts_str}\n"
+        elif started_display and kind == "recovery":
+            window_line = f"Started: {started_display} | Cleared: {ts_str}\n"
+        else:
+            window_label = {
+                "recovery": "Cleared at",
+                "no_data": "Expected at",
+                "error": "Detected at",
+            }.get(kind, "Detected at")
+            window_line = f"{window_label}: {ts_str}\n"
         # Display-safe value: stays usable even when value is None/NaN (no-data).
         raw_value = alert_data.value
         if raw_value is None or (isinstance(raw_value, float) and math.isnan(raw_value)):
@@ -350,6 +437,15 @@ class BaseAlertChannel(ABC):
             "severity": alert_data.severity,
             "consecutive_count": alert_data.consecutive_count,
             "consecutive_required": consecutive_required,
+            "interval_display": interval_display,
+            "duration_display": duration_display,
+            "streak_display": streak_display,
+            "streak_capped": capped,
+            "onset_display": onset_str,
+            "started_display": started_display,
+            "anomaly_lead": anomaly_lead,
+            "recovery_lead": recovery_lead,
+            "window_line": window_line,
             "status": status,
             "error_type": alert_data.error_type or "",
             "error_message": alert_data.error_message or "",
@@ -472,16 +568,14 @@ class BaseAlertChannel(ABC):
         return (
             "🔴 {project_name_prefix}Alert: {metric_name}\n"
             "{description_line}"
-            "Quorum {detector_count}/{min_detectors} · "
-            "direction {direction} (policy {direction_policy}) · "
-            "consecutive {consecutive_count}/{consecutive_required}\n"
+            "{anomaly_lead}\n"
             "Rule: min_detectors={min_detectors} · "
             "direction={direction_policy} · consecutive={consecutive_required}\n"
             "\n"
-            "Latest point (evidence):\n"
-            "· Time: {timestamp}\n"
-            "· Value: {value_display} | Expected: {expected_range}\n"
-            "· Severity: {severity:.2f}\n"
+            "Value: {value_display} | Expected: {expected_range}\n"
+            "Quorum: {detector_count}/{min_detectors} · {direction}\n"
+            "Severity: {severity:.2f}\n"
+            "{window_line}"
             "Detectors: {detector_name}\n"
             "Parameters: {detector_params}\n"
             "{dashboard_line}"
@@ -499,14 +593,12 @@ class BaseAlertChannel(ABC):
         return (
             "🟢 {project_name_prefix}Alert cleared: {metric_name}\n"
             "{description_line}"
-            "The alert condition no longer holds — "
-            "the metric is back within expected bounds.\n"
+            "{recovery_lead}\n"
             "Rule: min_detectors={min_detectors} · "
             "direction={direction_policy} · consecutive={consecutive_required}\n"
             "\n"
-            "Latest point:\n"
-            "· Time: {timestamp}\n"
-            "· Value: {value_display} | Expected: {expected_range}\n"
+            "Value: {value_display} | Expected: {expected_range}\n"
+            "{window_line}"
             "Detectors: {detector_name}\n"
             "{dashboard_line}"
             "{help_line}"

{detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/channels/email.py RENAMED Viewed

@@ -315,42 +315,38 @@ class EmailChannel(BaseAlertChannel):
             parts.append(self._lead_html(ctx["description"]))
         if kind == "anomaly":
+            # Description (how long it's been going on) leads; the Rule chip sits
+            # right above the stat grid it explains.
+            parts.append(self._lead_html(ctx["anomaly_lead"]))
             parts.append(self._rule_html(ctx))
-            parts.append(
-                self._lead_html(
-                    f"Latest {ctx['consecutive_count']}/{ctx['consecutive_required']} "
-                    "consecutive points met the quorum."
-                )
-            )
-            parts.append(
-                self._stat_grid(
-                    [
-                        ("Value", ctx["value_display"]),
-                        ("Expected", ctx["expected_range"]),
-                        ("Severity", f"{alert_data.severity:.2f}"),
-                        ("Detected at", ctx["timestamp"]),
-                    ]
-                )
-            )
+            stats = [
+                ("Value", ctx["value_display"]),
+                ("Expected", ctx["expected_range"]),
+                ("Severity", f"{alert_data.severity:.2f}"),
+                ("Quorum", f"{ctx['detector_count']}/{ctx['min_detectors']} · {ctx['direction']}"),
+            ]
+            if ctx["started_display"]:
+                stats.append(("Started", ctx["started_display"]))
+                stats.append(("Latest", ctx["timestamp"]))
+            else:
+                stats.append(("Detected at", ctx["timestamp"]))
+            parts.append(self._stat_grid(stats))
             if ctx["detector_params"]:
                 parts.append(self._params_html(ctx["detector_name"], ctx["detector_params"]))
         elif kind == "recovery":
-            lead = (
-                "The alert condition no longer holds — the metric is back within "
-                "expected bounds."
-            )
-            parts.append(self._lead_html(lead))
+            parts.append(self._lead_html(ctx["recovery_lead"]))
             parts.append(self._rule_html(ctx))
-            parts.append(
-                self._stat_grid(
-                    [
-                        ("Value", ctx["value_display"]),
-                        ("Expected", ctx["expected_range"]),
-                        ("Detector", ctx["detector_name"]),
-                        ("Cleared at", ctx["timestamp"]),
-                    ]
-                )
-            )
+            stats = [
+                ("Value", ctx["value_display"]),
+                ("Expected", ctx["expected_range"]),
+            ]
+            if ctx["started_display"]:
+                stats.append(("Started", ctx["started_display"]))
+                stats.append(("Cleared", ctx["timestamp"]))
+            else:
+                stats.append(("Cleared at", ctx["timestamp"]))
+            stats.append(("Detector", ctx["detector_name"]))
+            parts.append(self._stat_grid(stats))
         elif kind == "no_data":
             lead = "Query returned no datapoint for the latest expected interval."
             parts.append(self._lead_html(lead))

{detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/channels/telegram.py RENAMED Viewed

@@ -24,9 +24,10 @@ class TelegramChannel(BaseAlertChannel):
     Sends formatted messages to a Telegram chat using a bot token. The default
     (no custom ``template``) message is a structured **HTML** layout — a colored
-    status dot, a bold headline, the rule that fired, then the evidence
-    (value / expected / severity / time / detector / params) in ``<code>``,
-    plus an optional "Open dashboard" link and @mentions.
+    status dot, a bold headline, the lead (how long the anomaly has been
+    running) followed by the rule that fired, then the evidence
+    (value / expected / quorum / severity / started → latest / detector /
+    params) in ``<code>``, plus an optional "Open dashboard" link and @mentions.
     HTML is the default ``parse_mode`` because the legacy ``Markdown`` mode
     breaks on the detector params JSON (an unmatched ``_`` in e.g.
@@ -161,11 +162,9 @@ class TelegramChannel(BaseAlertChannel):
         lines.append("")  # blank line
         if kind == "anomaly":
-            lines.append(
-                f"<b>Quorum</b> {ctx['detector_count']}/{ctx['min_detectors']} · "
-                f"direction <b>{esc(ctx['direction'])}</b> · "
-                f"<b>{ctx['consecutive_count']}/{ctx['consecutive_required']}</b> consecutive"
-            )
+            # Description (how long it's been going on) leads; the Rule chip sits
+            # right above the evidence it explains.
+            lines.append(esc(ctx["anomaly_lead"]))
             lines.append(
                 f"<b>Rule</b> <code>min_detectors={ctx['min_detectors']} · "
                 f"direction={esc(ctx['direction_policy'])} · "
@@ -176,17 +175,24 @@ class TelegramChannel(BaseAlertChannel):
                 f"• Value: <code>{esc(ctx['value_display'])}</code> · "
                 f"Expected: <code>{esc(ctx['expected_range'])}</code>"
             )
+            lines.append(
+                f"• Quorum: <code>{ctx['detector_count']}/{ctx['min_detectors']} · "
+                f"{esc(ctx['direction'])}</code>"
+            )
             lines.append(f"• Severity: <code>{alert_data.severity:.2f}</code>")
-            lines.append(f"• Time: <code>{esc(ctx['timestamp'])}</code>")
+            if ctx["started_display"]:
+                lines.append(
+                    f"• Started: <code>{esc(ctx['started_display'])}</code> · "
+                    f"Latest: <code>{esc(ctx['timestamp'])}</code>"
+                )
+            else:
+                lines.append(f"• Time: <code>{esc(ctx['timestamp'])}</code>")
             lines.append(f"• Detector: <code>{esc(ctx['detector_name'])}</code>")
             if ctx["detector_params"]:
                 params = self._cap(ctx["detector_params"], _PARAMS_CAP)
                 lines.append(f"• Parameters: <code>{esc(params)}</code>")
         elif kind == "recovery":
-            lines.append(
-                "The alert condition no longer holds — the metric is back within "
-                "expected bounds."
-            )
+            lines.append(esc(ctx["recovery_lead"]))
             lines.append(
                 f"<b>Rule</b> <code>min_detectors={ctx['min_detectors']} · "
                 f"direction={esc(ctx['direction_policy'])} · "
@@ -197,7 +203,13 @@ class TelegramChannel(BaseAlertChannel):
                 f"• Value: <code>{esc(ctx['value_display'])}</code> · "
                 f"Expected: <code>{esc(ctx['expected_range'])}</code>"
             )
-            lines.append(f"• Time: <code>{esc(ctx['timestamp'])}</code>")
+            if ctx["started_display"]:
+                lines.append(
+                    f"• Started: <code>{esc(ctx['started_display'])}</code> · "
+                    f"Cleared: <code>{esc(ctx['timestamp'])}</code>"
+                )
+            else:
+                lines.append(f"• Cleared: <code>{esc(ctx['timestamp'])}</code>")
             lines.append(f"• Detector: <code>{esc(ctx['detector_name'])}</code>")
         elif kind == "no_data":
             lines.append("Query returned no datapoint for the latest expected interval.")

{detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/channels/webhook.py RENAMED Viewed

@@ -27,9 +27,11 @@ class WebhookChannel(BaseAlertChannel):
     Rendering: the default (no custom ``template``) payload is a single
     **Slack/Mattermost message attachment** — a colored accent bar, a title,
-    a short markdown lead, and a compact **fields grid** (Value / Expected /
-    Quorum / Severity, then full-width Detected-at / Detectors / Parameters),
-    branded with a ``footer`` + ``footer_icon``. This renders richly on both
+    a short markdown lead (how long the anomaly has been running) with the
+    **Rule** chip beneath it, and a compact **fields grid** (Value / Expected /
+    Quorum / Severity / Started / Latest — Started / Cleared on recovery — then
+    full-width Detectors / Parameters), branded with a ``footer`` +
+    ``footer_icon``. This renders richly on both
     Slack and Mattermost from one payload. A custom ``template`` degrades to a
     plain text-only attachment (the template is one opaque string that can't be
     sliced into fields), keeping the color, title and branding.
@@ -267,27 +269,32 @@ class WebhookChannel(BaseAlertChannel):
         )
         if kind == "anomaly":
-            lead = (
-                f"{rule_chip}\n"
-                f"Latest {ctx['consecutive_count']}/{ctx['consecutive_required']} "
-                "consecutive points met the quorum."
-            )
+            # Description (how long it's been going on) leads; the Rule chip sits
+            # right above the value/expected fields it explains.
+            lead = f"{ctx['anomaly_lead']}\n{rule_chip}"
             short("Value", code(ctx["value_display"]))
             short("Expected", code(ctx["expected_range"]))
             short("Quorum", f"{ctx['detector_count']}/{ctx['min_detectors']} · {ctx['direction']}")
             short("Severity", f"{alert_data.severity:.2f}")
-            full("Detected at", ctx["timestamp"])
+            # The problematic span: when it started and the latest point in it.
+            if ctx["started_display"]:
+                short("Started", ctx["started_display"])
+                short("Latest", ctx["timestamp"])
+            else:
+                full("Detected at", ctx["timestamp"])
             full("Detectors", code(ctx["detector_name"]))
             if ctx["detector_params"]:
                 full("Parameters", f"```{ctx['detector_params']}```")
         elif kind == "recovery":
-            lead = (
-                "The alert condition no longer holds — the metric is back within "
-                f"expected bounds.\n{rule_chip}"
-            )
+            lead = f"{ctx['recovery_lead']}\n{rule_chip}"
             short("Value", code(ctx["value_display"]))
             short("Expected", code(ctx["expected_range"]))
-            full("Detected at", ctx["timestamp"])
+            # The incident span: when it started and when it cleared.
+            if ctx["started_display"]:
+                short("Started", ctx["started_display"])
+                short("Cleared", ctx["timestamp"])
+            else:
+                full("Cleared at", ctx["timestamp"])
             full("Detectors", code(ctx["detector_name"]))
         elif kind == "no_data":
             lead = "Query returned no datapoint for the latest expected interval."
@@ -383,16 +390,14 @@ class WebhookChannel(BaseAlertChannel):
         """
         return (
             "{description_line}"
-            "Quorum {detector_count}/{min_detectors} · "
-            "direction {direction} (policy {direction_policy}) · "
-            "consecutive {consecutive_count}/{consecutive_required}\n"
+            "{anomaly_lead}\n"
             "Rule: min_detectors={min_detectors} · "
             "direction={direction_policy} · consecutive={consecutive_required}\n"
             "\n"
-            "Latest point (evidence):\n"
-            "· Time: {timestamp}\n"
-            "· Value: {value_display} | Expected: {expected_range}\n"
-            "· Severity: {severity:.2f}\n"
+            "Value: {value_display} | Expected: {expected_range}\n"
+            "Quorum: {detector_count}/{min_detectors} · {direction}\n"
+            "Severity: {severity:.2f}\n"
+            "{window_line}"
             "Detectors: {detector_name}\n"
             "Parameters: {detector_params}\n"
             "{dashboard_line}"
@@ -406,14 +411,12 @@ class WebhookChannel(BaseAlertChannel):
         """
         return (
             "{description_line}"
-            "The alert condition no longer holds — "
-            "the metric is back within expected bounds.\n"
+            "{recovery_lead}\n"
             "Rule: min_detectors={min_detectors} · "
             "direction={direction_policy} · consecutive={consecutive_required}\n"
             "\n"
-            "Latest point:\n"
-            "· Time: {timestamp}\n"
-            "· Value: {value_display} | Expected: {expected_range}\n"
+            "Value: {value_display} | Expected: {expected_range}\n"
+            "{window_line}"
             "Detectors: {detector_name}\n"
             "{dashboard_line}"
             "{help_line}"

{detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/orchestrator/_base.py RENAMED Viewed

@@ -10,6 +10,15 @@ from detectkit.alerting.orchestrator._types import (
 )
 from detectkit.core.interval import Interval
+# How far back the orchestrator looks to reconstruct the *true* length of an
+# anomalous run when an alert fires / clears. The decision itself only needs
+# ``consecutive_anomalies`` points, but the message reports "how long has this
+# been going on", which needs the full streak. Bounded so a metric stuck
+# anomalous for a very long time never loads unboundedly — past this the run is
+# reported as a lower bound ("over …"). Only queried on fire/recovery, never on
+# the hot no-alert path.
+STREAK_LOOKBACK_POINTS = 1000
 class _OrchestratorBase:
     def __init__(

{detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/orchestrator/_decision.py RENAMED Viewed

@@ -34,8 +34,8 @@ from datetime import datetime, timezone
 import numpy as np
 from detectkit.alerting.channels.base import AlertData
-from detectkit.alerting.orchestrator._base import _OrchestratorBase
-from detectkit.alerting.orchestrator._types import DetectionRecord
+from detectkit.alerting.orchestrator._base import STREAK_LOOKBACK_POINTS, _OrchestratorBase
+from detectkit.alerting.orchestrator._types import DetectionRecord, hydrate_detection_records
 from detectkit.utils.datetime_utils import now_utc, to_aware_utc
@@ -70,7 +70,46 @@ class _DecisionMixin(_OrchestratorBase):
         if not latest_quorum or consecutive < self.conditions.consecutive_anomalies:
             return False, None
-        return True, self._build_alert_data(latest_quorum, consecutive, direction)
+        # The decision is made; now resolve the *true* streak length/onset for
+        # the message (the shallow alert window caps ``consecutive`` at the rule
+        # threshold, which can't answer "how long has this been going on").
+        streak, onset_ts, capped = self._resolve_streak(latest_quorum[0].timestamp)
+        return True, self._build_alert_data(latest_quorum, streak, direction, onset_ts, capped)
+    def _resolve_streak(self, latest_ts: np.datetime64) -> tuple[int, np.datetime64, bool]:
+        """Resolve the full anomalous run ending at *latest_ts*.
+        Loads up to :data:`STREAK_LOOKBACK_POINTS` detections and re-walks the
+        same direction-aware quorum logic used to fire, so the message can report
+        the real onset/duration rather than the shallow alert-window count.
+        Returns ``(streak_count, onset_timestamp, capped)`` — ``capped`` is True
+        when the run fills the whole lookback window (onset is older than we saw).
+        Only runs when an alert actually fires, so the hot no-alert path is
+        untouched.
+        """
+        step = np.timedelta64(self.interval.seconds, "s")
+        if not self.internal:
+            # Direct-API path with no DB to walk: report the rule's required
+            # length so the message still carries a duration.
+            n = max(self.conditions.consecutive_anomalies, 1)
+            return n, latest_ts - step * (n - 1), False
+        last_point = latest_ts.astype("datetime64[ms]").astype(datetime)
+        rows = self.internal.get_recent_detections(
+            metric_name=self.metric_name,
+            last_point=last_point,
+            num_points=STREAK_LOOKBACK_POINTS,
+        )
+        records = hydrate_detection_records(rows)
+        if not records:
+            return 1, latest_ts, False
+        by_time = self._group_by_timestamp(records)
+        timestamps_sorted = sorted(by_time.keys(), reverse=True)
+        count, _, _ = self._count_consecutive_anomalies(by_time, timestamps_sorted)
+        count = max(count, 1)
+        capped = count >= STREAK_LOOKBACK_POINTS
+        return count, latest_ts - step * (count - 1), capped
     def _quorum_at(
         self,
@@ -185,6 +224,8 @@ class _DecisionMixin(_OrchestratorBase):
         anomalies: list[DetectionRecord],
         consecutive_count: int,
         direction: str | None,
+        onset_timestamp: np.datetime64 | None = None,
+        streak_capped: bool = False,
     ) -> AlertData:
         primary = self._primary_record(anomalies)
@@ -249,6 +290,10 @@ class _DecisionMixin(_OrchestratorBase):
             direction_policy=self.conditions.direction,
             consecutive_required=self.conditions.consecutive_anomalies,
             detector_count=len(anomalies),
+            # Incident timing for the "how long has this been going on" line.
+            interval_seconds=self.interval.seconds,
+            onset_timestamp=onset_timestamp,
+            streak_capped=streak_capped,
         )
     def should_alert_no_data(
@@ -304,6 +349,7 @@ class _DecisionMixin(_OrchestratorBase):
             links=self.links,
             project_name=self.project_name,
             help_url=self.help_url,
+            interval_seconds=self.interval.seconds,
         )
     def get_last_complete_point(self, now: datetime | None = None) -> datetime:

{detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/orchestrator/_recovery.py RENAMED Viewed

@@ -3,9 +3,12 @@
 from __future__ import annotations
 from datetime import datetime
+from typing import Any
+import numpy as np
 from detectkit.alerting.channels.base import AlertData
-from detectkit.alerting.orchestrator._base import _OrchestratorBase
+from detectkit.alerting.orchestrator._base import STREAK_LOOKBACK_POINTS, _OrchestratorBase
 from detectkit.alerting.orchestrator._types import (
     DetectionRecord,
     hydrate_detection_records,
@@ -160,6 +163,10 @@ class _RecoveryMixin(_OrchestratorBase):
                 recovery_ci_lower = last_anomalous.confidence_lower
                 recovery_ci_upper = last_anomalous.confidence_upper
+        # Reconstruct the just-ended incident so the recovery message can say how
+        # long it lasted (symmetric with the anomaly alert's onset/duration).
+        incident_count, onset_ts, capped = self._resolve_incident(latest.timestamp)
         return AlertData(
             metric_name=self.metric_name,
             timestamp=latest.timestamp,
@@ -172,7 +179,9 @@ class _RecoveryMixin(_OrchestratorBase):
             direction="none",
             severity=0.0,
             detection_metadata={},
-            consecutive_count=0,
+            # The just-ended incident length (0 when it can't be reconstructed,
+            # so the message simply omits the duration).
+            consecutive_count=incident_count,
             is_recovery=True,
             description=self.description,
             mentions=self.mentions,
@@ -185,4 +194,72 @@ class _RecoveryMixin(_OrchestratorBase):
             min_detectors=self.conditions.min_detectors,
             direction_policy=self.conditions.direction,
             consecutive_required=self.conditions.consecutive_anomalies,
+            # Incident timing for the "Incident lasted …" line.
+            interval_seconds=self.interval.seconds,
+            onset_timestamp=onset_ts,
+            streak_capped=capped,
+        )
+    def _resolve_incident(self, cleared_ts: Any) -> tuple[int, Any, bool]:
+        """Find the anomalous run that just ended before the recovery point.
+        Walks back from *cleared_ts* (the latest, now-clean point): skips the
+        clean tail, then counts the contiguous direction-aware quorum run using
+        the same logic that fired the alert. Returns ``(length, onset_timestamp,
+        capped)`` — ``(0, None, False)`` when no run can be reconstructed, so the
+        recovery message just omits the incident duration.
+        Args:
+            cleared_ts: Timestamp of the recovery point. A ``np.datetime64`` is
+                converted to a ``datetime`` (via millisecond precision) before
+                the history lookup; any other value is passed through as-is.
+        Returns:
+            ``(count, onset, capped)`` where *count* is the number of
+            timestamps in the run, *onset* is the oldest timestamp counted, and
+            *capped* is ``count >= STREAK_LOOKBACK_POINTS``. The empty result
+            ``(0, None, False)`` is returned when ``self.internal`` is falsy,
+            when the lookup yields no records, or when no timestamp in the
+            window satisfies quorum.
+        """
+        # Without ``self.internal`` there is no detection history to query.
+        if not self.internal:
+            return 0, None, False
+        # One metric interval; adjacent points of a run must be exactly this
+        # far apart, otherwise the gap ends the run.
+        step = np.timedelta64(self.interval.seconds, "s")
+        # NOTE(review): presumably ``get_recent_detections`` expects a plain
+        # ``datetime`` for ``last_point`` — confirm against its signature.
+        if isinstance(cleared_ts, np.datetime64):
+            last_point = cleared_ts.astype("datetime64[ms]").astype(datetime)
+        else:
+            last_point = cleared_ts
+        rows = self.internal.get_recent_detections(
+            metric_name=self.metric_name,
+            last_point=last_point,
+            num_points=STREAK_LOOKBACK_POINTS,
         )
+        records = hydrate_detection_records(rows)
+        if not records:
+            return 0, None, False
+        by_time = self._group_by_timestamp(records)
+        # Newest first, so the loop moves backwards in time from the recovery
+        # point.
+        timestamps_sorted = sorted(by_time.keys(), reverse=True)
+        # Walk state: ``locked`` is the direction handed to ``_quorum_at``
+        # (only updated under the "same" policy), ``started`` flips once the
+        # end of the incident is found, ``onset`` tracks the oldest counted
+        # timestamp and ``prev`` the most recently counted one.
+        locked: str | None = None
+        started = False
+        count = 0
+        onset: Any = None
+        prev: np.datetime64 | None = None
+        for ts in timestamps_sorted:
+            anomalies = [d for d in by_time[ts] if d.is_anomaly]
+            # ``_quorum_at`` lives in _DecisionMixin; both mixins compose into
+            # AlertOrchestrator so the call resolves at runtime.
+            quorum, direction = self._quorum_at(anomalies, locked)
+            if not started:
+                # Skip the clean tail (the recovery point + any clean points)
+                # until the first quorum-satisfying point — the incident's end.
+                if quorum is None:
+                    continue
+                started = True
+                # Under the "same" policy the observed direction is fed back
+                # into ``_quorum_at`` for the next (older) point.
+                if self.conditions.direction == "same":
+                    locked = direction
+                count = 1
+                onset = ts
+                prev = ts
+                continue
+            # The run ends at the first point without quorum, or at a gap: the
+            # distance to the previously counted (newer) point must be exactly
+            # one interval.
+            if quorum is None or (prev is not None and (prev - ts) != step):
+                break
+            if self.conditions.direction == "same":
+                locked = direction
+            # Each accepted point pushes the onset one step further back.
+            count += 1
+            onset = ts
+            prev = ts
+        if count == 0:
+            return 0, None, False
+        # NOTE(review): the lookback window also has to cover the skipped
+        # clean tail, so a run cut off by the window may count fewer than
+        # STREAK_LOOKBACK_POINTS points and not be flagged as capped — confirm
+        # against the semantics of ``get_recent_detections``.
+        capped = count >= STREAK_LOOKBACK_POINTS
+        return count, onset, capped

{detectkit-0.16.4 → detectkit-0.17.0}/detectkit/cli/assets/claude/rules/alerting.md RENAMED Viewed

@@ -194,33 +194,47 @@ leads with a colored **status circle** — 🔴 anomaly, 🟢 recovery, 🟡 no-
 - **Slack / Mattermost / generic webhook** — one message *attachment* with a
   status-colored accent bar, a clickable title (the metric; links to
-  `dashboard_url` when set), a short markdown lead (the rule), and a compact
-  fields grid: short fields Value / Expected / Quorum / Severity, then full-width
-  Detected-at / Detectors / Parameters, plus a branded footer + footer icon.
-  @mentions ride in the **top-level** message text so they notify. A custom
-  `template` instead renders as a plain text-only attachment (color/title/
-  branding kept, no fields grid).
+  `dashboard_url` when set), a short markdown lead (the duration sentence — see
+  "Incident timing" below) with the **Rule** chip beneath it, and a compact
+  fields grid: short fields Value / Expected / Quorum / Severity / Started /
+  Latest (Started / Cleared on recovery), then full-width Detectors / Parameters,
+  plus a branded footer + footer icon. @mentions ride in the **top-level**
+  message text so they notify. A custom `template` instead renders as a plain
+  text-only attachment (color/title/branding kept, no fields grid).
 - **Telegram** — default `parse_mode` is now **HTML**. The default message is
   structured and HTML-escaped: a colored status dot (red anomaly / green
-  recovery / yellow no-data / blue error), a bold headline, the rule, then
-  evidence in `<code>` (value/expected/severity/time/detector/params), an inline
-  "Open dashboard" link, then mentions. This fixes the old Markdown mode raising
-  "can't parse entities" on params JSON containing underscores (e.g.
-  `window_size`). Custom templates are sent verbatim under the parse mode, so
-  they must be HTML-safe; set `parse_mode: Markdown` to keep the old behavior.
+  recovery / yellow no-data / blue error), a bold headline, the lead + rule, then
+  evidence in `<code>` (value/expected/quorum/severity/started →
+  latest/detector/params), an inline "Open dashboard" link, then mentions. This
+  fixes the old Markdown mode raising "can't parse entities" on params JSON
+  containing underscores (e.g. `window_size`). Custom templates are sent
+  verbatim under the parse mode, so they must be HTML-safe; set
+  `parse_mode: Markdown` to keep the old behavior.
 - **Email** — a branded HTML card (inline-CSS, table-based, Outlook-safe):
-  colored accent + status pill, the metric, a 2-col value/expected/severity
-  table, a monospace params box, an optional "Open dashboard" button, and a
-  footer. The plain-text body remains the multipart fallback.
-On anomaly **and** recovery alerts the **firing rule is set apart uniformly** in
-every default-rendered channel: a bold **Rule** label + an inline-code chip
-(`min_detectors=… · direction=… · consecutive=…`), with the quorum explanation
-on its own line, so the rule reads as "this is the config that fired" at a
-glance. Bold is platform-aware (`*Rule*` on Slack, `**Rule**` on
-Mattermost/generic; `<b>Rule</b>` on Telegram; `<strong>` in email), while the
-code chip is identical everywhere. Custom templates and the plain-text fallbacks
-are unchanged.
+  colored accent + status pill, the metric, the lead + Rule chip, a 2-col stat
+  grid (value/expected/severity/quorum/started/latest), a monospace params box,
+  an optional "Open dashboard" button, and a footer. The plain-text body remains
+  the multipart fallback.
+**Message order is uniform** — `description → Rule → Value/Expected` on every
+channel, for both anomaly and recovery. The **firing rule is set apart
+uniformly**: a bold **Rule** label + an inline-code chip (`min_detectors=… ·
+direction=… · consecutive=…`) sitting right above the value/expected evidence.
+Bold is platform-aware (`*Rule*` on Slack, `**Rule**` on Mattermost/generic;
+`<b>Rule</b>` on Telegram; `<strong>` in email), while the code chip is
+identical everywhere.
+**Incident timing — "how long has this been going on".** Each default anomaly
+leads with `Anomalous for 2h 30m — 15 consecutive 10min intervals.` (metric
+interval + true streak length + wall-clock duration); Started/Latest bound the
+span. Recovery is symmetric (`Incident lasted …`, Started / Cleared). The true
+streak/onset is resolved only when an alert fires/clears (a bounded lookback over
+the detection history; a run older than the window shows `over …`), so the hot
+no-alert path stays cheap. Exposed to templates as `{anomaly_lead}` /
+`{recovery_lead}` / `{duration_display}` / `{interval_display}` /
+`{started_display}` / `{window_line}` — and `{consecutive_count}` now carries the
+*true* streak length. Custom templates and the plain-text fallbacks follow the
+same order.
 ## Project label (multi-project channels)
@@ -277,7 +291,10 @@ referenced by path). Key variables:
 | `{expected_range}` | one-sided-aware band (`>= 7.00`, `<= 1.10`, `[lo, hi]`, `N/A`) |
 | `{detector_name}`, `{detector_count}` | who fired (`"N detectors"` for multi) |
 | `{min_detectors}` / `{direction_policy}` / `{consecutive_required}` | the configured rule |
-| `{direction}`, `{consecutive_count}`, `{severity}` | observed values |
+| `{direction}`, `{severity}` | observed values |
+| `{consecutive_count}` | **true** streak length (resolved at fire time, not capped at the rule) |
+| `{anomaly_lead}` / `{recovery_lead}` | ready-made "how long" lead sentence |
+| `{interval_display}` / `{duration_display}` / `{started_display}` / `{window_line}` | incident-timing bits (interval, duration, onset, `Started… \| Latest…` line) |
 | `{status}` | `ANOMALY` / `RECOVERED` / `NO_DATA` / `ERROR` |
 | `{mentions}` / `{mentions_line}` | formatted mentions |
 | `{dashboard_url}` | raw `dashboard_url` (empty string when unset) |

{detectkit-0.16.4 → detectkit-0.17.0}/detectkit/cli/commands/test_alert.py RENAMED Viewed

@@ -69,10 +69,17 @@ def create_mock_alert_data(
     else:
         observed_direction = "up"
+    # Incident timing for the preview: a real firing reports how long the run
+    # has been going on, so the mock spans ``consecutive_required`` intervals
+    # ending "now" (onset = now − (required − 1) intervals on the grid).
+    interval_seconds = metric_config.get_interval().seconds
+    ts64 = np.datetime64(now, "ms")
+    onset = ts64 - np.timedelta64(interval_seconds, "s") * max(consecutive_required - 1, 0)
     # Create realistic mock data
     return AlertData(
         metric_name=metric_config.name,
-        timestamp=np.datetime64(now, "ms"),
+        timestamp=ts64,
         timezone=timezone_display,
         value=0.8532,  # Mock anomalous value
         confidence_lower=0.4521,
@@ -103,6 +110,8 @@ def create_mock_alert_data(
         direction_policy=direction_policy,
         consecutive_required=consecutive_required,
         detector_count=min_detectors,
+        interval_seconds=interval_seconds,
+        onset_timestamp=onset,
     )

{detectkit-0.16.4 → detectkit-0.17.0}/detectkit/utils/datetime_utils.py RENAMED Viewed

@@ -51,3 +51,39 @@ def to_aware_utc(dt: datetime | None) -> datetime | None:
     if dt is None:
         return None
     return dt if dt.tzinfo is not None else dt.replace(tzinfo=timezone.utc)
+def format_duration(seconds: int | float) -> str:
+    """Format a span of seconds as a compact human string (max two units).
+    Used by the alert messages to express "how long an anomaly has been
+    running" / "how long an incident lasted" in plain language:
+        >>> format_duration(600)      # 10 minutes
+        '10m'
+        >>> format_duration(9000)     # 2h 30m
+        '2h 30m'
+        >>> format_duration(90000)    # 1d 1h
+        '1d 1h'
+        >>> format_duration(30)
+        '30s'
+    Keeps at most the two most-significant non-zero units so the result
+    stays glanceable. Sub-minute spans render in seconds; zero/negative
+    inputs degrade to ``"0m"`` rather than raising.
+    """
+    total = int(round(seconds))
+    if total <= 0:
+        return "0m"
+    if total < 60:
+        return f"{total}s"
+    parts: list[str] = []
+    remaining = total
+    for label, size in (("d", 86400), ("h", 3600), ("m", 60)):
+        if remaining >= size:
+            qty, remaining = divmod(remaining, size)
+            parts.append(f"{qty}{label}")
+        if len(parts) == 2:
+            break
+    return " ".join(parts)

{detectkit-0.16.4 → detectkit-0.17.0/detectkit.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: detectkit
-Version: 0.16.4
+Version: 0.17.0
 Summary: Metric monitoring with automatic anomaly detection
 Author: detectkit team
 License: MIT