detectkit 0.16.4__tar.gz → 0.17.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. {detectkit-0.16.4/detectkit.egg-info → detectkit-0.17.0}/PKG-INFO +1 -1
  2. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/__init__.py +1 -1
  3. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/channels/base.py +118 -26
  4. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/channels/email.py +27 -31
  5. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/channels/telegram.py +26 -14
  6. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/channels/webhook.py +29 -26
  7. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/orchestrator/_base.py +9 -0
  8. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/orchestrator/_decision.py +49 -3
  9. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/orchestrator/_recovery.py +79 -2
  10. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/cli/assets/claude/rules/alerting.md +42 -25
  11. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/cli/commands/test_alert.py +10 -1
  12. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/utils/datetime_utils.py +36 -0
  13. {detectkit-0.16.4 → detectkit-0.17.0/detectkit.egg-info}/PKG-INFO +1 -1
  14. {detectkit-0.16.4 → detectkit-0.17.0}/LICENSE +0 -0
  15. {detectkit-0.16.4 → detectkit-0.17.0}/MANIFEST.in +0 -0
  16. {detectkit-0.16.4 → detectkit-0.17.0}/README.md +0 -0
  17. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/__init__.py +0 -0
  18. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/channels/__init__.py +0 -0
  19. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/channels/branding.py +0 -0
  20. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/channels/factory.py +0 -0
  21. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/channels/mattermost.py +0 -0
  22. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/channels/slack.py +0 -0
  23. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/orchestrator/__init__.py +0 -0
  24. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/orchestrator/_cooldown.py +0 -0
  25. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/orchestrator/_dispatch.py +0 -0
  26. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/orchestrator/_types.py +0 -0
  27. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/alerting/orchestrator/orchestrator.py +0 -0
  28. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/cli/__init__.py +0 -0
  29. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/cli/_output.py +0 -0
  30. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/cli/assets/claude/CLAUDE.section.md +0 -0
  31. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/cli/assets/claude/rules/cli.md +0 -0
  32. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/cli/assets/claude/rules/detectors.md +0 -0
  33. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/cli/assets/claude/rules/metrics.md +0 -0
  34. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/cli/assets/claude/rules/overview.md +0 -0
  35. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/cli/assets/claude/rules/project.md +0 -0
  36. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/cli/assets/claude/skills/dtk-feedback/SKILL.md +0 -0
  37. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/cli/assets/claude/skills/dtk-new-metric/SKILL.md +0 -0
  38. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/cli/assets/claude/skills/dtk-setup-project/SKILL.md +0 -0
  39. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/cli/commands/__init__.py +0 -0
  40. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/cli/commands/clean.py +0 -0
  41. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/cli/commands/init.py +0 -0
  42. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/cli/commands/init_claude.py +0 -0
  43. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/cli/commands/run.py +0 -0
  44. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/cli/commands/unlock.py +0 -0
  45. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/cli/main.py +0 -0
  46. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/config/__init__.py +0 -0
  47. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/config/metric_config.py +0 -0
  48. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/config/profile.py +0 -0
  49. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/config/project_config.py +0 -0
  50. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/config/validator.py +0 -0
  51. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/core/__init__.py +0 -0
  52. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/core/interval.py +0 -0
  53. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/core/models.py +0 -0
  54. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/database/__init__.py +0 -0
  55. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/database/_sql_manager.py +0 -0
  56. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/database/clickhouse_manager.py +0 -0
  57. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/database/internal_tables/__init__.py +0 -0
  58. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/database/internal_tables/_alert_states.py +0 -0
  59. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/database/internal_tables/_base.py +0 -0
  60. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/database/internal_tables/_datapoints.py +0 -0
  61. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/database/internal_tables/_detections.py +0 -0
  62. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/database/internal_tables/_maintenance.py +0 -0
  63. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/database/internal_tables/_metrics.py +0 -0
  64. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/database/internal_tables/_schema.py +0 -0
  65. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/database/internal_tables/_tasks.py +0 -0
  66. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/database/internal_tables/manager.py +0 -0
  67. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/database/manager.py +0 -0
  68. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/database/mysql_manager.py +0 -0
  69. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/database/postgres_manager.py +0 -0
  70. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/database/tables.py +0 -0
  71. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/detectors/__init__.py +0 -0
  72. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/detectors/base.py +0 -0
  73. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/detectors/factory.py +0 -0
  74. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/detectors/seasonality.py +0 -0
  75. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/detectors/statistical/__init__.py +0 -0
  76. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/detectors/statistical/_windowed.py +0 -0
  77. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/detectors/statistical/iqr.py +0 -0
  78. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/detectors/statistical/mad.py +0 -0
  79. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/detectors/statistical/manual_bounds.py +0 -0
  80. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/detectors/statistical/zscore.py +0 -0
  81. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/loaders/__init__.py +0 -0
  82. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/loaders/metric_loader.py +0 -0
  83. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/loaders/query_template.py +0 -0
  84. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/orchestration/__init__.py +0 -0
  85. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/orchestration/error_dispatch.py +0 -0
  86. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/orchestration/task_manager/__init__.py +0 -0
  87. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/orchestration/task_manager/_alert_step.py +0 -0
  88. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/orchestration/task_manager/_base.py +0 -0
  89. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/orchestration/task_manager/_detect_step.py +0 -0
  90. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/orchestration/task_manager/_load_step.py +0 -0
  91. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/orchestration/task_manager/_types.py +0 -0
  92. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/orchestration/task_manager/manager.py +0 -0
  93. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/utils/__init__.py +0 -0
  94. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/utils/env_interpolation.py +0 -0
  95. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/utils/json_utils.py +0 -0
  96. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit/utils/stats.py +0 -0
  97. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit.egg-info/SOURCES.txt +0 -0
  98. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit.egg-info/dependency_links.txt +0 -0
  99. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit.egg-info/entry_points.txt +0 -0
  100. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit.egg-info/requires.txt +0 -0
  101. {detectkit-0.16.4 → detectkit-0.17.0}/detectkit.egg-info/top_level.txt +0 -0
  102. {detectkit-0.16.4 → detectkit-0.17.0}/pyproject.toml +0 -0
  103. {detectkit-0.16.4 → detectkit-0.17.0}/requirements.txt +0 -0
  104. {detectkit-0.16.4 → detectkit-0.17.0}/setup.cfg +0 -0
  105. {detectkit-0.16.4 → detectkit-0.17.0}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: detectkit
3
- Version: 0.16.4
3
+ Version: 0.17.0
4
4
  Summary: Metric monitoring with automatic anomaly detection
5
5
  Author: detectkit team
6
6
  License: MIT
@@ -4,7 +4,7 @@ detectk - Anomaly Detection for Time-Series Metrics
4
4
  A Python library for data analysts and engineers to monitor metrics with automatic anomaly detection.
5
5
  """
6
6
 
7
- __version__ = "0.16.4"
7
+ __version__ = "0.17.0"
8
8
 
9
9
  from detectkit.core.interval import Interval
10
10
  from detectkit.core.models import ColumnDefinition, TableModel
@@ -92,6 +92,18 @@ class AlertData:
92
92
  direction_policy: str | None = None
93
93
  consecutive_required: int | None = None
94
94
  detector_count: int = 1
95
+ # Incident timing — answers "how long has this been going on". The metric
96
+ # ``interval_seconds`` lets the message express the streak in wall-clock
97
+ # time; ``onset_timestamp`` is the first timestamp of the current anomalous
98
+ # run (anomaly) / the just-ended incident (recovery); ``streak_capped`` is
99
+ # True when the run is at least as long as the orchestrator's lookback
100
+ # window, so the duration is rendered as a lower bound ("over …"). The
101
+ # consecutive streak length itself rides on ``consecutive_count`` (the true
102
+ # run length, resolved at fire time). All default to None/False so
103
+ # direct-API callers and non-anomaly alerts render unchanged.
104
+ interval_seconds: int | None = None
105
+ onset_timestamp: Any | None = None
106
+ streak_capped: bool = False
95
107
 
96
108
 
97
109
  class BaseAlertChannel(ABC):
@@ -165,8 +177,18 @@ class BaseAlertChannel(ABC):
165
177
  - {direction} — observed/locked direction of the anomaly
166
178
  - {direction_policy} — configured direction rule ("same"/"any"/...)
167
179
  - {min_detectors} — configured quorum threshold (the rule)
168
- - {consecutive_count} — observed consecutive points
180
+ - {consecutive_count} — true consecutive streak length (resolved at
181
+ fire time, not capped at the rule's threshold)
169
182
  - {consecutive_required} — configured consecutive threshold (rule)
183
+ - {interval_display} — metric interval as a string (e.g. "10min")
184
+ - {duration_display} — how long the streak/incident lasted
185
+ (e.g. "2h 30m"; "over …" when it predates the lookback window)
186
+ - {onset_display} / {started_display} — first timestamp of the run
187
+ ({started_display} adds "or earlier" when the run is capped)
188
+ - {anomaly_lead} / {recovery_lead} — the ready-made plain-language
189
+ lead sentence ("Anomalous for …" / "… Incident lasted …")
190
+ - {window_line} — "Started: … | Latest/Cleared: …" (or a single
191
+ "Detected at: …" / "Cleared at: …" / "Expected at: …" line, chosen by alert kind, when the onset is unknown)
170
192
  - {severity}
171
193
  - {status}
172
194
 
@@ -228,22 +250,27 @@ class BaseAlertChannel(ABC):
228
250
  """
229
251
  import math
230
252
  from datetime import datetime
253
+ from zoneinfo import ZoneInfo
231
254
 
232
255
  import numpy as np
233
256
 
234
- # Format timestamp to string
235
- ts = alert_data.timestamp
236
- if isinstance(ts, np.datetime64):
237
- ts = ts.astype(datetime)
238
-
239
- # Convert naive UTC timestamp to target timezone if specified
240
- if alert_data.timezone:
241
- from zoneinfo import ZoneInfo
242
-
243
- ts = ts.replace(tzinfo=ZoneInfo("UTC")).astimezone(ZoneInfo(alert_data.timezone))
244
- ts_str = f"{ts.strftime('%Y-%m-%d %H:%M:%S')} ({alert_data.timezone})"
245
- else:
246
- ts_str = ts.strftime("%Y-%m-%d %H:%M:%S")
257
+ def _fmt_ts(value: Any) -> str:
258
+ """Format a timestamp the same way for the main point and the onset:
259
+ naive UTC → target timezone, with a ``(tz)`` suffix when set."""
260
+ if value is None:
261
+ return ""
262
+ t = value
263
+ if isinstance(t, np.datetime64):
264
+ t = t.astype("datetime64[ms]").astype(datetime)
265
+ if not isinstance(t, datetime):
266
+ return str(t)
267
+ if alert_data.timezone:
268
+ t = t.replace(tzinfo=ZoneInfo("UTC")).astimezone(ZoneInfo(alert_data.timezone))
269
+ return f"{t.strftime('%Y-%m-%d %H:%M:%S')} ({alert_data.timezone})"
270
+ return t.strftime("%Y-%m-%d %H:%M:%S")
271
+
272
+ ts_str = _fmt_ts(alert_data.timestamp)
273
+ onset_str = _fmt_ts(alert_data.onset_timestamp)
247
274
 
248
275
  # Format confidence interval
249
276
  if alert_data.confidence_lower is not None and alert_data.confidence_upper is not None:
@@ -287,6 +314,66 @@ class BaseAlertChannel(ABC):
287
314
  )
288
315
  direction_policy = alert_data.direction_policy or alert_data.direction
289
316
 
317
+ # Incident timing — the "how long has this been going on" story shared by
318
+ # every channel. ``consecutive_count`` carries the *true* streak length
319
+ # (resolved at fire time); together with the metric interval it becomes a
320
+ # wall-clock duration and a plain-language lead. ``streak_capped`` means
321
+ # the run is at least as long as the orchestrator's lookback window, so
322
+ # the duration/started values render as lower bounds. Degrades cleanly to
323
+ # the legacy "Latest X/Y consecutive points met the quorum." lead when no
324
+ # interval is wired in (direct-API callers).
325
+ from detectkit.core.interval import Interval
326
+ from detectkit.utils.datetime_utils import format_duration
327
+
328
+ interval_seconds = alert_data.interval_seconds
329
+ streak = alert_data.consecutive_count or 0
330
+ capped = alert_data.streak_capped
331
+ interval_display = str(Interval(interval_seconds)) if interval_seconds else ""
332
+
333
+ if interval_seconds and streak >= 1:
334
+ duration_display = format_duration(streak * interval_seconds)
335
+ if capped:
336
+ duration_display = f"over {duration_display}"
337
+ streak_display = f"{streak}+" if capped else f"{streak}"
338
+ started_display = f"{onset_str} or earlier" if (capped and onset_str) else onset_str
339
+ intervals_word = "interval" if streak == 1 else "intervals"
340
+ anomaly_lead = (
341
+ f"Anomalous for {duration_display} — "
342
+ f"{streak_display} consecutive {interval_display} {intervals_word}."
343
+ )
344
+ recovery_lead = (
345
+ "The alert condition no longer holds — the metric is back within "
346
+ f"expected bounds. Incident lasted {duration_display} "
347
+ f"({streak_display} consecutive {interval_display} {intervals_word})."
348
+ )
349
+ else:
350
+ duration_display = ""
351
+ streak_display = f"{streak}" if streak else ""
352
+ started_display = onset_str
353
+ anomaly_lead = (
354
+ f"Latest {alert_data.consecutive_count}/{consecutive_required} "
355
+ "consecutive points met the quorum."
356
+ )
357
+ recovery_lead = (
358
+ "The alert condition no longer holds — the metric is back within "
359
+ "expected bounds."
360
+ )
361
+
362
+ # Kind-aware "window" line for the plain-text templates: the anomalous
363
+ # span (onset → latest/cleared) when known, else the single point.
364
+ kind = self.status_kind(alert_data)
365
+ if started_display and kind == "anomaly":
366
+ window_line = f"Started: {started_display} | Latest: {ts_str}\n"
367
+ elif started_display and kind == "recovery":
368
+ window_line = f"Started: {started_display} | Cleared: {ts_str}\n"
369
+ else:
370
+ window_label = {
371
+ "recovery": "Cleared at",
372
+ "no_data": "Expected at",
373
+ "error": "Detected at",
374
+ }.get(kind, "Detected at")
375
+ window_line = f"{window_label}: {ts_str}\n"
376
+
290
377
  # Display-safe value: stays usable even when value is None/NaN (no-data).
291
378
  raw_value = alert_data.value
292
379
  if raw_value is None or (isinstance(raw_value, float) and math.isnan(raw_value)):
@@ -350,6 +437,15 @@ class BaseAlertChannel(ABC):
350
437
  "severity": alert_data.severity,
351
438
  "consecutive_count": alert_data.consecutive_count,
352
439
  "consecutive_required": consecutive_required,
440
+ "interval_display": interval_display,
441
+ "duration_display": duration_display,
442
+ "streak_display": streak_display,
443
+ "streak_capped": capped,
444
+ "onset_display": onset_str,
445
+ "started_display": started_display,
446
+ "anomaly_lead": anomaly_lead,
447
+ "recovery_lead": recovery_lead,
448
+ "window_line": window_line,
353
449
  "status": status,
354
450
  "error_type": alert_data.error_type or "",
355
451
  "error_message": alert_data.error_message or "",
@@ -472,16 +568,14 @@ class BaseAlertChannel(ABC):
472
568
  return (
473
569
  "🔴 {project_name_prefix}Alert: {metric_name}\n"
474
570
  "{description_line}"
475
- "Quorum {detector_count}/{min_detectors} · "
476
- "direction {direction} (policy {direction_policy}) · "
477
- "consecutive {consecutive_count}/{consecutive_required}\n"
571
+ "{anomaly_lead}\n"
478
572
  "Rule: min_detectors={min_detectors} · "
479
573
  "direction={direction_policy} · consecutive={consecutive_required}\n"
480
574
  "\n"
481
- "Latest point (evidence):\n"
482
- "· Time: {timestamp}\n"
483
- "· Value: {value_display} | Expected: {expected_range}\n"
484
- "· Severity: {severity:.2f}\n"
575
+ "Value: {value_display} | Expected: {expected_range}\n"
576
+ "Quorum: {detector_count}/{min_detectors} · {direction}\n"
577
+ "Severity: {severity:.2f}\n"
578
+ "{window_line}"
485
579
  "Detectors: {detector_name}\n"
486
580
  "Parameters: {detector_params}\n"
487
581
  "{dashboard_line}"
@@ -499,14 +593,12 @@ class BaseAlertChannel(ABC):
499
593
  return (
500
594
  "🟢 {project_name_prefix}Alert cleared: {metric_name}\n"
501
595
  "{description_line}"
502
- "The alert condition no longer holds — "
503
- "the metric is back within expected bounds.\n"
596
+ "{recovery_lead}\n"
504
597
  "Rule: min_detectors={min_detectors} · "
505
598
  "direction={direction_policy} · consecutive={consecutive_required}\n"
506
599
  "\n"
507
- "Latest point:\n"
508
- "· Time: {timestamp}\n"
509
- "· Value: {value_display} | Expected: {expected_range}\n"
600
+ "Value: {value_display} | Expected: {expected_range}\n"
601
+ "{window_line}"
510
602
  "Detectors: {detector_name}\n"
511
603
  "{dashboard_line}"
512
604
  "{help_line}"
@@ -315,42 +315,38 @@ class EmailChannel(BaseAlertChannel):
315
315
  parts.append(self._lead_html(ctx["description"]))
316
316
 
317
317
  if kind == "anomaly":
318
+ # Description (how long it's been going on) leads; the Rule chip sits
319
+ # right above the stat grid it explains.
320
+ parts.append(self._lead_html(ctx["anomaly_lead"]))
318
321
  parts.append(self._rule_html(ctx))
319
- parts.append(
320
- self._lead_html(
321
- f"Latest {ctx['consecutive_count']}/{ctx['consecutive_required']} "
322
- "consecutive points met the quorum."
323
- )
324
- )
325
- parts.append(
326
- self._stat_grid(
327
- [
328
- ("Value", ctx["value_display"]),
329
- ("Expected", ctx["expected_range"]),
330
- ("Severity", f"{alert_data.severity:.2f}"),
331
- ("Detected at", ctx["timestamp"]),
332
- ]
333
- )
334
- )
322
+ stats = [
323
+ ("Value", ctx["value_display"]),
324
+ ("Expected", ctx["expected_range"]),
325
+ ("Severity", f"{alert_data.severity:.2f}"),
326
+ ("Quorum", f"{ctx['detector_count']}/{ctx['min_detectors']} · {ctx['direction']}"),
327
+ ]
328
+ if ctx["started_display"]:
329
+ stats.append(("Started", ctx["started_display"]))
330
+ stats.append(("Latest", ctx["timestamp"]))
331
+ else:
332
+ stats.append(("Detected at", ctx["timestamp"]))
333
+ parts.append(self._stat_grid(stats))
335
334
  if ctx["detector_params"]:
336
335
  parts.append(self._params_html(ctx["detector_name"], ctx["detector_params"]))
337
336
  elif kind == "recovery":
338
- lead = (
339
- "The alert condition no longer holds — the metric is back within "
340
- "expected bounds."
341
- )
342
- parts.append(self._lead_html(lead))
337
+ parts.append(self._lead_html(ctx["recovery_lead"]))
343
338
  parts.append(self._rule_html(ctx))
344
- parts.append(
345
- self._stat_grid(
346
- [
347
- ("Value", ctx["value_display"]),
348
- ("Expected", ctx["expected_range"]),
349
- ("Detector", ctx["detector_name"]),
350
- ("Cleared at", ctx["timestamp"]),
351
- ]
352
- )
353
- )
339
+ stats = [
340
+ ("Value", ctx["value_display"]),
341
+ ("Expected", ctx["expected_range"]),
342
+ ]
343
+ if ctx["started_display"]:
344
+ stats.append(("Started", ctx["started_display"]))
345
+ stats.append(("Cleared", ctx["timestamp"]))
346
+ else:
347
+ stats.append(("Cleared at", ctx["timestamp"]))
348
+ stats.append(("Detector", ctx["detector_name"]))
349
+ parts.append(self._stat_grid(stats))
354
350
  elif kind == "no_data":
355
351
  lead = "Query returned no datapoint for the latest expected interval."
356
352
  parts.append(self._lead_html(lead))
@@ -24,9 +24,10 @@ class TelegramChannel(BaseAlertChannel):
24
24
 
25
25
  Sends formatted messages to a Telegram chat using a bot token. The default
26
26
  (no custom ``template``) message is a structured **HTML** layout — a colored
27
- status dot, a bold headline, the rule that fired, then the evidence
28
- (value / expected / severity / time / detector / params) in ``<code>``,
29
- plus an optional "Open dashboard" link and @mentions.
27
+ status dot, a bold headline, the lead (how long the anomaly has been
28
+ running) followed by the rule that fired, then the evidence
29
+ (value / expected / quorum / severity / started → latest / detector /
30
+ params) in ``<code>``, plus an optional "Open dashboard" link and @mentions.
30
31
 
31
32
  HTML is the default ``parse_mode`` because the legacy ``Markdown`` mode
32
33
  breaks on the detector params JSON (an unmatched ``_`` in e.g.
@@ -161,11 +162,9 @@ class TelegramChannel(BaseAlertChannel):
161
162
  lines.append("") # blank line
162
163
 
163
164
  if kind == "anomaly":
164
- lines.append(
165
- f"<b>Quorum</b> {ctx['detector_count']}/{ctx['min_detectors']} · "
166
- f"direction <b>{esc(ctx['direction'])}</b> · "
167
- f"<b>{ctx['consecutive_count']}/{ctx['consecutive_required']}</b> consecutive"
168
- )
165
+ # Description (how long it's been going on) leads; the Rule chip sits
166
+ # right above the evidence it explains.
167
+ lines.append(esc(ctx["anomaly_lead"]))
169
168
  lines.append(
170
169
  f"<b>Rule</b> <code>min_detectors={ctx['min_detectors']} · "
171
170
  f"direction={esc(ctx['direction_policy'])} · "
@@ -176,17 +175,24 @@ class TelegramChannel(BaseAlertChannel):
176
175
  f"• Value: <code>{esc(ctx['value_display'])}</code> · "
177
176
  f"Expected: <code>{esc(ctx['expected_range'])}</code>"
178
177
  )
178
+ lines.append(
179
+ f"• Quorum: <code>{ctx['detector_count']}/{ctx['min_detectors']} · "
180
+ f"{esc(ctx['direction'])}</code>"
181
+ )
179
182
  lines.append(f"• Severity: <code>{alert_data.severity:.2f}</code>")
180
- lines.append(f"• Time: <code>{esc(ctx['timestamp'])}</code>")
183
+ if ctx["started_display"]:
184
+ lines.append(
185
+ f"• Started: <code>{esc(ctx['started_display'])}</code> · "
186
+ f"Latest: <code>{esc(ctx['timestamp'])}</code>"
187
+ )
188
+ else:
189
+ lines.append(f"• Time: <code>{esc(ctx['timestamp'])}</code>")
181
190
  lines.append(f"• Detector: <code>{esc(ctx['detector_name'])}</code>")
182
191
  if ctx["detector_params"]:
183
192
  params = self._cap(ctx["detector_params"], _PARAMS_CAP)
184
193
  lines.append(f"• Parameters: <code>{esc(params)}</code>")
185
194
  elif kind == "recovery":
186
- lines.append(
187
- "The alert condition no longer holds — the metric is back within "
188
- "expected bounds."
189
- )
195
+ lines.append(esc(ctx["recovery_lead"]))
190
196
  lines.append(
191
197
  f"<b>Rule</b> <code>min_detectors={ctx['min_detectors']} · "
192
198
  f"direction={esc(ctx['direction_policy'])} · "
@@ -197,7 +203,13 @@ class TelegramChannel(BaseAlertChannel):
197
203
  f"• Value: <code>{esc(ctx['value_display'])}</code> · "
198
204
  f"Expected: <code>{esc(ctx['expected_range'])}</code>"
199
205
  )
200
- lines.append(f"• Time: <code>{esc(ctx['timestamp'])}</code>")
206
+ if ctx["started_display"]:
207
+ lines.append(
208
+ f"• Started: <code>{esc(ctx['started_display'])}</code> · "
209
+ f"Cleared: <code>{esc(ctx['timestamp'])}</code>"
210
+ )
211
+ else:
212
+ lines.append(f"• Cleared: <code>{esc(ctx['timestamp'])}</code>")
201
213
  lines.append(f"• Detector: <code>{esc(ctx['detector_name'])}</code>")
202
214
  elif kind == "no_data":
203
215
  lines.append("Query returned no datapoint for the latest expected interval.")
@@ -27,9 +27,11 @@ class WebhookChannel(BaseAlertChannel):
27
27
 
28
28
  Rendering: the default (no custom ``template``) payload is a single
29
29
  **Slack/Mattermost message attachment** — a colored accent bar, a title,
30
- a short markdown lead, and a compact **fields grid** (Value / Expected /
31
- Quorum / Severity, then full-width Detected-at / Detectors / Parameters),
32
- branded with a ``footer`` + ``footer_icon``. This renders richly on both
30
+ a short markdown lead (how long the anomaly has been running) with the
31
+ **Rule** chip beneath it, and a compact **fields grid** (Value / Expected /
32
+ Quorum / Severity / Started / Latest, or Started / Cleared on recovery — then
33
+ full-width Detectors / Parameters), branded with a ``footer`` +
34
+ ``footer_icon``. This renders richly on both
33
35
  Slack and Mattermost from one payload. A custom ``template`` degrades to a
34
36
  plain text-only attachment (the template is one opaque string that can't be
35
37
  sliced into fields), keeping the color, title and branding.
@@ -267,27 +269,32 @@ class WebhookChannel(BaseAlertChannel):
267
269
  )
268
270
 
269
271
  if kind == "anomaly":
270
- lead = (
271
- f"{rule_chip}\n"
272
- f"Latest {ctx['consecutive_count']}/{ctx['consecutive_required']} "
273
- "consecutive points met the quorum."
274
- )
272
+ # Description (how long it's been going on) leads; the Rule chip sits
273
+ # right above the value/expected fields it explains.
274
+ lead = f"{ctx['anomaly_lead']}\n{rule_chip}"
275
275
  short("Value", code(ctx["value_display"]))
276
276
  short("Expected", code(ctx["expected_range"]))
277
277
  short("Quorum", f"{ctx['detector_count']}/{ctx['min_detectors']} · {ctx['direction']}")
278
278
  short("Severity", f"{alert_data.severity:.2f}")
279
- full("Detected at", ctx["timestamp"])
279
+ # The problematic span: when it started and the latest point in it.
280
+ if ctx["started_display"]:
281
+ short("Started", ctx["started_display"])
282
+ short("Latest", ctx["timestamp"])
283
+ else:
284
+ full("Detected at", ctx["timestamp"])
280
285
  full("Detectors", code(ctx["detector_name"]))
281
286
  if ctx["detector_params"]:
282
287
  full("Parameters", f"```{ctx['detector_params']}```")
283
288
  elif kind == "recovery":
284
- lead = (
285
- "The alert condition no longer holds — the metric is back within "
286
- f"expected bounds.\n{rule_chip}"
287
- )
289
+ lead = f"{ctx['recovery_lead']}\n{rule_chip}"
288
290
  short("Value", code(ctx["value_display"]))
289
291
  short("Expected", code(ctx["expected_range"]))
290
- full("Detected at", ctx["timestamp"])
292
+ # The incident span: when it started and when it cleared.
293
+ if ctx["started_display"]:
294
+ short("Started", ctx["started_display"])
295
+ short("Cleared", ctx["timestamp"])
296
+ else:
297
+ full("Cleared at", ctx["timestamp"])
291
298
  full("Detectors", code(ctx["detector_name"]))
292
299
  elif kind == "no_data":
293
300
  lead = "Query returned no datapoint for the latest expected interval."
@@ -383,16 +390,14 @@ class WebhookChannel(BaseAlertChannel):
383
390
  """
384
391
  return (
385
392
  "{description_line}"
386
- "Quorum {detector_count}/{min_detectors} · "
387
- "direction {direction} (policy {direction_policy}) · "
388
- "consecutive {consecutive_count}/{consecutive_required}\n"
393
+ "{anomaly_lead}\n"
389
394
  "Rule: min_detectors={min_detectors} · "
390
395
  "direction={direction_policy} · consecutive={consecutive_required}\n"
391
396
  "\n"
392
- "Latest point (evidence):\n"
393
- "· Time: {timestamp}\n"
394
- "· Value: {value_display} | Expected: {expected_range}\n"
395
- "· Severity: {severity:.2f}\n"
397
+ "Value: {value_display} | Expected: {expected_range}\n"
398
+ "Quorum: {detector_count}/{min_detectors} · {direction}\n"
399
+ "Severity: {severity:.2f}\n"
400
+ "{window_line}"
396
401
  "Detectors: {detector_name}\n"
397
402
  "Parameters: {detector_params}\n"
398
403
  "{dashboard_line}"
@@ -406,14 +411,12 @@ class WebhookChannel(BaseAlertChannel):
406
411
  """
407
412
  return (
408
413
  "{description_line}"
409
- "The alert condition no longer holds — "
410
- "the metric is back within expected bounds.\n"
414
+ "{recovery_lead}\n"
411
415
  "Rule: min_detectors={min_detectors} · "
412
416
  "direction={direction_policy} · consecutive={consecutive_required}\n"
413
417
  "\n"
414
- "Latest point:\n"
415
- "· Time: {timestamp}\n"
416
- "· Value: {value_display} | Expected: {expected_range}\n"
418
+ "Value: {value_display} | Expected: {expected_range}\n"
419
+ "{window_line}"
417
420
  "Detectors: {detector_name}\n"
418
421
  "{dashboard_line}"
419
422
  "{help_line}"
@@ -10,6 +10,15 @@ from detectkit.alerting.orchestrator._types import (
10
10
  )
11
11
  from detectkit.core.interval import Interval
12
12
 
13
+ # How far back the orchestrator looks to reconstruct the *true* length of an
14
+ # anomalous run when an alert fires / clears. The decision itself only needs
15
+ # ``consecutive_anomalies`` points, but the message reports "how long has this
16
+ # been going on", which needs the full streak. Bounded so a metric stuck
17
+ # anomalous for a very long time never triggers an unbounded load — past this limit the run is
18
+ # reported as a lower bound ("over …"). Only queried on fire/recovery, never on
19
+ # the hot no-alert path.
20
+ STREAK_LOOKBACK_POINTS = 1000
21
+
13
22
 
14
23
  class _OrchestratorBase:
15
24
  def __init__(
@@ -34,8 +34,8 @@ from datetime import datetime, timezone
34
34
  import numpy as np
35
35
 
36
36
  from detectkit.alerting.channels.base import AlertData
37
- from detectkit.alerting.orchestrator._base import _OrchestratorBase
38
- from detectkit.alerting.orchestrator._types import DetectionRecord
37
+ from detectkit.alerting.orchestrator._base import STREAK_LOOKBACK_POINTS, _OrchestratorBase
38
+ from detectkit.alerting.orchestrator._types import DetectionRecord, hydrate_detection_records
39
39
  from detectkit.utils.datetime_utils import now_utc, to_aware_utc
40
40
 
41
41
 
@@ -70,7 +70,46 @@ class _DecisionMixin(_OrchestratorBase):
70
70
  if not latest_quorum or consecutive < self.conditions.consecutive_anomalies:
71
71
  return False, None
72
72
 
73
- return True, self._build_alert_data(latest_quorum, consecutive, direction)
73
+ # The decision is made; now resolve the *true* streak length/onset for
74
+ # the message (the shallow alert window caps ``consecutive`` at the rule
75
+ # threshold, which can't answer "how long has this been going on").
76
+ streak, onset_ts, capped = self._resolve_streak(latest_quorum[0].timestamp)
77
+ return True, self._build_alert_data(latest_quorum, streak, direction, onset_ts, capped)
78
+
79
def _resolve_streak(self, latest_ts: np.datetime64) -> tuple[int, np.datetime64, bool]:
    """Work out how long the anomalous run ending at *latest_ts* really is.

    The firing decision only inspects a shallow window, so its count cannot
    answer "how long has this been going on". This method loads up to
    :data:`STREAK_LOOKBACK_POINTS` detections and re-counts the run with
    ``_count_consecutive_anomalies`` so the message can show the real
    onset and duration.

    Args:
        latest_ts: Timestamp of the newest (anomalous) point.

    Returns:
        ``(streak_count, onset_timestamp, capped)``. The onset is derived as
        ``latest_ts - interval * (streak_count - 1)``. ``capped`` is True
        when the counted run is at least ``STREAK_LOOKBACK_POINTS`` long,
        i.e. the true onset may be older than what was loaded.
    """
    interval_step = np.timedelta64(self.interval.seconds, "s")

    def _onset_for(run_length: int) -> np.datetime64:
        # A run of N points ending at latest_ts started N - 1 intervals earlier.
        return latest_ts - interval_step * (run_length - 1)

    if not self.internal:
        # No detection store to walk (direct-API use): fall back to the
        # rule's required length so the message still carries a duration.
        required = max(self.conditions.consecutive_anomalies, 1)
        return required, _onset_for(required), False

    history = hydrate_detection_records(
        self.internal.get_recent_detections(
            metric_name=self.metric_name,
            last_point=latest_ts.astype("datetime64[ms]").astype(datetime),
            num_points=STREAK_LOOKBACK_POINTS,
        )
    )
    if not history:
        # Nothing could be loaded: report just the latest point.
        return 1, latest_ts, False

    grouped = self._group_by_timestamp(history)
    newest_first = sorted(grouped.keys(), reverse=True)
    streak, _, _ = self._count_consecutive_anomalies(grouped, newest_first)
    # An alert is being sent for latest_ts, so never report fewer than one point.
    streak = max(streak, 1)
    return streak, _onset_for(streak), streak >= STREAK_LOOKBACK_POINTS
74
113
 
75
114
  def _quorum_at(
76
115
  self,
@@ -185,6 +224,8 @@ class _DecisionMixin(_OrchestratorBase):
185
224
  anomalies: list[DetectionRecord],
186
225
  consecutive_count: int,
187
226
  direction: str | None,
227
+ onset_timestamp: np.datetime64 | None = None,
228
+ streak_capped: bool = False,
188
229
  ) -> AlertData:
189
230
  primary = self._primary_record(anomalies)
190
231
 
@@ -249,6 +290,10 @@ class _DecisionMixin(_OrchestratorBase):
249
290
  direction_policy=self.conditions.direction,
250
291
  consecutive_required=self.conditions.consecutive_anomalies,
251
292
  detector_count=len(anomalies),
293
+ # Incident timing for the "how long has this been going on" line.
294
+ interval_seconds=self.interval.seconds,
295
+ onset_timestamp=onset_timestamp,
296
+ streak_capped=streak_capped,
252
297
  )
253
298
 
254
299
  def should_alert_no_data(
@@ -304,6 +349,7 @@ class _DecisionMixin(_OrchestratorBase):
304
349
  links=self.links,
305
350
  project_name=self.project_name,
306
351
  help_url=self.help_url,
352
+ interval_seconds=self.interval.seconds,
307
353
  )
308
354
 
309
355
  def get_last_complete_point(self, now: datetime | None = None) -> datetime:
@@ -3,9 +3,12 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from datetime import datetime
6
+ from typing import Any
7
+
8
+ import numpy as np
6
9
 
7
10
  from detectkit.alerting.channels.base import AlertData
8
- from detectkit.alerting.orchestrator._base import _OrchestratorBase
11
+ from detectkit.alerting.orchestrator._base import STREAK_LOOKBACK_POINTS, _OrchestratorBase
9
12
  from detectkit.alerting.orchestrator._types import (
10
13
  DetectionRecord,
11
14
  hydrate_detection_records,
@@ -160,6 +163,10 @@ class _RecoveryMixin(_OrchestratorBase):
160
163
  recovery_ci_lower = last_anomalous.confidence_lower
161
164
  recovery_ci_upper = last_anomalous.confidence_upper
162
165
 
166
+ # Reconstruct the just-ended incident so the recovery message can say how
167
+ # long it lasted (symmetric with the anomaly alert's onset/duration).
168
+ incident_count, onset_ts, capped = self._resolve_incident(latest.timestamp)
169
+
163
170
  return AlertData(
164
171
  metric_name=self.metric_name,
165
172
  timestamp=latest.timestamp,
@@ -172,7 +179,9 @@ class _RecoveryMixin(_OrchestratorBase):
172
179
  direction="none",
173
180
  severity=0.0,
174
181
  detection_metadata={},
175
- consecutive_count=0,
182
+ # The just-ended incident length (0 when it can't be reconstructed,
183
+ # so the message simply omits the duration).
184
+ consecutive_count=incident_count,
176
185
  is_recovery=True,
177
186
  description=self.description,
178
187
  mentions=self.mentions,
@@ -185,4 +194,72 @@ class _RecoveryMixin(_OrchestratorBase):
185
194
  min_detectors=self.conditions.min_detectors,
186
195
  direction_policy=self.conditions.direction,
187
196
  consecutive_required=self.conditions.consecutive_anomalies,
197
+ # Incident timing for the "Incident lasted …" line.
198
+ interval_seconds=self.interval.seconds,
199
+ onset_timestamp=onset_ts,
200
+ streak_capped=capped,
201
+ )
202
+
203
def _resolve_incident(self, cleared_ts: Any) -> tuple[int, Any, bool]:
    """Find the anomalous run that just ended before the recovery point.

    Walks back from *cleared_ts* (the latest, now-clean point): skips the
    clean tail, then counts the contiguous quorum-satisfying run using
    ``_quorum_at``. The run ends at the first point without quorum or at the
    first gap that is not exactly one interval wide.

    Args:
        cleared_ts: Timestamp of the recovery point. A ``np.datetime64`` is
            converted to ``datetime`` for the lookup; any other value is
            passed through unchanged.

    Returns:
        ``(length, onset_timestamp, capped)``. ``(0, None, False)`` when no
        run can be reconstructed (no internal store, no rows, or no
        quorum-satisfying point in the window), so the recovery message
        just omits the incident duration. ``capped`` is True when the onset
        is only a lower bound: either the run is at least
        ``STREAK_LOOKBACK_POINTS`` long, or the walk consumed a full
        lookback window without ever reaching the start of the run.
    """
    if not self.internal:
        return 0, None, False

    step = np.timedelta64(self.interval.seconds, "s")
    if isinstance(cleared_ts, np.datetime64):
        last_point = cleared_ts.astype("datetime64[ms]").astype(datetime)
    else:
        last_point = cleared_ts
    rows = self.internal.get_recent_detections(
        metric_name=self.metric_name,
        last_point=last_point,
        num_points=STREAK_LOOKBACK_POINTS,
    )
    records = hydrate_detection_records(rows)
    if not records:
        return 0, None, False

    by_time = self._group_by_timestamp(records)
    timestamps_sorted = sorted(by_time.keys(), reverse=True)

    locked: str | None = None
    started = False
    count = 0
    onset: Any = None
    prev: np.datetime64 | None = None
    reached_window_edge = False
    for ts in timestamps_sorted:
        anomalies = [d for d in by_time[ts] if d.is_anomaly]
        # ``_quorum_at`` lives in _DecisionMixin; both mixins compose into
        # AlertOrchestrator so the call resolves at runtime.
        quorum, direction = self._quorum_at(anomalies, locked)
        if not started:
            # Skip the clean tail (the recovery point + any clean points)
            # until the first quorum-satisfying point — the incident's end.
            if quorum is None:
                continue
            started = True
        elif quorum is None or (prev is not None and (prev - ts) != step):
            # Start of the run found inside the window: onset is exact.
            break
        if self.conditions.direction == "same":
            locked = direction
        count += 1
        onset = ts
        prev = ts
    else:
        # Loop ended without a break: the oldest loaded point is still part
        # of the run (or no run was found at all).
        reached_window_edge = True

    if count == 0:
        return 0, None, False

    # The clean tail always occupies part of the lookback window, so the run
    # alone can never fill STREAK_LOOKBACK_POINTS slots. Treat "walked off
    # the end of a full window while still inside the run" as capped too.
    # NOTE(review): assumes ``num_points`` bounds distinct timestamps; if it
    # bounds rows instead, ``window_full`` is rarely true and this reduces
    # to the plain count check — confirm against ``get_recent_detections``.
    window_full = len(timestamps_sorted) >= STREAK_LOOKBACK_POINTS
    capped = count >= STREAK_LOOKBACK_POINTS or (reached_window_edge and window_full)
    return count, onset, capped
@@ -194,33 +194,47 @@ leads with a colored **status circle** — 🔴 anomaly, 🟢 recovery, 🟡 no-
194
194
 
195
195
  - **Slack / Mattermost / generic webhook** — one message *attachment* with a
196
196
  status-colored accent bar, a clickable title (the metric; links to
197
- `dashboard_url` when set), a short markdown lead (the rule), and a compact
198
- fields grid: short fields Value / Expected / Quorum / Severity, then full-width
199
- Detected-at / Detectors / Parameters, plus a branded footer + footer icon.
200
- @mentions ride in the **top-level** message text so they notify. A custom
201
- `template` instead renders as a plain text-only attachment (color/title/
202
- branding kept, no fields grid).
197
+ `dashboard_url` when set), a short markdown lead (the duration sentence — see
198
+ "Incident timing" below) with the **Rule** chip beneath it, and a compact
199
+ fields grid: short fields Value / Expected / Quorum / Severity / Started /
200
+ Latest (Started / Cleared on recovery), then full-width Detectors / Parameters,
201
+ plus a branded footer + footer icon. @mentions ride in the **top-level**
202
+ message text so they notify. A custom `template` instead renders as a plain
203
+ text-only attachment (color/title/branding kept, no fields grid).
203
204
  - **Telegram** — default `parse_mode` is now **HTML**. The default message is
204
205
  structured and HTML-escaped: a colored status dot (red anomaly / green
205
- recovery / yellow no-data / blue error), a bold headline, the rule, then
206
- evidence in `<code>` (value/expected/severity/time/detector/params), an inline
207
- "Open dashboard" link, then mentions. This fixes the old Markdown mode raising
208
- "can't parse entities" on params JSON containing underscores (e.g.
209
- `window_size`). Custom templates are sent verbatim under the parse mode, so
210
- they must be HTML-safe; set `parse_mode: Markdown` to keep the old behavior.
206
+ recovery / yellow no-data / blue error), a bold headline, the lead + rule, then
207
+ evidence in `<code>` (value/expected/quorum/severity/started → latest/detector/
208
+ params), an inline "Open dashboard" link, then mentions. This fixes the old
209
+ Markdown mode raising "can't parse entities" on params JSON containing
210
+ underscores (e.g. `window_size`). Custom templates are sent verbatim under the
211
+ parse mode, so they must be HTML-safe; set `parse_mode: Markdown` to keep the
212
+ old behavior.
211
213
  - **Email** — a branded HTML card (inline-CSS, table-based, Outlook-safe):
212
- colored accent + status pill, the metric, a 2-col value/expected/severity
213
- table, a monospace params box, an optional "Open dashboard" button, and a
214
- footer. The plain-text body remains the multipart fallback.
215
-
216
- On anomaly **and** recovery alerts the **firing rule is set apart uniformly** in
217
- every default-rendered channel: a bold **Rule** label + an inline-code chip
218
- (`min_detectors=… · direction=… · consecutive=…`), with the quorum explanation
219
- on its own line, so the rule reads as "this is the config that fired" at a
220
- glance. Bold is platform-aware (`*Rule*` on Slack, `**Rule**` on
221
- Mattermost/generic; `<b>Rule</b>` on Telegram; `<strong>` in email), while the
222
- code chip is identical everywhere. Custom templates and the plain-text fallbacks
223
- are unchanged.
214
+ colored accent + status pill, the metric, the lead + Rule chip, a 2-col stat
215
+ grid (value/expected/severity/quorum/started/latest), a monospace params box,
216
+ an optional "Open dashboard" button, and a footer. The plain-text body remains
217
+ the multipart fallback.
218
+
219
+ **Message order is uniform**: `description → Rule → Value/Expected` on every
220
+ channel, for both anomaly and recovery. The **firing rule is set apart
221
+ uniformly**: a bold **Rule** label + an inline-code chip (`min_detectors=… ·
222
+ direction=… · consecutive=…`) sitting right above the value/expected evidence.
223
+ Bold is platform-aware (`*Rule*` on Slack, `**Rule**` on Mattermost/generic;
224
+ `<b>Rule</b>` on Telegram; `<strong>` in email), while the code chip is
225
+ identical everywhere.
226
+
227
+ **Incident timing — "how long has this been going on".** Each default anomaly
228
+ leads with `Anomalous for 2h 30m — 15 consecutive 10min intervals.` (metric
229
+ interval + true streak length + wall-clock duration); Started/Latest bound the
230
+ span. Recovery is symmetric (`Incident lasted …`, Started / Cleared). The true
231
+ streak/onset is resolved only when an alert fires/clears (a bounded lookback over
232
+ the detection history; a run older than the window shows `over …`), so the hot
233
+ no-alert path stays cheap. Exposed to templates as `{anomaly_lead}` /
234
+ `{recovery_lead}` / `{duration_display}` / `{interval_display}` /
235
+ `{started_display}` / `{window_line}` — and `{consecutive_count}` now carries the
236
+ *true* streak length. Custom templates and the plain-text fallbacks follow the
237
+ same order.
224
238
 
225
239
  ## Project label (multi-project channels)
226
240
 
@@ -277,7 +291,10 @@ referenced by path). Key variables:
277
291
  | `{expected_range}` | one-sided-aware band (`>= 7.00`, `<= 1.10`, `[lo, hi]`, `N/A`) |
278
292
  | `{detector_name}`, `{detector_count}` | who fired (`"N detectors"` for multi) |
279
293
  | `{min_detectors}` / `{direction_policy}` / `{consecutive_required}` | the configured rule |
280
- | `{direction}`, `{consecutive_count}`, `{severity}` | observed values |
294
+ | `{direction}`, `{severity}` | observed values |
295
+ | `{consecutive_count}` | **true** streak length (resolved at fire time, not capped at the rule) |
296
+ | `{anomaly_lead}` / `{recovery_lead}` | ready-made "how long" lead sentence |
297
+ | `{interval_display}` / `{duration_display}` / `{started_display}` / `{window_line}` | incident-timing bits (interval, duration, onset, `Started… \| Latest…` line) |
281
298
  | `{status}` | `ANOMALY` / `RECOVERED` / `NO_DATA` / `ERROR` |
282
299
  | `{mentions}` / `{mentions_line}` | formatted mentions |
283
300
  | `{dashboard_url}` | raw `dashboard_url` (empty string when unset) |
@@ -69,10 +69,17 @@ def create_mock_alert_data(
69
69
  else:
70
70
  observed_direction = "up"
71
71
 
72
+ # Incident timing for the preview: a real firing reports how long the run
73
+ # has been going on, so the mock spans ``consecutive_required`` intervals
74
+ # ending "now" (onset = now − (required − 1) intervals on the grid).
75
+ interval_seconds = metric_config.get_interval().seconds
76
+ ts64 = np.datetime64(now, "ms")
77
+ onset = ts64 - np.timedelta64(interval_seconds, "s") * max(consecutive_required - 1, 0)
78
+
72
79
  # Create realistic mock data
73
80
  return AlertData(
74
81
  metric_name=metric_config.name,
75
- timestamp=np.datetime64(now, "ms"),
82
+ timestamp=ts64,
76
83
  timezone=timezone_display,
77
84
  value=0.8532, # Mock anomalous value
78
85
  confidence_lower=0.4521,
@@ -103,6 +110,8 @@ def create_mock_alert_data(
103
110
  direction_policy=direction_policy,
104
111
  consecutive_required=consecutive_required,
105
112
  detector_count=min_detectors,
113
+ interval_seconds=interval_seconds,
114
+ onset_timestamp=onset,
106
115
  )
107
116
 
108
117
 
@@ -51,3 +51,39 @@ def to_aware_utc(dt: datetime | None) -> datetime | None:
51
51
  if dt is None:
52
52
  return None
53
53
  return dt if dt.tzinfo is not None else dt.replace(tzinfo=timezone.utc)
54
+
55
+
56
def format_duration(seconds: int | float) -> str:
    """Render a number of seconds as a short human-readable span.

    Alert messages use this to say how long an anomaly has been running or
    how long an incident lasted:

    >>> format_duration(600)  # 10 minutes
    '10m'
    >>> format_duration(9000)  # 2h 30m
    '2h 30m'
    >>> format_duration(90000)  # 1d 1h
    '1d 1h'
    >>> format_duration(30)
    '30s'

    The input is rounded to whole seconds. Spans under a minute are shown in
    seconds; longer spans are broken into days/hours/minutes and only the
    first two non-zero units are kept (left-over seconds are dropped).
    Zero or negative input yields ``"0m"`` instead of raising.
    """
    whole_seconds = int(round(seconds))
    if whole_seconds <= 0:
        return "0m"
    if whole_seconds < 60:
        return f"{whole_seconds}s"

    days, rest = divmod(whole_seconds, 86400)
    hours, rest = divmod(rest, 3600)
    minutes = rest // 60

    # Largest unit first; zero quantities are skipped entirely.
    chunks = [
        f"{qty}{unit}"
        for qty, unit in ((days, "d"), (hours, "h"), (minutes, "m"))
        if qty
    ]
    return " ".join(chunks[:2])
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: detectkit
3
- Version: 0.16.4
3
+ Version: 0.17.0
4
4
  Summary: Metric monitoring with automatic anomaly detection
5
5
  Author: detectkit team
6
6
  License: MIT
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes