detectkit 0.28.0__tar.gz → 0.29.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. {detectkit-0.28.0 → detectkit-0.29.0}/MANIFEST.in +1 -0
  2. {detectkit-0.28.0/detectkit.egg-info → detectkit-0.29.0}/PKG-INFO +1 -1
  3. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/__init__.py +1 -1
  4. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/alerting/orchestrator/__init__.py +2 -0
  5. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/alerting/orchestrator/_recovery.py +23 -15
  6. detectkit-0.29.0/detectkit/alerting/orchestrator/_replay.py +258 -0
  7. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/alerting/orchestrator/orchestrator.py +4 -0
  8. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/cli/assets/claude/rules/cli.md +12 -2
  9. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/cli/commands/autotune.py +39 -0
  10. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/cli/commands/run.py +77 -0
  11. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/cli/main.py +28 -0
  12. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/database/internal_tables/_detections.py +62 -0
  13. detectkit-0.29.0/detectkit/reporting/__init__.py +18 -0
  14. detectkit-0.29.0/detectkit/reporting/assets/report.js +62 -0
  15. detectkit-0.29.0/detectkit/reporting/builder.py +267 -0
  16. detectkit-0.29.0/detectkit/reporting/html_report.py +79 -0
  17. {detectkit-0.28.0 → detectkit-0.29.0/detectkit.egg-info}/PKG-INFO +1 -1
  18. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit.egg-info/SOURCES.txt +5 -0
  19. {detectkit-0.28.0 → detectkit-0.29.0}/pyproject.toml +3 -0
  20. {detectkit-0.28.0 → detectkit-0.29.0}/LICENSE +0 -0
  21. {detectkit-0.28.0 → detectkit-0.29.0}/README.md +0 -0
  22. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/alerting/__init__.py +0 -0
  23. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/alerting/channels/__init__.py +0 -0
  24. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/alerting/channels/base.py +0 -0
  25. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/alerting/channels/branding.py +0 -0
  26. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/alerting/channels/email.py +0 -0
  27. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/alerting/channels/factory.py +0 -0
  28. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/alerting/channels/mattermost.py +0 -0
  29. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/alerting/channels/slack.py +0 -0
  30. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/alerting/channels/telegram.py +0 -0
  31. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/alerting/channels/webhook.py +0 -0
  32. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/alerting/orchestrator/_base.py +0 -0
  33. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/alerting/orchestrator/_cooldown.py +0 -0
  34. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/alerting/orchestrator/_decision.py +0 -0
  35. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/alerting/orchestrator/_dispatch.py +0 -0
  36. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/alerting/orchestrator/_types.py +0 -0
  37. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/autotune/__init__.py +0 -0
  38. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/autotune/_base.py +0 -0
  39. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/autotune/_types.py +0 -0
  40. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/autotune/autotuner.py +0 -0
  41. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/autotune/config_emitter.py +0 -0
  42. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/autotune/crossval.py +0 -0
  43. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/autotune/detector_select.py +0 -0
  44. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/autotune/distribution.py +0 -0
  45. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/autotune/grid_search.py +0 -0
  46. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/autotune/html_labeler.py +0 -0
  47. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/autotune/label_server.py +0 -0
  48. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/autotune/labels.py +0 -0
  49. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/autotune/result.py +0 -0
  50. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/autotune/scoring.py +0 -0
  51. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/autotune/seasonality_search.py +0 -0
  52. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/autotune/settings.py +0 -0
  53. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/autotune/window_select.py +0 -0
  54. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/cli/__init__.py +0 -0
  55. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/cli/_output.py +0 -0
  56. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/cli/assets/claude/CLAUDE.section.md +0 -0
  57. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/cli/assets/claude/rules/alerting.md +0 -0
  58. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/cli/assets/claude/rules/autotune.md +0 -0
  59. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/cli/assets/claude/rules/detectors.md +0 -0
  60. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/cli/assets/claude/rules/metrics.md +0 -0
  61. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/cli/assets/claude/rules/overview.md +0 -0
  62. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/cli/assets/claude/rules/project.md +0 -0
  63. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/cli/assets/claude/skills/dtk-autotune/SKILL.md +0 -0
  64. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/cli/assets/claude/skills/dtk-feedback/SKILL.md +0 -0
  65. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/cli/assets/claude/skills/dtk-new-metric/SKILL.md +0 -0
  66. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/cli/assets/claude/skills/dtk-setup-project/SKILL.md +0 -0
  67. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/cli/commands/__init__.py +0 -0
  68. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/cli/commands/clean.py +0 -0
  69. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/cli/commands/init.py +0 -0
  70. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/cli/commands/init_claude.py +0 -0
  71. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/cli/commands/test_alert.py +0 -0
  72. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/cli/commands/unlock.py +0 -0
  73. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/config/__init__.py +0 -0
  74. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/config/metric_config.py +0 -0
  75. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/config/profile.py +0 -0
  76. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/config/project_config.py +0 -0
  77. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/config/validator.py +0 -0
  78. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/core/__init__.py +0 -0
  79. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/core/interval.py +0 -0
  80. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/core/models.py +0 -0
  81. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/database/__init__.py +0 -0
  82. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/database/_sql_manager.py +0 -0
  83. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/database/clickhouse_manager.py +0 -0
  84. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/database/internal_tables/__init__.py +0 -0
  85. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/database/internal_tables/_alert_states.py +0 -0
  86. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/database/internal_tables/_autotune_runs.py +0 -0
  87. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/database/internal_tables/_base.py +0 -0
  88. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/database/internal_tables/_datapoints.py +0 -0
  89. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/database/internal_tables/_maintenance.py +0 -0
  90. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/database/internal_tables/_metrics.py +0 -0
  91. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/database/internal_tables/_schema.py +0 -0
  92. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/database/internal_tables/_tasks.py +0 -0
  93. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/database/internal_tables/manager.py +0 -0
  94. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/database/manager.py +0 -0
  95. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/database/mysql_manager.py +0 -0
  96. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/database/postgres_manager.py +0 -0
  97. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/database/tables.py +0 -0
  98. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/detectors/__init__.py +0 -0
  99. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/detectors/base.py +0 -0
  100. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/detectors/factory.py +0 -0
  101. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/detectors/seasonality.py +0 -0
  102. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/detectors/statistical/__init__.py +0 -0
  103. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/detectors/statistical/_windowed.py +0 -0
  104. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/detectors/statistical/iqr.py +0 -0
  105. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/detectors/statistical/mad.py +0 -0
  106. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/detectors/statistical/manual_bounds.py +0 -0
  107. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/detectors/statistical/zscore.py +0 -0
  108. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/loaders/__init__.py +0 -0
  109. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/loaders/metric_loader.py +0 -0
  110. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/loaders/query_template.py +0 -0
  111. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/orchestration/__init__.py +0 -0
  112. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/orchestration/error_dispatch.py +0 -0
  113. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/orchestration/task_manager/__init__.py +0 -0
  114. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/orchestration/task_manager/_alert_step.py +0 -0
  115. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/orchestration/task_manager/_base.py +0 -0
  116. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/orchestration/task_manager/_detect_step.py +0 -0
  117. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/orchestration/task_manager/_load_step.py +0 -0
  118. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/orchestration/task_manager/_types.py +0 -0
  119. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/orchestration/task_manager/manager.py +0 -0
  120. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/utils/__init__.py +0 -0
  121. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/utils/datetime_utils.py +0 -0
  122. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/utils/env_interpolation.py +0 -0
  123. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/utils/json_utils.py +0 -0
  124. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit/utils/stats.py +0 -0
  125. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit.egg-info/dependency_links.txt +0 -0
  126. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit.egg-info/entry_points.txt +0 -0
  127. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit.egg-info/requires.txt +0 -0
  128. {detectkit-0.28.0 → detectkit-0.29.0}/detectkit.egg-info/top_level.txt +0 -0
  129. {detectkit-0.28.0 → detectkit-0.29.0}/requirements.txt +0 -0
  130. {detectkit-0.28.0 → detectkit-0.29.0}/setup.cfg +0 -0
  131. {detectkit-0.28.0 → detectkit-0.29.0}/setup.py +0 -0
@@ -3,6 +3,7 @@ include LICENSE
3
3
  include requirements.txt
4
4
  recursive-include detectkit *.py
5
5
  recursive-include detectkit/cli/assets *.md
6
+ recursive-include detectkit/reporting/assets *.js
6
7
  recursive-exclude tests *
7
8
  recursive-exclude * __pycache__
8
9
  recursive-exclude * *.pyc
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: detectkit
3
- Version: 0.28.0
3
+ Version: 0.29.0
4
4
  Summary: Metric monitoring with automatic anomaly detection
5
5
  Author: detectkit team
6
6
  License: MIT
@@ -4,7 +4,7 @@ detectk - Anomaly Detection for Time-Series Metrics
4
4
  A Python library for data analysts and engineers to monitor metrics with automatic anomaly detection.
5
5
  """
6
6
 
7
- __version__ = "0.28.0"
7
+ __version__ = "0.29.0"
8
8
 
9
9
  from detectkit.core.interval import Interval
10
10
  from detectkit.core.models import ColumnDefinition, TableModel
@@ -1,5 +1,6 @@
1
1
  """Public surface of the alert-orchestrator package."""
2
2
 
3
+ from detectkit.alerting.orchestrator._replay import ReplayedEvent
3
4
  from detectkit.alerting.orchestrator._types import (
4
5
  AlertConditions,
5
6
  DetectionRecord,
@@ -13,6 +14,7 @@ __all__ = [
13
14
  "AlertOrchestrator",
14
15
  "AlertConditions",
15
16
  "DetectionRecord",
17
+ "ReplayedEvent",
16
18
  # Shared hydration of DetectionRecord rows from get_recent_detections
17
19
  # output (used by TaskManager and the recovery mixin).
18
20
  "hydrate_detection_records",
@@ -139,6 +139,7 @@ class _RecoveryMixin(_OrchestratorBase):
139
139
  def _build_recovery_data(
140
140
  self,
141
141
  detections: list[DetectionRecord],
142
+ incident_records: list[DetectionRecord] | None = None,
142
143
  ) -> AlertData | None:
143
144
  """Construct the AlertData payload sent as a recovery notification."""
144
145
  if not detections:
@@ -165,7 +166,9 @@ class _RecoveryMixin(_OrchestratorBase):
165
166
 
166
167
  # Reconstruct the just-ended incident so the recovery message can say how
167
168
  # long it lasted (symmetric with the anomaly alert's onset/duration).
168
- incident_count, onset_ts, capped = self._resolve_incident(latest.timestamp)
169
+ incident_count, onset_ts, capped = self._resolve_incident(
170
+ latest.timestamp, records=incident_records
171
+ )
169
172
 
170
173
  return AlertData(
171
174
  metric_name=self.metric_name,
@@ -200,7 +203,9 @@ class _RecoveryMixin(_OrchestratorBase):
200
203
  streak_capped=capped,
201
204
  )
202
205
 
203
- def _resolve_incident(self, cleared_ts: Any) -> tuple[int, Any, bool]:
206
+ def _resolve_incident(
207
+ self, cleared_ts: Any, records: list[DetectionRecord] | None = None
208
+ ) -> tuple[int, Any, bool]:
204
209
  """Find the anomalous run that just ended before the recovery point.
205
210
 
206
211
  Walks back from *cleared_ts* (the latest, now-clean point): skips the
@@ -209,20 +214,23 @@ class _RecoveryMixin(_OrchestratorBase):
209
214
  capped)`` — ``(0, None, False)`` when no run can be reconstructed, so the
210
215
  recovery message just omits the incident duration.
211
216
  """
212
- if not self.internal:
213
- return 0, None, False
214
-
215
217
  step = np.timedelta64(self.interval.seconds, "s")
216
- if isinstance(cleared_ts, np.datetime64):
217
- last_point = cleared_ts.astype("datetime64[ms]").astype(datetime)
218
- else:
219
- last_point = cleared_ts
220
- rows = self.internal.get_recent_detections(
221
- metric_name=self.metric_name,
222
- last_point=last_point,
223
- num_points=STREAK_LOOKBACK_POINTS,
224
- )
225
- records = hydrate_detection_records(rows)
218
+ # ``records`` lets a pure caller (alert replay) supply the in-memory
219
+ # detection slice instead of a DB read; production passes None and the
220
+ # incident is resolved from ``_dtk_detections`` as before.
221
+ if records is None:
222
+ if not self.internal:
223
+ return 0, None, False
224
+ if isinstance(cleared_ts, np.datetime64):
225
+ last_point = cleared_ts.astype("datetime64[ms]").astype(datetime)
226
+ else:
227
+ last_point = cleared_ts
228
+ rows = self.internal.get_recent_detections(
229
+ metric_name=self.metric_name,
230
+ last_point=last_point,
231
+ num_points=STREAK_LOOKBACK_POINTS,
232
+ )
233
+ records = hydrate_detection_records(rows)
226
234
  if not records:
227
235
  return 0, None, False
228
236
 
@@ -0,0 +1,258 @@
1
+ """Pure historical replay of alert/recovery/no-data events.
2
+
3
+ Reconstructs the alert events the orchestrator *would have* produced over a
4
+ historical period from already-persisted detections — **without** any channel
5
+ dispatch, DB state writes or wall-clock. It is the offline counterpart of the
6
+ live ``should_alert`` / ``should_send_recovery`` / ``should_alert_no_data`` path:
7
+ state (last alert / last recovery) is simulated in memory and the decision at
8
+ every grid point is evaluated *causally* (only records with ``timestamp <= t``,
9
+ since the windowed detector is causal), reusing the exact same quorum,
10
+ consecutive-walk, cooldown and recovery arithmetic as the live path.
11
+
12
+ Used to answer "what would these detections have alerted on over this window"
13
+ for backtesting / autotune alert-window sweeps, where firing real channels and
14
+ mutating ``_dtk_alert_states`` would be wrong.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from dataclasses import dataclass
20
+ from datetime import datetime, timedelta
21
+
22
+ import numpy as np
23
+
24
+ from detectkit.alerting.channels.base import AlertData
25
+ from detectkit.alerting.orchestrator._base import STREAK_LOOKBACK_POINTS, _OrchestratorBase
26
+ from detectkit.alerting.orchestrator._types import DetectionRecord
27
+ from detectkit.core.interval import Interval
28
+
29
+
30
+ @dataclass(frozen=True)
31
+ class ReplayedEvent:
32
+ """One alert event reconstructed by :meth:`_ReplayMixin.replay`.
33
+
34
+ ``kind`` is ``"anomaly"``, ``"recovery"`` or ``"no_data"``; ``timestamp`` is
35
+ the grid point at which the event fired (the simulated "now"); ``alert_data``
36
+ is identical in shape to a live :class:`AlertData` (built via the same
37
+ ``_build_*`` helpers as the live path).
38
+ """
39
+
40
+ kind: str
41
+ timestamp: np.datetime64
42
+ alert_data: AlertData
43
+
44
+
45
+ class _ReplayMixin(_OrchestratorBase):
46
+ def replay(
47
+ self,
48
+ detections: list[DetectionRecord],
49
+ value_at: dict[np.datetime64, float | None],
50
+ start: datetime,
51
+ end: datetime,
52
+ ) -> list[ReplayedEvent]:
53
+ """Reconstruct alert/recovery/no-data events over ``[start, end]``.
54
+
55
+ Forward pass over every interval boundary in the closed range
56
+ ``[start, end]``. At each grid point ``t`` the decision is evaluated
57
+ causally — only ``detections`` with ``timestamp <= t`` are considered —
58
+ reusing the live quorum / consecutive-walk / cooldown / recovery logic.
59
+ Simulated state (last alert / last recovery) lives in memory, so nothing
60
+ is dispatched and no DB row is written.
61
+
62
+ Args:
63
+ detections: every persisted detection over the period (any order;
64
+ the same per-detector-per-timestamp shape the live path uses).
65
+ value_at: grid ``np.datetime64`` → value, with ``None`` for a
66
+ missing / NaN datapoint (drives the no-data check).
67
+ start: first grid boundary to evaluate (inclusive).
68
+ end: last grid boundary to evaluate (inclusive).
69
+
70
+ Returns:
71
+ The fired events in chronological order.
72
+ """
73
+ by_time = self._group_by_timestamp(detections)
74
+
75
+ sim_last_alert: np.datetime64 | None = None
76
+ sim_last_recovery: np.datetime64 | None = None
77
+ events: list[ReplayedEvent] = []
78
+
79
+ for t in self._replay_grid(start, end):
80
+ # No-data fires independently of the quorum (a single binary
81
+ # metric-level signal), only when configured and not in cooldown.
82
+ if (
83
+ self.alert_config
84
+ and getattr(self.alert_config, "no_data_alert", False)
85
+ and value_at.get(t) is None
86
+ and not self._replay_in_cooldown(t, sim_last_alert, sim_last_recovery)
87
+ ):
88
+ last_point = t.astype("datetime64[ms]").astype(datetime)
89
+ events.append(
90
+ ReplayedEvent("no_data", t, self._build_no_data_alert_data(last_point))
91
+ )
92
+ sim_last_alert = t
93
+ continue
94
+
95
+ causal = {ts: recs for ts, recs in by_time.items() if ts <= t}
96
+ ts_desc = sorted(causal, reverse=True)
97
+
98
+ consecutive, latest_quorum, direction = self._count_consecutive_anomalies(
99
+ causal, ts_desc
100
+ )
101
+ fired = (
102
+ latest_quorum is not None
103
+ and consecutive >= self.conditions.consecutive_anomalies
104
+ and not self._replay_in_cooldown(t, sim_last_alert, sim_last_recovery)
105
+ )
106
+
107
+ if fired:
108
+ assert latest_quorum is not None # narrowed by ``fired``
109
+ streak, onset, capped = self._replay_streak(causal, ts_desc)
110
+ ad = self._build_alert_data(latest_quorum, streak, direction, onset, capped)
111
+ events.append(ReplayedEvent("anomaly", t, ad))
112
+ sim_last_alert = t
113
+ elif (
114
+ self.alert_config
115
+ and getattr(self.alert_config, "notify_on_recovery", False)
116
+ and sim_last_alert is not None
117
+ and (sim_last_recovery is None or sim_last_recovery < sim_last_alert)
118
+ and self._replay_recovered(causal, ts_desc, sim_last_alert)
119
+ ):
120
+ slice_ = [d for d in detections if d.timestamp <= t]
121
+ # Pure replay: resolve the just-ended incident from the in-memory
122
+ # slice, never from the DB (keeps replay standalone).
123
+ rd = self._build_recovery_data(slice_, incident_records=slice_)
124
+ if rd is not None:
125
+ events.append(ReplayedEvent("recovery", t, rd))
126
+ sim_last_recovery = t
127
+
128
+ return events
129
+
130
+ def _replay_grid(self, start: datetime, end: datetime) -> list[np.datetime64]:
131
+ """Every interval boundary in the closed range ``[start, end]``.
132
+
133
+ Boundaries are produced in ``datetime64[ms]`` so they compare exactly
134
+ with hydrated detection timestamps and ``value_at`` keys.
135
+ """
136
+ step = timedelta(seconds=self.interval.seconds)
137
+ grid: list[np.datetime64] = []
138
+ cur = start
139
+ while cur <= end:
140
+ grid.append(np.datetime64(cur, "ms"))
141
+ cur = cur + step
142
+ return grid
143
+
144
+ def _replay_in_cooldown(
145
+ self,
146
+ t: np.datetime64,
147
+ sim_last_alert: np.datetime64 | None,
148
+ sim_last_recovery: np.datetime64 | None,
149
+ ) -> bool:
150
+ """In-memory analog of :meth:`_CooldownMixin._is_in_cooldown`.
151
+
152
+ Elapsed time is measured on the grid (``t - sim_last_alert``) rather than
153
+ from the wall clock. ``cooldown_reset_on_recovery`` clears the cooldown
154
+ when a recovery has been simulated since the last alert.
155
+ """
156
+ if not self.alert_config or not getattr(self.alert_config, "alert_cooldown", None):
157
+ return False
158
+ if sim_last_alert is None:
159
+ return False
160
+
161
+ cooldown = np.timedelta64(Interval(self.alert_config.alert_cooldown).seconds, "s")
162
+ elapsed = (t - sim_last_alert).astype("timedelta64[s]")
163
+
164
+ if getattr(self.alert_config, "cooldown_reset_on_recovery", True):
165
+ if sim_last_recovery is not None and sim_last_recovery > sim_last_alert:
166
+ return False
167
+
168
+ return bool(elapsed < cooldown)
169
+
170
+ def _replay_recovered(
171
+ self,
172
+ causal: dict[np.datetime64, list[DetectionRecord]],
173
+ ts_desc: list[np.datetime64],
174
+ sim_last_alert: np.datetime64,
175
+ ) -> bool:
176
+ """Pure half of :meth:`_RecoveryMixin._check_recovery_since_last_alert`.
177
+
178
+ Returns ``True`` when the metric has recovered as of the latest causal
179
+ point: no blocking anomalies under the trigger direction, OR no causal
180
+ detections strictly after the last simulated alert.
181
+ """
182
+ if not ts_desc:
183
+ # No detections at all → nothing blocking → recovered.
184
+ return True
185
+
186
+ # No fresh detections after the alert → assume recovery (mirrors the
187
+ # live "no fresh detections" branch).
188
+ if not any(ts > sim_last_alert for ts in ts_desc):
189
+ return True
190
+
191
+ latest_ts = ts_desc[0]
192
+ latest_anomalies = [d for d in causal[latest_ts] if d.is_anomaly]
193
+
194
+ policy = self.conditions.direction
195
+ if policy == "down":
196
+ blocking = [d for d in latest_anomalies if d.direction == "down"]
197
+ elif policy == "up":
198
+ blocking = [d for d in latest_anomalies if d.direction == "up"]
199
+ elif policy == "same":
200
+ trigger_direction = self._replay_trigger_direction(causal, sim_last_alert)
201
+ if trigger_direction is None:
202
+ blocking = latest_anomalies # conservative fallback
203
+ else:
204
+ blocking = [d for d in latest_anomalies if d.direction == trigger_direction]
205
+ else: # "any" / unknown — preserve historical behaviour
206
+ blocking = latest_anomalies
207
+
208
+ return len(blocking) == 0
209
+
210
+ def _replay_trigger_direction(
211
+ self,
212
+ causal: dict[np.datetime64, list[DetectionRecord]],
213
+ sim_last_alert: np.datetime64,
214
+ ) -> str | None:
215
+ """Direction of the anomaly that triggered the simulated last alert.
216
+
217
+ Pure analog of :meth:`_RecoveryMixin._get_alert_trigger_direction`: the
218
+ live code reads the single detection row at the alert timestamp; here the
219
+ alert fired at the grid point ``sim_last_alert``, so the triggering
220
+ quorum is the latest causal point at or before it.
221
+ """
222
+ candidates = [ts for ts in causal if ts <= sim_last_alert]
223
+ if not candidates:
224
+ return None
225
+ latest_ts = max(candidates)
226
+ anomalies = [d for d in causal[latest_ts] if d.is_anomaly]
227
+ if not anomalies:
228
+ return None
229
+
230
+ _, direction = self._quorum_at(anomalies, None)
231
+ if direction in ("up", "down"):
232
+ return direction
233
+
234
+ ups = sum(1 for d in anomalies if d.direction == "up")
235
+ downs = sum(1 for d in anomalies if d.direction == "down")
236
+ if ups > downs:
237
+ return "up"
238
+ if downs > ups:
239
+ return "down"
240
+ return None
241
+
242
+ def _replay_streak(
243
+ self,
244
+ causal: dict[np.datetime64, list[DetectionRecord]],
245
+ ts_desc: list[np.datetime64],
246
+ ) -> tuple[int, np.datetime64, bool]:
247
+ """In-memory analog of :meth:`_DecisionMixin._resolve_streak`.
248
+
249
+ Re-walks the same direction-aware quorum logic over the causal records to
250
+ get the *true* streak length, then derives the onset and the cap flag the
251
+ same way the live path does.
252
+ """
253
+ latest_ts = ts_desc[0]
254
+ step = np.timedelta64(self.interval.seconds, "s")
255
+ count, _, _ = self._count_consecutive_anomalies(causal, ts_desc)
256
+ count = max(count, 1)
257
+ capped = count >= STREAK_LOOKBACK_POINTS
258
+ return count, latest_ts - step * (count - 1), capped
@@ -6,12 +6,14 @@ from detectkit.alerting.orchestrator._cooldown import _CooldownMixin
6
6
  from detectkit.alerting.orchestrator._decision import _DecisionMixin
7
7
  from detectkit.alerting.orchestrator._dispatch import _DispatchMixin
8
8
  from detectkit.alerting.orchestrator._recovery import _RecoveryMixin
9
+ from detectkit.alerting.orchestrator._replay import _ReplayMixin
9
10
 
10
11
 
11
12
  class AlertOrchestrator(
12
13
  _DecisionMixin,
13
14
  _CooldownMixin,
14
15
  _RecoveryMixin,
16
+ _ReplayMixin,
15
17
  _DispatchMixin,
16
18
  ):
17
19
  """Coordinates alert decisions, cooldown, recovery and dispatch.
@@ -21,6 +23,8 @@ class AlertOrchestrator(
21
23
  * ``_DecisionMixin`` — should we alert? builds AlertData.
22
24
  * ``_CooldownMixin`` — suppress within the configured window.
23
25
  * ``_RecoveryMixin`` — direction-aware "all-clear" detection.
26
+ * ``_ReplayMixin`` — pure historical replay of alert/recovery/no-data
27
+ events (no dispatch, no DB state, no wall-clock).
24
28
  * ``_DispatchMixin`` — ship to channels and stamp state.
25
29
  """
26
30
 
@@ -37,7 +37,7 @@ across the project; duplicates raise an error listing the conflicting files.
37
37
 
38
38
  ```bash
39
39
  dtk run --select <sel> [--steps load,detect,alert] [--from DATE] [--to DATE] \
40
- [--full-refresh] [--force] [--profile NAME]
40
+ [--full-refresh] [--force] [--profile NAME] [--report [PATH]]
41
41
  ```
42
42
 
43
43
  - `--steps` — which of `load`, `detect`, `alert` to run (default all); they always
@@ -52,6 +52,13 @@ dtk run --select <sel> [--steps load,detect,alert] [--from DATE] [--to DATE] \
52
52
  - `--force` — ignore a held lock and run anyway (also releases it on exit).
53
53
  Risky with concurrent runs; usually `dtk unlock` is the better recovery.
54
54
  - `--profile` — override the project's default profile (e.g. run against staging).
55
+ - `--report [PATH]` — after the run, write a **self-contained HTML report** per
56
+ metric (values + per-detector confidence bands + flagged anomalies + the alerts
57
+ that fired + a summary, with client-side period selection). It is offline — open
58
+ it in a browser, nothing leaves the page. The report reads the persisted `_dtk_*`
59
+ tables, so even a `--steps load` run can produce one. Dual-mode: bare `--report`
60
+ → `reports/<metric>.html`; `--report <dir>` → `<dir>/<metric>.html`;
61
+ `--report file.html` → that file.
55
62
 
56
63
  ## `dtk autotune --select <sel>`
57
64
 
@@ -60,7 +67,10 @@ history window, then writes an annotated `metrics/<name>__tuned_<id>.yml`. Reads
60
67
  the metric's loaded datapoints (run `dtk run --steps load` first if empty), never
61
68
  edits the original, never alerts. `--incidents FILE` enables supervised tuning
62
69
  against labeled incidents; without it, an unsupervised objective is used.
63
- `--dry-run` searches without writing. Full reference: `autotune.md`.
70
+ `--dry-run` searches without writing. `--report [PATH]` writes the same
71
+ self-contained HTML report as `dtk run` for the tuned winner (default
72
+ `reports/<metric>__tuned_<id>.html`; `<dir>` or a `.html` file also accepted).
73
+ Full reference: `autotune.md`.
64
74
 
65
75
  ## `dtk test-alert <metric>`
66
76
 
@@ -333,6 +333,7 @@ def run_autotune(
333
333
  profile: str | None,
334
334
  force: bool,
335
335
  dry_run: bool,
336
+ report_path: str | None = None,
336
337
  ) -> None:
337
338
  """Auto-tune each selected metric's detector configuration."""
338
339
  from_dt = parse_date(from_date) if from_date else None
@@ -342,6 +343,7 @@ def run_autotune(
342
343
  if loaded is None:
343
344
  return
344
345
  project_root, _project_config, internal_manager, _db_manager = loaded
346
+ project_name = getattr(_project_config, "name", None)
345
347
 
346
348
  try:
347
349
  metrics = select_metrics(select, project_root)
@@ -369,6 +371,8 @@ def run_autotune(
369
371
  to_dt=to_dt,
370
372
  force=force,
371
373
  dry_run=dry_run,
374
+ report_path=report_path,
375
+ project_name=project_name,
372
376
  )
373
377
  if ok:
374
378
  succeeded += 1
@@ -391,6 +395,8 @@ def _tune_one(
391
395
  to_dt: datetime | None,
392
396
  force: bool,
393
397
  dry_run: bool,
398
+ report_path: str | None = None,
399
+ project_name: str | None = None,
394
400
  ) -> bool:
395
401
  """Tune one metric end to end; return True on success."""
396
402
  name = config.name
@@ -526,6 +532,9 @@ def _tune_one(
526
532
  ground_truth=ground_truth,
527
533
  dry_run=dry_run,
528
534
  project_root=project_root,
535
+ config=config,
536
+ report_path=report_path,
537
+ project_name=project_name,
529
538
  )
530
539
  internal_manager.release_lock(name, "pipeline", "pipeline", status="completed")
531
540
  return True
@@ -559,6 +568,9 @@ def _finalize(
559
568
  ground_truth: GroundTruth,
560
569
  dry_run: bool,
561
570
  project_root: Path,
571
+ config: MetricConfig | None = None,
572
+ report_path: str | None = None,
573
+ project_name: str | None = None,
562
574
  ) -> None:
563
575
  """Persist run + winner detections + tuned config, prune prior winners, render RESULT."""
564
576
  folds = " ".join(f"{f:.2f}" for f in result.cv_per_fold) or "—"
@@ -623,6 +635,33 @@ def _finalize(
623
635
  # Write the annotated tuned config.
624
636
  out_path.write_text(config_text, encoding="utf-8")
625
637
 
638
+ # Optional: emit an HTML report for the tuned window (winner's bands +
639
+ # anomalies + replayed alerts under the metric's alerting rules).
640
+ if report_path is not None and config is not None:
641
+ try:
642
+ from detectkit.cli.commands.run import _resolve_report_path
643
+ from detectkit.reporting import build_report_payload, render_report_html
644
+ from detectkit.utils.datetime_utils import now_utc_naive
645
+
646
+ ts = data["timestamp"]
647
+ start = ts[0].astype("datetime64[ms]").astype(datetime) if len(ts) else None
648
+ end = ts[-1].astype("datetime64[ms]").astype(datetime) if len(ts) else None
649
+ payload = build_report_payload(
650
+ metric_config=config,
651
+ internal=internal_manager,
652
+ start=start,
653
+ end=end,
654
+ project_name=project_name,
655
+ generated_at=now_utc_naive().strftime("%Y-%m-%d %H:%M UTC"),
656
+ )
657
+ if payload["points"]:
658
+ report_out = _resolve_report_path(report_path, project_root, out_path.stem)
659
+ report_out.parent.mkdir(parents=True, exist_ok=True)
660
+ report_out.write_text(render_report_html(payload), encoding="utf-8")
661
+ children.append(f"Report → {report_out.relative_to(project_root)}")
662
+ except Exception as report_error: # never fail tuning on a report
663
+ children.append(f"Report skipped: {report_error}")
664
+
626
665
  children.append(f"Wrote {out_path.relative_to(project_root)} (run_id={run_id})")
627
666
  children.append(
628
667
  f"Evaluated {len(result.candidate_detector_ids)} candidate(s); "
@@ -27,6 +27,7 @@ def run_command(
27
27
  full_refresh: bool,
28
28
  force: bool,
29
29
  profile: str | None,
30
+ report_path: str | None = None,
30
31
  ):
31
32
  """
32
33
  Execute metric processing pipeline.
@@ -40,6 +41,9 @@ def run_command(
40
41
  full_refresh: Delete and reload all data
41
42
  force: Ignore task locks
42
43
  profile: Profile name to use
44
+ report_path: When not None, emit an HTML report per metric after its
45
+ run. "" → default location (reports/<metric>.html); a directory →
46
+ <dir>/<metric>.html; a .html path → that file.
43
47
  """
44
48
  # Parse steps
45
49
  step_list = parse_steps(steps)
@@ -227,6 +231,79 @@ def run_command(
227
231
  )
228
232
  break
229
233
 
234
+ # Optional: emit a self-contained HTML report from the freshly-persisted
235
+ # internal tables (values + bands + anomalies + replayed alerts).
236
+ if report_path is not None:
237
+ try:
238
+ emit_metric_report(
239
+ config=config,
240
+ project_root=project_root,
241
+ internal_manager=internal_manager,
242
+ report_path=report_path,
243
+ project_name=getattr(project_config, "name", None),
244
+ from_dt=from_dt,
245
+ to_dt=to_dt,
246
+ )
247
+ except Exception as report_error: # never fail the run on a report
248
+ click.echo(click.style(f" │ Report skipped: {report_error}", fg="yellow"))
249
+
250
+
251
+ def _resolve_report_path(report_path: str, project_root: Path, metric_name: str) -> Path:
252
+ """Map the ``--report`` value to a concrete output file for a metric.
253
+
254
+ "" → ``<project>/reports/<metric>.html``; a ``.html`` path → that file;
255
+ anything else → ``<dir>/<metric>.html``.
256
+ """
257
+ if report_path == "":
258
+ return project_root / "reports" / f"{metric_name}.html"
259
+ candidate = Path(report_path)
260
+ if candidate.suffix.lower() == ".html":
261
+ return candidate
262
+ return candidate / f"{metric_name}.html"
263
+
264
+
265
def emit_metric_report(
    *,
    config: MetricConfig,
    project_root: Path,
    internal_manager: InternalTablesManager,
    report_path: str,
    project_name: str | None,
    from_dt: datetime | None,
    to_dt: datetime | None,
) -> None:
    """Render one metric's HTML report and write it to disk.

    The payload is built via ``build_report_payload`` for the
    ``from_dt`` .. ``to_dt`` window. If it contains no points, a notice is
    echoed and nothing is written. Otherwise the destination is chosen by
    ``_resolve_report_path``, missing parent directories are created, the
    rendered HTML is written as UTF-8, and a one-line summary is echoed.

    Errors are not caught here; they propagate to the caller.
    """
    from detectkit.reporting import build_report_payload, render_report_html
    from detectkit.utils.datetime_utils import now_utc_naive

    generated_at = now_utc_naive().strftime("%Y-%m-%d %H:%M UTC")
    payload = build_report_payload(
        metric_config=config,
        internal=internal_manager,
        start=from_dt,
        end=to_dt,
        project_name=project_name,
        generated_at=generated_at,
    )

    if payload["points"]:
        destination = _resolve_report_path(report_path, project_root, config.name)
        destination.parent.mkdir(parents=True, exist_ok=True)
        html = render_report_html(payload)
        destination.write_text(html, encoding="utf-8")

        # Show a project-relative path when possible; fall back to the
        # path as resolved when it lies outside the project root.
        try:
            display_path = destination.relative_to(project_root)
        except ValueError:
            display_path = destination

        message = (
            f" │ Report → {display_path} "
            f"({payload['summary']['anomalies']} anomalies, "
            f"{payload['summary']['alerts']} alerts)"
        )
        click.echo(click.style(message, fg="cyan"))
    else:
        click.echo(" │ Report: no datapoints in window, skipped")
306
+
230
307
 
231
308
  def parse_steps(steps_str: str) -> list[PipelineStep]:
232
309
  """
@@ -136,6 +136,18 @@ def init_claude(target_dir: str):
136
136
  "--profile",
137
137
  help="Profile to use (default: from project config)",
138
138
  )
139
+ @click.option(
140
+ "--report",
141
+ "report_path",
142
+ is_flag=False,
143
+ flag_value="",
144
+ default=None,
145
+ help=(
146
+ "After the run, emit a self-contained HTML report per metric "
147
+ "(values, confidence bands, anomalies, and alerts). Optional value: "
148
+ "an output file or directory; defaults to reports/<metric>.html."
149
+ ),
150
+ )
139
151
  def run(
140
152
  select: str,
141
153
  exclude: str,
@@ -145,6 +157,7 @@ def run(
145
157
  full_refresh: bool,
146
158
  force: bool,
147
159
  profile: str,
160
+ report_path: str,
148
161
  ):
149
162
  """
150
163
  Run metric processing pipeline.
@@ -186,6 +199,7 @@ def run(
186
199
  full_refresh=full_refresh,
187
200
  force=force,
188
201
  profile=profile,
202
+ report_path=report_path,
189
203
  )
190
204
 
191
205
 
@@ -246,6 +260,18 @@ def run(
246
260
  is_flag=True,
247
261
  help="Run the search but persist nothing and write no config",
248
262
  )
263
+ @click.option(
264
+ "--report",
265
+ "report_path",
266
+ is_flag=False,
267
+ flag_value="",
268
+ default=None,
269
+ help=(
270
+ "After tuning, emit a self-contained HTML report for the winning "
271
+ "config (values, confidence bands, anomalies, alerts). Optional value: "
272
+ "an output file or directory; defaults to reports/<metric>__tuned_<id>.html."
273
+ ),
274
+ )
249
275
  def autotune(
250
276
  select: str,
251
277
  incidents_path: str,
@@ -258,6 +284,7 @@ def autotune(
258
284
  profile: str,
259
285
  force: bool,
260
286
  dry_run: bool,
287
+ report_path: str,
261
288
  ):
262
289
  """
263
290
  Automatically configure a metric's anomaly detector.
@@ -298,6 +325,7 @@ def autotune(
298
325
  profile=profile,
299
326
  force=force,
300
327
  dry_run=dry_run,
328
+ report_path=report_path,
301
329
  )
302
330
 
303
331