detectkit 0.28.0__tar.gz → 0.30.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {detectkit-0.28.0 → detectkit-0.30.0}/MANIFEST.in +2 -0
- {detectkit-0.28.0/detectkit.egg-info → detectkit-0.30.0}/PKG-INFO +1 -1
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/__init__.py +1 -1
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/alerting/orchestrator/__init__.py +2 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/alerting/orchestrator/_recovery.py +23 -15
- detectkit-0.30.0/detectkit/alerting/orchestrator/_replay.py +258 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/alerting/orchestrator/orchestrator.py +4 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/autotune/html_labeler.py +67 -47
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/cli/assets/claude/rules/autotune.md +21 -2
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/cli/assets/claude/rules/cli.md +33 -2
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/cli/assets/claude/rules/overview.md +15 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/cli/assets/claude/skills/dtk-autotune/SKILL.md +6 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/cli/commands/autotune.py +39 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/cli/commands/run.py +77 -0
- detectkit-0.30.0/detectkit/cli/commands/tune.py +108 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/cli/main.py +103 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/database/internal_tables/_detections.py +62 -0
- detectkit-0.30.0/detectkit/reporting/__init__.py +18 -0
- detectkit-0.30.0/detectkit/reporting/assets/report.js +77 -0
- detectkit-0.30.0/detectkit/reporting/builder.py +391 -0
- detectkit-0.30.0/detectkit/reporting/html_report.py +83 -0
- detectkit-0.30.0/detectkit/tuning/__init__.py +34 -0
- detectkit-0.30.0/detectkit/tuning/assets/tune.js +50 -0
- detectkit-0.30.0/detectkit/tuning/config_writer.py +137 -0
- detectkit-0.30.0/detectkit/tuning/html.py +82 -0
- detectkit-0.30.0/detectkit/tuning/payload.py +160 -0
- detectkit-0.30.0/detectkit/tuning/server.py +151 -0
- {detectkit-0.28.0 → detectkit-0.30.0/detectkit.egg-info}/PKG-INFO +1 -1
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit.egg-info/SOURCES.txt +12 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/pyproject.toml +6 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/LICENSE +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/README.md +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/alerting/__init__.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/alerting/channels/__init__.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/alerting/channels/base.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/alerting/channels/branding.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/alerting/channels/email.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/alerting/channels/factory.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/alerting/channels/mattermost.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/alerting/channels/slack.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/alerting/channels/telegram.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/alerting/channels/webhook.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/alerting/orchestrator/_base.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/alerting/orchestrator/_cooldown.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/alerting/orchestrator/_decision.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/alerting/orchestrator/_dispatch.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/alerting/orchestrator/_types.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/autotune/__init__.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/autotune/_base.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/autotune/_types.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/autotune/autotuner.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/autotune/config_emitter.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/autotune/crossval.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/autotune/detector_select.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/autotune/distribution.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/autotune/grid_search.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/autotune/label_server.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/autotune/labels.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/autotune/result.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/autotune/scoring.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/autotune/seasonality_search.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/autotune/settings.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/autotune/window_select.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/cli/__init__.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/cli/_output.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/cli/assets/claude/CLAUDE.section.md +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/cli/assets/claude/rules/alerting.md +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/cli/assets/claude/rules/detectors.md +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/cli/assets/claude/rules/metrics.md +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/cli/assets/claude/rules/project.md +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/cli/assets/claude/skills/dtk-feedback/SKILL.md +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/cli/assets/claude/skills/dtk-new-metric/SKILL.md +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/cli/assets/claude/skills/dtk-setup-project/SKILL.md +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/cli/commands/__init__.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/cli/commands/clean.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/cli/commands/init.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/cli/commands/init_claude.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/cli/commands/test_alert.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/cli/commands/unlock.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/config/__init__.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/config/metric_config.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/config/profile.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/config/project_config.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/config/validator.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/core/__init__.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/core/interval.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/core/models.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/database/__init__.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/database/_sql_manager.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/database/clickhouse_manager.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/database/internal_tables/__init__.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/database/internal_tables/_alert_states.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/database/internal_tables/_autotune_runs.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/database/internal_tables/_base.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/database/internal_tables/_datapoints.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/database/internal_tables/_maintenance.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/database/internal_tables/_metrics.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/database/internal_tables/_schema.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/database/internal_tables/_tasks.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/database/internal_tables/manager.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/database/manager.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/database/mysql_manager.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/database/postgres_manager.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/database/tables.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/detectors/__init__.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/detectors/base.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/detectors/factory.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/detectors/seasonality.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/detectors/statistical/__init__.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/detectors/statistical/_windowed.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/detectors/statistical/iqr.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/detectors/statistical/mad.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/detectors/statistical/manual_bounds.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/detectors/statistical/zscore.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/loaders/__init__.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/loaders/metric_loader.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/loaders/query_template.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/orchestration/__init__.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/orchestration/error_dispatch.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/orchestration/task_manager/__init__.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/orchestration/task_manager/_alert_step.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/orchestration/task_manager/_base.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/orchestration/task_manager/_detect_step.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/orchestration/task_manager/_load_step.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/orchestration/task_manager/_types.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/orchestration/task_manager/manager.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/utils/__init__.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/utils/datetime_utils.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/utils/env_interpolation.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/utils/json_utils.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit/utils/stats.py +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit.egg-info/dependency_links.txt +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit.egg-info/entry_points.txt +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit.egg-info/requires.txt +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/detectkit.egg-info/top_level.txt +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/requirements.txt +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/setup.cfg +0 -0
- {detectkit-0.28.0 → detectkit-0.30.0}/setup.py +0 -0
|
@@ -3,6 +3,8 @@ include LICENSE
|
|
|
3
3
|
include requirements.txt
|
|
4
4
|
recursive-include detectkit *.py
|
|
5
5
|
recursive-include detectkit/cli/assets *.md
|
|
6
|
+
recursive-include detectkit/reporting/assets *.js
|
|
7
|
+
recursive-include detectkit/tuning/assets *.js
|
|
6
8
|
recursive-exclude tests *
|
|
7
9
|
recursive-exclude * __pycache__
|
|
8
10
|
recursive-exclude * *.pyc
|
|
@@ -4,7 +4,7 @@ detectk - Anomaly Detection for Time-Series Metrics
|
|
|
4
4
|
A Python library for data analysts and engineers to monitor metrics with automatic anomaly detection.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
-
__version__ = "0.28.0"
|
|
7
|
+
__version__ = "0.30.0"
|
|
8
8
|
|
|
9
9
|
from detectkit.core.interval import Interval
|
|
10
10
|
from detectkit.core.models import ColumnDefinition, TableModel
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Public surface of the alert-orchestrator package."""
|
|
2
2
|
|
|
3
|
+
from detectkit.alerting.orchestrator._replay import ReplayedEvent
|
|
3
4
|
from detectkit.alerting.orchestrator._types import (
|
|
4
5
|
AlertConditions,
|
|
5
6
|
DetectionRecord,
|
|
@@ -13,6 +14,7 @@ __all__ = [
|
|
|
13
14
|
"AlertOrchestrator",
|
|
14
15
|
"AlertConditions",
|
|
15
16
|
"DetectionRecord",
|
|
17
|
+
"ReplayedEvent",
|
|
16
18
|
# Shared hydration of DetectionRecord rows from get_recent_detections
|
|
17
19
|
# output (used by TaskManager and the recovery mixin).
|
|
18
20
|
"hydrate_detection_records",
|
|
@@ -139,6 +139,7 @@ class _RecoveryMixin(_OrchestratorBase):
|
|
|
139
139
|
def _build_recovery_data(
|
|
140
140
|
self,
|
|
141
141
|
detections: list[DetectionRecord],
|
|
142
|
+
incident_records: list[DetectionRecord] | None = None,
|
|
142
143
|
) -> AlertData | None:
|
|
143
144
|
"""Construct the AlertData payload sent as a recovery notification."""
|
|
144
145
|
if not detections:
|
|
@@ -165,7 +166,9 @@ class _RecoveryMixin(_OrchestratorBase):
|
|
|
165
166
|
|
|
166
167
|
# Reconstruct the just-ended incident so the recovery message can say how
|
|
167
168
|
# long it lasted (symmetric with the anomaly alert's onset/duration).
|
|
168
|
-
incident_count, onset_ts, capped = self._resolve_incident(latest.timestamp)
|
|
169
|
+
incident_count, onset_ts, capped = self._resolve_incident(
|
|
170
|
+
latest.timestamp, records=incident_records
|
|
171
|
+
)
|
|
169
172
|
|
|
170
173
|
return AlertData(
|
|
171
174
|
metric_name=self.metric_name,
|
|
@@ -200,7 +203,9 @@ class _RecoveryMixin(_OrchestratorBase):
|
|
|
200
203
|
streak_capped=capped,
|
|
201
204
|
)
|
|
202
205
|
|
|
203
|
-
def _resolve_incident(self, cleared_ts: Any) -> tuple[int, Any, bool]:
|
|
206
|
+
def _resolve_incident(
|
|
207
|
+
self, cleared_ts: Any, records: list[DetectionRecord] | None = None
|
|
208
|
+
) -> tuple[int, Any, bool]:
|
|
204
209
|
"""Find the anomalous run that just ended before the recovery point.
|
|
205
210
|
|
|
206
211
|
Walks back from *cleared_ts* (the latest, now-clean point): skips the
|
|
@@ -209,20 +214,23 @@ class _RecoveryMixin(_OrchestratorBase):
|
|
|
209
214
|
capped)`` — ``(0, None, False)`` when no run can be reconstructed, so the
|
|
210
215
|
recovery message just omits the incident duration.
|
|
211
216
|
"""
|
|
212
|
-
if not self.internal:
|
|
213
|
-
return 0, None, False
|
|
214
|
-
|
|
215
217
|
step = np.timedelta64(self.interval.seconds, "s")
|
|
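216
|
-
if isinstance(cleared_ts, np.datetime64):
|
|
217
|
-
last_point = cleared_ts.astype("datetime64[ms]").astype(datetime)
|
|
218
|
-
else:
|
|
219
|
-
last_point = cleared_ts
|
|
220
|
-
rows = self.internal.get_recent_detections(
|
|
221
|
-
metric_name=self.metric_name,
|
|
222
|
-
last_point=last_point,
|
|
223
|
-
num_points=STREAK_LOOKBACK_POINTS,
|
|
224
|
-
)
|
|
225
|
-
records = hydrate_detection_records(rows)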
|
|
218
|
+
# ``records`` lets a pure caller (alert replay) supply the in-memory
|
|
219
|
+
# detection slice instead of a DB read; production passes None and the
|
|
220
|
+
# incident is resolved from ``_dtk_detections`` as before.
|
|
221
|
+
if records is None:
|
|
222
|
+
if not self.internal:
|
|
223
|
+
return 0, None, False
|
|
224
|
+
if isinstance(cleared_ts, np.datetime64):
|
|
225
|
+
last_point = cleared_ts.astype("datetime64[ms]").astype(datetime)
|
|
226
|
+
else:
|
|
227
|
+
last_point = cleared_ts
|
|
228
|
+
rows = self.internal.get_recent_detections(
|
|
229
|
+
metric_name=self.metric_name,
|
|
230
|
+
last_point=last_point,
|
|
231
|
+
num_points=STREAK_LOOKBACK_POINTS,
|
|
232
|
+
)
|
|
233
|
+
records = hydrate_detection_records(rows)
|
|
226
234
|
if not records:
|
|
227
235
|
return 0, None, False
|
|
228
236
|
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
"""Pure historical replay of alert/recovery/no-data events.
|
|
2
|
+
|
|
3
|
+
Reconstructs the alert events the orchestrator *would have* produced over a
|
|
4
|
+
historical period from already-persisted detections — **without** any channel
|
|
5
|
+
dispatch, DB state writes or wall-clock. It is the offline counterpart of the
|
|
6
|
+
live ``should_alert`` / ``should_send_recovery`` / ``should_alert_no_data`` path:
|
|
7
|
+
state (last alert / last recovery) is simulated in memory and the decision at
|
|
8
|
+
every grid point is evaluated *causally* (only records with ``timestamp <= t``,
|
|
9
|
+
since the windowed detector is causal), reusing the exact same quorum,
|
|
10
|
+
consecutive-walk, cooldown and recovery arithmetic as the live path.
|
|
11
|
+
|
|
12
|
+
Used to answer "what would these detections have alerted on over this window"
|
|
13
|
+
for backtesting / autotune alert-window sweeps, where firing real channels and
|
|
14
|
+
mutating ``_dtk_alert_states`` would be wrong.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from dataclasses import dataclass
|
|
20
|
+
from datetime import datetime, timedelta
|
|
21
|
+
|
|
22
|
+
import numpy as np
|
|
23
|
+
|
|
24
|
+
from detectkit.alerting.channels.base import AlertData
|
|
25
|
+
from detectkit.alerting.orchestrator._base import STREAK_LOOKBACK_POINTS, _OrchestratorBase
|
|
26
|
+
from detectkit.alerting.orchestrator._types import DetectionRecord
|
|
27
|
+
from detectkit.core.interval import Interval
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass(frozen=True)
|
|
31
|
+
class ReplayedEvent:
|
|
32
|
+
"""One alert event reconstructed by :meth:`_ReplayMixin.replay`.
|
|
33
|
+
|
|
34
|
+
``kind`` is ``"anomaly"``, ``"recovery"`` or ``"no_data"``; ``timestamp`` is
|
|
35
|
+
the grid point at which the event fired (the simulated "now"); ``alert_data``
|
|
36
|
+
is identical in shape to a live :class:`AlertData` (built via the same
|
|
37
|
+
``_build_*`` helpers as the live path).
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
kind: str
|
|
41
|
+
timestamp: np.datetime64
|
|
42
|
+
alert_data: AlertData
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class _ReplayMixin(_OrchestratorBase):
|
|
46
|
+
def replay(
|
|
47
|
+
self,
|
|
48
|
+
detections: list[DetectionRecord],
|
|
49
|
+
value_at: dict[np.datetime64, float | None],
|
|
50
|
+
start: datetime,
|
|
51
|
+
end: datetime,
|
|
52
|
+
) -> list[ReplayedEvent]:
|
|
53
|
+
"""Reconstruct alert/recovery/no-data events over ``[start, end]``.
|
|
54
|
+
|
|
55
|
+
Forward pass over every interval boundary in the closed range
|
|
56
|
+
``[start, end]``. At each grid point ``t`` the decision is evaluated
|
|
57
|
+
causally — only ``detections`` with ``timestamp <= t`` are considered —
|
|
58
|
+
reusing the live quorum / consecutive-walk / cooldown / recovery logic.
|
|
59
|
+
Simulated state (last alert / last recovery) lives in memory, so nothing
|
|
60
|
+
is dispatched and no DB row is written.
|
|
61
|
+
|
|
62
|
+
Args:
|
|
63
|
+
detections: every persisted detection over the period (any order;
|
|
64
|
+
the same per-detector-per-timestamp shape the live path uses).
|
|
65
|
+
value_at: grid ``np.datetime64`` → value, with ``None`` for a
|
|
66
|
+
missing / NaN datapoint (drives the no-data check).
|
|
67
|
+
start: first grid boundary to evaluate (inclusive).
|
|
68
|
+
end: last grid boundary to evaluate (inclusive).
|
|
69
|
+
|
|
70
|
+
Returns:
|
|
71
|
+
The fired events in chronological order.
|
|
72
|
+
"""
|
|
73
|
+
by_time = self._group_by_timestamp(detections)
|
|
74
|
+
|
|
75
|
+
sim_last_alert: np.datetime64 | None = None
|
|
76
|
+
sim_last_recovery: np.datetime64 | None = None
|
|
77
|
+
events: list[ReplayedEvent] = []
|
|
78
|
+
|
|
79
|
+
for t in self._replay_grid(start, end):
|
|
80
|
+
# No-data fires independently of the quorum (a single binary
|
|
81
|
+
# metric-level signal), only when configured and not in cooldown.
|
|
82
|
+
if (
|
|
83
|
+
self.alert_config
|
|
84
|
+
and getattr(self.alert_config, "no_data_alert", False)
|
|
85
|
+
and value_at.get(t) is None
|
|
86
|
+
and not self._replay_in_cooldown(t, sim_last_alert, sim_last_recovery)
|
|
87
|
+
):
|
|
88
|
+
last_point = t.astype("datetime64[ms]").astype(datetime)
|
|
89
|
+
events.append(
|
|
90
|
+
ReplayedEvent("no_data", t, self._build_no_data_alert_data(last_point))
|
|
91
|
+
)
|
|
92
|
+
sim_last_alert = t
|
|
93
|
+
continue
|
|
94
|
+
|
|
95
|
+
causal = {ts: recs for ts, recs in by_time.items() if ts <= t}
|
|
96
|
+
ts_desc = sorted(causal, reverse=True)
|
|
97
|
+
|
|
98
|
+
consecutive, latest_quorum, direction = self._count_consecutive_anomalies(
|
|
99
|
+
causal, ts_desc
|
|
100
|
+
)
|
|
101
|
+
fired = (
|
|
102
|
+
latest_quorum is not None
|
|
103
|
+
and consecutive >= self.conditions.consecutive_anomalies
|
|
104
|
+
and not self._replay_in_cooldown(t, sim_last_alert, sim_last_recovery)
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
if fired:
|
|
108
|
+
assert latest_quorum is not None # narrowed by ``fired``
|
|
109
|
+
streak, onset, capped = self._replay_streak(causal, ts_desc)
|
|
110
|
+
ad = self._build_alert_data(latest_quorum, streak, direction, onset, capped)
|
|
111
|
+
events.append(ReplayedEvent("anomaly", t, ad))
|
|
112
|
+
sim_last_alert = t
|
|
113
|
+
elif (
|
|
114
|
+
self.alert_config
|
|
115
|
+
and getattr(self.alert_config, "notify_on_recovery", False)
|
|
116
|
+
and sim_last_alert is not None
|
|
117
|
+
and (sim_last_recovery is None or sim_last_recovery < sim_last_alert)
|
|
118
|
+
and self._replay_recovered(causal, ts_desc, sim_last_alert)
|
|
119
|
+
):
|
|
120
|
+
slice_ = [d for d in detections if d.timestamp <= t]
|
|
121
|
+
# Pure replay: resolve the just-ended incident from the in-memory
|
|
122
|
+
# slice, never from the DB (keeps replay standalone).
|
|
123
|
+
rd = self._build_recovery_data(slice_, incident_records=slice_)
|
|
124
|
+
if rd is not None:
|
|
125
|
+
events.append(ReplayedEvent("recovery", t, rd))
|
|
126
|
+
sim_last_recovery = t
|
|
127
|
+
|
|
128
|
+
return events
|
|
129
|
+
|
|
130
|
+
def _replay_grid(self, start: datetime, end: datetime) -> list[np.datetime64]:
|
|
131
|
+
"""Every interval boundary in the closed range ``[start, end]``.
|
|
132
|
+
|
|
133
|
+
Boundaries are produced in ``datetime64[ms]`` so they compare exactly
|
|
134
|
+
with hydrated detection timestamps and ``value_at`` keys.
|
|
135
|
+
"""
|
|
136
|
+
step = timedelta(seconds=self.interval.seconds)
|
|
137
|
+
grid: list[np.datetime64] = []
|
|
138
|
+
cur = start
|
|
139
|
+
while cur <= end:
|
|
140
|
+
grid.append(np.datetime64(cur, "ms"))
|
|
141
|
+
cur = cur + step
|
|
142
|
+
return grid
|
|
143
|
+
|
|
144
|
+
def _replay_in_cooldown(
|
|
145
|
+
self,
|
|
146
|
+
t: np.datetime64,
|
|
147
|
+
sim_last_alert: np.datetime64 | None,
|
|
148
|
+
sim_last_recovery: np.datetime64 | None,
|
|
149
|
+
) -> bool:
|
|
150
|
+
"""In-memory analog of :meth:`_CooldownMixin._is_in_cooldown`.
|
|
151
|
+
|
|
152
|
+
Elapsed time is measured on the grid (``t - sim_last_alert``) rather than
|
|
153
|
+
from the wall clock. ``cooldown_reset_on_recovery`` clears the cooldown
|
|
154
|
+
when a recovery has been simulated since the last alert.
|
|
155
|
+
"""
|
|
156
|
+
if not self.alert_config or not getattr(self.alert_config, "alert_cooldown", None):
|
|
157
|
+
return False
|
|
158
|
+
if sim_last_alert is None:
|
|
159
|
+
return False
|
|
160
|
+
|
|
161
|
+
cooldown = np.timedelta64(Interval(self.alert_config.alert_cooldown).seconds, "s")
|
|
162
|
+
elapsed = (t - sim_last_alert).astype("timedelta64[s]")
|
|
163
|
+
|
|
164
|
+
if getattr(self.alert_config, "cooldown_reset_on_recovery", True):
|
|
165
|
+
if sim_last_recovery is not None and sim_last_recovery > sim_last_alert:
|
|
166
|
+
return False
|
|
167
|
+
|
|
168
|
+
return bool(elapsed < cooldown)
|
|
169
|
+
|
|
170
|
+
def _replay_recovered(
|
|
171
|
+
self,
|
|
172
|
+
causal: dict[np.datetime64, list[DetectionRecord]],
|
|
173
|
+
ts_desc: list[np.datetime64],
|
|
174
|
+
sim_last_alert: np.datetime64,
|
|
175
|
+
) -> bool:
|
|
176
|
+
"""Pure half of :meth:`_RecoveryMixin._check_recovery_since_last_alert`.
|
|
177
|
+
|
|
178
|
+
Returns ``True`` when the metric has recovered as of the latest causal
|
|
179
|
+
point: no blocking anomalies under the trigger direction, OR no causal
|
|
180
|
+
detections strictly after the last simulated alert.
|
|
181
|
+
"""
|
|
182
|
+
if not ts_desc:
|
|
183
|
+
# No detections at all → nothing blocking → recovered.
|
|
184
|
+
return True
|
|
185
|
+
|
|
186
|
+
# No fresh detections after the alert → assume recovery (mirrors the
|
|
187
|
+
# live "no fresh detections" branch).
|
|
188
|
+
if not any(ts > sim_last_alert for ts in ts_desc):
|
|
189
|
+
return True
|
|
190
|
+
|
|
191
|
+
latest_ts = ts_desc[0]
|
|
192
|
+
latest_anomalies = [d for d in causal[latest_ts] if d.is_anomaly]
|
|
193
|
+
|
|
194
|
+
policy = self.conditions.direction
|
|
195
|
+
if policy == "down":
|
|
196
|
+
blocking = [d for d in latest_anomalies if d.direction == "down"]
|
|
197
|
+
elif policy == "up":
|
|
198
|
+
blocking = [d for d in latest_anomalies if d.direction == "up"]
|
|
199
|
+
elif policy == "same":
|
|
200
|
+
trigger_direction = self._replay_trigger_direction(causal, sim_last_alert)
|
|
201
|
+
if trigger_direction is None:
|
|
202
|
+
blocking = latest_anomalies # conservative fallback
|
|
203
|
+
else:
|
|
204
|
+
blocking = [d for d in latest_anomalies if d.direction == trigger_direction]
|
|
205
|
+
else: # "any" / unknown — preserve historical behaviour
|
|
206
|
+
blocking = latest_anomalies
|
|
207
|
+
|
|
208
|
+
return len(blocking) == 0
|
|
209
|
+
|
|
210
|
+
def _replay_trigger_direction(
|
|
211
|
+
self,
|
|
212
|
+
causal: dict[np.datetime64, list[DetectionRecord]],
|
|
213
|
+
sim_last_alert: np.datetime64,
|
|
214
|
+
) -> str | None:
|
|
215
|
+
"""Direction of the anomaly that triggered the simulated last alert.
|
|
216
|
+
|
|
217
|
+
Pure analog of :meth:`_RecoveryMixin._get_alert_trigger_direction`: the
|
|
218
|
+
live code reads the single detection row at the alert timestamp; here the
|
|
219
|
+
alert fired at the grid point ``sim_last_alert``, so the triggering
|
|
220
|
+
quorum is the latest causal point at or before it.
|
|
221
|
+
"""
|
|
222
|
+
candidates = [ts for ts in causal if ts <= sim_last_alert]
|
|
223
|
+
if not candidates:
|
|
224
|
+
return None
|
|
225
|
+
latest_ts = max(candidates)
|
|
226
|
+
anomalies = [d for d in causal[latest_ts] if d.is_anomaly]
|
|
227
|
+
if not anomalies:
|
|
228
|
+
return None
|
|
229
|
+
|
|
230
|
+
_, direction = self._quorum_at(anomalies, None)
|
|
231
|
+
if direction in ("up", "down"):
|
|
232
|
+
return direction
|
|
233
|
+
|
|
234
|
+
ups = sum(1 for d in anomalies if d.direction == "up")
|
|
235
|
+
downs = sum(1 for d in anomalies if d.direction == "down")
|
|
236
|
+
if ups > downs:
|
|
237
|
+
return "up"
|
|
238
|
+
if downs > ups:
|
|
239
|
+
return "down"
|
|
240
|
+
return None
|
|
241
|
+
|
|
242
|
+
def _replay_streak(
|
|
243
|
+
self,
|
|
244
|
+
causal: dict[np.datetime64, list[DetectionRecord]],
|
|
245
|
+
ts_desc: list[np.datetime64],
|
|
246
|
+
) -> tuple[int, np.datetime64, bool]:
|
|
247
|
+
"""In-memory analog of :meth:`_DecisionMixin._resolve_streak`.
|
|
248
|
+
|
|
249
|
+
Re-walks the same direction-aware quorum logic over the causal records to
|
|
250
|
+
get the *true* streak length, then derives the onset and the cap flag the
|
|
251
|
+
same way the live path does.
|
|
252
|
+
"""
|
|
253
|
+
latest_ts = ts_desc[0]
|
|
254
|
+
step = np.timedelta64(self.interval.seconds, "s")
|
|
255
|
+
count, _, _ = self._count_consecutive_anomalies(causal, ts_desc)
|
|
256
|
+
count = max(count, 1)
|
|
257
|
+
capped = count >= STREAK_LOOKBACK_POINTS
|
|
258
|
+
return count, latest_ts - step * (count - 1), capped
|
|
@@ -6,12 +6,14 @@ from detectkit.alerting.orchestrator._cooldown import _CooldownMixin
|
|
|
6
6
|
from detectkit.alerting.orchestrator._decision import _DecisionMixin
|
|
7
7
|
from detectkit.alerting.orchestrator._dispatch import _DispatchMixin
|
|
8
8
|
from detectkit.alerting.orchestrator._recovery import _RecoveryMixin
|
|
9
|
+
from detectkit.alerting.orchestrator._replay import _ReplayMixin
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
class AlertOrchestrator(
|
|
12
13
|
_DecisionMixin,
|
|
13
14
|
_CooldownMixin,
|
|
14
15
|
_RecoveryMixin,
|
|
16
|
+
_ReplayMixin,
|
|
15
17
|
_DispatchMixin,
|
|
16
18
|
):
|
|
17
19
|
"""Coordinates alert decisions, cooldown, recovery and dispatch.
|
|
@@ -21,6 +23,8 @@ class AlertOrchestrator(
|
|
|
21
23
|
* ``_DecisionMixin`` — should we alert? builds AlertData.
|
|
22
24
|
* ``_CooldownMixin`` — suppress within the configured window.
|
|
23
25
|
* ``_RecoveryMixin`` — direction-aware "all-clear" detection.
|
|
26
|
+
* ``_ReplayMixin`` — pure historical replay of alert/recovery/no-data
|
|
27
|
+
events (no dispatch, no DB state, no wall-clock).
|
|
24
28
|
* ``_DispatchMixin`` — ship to channels and stamp state.
|
|
25
29
|
"""
|
|
26
30
|
|
|
@@ -67,96 +67,116 @@ _TEMPLATE = """<!doctype html>
|
|
|
67
67
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
68
68
|
<link rel="icon" type="image/svg+xml" href="__FAVICON__">
|
|
69
69
|
<title>detectkit · label incidents · __METRIC__</title>
|
|
70
|
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
|
71
|
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
|
72
|
+
<link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500;600&family=Schibsted+Grotesk:wght@400;500;600;700&display=swap" rel="stylesheet">
|
|
70
73
|
<style>
|
|
71
74
|
:root {
|
|
72
|
-
--clay:#d15b36; --clay-700:#b4471f; --
|
|
75
|
+
--clay:#d15b36; --clay-700:#b4471f; --ink:#1b1916; --muted:#6e675b; --faint:#9a9384;
|
|
76
|
+
--paper:#f5f1e8; --surface:#fbf9f3; --border:#e6e0d4;
|
|
73
77
|
--term-bg:#211e1a; --term-surface:#1b1916; --term-border:#332f29; --term-text:#c9c2b4;
|
|
74
|
-
--anomaly:#d63232; --nodata:#f0ad4e;
|
|
78
|
+
--accent-green:#2e9e73; --anomaly:#d63232; --nodata:#f0ad4e;
|
|
79
|
+
--panel-shadow:0 24px 60px -30px rgba(27,25,22,.45);
|
|
75
80
|
--ui:'Schibsted Grotesk',ui-sans-serif,system-ui,-apple-system,'Segoe UI',Roboto,sans-serif;
|
|
76
81
|
--mono:'JetBrains Mono',ui-monospace,'SFMono-Regular',Menlo,Consolas,monospace;
|
|
77
82
|
}
|
|
78
83
|
* { box-sizing: border-box; }
|
|
79
|
-
body { font-family: var(--ui); margin: 0; background: var(--
|
|
84
|
+
body { font-family: var(--ui); margin: 0; background: var(--paper); color: var(--ink);
|
|
80
85
|
-webkit-font-smoothing: antialiased; }
|
|
81
|
-
.shell { max-width:
|
|
82
|
-
.brand { display:flex; align-items:center; gap:9px; margin-bottom:
|
|
86
|
+
.shell { max-width: 1100px; margin: 0 auto; padding: 26px 26px 48px; }
|
|
87
|
+
.brand { display:flex; align-items:center; gap:9px; margin-bottom: 18px; }
|
|
83
88
|
.brand svg { width: 26px; height: 26px; border-radius: 7px; display:block; }
|
|
84
|
-
.brand b { color: var(--
|
|
85
|
-
.brand span { color: var(--
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
89
|
+
.brand b { color: var(--ink); font-weight: 600; font-size: 15px; letter-spacing: .2px; }
|
|
90
|
+
.brand span { color: var(--muted); font-size: 12px; }
|
|
91
|
+
.head { display:flex; align-items:flex-start; gap:12px; margin-bottom: 14px; }
|
|
92
|
+
.head .bar { flex: 0 0 auto; width: 4px; align-self: stretch; min-height: 38px;
|
|
93
|
+
border-radius: 999px; background: var(--clay); margin-top: 2px; }
|
|
94
|
+
.head .htext { min-width: 0; }
|
|
95
|
+
h1 { font-size: 22px; line-height: 1.25; margin: 0 0 5px; color: var(--ink); font-weight: 700;
|
|
96
|
+
letter-spacing: -.01em; display:flex; align-items:center; flex-wrap:wrap; gap: 9px; }
|
|
97
|
+
h1 code { color: var(--clay); font-family: var(--mono); font-size: .7em; font-weight: 600; }
|
|
98
|
+
.subline { color: var(--muted); font-family: var(--mono); font-size: 12.5px; margin: 0; }
|
|
99
|
+
.ichip { display:inline-flex; align-items:center; gap:6px; vertical-align: middle;
|
|
100
|
+
font-family: var(--mono); font-size: 11.5px; font-weight: 500; color: var(--clay-700);
|
|
101
|
+
background: var(--surface); border: 1px solid var(--border); border-radius: 999px; padding: 3px 11px; }
|
|
91
102
|
.ichip .d { width:6px; height:6px; border-radius:50%; background: var(--clay); }
|
|
92
|
-
.ichip b { color: var(--clay); font-weight: 700; }
|
|
93
|
-
.hint { color: var(--
|
|
94
|
-
.hint code, code.k { color: var(--
|
|
95
|
-
background: var(--
|
|
103
|
+
.ichip b { color: var(--clay-700); font-weight: 700; }
|
|
104
|
+
.hint { color: var(--muted); font-size: 13px; margin: 0 0 18px; line-height: 1.55; }
|
|
105
|
+
.hint code, code.k { color: var(--ink); font-family: var(--mono); font-size: 12px;
|
|
106
|
+
background: var(--surface); border: 1px solid var(--border); border-radius: 5px; padding: 1px 6px; }
|
|
96
107
|
.toolbar { display:flex; flex-wrap:wrap; gap:10px; align-items:center; margin-bottom: 12px; }
|
|
97
108
|
button { font-family: var(--ui); font-size: 13px; font-weight: 500; border: 0; border-radius: 7px;
|
|
98
109
|
padding: 9px 15px; cursor: pointer; transition: background .12s ease, border-color .12s ease, color .12s ease; }
|
|
99
110
|
button.primary { background: var(--clay); color: #fff; }
|
|
100
111
|
button.primary:hover { background: var(--clay-700); }
|
|
101
|
-
button.primary:disabled { background: var(--
|
|
102
|
-
button.ghost { background:
|
|
103
|
-
button.ghost:hover { border-color: var(--
|
|
104
|
-
button.ghost.active { border-color: var(--nodata); color: var(--
|
|
105
|
-
input.setname { background: var(--
|
|
112
|
+
button.primary:disabled { background: var(--border); color: var(--faint); cursor: default; }
|
|
113
|
+
button.ghost { background: var(--surface); color: var(--ink); border: 1px solid var(--border); }
|
|
114
|
+
button.ghost:hover { border-color: var(--clay); color: var(--clay-700); }
|
|
115
|
+
button.ghost.active { border-color: var(--nodata); color: var(--ink); background: rgba(240,173,78,0.18); }
|
|
116
|
+
input.setname { background: var(--surface); color: var(--ink); border: 1px solid var(--border);
|
|
106
117
|
border-radius: 7px; padding: 9px 11px; font-family: var(--ui); font-size: 13px; min-width: 200px; }
|
|
107
|
-
input.setname::placeholder { color: var(--
|
|
118
|
+
input.setname::placeholder { color: var(--faint); }
|
|
108
119
|
input.setname:focus { outline: none; border-color: var(--clay); }
|
|
109
|
-
.summary { margin-left: auto; color: var(--
|
|
110
|
-
.summary b { color: var(--clay); font-weight: 600; }
|
|
120
|
+
.summary { margin-left: auto; color: var(--muted); font-size: 12.5px; font-family: var(--mono); }
|
|
121
|
+
.summary b { color: var(--clay-700); font-weight: 600; }
|
|
111
122
|
.savemsg { margin: 4px 2px 0; font-size: 13px; display: none; }
|
|
112
|
-
.savemsg.ok { display: block; color: var(--accent-green
|
|
123
|
+
.savemsg.ok { display: block; color: var(--accent-green); }
|
|
113
124
|
.savemsg.err { display: block; color: var(--anomaly); }
|
|
114
|
-
.savemsg.info { display: block; color: var(--
|
|
125
|
+
.savemsg.info { display: block; color: var(--muted); }
|
|
115
126
|
.thbar { display:none; flex-wrap:wrap; gap:12px; align-items:center; margin: 0 0 12px;
|
|
116
|
-
padding: 11px 13px; border: 1px solid var(--nodata); border-radius: 9px; background: var(--
|
|
117
|
-
.thbar .thlabel { color: var(--
|
|
118
|
-
|
|
119
|
-
.thbar
|
|
127
|
+
padding: 11px 13px; border: 1px solid var(--nodata); border-radius: 9px; background: var(--surface); }
|
|
128
|
+
.thbar .thlabel { color: var(--clay-700); font-family: var(--mono); font-size: 11px; font-weight: 600;
|
|
129
|
+
letter-spacing: .06em; text-transform: uppercase; }
|
|
130
|
+
.thbar label { color: var(--muted); font-size: 12.5px; display:inline-flex; align-items:center; gap:6px; }
|
|
131
|
+
.thbar select, .thbar input { background: var(--paper); color: var(--ink); border: 1px solid var(--border);
|
|
120
132
|
border-radius: 6px; padding: 6px 8px; font-family: var(--ui); font-size: 12.5px; }
|
|
121
133
|
.thbar input.num { width: 84px; font-family: var(--mono); }
|
|
122
134
|
.thbar input:focus, .thbar select:focus { outline: none; border-color: var(--nodata); }
|
|
123
135
|
.thbar button { padding: 7px 13px; }
|
|
124
136
|
.thbar .thscope { color: var(--faint); font-size: 12px; white-space: nowrap; }
|
|
125
137
|
.thbar .thscope.hint { font-style: italic; }
|
|
126
|
-
.thbar .thscope b { color: var(--
|
|
138
|
+
.thbar .thscope b { color: var(--clay-700); font-weight: 600; font-style: normal; }
|
|
127
139
|
canvas#c { width: 100%; height: clamp(300px, 44vh, 500px); display:block; touch-action: none;
|
|
128
|
-
background: var(--term-
|
|
129
|
-
|
|
130
|
-
.
|
|
140
|
+
background: var(--term-bg); border: 1px solid var(--term-border); border-radius: 12px; cursor: crosshair;
|
|
141
|
+
box-shadow: var(--panel-shadow); }
|
|
142
|
+
.zoombar { display:flex; align-items:center; gap:8px; margin: 12px 0 6px; }
|
|
143
|
+
.rangelbl { margin-left: auto; color: var(--muted); font-size: 12px; font-family: var(--mono); }
|
|
131
144
|
canvas#ov { width: 100%; height: 66px; display:block; touch-action: none;
|
|
132
|
-
background: var(--term-
|
|
133
|
-
|
|
145
|
+
background: var(--term-bg); border: 1px solid var(--term-border); border-radius: 12px; cursor: grab;
|
|
146
|
+
box-shadow: var(--panel-shadow); }
|
|
147
|
+
.navhint { color: var(--faint); font-size: 12px; margin: 8px 2px 0; line-height: 1.55; }
|
|
134
148
|
.empty { color: var(--faint); font-size: 13px; margin: 18px 2px; font-style: italic; }
|
|
135
149
|
ul { list-style: none; margin: 16px 0 0; padding: 0; }
|
|
136
150
|
li { display:flex; align-items:center; gap:11px; padding: 9px 12px; font-size: 13px; flex-wrap: wrap;
|
|
137
|
-
border: 1px solid var(--
|
|
138
|
-
li.sel { border-color: var(--clay); background: rgba(209,91,54,0.
|
|
151
|
+
border: 1px solid var(--border); border-radius: 8px; margin-bottom: 7px; background: var(--surface); }
|
|
152
|
+
li.sel { border-color: var(--clay); background: rgba(209,91,54,0.07); }
|
|
139
153
|
li .dot { width:9px; height:9px; border-radius:50%; background: var(--anomaly); flex: 0 0 auto; }
|
|
140
|
-
li .span { font-family: var(--mono); color: var(--
|
|
141
|
-
li .dur { color: var(--
|
|
142
|
-
li input.desc { flex: 1 1 220px; min-width: 160px; background: var(--
|
|
143
|
-
border: 1px solid var(--
|
|
144
|
-
li input.desc::placeholder { color: var(--
|
|
154
|
+
li .span { font-family: var(--mono); color: var(--ink); }
|
|
155
|
+
li .dur { color: var(--muted); font-size: 12px; }
|
|
156
|
+
li input.desc { flex: 1 1 220px; min-width: 160px; background: var(--paper); color: var(--ink);
|
|
157
|
+
border: 1px solid var(--border); border-radius: 6px; padding: 6px 9px; font-family: var(--ui); font-size: 12.5px; }
|
|
158
|
+
li input.desc::placeholder { color: var(--faint); }
|
|
145
159
|
li input.desc:focus { outline: none; border-color: var(--clay); }
|
|
146
160
|
li button { margin-left: auto; padding: 5px 11px; font-size: 12px; }
|
|
147
161
|
li button.focus { margin-left: auto; }
|
|
148
162
|
li button.focus + button { margin-left: 0; }
|
|
149
|
-
footer { margin-top: 26px; padding-top: 14px; border-top: 1px solid var(--
|
|
163
|
+
footer { margin-top: 26px; padding-top: 14px; border-top: 1px solid var(--border);
|
|
150
164
|
color: var(--faint); font-size: 12px; line-height: 1.6; }
|
|
151
|
-
footer code { font-family: var(--mono); color: var(--
|
|
165
|
+
footer code { font-family: var(--mono); color: var(--muted); }
|
|
152
166
|
</style>
|
|
153
167
|
<div class="shell">
|
|
154
168
|
<div class="brand">
|
|
155
169
|
<svg viewBox="0 0 100 100" aria-hidden="true"><rect x="3" y="3" width="94" height="94" rx="26" fill="#D15B36"/><polyline points="14,62 36,62 50,22 64,62 86,62" fill="none" stroke="#FBF9F3" stroke-width="8" stroke-linecap="round" stroke-linejoin="round"/><circle cx="50" cy="22" r="6.5" fill="#FBF9F3"/></svg>
|
|
156
170
|
<b>detectkit</b><span>· incident labeler</span>
|
|
157
171
|
</div>
|
|
158
|
-
<
|
|
159
|
-
|
|
172
|
+
<div class="head">
|
|
173
|
+
<span class="bar" aria-hidden="true"></span>
|
|
174
|
+
<div class="htext">
|
|
175
|
+
<h1>Label incidents <code>__METRIC__</code><span id="intervalchip" class="ichip"
|
|
176
|
+
title="The metric's sampling interval — the spacing between points, taken straight from the metric."></span></h1>
|
|
177
|
+
<p class="subline">incident labeler · all times UTC</p>
|
|
178
|
+
</div>
|
|
179
|
+
</div>
|
|
160
180
|
<p class="hint">Click-drag across the chart to mark each real incident, add a short description, then
|
|
161
181
|
<b>Export</b>. Save the file into <code class="k">incidents/__METRIC__/</code> and run
|
|
162
182
|
<code class="k">dtk autotune --select __METRIC__ --incidents incidents/__METRIC__/</code></p>
|
|
@@ -10,6 +10,14 @@ A tuned config is an ordinary detectkit config (one chosen detector reusing the
|
|
|
10
10
|
same windowed detectors and `detector_id` identity). The fastest path is the
|
|
11
11
|
**`dtk-autotune`** skill, which runs the whole flow conversationally.
|
|
12
12
|
|
|
13
|
+
> **Prefer to tune by hand?** `dtk tune --select <metric>` is the interactive,
|
|
14
|
+
> human-in-the-loop sibling: it opens a browser view of the real series, lets you
|
|
15
|
+
> turn the knobs and watch the band recompute live, and on **Apply** writes the
|
|
16
|
+
> config back into the metric YAML **in place** (archiving the previous version to
|
|
17
|
+
> `metrics/.history/<metric>/` first). Use `autotune` to search automatically and
|
|
18
|
+
> emit a new file; use `tune` to dial a detector in by eye and commit it. See
|
|
19
|
+
> `cli.md`.
|
|
20
|
+
|
|
13
21
|
## What it searches
|
|
14
22
|
|
|
15
23
|
1. **Seasonality** — greedily builds the best `seasonality_components` grouping
|
|
@@ -57,7 +65,7 @@ ratios to choose.
|
|
|
57
65
|
|
|
58
66
|
```bash
|
|
59
67
|
dtk autotune --select <sel> [--incidents FILE] [--label] [--scoring METRIC] \
|
|
60
|
-
[--from DATE] [--to DATE] [--profile NAME] [--force] [--dry-run]
|
|
68
|
+
[--from DATE] [--to DATE] [--profile NAME] [--force] [--dry-run] [--report [PATH]]
|
|
61
69
|
```
|
|
62
70
|
|
|
63
71
|
- `--incidents FILE|DIR` — a labels file (below) → **supervised** tuning. May be a
|
|
@@ -80,6 +88,12 @@ dtk autotune --select <sel> [--incidents FILE] [--label] [--scoring METRIC] \
|
|
|
80
88
|
- `--scoring` — `mcc` (default), `f1`, `f_beta`, `balanced_accuracy`, `roc_auc`,
|
|
81
89
|
`pr_auc`. MCC uses the whole confusion matrix and suits rare anomalies.
|
|
82
90
|
- `--dry-run` — run the search but persist nothing and write no config.
|
|
91
|
+
- `--report [PATH]` — after tuning, emit a self-contained **HTML report** for the
|
|
92
|
+
winning config over the training window (values, confidence band, anomalies,
|
|
93
|
+
replayed alerts; offline). Bare `--report` writes
|
|
94
|
+
`reports/<name>__tuned_<id>.html`; pass a directory or a `.html` path to
|
|
95
|
+
override. `dtk run --select <m> --report` produces the same report from the
|
|
96
|
+
live config.
|
|
83
97
|
- Selectors match `dtk run`. Tuning reads loaded datapoints — if empty, run
|
|
84
98
|
`dtk run --select <m> --steps load` (optionally `--from`) first.
|
|
85
99
|
|
|
@@ -215,7 +229,12 @@ LIMIT 5
|
|
|
215
229
|
|
|
216
230
|
## Reading the tuned detector's results
|
|
217
231
|
|
|
218
|
-
|
|
232
|
+
The quickest view is an **HTML report**: add `--report` to the `dtk autotune` command (or run
|
|
233
|
+
`dtk run --select <m> --report` later) to get a self-contained file charting the
|
|
234
|
+
winning detector's values, confidence band, flagged anomalies and the alerts it
|
|
235
|
+
would fire, with a period selector — no BI/SQL setup needed, and it works offline.
|
|
236
|
+
|
|
237
|
+
To query the raw rows instead, join recent datapoints with its detections
|
|
219
238
|
(`value` vs `confidence_lower/upper` vs `is_anomaly`) for the
|
|
220
239
|
`winning_detector_id` — see the per-backend query templates in the
|
|
221
240
|
**`dtk-autotune`** skill and in the visualizing-results guide.
|