PyPI - detectkit - Versions diffs - 0.18.0__tar.gz → 0.19.0__tar.gz - Mend

detectkit 0.18.0__tar.gz → 0.19.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (125) hide show

{detectkit-0.18.0/detectkit.egg-info → detectkit-0.19.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: detectkit
-Version: 0.18.0
+Version: 0.19.0
 Summary: Metric monitoring with automatic anomaly detection
 Author: detectkit team
 License: MIT

{detectkit-0.18.0 → detectkit-0.19.0}/detectkit/__init__.py RENAMED Viewed

@@ -4,7 +4,7 @@ detectk - Anomaly Detection for Time-Series Metrics
 A Python library for data analysts and engineers to monitor metrics with automatic anomaly detection.
 """
-__version__ = "0.18.0"
+__version__ = "0.19.0"
 from detectkit.core.interval import Interval
 from detectkit.core.models import ColumnDefinition, TableModel

detectkit-0.19.0/detectkit/autotune/__init__.py ADDED Viewed

@@ -0,0 +1,44 @@
+"""detectkit autotune engine.
+Given a metric's loaded series (+ optional labeled incidents), automatically
+chooses the seasonality grouping, detector type, hyperparameters and history
+window, cross-validates the choice, and returns an :class:`AutoTuneResult`.
+Pure and DB-free: the engine operates on the in-memory ``data`` dict and reuses
+the existing ``WindowedStatDetector`` / ``DetectorFactory``. The CLI command
+(``dtk autotune``) handles loading, persistence, config emission and cleanup.
+"""
+from __future__ import annotations
+from detectkit.autotune._base import AutoTuneError, _AutoTuneBase
+from detectkit.autotune._types import ScoringMetric, TuneMode
+from detectkit.autotune.autotuner import AutoTuner, run_autotune_engine
+from detectkit.autotune.config_emitter import compute_run_id, emit_tuned_config
+from detectkit.autotune.html_labeler import render_labeler_html
+from detectkit.autotune.labels import (
+    GroundTruth,
+    IncidentLabels,
+    parse_incident_labels,
+    parse_labels_file,
+)
+from detectkit.autotune.result import AutoTuneResult
+from detectkit.autotune.settings import TuneSettings
+# Names re-exported as the package's public surface. ``_AutoTuneBase`` is
+# listed here even though its leading underscore marks it as non-public.
+__all__ = [
+    "AutoTuner",
+    "AutoTuneError",
+    "AutoTuneResult",
+    "GroundTruth",
+    "IncidentLabels",
+    "ScoringMetric",
+    "TuneMode",
+    "TuneSettings",
+    "_AutoTuneBase",
+    "compute_run_id",
+    "emit_tuned_config",
+    "parse_incident_labels",
+    "parse_labels_file",
+    "render_labeler_html",
+    "run_autotune_engine",
+]

detectkit-0.19.0/detectkit/autotune/_base.py ADDED Viewed

@@ -0,0 +1,109 @@
+"""Shared engine state + the candidate-evaluation primitive.
+The stages are plain functions (in their own modules) that take an
+:class:`_AutoTuneBase` as their first argument and call ``evaluate`` /
+``log`` on it. This keeps cross-stage calls explicit and type-checkable
+(no cross-mixin attribute access) while still splitting the pipeline into
+focused, <250-line files.
+"""
+from __future__ import annotations
+from collections.abc import Callable
+from typing import Any
+import numpy as np
+from detectkit.autotune._types import (
+    CandidateEval,
+    CVPlan,
+    DecisionEntry,
+    GroupVote,
+)
+from detectkit.autotune.crossval import run_cv
+from detectkit.autotune.labels import GroundTruth
+from detectkit.autotune.settings import TuneSettings
+from detectkit.detectors.factory import DetectorFactory
+class AutoTuneError(RuntimeError):
+    """Raised when a metric cannot be tuned (no data, no viable candidate, …).
+    It derives from ``RuntimeError``, so a handler that catches only
+    ``ValueError`` (as ``_AutoTuneBase.safe_evaluate`` does) lets it propagate.
+    """
+class _AutoTuneBase:
+    """Shared run state: the series, labels, settings, decision log and eval cache."""
+    def __init__(
+        self,
+        *,
+        metric_name: str,
+        data: dict[str, np.ndarray],
+        ground_truth: GroundTruth,
+        interval_seconds: int,
+        settings: TuneSettings,
+        on_stage: Callable[[str, str], None] | None = None,
+    ) -> None:
+        # Inputs handed in by the caller.
+        self.metric_name = metric_name
+        self.data = data
+        self.ground_truth = ground_truth
+        self.interval_seconds = interval_seconds
+        self.settings = settings
+        self._on_stage = on_stage
+        # State accumulated while the stages run.
+        self.cv_plan: CVPlan | None = None
+        self.group_votes: list[GroupVote] = []
+        self.decision_log: list[DecisionEntry] = []
+        # Keyed by detector id: serves both as the memoization cache and as
+        # the record of every candidate that was evaluated.
+        self._evaluated: dict[str, CandidateEval] = {}
+    # ------------------------------------------------------------------
+    # Progress + decision log
+    # ------------------------------------------------------------------
+    def emit(self, stage: str, line: str) -> None:
+        """Forward one progress line to the ``on_stage`` callback, if one was given."""
+        callback = self._on_stage
+        if callback is None:
+            return
+        callback(stage, line)
+    def log(self, stage: str, message: str, *, emit: bool = True, **fields: Any) -> None:
+        """Append an entry to the decision log; also stream it unless ``emit`` is false."""
+        entry = DecisionEntry(stage=stage, message=message, fields=fields)
+        self.decision_log.append(entry)
+        if not emit:
+            return
+        self.emit(stage, message)
+    # ------------------------------------------------------------------
+    # Candidate evaluation
+    # ------------------------------------------------------------------
+    def evaluate(self, detector_type: str, params: dict[str, Any]) -> CandidateEval:
+        """Cross-validate one candidate, reusing the stored result for a known detector id.
+        ``params`` are layered over ``settings.fixed_params`` before the
+        detector is built. Raises :class:`AutoTuneError` when no CV plan has
+        been set yet.
+        """
+        merged = {**self.settings.fixed_params, **params}
+        # The detector has to be built first: its id is the cache key.
+        candidate = DetectorFactory.create(detector_type, merged)
+        key = candidate.get_detector_id()
+        if key in self._evaluated:
+            return self._evaluated[key]
+        plan = self.cv_plan
+        if plan is None:
+            raise AutoTuneError("CV plan not initialized before evaluation")
+        scores = run_cv(candidate, self.data, plan, self.ground_truth, self.settings)
+        outcome = CandidateEval(
+            detector_type=detector_type,
+            params=merged,
+            detector_id=key,
+            fold_scores=scores,
+            score=scores.aggregate,
+        )
+        self._evaluated[key] = outcome
+        return outcome
+    def safe_evaluate(self, detector_type: str, params: dict[str, Any]) -> CandidateEval | None:
+        """Like :meth:`evaluate`, but turn a ``ValueError`` into a ``None`` result."""
+        try:
+            outcome = self.evaluate(detector_type, params)
+        except ValueError:
+            return None
+        else:
+            return outcome
+    def evaluated_ids(self) -> list[str]:
+        """Return the ids of all detectors evaluated so far, in evaluation order."""
+        return [*self._evaluated]

detectkit-0.19.0/detectkit/autotune/_types.py ADDED Viewed

@@ -0,0 +1,87 @@
+"""Small shared types for the autotune engine.
+Kept dependency-free (no imports from other autotune modules) so every stage
+can import these without cycles.
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Any
+class ScoringMetric(str, Enum):
+    """Optimization target for the grid search.
+    All are computed in pure numpy (see :mod:`detectkit.autotune.scoring`).
+    MCC is the default: it uses all four confusion cells and is robust to the
+    heavy class imbalance of rare anomalies.
+    Members are also ``str`` instances, so each compares equal to its
+    lower-case value string.
+    """
+    MCC = "mcc"
+    F1 = "f1"
+    # NOTE(review): presumably the member that consumes ``settings.beta`` — confirm in scoring.
+    F_BETA = "f_beta"
+    BALANCED_ACCURACY = "balanced_accuracy"
+    ROC_AUC = "roc_auc"
+    PR_AUC = "pr_auc"
+class TuneMode(str, Enum):
+    """Whether the run optimizes against labels or data statistics.
+    Members are also ``str`` instances equal to their lower-case value string.
+    """
+    # Optimize against labels.
+    SUPERVISED = "supervised"
+    # Optimize against data statistics.
+    UNSUPERVISED = "unsupervised"
+@dataclass
+class CVPlan:
+    """Walk-forward fold layout over a single loaded series.
+    ``fold_bounds`` are ``[lo, hi)`` index ranges into the series; the first
+    ``context_end`` points are reserved as pure context and never scored.
+    """
+    # One half-open ``(lo, hi)`` index pair per fold.
+    fold_bounds: list[tuple[int, int]]
+    # Number of leading points kept as context only.
+    context_end: int
+@dataclass
+class FoldScores:
+    """Per-fold scores plus the stability-penalized aggregate."""
+    # One score per cross-validation fold.
+    per_fold: list[float]
+    # Stability-penalized overall score.
+    aggregate: float
+    # NOTE(review): presumably the amount taken off for fold-to-fold spread — confirm in crossval.
+    stability_penalty: float
+@dataclass
+class CandidateEval:
+    """A single evaluated detector candidate."""
+    # Detector type name the candidate was built from.
+    detector_type: str
+    # Parameters the candidate detector was built with.
+    params: dict[str, Any]
+    # Identifier reported by the detector itself.
+    detector_id: str
+    # Cross-validation outcome for this candidate.
+    fold_scores: FoldScores
+    # Headline score used to compare candidates.
+    score: float
+@dataclass
+class GroupVote:
+    """Per-seasonality-group distribution features + ranked detector suitabilities."""
+    # The seasonality group this vote describes.
+    group: list[str]
+    # Named distribution features measured for the group.
+    features: dict[str, float]
+    # NOTE(review): presumably (detector type, suitability) pairs, best first — confirm ordering.
+    ranked_types: list[tuple[str, float]]
+@dataclass
+class DecisionEntry:
+    """One ordered, human-readable rationale entry for the decision log."""
+    # Pipeline stage the entry belongs to.
+    stage: str
+    # Human-readable rationale text.
+    message: str
+    # Optional structured extras; a fresh dict per instance.
+    fields: dict[str, Any] = field(default_factory=dict)
+    def to_dict(self) -> dict[str, Any]:
+        """Return the entry as a plain dict with ``stage``, ``message`` and ``fields`` keys.
+        ``fields`` is returned by reference, not copied.
+        """
+        return {"stage": self.stage, "message": self.message, "fields": self.fields}

detectkit-0.19.0/detectkit/autotune/autotuner.py ADDED Viewed

@@ -0,0 +1,194 @@
+"""The autotune orchestrator: runs the stages and assembles the result.
+Pure and DB-free — operates entirely on the in-memory ``data`` dict. The CLI
+command handles loading, persistence, config emission and candidate cleanup.
+"""
+from __future__ import annotations
+from collections.abc import Callable
+from datetime import datetime, timedelta
+from typing import Any
+import numpy as np
+from detectkit.autotune._base import AutoTuneError, _AutoTuneBase
+from detectkit.autotune._types import CandidateEval, TuneMode
+from detectkit.autotune.crossval import build_cv_plan, predictions_from_results
+from detectkit.autotune.detector_select import select_detector_types
+from detectkit.autotune.grid_search import grid_search
+from detectkit.autotune.labels import GroundTruth
+from detectkit.autotune.result import AutoTuneResult
+from detectkit.autotune.scoring import score_predictions
+from detectkit.autotune.seasonality_search import search_seasonality
+from detectkit.autotune.settings import TuneSettings
+from detectkit.autotune.window_select import window_grid
+from detectkit.detectors.factory import DetectorFactory
+# Candidate ``consecutive_anomalies`` values swept by ``AutoTuner._select_alert_window``.
+_ALERT_WINDOW_GRID = (1, 2, 3, 4, 5)
+def _ts_to_dt(ts64: np.datetime64) -> datetime:
+    """Convert a numpy ``datetime64`` into a naive ``datetime`` at millisecond precision."""
+    epoch = datetime(1970, 1, 1)
+    millis = ts64.astype("datetime64[ms]").astype(np.int64)
+    return epoch + timedelta(milliseconds=int(millis))
+def _consecutive(flags: np.ndarray, k: int) -> np.ndarray:
+    """Mark index i where the last *k* grid points are all anomalous.
+    Args:
+        flags: 1-D anomaly flags, one per grid point.
+        k: run length required; ``k <= 1`` returns a copy of ``flags``.
+    Returns:
+        A new array of the same length. Position ``i`` is set only when
+        ``flags[i - k + 1 : i + 1]`` are all set; the first ``k - 1``
+        positions are always cleared. A series shorter than ``k`` therefore
+        yields an all-cleared array instead of raising.
+    """
+    out = flags.copy()
+    if k <= 1:
+        return out
+    for shift in range(1, k):
+        # Positions with fewer than ``shift`` predecessors cannot qualify.
+        out[:shift] = False
+        # AND each remaining point with the flag ``shift`` steps earlier.
+        # Both slices are empty once ``shift`` reaches the series length,
+        # so short inputs no longer hit a shape mismatch.
+        out[shift:] &= flags[:-shift]
+    return out
+class AutoTuner(_AutoTuneBase):
+    """Drives the tuning stages over in-memory data and builds an :class:`AutoTuneResult`."""
+    def tune(self) -> AutoTuneResult:
+        """Run every stage in order and return the assembled result.
+        Raises:
+            AutoTuneError: when the series is empty, when no cross-validation
+                folds can be laid out, or when the grid search returns no
+                candidate.
+        """
+        point_count = len(self.data["timestamp"])
+        if not point_count:
+            raise AutoTuneError(
+                "no datapoints to tune on — run `dtk run --select <metric> --steps load` first"
+            )
+        windows = window_grid(self)
+        # The context window is never smaller than 100 points.
+        context_window = max([*windows, 100])
+        fold_count = self.settings.fold_count
+        plan = build_cv_plan(point_count, context_window, fold_count)
+        self.cv_plan = plan
+        if not plan.fold_bounds:
+            raise AutoTuneError(
+                f"not enough datapoints ({point_count}) for {fold_count}-fold "
+                f"cross-validation with a {context_window}-point context window"
+            )
+        truth = self.ground_truth
+        mode_name = truth.mode.value
+        positives = truth.n_positive
+        summary = (
+            f"{truth.n_intervals} interval(s) + {truth.n_points} point(s) → {mode_name} mode "
+            f"({positives} labeled grid point(s)); scoring={self.settings.metric.value}"
+        )
+        self.log("labels", summary, mode=mode_name, n_positive=positives)
+        seasonality = search_seasonality(self)
+        candidate_types = select_detector_types(self, seasonality)
+        winner = grid_search(self, candidate_types, seasonality, windows)
+        if winner is None:
+            raise AutoTuneError("no viable detector candidate found for this data")
+        alert_window = self._select_alert_window(winner.detector_type, winner.params)
+        return self._build_result(seasonality, winner, alert_window)
+    # ------------------------------------------------------------------
+    def _select_alert_window(self, detector_type: str, params: dict[str, Any]) -> int | None:
+        """Pick the best-scoring ``consecutive_anomalies``; ``None`` outside supervised mode."""
+        truth = self.ground_truth
+        if truth.mode != TuneMode.SUPERVISED:
+            return None
+        tuned = DetectorFactory.create(detector_type, params)
+        flagged, scores, usable = predictions_from_results(tuned.detect(self.data))
+        labels = truth.y_true
+        metric, beta = self.settings.metric, self.settings.beta
+        chosen, chosen_score = 1, float("-inf")
+        for window in _ALERT_WINDOW_GRID:
+            alerts = _consecutive(flagged, window)
+            current = score_predictions(
+                labels[usable], alerts[usable], scores[usable], metric, beta
+            )
+            # Strict comparison: on a tie the smaller window stays selected.
+            if current > chosen_score:
+                chosen, chosen_score = window, current
+        self.log(
+            "window",
+            f"consecutive_anomalies={chosen} "
+            f"(max {metric.value}={chosen_score:.3f} on labeled incidents)",
+            consecutive_anomalies=chosen,
+        )
+        return chosen
+    def _clean_params(self, params: dict[str, Any]) -> dict[str, Any]:
+        """Return ``params`` without ``None`` values or an empty ``seasonality_components``."""
+        return {
+            key: value
+            for key, value in params.items()
+            if value is not None and not (key == "seasonality_components" and not value)
+        }
+    def _build_result(
+        self, seasonality: list | None, best: CandidateEval, consecutive: int | None
+    ) -> AutoTuneResult:
+        """Assemble the :class:`AutoTuneResult` from the winner and the accumulated run state."""
+        stamps = self.data["timestamp"]
+        if len(stamps):
+            first_seen, last_seen = _ts_to_dt(stamps[0]), _ts_to_dt(stamps[-1])
+        else:
+            first_seen = last_seen = None
+        truth = self.ground_truth
+        labels_summary = {
+            "intervals": truth.n_intervals,
+            "points": truth.n_points,
+            "positive_grid_points": truth.n_positive,
+        }
+        candidates = []
+        for evaluated in self._evaluated.values():
+            candidates.append(
+                {
+                    "detector_type": evaluated.detector_type,
+                    "params": self._clean_params(evaluated.params),
+                    "detector_id": evaluated.detector_id,
+                }
+            )
+        votes = []
+        for vote in self.group_votes:
+            votes.append(
+                {"group": vote.group, "features": vote.features, "ranked_types": vote.ranked_types}
+            )
+        fold_scores = best.fold_scores
+        return AutoTuneResult(
+            metric_name=self.metric_name,
+            mode=truth.mode.value,
+            scoring_metric=self.settings.metric.value,
+            training_start=first_seen,
+            training_end=last_seen,
+            interval_seconds=self.interval_seconds,
+            n_points=int(len(stamps)),
+            labels_summary=labels_summary,
+            chosen_seasonality=seasonality,
+            chosen_detector_type=best.detector_type,
+            chosen_detector_params=self._clean_params(best.params),
+            winning_detector_id=best.detector_id,
+            score=best.score,
+            cv_per_fold=fold_scores.per_fold,
+            cv_stability_penalty=fold_scores.stability_penalty,
+            consecutive_anomalies=consecutive,
+            candidate_detector_ids=self.evaluated_ids(),
+            candidates=candidates,
+            group_votes=votes,
+            decision_log=[entry.to_dict() for entry in self.decision_log],
+        )
+def run_autotune_engine(
+    *,
+    metric_name: str,
+    data: dict[str, np.ndarray],
+    ground_truth: GroundTruth,
+    interval_seconds: int,
+    settings: TuneSettings,
+    on_stage: Callable[[str, str], None] | None = None,
+) -> AutoTuneResult:
+    """Construct an :class:`AutoTuner` from the keyword arguments and return its ``tune()`` result."""
+    return AutoTuner(
+        metric_name=metric_name,
+        data=data,
+        ground_truth=ground_truth,
+        interval_seconds=interval_seconds,
+        settings=settings,
+        on_stage=on_stage,
+    ).tune()

detectkit-0.19.0/detectkit/autotune/config_emitter.py ADDED Viewed

@@ -0,0 +1,167 @@
+"""Emit the annotated, ready-to-run tuned metric config.
+Builds a new metric YAML named ``<original>__tuned_<run_id>`` led by a
+``#``-comment block that walks the entire decision log, followed by the real
+config (single chosen detector + chosen seasonality + copied query/alerting).
+The body is validated through ``MetricConfig`` before it is ever written, so a
+broken config is never emitted. PyYAML only — no new dependency.
+"""
+from __future__ import annotations
+import hashlib
+from pathlib import Path
+from typing import Any
+import yaml
+from detectkit.autotune.result import AutoTuneResult
+from detectkit.config.metric_config import MetricConfig
+from detectkit.utils.json_utils import json_dumps_sorted
+# Horizontal rule that opens and closes the generated comment header.
+_RULE = "# " + "─" * 61
+# Decision-log stages that are echoed into the comment header, mapped to the
+# label printed for them; entries from any other stage are left out.
+_STAGE_LABELS = {
+    "seasonality": "SEASONALITY",
+    "detector_select": "DETECTOR",
+    "grid_search": "GRID SEARCH",
+    "window": "WINDOW",
+}
+def compute_run_id(result: AutoTuneResult) -> str:
+    """Return the first 6 hex digits of a SHA-256 over the run's inputs and outputs.
+    No wall-clock value is hashed, so the same result yields the same id.
+    """
+    started, ended = result.training_start, result.training_end
+    fingerprint = {
+        "metric": result.metric_name,
+        "training_start": started.isoformat() if started else None,
+        "training_end": ended.isoformat() if ended else None,
+        "labels": result.labels_summary,
+        "detector_type": result.chosen_detector_type,
+        "detector_params": result.chosen_detector_params,
+        "seasonality": result.chosen_seasonality,
+        "scoring_metric": result.scoring_metric,
+    }
+    encoded = json_dumps_sorted(fingerprint).encode()
+    digest = hashlib.sha256(encoded).hexdigest()
+    return digest[:6]
+def _flatten_scalar_seasonality(seasonality: list | None) -> list[str]:
+    """Flatten seasonality components into one de-duplicated, order-preserving column list.
+    A component that is a string counts as a single column; any other
+    component is iterated for its members.
+    """
+    if not seasonality:
+        return []
+    flattened = [
+        column
+        for component in seasonality
+        for column in ((component,) if isinstance(component, str) else component)
+    ]
+    unique: list[str] = []
+    for column in flattened:
+        if column in unique:
+            continue
+        unique.append(column)
+    return unique
+def _build_alerting(original: MetricConfig, result: AutoTuneResult) -> list[dict] | None:
+    """Derive the tuned alerting section from the original's first alerting entry.
+    Returns ``None`` when the original has no alerting. Otherwise returns a
+    one-element list: the first entry dumped without ``None``/default values,
+    with ``consecutive_anomalies`` overridden when the run chose one and
+    ``min_detectors`` forced to 1.
+    """
+    rules = original.alerting
+    if not rules:
+        return None
+    tuned = rules[0].model_dump(exclude_none=True, exclude_defaults=True)
+    chosen_window = result.consecutive_anomalies
+    if chosen_window is not None:
+        tuned["consecutive_anomalies"] = chosen_window
+    # The emitted config carries exactly one detector.
+    tuned["min_detectors"] = 1
+    return [tuned]
+def _build_body(original: MetricConfig, result: AutoTuneResult, new_name: str) -> dict[str, Any]:
+    """Build the tuned metric config as a plain dict.
+    Query, interval and loading settings are carried over from ``original``;
+    the detector, seasonality columns and loading start time come from
+    ``result`` when it provides them. Keys are inserted in the order they
+    should appear in the emitted file.
+    """
+    body: dict[str, Any] = {"name": new_name}
+    # Optional descriptive fields are copied only when they are set.
+    for attr in ("description", "tags", "profile"):
+        value = getattr(original, attr)
+        if value:
+            body[attr] = value
+    # An inline query takes precedence over a query file.
+    inline_query, query_file = original.query, original.query_file
+    if inline_query is not None:
+        body["query"] = inline_query
+    elif query_file is not None:
+        body["query_file"] = str(query_file)
+    columns = original.query_columns
+    if columns is not None:
+        body["query_columns"] = columns.model_dump(exclude_none=True)
+    body["interval"] = original.interval
+    # Prefer the start of the training period over the original start time.
+    trained_from = result.training_start
+    if trained_from is not None:
+        body["loading_start_time"] = trained_from.strftime("%Y-%m-%d %H:%M:%S")
+    elif original.loading_start_time:
+        body["loading_start_time"] = original.loading_start_time
+    body["loading_batch_size"] = original.loading_batch_size
+    # Fall back to the original columns when the run chose no seasonality.
+    season_cols = _flatten_scalar_seasonality(result.chosen_seasonality) or original.seasonality_columns
+    if season_cols:
+        body["seasonality_columns"] = season_cols
+    chosen_detector = {
+        "type": result.chosen_detector_type,
+        "params": result.chosen_detector_params,
+    }
+    body["detectors"] = [chosen_detector]
+    alerting = _build_alerting(original, result)
+    if alerting is not None:
+        body["alerting"] = alerting
+    body["enabled"] = True
+    return body
+def _build_comments(result: AutoTuneResult, source_label: str, run_id: str) -> str:
+    """Render the ``#``-comment header placed above the tuned config body.
+    The header holds the run id and source file, the training period (when
+    both ends are known), a labels summary, the score with its per-fold
+    values, one line per decision-log entry whose stage has a display label,
+    and a reproduce hint.
+    Args:
+        result: the finished tuning result.
+        source_label: path or name of the config the run started from.
+        run_id: identifier of this tuning run.
+    Returns:
+        The header as a newline-joined string in which every line starts
+        with ``#``.
+    """
+    lines = [
+        _RULE,
+        f"# Auto-tuned by `dtk autotune`  (run_id: {run_id})",
+        f"# Generated from: {source_label}",
+        "#",
+    ]
+    if result.training_start and result.training_end:
+        lines.append(
+            f"# Training period : {result.training_start:%Y-%m-%d %H:%M:%S} → "
+            f"{result.training_end:%Y-%m-%d %H:%M:%S} UTC ({result.n_points:,} points)"
+        )
+    summary = result.labels_summary
+    lines.append(
+        f"# Labels          : {result.mode} — {summary.get('intervals', 0)} interval(s), "
+        f"{summary.get('points', 0)} point(s), "
+        f"{summary.get('positive_grid_points', 0)} labeled grid point(s)"
+    )
+    folds = " ".join(f"{f:.2f}" for f in result.cv_per_fold) or "—"
+    lines.append(
+        f"# Scoring metric  : {result.scoring_metric} = {result.score:.3f}  (CV folds: {folds})"
+    )
+    lines.append("#")
+    for entry in result.decision_log:
+        label = _STAGE_LABELS.get(entry.get("stage", ""))
+        if not label:
+            continue
+        # A message may span several lines. Each one must carry its own "#",
+        # otherwise the continuation would land in the YAML body.
+        first, *rest = str(entry.get("message", "")).splitlines() or [""]
+        lines.append(f"# {label:<12}: {first}")
+        lines.extend(f"# {'':<14}{extra}" for extra in rest)
+    lines.append("#")
+    lines.append(f"# Reproduce: dtk autotune --select {result.metric_name}")
+    lines.append(_RULE)
+    return "\n".join(lines)
+def emit_tuned_config(
+    *,
+    original_config: MetricConfig,
+    original_path: Path,
+    result: AutoTuneResult,
+    project_root: Path,
+    run_id: str | None = None,
+) -> tuple[Path, str, str]:
+    """Produce ``(out_path, yaml_text, run_id)`` for the tuned config without writing it.
+    The body is run through ``MetricConfig.model_validate`` first, so an
+    invalid config raises here rather than being handed back. When ``run_id``
+    is not supplied it is derived from ``result``.
+    """
+    if not run_id:
+        run_id = compute_run_id(result)
+    tuned_name = f"{original_config.name}__tuned_{run_id}"
+    body = _build_body(original_config, result, tuned_name)
+    # Validation happens before anything else is rendered.
+    MetricConfig.model_validate(body)
+    # Show the source relative to the project when it lives inside it.
+    try:
+        relative = original_path.relative_to(project_root)
+    except ValueError:
+        source_label = original_path.name
+    else:
+        source_label = str(relative)
+    header = _build_comments(result, source_label, run_id)
+    yaml_body = yaml.safe_dump(body, sort_keys=False, default_flow_style=False, allow_unicode=True)
+    text = header + "\n" + yaml_body
+    file_name = f"{original_path.stem}__tuned_{run_id}.yml"
+    out_path = project_root / "metrics" / file_name
+    return out_path, text, run_id

detectkit 0.18.0__tar.gz → 0.19.0__tar.gz

detectkit 0.18.0__tar.gz → 0.19.0__tar.gz