canary-ml 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,72 @@
1
+ Metadata-Version: 2.4
2
+ Name: canary-ml
3
+ Version: 1.0.0
4
+ Summary: Lightweight drift and anomaly monitoring for production ML models.
5
+ License: MIT
6
+ Project-URL: Homepage, https://github.com/aitor1717/canary-ml
7
+ Project-URL: Repository, https://github.com/aitor1717/canary-ml
8
+ Requires-Python: >=3.9
9
+ Description-Content-Type: text/markdown
10
+ Requires-Dist: numpy>=1.24
11
+ Requires-Dist: scipy>=1.10
12
+ Requires-Dist: scikit-learn>=1.3
13
+ Requires-Dist: rich>=13.0
14
+ Provides-Extra: keras
15
+ Requires-Dist: tensorflow>=2.13; extra == "keras"
16
+ Provides-Extra: dev
17
+ Requires-Dist: pytest>=7.0; extra == "dev"
18
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
19
+
20
+ # canary-ml
21
+
22
+ Lightweight drift and anomaly monitoring for production ML models.
23
+
24
+ ## Install
25
+
26
+ ```bash
27
+ pip install canary-ml
28
+ ```
29
+
30
+ ## Quickstart
31
+
32
+ ```python
33
+ from canary_ml import ModelMonitor
34
+
35
+ monitor = ModelMonitor(
36
+ model=your_model,
37
+ reference_data=X_train,
38
+ alert_threshold=0.2,
39
+ log_path="./canary_logs"
40
+ )
41
+
42
+ # drop-in replacement — monitoring is a side effect
43
+ predictions = monitor.predict(X_new)
44
+
45
+ report = monitor.get_report()
46
+ print(report.summary())
47
+ # DriftReport | psi=0.41 | features_drifted=3 | anomaly_rate=3.2% | ALERT
48
+
49
+ monitor.serve_dashboard(port=8501)
50
+ ```
51
+
52
+ ## Features
53
+
54
+ - Data Drift Detection — KS test, PSI, chi-square per feature with configurable thresholds
55
+ - Anomaly Detection — Isolation Forest and z-score ensemble on inputs and outputs
56
+ - Zero Latency — monitoring is a side effect; inference path stays unchanged
57
+ - Live Dashboard — zero-dep HTML/JS, ships with the package, no cloud account needed
58
+
59
+ ## Why
60
+
61
+ Most ML monitoring tools require a database, a cloud account, or a separate deployment pipeline. canary-ml wraps your model with a single line of code and starts logging drift metrics immediately — to a local JSON-lines file, with no external services or infrastructure required.
62
+
63
+ The dashboard (`monitor.serve_dashboard()`) reads from that file and auto-refreshes every 5 seconds. You can run it on a laptop, in a Docker container, or on any machine with the package installed.
64
+
65
+ ## Roadmap
66
+
67
+ ### v1.1
68
+ - Label-free performance estimation: estimate model accuracy/F1 from confidence score distributions without ground truth labels, using confidence-based performance estimation (CBPE). Alerts when estimated performance degrades, not just when inputs shift.
69
+
70
+ ## License
71
+
72
+ MIT
@@ -0,0 +1,53 @@
1
+ # canary-ml
2
+
3
+ Lightweight drift and anomaly monitoring for production ML models.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install canary-ml
9
+ ```
10
+
11
+ ## Quickstart
12
+
13
+ ```python
14
+ from canary_ml import ModelMonitor
15
+
16
+ monitor = ModelMonitor(
17
+ model=your_model,
18
+ reference_data=X_train,
19
+ alert_threshold=0.2,
20
+ log_path="./canary_logs"
21
+ )
22
+
23
+ # drop-in replacement — monitoring is a side effect
24
+ predictions = monitor.predict(X_new)
25
+
26
+ report = monitor.get_report()
27
+ print(report.summary())
28
+ # DriftReport | psi=0.41 | features_drifted=3 | anomaly_rate=3.2% | ALERT
29
+
30
+ monitor.serve_dashboard(port=8501)
31
+ ```
32
+
33
+ ## Features
34
+
35
+ - Data Drift Detection — KS test, PSI, chi-square per feature with configurable thresholds
36
+ - Anomaly Detection — Isolation Forest and z-score ensemble on inputs and outputs
37
+ - Zero Latency — monitoring is a side effect; inference path stays unchanged
38
+ - Live Dashboard — zero-dep HTML/JS, ships with the package, no cloud account needed
39
+
40
+ ## Why
41
+
42
+ Most ML monitoring tools require a database, a cloud account, or a separate deployment pipeline. canary-ml wraps your model with a single line of code and starts logging drift metrics immediately — to a local JSON-lines file, with no external services or infrastructure required.
43
+
44
+ The dashboard (`monitor.serve_dashboard()`) reads from that file and auto-refreshes every 5 seconds. You can run it on a laptop, in a Docker container, or on any machine with the package installed.
45
+
46
+ ## Roadmap
47
+
48
+ ### v1.1
49
+ - Label-free performance estimation: estimate model accuracy/F1 from confidence score distributions without ground truth labels, using confidence-based performance estimation (CBPE). Alerts when estimated performance degrades, not just when inputs shift.
50
+
51
+ ## License
52
+
53
+ MIT
@@ -0,0 +1,5 @@
1
+ from canary_ml.monitor import ModelMonitor
2
+ from canary_ml.report import DriftReport
3
+
4
+ __all__ = ["ModelMonitor", "DriftReport"]
5
+ __version__ = "1.0.0"
@@ -0,0 +1,40 @@
1
+ from __future__ import annotations
2
+
3
+ from rich.console import Console
4
+ from rich.panel import Panel
5
+ from rich.text import Text
6
+
7
+ from canary_ml.report import DriftReport
8
+
9
+ _console = Console()
10
+
11
+
12
def check_alert(report: DriftReport, threshold: float) -> bool:
    """Tell whether the report's PSI score is strictly above *threshold*.

    Args:
        report: Object exposing a ``psi_score`` attribute.
        threshold: Value the PSI score must exceed for an alert.

    Returns:
        The result of ``report.psi_score > threshold``; a score exactly equal
        to the threshold does not count as an alert.
    """
    exceeds_threshold = report.psi_score > threshold
    return exceeds_threshold
15
+
16
+
17
def format_alert(report: DriftReport) -> None:
    """Print a colour-coded status panel for *report* to the terminal.

    The panel colour and title are chosen from the report flags:

    * red / "DRIFT ALERT" when ``report.alert_triggered`` is truthy,
    * yellow / "DRIFT WARNING" when only ``report.drift_detected`` is truthy,
    * green / "STABLE" otherwise.

    The body lists the timestamp, sample count, PSI score, the number of
    entries in ``report.ks_results`` whose ``"drifted"`` value is truthy, and
    the anomaly rate as a percentage.

    Args:
        report: Report whose fields are rendered.
    """
    n_drifted = 0
    for ks_entry in report.ks_results.values():
        if ks_entry.get("drifted"):
            n_drifted += 1

    if report.alert_triggered:
        colour, label = "red", "DRIFT ALERT"
    elif report.drift_detected:
        colour, label = "yellow", "DRIFT WARNING"
    else:
        colour, label = "green", "STABLE"
    severity = f"bold {colour}"
    heading = f"[{severity}]{label}[/{severity}]"

    # Values are only highlighted once they pass their display cut-offs;
    # otherwise they are appended with an empty style.
    psi_style = severity if report.psi_score > 0.1 else ""
    drifted_style = severity if n_drifted > 0 else ""
    rate_style = "yellow" if report.anomaly_rate > 0.02 else ""

    # (caption, rendered value, style for the value); None means "no style
    # argument", which is the default of Text.append.
    rows = (
        (" timestamp ", f"{report.timestamp}\n", None),
        (" samples ", f"{report.n_samples}\n", None),
        (" PSI score ", f"{report.psi_score:.3f}\n", psi_style),
        (" features drifted ", f"{n_drifted}\n", drifted_style),
        (" anomaly rate ", f"{report.anomaly_rate * 100:.1f}%\n", rate_style),
    )

    panel_text = Text()
    for caption, value, value_style in rows:
        panel_text.append(caption, style="dim")
        panel_text.append(value, style=value_style)

    # The border uses the bare colour name, without the "bold" modifier.
    _console.print(Panel(panel_text, title=heading, border_style=colour))
@@ -0,0 +1,77 @@
1
+ from __future__ import annotations
2
+
3
+ import numpy as np
4
+ from sklearn.ensemble import IsolationForest
5
+
6
+
7
class AnomalyDetector:
    """Flag anomalous samples relative to a reference (baseline) dataset.

    Two detectors are available:

    * an ``IsolationForest`` fitted on the reference data, and
    * a per-feature z-score test against the reference mean and standard
      deviation; a sample is flagged when any feature has
      ``|z| > z_threshold``.

    With ``method="ensemble"`` a sample is flagged when *either* detector
    flags it (logical OR). A ``method`` value other than the three documented
    ones runs no detector, so nothing is ever flagged.
    """

    def __init__(
        self,
        contamination: float = 0.05,
        method: str = "ensemble",
        z_threshold: float = 3.0,
        random_state: int | None = 42,
    ) -> None:
        """
        Args:
            contamination: Expected fraction of outliers (passed to IsolationForest).
            method: 'isolation_forest', 'zscore', or 'ensemble'.
            z_threshold: Absolute z-score above which a feature value marks
                its sample as anomalous.
            random_state: Seed passed to IsolationForest.
        """
        self.contamination = contamination
        self.method = method
        self.z_threshold = z_threshold
        self.random_state = random_state
        self._isoforest: IsolationForest | None = None
        self._ref_mean: np.ndarray | None = None
        self._ref_std: np.ndarray | None = None

    @staticmethod
    def _as_2d(X) -> np.ndarray:
        """Convert *X* to a float array, treating 1-D input as one feature column."""
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        return X

    def fit(self, X: np.ndarray) -> "AnomalyDetector":
        """Fit on reference (baseline) data.

        Stores the per-feature mean and standard deviation and, for the
        'isolation_forest' and 'ensemble' methods, fits an IsolationForest.

        Args:
            X: Reference samples; 1-D input is treated as a single feature.

        Returns:
            This detector, to allow call chaining.
        """
        X = self._as_2d(X)

        self._ref_mean = X.mean(axis=0)
        ref_std = X.std(axis=0)
        # Constant features would give a zero divisor in score(); use 1.0
        # so their z-score is simply the absolute deviation from the mean.
        self._ref_std = np.where(ref_std == 0, 1.0, ref_std)

        if self.method in ("isolation_forest", "ensemble"):
            self._isoforest = IsolationForest(
                contamination=self.contamination,
                random_state=self.random_state,
            )
            self._isoforest.fit(X)

        return self

    def score(self, X: np.ndarray) -> dict:
        """Detect anomalies in *X*.

        Args:
            X: Samples to check; 1-D input is treated as a single feature.

        Returns:
            A dict with:
                anomaly_rate: fraction of samples flagged
                anomaly_mask: bool array, True = anomalous
                scores: IsolationForest decision-function values (lower =
                    more anomalous) when that detector ran, zeros otherwise

        Raises:
            ValueError: If the detector has been fitted and the number of
                features in a non-empty *X* differs from the reference data.
        """
        X = self._as_2d(X)

        n = len(X)
        mask = np.zeros(n, dtype=bool)
        raw_scores = np.zeros(n, dtype=float)

        if n == 0:
            # scikit-learn estimators reject zero-sample arrays, so an empty
            # batch is answered directly instead of reaching predict().
            return {
                "anomaly_rate": 0.0,
                "anomaly_mask": mask,
                "scores": raw_scores,
            }

        if self._ref_mean is not None and X.shape[1] != self._ref_mean.shape[0]:
            # Without this check a width-1 input would silently broadcast
            # against the reference statistics in the z-score computation.
            raise ValueError(
                f"X has {X.shape[1]} feature(s), but the detector was fitted "
                f"on {self._ref_mean.shape[0]} feature(s)"
            )

        if self.method in ("isolation_forest", "ensemble") and self._isoforest is not None:
            preds = self._isoforest.predict(X)  # -1 = anomaly
            raw_scores = self._isoforest.decision_function(X)
            mask |= preds == -1

        if self.method in ("zscore", "ensemble") and self._ref_mean is not None:
            z = np.abs((X - self._ref_mean) / self._ref_std)
            # A sample is anomalous as soon as its worst feature is too far out.
            mask |= z.max(axis=1) > self.z_threshold

        return {
            "anomaly_rate": float(mask.sum() / n),
            "anomaly_mask": mask,
            "scores": raw_scores,
        }