canary-ml 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- canary_ml-1.0.0/PKG-INFO +72 -0
- canary_ml-1.0.0/README.md +53 -0
- canary_ml-1.0.0/canary_ml/__init__.py +5 -0
- canary_ml-1.0.0/canary_ml/alerts.py +40 -0
- canary_ml-1.0.0/canary_ml/anomaly.py +77 -0
- canary_ml-1.0.0/canary_ml/dashboard.html +978 -0
- canary_ml-1.0.0/canary_ml/drift.py +162 -0
- canary_ml-1.0.0/canary_ml/monitor.py +190 -0
- canary_ml-1.0.0/canary_ml/report.py +44 -0
- canary_ml-1.0.0/canary_ml/server.py +84 -0
- canary_ml-1.0.0/canary_ml/storage.py +56 -0
- canary_ml-1.0.0/canary_ml.egg-info/PKG-INFO +72 -0
- canary_ml-1.0.0/canary_ml.egg-info/SOURCES.txt +21 -0
- canary_ml-1.0.0/canary_ml.egg-info/dependency_links.txt +1 -0
- canary_ml-1.0.0/canary_ml.egg-info/requires.txt +11 -0
- canary_ml-1.0.0/canary_ml.egg-info/top_level.txt +1 -0
- canary_ml-1.0.0/pyproject.toml +32 -0
- canary_ml-1.0.0/setup.cfg +4 -0
- canary_ml-1.0.0/tests/test_anomaly.py +59 -0
- canary_ml-1.0.0/tests/test_callbacks.py +129 -0
- canary_ml-1.0.0/tests/test_drift.py +59 -0
- canary_ml-1.0.0/tests/test_monitor.py +89 -0
- canary_ml-1.0.0/tests/test_server.py +95 -0
canary_ml-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: canary-ml
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Lightweight drift and anomaly monitoring for production ML models.
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/aitor1717/canary-ml
|
|
7
|
+
Project-URL: Repository, https://github.com/aitor1717/canary-ml
|
|
8
|
+
Requires-Python: >=3.9
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
Requires-Dist: numpy>=1.24
|
|
11
|
+
Requires-Dist: scipy>=1.10
|
|
12
|
+
Requires-Dist: scikit-learn>=1.3
|
|
13
|
+
Requires-Dist: rich>=13.0
|
|
14
|
+
Provides-Extra: keras
|
|
15
|
+
Requires-Dist: tensorflow>=2.13; extra == "keras"
|
|
16
|
+
Provides-Extra: dev
|
|
17
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
18
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
19
|
+
|
|
20
|
+
# canary-ml
|
|
21
|
+
|
|
22
|
+
Lightweight drift and anomaly monitoring for production ML models.
|
|
23
|
+
|
|
24
|
+
## Install
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install canary-ml
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Quickstart
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
from canary_ml import ModelMonitor
|
|
34
|
+
|
|
35
|
+
monitor = ModelMonitor(
|
|
36
|
+
model=your_model,
|
|
37
|
+
reference_data=X_train,
|
|
38
|
+
alert_threshold=0.2,
|
|
39
|
+
log_path="./canary_logs"
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
# drop-in replacement — monitoring is a side effect
|
|
43
|
+
predictions = monitor.predict(X_new)
|
|
44
|
+
|
|
45
|
+
report = monitor.get_report()
|
|
46
|
+
print(report.summary())
|
|
47
|
+
# DriftReport | psi=0.41 | features_drifted=3 | anomaly_rate=3.2% | ALERT
|
|
48
|
+
|
|
49
|
+
monitor.serve_dashboard(port=8501)
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Features
|
|
53
|
+
|
|
54
|
+
- Data Drift Detection — KS test, PSI, chi-square per feature with configurable thresholds
|
|
55
|
+
- Anomaly Detection — Isolation Forest and z-score ensemble on inputs and outputs
|
|
56
|
+
- Zero Latency — monitoring is a side effect; inference path stays unchanged
|
|
57
|
+
- Live Dashboard — zero-dep HTML/JS, ships with the package, no cloud account needed
|
|
58
|
+
|
|
59
|
+
## Why
|
|
60
|
+
|
|
61
|
+
Most ML monitoring tools require a database, a cloud account, or a separate deployment pipeline. canary-ml wraps your model with a single line of code and starts logging drift metrics immediately — to a local JSON-lines file, with no external services or infrastructure to set up.
|
|
62
|
+
|
|
63
|
+
The dashboard (`monitor.serve_dashboard()`) reads from that file and auto-refreshes every 5 seconds. You can run it on a laptop, in a Docker container, or on any machine with the package installed.
|
|
64
|
+
|
|
65
|
+
## Roadmap
|
|
66
|
+
|
|
67
|
+
### v1.1
|
|
68
|
+
- Label-free performance estimation: estimate model accuracy/F1 from confidence score distributions without ground truth labels, using confidence-based performance estimation (CBPE). Alerts when estimated performance degrades, not just when inputs shift.
|
|
69
|
+
|
|
70
|
+
## License
|
|
71
|
+
|
|
72
|
+
MIT
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# canary-ml
|
|
2
|
+
|
|
3
|
+
Lightweight drift and anomaly monitoring for production ML models.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install canary-ml
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Quickstart
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from canary_ml import ModelMonitor
|
|
15
|
+
|
|
16
|
+
monitor = ModelMonitor(
|
|
17
|
+
model=your_model,
|
|
18
|
+
reference_data=X_train,
|
|
19
|
+
alert_threshold=0.2,
|
|
20
|
+
log_path="./canary_logs"
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
# drop-in replacement — monitoring is a side effect
|
|
24
|
+
predictions = monitor.predict(X_new)
|
|
25
|
+
|
|
26
|
+
report = monitor.get_report()
|
|
27
|
+
print(report.summary())
|
|
28
|
+
# DriftReport | psi=0.41 | features_drifted=3 | anomaly_rate=3.2% | ALERT
|
|
29
|
+
|
|
30
|
+
monitor.serve_dashboard(port=8501)
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Features
|
|
34
|
+
|
|
35
|
+
- Data Drift Detection — KS test, PSI, chi-square per feature with configurable thresholds
|
|
36
|
+
- Anomaly Detection — Isolation Forest and z-score ensemble on inputs and outputs
|
|
37
|
+
- Zero Latency — monitoring is a side effect; inference path stays unchanged
|
|
38
|
+
- Live Dashboard — zero-dep HTML/JS, ships with the package, no cloud account needed
|
|
39
|
+
|
|
40
|
+
## Why
|
|
41
|
+
|
|
42
|
+
Most ML monitoring tools require a database, a cloud account, or a separate deployment pipeline. canary-ml wraps your model with a single line of code and starts logging drift metrics immediately — to a local JSON-lines file, with no external services or infrastructure to set up.
|
|
43
|
+
|
|
44
|
+
The dashboard (`monitor.serve_dashboard()`) reads from that file and auto-refreshes every 5 seconds. You can run it on a laptop, in a Docker container, or on any machine with the package installed.
|
|
45
|
+
|
|
46
|
+
## Roadmap
|
|
47
|
+
|
|
48
|
+
### v1.1
|
|
49
|
+
- Label-free performance estimation: estimate model accuracy/F1 from confidence score distributions without ground truth labels, using confidence-based performance estimation (CBPE). Alerts when estimated performance degrades, not just when inputs shift.
|
|
50
|
+
|
|
51
|
+
## License
|
|
52
|
+
|
|
53
|
+
MIT
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from rich.console import Console
|
|
4
|
+
from rich.panel import Panel
|
|
5
|
+
from rich.text import Text
|
|
6
|
+
|
|
7
|
+
from canary_ml.report import DriftReport
|
|
8
|
+
|
|
9
|
+
# Module-level rich console; format_alert() prints its panel through it.
_console = Console()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def check_alert(report: DriftReport, threshold: float) -> bool:
    """Return True when the report's PSI score exceeds *threshold*.

    Args:
        report: Drift report; only its ``psi_score`` attribute is read.
        threshold: PSI value above which an alert fires. The comparison is
            strict, so a score exactly equal to the threshold does not alert.

    Returns:
        A plain ``bool``. The comparison result is coerced explicitly because
        comparing a NumPy scalar yields ``numpy.bool_``, which fails identity
        checks such as ``is True`` and is rejected by ``json.dumps``.
    """
    return bool(report.psi_score > threshold)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def format_alert(report: DriftReport) -> None:
    """Print a rich panel summarising *report* on the module console.

    The panel heading and colour depend on the report state: red
    "DRIFT ALERT" when ``report.alert_triggered`` is truthy, yellow
    "DRIFT WARNING" when only ``report.drift_detected`` is truthy, and green
    "STABLE" otherwise. The body lists the timestamp, sample count, PSI
    score, number of drifted features and anomaly rate.
    """
    # A feature counts as drifted when its KS entry has a truthy "drifted".
    n_drifted = len([res for res in report.ks_results.values() if res.get("drifted")])

    if report.alert_triggered:
        color, heading = "red", "DRIFT ALERT"
    elif report.drift_detected:
        color, heading = "yellow", "DRIFT WARNING"
    else:
        color, heading = "green", "STABLE"
    emphasis = f"bold {color}"
    title = f"[{emphasis}]{heading}[/{emphasis}]"

    # (label, rendered value, value style); None means "append unstyled".
    rows = (
        (" timestamp ", f"{report.timestamp}\n", None),
        (" samples ", f"{report.n_samples}\n", None),
        (
            " PSI score ",
            f"{report.psi_score:.3f}\n",
            emphasis if report.psi_score > 0.1 else "",
        ),
        (
            " features drifted ",
            f"{n_drifted}\n",
            emphasis if n_drifted > 0 else "",
        ),
        (
            " anomaly rate ",
            f"{report.anomaly_rate * 100:.1f}%\n",
            "yellow" if report.anomaly_rate > 0.02 else "",
        ),
    )

    body = Text()
    for label, value, value_style in rows:
        body.append(label, style="dim")
        if value_style is None:
            body.append(value)
        else:
            body.append(value, style=value_style)

    _console.print(Panel(body, title=title, border_style=color))
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from sklearn.ensemble import IsolationForest
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class AnomalyDetector:
    """Ensemble anomaly detector: IsolationForest OR z-score (|z| > threshold).

    A sample is flagged as anomalous when *any* enabled detector flags it.
    Which detectors are enabled is controlled by ``method``.
    """

    _METHODS = ("isolation_forest", "zscore", "ensemble")

    def __init__(
        self,
        contamination: float = 0.05,
        method: str = "ensemble",
        zscore_threshold: float = 3.0,
    ) -> None:
        """
        Args:
            contamination: Expected fraction of outliers (passed to IsolationForest).
            method: 'isolation_forest', 'zscore', or 'ensemble'.
            zscore_threshold: A sample is flagged by the z-score detector when
                the largest absolute per-feature z-score exceeds this value.

        Raises:
            ValueError: If *method* is not one of the supported names. An
                unknown name would otherwise disable every detector and the
                instance would silently report zero anomalies.
        """
        if method not in self._METHODS:
            raise ValueError(
                f"Unknown method {method!r}; expected one of {self._METHODS}"
            )
        self.contamination = contamination
        self.method = method
        self.zscore_threshold = zscore_threshold
        self._isoforest: IsolationForest | None = None
        self._ref_mean: np.ndarray | None = None
        self._ref_std: np.ndarray | None = None

    @staticmethod
    def _as_2d(X) -> np.ndarray:
        """Return *X* as a float array, reshaping 1-D input to one column."""
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        return X

    def fit(self, X: np.ndarray) -> "AnomalyDetector":
        """Fit on reference (baseline) data.

        Stores the per-feature mean and standard deviation of *X* and, for the
        'isolation_forest' and 'ensemble' methods, fits an IsolationForest.

        Returns:
            ``self``, so calls can be chained.
        """
        X = self._as_2d(X)

        self._ref_mean = X.mean(axis=0)
        ref_std = X.std(axis=0)
        # Constant features have zero spread; use 1.0 so score() never divides by zero.
        self._ref_std = np.where(ref_std == 0, 1.0, ref_std)

        if self.method in ("isolation_forest", "ensemble"):
            self._isoforest = IsolationForest(
                contamination=self.contamination,
                random_state=42,
            )
            self._isoforest.fit(X)

        return self

    def score(self, X: np.ndarray) -> dict:
        """Detect anomalies in *X*.

        If ``fit`` has not been called, no detector runs and nothing is
        flagged. An empty batch returns a rate of 0.0 with empty arrays.

        Returns:
            anomaly_rate: fraction of samples flagged
            anomaly_mask: bool array, True = anomalous
            scores: raw IsolationForest decision scores (lower = more
                anomalous); all zeros when the forest is not used
        """
        X = self._as_2d(X)

        n = len(X)
        mask = np.zeros(n, dtype=bool)
        raw_scores = np.zeros(n, dtype=float)

        if n == 0:
            # IsolationForest rejects zero-sample input, so answer directly.
            return {
                "anomaly_rate": 0.0,
                "anomaly_mask": mask,
                "scores": raw_scores,
            }

        if self.method in ("isolation_forest", "ensemble") and self._isoforest is not None:
            # predict() labels a sample -1 exactly when its decision score is
            # negative, so one decision_function() call yields both outputs.
            raw_scores = self._isoforest.decision_function(X)
            mask |= raw_scores < 0

        if self.method in ("zscore", "ensemble") and self._ref_mean is not None:
            z = np.abs((X - self._ref_mean) / self._ref_std)
            mask |= z.max(axis=1) > self.zscore_threshold

        return {
            "anomaly_rate": float(mask.sum() / n),
            "anomaly_mask": mask,
            "scores": raw_scores,
        }
|