checkdrift 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checkdrift-1.0.0/.github/workflows/publish.yml +27 -0
- checkdrift-1.0.0/.gitignore +33 -0
- checkdrift-1.0.0/LICENSE +21 -0
- checkdrift-1.0.0/PKG-INFO +83 -0
- checkdrift-1.0.0/README.md +50 -0
- checkdrift-1.0.0/checkdrift/__init__.py +25 -0
- checkdrift-1.0.0/checkdrift/decorators.py +280 -0
- checkdrift-1.0.0/checkdrift/py.typed +0 -0
- checkdrift-1.0.0/examples/__init__.py +1 -0
- checkdrift-1.0.0/examples/lendingclub/README.md +64 -0
- checkdrift-1.0.0/examples/lendingclub/__init__.py +1 -0
- checkdrift-1.0.0/examples/lendingclub/api.py +63 -0
- checkdrift-1.0.0/examples/lendingclub/baseline.json +16694 -0
- checkdrift-1.0.0/examples/lendingclub/data/lendingclub_2015_sample.csv.gz +0 -0
- checkdrift-1.0.0/examples/lendingclub/data/lendingclub_2017_sample.csv.gz +0 -0
- checkdrift-1.0.0/examples/lendingclub/generate_baseline.py +66 -0
- checkdrift-1.0.0/examples/lendingclub/models.py +116 -0
- checkdrift-1.0.0/examples/lendingclub/test_run.py +52 -0
- checkdrift-1.0.0/pyproject.toml +49 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
publish:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
permissions:
|
|
11
|
+
id-token: write # Required for trusted publishing
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
|
|
15
|
+
- name: Set up Python
|
|
16
|
+
uses: actions/setup-python@v5
|
|
17
|
+
with:
|
|
18
|
+
python-version: "3.14"
|
|
19
|
+
|
|
20
|
+
- name: Install build tools
|
|
21
|
+
run: pip install build
|
|
22
|
+
|
|
23
|
+
- name: Build package
|
|
24
|
+
run: python -m build
|
|
25
|
+
|
|
26
|
+
- name: Publish to PyPI
|
|
27
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Virtual environments
|
|
2
|
+
.venv/
|
|
3
|
+
venv/
|
|
4
|
+
env/
|
|
5
|
+
|
|
6
|
+
# Python
|
|
7
|
+
__pycache__/
|
|
8
|
+
*.py[cod]
|
|
9
|
+
*$py.class
|
|
10
|
+
*.egg-info/
|
|
11
|
+
*.egg
|
|
12
|
+
dist/
|
|
13
|
+
build/
|
|
14
|
+
.eggs/
|
|
15
|
+
|
|
16
|
+
# Testing
|
|
17
|
+
.pytest_cache/
|
|
18
|
+
.coverage
|
|
19
|
+
htmlcov/
|
|
20
|
+
|
|
21
|
+
# IDE
|
|
22
|
+
.idea/
|
|
23
|
+
.vscode/
|
|
24
|
+
*.swp
|
|
25
|
+
*.swo
|
|
26
|
+
|
|
27
|
+
# OS
|
|
28
|
+
.DS_Store
|
|
29
|
+
Thumbs.db
|
|
30
|
+
|
|
31
|
+
# Local config
|
|
32
|
+
*.local
|
|
33
|
+
.env
|
checkdrift-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Valentyn Danylchuk
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: checkdrift
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: One-line drift detection for ML APIs. Like Pydantic or a rate limiter, but for data drift.
|
|
5
|
+
Project-URL: Homepage, https://github.com/valdanylchuk/driftdetect
|
|
6
|
+
Project-URL: Repository, https://github.com/valdanylchuk/driftdetect
|
|
7
|
+
Project-URL: Issues, https://github.com/valdanylchuk/driftdetect/issues
|
|
8
|
+
Author-email: Valentyn Danylchuk <val@danylchuk.com>
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: drift-detection,fastapi,machine-learning,mlops,monitoring
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Framework :: FastAPI
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
24
|
+
Requires-Python: >=3.10
|
|
25
|
+
Requires-Dist: numpy>=1.21.0
|
|
26
|
+
Requires-Dist: scipy>=1.7.0
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: fastapi>=0.100.0; extra == 'dev'
|
|
29
|
+
Requires-Dist: httpx>=0.24.0; extra == 'dev'
|
|
30
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == 'dev'
|
|
31
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
32
|
+
Description-Content-Type: text/markdown
|
|
33
|
+
|
|
34
|
+
# checkdrift
|
|
35
|
+
|
|
36
|
+
One-line drift detection for ML APIs. Like Pydantic or a rate limiter, but for data drift.
|
|
37
|
+
|
|
38
|
+
Just add the `@check_drift` decorator to your FastAPI endpoint:
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
@app.post("/predict")
|
|
42
|
+
@check_drift(baseline="baseline.json")
|
|
43
|
+
async def predict(application: LoanApplication):
|
|
44
|
+
return model.predict(application)
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Installation
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install checkdrift
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## What It Does
|
|
54
|
+
|
|
55
|
+
- Maintains a sliding window of recent requests
|
|
56
|
+
- Computes drift metrics every N requests (default: 50)
|
|
57
|
+
- Logs warnings when drift is detected
|
|
58
|
+
- Minimal impact on your endpoint response (about 1ms in my tests)
|
|
59
|
+
|
|
60
|
+
Uses PSI (Population Stability Index) and KS test - industry standards from banking.
|
|
61
|
+
|
|
62
|
+
## Baseline Format
|
|
63
|
+
|
|
64
|
+
```json
|
|
65
|
+
{"distributions": {"feature1": [1.0, 2.0, ...], "feature2": [...]}}
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
See [examples/lendingclub](examples/lendingclub) for a complete example with sample data.
|
|
69
|
+
|
|
70
|
+
## Options
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
@check_drift(
|
|
74
|
+
baseline="baseline.json",
|
|
75
|
+
window_size=100, # Sliding window size
|
|
76
|
+
check_interval=50, # Check every N requests
|
|
77
|
+
on_drift=my_callback, # Optional callback
|
|
78
|
+
)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## License
|
|
82
|
+
|
|
83
|
+
MIT
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# checkdrift
|
|
2
|
+
|
|
3
|
+
One-line drift detection for ML APIs. Like Pydantic or a rate limiter, but for data drift.
|
|
4
|
+
|
|
5
|
+
Just add the `@check_drift` decorator to your FastAPI endpoint:
|
|
6
|
+
|
|
7
|
+
```python
|
|
8
|
+
@app.post("/predict")
|
|
9
|
+
@check_drift(baseline="baseline.json")
|
|
10
|
+
async def predict(application: LoanApplication):
|
|
11
|
+
return model.predict(application)
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pip install checkdrift
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## What It Does
|
|
21
|
+
|
|
22
|
+
- Maintains a sliding window of recent requests
|
|
23
|
+
- Computes drift metrics every N requests (default: 50)
|
|
24
|
+
- Logs warnings when drift is detected
|
|
25
|
+
- Minimal impact on your endpoint response (about 1ms in my tests)
|
|
26
|
+
|
|
27
|
+
Uses PSI (Population Stability Index) and KS test - industry standards from banking.
|
|
28
|
+
|
|
29
|
+
## Baseline Format
|
|
30
|
+
|
|
31
|
+
```json
|
|
32
|
+
{"distributions": {"feature1": [1.0, 2.0, ...], "feature2": [...]}}
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
See [examples/lendingclub](examples/lendingclub) for a complete example with sample data.
|
|
36
|
+
|
|
37
|
+
## Options
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
@check_drift(
|
|
41
|
+
baseline="baseline.json",
|
|
42
|
+
window_size=100, # Sliding window size
|
|
43
|
+
check_interval=50, # Check every N requests
|
|
44
|
+
on_drift=my_callback, # Optional callback
|
|
45
|
+
)
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## License
|
|
49
|
+
|
|
50
|
+
MIT
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""
|
|
2
|
+
checkdrift - One-line drift detection for ML APIs.
|
|
3
|
+
|
|
4
|
+
"Like Pydantic or a rate limiter, but for data drift."
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .decorators import (
|
|
8
|
+
check_drift,
|
|
9
|
+
DriftMonitor,
|
|
10
|
+
DriftCallback,
|
|
11
|
+
DriftCheckResult,
|
|
12
|
+
DriftSeverity,
|
|
13
|
+
FeatureDriftResult,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
__version__ = "1.0.0"
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"check_drift",
|
|
20
|
+
"DriftMonitor",
|
|
21
|
+
"DriftCallback",
|
|
22
|
+
"DriftCheckResult",
|
|
23
|
+
"DriftSeverity",
|
|
24
|
+
"FeatureDriftResult",
|
|
25
|
+
]
|
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Drift detection decorator for FastAPI endpoints.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import functools
|
|
6
|
+
import json
|
|
7
|
+
import logging
|
|
8
|
+
from collections import deque
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from enum import Enum
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Callable
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
from scipy import stats
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger("checkdrift")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class DriftSeverity(Enum):
    """Levels used to grade how far live data has moved from the baseline."""

    # No drift detected.
    OK = "ok"
    # Moderate drift detected.
    WARNING = "warning"
    # Significant drift detected.
    ALARM = "alarm"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class FeatureDriftResult:
    """Metrics and graded severity produced for one feature by a drift check."""

    feature: str  # name of the monitored feature
    psi: float  # Population Stability Index
    ks_stat: float  # two-sample KS statistic
    ks_pvalue: float  # p-value of the KS test
    wasserstein: float  # Wasserstein distance
    severity: DriftSeverity  # graded outcome for this feature

    @property
    def is_drifted(self) -> bool:
        """Return True for any severity other than ``DriftSeverity.OK``."""
        return self.severity is not DriftSeverity.OK
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass
class DriftCheckResult:
    """Outcome of one drift check across all evaluated features."""

    request_count: int  # number of requests seen when the check ran
    features: dict[str, FeatureDriftResult] = field(default_factory=dict)

    @property
    def severity(self) -> DriftSeverity:
        """Return the worst severity found among the per-feature results."""
        # Probe from most to least severe; the first level present wins.
        for level in (DriftSeverity.ALARM, DriftSeverity.WARNING):
            if any(item.severity == level for item in self.features.values()):
                return level
        return DriftSeverity.OK

    @property
    def drifted_features(self) -> list[str]:
        """Return the names of features whose result is flagged as drifted."""
        flagged = []
        for name, item in self.features.items():
            if item.is_drifted:
                flagged.append(name)
        return flagged
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# Type alias for callback
|
|
63
|
+
DriftCallback = Callable[[DriftCheckResult], None]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def compute_psi(reference: np.ndarray, current: np.ndarray, bins: int = 10) -> float:
    """
    Compute Population Stability Index between two distributions.

    Bin edges are derived from ``reference`` and reused for ``current``.
    Values of ``current`` that fall outside the reference range are counted
    in the nearest outer bin instead of being dropped, so a sample that has
    moved entirely past the baseline range still registers as drift.

    Args:
        reference: Baseline sample.
        current: Live sample to compare against the baseline.
        bins: Number of equal-width bins built over the reference range.

    Returns:
        The PSI as a Python float (0.0 for identical binned distributions).
    """
    reference = np.asarray(reference, dtype=float)
    current = np.asarray(current, dtype=float)

    ref_counts, bin_edges = np.histogram(reference, bins=bins)

    # np.histogram ignores values outside the outer edges; clip so that
    # out-of-range observations land in the first/last bin.
    clipped = np.clip(current, bin_edges[0], bin_edges[-1])
    curr_counts, _ = np.histogram(clipped, bins=bin_edges)

    # Convert to proportions with add-one smoothing to avoid log(0) and
    # division by zero for empty bins.
    n_bins = len(bin_edges) - 1
    ref_pct = (ref_counts + 1) / (len(reference) + n_bins)
    curr_pct = (curr_counts + 1) / (len(current) + n_bins)

    # PSI formula: sum((curr - ref) * ln(curr / ref))
    psi = np.sum((curr_pct - ref_pct) * np.log(curr_pct / ref_pct))
    return float(psi)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class DriftMonitor:
    """
    Monitors feature distributions for drift against a baseline.

    Maintains a sliding window of recent observations per feature and, every
    ``check_interval`` pushes, compares each window with the baseline using
    PSI, the two-sample KS test, and Wasserstein distance.

    Thresholds (banking industry standard):
    - PSI > 0.2: Significant drift
    - PSI > 0.1: Moderate drift
    - KS p-value < 0.05: Statistically significant shift
    """

    PSI_WARNING = 0.1
    PSI_ALARM = 0.2
    KS_PVALUE_THRESHOLD = 0.05

    def __init__(
        self,
        baseline_path: str | Path,
        window_size: int = 100,
        check_interval: int = 50,
        psi_threshold: float = 0.2,
        ks_pvalue_threshold: float = 0.05,
        callback: DriftCallback | None = None,
    ):
        """
        Load the baseline and create one empty sliding window per feature.

        Args:
            baseline_path: JSON file containing a ``"distributions"`` mapping
                of feature name to a list of reference values.
            window_size: Maximum number of recent observations kept per feature.
            check_interval: Run a drift check every N calls to ``push``.
            psi_threshold: PSI above which a feature is graded ALARM.
            ks_pvalue_threshold: KS p-value below which a shift is significant.
            callback: Invoked with the result when drift is detected; when
                omitted, drift is written to the ``checkdrift`` logger.

        Raises:
            ValueError: If ``window_size`` or ``check_interval`` is not positive.
        """
        if window_size < 1:
            raise ValueError("window_size must be a positive integer")
        if check_interval < 1:
            raise ValueError("check_interval must be a positive integer")

        self.baseline_path = Path(baseline_path)
        self.window_size = window_size
        self.check_interval = check_interval
        self.psi_threshold = psi_threshold
        self.ks_pvalue_threshold = ks_pvalue_threshold
        self.callback = callback

        # Load baseline distributions (JSON is UTF-8; do not rely on the
        # platform default encoding).
        with open(self.baseline_path, encoding="utf-8") as f:
            self.baseline = json.load(f)

        self.features = list(self.baseline["distributions"].keys())

        # Sliding window per feature
        self.windows: dict[str, deque] = {
            feat: deque(maxlen=window_size) for feat in self.features
        }
        self.request_count = 0
        self.last_check_results: DriftCheckResult | None = None

    def push(self, values: dict[str, float]) -> None:
        """Add one observation to the sliding windows and check drift when due.

        Only keys that are baseline features are stored. The request counter
        advances on every call, including calls that carry no known feature.
        """
        for feat in self.features:
            if feat in values:
                self.windows[feat].append(values[feat])
        self.request_count += 1

        if self.request_count % self.check_interval == 0:
            self._check_drift()

    def _determine_severity(self, psi: float, ks_pvalue: float) -> DriftSeverity:
        """Determine drift severity combining PSI and KS test.

        ALARM when PSI exceeds ``psi_threshold``, or when PSI is moderate and
        the KS test is significant; WARNING when only one of those two
        signals fires; OK otherwise.
        """
        alarm_at = self.psi_threshold
        # Keep the moderate band at or below the alarm level so that a
        # caller-supplied threshold under PSI_WARNING still behaves sensibly.
        warn_at = min(self.PSI_WARNING, alarm_at)
        ks_significant = ks_pvalue < self.ks_pvalue_threshold
        psi_moderate = psi > warn_at

        if psi > alarm_at:
            return DriftSeverity.ALARM
        if psi_moderate and ks_significant:
            return DriftSeverity.ALARM
        if psi_moderate or ks_significant:
            return DriftSeverity.WARNING
        return DriftSeverity.OK

    def _check_drift(self) -> None:
        """Run drift detection on current windows and report any drift."""
        result = DriftCheckResult(request_count=self.request_count)

        # A window can never hold more than window_size items, so cap the
        # required sample count; otherwise window_size < check_interval
        # would silently disable every check.
        min_samples = min(self.check_interval, self.window_size)

        for feat in self.features:
            if len(self.windows[feat]) < min_samples:
                continue

            current = np.array(self.windows[feat])
            reference = np.array(self.baseline["distributions"][feat])

            psi = compute_psi(reference, current)
            ks_stat, ks_pvalue = stats.ks_2samp(reference, current)
            wasserstein = stats.wasserstein_distance(reference, current)

            severity = self._determine_severity(psi, ks_pvalue)

            result.features[feat] = FeatureDriftResult(
                feature=feat,
                psi=round(psi, 4),
                ks_stat=round(ks_stat, 4),
                ks_pvalue=round(ks_pvalue, 4),
                wasserstein=round(wasserstein, 4),
                severity=severity,
            )

        self.last_check_results = result

        if result.severity != DriftSeverity.OK:
            self._handle_drift(result)

    def _handle_drift(self, result: DriftCheckResult) -> None:
        """Invoke the callback if one is set, otherwise log the drift."""
        if self.callback:
            try:
                self.callback(result)
            except Exception as e:
                # A failing user callback must not break request handling;
                # log it with the traceback so it can be diagnosed.
                logger.exception(f"[checkdrift] Callback error: {e}")
        else:
            drifted = result.drifted_features
            severity = result.severity.value.upper()

            summary = {
                feat: {
                    "psi": r.psi,
                    "ks_p": r.ks_pvalue,
                    "wasserstein": r.wasserstein,
                }
                for feat, r in result.features.items()
                if r.is_drifted
            }

            log_msg = (
                f"Drift {severity} in {drifted} "
                f"after {result.request_count} requests: {summary}"
            )

            if result.severity == DriftSeverity.ALARM:
                logger.warning(log_msg)
            else:
                logger.info(log_msg)
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
# Global registry of monitors
|
|
214
|
+
_monitors: dict[str, DriftMonitor] = {}
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def check_drift(
    baseline: str | Path,
    window_size: int = 100,
    check_interval: int = 50,
    psi_threshold: float = 0.2,
    ks_pvalue_threshold: float = 0.05,
    on_drift: DriftCallback | None = None,
):
    """
    Decorator for drift detection on FastAPI endpoints.

    Monitors incoming request distributions against a baseline using
    industry-standard metrics:
    - PSI (Population Stability Index): >0.2 = significant, >0.1 = moderate
    - KS test p-value: <0.05 = statistically significant shift
    - Wasserstein distance: magnitude of distribution shift

    The first argument exposing ``model_dump`` (Pydantic v2) is treated as
    the request payload; its numeric fields that are baseline features are
    pushed to the endpoint's monitor before the endpoint runs. A failure
    inside monitoring is logged and does not fail the request.

    Args:
        baseline: Path to baseline JSON file with reference distributions
        window_size: Number of recent requests to keep in sliding window
        check_interval: Check drift every N requests
        psi_threshold: PSI threshold for drift alert (default 0.2)
        ks_pvalue_threshold: KS test p-value threshold (default 0.05)
        on_drift: Callback invoked when drift is detected

    Returns:
        A decorator producing an async wrapper; the wrapper exposes the
        monitor as its ``drift_monitor`` attribute.

    Example:
        @app.post("/predict")
        @check_drift(baseline="baseline.json")
        async def predict(application: LoanApplication):
            return model.predict(application)
    """
    import inspect  # local import: only used to detect awaitable results

    def decorator(func: Callable) -> Callable:
        # Key by module + qualified name so that same-named endpoints in
        # different modules do not share (and misconfigure) one monitor.
        module_name = getattr(func, "__module__", "")
        qualified_name = getattr(func, "__qualname__", func.__name__)
        endpoint_key = f"{module_name}.{qualified_name}"

        if endpoint_key not in _monitors:
            _monitors[endpoint_key] = DriftMonitor(
                baseline_path=baseline,
                window_size=window_size,
                check_interval=check_interval,
                psi_threshold=psi_threshold,
                ks_pvalue_threshold=ks_pvalue_threshold,
                callback=on_drift,
            )

        monitor = _monitors[endpoint_key]

        def _record(args: tuple, kwargs: dict) -> None:
            """Push the first Pydantic-like argument's features to the monitor."""
            # Keyword arguments first (how FastAPI calls endpoints), then
            # positional ones for direct calls.
            for arg in (*kwargs.values(), *args):
                if hasattr(arg, "model_dump"):  # Pydantic v2
                    data = arg.model_dump()
                    features = {
                        k: float(v) for k, v in data.items()
                        if isinstance(v, (int, float)) and k in monitor.features
                    }
                    monitor.push(features)
                    break

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            try:
                _record(args, kwargs)
            except Exception:
                # Monitoring is best-effort: never fail the request over it.
                logger.exception("[checkdrift] Drift monitoring failed")

            result = func(*args, **kwargs)
            # Await coroutine endpoints; plain (sync) endpoints return their
            # value directly and run inline on the event loop.
            if inspect.isawaitable(result):
                result = await result
            return result

        wrapper.drift_monitor = monitor
        return wrapper

    return decorator
|
|
File without changes
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""checkdrift examples."""
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# LendingClub Example
|
|
2
|
+
|
|
3
|
+
Demonstrates drift detection on a loan triage API using LendingClub data.
|
|
4
|
+
|
|
5
|
+
## Files
|
|
6
|
+
|
|
7
|
+
- `api.py` - FastAPI app with `@check_drift` decorator
|
|
8
|
+
- `models.py` - Pydantic models and mock ML model
|
|
9
|
+
- `test_run.py` - Demo script showing drift detection
|
|
10
|
+
- `generate_baseline.py` - Script to create baseline.json from CSV
|
|
11
|
+
- `data/lendingclub_2015_sample.csv.gz` - Reference data (1% sample, ~4k rows)
|
|
12
|
+
- `data/lendingclub_2017_sample.csv.gz` - Test data with drift (1% sample)
|
|
13
|
+
- `baseline.json` - Pre-generated baseline from 2015 data
|
|
14
|
+
|
|
15
|
+
## Quick Start
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
# Install dependencies
|
|
19
|
+
pip install checkdrift fastapi uvicorn
|
|
20
|
+
|
|
21
|
+
# Run the API
|
|
22
|
+
uvicorn examples.lendingclub.api:app --port 8000
|
|
23
|
+
|
|
24
|
+
# Test endpoint
|
|
25
|
+
curl -X POST http://localhost:8000/triage \
|
|
26
|
+
-H "Content-Type: application/json" \
|
|
27
|
+
-d '{"annual_inc": 75000, "dti": 18.5, "loan_amnt": 15000, "int_rate": 11.99}'
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Demo: Detecting Drift
|
|
31
|
+
|
|
32
|
+
Run the test script to send requests to the API and see drift detection logs:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
python examples/lendingclub/test_run.py
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Output:
|
|
39
|
+
|
|
40
|
+
```
|
|
41
|
+
checkdrift - WARNING - Drift ALARM in ['annual_inc', 'dti', 'loan_amnt', 'int_rate'] after 50 requests: {'annual_inc': {'psi': 0.5026, ...}, 'dti': {'psi': 0.1764, ...}, 'loan_amnt': {'psi': 0.1239, ...}, 'int_rate': {'psi': 0.3146, ...}}
|
|
42
|
+
checkdrift - WARNING - Drift ALARM in ['annual_inc', 'dti', 'loan_amnt', 'int_rate'] after 100 requests: ...
|
|
43
|
+
checkdrift - WARNING - Drift ALARM in ['annual_inc', 'dti', 'loan_amnt', 'int_rate'] after 150 requests: ...
|
|
44
|
+
Sending 200 requests to /triage...
|
|
45
|
+
Drift logs will appear below:
|
|
46
|
+
|
|
47
|
+
Done.
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
The 2017 data triggers **ALARM** on multiple features - the distribution shifted between 2015 (baseline) and 2017.
|
|
51
|
+
|
|
52
|
+
## Regenerate Baseline
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
python generate_baseline.py data/lendingclub_2015_sample.csv.gz baseline.json
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Data
|
|
59
|
+
|
|
60
|
+
The sample datasets are 1% random samples of [LendingClub loan data](https://www.kaggle.com/datasets/wordsforthewise/lending-club) for two years:
|
|
61
|
+
- **2015**: Reference distribution (baseline)
|
|
62
|
+
- **2017**: Shows drift over 2 years
|
|
63
|
+
|
|
64
|
+
Features: `annual_inc`, `dti`, `loan_amnt`, `int_rate`
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""LendingClub example for checkdrift."""
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Loan Triage API - Example with Drift Detection
|
|
3
|
+
|
|
4
|
+
Demo service showing how to add drift detection to a FastAPI endpoint.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
uvicorn examples.lendingclub.api:app --port 8000
|
|
8
|
+
|
|
9
|
+
Endpoints:
|
|
10
|
+
POST /triage - Score a loan application
|
|
11
|
+
GET /health - Service health check
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from contextlib import asynccontextmanager
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
from fastapi import FastAPI, HTTPException
|
|
18
|
+
|
|
19
|
+
from checkdrift import check_drift
|
|
20
|
+
|
|
21
|
+
from .models import LoanApplication, TriageResult, LoanTriageModel
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
BASELINE_PATH = Path(__file__).parent / "baseline.json"
|
|
25
|
+
|
|
26
|
+
model: LoanTriageModel | None = None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Create the triage model before serving and drop it afterwards."""
    global model
    # Startup phase: build the model with a fixed seed.
    model = LoanTriageModel(seed=42)
    yield
    # Shutdown phase: release the module-level reference.
    model = None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
app = FastAPI(
|
|
39
|
+
title="Loan Triage API",
|
|
40
|
+
description="Demo API with drift detection using checkdrift.",
|
|
41
|
+
version="1.0.0",
|
|
42
|
+
lifespan=lifespan,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@app.get("/health")
async def health_check() -> dict:
    """Report service status and whether the triage model is loaded."""
    model_loaded = model is not None
    return {"status": "healthy", "model_loaded": model_loaded}
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@app.post("/triage", response_model=TriageResult)
@check_drift(baseline=BASELINE_PATH)
async def triage_application(application: LoanApplication) -> TriageResult:
    """
    Score a loan application and return the model's triage result.

    Responds with HTTP 503 while the model is not loaded.
    """
    if model is not None:
        return model.predict(application)

    raise HTTPException(status_code=503, detail="Model not loaded")
|