checkdrift 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,27 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ jobs:
8
+ publish:
9
+ runs-on: ubuntu-latest
10
+ permissions:
11
+ id-token: write # Required for trusted publishing
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+
15
+ - name: Set up Python
16
+ uses: actions/setup-python@v5
17
+ with:
18
+ python-version: "3.14"
19
+
20
+ - name: Install build tools
21
+ run: pip install build
22
+
23
+ - name: Build package
24
+ run: python -m build
25
+
26
+ - name: Publish to PyPI
27
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,33 @@
1
+ # Virtual environments
2
+ .venv/
3
+ venv/
4
+ env/
5
+
6
+ # Python
7
+ __pycache__/
8
+ *.py[cod]
9
+ *$py.class
10
+ *.egg-info/
11
+ *.egg
12
+ dist/
13
+ build/
14
+ .eggs/
15
+
16
+ # Testing
17
+ .pytest_cache/
18
+ .coverage
19
+ htmlcov/
20
+
21
+ # IDE
22
+ .idea/
23
+ .vscode/
24
+ *.swp
25
+ *.swo
26
+
27
+ # OS
28
+ .DS_Store
29
+ Thumbs.db
30
+
31
+ # Local config
32
+ *.local
33
+ .env
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Valentyn Danylchuk
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,83 @@
1
+ Metadata-Version: 2.4
2
+ Name: checkdrift
3
+ Version: 1.0.0
4
+ Summary: One-line drift detection for ML APIs. Like Pydantic or a rate limiter, but for data drift.
5
+ Project-URL: Homepage, https://github.com/valdanylchuk/driftdetect
6
+ Project-URL: Repository, https://github.com/valdanylchuk/driftdetect
7
+ Project-URL: Issues, https://github.com/valdanylchuk/driftdetect/issues
8
+ Author-email: Valentyn Danylchuk <val@danylchuk.com>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: drift-detection,fastapi,machine-learning,mlops,monitoring
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Framework :: FastAPI
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Programming Language :: Python :: 3.14
23
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
24
+ Requires-Python: >=3.10
25
+ Requires-Dist: numpy>=1.21.0
26
+ Requires-Dist: scipy>=1.7.0
27
+ Provides-Extra: dev
28
+ Requires-Dist: fastapi>=0.100.0; extra == 'dev'
29
+ Requires-Dist: httpx>=0.24.0; extra == 'dev'
30
+ Requires-Dist: pytest-asyncio>=0.21; extra == 'dev'
31
+ Requires-Dist: pytest>=7.0; extra == 'dev'
32
+ Description-Content-Type: text/markdown
33
+
34
+ # checkdrift
35
+
36
+ One-line drift detection for ML APIs. Like Pydantic or a rate limiter, but for data drift.
37
+
38
+ Just add the `@check_drift` decorator to your FastAPI endpoint:
39
+
40
+ ```python
41
+ @app.post("/predict")
42
+ @check_drift(baseline="baseline.json")
43
+ async def predict(application: LoanApplication):
44
+ return model.predict(application)
45
+ ```
46
+
47
+ ## Installation
48
+
49
+ ```bash
50
+ pip install checkdrift
51
+ ```
52
+
53
+ ## What It Does
54
+
55
+ - Maintains a sliding window of recent requests
56
+ - Computes drift metrics every N requests (default: 50)
57
+ - Logs a message when drift is detected (or calls your `on_drift` callback instead, if one is set)
58
+ - Minimal impact on your endpoint response (about 1ms in my tests)
59
+
60
+ Uses PSI (Population Stability Index) and the Kolmogorov-Smirnov (KS) test - industry standards from banking - and also reports the Wasserstein distance.
61
+
62
+ ## Baseline Format
63
+
64
+ ```json
65
+ {"distributions": {"feature1": [1.0, 2.0, ...], "feature2": [...]}}
66
+ ```
67
+
68
+ See [examples/lendingclub](examples/lendingclub) for a complete example with sample data.
69
+
70
+ ## Options
71
+
72
+ ```python
73
+ @check_drift(
74
+ baseline="baseline.json",
75
+ window_size=100, # Sliding window size
76
+ check_interval=50, # Check every N requests
77
+ on_drift=my_callback, # Optional callback
78
+ )
79
+ ```
80
+
81
+ ## License
82
+
83
+ MIT
@@ -0,0 +1,50 @@
1
+ # checkdrift
2
+
3
+ One-line drift detection for ML APIs. Like Pydantic or a rate limiter, but for data drift.
4
+
5
+ Just add the `@check_drift` decorator to your FastAPI endpoint:
6
+
7
+ ```python
8
+ @app.post("/predict")
9
+ @check_drift(baseline="baseline.json")
10
+ async def predict(application: LoanApplication):
11
+ return model.predict(application)
12
+ ```
13
+
14
+ ## Installation
15
+
16
+ ```bash
17
+ pip install checkdrift
18
+ ```
19
+
20
+ ## What It Does
21
+
22
+ - Maintains a sliding window of recent requests
23
+ - Computes drift metrics every N requests (default: 50)
24
+ - Logs a message when drift is detected (or calls your `on_drift` callback instead, if one is set)
25
+ - Minimal impact on your endpoint response (about 1ms in my tests)
26
+
27
+ Uses PSI (Population Stability Index) and the Kolmogorov-Smirnov (KS) test - industry standards from banking - and also reports the Wasserstein distance.
28
+
29
+ ## Baseline Format
30
+
31
+ ```json
32
+ {"distributions": {"feature1": [1.0, 2.0, ...], "feature2": [...]}}
33
+ ```
34
+
35
+ See [examples/lendingclub](examples/lendingclub) for a complete example with sample data.
36
+
37
+ ## Options
38
+
39
+ ```python
40
+ @check_drift(
41
+ baseline="baseline.json",
42
+ window_size=100, # Sliding window size
43
+ check_interval=50, # Check every N requests
44
+ on_drift=my_callback, # Optional callback
45
+ )
46
+ ```
47
+
48
+ ## License
49
+
50
+ MIT
@@ -0,0 +1,25 @@
1
+ """
2
+ checkdrift - One-line drift detection for ML APIs.
3
+
4
+ "Like Pydantic or a rate limiter, but for data drift."
5
+ """
6
+
7
+ from .decorators import (
8
+ check_drift,
9
+ DriftMonitor,
10
+ DriftCallback,
11
+ DriftCheckResult,
12
+ DriftSeverity,
13
+ FeatureDriftResult,
14
+ )
15
+
16
+ __version__ = "1.0.0"
17
+
18
+ __all__ = [
19
+ "check_drift",
20
+ "DriftMonitor",
21
+ "DriftCallback",
22
+ "DriftCheckResult",
23
+ "DriftSeverity",
24
+ "FeatureDriftResult",
25
+ ]
@@ -0,0 +1,280 @@
1
+ """
2
+ Drift detection decorator for FastAPI endpoints.
3
+ """
4
+
5
+ import functools
6
+ import json
7
+ import logging
8
+ from collections import deque
9
+ from dataclasses import dataclass, field
10
+ from enum import Enum
11
+ from pathlib import Path
12
+ from typing import Callable
13
+
14
+ import numpy as np
15
+ from scipy import stats
16
+
17
+ logger = logging.getLogger("checkdrift")
18
+
19
+
20
class DriftSeverity(Enum):
    """Outcome levels of a drift check, ordered from no drift to worst."""

    # No drift detected.
    OK = "ok"
    # Moderate drift detected.
    WARNING = "warning"
    # Significant drift detected.
    ALARM = "alarm"
25
+
26
+
27
@dataclass
class FeatureDriftResult:
    """Drift metrics and the resulting severity for one monitored feature."""

    # Name of the feature these metrics belong to.
    feature: str
    # Population Stability Index against the baseline.
    psi: float
    # Two-sample KS test statistic and its p-value.
    ks_stat: float
    ks_pvalue: float
    # Wasserstein distance between baseline and current window.
    wasserstein: float
    # Severity assigned to this feature by the monitor.
    severity: DriftSeverity

    @property
    def is_drifted(self) -> bool:
        """Whether the severity is anything other than ``DriftSeverity.OK``."""
        return self.severity is not DriftSeverity.OK
40
+
41
+
42
@dataclass
class DriftCheckResult:
    """Aggregated outcome of one drift check; this is what callbacks receive."""

    # Value of the monitor's request counter when the check ran.
    request_count: int
    # Per-feature results, keyed by feature name.
    features: dict[str, FeatureDriftResult] = field(default_factory=dict)

    @property
    def severity(self) -> DriftSeverity:
        """Return the worst severity found among the per-feature results."""
        observed = [entry.severity for entry in self.features.values()]
        # Scan from worst to mildest so the first hit is the overall level.
        for level in (DriftSeverity.ALARM, DriftSeverity.WARNING):
            if level in observed:
                return level
        return DriftSeverity.OK

    @property
    def drifted_features(self) -> list[str]:
        """Return the names of the features whose result reports drift."""
        names = []
        for name, entry in self.features.items():
            if entry.is_drifted:
                names.append(name)
        return names
60
+
61
+
62
+ # Type alias for callback
63
+ DriftCallback = Callable[[DriftCheckResult], None]
64
+
65
+
66
def compute_psi(reference: np.ndarray, current: np.ndarray, bins: int = 10) -> float:
    """
    Compute Population Stability Index between two distributions.

    Bin edges are derived from ``reference``. Values of ``current`` that fall
    outside the reference range are counted in the nearest outer bin instead
    of being dropped, so a shift beyond the baseline's min/max contributes to
    the index and the current proportions still sum to one.

    Args:
        reference: Baseline sample that defines the bin edges.
        current: Sample to compare against the baseline.
        bins: Number of equal-width bins spanning the reference range.

    Returns:
        The PSI as a plain ``float``; 0.0 for identical bin proportions.
    """
    _, bin_edges = np.histogram(reference, bins=bins)

    # np.histogram ignores values outside [first_edge, last_edge]; clip so
    # out-of-range observations land in the first/last bin instead.
    current_in_range = np.clip(current, bin_edges[0], bin_edges[-1])

    ref_counts, _ = np.histogram(reference, bins=bin_edges)
    curr_counts, _ = np.histogram(current_in_range, bins=bin_edges)

    # Convert to proportions; add-one smoothing avoids log(0) and division
    # by zero for empty bins.
    ref_pct = (ref_counts + 1) / (len(reference) + bins)
    curr_pct = (curr_counts + 1) / (len(current) + bins)

    # PSI formula: sum((curr - ref) * ln(curr / ref))
    psi = np.sum((curr_pct - ref_pct) * np.log(curr_pct / ref_pct))
    return float(psi)
82
+
83
+
84
class DriftMonitor:
    """
    Monitors feature distributions for drift against a baseline.

    Maintains a sliding window of recent observations per feature and, every
    ``check_interval`` pushes, compares each window with the baseline using
    PSI, the two-sample KS test, and the Wasserstein distance.

    Default thresholds (banking industry standard):
    - PSI > 0.2: Significant drift
    - PSI > 0.1: Moderate drift
    - KS p-value < 0.05: Statistically significant shift
    """

    PSI_WARNING = 0.1
    PSI_ALARM = 0.2
    KS_PVALUE_THRESHOLD = 0.05

    def __init__(
        self,
        baseline_path: str | Path,
        window_size: int = 100,
        check_interval: int = 50,
        psi_threshold: float = 0.2,
        ks_pvalue_threshold: float = 0.05,
        callback: DriftCallback | None = None,
    ):
        """
        Load the baseline and create one empty sliding window per feature.

        Args:
            baseline_path: JSON file whose ``"distributions"`` entry maps
                feature names to lists of reference values.
            window_size: Maximum number of recent values kept per feature.
            check_interval: Run a drift check every this many pushes.
            psi_threshold: PSI above which a feature is in ALARM.
            ks_pvalue_threshold: KS p-value below which a shift counts as
                statistically significant.
            callback: Called with the check result when drift is detected;
                when omitted, drift is logged instead.

        Raises:
            ValueError: If ``window_size`` or ``check_interval`` is below 1.
        """
        # Fail at construction time rather than with a ZeroDivisionError (or
        # a monitor that can never check) on some later request.
        if window_size < 1:
            raise ValueError("window_size must be a positive integer")
        if check_interval < 1:
            raise ValueError("check_interval must be a positive integer")

        self.baseline_path = Path(baseline_path)
        self.window_size = window_size
        self.check_interval = check_interval
        self.psi_threshold = psi_threshold
        self.ks_pvalue_threshold = ks_pvalue_threshold
        self.callback = callback

        # Load baseline distributions
        with open(self.baseline_path, encoding="utf-8") as f:
            self.baseline = json.load(f)

        self.features = list(self.baseline["distributions"].keys())

        # Sliding window per feature
        self.windows: dict[str, deque] = {
            feat: deque(maxlen=window_size) for feat in self.features
        }
        self.request_count = 0
        self.last_check_results: DriftCheckResult | None = None

    def push(self, values: dict[str, float]) -> None:
        """
        Add one observation to the sliding windows.

        Only keys that name a baseline feature are stored. The request counter
        is incremented on every call, and a drift check runs whenever the
        counter reaches a multiple of ``check_interval``.
        """
        for feat in self.features:
            if feat in values:
                self.windows[feat].append(values[feat])
        self.request_count += 1

        if self.request_count % self.check_interval == 0:
            self._check_drift()

    def _determine_severity(self, psi: float, ks_pvalue: float) -> DriftSeverity:
        """
        Determine drift severity combining PSI and KS test.

        ALARM when PSI exceeds the configured ``psi_threshold``, or when PSI
        exceeds ``PSI_WARNING`` and the KS test is significant as well.
        WARNING when exactly one of those two weaker signals is present.
        """
        # Use the per-instance threshold so the constructor argument actually
        # takes effect; its default equals PSI_ALARM.
        if psi > self.psi_threshold:
            return DriftSeverity.ALARM

        moderate_psi = psi > self.PSI_WARNING
        significant_ks = ks_pvalue < self.ks_pvalue_threshold
        if moderate_psi and significant_ks:
            return DriftSeverity.ALARM
        if moderate_psi or significant_ks:
            return DriftSeverity.WARNING
        return DriftSeverity.OK

    def _check_drift(self) -> None:
        """
        Run drift detection on current windows.

        Features with too few buffered values are skipped. The result is
        stored in ``last_check_results`` and handed to ``_handle_drift`` when
        its overall severity is not OK.
        """
        result = DriftCheckResult(request_count=self.request_count)

        # A window can never hold more than window_size values, so cap the
        # required sample count; otherwise window_size < check_interval would
        # disable checking entirely.
        min_samples = min(self.check_interval, self.window_size)

        for feat in self.features:
            if len(self.windows[feat]) < min_samples:
                continue

            current = np.array(self.windows[feat])
            reference = np.array(self.baseline["distributions"][feat])

            psi = compute_psi(reference, current)
            ks_stat, ks_pvalue = stats.ks_2samp(reference, current)
            wasserstein = stats.wasserstein_distance(reference, current)

            severity = self._determine_severity(psi, ks_pvalue)

            # Convert NumPy scalars to built-in floats so results and the log
            # summary print as plain numbers.
            result.features[feat] = FeatureDriftResult(
                feature=feat,
                psi=round(float(psi), 4),
                ks_stat=round(float(ks_stat), 4),
                ks_pvalue=round(float(ks_pvalue), 4),
                wasserstein=round(float(wasserstein), 4),
                severity=severity,
            )

        self.last_check_results = result

        if result.severity != DriftSeverity.OK:
            self._handle_drift(result)

    def _handle_drift(self, result: DriftCheckResult) -> None:
        """
        Log drift or invoke callback.

        With a callback configured, the callback replaces logging; exceptions
        it raises are logged and swallowed. Without one, ALARM results are
        logged at WARNING level and WARNING results at INFO level.
        """
        if self.callback:
            try:
                self.callback(result)
            except Exception as e:
                # logger.exception keeps the traceback, which the bare
                # message alone would lose.
                logger.exception("[checkdrift] Callback error: %s", e)
            return

        drifted = result.drifted_features
        severity = result.severity.value.upper()

        summary = {
            feat: {
                "psi": r.psi,
                "ks_p": r.ks_pvalue,
                "wasserstein": r.wasserstein,
            }
            for feat, r in result.features.items()
            if r.is_drifted
        }

        log_msg = (
            f"Drift {severity} in {drifted} "
            f"after {result.request_count} requests: {summary}"
        )

        if result.severity == DriftSeverity.ALARM:
            logger.warning(log_msg)
        else:
            logger.info(log_msg)
211
+
212
+
213
+ # Global registry of monitors
214
+ _monitors: dict[str, DriftMonitor] = {}
215
+
216
+
217
def check_drift(
    baseline: str | Path,
    window_size: int = 100,
    check_interval: int = 50,
    psi_threshold: float = 0.2,
    ks_pvalue_threshold: float = 0.05,
    on_drift: DriftCallback | None = None,
):
    """
    Decorator for drift detection on FastAPI endpoints.

    Monitors incoming request distributions against a baseline using
    industry-standard metrics:
    - PSI (Population Stability Index): >0.2 = significant, >0.1 = moderate
    - KS test p-value: <0.05 = statistically significant shift
    - Wasserstein distance: magnitude of distribution shift

    The first argument (positional or keyword) that exposes ``model_dump``
    (Pydantic v2) is used as the observation; its numeric fields that match
    baseline features are pushed to the endpoint's monitor. Monitoring is
    best-effort: a failure while recording is logged and never fails the
    request. The returned wrapper is always a coroutine function; a plain
    ``def`` endpoint is called inline from it.

    Args:
        baseline: Path to baseline JSON file with reference distributions
        window_size: Number of recent requests to keep in sliding window
        check_interval: Check drift every N requests
        psi_threshold: PSI threshold for drift alert (default 0.2)
        ks_pvalue_threshold: KS test p-value threshold (default 0.05)
        on_drift: Callback invoked when drift is detected

    Returns:
        A decorator; the wrapped endpoint exposes its monitor as the
        ``drift_monitor`` attribute.

    Example:
        @app.post("/predict")
        @check_drift(baseline="baseline.json")
        async def predict(application: LoanApplication):
            return model.predict(application)
    """
    # Imported here so the module's top-level import block stays untouched.
    import inspect

    def decorator(func: Callable) -> Callable:
        # Qualify the registry key with the module so that same-named
        # endpoints in different modules do not share one monitor/baseline.
        endpoint_name = f"{func.__module__}.{func.__qualname__}"

        if endpoint_name not in _monitors:
            _monitors[endpoint_name] = DriftMonitor(
                baseline_path=baseline,
                window_size=window_size,
                check_interval=check_interval,
                psi_threshold=psi_threshold,
                ks_pvalue_threshold=ks_pvalue_threshold,
                callback=on_drift,
            )

        monitor = _monitors[endpoint_name]

        def record_observation(args: tuple, kwargs: dict) -> None:
            """Push the first Pydantic-like argument's numeric features."""
            for arg in (*args, *kwargs.values()):
                if hasattr(arg, "model_dump"):  # Pydantic v2
                    data = arg.model_dump()
                    features = {
                        k: float(v) for k, v in data.items()
                        if isinstance(v, (int, float)) and k in monitor.features
                    }
                    monitor.push(features)
                    break

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            try:
                record_observation(args, kwargs)
            except Exception:
                # Drift monitoring must never break the endpoint itself.
                logger.exception(
                    "[checkdrift] Drift monitoring failed for %s", endpoint_name
                )

            result = func(*args, **kwargs)
            # Support both ``async def`` and plain ``def`` endpoints.
            if inspect.isawaitable(result):
                result = await result
            return result

        wrapper.drift_monitor = monitor
        return wrapper

    return decorator
File without changes
@@ -0,0 +1 @@
1
+ """checkdrift examples."""
@@ -0,0 +1,64 @@
1
+ # LendingClub Example
2
+
3
+ Demonstrates drift detection on a loan triage API using LendingClub data.
4
+
5
+ ## Files
6
+
7
+ - `api.py` - FastAPI app with `@check_drift` decorator
8
+ - `models.py` - Pydantic models and mock ML model
9
+ - `test_run.py` - Demo script showing drift detection
10
+ - `generate_baseline.py` - Script to create baseline.json from CSV
11
+ - `data/lendingclub_2015_sample.csv.gz` - Reference data (1% sample, ~4k rows)
12
+ - `data/lendingclub_2017_sample.csv.gz` - Test data with drift (1% sample)
13
+ - `baseline.json` - Pre-generated baseline from 2015 data
14
+
15
+ ## Quick Start
16
+
17
+ ```bash
18
+ # Install dependencies
19
+ pip install checkdrift fastapi uvicorn
20
+
21
+ # Run the API
22
+ uvicorn examples.lendingclub.api:app --port 8000
23
+
24
+ # Test endpoint
25
+ curl -X POST http://localhost:8000/triage \
26
+ -H "Content-Type: application/json" \
27
+ -d '{"annual_inc": 75000, "dti": 18.5, "loan_amnt": 15000, "int_rate": 11.99}'
28
+ ```
29
+
30
+ ## Demo: Detecting Drift
31
+
32
+ Run the test script to send requests to the API and see drift detection logs:
33
+
34
+ ```bash
35
+ python examples/lendingclub/test_run.py
36
+ ```
37
+
38
+ Output:
39
+
40
+ ```
41
+ checkdrift - WARNING - Drift ALARM in ['annual_inc', 'dti', 'loan_amnt', 'int_rate'] after 50 requests: {'annual_inc': {'psi': 0.5026, ...}, 'dti': {'psi': 0.1764, ...}, 'loan_amnt': {'psi': 0.1239, ...}, 'int_rate': {'psi': 0.3146, ...}}
42
+ checkdrift - WARNING - Drift ALARM in ['annual_inc', 'dti', 'loan_amnt', 'int_rate'] after 100 requests: ...
43
+ checkdrift - WARNING - Drift ALARM in ['annual_inc', 'dti', 'loan_amnt', 'int_rate'] after 150 requests: ...
44
+ Sending 200 requests to /triage...
45
+ Drift logs will appear below:
46
+
47
+ Done.
48
+ ```
49
+
50
+ The 2017 data triggers **ALARM** on multiple features - the distribution shifted between 2015 (baseline) and 2017.
51
+
52
+ ## Regenerate Baseline
53
+
54
+ ```bash
55
+ python generate_baseline.py data/lendingclub_2015_sample.csv.gz baseline.json
56
+ ```
57
+
58
+ ## Data
59
+
60
+ The sample datasets are 1% random samples of [LendingClub loan data](https://www.kaggle.com/datasets/wordsforthewise/lending-club) for two years:
61
+ - **2015**: Reference distribution (baseline)
62
+ - **2017**: Shows drift over 2 years
63
+
64
+ Features: `annual_inc`, `dti`, `loan_amnt`, `int_rate`
@@ -0,0 +1 @@
1
+ """LendingClub example for checkdrift."""
@@ -0,0 +1,63 @@
1
+ """
2
+ Loan Triage API - Example with Drift Detection
3
+
4
+ Demo service showing how to add drift detection to a FastAPI endpoint.
5
+
6
+ Usage:
7
+ uvicorn examples.lendingclub.api:app --port 8000
8
+
9
+ Endpoints:
10
+ POST /triage - Score a loan application
11
+ GET /health - Service health check
12
+ """
13
+
14
+ from contextlib import asynccontextmanager
15
+ from pathlib import Path
16
+
17
+ from fastapi import FastAPI, HTTPException
18
+
19
+ from checkdrift import check_drift
20
+
21
+ from .models import LoanApplication, TriageResult, LoanTriageModel
22
+
23
+
24
+ BASELINE_PATH = Path(__file__).parent / "baseline.json"
25
+
26
+ model: LoanTriageModel | None = None
27
+
28
+
29
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Load model on startup and release it on shutdown.

    The module-level ``model`` is set before the application starts serving
    and reset to ``None`` afterwards.
    """
    global model
    model = LoanTriageModel(seed=42)
    try:
        yield
    finally:
        # Reset even if an exception is thrown into the context manager, so
        # the health check never reports a stale model as loaded.
        model = None
36
+
37
+
38
+ app = FastAPI(
39
+ title="Loan Triage API",
40
+ description="Demo API with drift detection using checkdrift.",
41
+ version="1.0.0",
42
+ lifespan=lifespan,
43
+ )
44
+
45
+
46
@app.get("/health")
async def health_check() -> dict:
    """Service health check."""
    # The model is only set between application startup and shutdown.
    model_loaded = model is not None
    return {"status": "healthy", "model_loaded": model_loaded}
50
+
51
+
52
@app.post("/triage", response_model=TriageResult)
@check_drift(baseline=BASELINE_PATH)
async def triage_application(application: LoanApplication) -> TriageResult:
    """
    Score a loan application and assign to review queue.

    Returns risk score and recommended review tier.
    """
    # Happy path first: score the application when a model is available.
    if model is not None:
        return model.predict(application)

    # No model loaded (before startup or after shutdown): report unavailable.
    raise HTTPException(status_code=503, detail="Model not loaded")