driftwatch 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,297 @@
1
+ """FastAPI integration for DriftWatch.
2
+
3
+ Provides middleware and endpoints for automatic drift monitoring
4
+ on ML inference APIs.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import asyncio
10
+ import threading
11
+ from collections import deque
12
+ from dataclasses import dataclass, field
13
+ from datetime import datetime, timezone
14
+ from typing import TYPE_CHECKING, Any, Callable, cast
15
+
16
+ import pandas as pd
17
+ from starlette.middleware.base import BaseHTTPMiddleware
18
+
19
+ if TYPE_CHECKING:
20
+ from starlette.requests import Request
21
+ from starlette.responses import Response
22
+ from starlette.types import ASGIApp
23
+
24
+ from driftwatch import Monitor
25
+ from driftwatch.core.report import DriftReport
26
+
27
+
28
@dataclass
class DriftState:
    """Lock-guarded buffers and bookkeeping for drift monitoring.

    Every method acquires ``lock`` before touching a field, so the methods
    can be called from several threads. Code that reads or mutates the
    fields directly should hold ``lock`` itself.

    Attributes:
        samples: Bounded buffer of feature dicts; once ``maxlen`` is reached
            the oldest entry is dropped for each new one.
        predictions: Bounded buffer of prediction dicts, same eviction rule.
        last_report: Report stored by the most recent ``update_report``
            call, or ``None`` if there has been none.
        last_check_time: UTC time of the most recent ``update_report`` call.
        request_count: Number of ``add_sample`` calls made so far.
        lock: Mutex guarding all of the fields above.
    """

    samples: deque[dict[str, Any]] = field(default_factory=lambda: deque(maxlen=10000))
    predictions: deque[dict[str, Any]] = field(
        default_factory=lambda: deque(maxlen=10000)
    )
    last_report: DriftReport | None = None
    last_check_time: datetime | None = None
    request_count: int = 0
    lock: threading.Lock = field(default_factory=threading.Lock)

    def add_prediction(self, prediction: dict[str, Any]) -> None:
        """Append ``prediction`` to the prediction buffer."""
        with self.lock:
            self.predictions.append(prediction)

    def add_sample(self, sample: dict[str, Any]) -> None:
        """Append ``sample`` to the sample buffer and bump ``request_count``."""
        with self.lock:
            self.samples.append(sample)
            self.request_count += 1

    def get_samples_df(self) -> pd.DataFrame:
        """Return the buffered samples as a DataFrame, one row per sample.

        Only the cheap list copy happens under the lock. The DataFrame is
        built afterwards so that callers adding samples are not blocked for
        the duration of the pandas conversion.
        """
        with self.lock:
            snapshot = list(self.samples)
        return pd.DataFrame(snapshot)

    def update_report(self, report: DriftReport) -> None:
        """Store ``report`` as the latest report and stamp the current UTC time."""
        with self.lock:
            self.last_report = report
            self.last_check_time = datetime.now(timezone.utc)
62
+
63
+
64
class DriftMiddleware(BaseHTTPMiddleware):
    """
    FastAPI middleware for automatic drift monitoring.

    Collects input features from the JSON body of POST requests and runs
    drift detection every ``check_interval`` collected samples. Requests
    whose path starts with ``/drift`` are never inspected.

    Args:
        app: The ASGI application
        monitor: DriftWatch Monitor instance with reference data
        feature_extractor: Function to extract features from request body.
            Defaults to returning the entire request body as features.
        prediction_extractor: Optional function to extract a prediction dict
            from the decoded JSON response body. When ``None`` (the default)
            responses are not inspected.
        check_interval: Number of collected samples between drift checks.
            Set to 0 to disable automatic checks.
        min_samples: Minimum samples required before running drift check.
        buffer_size: Maximum number of samples (and of predictions) kept in
            memory; the oldest entries are discarded first.
        enabled: Whether drift collection is enabled.

    Example:
        ```python
        from fastapi import FastAPI
        from driftwatch import Monitor
        from driftwatch.integrations.fastapi import DriftMiddleware

        monitor = Monitor(reference_data=train_df)
        app = FastAPI()

        app.add_middleware(
            DriftMiddleware,
            monitor=monitor,
            check_interval=100,
        )
        ```
    """

    def __init__(
        self,
        app: ASGIApp,
        monitor: Monitor,
        feature_extractor: Callable[[dict[str, Any]], dict[str, Any]] | None = None,
        prediction_extractor: Callable[[dict[str, Any]], dict[str, Any]] | None = None,
        check_interval: int = 100,
        min_samples: int = 50,
        buffer_size: int = 10000,
        enabled: bool = True,
    ) -> None:
        super().__init__(app)
        self.monitor = monitor
        self.feature_extractor = feature_extractor or (lambda x: x)
        self.prediction_extractor = prediction_extractor
        self.check_interval = check_interval
        self.min_samples = min_samples
        self.buffer_size = buffer_size
        self.enabled = enabled
        self.state = DriftState(
            samples=deque(maxlen=buffer_size),
            predictions=deque(maxlen=buffer_size),
        )
        # Strong references to in-flight drift checks: the event loop only
        # keeps weak references to tasks, so they could otherwise be
        # garbage-collected before finishing.
        self._background_tasks: set[asyncio.Task[None]] = set()

    async def dispatch(self, request: Request, call_next: Callable) -> Response:
        """Process request and collect features for drift monitoring.

        Collection is best effort: a failure while extracting features or
        predictions is logged and never fails the request itself.
        """
        # Skip everything that is not a monitored POST request.
        if (
            not self.enabled
            or request.method != "POST"
            or request.url.path.startswith("/drift")
        ):
            return cast("Response", await call_next(request))

        recorded_count = await self._collect_features(request)

        # Process the request
        response = cast("Response", await call_next(request))

        if self.prediction_extractor is not None:
            await self._collect_prediction(response)

        # Only a request that actually stored a sample may trigger a check,
        # and it is judged on the counter value it produced. Re-reading the
        # live counter here would re-trigger on sample-less requests and
        # miss the mark when other requests were counted during call_next.
        if recorded_count is not None and self._should_check_drift(recorded_count):
            task = asyncio.create_task(self._run_drift_check())
            self._background_tasks.add(task)
            task.add_done_callback(self._background_tasks.discard)

        return response

    async def _collect_features(self, request: Request) -> int | None:
        """Buffer the monitored features of one request.

        Returns:
            The value of ``state.request_count`` right after the sample was
            stored, or ``None`` when nothing was stored.
        """
        # NOTE(review): reading the body here relies on BaseHTTPMiddleware
        # replaying it to the downstream app; as far as I know that needs
        # Starlette >= 0.28 - confirm against the supported version range.
        try:
            body = await request.json()
            features = self.feature_extractor(body)
            if not features or not isinstance(features, dict):
                return None

            # Filter to only monitored features
            monitored_names = self.monitor.monitored_features
            monitored = {k: v for k, v in features.items() if k in monitored_names}
            if not monitored:
                return None

            self.state.add_sample(monitored)
            # No await between storing and reading, so on the event loop
            # this is the count produced by this very request.
            return self.state.request_count
        except Exception:
            # Don't fail the request if feature extraction fails
            self._log_suppressed("DriftWatch: could not extract request features")
            return None

    async def _collect_prediction(self, response: Response) -> None:
        """Buffer the prediction carried by ``response``, if any."""
        extractor = self.prediction_extractor
        if extractor is None:
            return

        # Deliberately outside the try block: an error raised while draining
        # the body comes from the application and must keep propagating.
        raw_body = await self._read_json_body(response)
        if raw_body is None:
            return

        try:
            import json

            prediction = extractor(json.loads(raw_body))
            if prediction and isinstance(prediction, dict):
                self.state.add_prediction(prediction)
        except Exception:
            self._log_suppressed("DriftWatch: could not extract prediction")

    @staticmethod
    async def _read_json_body(response: Response) -> bytes | None:
        """Return the raw body of a JSON response without consuming it.

        Responses exposing ``body`` are read directly. Responses exposing
        only ``body_iterator`` (which is what ``call_next`` hands to a
        ``BaseHTTPMiddleware``) are drained and given a replaying iterator
        so the client still receives the full payload. Non-JSON streams are
        left untouched and yield ``None``.
        """
        body = getattr(response, "body", None)
        if body is not None:
            return bytes(body)

        body_iterator = getattr(response, "body_iterator", None)
        if body_iterator is None:
            return None

        content_type = response.headers.get("content-type", "")
        media_type = content_type.split(";", 1)[0].strip().lower()
        if media_type != "application/json" and not media_type.endswith("+json"):
            # Never buffer arbitrary (possibly endless) streams.
            return None

        chunks = [chunk async for chunk in body_iterator]

        async def replay() -> Any:
            for chunk in chunks:
                yield chunk

        response.body_iterator = replay()
        return b"".join(
            chunk.encode("utf-8") if isinstance(chunk, str) else bytes(chunk)
            for chunk in chunks
        )

    @staticmethod
    def _log_suppressed(message: str, *, error: bool = False) -> None:
        """Log the exception being handled instead of dropping it silently.

        Must be called from inside an ``except`` block. Logs at ERROR level
        when ``error`` is true, otherwise at DEBUG level.
        """
        # Local import: the module does not import logging at top level.
        import logging

        level = logging.ERROR if error else logging.DEBUG
        logging.getLogger(__name__).log(level, message, exc_info=True)

    def _should_check_drift(self, request_count: int | None = None) -> bool:
        """Determine if drift check should run.

        Args:
            request_count: Counter value to evaluate. Defaults to the
                current ``state.request_count``.
        """
        if self.check_interval <= 0:
            return False
        if len(self.state.samples) < self.min_samples:
            return False
        count = self.state.request_count if request_count is None else request_count
        # A zero count (fresh or just-reset state) is never a check point.
        return count > 0 and count % self.check_interval == 0

    async def _run_drift_check(self) -> None:
        """Run drift detection in background and store the resulting report."""
        try:
            production_df = self.state.get_samples_df()
            if production_df.empty:
                return

            # Run check in thread pool to avoid blocking
            loop = asyncio.get_running_loop()
            report = await loop.run_in_executor(None, self.monitor.check, production_df)
            self.state.update_report(report)

        except Exception:
            # A failed background check must not crash the application.
            self._log_suppressed(
                "DriftWatch: background drift check failed", error=True
            )
199
+
200
+
201
def add_drift_routes(app: Any, middleware: DriftMiddleware) -> None:
    """
    Add drift monitoring endpoints to a FastAPI app.

    Endpoints:
        GET  /drift/status - Current drift status
        GET  /drift/report - Full drift report
        GET  /drift/health - Health check
        POST /drift/check  - Run a drift check now on the buffered samples
        POST /drift/reset  - Clear the buffered samples and request counter

    Args:
        app: FastAPI application instance
        middleware: DriftMiddleware instance

    Raises:
        TypeError: If ``app`` is not a FastAPI instance.
    """
    from fastapi import FastAPI

    if not isinstance(app, FastAPI):
        raise TypeError("app must be a FastAPI instance")

    @app.get("/drift/status")
    async def drift_status() -> dict[str, Any]:
        """Get current drift status."""
        state = middleware.state

        # Take one consistent snapshot so the response never mixes fields
        # from two different reports.
        with state.lock:
            report = state.last_report
            last_check_time = state.last_check_time
            samples_collected = len(state.samples)
            total_requests = state.request_count

        if report is None:
            return {
                "status": "NO_DATA",
                "message": "No drift check has been performed yet",
                "samples_collected": samples_collected,
                "min_samples_required": middleware.min_samples,
            }

        return {
            "status": report.status.value,
            "has_drift": report.has_drift(),
            "drift_ratio": report.drift_ratio(),
            "drifted_features": report.drifted_features(),
            "last_check": last_check_time.isoformat() if last_check_time else None,
            "samples_collected": samples_collected,
            "total_requests": total_requests,
        }

    @app.get("/drift/report")
    async def drift_report() -> dict[str, Any]:
        """Get full drift report."""
        state = middleware.state
        report = state.last_report

        if report is None:
            return {
                "error": "No drift report available",
                "samples_collected": len(state.samples),
            }

        return report.to_dict()

    @app.get("/drift/health")
    async def drift_health() -> dict[str, Any]:
        """Health check endpoint."""
        state = middleware.state

        return {
            "status": "healthy",
            "monitoring_enabled": middleware.enabled,
            "features_monitored": middleware.monitor.monitored_features,
            "samples_in_buffer": len(state.samples),
            "check_interval": middleware.check_interval,
        }

    @app.post("/drift/check")
    async def trigger_drift_check() -> dict[str, Any]:
        """Manually trigger a drift check."""
        production_df = middleware.state.get_samples_df()

        if len(production_df) < middleware.min_samples:
            return {
                "error": f"Not enough samples. Need {middleware.min_samples}, have {len(production_df)}",
            }

        # Run the check in the default executor, as the middleware does,
        # so it does not block the event loop.
        loop = asyncio.get_running_loop()
        report = await loop.run_in_executor(
            None, middleware.monitor.check, production_df
        )
        middleware.state.update_report(report)

        return {
            "status": report.status.value,
            "has_drift": report.has_drift(),
            "drifted_features": report.drifted_features(),
            "checked_at": datetime.now(timezone.utc).isoformat(),
        }

    @app.post("/drift/reset")
    async def reset_samples() -> dict[str, Any]:
        """Reset collected samples."""
        # NOTE(review): the prediction buffer and the last report are left
        # untouched here - confirm whether a reset should clear them too.
        with middleware.state.lock:
            middleware.state.samples.clear()
            middleware.state.request_count = 0

        return {"message": "Samples reset successfully"}
driftwatch/py.typed ADDED
@@ -0,0 +1 @@
1
+ """Type marker for PEP 561 compliance."""
@@ -0,0 +1 @@
1
+ """Simulation module for generating synthetic drift."""
@@ -0,0 +1,144 @@
1
+ Metadata-Version: 2.4
2
+ Name: driftwatch
3
+ Version: 0.2.0
4
+ Summary: Lightweight ML drift monitoring, built for real-world pipelines
5
+ Author-email: Your Name <your.email@example.com>
6
+ Maintainer-email: Your Name <your.email@example.com>
7
+ License: MIT
8
+ Project-URL: Homepage, https://github.com/VincentCotella/DriftWatch
9
+ Project-URL: Documentation, https://driftwatch.readthedocs.io
10
+ Project-URL: Repository, https://github.com/VincentCotella/DriftWatch
11
+ Project-URL: Issues, https://github.com/VincentCotella/DriftWatch/issues
12
+ Project-URL: Changelog, https://github.com/VincentCotella/DriftWatch/blob/main/CHANGELOG.md
13
+ Keywords: machine-learning,mlops,drift-detection,monitoring,data-quality,model-monitoring
14
+ Classifier: Development Status :: 3 - Alpha
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: Intended Audience :: Science/Research
17
+ Classifier: License :: OSI Approved :: MIT License
18
+ Classifier: Operating System :: OS Independent
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3.9
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
25
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
26
+ Classifier: Typing :: Typed
27
+ Requires-Python: >=3.9
28
+ Description-Content-Type: text/markdown
29
+ License-File: LICENSE
30
+ Requires-Dist: numpy>=1.21.0
31
+ Requires-Dist: pandas>=1.3.0
32
+ Requires-Dist: scipy>=1.7.0
33
+ Requires-Dist: pydantic>=2.0.0
34
+ Provides-Extra: dev
35
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
36
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
37
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
38
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
39
+ Requires-Dist: black>=23.0.0; extra == "dev"
40
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
41
+ Requires-Dist: pre-commit>=3.0.0; extra == "dev"
42
+ Requires-Dist: pandas-stubs>=2.0.0; extra == "dev"
43
+ Provides-Extra: cli
44
+ Requires-Dist: typer>=0.9.0; extra == "cli"
45
+ Requires-Dist: rich>=13.0.0; extra == "cli"
46
+ Provides-Extra: fastapi
47
+ Requires-Dist: fastapi>=0.100.0; extra == "fastapi"
48
+ Requires-Dist: uvicorn>=0.23.0; extra == "fastapi"
49
+ Provides-Extra: mlflow
50
+ Requires-Dist: mlflow>=2.0.0; extra == "mlflow"
51
+ Provides-Extra: alerting
52
+ Requires-Dist: httpx>=0.24.0; extra == "alerting"
53
+ Requires-Dist: aiosmtplib>=2.0.0; extra == "alerting"
54
+ Provides-Extra: all
55
+ Requires-Dist: driftwatch[alerting,cli,fastapi,mlflow]; extra == "all"
56
+ Provides-Extra: docs
57
+ Requires-Dist: mkdocs>=1.5.0; extra == "docs"
58
+ Requires-Dist: mkdocs-material>=9.0.0; extra == "docs"
59
+ Requires-Dist: mkdocstrings[python]>=0.23.0; extra == "docs"
60
+ Dynamic: license-file
61
+
62
+ # DriftWatch
63
+
64
+ <div align="center">
65
+
66
+ **Lightweight ML drift monitoring, built for real-world pipelines**
67
+
68
+ [![Documentation](https://img.shields.io/badge/docs-vincentcotella.github.io%2FDriftWatch-blue.svg)](https://vincentcotella.github.io/DriftWatch/)
69
+ [![CI](https://github.com/VincentCotella/DriftWatch/actions/workflows/ci.yml/badge.svg)](https://github.com/VincentCotella/DriftWatch/actions/workflows/ci.yml)
70
+ [![PyPI version](https://badge.fury.io/py/driftwatch.svg)](https://pypi.org/project/driftwatch/)
71
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
72
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
73
+
74
+ </div>
75
+
76
+ ---
77
+
78
+ ## 📖 Documentation
79
+
80
+ **Read the full documentation here:** [vincentcotella.github.io/DriftWatch](https://vincentcotella.github.io/DriftWatch/)
81
+
82
+ ## 🚀 Features
83
+
84
+ - **Simple API**: Detect drift in 3 lines of code.
85
+ - **Multiple Detectors**: **PSI**, **KS-Test**, **Wasserstein Distance**, **Chi-Squared**.
86
+ - **Production-Ready**:
87
+ - ⚡ **FastAPI Integration** (Middleware included).
88
+ - 🔔 **Slack Alerts** built-in.
89
+ - 🛠️ **CLI** for batch processing.
90
+ - **Lightweight**: Minimal dependencies (`numpy`, `pandas`, `scipy`, `pydantic`).
91
+ - **Type-Safe**: 100% typed code with `mypy` support.
92
+
93
+ ## 📦 Installation
94
+
95
+ ```bash
96
+ pip install driftwatch
97
+ ```
98
+
99
+ For all optional extras (CLI, FastAPI, MLflow, Alerting):
100
+ ```bash
101
+ pip install driftwatch[all]
102
+ ```
103
+
104
+ ## ⚡ Quick Start
105
+
106
+ ```python
107
+ from driftwatch import Monitor
108
+ import pandas as pd
109
+
110
+ # 1. Initialize monitor with reference data (e.g., training set)
111
+ monitor = Monitor(reference_data=pd.read_parquet("train.parquet"))
112
+
113
+ # 2. Check production data for drift
114
+ report = monitor.check(pd.read_parquet("production.parquet"))
115
+
116
+ # 3. Act on results
117
+ if report.has_drift():
118
+ print(f"⚠️ Drift detected! Ratio: {report.drift_ratio():.1%}")
119
+ print(f"Drifted features: {report.drifted_features()}")
120
+ else:
121
+ print("✅ All systems normal.")
122
+ ```
123
+
124
+ ## 🛠️ Usage Scenarios
125
+
126
+ | Scenario | Solution |
127
+ |----------|----------|
128
+ | **Real-time API** | Use `DriftMiddleware` in FastAPI to monitor every request. |
129
+ | **Batch Job** | Use `driftwatch check` CLI in your Airflow/Cron jobs. |
130
+ | **CI/CD** | Block deployments if validation data drifts from training data. |
131
+ | **Alerting** | Send Slack notifications automatically when drift is critical. |
132
+
133
+ ## 🤝 Contributing
134
+
135
+ We welcome contributions! Please see our [Contributing Guide](https://vincentcotella.github.io/DriftWatch/contributing/) for details.
136
+
137
+ 1. Fork the repo.
138
+ 2. Install dev dependencies: `pip install -e ".[dev,all]"`
139
+ 3. Run tests: `pytest`
140
+ 4. Submit a PR!
141
+
142
+ ## 📄 License
143
+
144
+ MIT © [Vincent Cotella](https://github.com/VincentCotella)
@@ -0,0 +1,22 @@
1
+ driftwatch/__init__.py,sha256=eANf-l8xzaWO10uuIaSxAYnYGm2sYXsn5b4Ew8MoC14,593
2
+ driftwatch/py.typed,sha256=VeLLojGNPosEVWplpI3jgpXMfmmOHaAzQGyFJq8YfjM,42
3
+ driftwatch/cli/__init__.py,sha256=0pMY4GjZQK_23O4R3jgOhDC-ry_rGNU9sTkGlJp7jXI,101
4
+ driftwatch/cli/main.py,sha256=QV3IYgLJL5iOM0XBHnJ_tDhnPexYyLtPsI5o_e-Oeo4,8118
5
+ driftwatch/core/__init__.py,sha256=_frvUvPNa1zXJWv7mNR5Lnk6soNGmN387dpDZiI1EHU,201
6
+ driftwatch/core/monitor.py,sha256=BGIP8mHx3bPJ0UZf7g6I8cfOulRDoYrK4ntj6vnaYW0,5208
7
+ driftwatch/core/report.py,sha256=hHVJGfP4B4XabkcpS9TpHJpw3bTzfSCFVuCvaiJcmxo,5059
8
+ driftwatch/detectors/__init__.py,sha256=sLo0A7W6ik8XXpbVpqf150DieTZNQ3o5gbIm_gxGSw4,217
9
+ driftwatch/detectors/base.py,sha256=CAJHePSwQdaAEvC8qVYD3bL39sInY0c-_XIfifOxnq0,1616
10
+ driftwatch/detectors/categorical.py,sha256=9NHsXveNpgINJUJNoPtQy-iT9BxMgr3ji5bfLfd9_Gw,4232
11
+ driftwatch/detectors/numerical.py,sha256=OrDN35GRfZ6pevffSGs-lh32Uts56dLMJrO1x0cOwLs,5921
12
+ driftwatch/detectors/registry.py,sha256=JltHokVjlHI_Sz1eDonLWH0cQDfdScz2sK5PNkBkXiE,2084
13
+ driftwatch/integrations/__init__.py,sha256=8w5eie9Ci_1EsZAmOBBsU5PJFUIg52RCqXHWQK2FK3o,183
14
+ driftwatch/integrations/alerting.py,sha256=OzRuP_KVu7RQtBSvJnL6QyxbGyswp6qBHk9Xzgd9a-A,6468
15
+ driftwatch/integrations/fastapi.py,sha256=pgpdhVg0JYges9FURACJzwbkzwQ6MQ4atuILhphoPyM,10197
16
+ driftwatch/simulation/__init__.py,sha256=gxIK5yj9YcwtRMFCXBRhVJ6dA_eEJ0gSAQq5YPy3KUs,56
17
+ driftwatch-0.2.0.dist-info/licenses/LICENSE,sha256=J161VpWy8YHSX997h8UHNVgOG8Til1UqK6ui-evf5l4,1080
18
+ driftwatch-0.2.0.dist-info/METADATA,sha256=csIWiWGTEgXBb6FQerDOA5KI3SST3DZJA_Gnx54RqLk,5433
19
+ driftwatch-0.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
20
+ driftwatch-0.2.0.dist-info/entry_points.txt,sha256=kfLxdBjdL_A1DhNwWYUNj2zYB9tX5qpxY3vVn-uX0zI,55
21
+ driftwatch-0.2.0.dist-info/top_level.txt,sha256=3C66tjNxK5CgLhkRhSeWkgpQLakS4Uop84vJWCjWgd8,11
22
+ driftwatch-0.2.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.10.2)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ driftwatch = driftwatch.cli.main:app
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 DriftWatch Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ driftwatch