PyPI - driftwatch - Versions diffs - 0.2.0__py3-none-any.whl - Mend

driftwatch 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

driftwatch/__init__.py +22 -0
driftwatch/cli/__init__.py +5 -0
driftwatch/cli/main.py +274 -0
driftwatch/core/__init__.py +6 -0
driftwatch/core/monitor.py +153 -0
driftwatch/core/report.py +162 -0
driftwatch/detectors/__init__.py +6 -0
driftwatch/detectors/base.py +67 -0
driftwatch/detectors/categorical.py +145 -0
driftwatch/detectors/numerical.py +198 -0
driftwatch/detectors/registry.py +71 -0
driftwatch/integrations/__init__.py +5 -0
driftwatch/integrations/alerting.py +211 -0
driftwatch/integrations/fastapi.py +297 -0
driftwatch/py.typed +1 -0
driftwatch/simulation/__init__.py +1 -0
driftwatch-0.2.0.dist-info/METADATA +144 -0
driftwatch-0.2.0.dist-info/RECORD +22 -0
driftwatch-0.2.0.dist-info/WHEEL +5 -0
driftwatch-0.2.0.dist-info/entry_points.txt +2 -0
driftwatch-0.2.0.dist-info/licenses/LICENSE +21 -0
driftwatch-0.2.0.dist-info/top_level.txt +1 -0

driftwatch/integrations/fastapi.py ADDED Viewed

@@ -0,0 +1,297 @@
+"""FastAPI integration for DriftWatch.
+Provides middleware and endpoints for automatic drift monitoring
+on ML inference APIs.
+"""
+from __future__ import annotations
+import asyncio
+import json
+import logging
+import threading
+from collections import deque
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from typing import TYPE_CHECKING, Any, Callable, cast
+import pandas as pd
+from starlette.middleware.base import BaseHTTPMiddleware
+if TYPE_CHECKING:
+    from starlette.requests import Request
+    from starlette.responses import Response
+    from starlette.types import ASGIApp
+    from driftwatch import Monitor
+    from driftwatch.core.report import DriftReport
+@dataclass
+class DriftState:
+    """Lock-guarded buffers and bookkeeping for drift monitoring.
+    Every helper method below acquires ``lock`` around its read or write of
+    the shared fields. Code that touches the fields directly must take the
+    lock itself.
+    """
+    # Most recent feature rows collected from requests (bounded FIFO).
+    samples: deque[dict[str, Any]] = field(default_factory=lambda: deque(maxlen=10000))
+    # Most recent extracted predictions (bounded FIFO).
+    predictions: deque[dict[str, Any]] = field(
+        default_factory=lambda: deque(maxlen=10000)
+    )
+    # Report stored by the latest update_report() call; None until then.
+    last_report: DriftReport | None = None
+    # UTC timestamp recorded by update_report().
+    last_check_time: datetime | None = None
+    # Incremented once per add_sample() call.
+    request_count: int = 0
+    # Excluded from repr/eq: a lock has no value worth printing or comparing.
+    lock: threading.Lock = field(
+        default_factory=threading.Lock, repr=False, compare=False
+    )
+    def add_prediction(self, prediction: dict[str, Any]) -> None:
+        """Append ``prediction`` to the prediction buffer."""
+        with self.lock:
+            self.predictions.append(prediction)
+    def add_sample(self, sample: dict[str, Any]) -> None:
+        """Append ``sample`` to the sample buffer and bump ``request_count``."""
+        with self.lock:
+            self.samples.append(sample)
+            self.request_count += 1
+    def get_samples_df(self) -> pd.DataFrame:
+        """Return a DataFrame built from a snapshot of the buffered samples."""
+        # Copy under the lock but build the frame outside it, so DataFrame
+        # construction does not stall concurrent add_sample() calls.
+        with self.lock:
+            snapshot = list(self.samples)
+        return pd.DataFrame(snapshot)
+    def update_report(self, report: DriftReport) -> None:
+        """Store ``report`` as the latest result and stamp the current UTC time."""
+        with self.lock:
+            self.last_report = report
+            self.last_check_time = datetime.now(timezone.utc)
+class DriftMiddleware(BaseHTTPMiddleware):
+    """
+    FastAPI middleware for automatic drift monitoring.
+    Collects input features from POST request bodies and runs drift
+    detection on a configurable schedule.
+    Args:
+        app: The ASGI application
+        monitor: DriftWatch Monitor instance with reference data
+        feature_extractor: Function to extract features from request body.
+            Defaults to returning the entire request body as features.
+        prediction_extractor: Optional function to extract a prediction dict
+            from the decoded JSON response body. Predictions are only
+            collected when this is provided.
+        check_interval: Number of collected samples between drift checks.
+            Set to 0 to disable automatic checks.
+        min_samples: Minimum samples required before running drift check.
+        buffer_size: Maximum number of samples (and of predictions) kept in
+            memory; once full, the oldest entries are dropped first.
+        enabled: Whether drift collection is enabled.
+    Example:
+        ```python
+        from fastapi import FastAPI
+        from driftwatch import Monitor
+        from driftwatch.integrations.fastapi import DriftMiddleware
+        monitor = Monitor(reference_data=train_df)
+        app = FastAPI()
+        app.add_middleware(
+            DriftMiddleware,
+            monitor=monitor,
+            check_interval=100,
+        )
+        ```
+    """
+    _log = logging.getLogger(__name__)
+    def __init__(
+        self,
+        app: ASGIApp,
+        monitor: Monitor,
+        feature_extractor: Callable[[dict[str, Any]], dict[str, Any]] | None = None,
+        prediction_extractor: Callable[[dict[str, Any]], dict[str, Any]] | None = None,
+        check_interval: int = 100,
+        min_samples: int = 50,
+        buffer_size: int = 10000,
+        enabled: bool = True,
+    ) -> None:
+        super().__init__(app)
+        self.monitor = monitor
+        self.feature_extractor = feature_extractor or (lambda x: x)
+        self.prediction_extractor = prediction_extractor
+        self.check_interval = check_interval
+        self.min_samples = min_samples
+        self.buffer_size = buffer_size
+        self.enabled = enabled
+        self.state = DriftState(
+            samples=deque(maxlen=buffer_size),
+            predictions=deque(maxlen=buffer_size),
+        )
+        # Strong references keep background drift checks from being garbage
+        # collected before they finish.
+        self._background_tasks: set[asyncio.Task[None]] = set()
+    async def dispatch(self, request: Request, call_next: Callable) -> Response:
+        """Collect features (and optionally predictions) around one request.
+        Disabled middleware, non-POST requests and ``/drift*`` paths are
+        passed straight through. Extraction failures are logged at debug
+        level and never fail the request.
+        """
+        if not self.enabled:
+            return cast("Response", await call_next(request))
+        # Skip non-POST requests and internal endpoints
+        if request.method != "POST" or request.url.path.startswith("/drift"):
+            return cast("Response", await call_next(request))
+        # NOTE(review): reading the body here relies on the installed
+        # Starlette replaying it to the downstream handler - confirm for the
+        # oldest supported version.
+        sample_added = False
+        try:
+            body = await request.json()
+            features = self.feature_extractor(body)
+            if features and isinstance(features, dict):
+                # Filter to only monitored features
+                monitored = {
+                    k: v
+                    for k, v in features.items()
+                    if k in self.monitor.monitored_features
+                }
+                if monitored:
+                    self.state.add_sample(monitored)
+                    sample_added = True
+        except Exception:
+            # Don't fail the request if feature extraction fails
+            self._log.debug("Drift feature extraction failed", exc_info=True)
+        # Decide now, before awaiting the handler: the sample counter only
+        # moves when a sample is stored, so testing it later (or for requests
+        # that stored nothing) would re-trigger or miss the check boundary.
+        check_due = sample_added and self._should_check_drift()
+        # Process the request
+        response = cast("Response", await call_next(request))
+        # Try to extract predictions from response
+        if self.prediction_extractor is not None:
+            try:
+                # NOTE(review): responses returned by call_next may be
+                # streaming wrappers without a ``body`` attribute, in which
+                # case no prediction is recorded - confirm against Starlette.
+                if hasattr(response, "body"):
+                    response_body = json.loads(response.body)
+                    prediction = self.prediction_extractor(response_body)
+                    if prediction and isinstance(prediction, dict):
+                        self.state.add_prediction(prediction)
+            except Exception:
+                self._log.debug("Drift prediction extraction failed", exc_info=True)
+        if check_due:
+            task = asyncio.create_task(self._run_drift_check())
+            self._background_tasks.add(task)
+            task.add_done_callback(self._background_tasks.discard)
+        return response
+    def _should_check_drift(self) -> bool:
+        """Return True when the sample counter sits on a check boundary.
+        Requires a positive ``check_interval``, at least ``min_samples``
+        buffered samples, and ``request_count`` divisible by the interval.
+        """
+        if self.check_interval <= 0:
+            return False
+        if len(self.state.samples) < self.min_samples:
+            return False
+        return self.state.request_count % self.check_interval == 0
+    async def _run_drift_check(self) -> None:
+        """Run ``monitor.check`` on the buffered samples and store the report."""
+        try:
+            production_df = self.state.get_samples_df()
+            if production_df.empty:
+                return
+            # Run check in thread pool to avoid blocking
+            loop = asyncio.get_running_loop()
+            report = await loop.run_in_executor(None, self.monitor.check, production_df)
+            self.state.update_report(report)
+        except Exception:
+            # A failed check must not crash the app, but it should be visible.
+            self._log.exception("Background drift check failed")
+def add_drift_routes(app: Any, middleware: DriftMiddleware) -> None:
+    """
+    Add drift monitoring endpoints to a FastAPI app.
+    Endpoints:
+        GET /drift/status - Current drift status
+        GET /drift/report - Full drift report
+        GET /drift/health - Health check
+        POST /drift/check - Run a drift check now on the buffered samples
+        POST /drift/reset - Clear buffered samples and the sample counter
+    Args:
+        app: FastAPI application instance
+        middleware: DriftMiddleware instance whose state and monitor back
+            the endpoints
+    Raises:
+        TypeError: If ``app`` is not a FastAPI instance.
+    """
+    from fastapi import FastAPI
+    if not isinstance(app, FastAPI):
+        raise TypeError("app must be a FastAPI instance")
+    @app.get("/drift/status")
+    async def drift_status() -> dict[str, Any]:
+        """Get current drift status."""
+        state = middleware.state
+        # Take one snapshot so the report and its timestamp belong together
+        # even if a background check finishes while this handler runs.
+        with state.lock:
+            report = state.last_report
+            checked_at = state.last_check_time
+        if report is None:
+            return {
+                "status": "NO_DATA",
+                "message": "No drift check has been performed yet",
+                "samples_collected": len(state.samples),
+                "min_samples_required": middleware.min_samples,
+            }
+        return {
+            "status": report.status.value,
+            "has_drift": report.has_drift(),
+            "drift_ratio": report.drift_ratio(),
+            "drifted_features": report.drifted_features(),
+            "last_check": checked_at.isoformat() if checked_at else None,
+            "samples_collected": len(state.samples),
+            "total_requests": state.request_count,
+        }
+    @app.get("/drift/report")
+    async def drift_report() -> dict[str, Any]:
+        """Get full drift report."""
+        state = middleware.state
+        report = state.last_report
+        if report is None:
+            return {
+                "error": "No drift report available",
+                "samples_collected": len(state.samples),
+            }
+        return report.to_dict()
+    @app.get("/drift/health")
+    async def drift_health() -> dict[str, Any]:
+        """Health check endpoint."""
+        state = middleware.state
+        return {
+            "status": "healthy",
+            "monitoring_enabled": middleware.enabled,
+            "features_monitored": middleware.monitor.monitored_features,
+            "samples_in_buffer": len(state.samples),
+            "check_interval": middleware.check_interval,
+        }
+    @app.post("/drift/check")
+    async def trigger_drift_check() -> dict[str, Any]:
+        """Manually trigger a drift check."""
+        production_df = middleware.state.get_samples_df()
+        if len(production_df) < middleware.min_samples:
+            return {
+                "error": f"Not enough samples. Need {middleware.min_samples}, have {len(production_df)}",
+            }
+        # Offload to the default executor, as the middleware's scheduled
+        # checks do, so the check does not block the event loop.
+        loop = asyncio.get_running_loop()
+        report = await loop.run_in_executor(
+            None, middleware.monitor.check, production_df
+        )
+        middleware.state.update_report(report)
+        return {
+            "status": report.status.value,
+            "has_drift": report.has_drift(),
+            "drifted_features": report.drifted_features(),
+            "checked_at": datetime.now(timezone.utc).isoformat(),
+        }
+    @app.post("/drift/reset")
+    async def reset_samples() -> dict[str, Any]:
+        """Reset collected samples (predictions and the last report are kept)."""
+        with middleware.state.lock:
+            middleware.state.samples.clear()
+            middleware.state.request_count = 0
+        return {"message": "Samples reset successfully"}

driftwatch/py.typed ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Type marker for PEP 561 compliance."""

driftwatch/simulation/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Simulation module for generating synthetic drift."""

driftwatch-0.2.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,144 @@
+Metadata-Version: 2.4
+Name: driftwatch
+Version: 0.2.0
+Summary: Lightweight ML drift monitoring, built for real-world pipelines
+Author-email: Your Name <your.email@example.com>
+Maintainer-email: Your Name <your.email@example.com>
+License: MIT
+Project-URL: Homepage, https://github.com/VincentCotella/DriftWatch
+Project-URL: Documentation, https://vincentcotella.github.io/DriftWatch/
+Project-URL: Repository, https://github.com/VincentCotella/DriftWatch
+Project-URL: Issues, https://github.com/VincentCotella/DriftWatch/issues
+Project-URL: Changelog, https://github.com/VincentCotella/DriftWatch/blob/main/CHANGELOG.md
+Keywords: machine-learning,mlops,drift-detection,monitoring,data-quality,model-monitoring
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Typing :: Typed
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: numpy>=1.21.0
+Requires-Dist: pandas>=1.3.0
+Requires-Dist: scipy>=1.7.0
+Requires-Dist: pydantic>=2.0.0
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0.0; extra == "dev"
+Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
+Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
+Requires-Dist: ruff>=0.1.0; extra == "dev"
+Requires-Dist: black>=23.0.0; extra == "dev"
+Requires-Dist: mypy>=1.0.0; extra == "dev"
+Requires-Dist: pre-commit>=3.0.0; extra == "dev"
+Requires-Dist: pandas-stubs>=2.0.0; extra == "dev"
+Provides-Extra: cli
+Requires-Dist: typer>=0.9.0; extra == "cli"
+Requires-Dist: rich>=13.0.0; extra == "cli"
+Provides-Extra: fastapi
+Requires-Dist: fastapi>=0.100.0; extra == "fastapi"
+Requires-Dist: uvicorn>=0.23.0; extra == "fastapi"
+Provides-Extra: mlflow
+Requires-Dist: mlflow>=2.0.0; extra == "mlflow"
+Provides-Extra: alerting
+Requires-Dist: httpx>=0.24.0; extra == "alerting"
+Requires-Dist: aiosmtplib>=2.0.0; extra == "alerting"
+Provides-Extra: all
+Requires-Dist: driftwatch[alerting,cli,fastapi,mlflow]; extra == "all"
+Provides-Extra: docs
+Requires-Dist: mkdocs>=1.5.0; extra == "docs"
+Requires-Dist: mkdocs-material>=9.0.0; extra == "docs"
+Requires-Dist: mkdocstrings[python]>=0.23.0; extra == "docs"
+Dynamic: license-file
+# DriftWatch
+<div align="center">
+**Lightweight ML drift monitoring, built for real-world pipelines**
+[![Documentation](https://img.shields.io/badge/docs-vincentcotella.github.io%2FDriftWatch-blue.svg)](https://vincentcotella.github.io/DriftWatch/)
+[![CI](https://github.com/VincentCotella/DriftWatch/actions/workflows/ci.yml/badge.svg)](https://github.com/VincentCotella/DriftWatch/actions/workflows/ci.yml)
+[![PyPI version](https://badge.fury.io/py/driftwatch.svg)](https://pypi.org/project/driftwatch/)
+[![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+</div>
+---
+## 📖 Documentation
+**Read the full documentation here:** [vincentcotella.github.io/DriftWatch](https://vincentcotella.github.io/DriftWatch/)
+## 🚀 Features
+- **Simple API**: Detect drift in 3 lines of code.
+- **Multiple Detectors**: **PSI**, **KS-Test**, **Wasserstein Distance**, **Chi-Squared**.
+- **Production-Ready**:
+    - ⚡ **FastAPI Integration** (Middleware included).
+    - 🔔 **Slack Alerts** built-in.
+    - 🛠️ **CLI** for batch processing.
+- **Lightweight**: Minimal dependencies (`numpy`, `pandas`, `scipy`, `pydantic`).
+- **Type-Safe**: 100% typed code with `mypy` support.
+## 📦 Installation
+```bash
+pip install driftwatch
+```
+For all optional extras (CLI, FastAPI, MLflow, Alerting):
+```bash
+pip install "driftwatch[all]"
+```
+## ⚡ Quick Start
+```python
+from driftwatch import Monitor
+import pandas as pd
+# 1. Initialize monitor with reference data (e.g., training set)
+monitor = Monitor(reference_data=pd.read_parquet("train.parquet"))
+# 2. Check production data for drift
+report = monitor.check(pd.read_parquet("production.parquet"))
+# 3. Act on results
+if report.has_drift():
+    print(f"⚠️ Drift detected! Ratio: {report.drift_ratio():.1%}")
+    print(f"Drifted features: {report.drifted_features()}")
+else:
+    print("✅ All systems normal.")
+```
+## 🛠️ Usage Scenarios
+| Scenario | Solution |
+|----------|----------|
+| **Real-time API** | Use `DriftMiddleware` in FastAPI to monitor every request. |
+| **Batch Job** | Use `driftwatch check` CLI in your Airflow/Cron jobs. |
+| **CI/CD** | Block deployments if validation data drifts from training data. |
+| **Alerting** | Send Slack notifications automatically when drift is critical. |
+## 🤝 Contributing
+We welcome contributions! Please see our [Contributing Guide](https://vincentcotella.github.io/DriftWatch/contributing/) for details.
+1. Fork the repo.
+2. Install dev dependencies: `pip install -e ".[dev,all]"`
+3. Run tests: `pytest`
+4. Submit a PR!
+## 📄 License
+MIT © [Vincent Cotella](https://github.com/VincentCotella)

driftwatch-0.2.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,22 @@
+driftwatch/__init__.py,sha256=eANf-l8xzaWO10uuIaSxAYnYGm2sYXsn5b4Ew8MoC14,593
+driftwatch/py.typed,sha256=VeLLojGNPosEVWplpI3jgpXMfmmOHaAzQGyFJq8YfjM,42
+driftwatch/cli/__init__.py,sha256=0pMY4GjZQK_23O4R3jgOhDC-ry_rGNU9sTkGlJp7jXI,101
+driftwatch/cli/main.py,sha256=QV3IYgLJL5iOM0XBHnJ_tDhnPexYyLtPsI5o_e-Oeo4,8118
+driftwatch/core/__init__.py,sha256=_frvUvPNa1zXJWv7mNR5Lnk6soNGmN387dpDZiI1EHU,201
+driftwatch/core/monitor.py,sha256=BGIP8mHx3bPJ0UZf7g6I8cfOulRDoYrK4ntj6vnaYW0,5208
+driftwatch/core/report.py,sha256=hHVJGfP4B4XabkcpS9TpHJpw3bTzfSCFVuCvaiJcmxo,5059
+driftwatch/detectors/__init__.py,sha256=sLo0A7W6ik8XXpbVpqf150DieTZNQ3o5gbIm_gxGSw4,217
+driftwatch/detectors/base.py,sha256=CAJHePSwQdaAEvC8qVYD3bL39sInY0c-_XIfifOxnq0,1616
+driftwatch/detectors/categorical.py,sha256=9NHsXveNpgINJUJNoPtQy-iT9BxMgr3ji5bfLfd9_Gw,4232
+driftwatch/detectors/numerical.py,sha256=OrDN35GRfZ6pevffSGs-lh32Uts56dLMJrO1x0cOwLs,5921
+driftwatch/detectors/registry.py,sha256=JltHokVjlHI_Sz1eDonLWH0cQDfdScz2sK5PNkBkXiE,2084
+driftwatch/integrations/__init__.py,sha256=8w5eie9Ci_1EsZAmOBBsU5PJFUIg52RCqXHWQK2FK3o,183
+driftwatch/integrations/alerting.py,sha256=OzRuP_KVu7RQtBSvJnL6QyxbGyswp6qBHk9Xzgd9a-A,6468
+driftwatch/integrations/fastapi.py,sha256=pgpdhVg0JYges9FURACJzwbkzwQ6MQ4atuILhphoPyM,10197
+driftwatch/simulation/__init__.py,sha256=gxIK5yj9YcwtRMFCXBRhVJ6dA_eEJ0gSAQq5YPy3KUs,56
+driftwatch-0.2.0.dist-info/licenses/LICENSE,sha256=J161VpWy8YHSX997h8UHNVgOG8Til1UqK6ui-evf5l4,1080
+driftwatch-0.2.0.dist-info/METADATA,sha256=csIWiWGTEgXBb6FQerDOA5KI3SST3DZJA_Gnx54RqLk,5433
+driftwatch-0.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+driftwatch-0.2.0.dist-info/entry_points.txt,sha256=kfLxdBjdL_A1DhNwWYUNj2zYB9tX5qpxY3vVn-uX0zI,55
+driftwatch-0.2.0.dist-info/top_level.txt,sha256=3C66tjNxK5CgLhkRhSeWkgpQLakS4Uop84vJWCjWgd8,11
+driftwatch-0.2.0.dist-info/RECORD,,

driftwatch-0.2.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (80.10.2)
+Root-Is-Purelib: true
+Tag: py3-none-any

driftwatch-0.2.0.dist-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ driftwatch = driftwatch.cli.main:app

driftwatch-0.2.0.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 DriftWatch Contributors
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

driftwatch-0.2.0.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ driftwatch