datadriftguard 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. datadriftguard-0.1.0/.gitignore +36 -0
  2. datadriftguard-0.1.0/CHANGELOG.md +22 -0
  3. datadriftguard-0.1.0/LICENSE +21 -0
  4. datadriftguard-0.1.0/MANIFEST.in +4 -0
  5. datadriftguard-0.1.0/PKG-INFO +239 -0
  6. datadriftguard-0.1.0/README.md +195 -0
  7. datadriftguard-0.1.0/config.yaml +54 -0
  8. datadriftguard-0.1.0/datasets/.gitkeep +1 -0
  9. datadriftguard-0.1.0/datasets/current.csv +16 -0
  10. datadriftguard-0.1.0/datasets/reference.csv +16 -0
  11. datadriftguard-0.1.0/pyproject.toml +77 -0
  12. datadriftguard-0.1.0/src/driftguardai/__init__.py +56 -0
  13. datadriftguard-0.1.0/src/driftguardai/api/__init__.py +2 -0
  14. datadriftguard-0.1.0/src/driftguardai/api/app.py +69 -0
  15. datadriftguard-0.1.0/src/driftguardai/api/dependencies.py +165 -0
  16. datadriftguard-0.1.0/src/driftguardai/api/routes.py +99 -0
  17. datadriftguard-0.1.0/src/driftguardai/api/schemas.py +49 -0
  18. datadriftguard-0.1.0/src/driftguardai/core/__init__.py +6 -0
  19. datadriftguard-0.1.0/src/driftguardai/core/alerts.py +244 -0
  20. datadriftguard-0.1.0/src/driftguardai/core/config.py +127 -0
  21. datadriftguard-0.1.0/src/driftguardai/core/entities.py +108 -0
  22. datadriftguard-0.1.0/src/driftguardai/core/exceptions.py +18 -0
  23. datadriftguard-0.1.0/src/driftguardai/core/interfaces.py +31 -0
  24. datadriftguard-0.1.0/src/driftguardai/core/retraining.py +128 -0
  25. datadriftguard-0.1.0/src/driftguardai/core/use_cases.py +72 -0
  26. datadriftguard-0.1.0/src/driftguardai/dashboard/__init__.py +1 -0
  27. datadriftguard-0.1.0/src/driftguardai/dashboard/app.py +403 -0
  28. datadriftguard-0.1.0/src/driftguardai/data/__init__.py +2 -0
  29. datadriftguard-0.1.0/src/driftguardai/data/ingestion.py +114 -0
  30. datadriftguard-0.1.0/src/driftguardai/data/repositories.py +22 -0
  31. datadriftguard-0.1.0/src/driftguardai/drift/__init__.py +23 -0
  32. datadriftguard-0.1.0/src/driftguardai/drift/detectors.py +368 -0
  33. datadriftguard-0.1.0/src/driftguardai/drift/metrics.py +436 -0
  34. datadriftguard-0.1.0/src/driftguardai/py.typed +1 -0
  35. datadriftguard-0.1.0/src/driftguardai/utils/__init__.py +1 -0
  36. datadriftguard-0.1.0/src/driftguardai/utils/dataset_validation.py +60 -0
  37. datadriftguard-0.1.0/src/driftguardai/utils/logging.py +91 -0
  38. datadriftguard-0.1.0/tests/__init__.py +1 -0
  39. datadriftguard-0.1.0/tests/test_drift_detector.py +83 -0
@@ -0,0 +1,36 @@
1
+ # Virtual environments
2
+ .venv/
3
+ venv/
4
+
5
+ # Python bytecode
6
+ __pycache__/
7
+ *.pyc
8
+ *.pyo
9
+
10
+ # Testing
11
+ .pytest_cache/
12
+ .coverage
13
+ htmlcov/
14
+
15
+ # Type checking
16
+ .mypy_cache/
17
+
18
+ # Environment
19
+ .env
20
+
21
+ # Build & distribution
22
+ build/
23
+ dist/
24
+ *.egg-info/
25
+ *.egg
26
+ .eggs/
27
+
28
+ # IDE
29
+ .idea/
30
+ .vscode/
31
+ *.swp
32
+ *.swo
33
+
34
+ # OS
35
+ .DS_Store
36
+ Thumbs.db
@@ -0,0 +1,22 @@
1
+ # Changelog
2
+
3
+ All notable changes to DriftGuardAI will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.1.0] - 2026-06-24
9
+
10
+ ### Added
11
+
12
+ - Initial public release.
13
+ - Drift detection metrics: PSI, KS test, KL divergence, chi-square test, categorical distribution difference.
14
+ - `DriftDetector` class for feature-level drift analysis with configurable thresholds.
15
+ - `AlertManager` for log-based, webhook, and Slack alert dispatch.
16
+ - `RetrainingManager` for automated retraining trigger evaluation.
17
+ - Domain entities: `FeatureDriftReport`, `FeatureDriftResult`, `DriftMetricResult`, and more.
18
+ - YAML-based configuration with environment variable override (`DRIFT_GUARD_CONFIG_PATH`).
19
+ - Structured JSON logging.
20
+ - Optional FastAPI server (`pip install datadriftguard[api]`).
21
+ - Optional Streamlit dashboard (`pip install datadriftguard[dashboard]`).
22
+ - PEP 561 `py.typed` marker for type checker support.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Surya Babbar
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,4 @@
1
+ include config.yaml
2
+ include LICENSE
3
+ include README.md
4
+ recursive-include datasets *.csv
@@ -0,0 +1,239 @@
1
+ Metadata-Version: 2.4
2
+ Name: datadriftguard
3
+ Version: 0.1.0
4
+ Summary: Data drift detection toolkit for ML pipelines — PSI, KS, KL divergence, chi-square, and more.
5
+ Project-URL: Homepage, https://github.com/suryanandanbabbar/DriftGuardAI
6
+ Project-URL: Documentation, https://github.com/suryanandanbabbar/DriftGuardAI#readme
7
+ Project-URL: Repository, https://github.com/suryanandanbabbar/DriftGuardAI
8
+ Project-URL: Issues, https://github.com/suryanandanbabbar/DriftGuardAI/issues
9
+ Author: Surya Babbar
10
+ License-Expression: MIT
11
+ License-File: LICENSE
12
+ Keywords: data-quality,drift,machine-learning,mlops,model-monitoring,monitoring
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
+ Classifier: Typing :: Typed
24
+ Requires-Python: >=3.10
25
+ Requires-Dist: numpy>=1.23
26
+ Requires-Dist: pandas>=1.5
27
+ Requires-Dist: pydantic>=2.0
28
+ Requires-Dist: python-dotenv>=1.0
29
+ Requires-Dist: pyyaml>=6.0
30
+ Requires-Dist: scipy>=1.9
31
+ Provides-Extra: api
32
+ Requires-Dist: fastapi>=0.100; extra == 'api'
33
+ Requires-Dist: python-multipart>=0.0.6; extra == 'api'
34
+ Requires-Dist: uvicorn>=0.20; extra == 'api'
35
+ Provides-Extra: dashboard
36
+ Requires-Dist: streamlit>=1.25; extra == 'dashboard'
37
+ Provides-Extra: dev
38
+ Requires-Dist: build>=1.0; extra == 'dev'
39
+ Requires-Dist: mypy>=1.0; extra == 'dev'
40
+ Requires-Dist: pytest>=7.0; extra == 'dev'
41
+ Requires-Dist: ruff>=0.4; extra == 'dev'
42
+ Requires-Dist: twine>=5.0; extra == 'dev'
43
+ Description-Content-Type: text/markdown
44
+
45
+ # DriftGuardAI
46
+
47
+ [![PyPI version](https://img.shields.io/pypi/v/datadriftguard.svg)](https://pypi.org/project/datadriftguard/)
48
+ [![Python versions](https://img.shields.io/pypi/pyversions/datadriftguard.svg)](https://pypi.org/project/datadriftguard/)
49
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
50
+
51
+ **Data drift detection toolkit for ML pipelines.** Monitor feature-level distribution shifts using PSI, KS test, KL divergence, chi-square test, and more.
52
+
53
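+ DriftGuardAI helps ML engineers and data scientists detect when production data drifts away from training data — a leading indicator of model degradation. The distribution is published on PyPI as `datadriftguard`; the importable package is `driftguardai`.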
54
+
55
+ ## Installation
56
+
57
+ ```bash
58
+ pip install datadriftguard
59
+ ```
60
+
61
+ Optional extras:
62
+
63
+ ```bash
64
+ # Include the FastAPI server
65
+ pip install datadriftguard[api]
66
+
67
+ # Include the Streamlit dashboard
68
+ pip install datadriftguard[dashboard]
69
+
70
+ # Install everything for development
71
+ pip install datadriftguard[dev,api,dashboard]
72
+ ```
73
+
74
+ ## Quick Start
75
+
76
+ ```python
77
+ import pandas as pd
78
+ from driftguardai import DriftDetector, ThresholdSettings
79
+
80
+ # Load your baseline (training) and incoming (production) data
81
+ baseline = pd.read_csv("baseline.csv")
82
+ incoming = pd.read_csv("incoming.csv")
83
+
84
+ # Create a detector with default or custom thresholds
85
+ detector = DriftDetector(
86
+ baseline_dataset=baseline,
87
+ incoming_dataset=incoming,
88
+ thresholds=ThresholdSettings(psi=0.20, ks_significance_level=0.05),
89
+ )
90
+
91
+ # Generate a drift report
92
+ report = detector.generate_report(dataset_name="production_model_v2")
93
+
94
+ # Inspect results
95
+ print(f"Total features: {report.total_features}")
96
+ print(f"Drifted features: {len(report.drifted_features)}")
97
+
98
+ for feature in report.drifted_features:
99
+     print(f" ⚠ {feature.feature_name} ({feature.feature_type}): drift detected")
100
+     if feature.metrics.psi:
101
+         print(f" PSI: {feature.metrics.psi.value:.4f} (threshold: {feature.metrics.psi.threshold})")
102
+     if feature.metrics.ks:
103
+         print(f" KS: {feature.metrics.ks.value:.4f} (p={feature.metrics.ks.p_value:.4f})")
104
+ ```
105
+
106
+ ## Drift Metrics
107
+
108
+ | Metric | Type | Description |
109
+ |--------|------|-------------|
110
+ | **PSI** (Population Stability Index) | Numerical | Measures distribution shift between baseline and incoming data |
111
+ | **KS Test** (Kolmogorov-Smirnov) | Numerical | Non-parametric test for distribution equality |
112
+ | **KL Divergence** (Kullback-Leibler) | Numerical | Information-theoretic measure of distribution difference |
113
+ | **Chi-Square Test** | Categorical | Tests whether category frequencies differ significantly between baseline and incoming data |
114
+ | **Distribution Difference** | Categorical | Total variation distance between category frequencies |
115
+
116
+ ## Alerting
117
+
118
+ DriftGuardAI includes an alert system that can dispatch drift notifications via logging, webhooks, or Slack:
119
+
120
+ ```python
121
+ from driftguardai import AlertManager, DriftDetector
122
+ from driftguardai.core.config import AlertSettings
123
+
124
+ detector = DriftDetector(baseline, incoming)
125
+ report = detector.generate_report(dataset_name="production")
126
+
127
+ alert_manager = AlertManager(
128
+ settings=AlertSettings(
129
+ enabled=True,
130
+ log_alerts=True,
131
+ slack_webhook_url="https://hooks.slack.com/services/...",
132
+ )
133
+ )
134
+ dispatch_report = alert_manager.dispatch(report)
135
+ print(f"Dispatched {dispatch_report.total_alerts} alerts")
136
+ ```
137
+
138
+ ## Retraining Triggers
139
+
140
+ Automatically evaluate whether model retraining should be triggered based on drift severity:
141
+
142
+ ```python
143
+ from driftguardai import RetrainingManager
144
+ from driftguardai.core.config import RetrainingSettings
145
+
146
+ manager = RetrainingManager(
147
+ settings=RetrainingSettings(
148
+ enabled=True,
149
+ trigger_severity="critical",
150
+ min_alert_count=2,
151
+ )
152
+ )
153
+ result = manager.evaluate(report)
154
+ if result.triggered:
155
+     print(f"Retraining triggered: {result.reason}")
156
+     print(f"Affected features: {result.affected_features}")
157
+ ```
158
+
159
+ ## Configuration
160
+
161
+ DriftGuardAI can be configured programmatically or via a `config.yaml` file:
162
+
163
+ ```yaml
164
+ thresholds:
165
+   psi: 0.20
165
+   ks_significance_level: 0.05
166
+   kl_divergence: 0.10
167
+   categorical_distance: 0.10
168
+   categorical_chi_square_significance_level: 0.05
169
+   histogram_bins: 10
170
+   histogram_strategy: quantile
171
+
172
+ alerts:
173
+   enabled: true
174
+   log_alerts: true
175
+   minimum_severity: warning
176
+   slack_webhook_url: https://hooks.slack.com/services/...
177
+
178
+ retraining:
179
+   enabled: true
180
+   trigger_severity: critical
181
+   min_alert_count: 1
183
+ ```
184
+
185
+ Place `config.yaml` in your working directory, or set the `DRIFT_GUARD_CONFIG_PATH` environment variable:
186
+
187
+ ```bash
188
+ export DRIFT_GUARD_CONFIG_PATH=/path/to/your/config.yaml
189
+ ```
190
+
191
+ ## Optional: API Server
192
+
193
+ Run a FastAPI server for drift detection over HTTP:
194
+
195
+ ```bash
196
+ pip install datadriftguard[api]
197
+ uvicorn driftguardai.api.app:app --reload
198
+ ```
199
+
200
+ Endpoints:
201
+ - `GET /api/v1/health` — Health check
202
+ - `POST /api/v1/drift/analyze` — Analyze drift from file paths
203
+ - `POST /api/v1/drift/analyze/files` — Analyze drift from uploaded CSVs
204
+
205
+ ## Optional: Streamlit Dashboard
206
+
207
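+ Visualize drift metrics with an interactive dashboard. The script path below is relative to the root of a repository checkout, so run the command from there: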
208
+
209
+ ```bash
210
+ pip install datadriftguard[dashboard]
211
+ streamlit run src/driftguardai/dashboard/app.py
212
+ ```
213
+
214
+ ## Architecture
215
+
216
+ ```text
217
+ driftguardai/
218
+ ├── core/ # Domain models, config, exceptions, interfaces, use cases
219
+ ├── drift/ # Drift detection implementations and statistical metrics
220
+ ├── data/ # Data ingestion and repository adapters
221
+ ├── utils/ # Logging and dataset validation utilities
222
+ ├── api/ # Optional FastAPI HTTP layer
223
+ └── dashboard/ # Optional Streamlit visualization
224
+ ```
225
+
226
+ ## Development
227
+
228
+ ```bash
229
+ git clone https://github.com/suryanandanbabbar/DriftGuardAI.git
230
+ cd DriftGuardAI
231
+ python -m venv .venv
232
+ source .venv/bin/activate
233
+ pip install -e ".[dev,api,dashboard]"
234
+ pytest
235
+ ```
236
+
237
+ ## License
238
+
239
+ MIT — see [LICENSE](LICENSE) for details.
@@ -0,0 +1,195 @@
1
+ # DriftGuardAI
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/datadriftguard.svg)](https://pypi.org/project/datadriftguard/)
4
+ [![Python versions](https://img.shields.io/pypi/pyversions/datadriftguard.svg)](https://pypi.org/project/datadriftguard/)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
6
+
7
+ **Data drift detection toolkit for ML pipelines.** Monitor feature-level distribution shifts using PSI, KS test, KL divergence, chi-square test, and more.
8
+
9
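+ DriftGuardAI helps ML engineers and data scientists detect when production data drifts away from training data — a leading indicator of model degradation. The distribution is published on PyPI as `datadriftguard`; the importable package is `driftguardai`.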
10
+
11
+ ## Installation
12
+
13
+ ```bash
14
+ pip install datadriftguard
15
+ ```
16
+
17
+ Optional extras:
18
+
19
+ ```bash
20
+ # Include the FastAPI server
21
+ pip install datadriftguard[api]
22
+
23
+ # Include the Streamlit dashboard
24
+ pip install datadriftguard[dashboard]
25
+
26
+ # Install everything for development
27
+ pip install datadriftguard[dev,api,dashboard]
28
+ ```
29
+
30
+ ## Quick Start
31
+
32
+ ```python
33
+ import pandas as pd
34
+ from driftguardai import DriftDetector, ThresholdSettings
35
+
36
+ # Load your baseline (training) and incoming (production) data
37
+ baseline = pd.read_csv("baseline.csv")
38
+ incoming = pd.read_csv("incoming.csv")
39
+
40
+ # Create a detector with default or custom thresholds
41
+ detector = DriftDetector(
42
+ baseline_dataset=baseline,
43
+ incoming_dataset=incoming,
44
+ thresholds=ThresholdSettings(psi=0.20, ks_significance_level=0.05),
45
+ )
46
+
47
+ # Generate a drift report
48
+ report = detector.generate_report(dataset_name="production_model_v2")
49
+
50
+ # Inspect results
51
+ print(f"Total features: {report.total_features}")
52
+ print(f"Drifted features: {len(report.drifted_features)}")
53
+
54
+ for feature in report.drifted_features:
55
+     print(f" ⚠ {feature.feature_name} ({feature.feature_type}): drift detected")
56
+     if feature.metrics.psi:
57
+         print(f" PSI: {feature.metrics.psi.value:.4f} (threshold: {feature.metrics.psi.threshold})")
58
+     if feature.metrics.ks:
59
+         print(f" KS: {feature.metrics.ks.value:.4f} (p={feature.metrics.ks.p_value:.4f})")
60
+ ```
61
+
62
+ ## Drift Metrics
63
+
64
+ | Metric | Type | Description |
65
+ |--------|------|-------------|
66
+ | **PSI** (Population Stability Index) | Numerical | Measures distribution shift between baseline and incoming data |
67
+ | **KS Test** (Kolmogorov-Smirnov) | Numerical | Non-parametric test for distribution equality |
68
+ | **KL Divergence** (Kullback-Leibler) | Numerical | Information-theoretic measure of distribution difference |
69
+ | **Chi-Square Test** | Categorical | Tests whether category frequencies differ significantly between baseline and incoming data |
70
+ | **Distribution Difference** | Categorical | Total variation distance between category frequencies |
71
+
72
+ ## Alerting
73
+
74
+ DriftGuardAI includes an alert system that can dispatch drift notifications via logging, webhooks, or Slack:
75
+
76
+ ```python
77
+ from driftguardai import AlertManager, DriftDetector
78
+ from driftguardai.core.config import AlertSettings
79
+
80
+ detector = DriftDetector(baseline, incoming)
81
+ report = detector.generate_report(dataset_name="production")
82
+
83
+ alert_manager = AlertManager(
84
+ settings=AlertSettings(
85
+ enabled=True,
86
+ log_alerts=True,
87
+ slack_webhook_url="https://hooks.slack.com/services/...",
88
+ )
89
+ )
90
+ dispatch_report = alert_manager.dispatch(report)
91
+ print(f"Dispatched {dispatch_report.total_alerts} alerts")
92
+ ```
93
+
94
+ ## Retraining Triggers
95
+
96
+ Automatically evaluate whether model retraining should be triggered based on drift severity:
97
+
98
+ ```python
99
+ from driftguardai import RetrainingManager
100
+ from driftguardai.core.config import RetrainingSettings
101
+
102
+ manager = RetrainingManager(
103
+ settings=RetrainingSettings(
104
+ enabled=True,
105
+ trigger_severity="critical",
106
+ min_alert_count=2,
107
+ )
108
+ )
109
+ result = manager.evaluate(report)
110
+ if result.triggered:
111
+     print(f"Retraining triggered: {result.reason}")
112
+     print(f"Affected features: {result.affected_features}")
113
+ ```
114
+
115
+ ## Configuration
116
+
117
+ DriftGuardAI can be configured programmatically or via a `config.yaml` file:
118
+
119
+ ```yaml
120
+ thresholds:
121
+   psi: 0.20
122
+   ks_significance_level: 0.05
123
+   kl_divergence: 0.10
124
+   categorical_distance: 0.10
125
+   categorical_chi_square_significance_level: 0.05
126
+   histogram_bins: 10
127
+   histogram_strategy: quantile
128
+
129
+ alerts:
130
+   enabled: true
131
+   log_alerts: true
132
+   minimum_severity: warning
133
+   slack_webhook_url: https://hooks.slack.com/services/...
134
+
135
+ retraining:
136
+   enabled: true
137
+   trigger_severity: critical
138
+   min_alert_count: 1
139
+ ```
140
+
141
+ Place `config.yaml` in your working directory, or set the `DRIFT_GUARD_CONFIG_PATH` environment variable:
142
+
143
+ ```bash
144
+ export DRIFT_GUARD_CONFIG_PATH=/path/to/your/config.yaml
145
+ ```
146
+
147
+ ## Optional: API Server
148
+
149
+ Run a FastAPI server for drift detection over HTTP:
150
+
151
+ ```bash
152
+ pip install datadriftguard[api]
153
+ uvicorn driftguardai.api.app:app --reload
154
+ ```
155
+
156
+ Endpoints:
157
+ - `GET /api/v1/health` — Health check
158
+ - `POST /api/v1/drift/analyze` — Analyze drift from file paths
159
+ - `POST /api/v1/drift/analyze/files` — Analyze drift from uploaded CSVs
160
+
161
+ ## Optional: Streamlit Dashboard
162
+
163
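+ Visualize drift metrics with an interactive dashboard. The script path below is relative to the root of a repository checkout, so run the command from there: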
164
+
165
+ ```bash
166
+ pip install datadriftguard[dashboard]
167
+ streamlit run src/driftguardai/dashboard/app.py
168
+ ```
169
+
170
+ ## Architecture
171
+
172
+ ```text
173
+ driftguardai/
174
+ ├── core/ # Domain models, config, exceptions, interfaces, use cases
175
+ ├── drift/ # Drift detection implementations and statistical metrics
176
+ ├── data/ # Data ingestion and repository adapters
177
+ ├── utils/ # Logging and dataset validation utilities
178
+ ├── api/ # Optional FastAPI HTTP layer
179
+ └── dashboard/ # Optional Streamlit visualization
180
+ ```
181
+
182
+ ## Development
183
+
184
+ ```bash
185
+ git clone https://github.com/suryanandanbabbar/DriftGuardAI.git
186
+ cd DriftGuardAI
187
+ python -m venv .venv
188
+ source .venv/bin/activate
189
+ pip install -e ".[dev,api,dashboard]"
190
+ pytest
191
+ ```
192
+
193
+ ## License
194
+
195
+ MIT — see [LICENSE](LICENSE) for details.
@@ -0,0 +1,54 @@
1
+ environment: development
2
+ debug: true
3
+ log_level: INFO
4
+
5
+ logging:
6
+   level: INFO
7
+   structured: true
8
+   timestamp_format: "%Y-%m-%dT%H:%M:%S%z"
9
+
10
+ api:
11
+   title: DriftGuardAI API
12
+   version: 0.1.0
13
+   prefix: /api/v1
14
+
15
+ thresholds:
16
+   numerical_p_value: 0.05
17
+   categorical_distance: 0.10
18
+   categorical_chi_square_significance_level: 0.05
19
+   missing_rate_delta: 0.02
20
+   psi: 0.20
21
+   ks_significance_level: 0.05
22
+   kl_divergence: 0.10
23
+   histogram_bins: 10
24
+   histogram_strategy: quantile
25
+   histogram_epsilon: 0.000001
26
+
27
+ monitoring:
28
+   min_rows: 100
29
+   default_method: kolmogorov_smirnov
30
+   alert_on_missing_data: true
31
+
32
+ data:
33
+   reference_dataset_path: datasets/reference.csv
34
+   current_dataset_path: datasets/current.csv
35
+
36
+ runtime:
37
+   default_dataset_name: production
38
+   uploaded_dataset_name: uploaded
39
+
40
+ alerts:
41
+   enabled: true
42
+   log_alerts: true
43
+   minimum_severity: warning
44
+   critical_excess_ratio: 1.5
45
+   critical_p_value_ratio: 0.2
46
+   webhook_url:
47
+   slack_webhook_url:
48
+   timeout_seconds: 5.0
49
+
50
+ retraining:
51
+   enabled: true
52
+   trigger_severity: critical
53
+   min_alert_count: 1
54
+   pipeline_name: placeholder_retraining_pipeline
@@ -0,0 +1 @@
1
+
@@ -0,0 +1,16 @@
1
+ age,income,credit_score,transactions_per_month,avg_transaction_amount,region,device_type
2
+ 22,28000,590,8,85.2,South,Mobile
3
+ 24,30000,610,9,90.4,South,Mobile
4
+ 26,32000,605,10,95.1,South,Mobile
5
+ 21,25000,580,7,70.3,South,Mobile
6
+ 23,27000,600,8,80.2,South,Mobile
7
+ 25,29000,610,9,88.7,South,Mobile
8
+ 28,31000,620,11,100.4,South,Mobile
9
+ 30,33000,630,12,105.9,South,Mobile
10
+ 27,30500,615,10,97.3,South,Mobile
11
+ 24,29500,605,9,92.5,South,Mobile
12
+ 22,26000,585,7,75.6,South,Mobile
13
+ 23,27500,595,8,82.1,South,Mobile
14
+ 26,31500,620,11,102.2,South,Mobile
15
+ 29,34000,635,12,110.5,South,Mobile
16
+ 31,36000,645,13,115.8,South,Mobile
@@ -0,0 +1,16 @@
1
+ age,income,credit_score,transactions_per_month,avg_transaction_amount,region,device_type
2
+ 23,35000,640,18,120.5,North,Mobile
3
+ 45,72000,710,25,240.3,West,Desktop
4
+ 31,54000,680,21,180.2,South,Mobile
5
+ 52,88000,750,32,310.7,West,Desktop
6
+ 28,46000,660,19,150.9,North,Mobile
7
+ 39,61000,700,23,210.4,East,Tablet
8
+ 47,79000,720,27,260.1,West,Desktop
9
+ 34,57000,690,22,195.8,South,Mobile
10
+ 29,48000,670,20,165.6,East,Mobile
11
+ 41,65000,705,24,220.3,North,Desktop
12
+ 36,59000,695,23,205.4,South,Mobile
13
+ 50,82000,735,30,290.2,West,Desktop
14
+ 27,44000,655,18,140.2,East,Mobile
15
+ 43,70000,715,26,245.8,North,Tablet
16
+ 38,62000,705,24,215.9,South,Desktop
@@ -0,0 +1,77 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "datadriftguard"
7
+ dynamic = ["version"]
8
+ description = "Data drift detection toolkit for ML pipelines — PSI, KS, KL divergence, chi-square, and more."
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.10"
12
+ authors = [
13
+ { name = "Surya Babbar" },
14
+ ]
15
+ keywords = ["drift", "machine-learning", "monitoring", "mlops", "data-quality", "model-monitoring"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Developers",
19
+ "Intended Audience :: Science/Research",
20
+ "License :: OSI Approved :: MIT License",
21
+ "Programming Language :: Python :: 3",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Programming Language :: Python :: 3.13",
26
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
27
+ "Typing :: Typed",
28
+ ]
29
+ dependencies = [
30
+ "pandas>=1.5",
31
+ "numpy>=1.23",
32
+ "scipy>=1.9",
33
+ "pydantic>=2.0",
34
+ "PyYAML>=6.0",
35
+ "python-dotenv>=1.0",
36
+ ]
37
+
38
+ [project.optional-dependencies]
39
+ api = [
40
+ "fastapi>=0.100",
41
+ "uvicorn>=0.20",
42
+ "python-multipart>=0.0.6",
43
+ ]
44
+ dashboard = [
45
+ "streamlit>=1.25",
46
+ ]
47
+ dev = [
48
+ "pytest>=7.0",
49
+ "ruff>=0.4",
50
+ "mypy>=1.0",
51
+ "build>=1.0",
52
+ "twine>=5.0",
53
+ ]
54
+
55
+ [project.urls]
56
+ Homepage = "https://github.com/suryanandanbabbar/DriftGuardAI"
57
+ Documentation = "https://github.com/suryanandanbabbar/DriftGuardAI#readme"
58
+ Repository = "https://github.com/suryanandanbabbar/DriftGuardAI"
59
+ Issues = "https://github.com/suryanandanbabbar/DriftGuardAI/issues"
60
+
61
+ [tool.hatch.version]
62
+ path = "src/driftguardai/__init__.py"
63
+
64
+ [tool.hatch.build.targets.wheel]
65
+ packages = ["src/driftguardai"]
66
+
67
+ [tool.pytest.ini_options]
68
+ testpaths = ["tests"]
69
+
70
+ [tool.ruff]
71
+ target-version = "py310"
72
+ line-length = 120
73
+
74
+ [tool.mypy]
75
+ python_version = "3.10"
76
+ warn_return_any = true
77
+ warn_unused_configs = true