modelshift 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
modelshift/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """
2
+ ModelShift-Lite package initialization.
3
+ """
modelshift/baseline.py ADDED
@@ -0,0 +1,37 @@
1
+ import pandas as pd
2
+
3
+
4
class BaselineWindow:
    """
    Holder for the reference (baseline) dataset that represents
    normal model behavior.

    The frame is copied on the way in and on the way out so callers
    can never mutate the stored baseline.
    """

    def __init__(self, data: pd.DataFrame):
        # Fail fast on anything that is not a usable DataFrame.
        self._validate(data)
        self.data = data.copy()  # defensive copy; caller keeps its frame
        self.feature_names = list(data.columns)
        self.num_samples = len(data)

    def _validate(self, data):
        """Reject inputs that are not non-empty pandas DataFrames."""
        if not isinstance(data, pd.DataFrame):
            raise TypeError("Baseline data must be a pandas DataFrame")

        if data.empty:
            raise ValueError("Baseline data cannot be empty")

    def get_data(self) -> pd.DataFrame:
        """Return a defensive copy of the stored baseline frame."""
        return self.data.copy()

    def summary(self) -> dict:
        """Return lightweight metadata describing this baseline window."""
        return {
            "num_samples": self.num_samples,
            "num_features": len(self.feature_names),
            "feature_names": self.feature_names,
        }
@@ -0,0 +1,3 @@
1
+ """
2
+ Drift detection modules.
3
+ """
@@ -0,0 +1,50 @@
1
+ import pandas as pd
2
+ from scipy.stats import ks_2samp
3
+
4
+
5
def compute_feature_drift(
    baseline_data: pd.DataFrame,
    live_data: pd.DataFrame
) -> dict:
    """
    Compute feature-level drift using the Kolmogorov–Smirnov test.

    Args:
        baseline_data: reference window of feature values.
        live_data: current window with identical columns.

    Returns a dictionary:
        {
            feature_name: {
                "ks_statistic": float,
                "p_value": float
            }
        }

    Raises:
        TypeError: if either input is not a DataFrame.
        ValueError: if either frame is empty, columns differ, or a
            feature has no non-NaN values to compare.
    """

    _validate_inputs(baseline_data, live_data)

    drift_results = {}

    for feature in baseline_data.columns:
        baseline_values = baseline_data[feature].dropna()
        live_values = live_data[feature].dropna()

        # FIX: ks_2samp fails with an opaque error on empty samples
        # (e.g. an all-NaN column); raise something actionable instead.
        if len(baseline_values) == 0 or len(live_values) == 0:
            raise ValueError(
                f"Feature '{feature}' has no non-NaN values in baseline or live data"
            )

        ks_stat, p_value = ks_2samp(baseline_values, live_values)

        drift_results[feature] = {
            "ks_statistic": float(ks_stat),
            "p_value": float(p_value),
        }

    return drift_results


def _validate_inputs(baseline_data, live_data):
    """Validate types, non-emptiness, and column alignment of both frames."""
    if not isinstance(baseline_data, pd.DataFrame):
        raise TypeError("Baseline data must be a pandas DataFrame")

    if not isinstance(live_data, pd.DataFrame):
        raise TypeError("Live data must be a pandas DataFrame")

    if baseline_data.empty or live_data.empty:
        raise ValueError("Baseline and live data cannot be empty")

    # Order matters too: downstream code iterates baseline columns directly.
    if list(baseline_data.columns) != list(live_data.columns):
        raise ValueError("Baseline and live data must have identical features")
@@ -0,0 +1,111 @@
1
+ from __future__ import annotations
2
+
3
+ import numpy as np
4
+ from scipy.stats import ks_2samp
5
+
6
+
7
def compute_prediction_drift(
    baseline_predictions: np.ndarray,
    live_predictions: np.ndarray
) -> dict:
    """
    Quantify prediction-behavior drift between two probability samples.

    Signals:
      1) KS-test between the two prediction distributions
      2) Change in mean binary entropy (confidence shift)
      plus mean/median/std diagnostics for dashboards and reports.

    Both inputs must be 1D array-likes of probabilities in [0, 1].
    """

    baseline = _prepare_predictions("baseline", baseline_predictions)
    live = _prepare_predictions("live", live_predictions)

    # Distribution-level comparison.
    ks_stat, p_value = ks_2samp(baseline, live)

    # Confidence comparison via full binary entropy.
    baseline_entropy = _binary_entropy_mean(baseline)
    live_entropy = _binary_entropy_mean(live)
    entropy_change = live_entropy - baseline_entropy

    def _center_stats(values):
        # Cheap shape/center diagnostics for reporting.
        return float(np.mean(values)), float(np.median(values)), float(np.std(values))

    b_mean, b_median, b_std = _center_stats(baseline)
    l_mean, l_median, l_std = _center_stats(live)

    return {
        "ks_statistic": float(ks_stat),
        "p_value": float(p_value),

        "baseline_entropy": round(float(baseline_entropy), 6),
        "live_entropy": round(float(live_entropy), 6),
        "entropy_change": round(float(entropy_change), 6),
        "abs_entropy_change": round(float(abs(entropy_change)), 6),

        "baseline_mean_prob": round(b_mean, 6),
        "live_mean_prob": round(l_mean, 6),
        "mean_prob_shift": round(float(l_mean - b_mean), 6),

        "baseline_median_prob": round(b_median, 6),
        "live_median_prob": round(l_median, 6),
        "median_prob_shift": round(float(l_median - b_median), 6),

        "baseline_std_prob": round(b_std, 6),
        "live_std_prob": round(l_std, 6),
        "std_prob_shift": round(float(l_std - b_std), 6),

        "n_baseline": int(baseline.size),
        "n_live": int(live.size),
    }


def _binary_entropy_mean(preds: np.ndarray) -> float:
    """
    Mean of the full binary entropy H(p) = -(p*ln(p) + (1-p)*ln(1-p)),
    in nats, with probabilities clipped away from {0, 1} for stability.
    """
    eps = 1e-9
    p = np.clip(preds.astype(float), eps, 1.0 - eps)
    q = 1.0 - p
    return float(np.mean(-(p * np.log(p) + q * np.log(q))))


def _prepare_predictions(name: str, arr) -> np.ndarray:
    """
    Validate and normalize prediction arrays to a clean 1D float numpy array.
    """
    label = name.capitalize()

    if arr is None:
        raise ValueError(f"{label} predictions cannot be None")

    if isinstance(arr, np.ndarray):
        arr = arr.astype(float, copy=False)
    else:
        # Accept lists / pandas Series while staying user-friendly.
        try:
            arr = np.asarray(arr, dtype=float)
        except Exception as exc:
            raise TypeError(
                f"{label} predictions must be a numpy array or array-like of numeric values"
            ) from exc

    arr = np.ravel(arr)

    if arr.size == 0:
        raise ValueError(f"{label} prediction array cannot be empty")

    if not np.all(np.isfinite(arr)):
        raise ValueError(f"{label} predictions contain NaN/Inf values")

    # Entropy-based drift only makes sense for probabilities.
    if np.min(arr) < 0.0 or np.max(arr) > 1.0:
        raise ValueError(
            f"{label} predictions must be probability values in [0, 1]"
        )

    return arr
@@ -0,0 +1,239 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Dict, List, Optional
4
+
5
+
6
# ----------------------------
# Basic per-signal thresholds
# ----------------------------
FEATURE_KS_LOW = 0.10
FEATURE_KS_MEDIUM = 0.20
FEATURE_KS_HIGH = 0.35

PRED_KS_WARNING = 0.10
PRED_KS_CRITICAL = 0.15

ENTROPY_DELTA_WARNING = 0.01
ENTROPY_DELTA_CRITICAL = 0.02


def classify_severity(ks_statistic: float) -> str:
    """
    Map a KS-like drift statistic onto a label.

    Returns one of: LOW / MEDIUM / HIGH / CRITICAL.
    """
    ks = _safe_float(ks_statistic, default=0.0)
    for upper_bound, label in (
        (FEATURE_KS_LOW, "LOW"),
        (FEATURE_KS_MEDIUM, "MEDIUM"),
        (FEATURE_KS_HIGH, "HIGH"),
    ):
        if ks < upper_bound:
            return label
    return "CRITICAL"


def compute_health_score(feature_drift_results: dict) -> float:
    """
    Overall model health score (0–100) from feature drift; higher is healthier.

    Uses average feature KS:
        health = max(0, 100 * (1 - avg_ks))

    Raises:
        ValueError: when no usable feature drift entries exist.
    """
    stats = summarize_feature_drift(feature_drift_results)
    if not stats["feature_count"]:
        raise ValueError("Feature drift results cannot be empty")

    score = 100.0 * (1.0 - stats["avg_ks"])
    return round(float(max(0.0, score)), 2)


def summarize_feature_drift(feature_drift_results: Optional[dict]) -> Dict[str, Any]:
    """
    Extract summary statistics from per-feature drift results.
    Tolerant of missing/malformed entries: they are simply skipped.
    """
    def _empty() -> Dict[str, Any]:
        return {
            "feature_count": 0,
            "avg_ks": 0.0,
            "max_ks": 0.0,
            "max_feature": None,
            "ks_values": [],
        }

    if not isinstance(feature_drift_results, dict):
        return _empty()

    pairs: List[tuple] = []
    for name, payload in feature_drift_results.items():
        if not isinstance(payload, dict):
            continue
        ks = _safe_float(payload.get("ks_statistic"), default=None)
        if ks is not None:
            pairs.append((str(name), ks))

    if not pairs:
        return _empty()

    values = [ks for _, ks in pairs]
    worst_name, worst_ks = max(pairs, key=lambda item: item[1])

    return {
        "feature_count": len(values),
        "avg_ks": round(sum(values) / len(values), 6),
        "max_ks": round(float(worst_ks), 6),
        "max_feature": worst_name,
        "ks_values": [round(float(v), 6) for v in values],
    }


def classify_drift_taxonomy(
    feature_drift_results: Optional[dict] = None,
    prediction_drift_results: Optional[dict] = None,
) -> str:
    """
    Agreement/disagreement taxonomy between feature drift and prediction drift.

    Returns one of:
        STABLE
        ROBUST_SHIFT          (feature drift high, prediction drift low)
        SILENT_BEHAVIOR_DRIFT (feature drift low, prediction drift high)
        DEGRADING_DRIFT       (both high)
    """
    max_feature_ks = _safe_float(
        summarize_feature_drift(feature_drift_results).get("max_ks"), default=0.0
    )

    pred_ks = 0.0
    if isinstance(prediction_drift_results, dict):
        pred_ks = _safe_float(prediction_drift_results.get("ks_statistic"), default=0.0)

    feature_high = max_feature_ks >= FEATURE_KS_MEDIUM
    pred_high = pred_ks >= PRED_KS_WARNING

    if feature_high and pred_high:
        return "DEGRADING_DRIFT"
    if feature_high:
        return "ROBUST_SHIFT"
    if pred_high:
        return "SILENT_BEHAVIOR_DRIFT"
    return "STABLE"


def evaluate_drift_state(
    feature_drift_results: Optional[dict] = None,
    prediction_drift_results: Optional[dict] = None,
) -> Dict[str, Any]:
    """
    Composite severity + status decision engine.

    Combines avg feature KS, max feature KS, prediction KS, and entropy
    delta into a normalized decision payload with:
        severity, status, taxonomy, health_score, signals, thresholds
    """
    f_summary = summarize_feature_drift(feature_drift_results)
    avg_feature_ks = _safe_float(f_summary.get("avg_ks"), default=0.0)
    max_feature_ks = _safe_float(f_summary.get("max_ks"), default=0.0)

    pred_ks, entropy_change = 0.0, 0.0
    if isinstance(prediction_drift_results, dict):
        pred_ks = _safe_float(prediction_drift_results.get("ks_statistic"), default=0.0)
        entropy_change = _safe_float(prediction_drift_results.get("entropy_change"), default=0.0)

    # Normalize every signal into a 0..1 severity component, capped at 1.
    components = {
        "avg": min(1.0, avg_feature_ks / FEATURE_KS_MEDIUM),          # avg drift
        "max": min(1.0, max_feature_ks / FEATURE_KS_HIGH),            # worst feature
        "pred": min(1.0, pred_ks / PRED_KS_CRITICAL),                 # behavior drift
        "entropy": min(1.0, abs(entropy_change) / ENTROPY_DELTA_CRITICAL),  # confidence shift
    }

    # Weighted composite score in [0, 1]; prediction drift weighs the most.
    composite_score = (
        0.30 * components["avg"]
        + 0.25 * components["max"]
        + 0.35 * components["pred"]
        + 0.10 * components["entropy"]
    )

    severity = _classify_composite_severity(composite_score)

    # Status favors prediction drift (behavior-centric monitoring).
    if pred_ks >= PRED_KS_CRITICAL or max_feature_ks >= FEATURE_KS_HIGH:
        status = "CRITICAL_DRIFT"
    elif (
        pred_ks >= PRED_KS_WARNING
        or max_feature_ks >= FEATURE_KS_MEDIUM
        or avg_feature_ks >= FEATURE_KS_LOW
    ):
        status = "WARNING_DRIFT"
    else:
        status = "STABLE"

    # Health score is only meaningful when feature drift data exists.
    health_score = (
        compute_health_score(feature_drift_results)
        if f_summary["feature_count"] > 0
        else None
    )

    return {
        "severity": severity,
        "status": status,
        "taxonomy": classify_drift_taxonomy(feature_drift_results, prediction_drift_results),
        "health_score": health_score,
        "signals": {
            "avg_feature_ks": round(avg_feature_ks, 6),
            "max_feature_ks": round(max_feature_ks, 6),
            "max_feature_name": f_summary.get("max_feature"),
            "prediction_ks": round(pred_ks, 6),
            "entropy_change": round(entropy_change, 6),
            "composite_score": round(float(composite_score), 6),
            "feature_count": int(f_summary.get("feature_count", 0)),
        },
        "thresholds": {
            "feature_ks_low": FEATURE_KS_LOW,
            "feature_ks_medium": FEATURE_KS_MEDIUM,
            "feature_ks_high": FEATURE_KS_HIGH,
            "pred_ks_warning": PRED_KS_WARNING,
            "pred_ks_critical": PRED_KS_CRITICAL,
            "entropy_delta_warning": ENTROPY_DELTA_WARNING,
            "entropy_delta_critical": ENTROPY_DELTA_CRITICAL,
        },
    }


# ----------------------------
# Internal helpers
# ----------------------------
def _classify_composite_severity(score: float) -> str:
    """Bucket an already-normalized 0..1 composite score into a severity label."""
    s = min(1.0, max(0.0, _safe_float(score, default=0.0)))

    if s < 0.20:
        return "LOW"
    if s < 0.45:
        return "MEDIUM"
    if s < 0.70:
        return "HIGH"
    return "CRITICAL"


def _safe_float(value: Any, default: Optional[float] = 0.0) -> Optional[float]:
    """Coerce value to float; return default on None or conversion failure."""
    if value is None:
        return default
    try:
        return float(value)
    except (TypeError, ValueError):
        return default
modelshift/monitor.py ADDED
@@ -0,0 +1,317 @@
1
+ from __future__ import annotations
2
+
3
+ import uuid
4
+ import requests
5
+ from datetime import datetime, timezone
6
+ from typing import Any, Dict, List, Optional
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+
11
+ from modelshift.baseline import BaselineWindow
12
+ from modelshift.drift.feature_drift import compute_feature_drift
13
+ from modelshift.drift.prediction_drift import compute_prediction_drift
14
+ from modelshift.drift.severity import (
15
+ classify_severity,
16
+ compute_health_score,
17
+ evaluate_drift_state,
18
+ summarize_feature_drift,
19
+ )
20
+
21
# -------------------------------------------------------------------
# Phase 2: Cloud SDK Configuration
# -------------------------------------------------------------------
# Module-level SDK state: a single API key / endpoint shared by all
# monitors in this process. Set via init() before calling push().
_CLOUD_CONFIG = {
    "api_key": None,
    "endpoint": "http://127.0.0.1:8000/api/v1/track"
}

def init(api_key: str, endpoint: str = "http://127.0.0.1:8000/api/v1/track"):
    """
    Initialize the ModelShift-Lite SDK with your cloud API Key.
    This links your local ML models to your cloud dashboard.

    Args:
        api_key: non-empty API key issued by the dashboard.
        endpoint: ingestion URL of the tracking API.

    Raises:
        ValueError: if api_key is not a non-empty string. (Previously an
            empty key was accepted and push() would silently no-op later.)
    """
    if not isinstance(api_key, str) or not api_key.strip():
        raise ValueError("api_key must be a non-empty string")

    _CLOUD_CONFIG["api_key"] = api_key
    _CLOUD_CONFIG["endpoint"] = endpoint
    # Plain string: the old version used an f-string with no placeholders.
    print("[βœ“] ModelShift SDK Authenticated. Cloud sync enabled.")
37
+
38
+ # -------------------------------------------------------------------
39
+ # Core Engine
40
+ # -------------------------------------------------------------------
41
class ModelMonitor:
    """
    Main interface for ModelShift-Lite monitoring.

    Handles:
    - baseline/live feature drift (per-feature KS)
    - baseline/live prediction drift (KS + entropy shift)
    - composite status/severity/taxonomy summary
    - optional cloud sync via push()
    """

    def __init__(self, reference_data: pd.DataFrame):
        """
        Initialize monitor with reference baseline data.

        Raises:
            TypeError: if reference_data is not a DataFrame.
            ValueError: if reference_data is empty.
        """
        if not isinstance(reference_data, pd.DataFrame):
            raise TypeError("Reference data must be a pandas DataFrame")
        if reference_data.empty:
            raise ValueError("Reference data cannot be empty")

        self.baseline = BaselineWindow(reference_data.copy())

        # Latest live window, column-aligned to the baseline.
        self.live_data: Optional[pd.DataFrame] = None

        # Cached output of the last compute_feature_drift() call.
        self.feature_drift_results: Optional[Dict[str, Any]] = None

        # Prediction-drift inputs and cached output.
        self.baseline_predictions: Optional[np.ndarray] = None
        self.live_predictions: Optional[np.ndarray] = None
        self.prediction_drift_results: Optional[Dict[str, Any]] = None

    # -----------------------
    # Data Update
    # -----------------------
    def update(self, live_data: pd.DataFrame):
        """
        Update monitor with new live data.
        Enforces same columns as baseline (reordered if needed).

        Raises:
            TypeError: if live_data is not a DataFrame.
            ValueError: if live_data is empty or its column set differs.
        """
        if not isinstance(live_data, pd.DataFrame):
            raise TypeError("Live data must be a pandas DataFrame")
        if live_data.empty:
            raise ValueError("Live data cannot be empty")

        baseline_df = self.baseline.get_data()
        baseline_cols = list(baseline_df.columns)
        live_cols = list(live_data.columns)

        if set(live_cols) != set(baseline_cols):
            missing = [c for c in baseline_cols if c not in live_cols]
            extra = [c for c in live_cols if c not in baseline_cols]
            raise ValueError(
                f"Live data columns must match baseline columns. Missing={missing}, Extra={extra}"
            )

        # Reorder to baseline column order for deterministic behavior
        self.live_data = live_data[baseline_cols].copy()

    # -----------------------
    # Feature Drift
    # -----------------------
    def compute_feature_drift(self) -> dict:
        """
        Compute feature-level drift between baseline and live data.
        Caches and returns the per-feature KS results.
        """
        if self.live_data is None:
            raise RuntimeError("Live data not set. Call update() first.")

        self.feature_drift_results = compute_feature_drift(
            self.baseline.get_data(),
            self.live_data
        )
        return self.feature_drift_results

    def get_latest_feature_drift(self) -> dict:
        """Return the cached feature drift results (raises if none yet)."""
        if self.feature_drift_results is None:
            raise RuntimeError("No feature drift computed yet.")
        return self.feature_drift_results

    def get_feature_severity(self) -> dict:
        """Return {feature: LOW/MEDIUM/HIGH/CRITICAL} for the cached results."""
        if self.feature_drift_results is None:
            raise RuntimeError("No feature drift computed yet.")

        severity = {}
        for feature, values in self.feature_drift_results.items():
            if not isinstance(values, dict):
                continue
            severity[feature] = classify_severity(values.get("ks_statistic", 0.0))

        return severity

    def get_model_health_score(self) -> float:
        """Return the 0-100 health score derived from cached feature drift."""
        if self.feature_drift_results is None:
            raise RuntimeError("No feature drift computed yet.")
        return compute_health_score(self.feature_drift_results)

    def get_top_drifted_features(self, k: int = 5) -> List[Dict[str, Any]]:
        """Return the k features with the highest KS statistic, descending."""
        if self.feature_drift_results is None:
            raise RuntimeError("No feature drift computed yet.")
        if not isinstance(k, int) or k <= 0:
            raise ValueError("k must be a positive integer")

        rows: List[Dict[str, Any]] = []
        for feature, values in self.feature_drift_results.items():
            if not isinstance(values, dict):
                continue
            ks = _safe_float(values.get("ks_statistic"), 0.0)
            pv = _safe_float(values.get("p_value"), None)
            rows.append({
                "feature": str(feature),
                "ks_statistic": round(ks, 6),
                "p_value": None if pv is None else round(pv, 6),
                "severity": classify_severity(ks),
            })

        rows.sort(key=lambda x: x["ks_statistic"], reverse=True)
        return rows[:k]

    def get_most_drifted_feature(self) -> Optional[Dict[str, Any]]:
        """Return the single worst-drifted feature row, or None."""
        top = self.get_top_drifted_features(k=1)
        return top[0] if top else None

    # -----------------------
    # Prediction Drift
    # -----------------------
    def set_baseline_predictions(self, predictions):
        """Store validated baseline prediction probabilities (1D floats)."""
        self.baseline_predictions = _prepare_prediction_array(predictions, "baseline")

    def update_predictions(self, live_predictions):
        """Store validated live prediction probabilities (1D floats)."""
        self.live_predictions = _prepare_prediction_array(live_predictions, "live")

    def compute_prediction_drift(self) -> dict:
        """Compute, cache and return prediction-behavior drift."""
        if self.baseline_predictions is None:
            raise RuntimeError("Baseline predictions not set.")
        if self.live_predictions is None:
            raise RuntimeError("Live predictions not set.")

        self.prediction_drift_results = compute_prediction_drift(
            self.baseline_predictions,
            self.live_predictions
        )
        return self.prediction_drift_results

    def get_latest_prediction_drift(self) -> dict:
        """Return the cached prediction drift results (raises if none yet)."""
        if self.prediction_drift_results is None:
            raise RuntimeError("No prediction drift computed yet.")
        return self.prediction_drift_results

    # -----------------------
    # Composite Summary
    # -----------------------
    def evaluate_health(self) -> Dict[str, Any]:
        """
        Combine cached feature and prediction drift into the composite
        decision payload (status/severity/taxonomy/health/signals).
        """
        if self.feature_drift_results is None:
            raise RuntimeError("No feature drift computed yet.")
        if self.prediction_drift_results is None:
            raise RuntimeError("No prediction drift computed yet.")

        decision = evaluate_drift_state(
            feature_drift_results=self.feature_drift_results,
            prediction_drift_results=self.prediction_drift_results,
        )

        feature_summary = summarize_feature_drift(self.feature_drift_results)
        top_features = self.get_top_drifted_features(k=5)
        most_feature = top_features[0] if top_features else None

        return {
            "status": decision.get("status"),
            "severity": decision.get("severity"),
            "taxonomy": decision.get("taxonomy"),
            "health_score": decision.get("health_score"),
            "feature_summary": feature_summary,
            "prediction_drift": self.prediction_drift_results,
            "top_drifted_features": top_features,
            "most_drifted_feature": most_feature,
            "signals": decision.get("signals", {}),
            "thresholds": decision.get("thresholds", {}),
        }

    def build_snapshot(self) -> Dict[str, Any]:
        """
        Assemble everything computed so far into one dict; sections that
        have not been computed yet are simply absent or None.
        """
        snapshot: Dict[str, Any] = {
            "feature_drift": self.feature_drift_results,
            "prediction_drift": self.prediction_drift_results,
        }

        if self.feature_drift_results is not None:
            snapshot["feature_severity"] = self.get_feature_severity()
            snapshot["health_score"] = self.get_model_health_score()
            snapshot["top_drifted_features"] = self.get_top_drifted_features(k=5)
            snapshot["most_drifted_feature"] = self.get_most_drifted_feature()

        if self.feature_drift_results is not None and self.prediction_drift_results is not None:
            snapshot["decision"] = self.evaluate_health()

        return snapshot

    # -----------------------
    # Phase 2: Cloud Sync Method
    # -----------------------
    def push(self) -> Optional[Dict[str, Any]]:
        """
        Takes the local drift calculation and beams it securely to your FastAPI dashboard.

        Returns the server's JSON response, or None when the SDK is not
        initialized or the request fails.
        """
        if not _CLOUD_CONFIG["api_key"]:
            print("[!] API Key Missing. Please add 'modelshift.init(api_key=\"YOUR_KEY\")' at the top of your script.")
            return None

        snapshot = self.build_snapshot()
        decision = snapshot.get("decision", {})

        mdf = snapshot.get("most_drifted_feature") or {}
        pred_drift = snapshot.get("prediction_drift") or {}

        # Package the data exactly how your dashboard expects it
        run_id = f"run_{uuid.uuid4().hex[:8]}"
        payload = {
            "run_id": run_id,
            "generated_at": datetime.now(timezone.utc).isoformat(),
            "status": decision.get("status", "UNKNOWN"),
            "window_size": len(self.live_data) if self.live_data is not None else 0,

            # Dashboard Graph Metrics
            "clean_health": 100.0,
            "drifted_health": decision.get("health_score", 0.0),
            "drifted_pred_ks": pred_drift.get("ks_statistic", 0.0),
            # BUG FIX: compute_prediction_drift() emits "entropy_change";
            # the old lookup used "delta_entropy" and therefore always sent 0.0.
            "drifted_entropy_change": pred_drift.get("entropy_change", 0.0),
            "drifted_last_window_feature": mdf.get("feature"),
            "drifted_last_window_ks": mdf.get("ks_statistic"),

            "evaluation": snapshot
        }

        # The Security Checkpoint
        headers = {
            "Content-Type": "application/json",
            "X-API-Key": _CLOUD_CONFIG["api_key"]
        }

        try:
            print(f"[~] Beaming data to {_CLOUD_CONFIG['endpoint']}...")
            # Timeout keeps a hung endpoint from blocking the caller forever.
            response = requests.post(
                _CLOUD_CONFIG["endpoint"], json=payload, headers=headers, timeout=10
            )
            response.raise_for_status()
            print(f"[βœ“] Successfully synced run '{run_id}' to ModelShift Cloud.")
            return response.json()
        except requests.exceptions.RequestException as e:
            print(f"[!] ModelShift Cloud Sync Failed: {e}")
            if hasattr(e, 'response') and e.response is not None:
                print(f"[!] Server Context: {e.response.text}")
            return None
291
+
292
+
293
+ # -----------------------
294
+ # Internal helpers
295
+ # -----------------------
296
def _prepare_prediction_array(values, name: str) -> np.ndarray:
    """Coerce array-like predictions into a finite, non-empty 1D float array."""
    if values is None:
        raise ValueError(f"{name.capitalize()} predictions cannot be None")

    try:
        flat = np.asarray(values, dtype=float).reshape(-1)
    except Exception as exc:
        raise TypeError(f"{name.capitalize()} predictions must be numeric array-like") from exc

    if flat.size == 0:
        raise ValueError(f"{name.capitalize()} predictions cannot be empty")
    if not np.isfinite(flat).all():
        raise ValueError(f"{name.capitalize()} predictions contain NaN/Inf")
    return flat
309
+
310
+
311
def _safe_float(value, default=None):
    """Best-effort float conversion; returns default for None or unconvertible input."""
    if value is None:
        return default
    try:
        return float(value)
    except (TypeError, ValueError):
        return default
modelshift/selftest.py ADDED
@@ -0,0 +1,398 @@
1
+ # selftest.py (repo root)
2
+ from __future__ import annotations
3
+
4
+ from dataclasses import dataclass
5
+ from typing import Any, Dict, List, Optional, Tuple
6
+ import math
7
+ import time
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+
12
+ from modelshift.drift.feature_drift import compute_feature_drift
13
+ from modelshift.drift.prediction_drift import compute_prediction_drift
14
+ from modelshift.drift.severity import compute_health_score
15
+
16
+
17
+ # -----------------------------
18
+ # Helpers (robust schema)
19
+ # -----------------------------
20
+ def _to_float(x: Any, default: float = 0.0) -> float:
21
+ try:
22
+ v = float(x)
23
+ if np.isfinite(v):
24
+ return float(v)
25
+ except Exception:
26
+ pass
27
+ return float(default)
28
+
29
+
30
+ def _extract_pred_map(pd_: Any) -> Dict[str, Any]:
31
+ if not isinstance(pd_, dict):
32
+ return {}
33
+ for k in ("prediction_drift", "prediction_drift_results", "results"):
34
+ v = pd_.get(k)
35
+ if isinstance(v, dict):
36
+ return v
37
+ return pd_
38
+
39
+
40
+ def _extract_fd_map(fd: Any) -> Dict[str, Any]:
41
+ if not isinstance(fd, dict):
42
+ return {}
43
+ for k in ("feature_drift_results", "feature_drift", "results"):
44
+ v = fd.get(k)
45
+ if isinstance(v, dict):
46
+ return v
47
+ return fd
48
+
49
+
50
def _adapt_pred(pd_: Any) -> Dict[str, Any]:
    """Normalise a prediction-drift result so it always carries canonical
    float ``ks_statistic`` and ``p_value`` entries (other keys preserved)."""
    mapping = _extract_pred_map(pd_)
    if not isinstance(mapping, dict):
        return {}
    # Probe legacy key aliases in priority order for both statistics.
    ks_raw = mapping.get(
        "ks_statistic",
        mapping.get("ks", mapping.get("ks_stat", mapping.get("statistic", 0.0))),
    )
    pv_raw = mapping.get("p_value", mapping.get("p", mapping.get("pvalue", 1.0)))
    normalised = dict(mapping)
    normalised["ks_statistic"] = _to_float(ks_raw, 0.0)
    normalised["p_value"] = _to_float(pv_raw, 1.0)
    return normalised
60
+
61
+
62
def _adapt_fd(fd: Any) -> Dict[str, Dict[str, float]]:
    """Normalise per-feature drift results to the canonical shape
    ``{feature: {"ks_statistic": float, "p_value": float}}``.

    Non-dict feature entries are skipped; legacy key aliases are accepted.
    """
    mapping = _extract_fd_map(fd)
    normalised: Dict[str, Dict[str, float]] = {}
    if not isinstance(mapping, dict):
        return normalised
    for feature, stats in mapping.items():
        if not isinstance(stats, dict):
            continue
        ks_raw = stats.get(
            "ks_statistic",
            stats.get("ks", stats.get("ks_stat", stats.get("statistic", stats.get("D", 0.0)))),
        )
        pv_raw = stats.get("p_value", stats.get("p", stats.get("pvalue", stats.get("p_val", 1.0))))
        normalised[str(feature)] = {
            "ks_statistic": _to_float(ks_raw, 0.0),
            "p_value": _to_float(pv_raw, 1.0),
        }
    return normalised
77
+
78
+
79
def _parse_health_result(out: Any) -> Optional[Tuple[float, str]]:
    """Interpret a compute_health_score() return value.

    Returns a (score, mode) pair when *out* carries a usable score
    (either a dict with a recognised score key, or a bare number),
    otherwise None so the caller can try another call form.
    """
    if isinstance(out, dict):
        sc = out.get("health_score", out.get("score", out.get("health", None)))
        md = out.get("mode", out.get("health_compute_mode", "severity"))
        if sc is not None:
            return float(sc), str(md)
        return None
    if isinstance(out, (int, float)):
        return float(out), "severity"
    return None


def _call_health(fd: Any, pd_: Any) -> Tuple[float, str]:
    """Compute a (health_score, mode) pair from drift results.

    Tolerates multiple compute_health_score() signatures: first the newer
    single-dict form, then the older two-argument form, and finally a
    simple weighted-KS fallback if both fail. Never raises.
    """
    fd_fixed = _adapt_fd(fd)
    pd_fixed = _adapt_pred(pd_)

    # Newer API: one combined payload dict.
    try:
        parsed = _parse_health_result(
            compute_health_score({"feature_drift": fd_fixed, "prediction_drift": pd_fixed})
        )
        if parsed is not None:
            return parsed
    except Exception:
        pass

    # Older API: separate positional arguments.
    try:
        parsed = _parse_health_result(compute_health_score(fd_fixed, pd_fixed))
        if parsed is not None:
            return parsed
    except Exception:
        pass

    # Fallback: weighted blend of prediction KS and mean feature KS,
    # mapped onto a 0-100 health scale.
    ks_vals = [v["ks_statistic"] for v in fd_fixed.values()] if fd_fixed else []
    avg_ks = float(np.mean(ks_vals)) if ks_vals else 0.0
    pred_ks = _to_float(pd_fixed.get("ks_statistic"), 0.0)
    score = 100.0 * (1.0 - min(max(0.70 * pred_ks + 0.30 * avg_ks, 0.0), 1.0))
    return float(np.clip(score, 0.0, 100.0)), "fallback"
115
+
116
+
117
+ def _entropy(probs: np.ndarray, bins: int = 24) -> float:
118
+ p = np.clip(np.asarray(probs, dtype=float), 0.0, 1.0)
119
+ h, _ = np.histogram(p, bins=bins, range=(0.0, 1.0), density=False)
120
+ h = h.astype(float)
121
+ if h.sum() <= 0:
122
+ return 0.0
123
+ q = h / h.sum()
124
+ q = q[q > 0]
125
+ return float(-np.sum(q * np.log2(q)))
126
+
127
+
128
+ def _hist(probs: np.ndarray, bins: int = 32) -> Dict[str, Any]:
129
+ p = np.clip(np.asarray(probs, dtype=float), 0.0, 1.0)
130
+ h, edges = np.histogram(p, bins=bins, range=(0.0, 1.0), density=False)
131
+ return {
132
+ "bins": [float(x) for x in edges.tolist()],
133
+ "counts": [int(x) for x in h.tolist()],
134
+ }
135
+
136
+
137
def _top_features(fd: Any, k: int = 8) -> List[Dict[str, Any]]:
    """Rank features by KS statistic, tagging each with a severity label.

    Severity cutoffs: >=0.35 CRITICAL, >=0.20 HIGH, >=0.10 MEDIUM, else LOW.
    Returns at most *k* rows, strongest drift first.
    """
    severity_steps = ((0.35, "CRITICAL"), (0.20, "HIGH"), (0.10, "MEDIUM"))
    ranked: List[Dict[str, Any]] = []
    for feature, stats in _adapt_fd(fd).items():
        ks = _to_float(stats.get("ks_statistic"), 0.0)
        pv = _to_float(stats.get("p_value"), 1.0)
        label = next((name for cutoff, name in severity_steps if ks >= cutoff), "LOW")
        ranked.append({"feature": feature, "ks_statistic": ks, "p_value": pv, "severity": label})
    ranked.sort(key=lambda row: row["ks_statistic"], reverse=True)
    return ranked[:k]
154
+
155
+
156
+ # -----------------------------
157
+ # Synthetic scenario generator
158
+ # -----------------------------
159
+ def _make_synthetic(seed: int, n: int = 2400, d: int = 14) -> Dict[str, Any]:
160
+ rng = np.random.default_rng(int(seed))
161
+
162
+ # baseline features
163
+ base = rng.normal(0, 1.0, size=(n, d))
164
+ base_df = pd.DataFrame(base, columns=[f"f{i}" for i in range(d)])
165
+
166
+ # clean ~ baseline (small noise)
167
+ clean = base + rng.normal(0, 0.08, size=(n, d))
168
+ clean_df = pd.DataFrame(clean, columns=base_df.columns)
169
+
170
+ # drifted (shift subset of features)
171
+ drift = base.copy()
172
+ drift[:, 0] += 2.0
173
+ drift[:, 1] += 1.3
174
+ drift[:, 2] *= 1.8
175
+ drift[:, 3] += rng.normal(0, 2.2, size=n)
176
+ drift_df = pd.DataFrame(drift, columns=base_df.columns)
177
+
178
+ # synthetic "prediction probs"
179
+ base_p = rng.beta(2.2, 2.6, size=n) # mild center
180
+ clean_p = np.clip(base_p + rng.normal(0, 0.02, size=n), 0, 1)
181
+ drift_p = np.clip(1.0 - base_p + rng.normal(0, 0.03, size=n), 0, 1) # strong invert shift
182
+
183
+ # FIX: Cleaned up the dictionary return to prevent syntax errors
184
+ return {
185
+ "base_X": base_df,
186
+ "clean_X": clean_df,
187
+ "drift_X": drift_df,
188
+ "base_p": base_p,
189
+ "clean_p": clean_p,
190
+ "drift_p": drift_p,
191
+ }
192
+
193
+
194
+ # -----------------------------
195
+ # Public API
196
+ # -----------------------------
197
def run_selftest(seed: int = 7, test: str = "suite") -> Dict[str, Any]:
    """
    Run a synthetic end-to-end drift-detection self-test.

    Returns a payload designed for BOTH:
      - readable JSON
      - rich UI animations (histograms, gauges, feature bars)

    test options:
      - "prediction"
      - "feature"
      - "pipeline"
      - "suite" (default, runs 3 scenarios)
      - "concept"

    The payload carries "ok", per-check results under "checks", and the
    per-scenario metrics/visual data under "case_results". Any internal
    failure is caught and reported via an "error"/"trace" payload rather
    than raised.
    """
    t0 = time.time()
    # Normalise the requested mode; None/"" falls back to "suite".
    test = (test or "suite").strip().lower()

    try:
        # Unknown modes silently degrade to the full suite.
        if test not in {"prediction", "feature", "pipeline", "suite", "concept"}:
            test = "suite"

        cases: List[Dict[str, Any]] = []
        checks: List[Dict[str, Any]] = []

        # We run up to 3 scenarios in suite mode so it looks "real";
        # every other mode runs a single scenario at the given seed.
        seeds = [seed] if test != "suite" else [seed, seed + 11, seed + 23]

        for idx, s in enumerate(seeds, start=1):
            sim = _make_synthetic(seed=s)
            base_X = sim["base_X"]
            clean_X = sim["clean_X"]
            drift_X = sim["drift_X"]
            base_p = sim["base_p"]
            clean_p = sim["clean_p"]
            drift_p = sim["drift_p"]

            # --- CONCEPT DRIFT LOGIC ADDED HERE ---
            if test == "concept":
                # In Concept Drift, the relationship flips but features stay the same!
                drift_X = base_X.copy()
                drift_p = np.clip(base_p + 0.35, 0, 1)
            # --------------------------------------

            # Compute drift statistics for clean-vs-baseline and drifted-vs-baseline.
            fd_clean = compute_feature_drift(base_X, clean_X)
            fd_drift = compute_feature_drift(base_X, drift_X)

            pd_clean = compute_prediction_drift(base_p, clean_p)
            pd_drift = compute_prediction_drift(base_p, drift_p)

            # Normalise result schemas so downstream access is uniform.
            pd_clean_m = _adapt_pred(pd_clean)
            pd_drift_m = _adapt_pred(pd_drift)

            pred_ks_clean = _to_float(pd_clean_m.get("ks_statistic"), 0.0)
            pred_ks_drift = _to_float(pd_drift_m.get("ks_statistic"), 0.0)

            # Entropy deltas of the probability histograms (vs baseline).
            ent_base = _entropy(base_p)
            ent_clean = _entropy(clean_p)
            ent_drift = _entropy(drift_p)
            delta_ent_clean = float(ent_clean - ent_base)
            delta_ent_drift = float(ent_drift - ent_base)

            # Health score (uses the pipeline scorer if available, else fallback).
            health_clean, mode_clean = _call_health(fd_clean, pd_clean)
            health_drift, mode_drift = _call_health(fd_drift, pd_drift)

            # Histograms for the UI visuals.
            h_base = _hist(base_p, bins=40)
            h_clean = _hist(clean_p, bins=40)
            h_drift = _hist(drift_p, bins=40)

            top_feat = _top_features(fd_drift, k=8)

            cases.append(
                {
                    "case_id": f"C{idx}",
                    "seed": int(s),
                    "name": (
                        "Prediction Drift Test" if test == "prediction"
                        else "Feature Drift Test" if test == "feature"
                        else "Pipeline Health Test" if test == "pipeline"
                        else "Concept Drift Test" if test == "concept"
                        else f"Suite Scenario {idx}"
                    ),
                    "metrics": {
                        "pred_ks_clean": float(pred_ks_clean),
                        "pred_ks_drifted": float(pred_ks_drift),
                        "delta_entropy_clean": float(delta_ent_clean),
                        "delta_entropy_drifted": float(delta_ent_drift),
                        "health_clean": float(health_clean),
                        "health_drifted": float(health_drift),
                        "health_mode_clean": str(mode_clean),
                        "health_mode_drifted": str(mode_drift),
                    },
                    "viz": {
                        "pred_hist": {
                            "bins": h_base["bins"],
                            "baseline": h_base["counts"],
                            "clean": h_clean["counts"],
                            "drifted": h_drift["counts"],
                        },
                        "top_drifted_features": top_feat,
                    },
                }
            )

        # Decide which checks to enforce (based on selected test)
        # (We validate the FIRST case for "pass/fail")
        c0 = cases[0]
        m0 = c0["metrics"]
        pred_clean = float(m0["pred_ks_clean"])
        pred_drift = float(m0["pred_ks_drifted"])
        # NOTE: h_clean/h_drift are rebound here from histogram dicts (loop
        # scope) to health floats -- intentional reuse after the loop ends.
        h_clean = float(m0["health_clean"])
        h_drift = float(m0["health_drifted"])

        if test in {"prediction", "pipeline", "suite"}:
            checks.append(
                {
                    "name": "Prediction drift should be low for clean",
                    "pass": bool(pred_clean < 0.08),
                    "value": pred_clean,
                    "threshold": "< 0.08",
                }
            )
            checks.append(
                {
                    "name": "Prediction drift should be high for drifted",
                    "pass": bool(pred_drift > 0.10),
                    "value": pred_drift,
                    "threshold": "> 0.10",
                }
            )

        if test in {"feature", "pipeline", "suite"}:
            top = c0["viz"]["top_drifted_features"]
            mx = float(top[0]["ks_statistic"]) if top else 0.0
            checks.append(
                {
                    "name": "At least one feature should show strong shift",
                    "pass": bool(mx > 0.20),
                    "value": mx,
                    "threshold": "> 0.20",
                }
            )

        if test in {"pipeline", "suite"}:
            checks.append(
                {
                    "name": "Health should degrade under drift",
                    "pass": bool(h_drift < h_clean),
                    "value": {"clean": h_clean, "drifted": h_drift},
                    "threshold": "drifted < clean",
                }
            )

        # --- CONCEPT DRIFT CHECKS ADDED HERE ---
        if test == "concept":
            top = c0["viz"]["top_drifted_features"]
            mx = float(top[0]["ks_statistic"]) if top else 0.0
            checks.append(
                {
                    "name": "Feature drift should be ZERO (Inputs didn't change)",
                    "pass": bool(mx < 0.05),
                    "value": mx,
                    "threshold": "< 0.05",
                }
            )
            checks.append(
                {
                    "name": "Prediction drift should be MASSIVE (Concept flipped)",
                    "pass": bool(pred_drift > 0.30),
                    "value": pred_drift,
                    "threshold": "> 0.30",
                }
            )
        # ---------------------------------------

        # Overall verdict: every enforced check must pass (vacuously True).
        ok = all(bool(x.get("pass")) for x in checks) if checks else True

        payload = {
            "ok": ok,
            "test": test,
            "seed": int(seed),
            "started_at": time.strftime("%Y-%m-%d %H:%M:%S"),
            "elapsed_ms": int((time.time() - t0) * 1000),
            "cases": len(cases),
            "passed": int(sum(1 for x in checks if x.get("pass"))),
            "failed": int(sum(1 for x in checks if not x.get("pass"))),
            "summary": cases[0]["metrics"] if cases else {},
            "checks": checks,
            "case_results": cases,
        }
        return payload

    except Exception as e:
        # Never raise out of the self-test: report the failure as a payload.
        import traceback
        return {
            "ok": False,
            "test": test,
            "seed": int(seed),
            "error": str(e),
            "trace": traceback.format_exc(),
        }
@@ -0,0 +1,3 @@
1
+ """
2
+ Storage modules for ModelShift-Lite.
3
+ """
@@ -0,0 +1,13 @@
1
class SQLiteStore:
    """
    Simple SQLite storage interface for drift metrics.

    Each metrics payload is stored as one JSON blob per row together with
    a UTC timestamp, which keeps the schema stable while payloads evolve.
    """

    def __init__(self, db_path="modelshift.db"):
        # Path to the SQLite database file (":memory:" works for tests).
        self.db_path = db_path
        self._conn = None  # opened lazily by connect()

    def connect(self):
        """Open (or reuse) the database connection and ensure the schema exists."""
        import sqlite3

        if self._conn is None:
            self._conn = sqlite3.connect(self.db_path)
            self._conn.execute(
                "CREATE TABLE IF NOT EXISTS drift_metrics ("
                "id INTEGER PRIMARY KEY AUTOINCREMENT, "
                "created_at TEXT NOT NULL, "
                "metrics_json TEXT NOT NULL)"
            )
            self._conn.commit()

    def save_metrics(self, metrics):
        """Persist one metrics payload (any JSON-serialisable object)."""
        import json
        import time

        # Connect lazily so callers that skip connect() still work.
        self.connect()
        self._conn.execute(
            "INSERT INTO drift_metrics (created_at, metrics_json) VALUES (?, ?)",
            (time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()), json.dumps(metrics)),
        )
        self._conn.commit()
@@ -0,0 +1,3 @@
1
+ """
2
+ Utility functions.
3
+ """
@@ -0,0 +1,5 @@
1
def validate_inputs(data):
    """
    Validate input data format.

    Rejects None and empty sized containers (mirrors the checks done by
    BaselineWindow._validate); objects without a length are accepted.

    Raises:
        ValueError: if *data* is None or has zero length.
    """
    if data is None:
        raise ValueError("Input data cannot be None")
    try:
        is_empty = len(data) == 0
    except TypeError:
        # No length (e.g. scalars/generators) -- nothing more to check here.
        is_empty = False
    if is_empty:
        raise ValueError("Input data cannot be empty")
@@ -0,0 +1,129 @@
1
+ Metadata-Version: 2.4
2
+ Name: modelshift
3
+ Version: 0.1.0
4
+ Summary: A lightweight machine learning drift monitoring and alerting engine.
5
+ Author: Krishna
6
+ Author-email: ryomensukuna2530@gmail.com
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Operating System :: OS Independent
9
+ Requires-Python: >=3.8
10
+ Description-Content-Type: text/markdown
11
+ Requires-Dist: pandas
12
+ Requires-Dist: numpy
13
+ Requires-Dist: scipy
14
+ Requires-Dist: requests
15
+ Dynamic: author
16
+ Dynamic: author-email
17
+ Dynamic: classifier
18
+ Dynamic: description
19
+ Dynamic: description-content-type
20
+ Dynamic: requires-dist
21
+ Dynamic: requires-python
22
+ Dynamic: summary
23
+
24
+ # 🚦 ModelShift-Lite
25
+ ### Label-Free Monitoring for Deployed Machine Learning Models
26
+
27
+ > A lightweight, behavior-centric system to detect **silent reliability degradation** in deployed machine learning models β€” without requiring ground-truth labels.
28
+
29
+ ---
30
+
31
+ ## πŸ“Œ Why ModelShift-Lite?
32
+
33
+ Machine learning models rarely fail loudly after deployment.
34
+ Instead, they **silently degrade** as real-world data changes β€” while true labels are unavailable for continuous evaluation.
35
+
36
+ **ModelShift-Lite addresses this blind spot.**
37
+
38
+ ---
39
+
40
+ ## 🧩 Problem Statement
41
+
42
+ Deployed machine learning models often degrade silently over time due to changing data distributions, while ground-truth labels are unavailable for continuous performance evaluation.
43
+
44
+ ---
45
+
46
+ ## 🎯 Project Objective
47
+
48
+ Design a **label-free, post-deployment monitoring system** that tracks:
49
+
50
+ - Data distribution shifts
51
+ - Prediction behavior instability
52
+ - Model reliability trends
53
+
54
+ to provide **early warning signals** of degradation **without modifying the deployed model**.
55
+
56
+ ---
57
+
58
+ ## 🚫 What This Project Does *Not* Do
59
+
60
+ To maintain clarity of scope, ModelShift-Lite explicitly does **not**:
61
+
62
+ - ❌ Retrain models
63
+ - ❌ Correct predictions
64
+ - ❌ Compute accuracy on production data
65
+
66
+ It focuses solely on **monitoring and interpretability**.
67
+
68
+ ---
69
+
70
+ ## 🧠 Core Idea (In Simple Terms)
71
+
72
+ > *If we cannot measure correctness, we can still monitor behavior.*
73
+
74
+ ModelShift-Lite observes how a model **reacts** to changing data and identifies signs of instability before failures become obvious.
75
+
76
+ ---
77
+
78
+ ## πŸ› οΈ Key Components
79
+
80
+ - **Reference Baseline Handling**
81
+ Captures normal model behavior from historical or validation data
82
+
83
+ - **Live Inference Monitoring**
84
+ Tracks incoming production data and predictions
85
+
86
+ - **Feature Drift Detection**
87
+ Identifies changes in input distributions
88
+
89
+ - **Prediction Behavior Analysis**
90
+ Monitors confidence, stability, and output distribution shifts
91
+
92
+ - **Model Health Scoring**
93
+ Aggregates drift signals into an interpretable reliability indicator
94
+
95
+ - **Visualization Dashboard**
96
+ Displays trends, drift severity, and degradation warnings
97
+
98
+ ---
99
+ ```text
+ Reference Data  β†’
+                 β†’ Drift Detection β†’ Health Scoring β†’ Monitoring Dashboard
+ Live Inference  β†’
+ ```
102
+
103
+
104
+ *(Detailed architecture diagrams are provided in `/docs`)*
105
+
106
+ ---
107
+
108
+ ## πŸ’» Technology Stack
109
+
110
+ - **Language:** Python
111
+ - **Data Processing:** NumPy, Pandas
112
+ - **Statistical Analysis:** SciPy
113
+ - **Visualization:** Streamlit, Matplotlib
114
+ - **Storage:** SQLite (local, replaceable)
115
+
116
+ ---
117
+
118
+ ## πŸ“‚ Repository Structure
119
+
120
+ ```text
121
+ modelshift-lite/
122
+ β”œβ”€β”€ modelshift/ # Core monitoring logic
123
+ β”œβ”€β”€ dashboard/ # Streamlit visualization app
124
+ β”œβ”€β”€ experiments/ # Drift simulation & analysis
125
+ β”œβ”€β”€ data/ # Reference & live data
126
+ β”œβ”€β”€ docs/ # Architecture and design docs
127
+ └── README.md
+ ```
+
+ ## πŸ—οΈ High-Level Architecture
129
+
@@ -0,0 +1,16 @@
1
+ modelshift/__init__.py,sha256=_I2AIeZgUPSI1iAoeqE0BW3Lvdx1HNp4kFUt59OnWTs,51
2
+ modelshift/baseline.py,sha256=gdk_dMUc3bq7AigxBOIEW2PGSLpZw8twzMdeQcv65qg,1038
3
+ modelshift/monitor.py,sha256=Kt7c7jv48auJ2cGnGdSo01G5y8iI-ve0bgaSKXdAO1w,12421
4
+ modelshift/selftest.py,sha256=EdgQd-IROhYagll4QTJ83uRBTvBhtrahrxLa_nuJl9M,14274
5
+ modelshift/drift/__init__.py,sha256=TEkGw_vO--HzOCqu4i014te5Vru7yghgeJh2cJjle_Y,36
6
+ modelshift/drift/feature_drift.py,sha256=nU6auZWatpdjgdleU1ZIms2tGE-4iUfs4skv1VdOnME,1413
7
+ modelshift/drift/prediction_drift.py,sha256=v_vNA3JLPmUU4_f2vi05FVzpNCEWu1we8pXG5BGL1qA,3842
8
+ modelshift/drift/severity.py,sha256=OOEzoisoPLjvFTvU0O6wwK1-GTA-KuUyAWxmP4ihTD0,7878
9
+ modelshift/storage/__init__.py,sha256=N-Ydih1vXsRhj6pkXgkaFesGM_4CrnPlEDWyteRAxoA,48
10
+ modelshift/storage/sqlite_store.py,sha256=TD0aMI5IEBI9zQr8OpJOY-H9i_urURTR5UkCzSd-dJE,272
11
+ modelshift/utils/__init__.py,sha256=NBECA9wFkkwn4-O2NJsDwuZB_SciP_pkl5AhfWGe6tE,30
12
+ modelshift/utils/helpers.py,sha256=NGLv7R82B7xnquKLZw-RhChQVQnfrVCuNund4f5tle8,89
13
+ modelshift-0.1.0.dist-info/METADATA,sha256=obDrdvG6gYhRI0Vdx45WKNSIZXFMpLbhXFyUcZi4XoQ,3688
14
+ modelshift-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
15
+ modelshift-0.1.0.dist-info/top_level.txt,sha256=3d2NcfPXrOeovneJake07097D7ZHHjKHkykxCDbFoe4,11
16
+ modelshift-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ modelshift