modelshift 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- modelshift/__init__.py +3 -0
- modelshift/baseline.py +37 -0
- modelshift/drift/__init__.py +3 -0
- modelshift/drift/feature_drift.py +50 -0
- modelshift/drift/prediction_drift.py +111 -0
- modelshift/drift/severity.py +239 -0
- modelshift/monitor.py +317 -0
- modelshift/selftest.py +398 -0
- modelshift/storage/__init__.py +3 -0
- modelshift/storage/sqlite_store.py +13 -0
- modelshift/utils/__init__.py +3 -0
- modelshift/utils/helpers.py +5 -0
- modelshift-0.1.0.dist-info/METADATA +129 -0
- modelshift-0.1.0.dist-info/RECORD +16 -0
- modelshift-0.1.0.dist-info/WHEEL +5 -0
- modelshift-0.1.0.dist-info/top_level.txt +1 -0
modelshift/__init__.py
ADDED
modelshift/baseline.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class BaselineWindow:
    """
    Holder for the reference ("baseline") dataset representing normal
    model behavior.

    Attributes:
        data: Defensive copy of the supplied DataFrame.
        feature_names: Column names, in baseline order.
        num_samples: Row count of the baseline window.
    """

    def __init__(self, data: pd.DataFrame):
        """Validate *data* and store a defensive copy plus metadata."""
        self._validate(data)
        self.data = data.copy()
        self.feature_names = list(data.columns)
        self.num_samples = len(data)

    @staticmethod
    def _validate(data):
        """Reject anything that is not a non-empty pandas DataFrame."""
        if not isinstance(data, pd.DataFrame):
            raise TypeError("Baseline data must be a pandas DataFrame")
        if data.empty:
            raise ValueError("Baseline data cannot be empty")

    def get_data(self) -> pd.DataFrame:
        """Return a copy of the baseline data (callers may mutate freely)."""
        return self.data.copy()

    def summary(self) -> dict:
        """Return basic metadata about the baseline window."""
        return dict(
            num_samples=self.num_samples,
            num_features=len(self.feature_names),
            feature_names=self.feature_names,
        )
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
from scipy.stats import ks_2samp
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def compute_feature_drift(
    baseline_data: pd.DataFrame,
    live_data: pd.DataFrame
) -> dict:
    """
    Run a per-feature Kolmogorov-Smirnov two-sample test between the
    baseline window and the live window.

    Returns:
        dict mapping feature name -> {"ks_statistic": float, "p_value": float}

    Raises:
        TypeError: if either input is not a pandas DataFrame.
        ValueError: if either frame is empty or the column lists differ.
    """
    # Input validation (inlined; same checks and messages as the shared
    # validator used elsewhere in this module).
    if not isinstance(baseline_data, pd.DataFrame):
        raise TypeError("Baseline data must be a pandas DataFrame")
    if not isinstance(live_data, pd.DataFrame):
        raise TypeError("Live data must be a pandas DataFrame")
    if baseline_data.empty or live_data.empty:
        raise ValueError("Baseline and live data cannot be empty")
    if list(baseline_data.columns) != list(live_data.columns):
        raise ValueError("Baseline and live data must have identical features")

    results = {}
    for column in baseline_data.columns:
        # NaNs are dropped per column so missing values do not poison the test.
        reference = baseline_data[column].dropna()
        current = live_data[column].dropna()
        statistic, p_value = ks_2samp(reference, current)
        results[column] = {
            "ks_statistic": float(statistic),
            "p_value": float(p_value),
        }
    return results
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _validate_inputs(baseline_data, live_data):
|
|
40
|
+
if not isinstance(baseline_data, pd.DataFrame):
|
|
41
|
+
raise TypeError("Baseline data must be a pandas DataFrame")
|
|
42
|
+
|
|
43
|
+
if not isinstance(live_data, pd.DataFrame):
|
|
44
|
+
raise TypeError("Live data must be a pandas DataFrame")
|
|
45
|
+
|
|
46
|
+
if baseline_data.empty or live_data.empty:
|
|
47
|
+
raise ValueError("Baseline and live data cannot be empty")
|
|
48
|
+
|
|
49
|
+
if list(baseline_data.columns) != list(live_data.columns):
|
|
50
|
+
raise ValueError("Baseline and live data must have identical features")
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from scipy.stats import ks_2samp
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def compute_prediction_drift(
    baseline_predictions: np.ndarray,
    live_predictions: np.ndarray
) -> dict:
    """
    Quantify drift in prediction behavior between two windows.

    Signals:
        1) KS two-sample test on the probability distributions.
        2) Change in mean binary entropy (confidence shift), using full
           binary entropy -(p*log(p) + (1-p)*log(1-p)).
    Plus lightweight mean/median/std diagnostics for dashboards/reports.

    Expects 1D probability arrays with values in [0, 1].
    """
    reference = _prepare_predictions("baseline", baseline_predictions)
    current = _prepare_predictions("live", live_predictions)

    # Distribution-level drift.
    ks_stat, p_value = ks_2samp(reference, current)

    # Confidence shift via mean binary entropy.
    reference_entropy = _binary_entropy_mean(reference)
    current_entropy = _binary_entropy_mean(current)
    delta_entropy = current_entropy - reference_entropy

    # Simple shape/center diagnostics.
    ref_mean, ref_median, ref_std = (
        float(np.mean(reference)), float(np.median(reference)), float(np.std(reference))
    )
    cur_mean, cur_median, cur_std = (
        float(np.mean(current)), float(np.median(current)), float(np.std(current))
    )

    return {
        "ks_statistic": float(ks_stat),
        "p_value": float(p_value),

        "baseline_entropy": round(float(reference_entropy), 6),
        "live_entropy": round(float(current_entropy), 6),
        "entropy_change": round(float(delta_entropy), 6),
        "abs_entropy_change": round(float(abs(delta_entropy)), 6),

        "baseline_mean_prob": round(ref_mean, 6),
        "live_mean_prob": round(cur_mean, 6),
        "mean_prob_shift": round(float(cur_mean - ref_mean), 6),

        "baseline_median_prob": round(ref_median, 6),
        "live_median_prob": round(cur_median, 6),
        "median_prob_shift": round(float(cur_median - ref_median), 6),

        "baseline_std_prob": round(ref_std, 6),
        "live_std_prob": round(cur_std, 6),
        "std_prob_shift": round(float(cur_std - ref_std), 6),

        "n_baseline": int(reference.size),
        "n_live": int(current.size),
    }
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _binary_entropy_mean(preds: np.ndarray) -> float:
|
|
67
|
+
"""
|
|
68
|
+
Mean binary entropy for probability predictions:
|
|
69
|
+
H(p) = -(p*log(p) + (1-p)*log(1-p))
|
|
70
|
+
|
|
71
|
+
Uses natural log (nats).
|
|
72
|
+
"""
|
|
73
|
+
eps = 1e-9
|
|
74
|
+
p = np.clip(preds.astype(float), eps, 1.0 - eps)
|
|
75
|
+
entropy = -(p * np.log(p) + (1.0 - p) * np.log(1.0 - p))
|
|
76
|
+
return float(np.mean(entropy))
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _prepare_predictions(name: str, arr) -> np.ndarray:
|
|
80
|
+
"""
|
|
81
|
+
Validate and normalize prediction arrays to a clean 1D float numpy array.
|
|
82
|
+
"""
|
|
83
|
+
if arr is None:
|
|
84
|
+
raise ValueError(f"{name.capitalize()} predictions cannot be None")
|
|
85
|
+
|
|
86
|
+
if not isinstance(arr, np.ndarray):
|
|
87
|
+
# Allow lists/Series while staying user-friendly
|
|
88
|
+
try:
|
|
89
|
+
arr = np.asarray(arr, dtype=float)
|
|
90
|
+
except Exception as exc:
|
|
91
|
+
raise TypeError(
|
|
92
|
+
f"{name.capitalize()} predictions must be a numpy array or array-like of numeric values"
|
|
93
|
+
) from exc
|
|
94
|
+
else:
|
|
95
|
+
arr = arr.astype(float, copy=False)
|
|
96
|
+
|
|
97
|
+
arr = np.ravel(arr)
|
|
98
|
+
|
|
99
|
+
if arr.size == 0:
|
|
100
|
+
raise ValueError(f"{name.capitalize()} prediction array cannot be empty")
|
|
101
|
+
|
|
102
|
+
if not np.all(np.isfinite(arr)):
|
|
103
|
+
raise ValueError(f"{name.capitalize()} predictions contain NaN/Inf values")
|
|
104
|
+
|
|
105
|
+
# We treat these as probability predictions for entropy-based drift
|
|
106
|
+
if np.min(arr) < 0.0 or np.max(arr) > 1.0:
|
|
107
|
+
raise ValueError(
|
|
108
|
+
f"{name.capitalize()} predictions must be probability values in [0, 1]"
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
return arr
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, List, Optional
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# ----------------------------
# Basic per-signal thresholds
# ----------------------------
# KS-statistic cutoffs used to bucket per-feature drift into
# LOW / MEDIUM / HIGH / CRITICAL (see classify_severity).
FEATURE_KS_LOW = 0.10
FEATURE_KS_MEDIUM = 0.20
FEATURE_KS_HIGH = 0.35

# KS-statistic cutoffs for drift in the prediction distribution
# (used by classify_drift_taxonomy / evaluate_drift_state).
PRED_KS_WARNING = 0.10
PRED_KS_CRITICAL = 0.15

# Absolute change in mean binary entropy (nats) treated as a
# confidence shift in model outputs.
ENTROPY_DELTA_WARNING = 0.01
ENTROPY_DELTA_CRITICAL = 0.02
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def classify_severity(ks_statistic: float) -> str:
    """
    Map a KS-like drift statistic onto a severity bucket.

    Returns one of "LOW" / "MEDIUM" / "HIGH" / "CRITICAL".
    Non-numeric input is coerced to 0.0 and therefore reads as "LOW".
    """
    value = _safe_float(ks_statistic, default=0.0)
    # Ascending thresholds; first bound the value falls under wins.
    for bound, label in (
        (FEATURE_KS_LOW, "LOW"),
        (FEATURE_KS_MEDIUM, "MEDIUM"),
        (FEATURE_KS_HIGH, "HIGH"),
    ):
        if value < bound:
            return label
    return "CRITICAL"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def compute_health_score(feature_drift_results: dict) -> float:
    """
    Overall model health score in [0, 100] from feature drift; higher is
    healthier. Based on the average feature KS statistic:

        health = max(0, 100 * (1 - avg_ks))

    Raises:
        ValueError: when no usable feature drift entries are present.
    """
    stats = summarize_feature_drift(feature_drift_results)
    if stats["feature_count"] == 0:
        raise ValueError("Feature drift results cannot be empty")

    raw_score = 100.0 * (1.0 - stats["avg_ks"])
    return round(float(max(0.0, raw_score)), 2)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def summarize_feature_drift(feature_drift_results: Optional[dict]) -> Dict[str, Any]:
    """
    Reduce per-feature drift results to summary statistics.

    Tolerates None / non-dict input and malformed per-feature entries
    (non-dict values, missing or non-numeric "ks_statistic") by skipping
    them. Returns feature_count, avg_ks, max_ks, max_feature, ks_values.
    """
    empty_summary = {
        "feature_count": 0,
        "avg_ks": 0.0,
        "max_ks": 0.0,
        "max_feature": None,
        "ks_values": [],
    }

    if not isinstance(feature_drift_results, dict):
        return dict(empty_summary)

    pairs: List[tuple[str, float]] = []
    for name, entry in feature_drift_results.items():
        if not isinstance(entry, dict):
            continue
        raw = entry.get("ks_statistic")
        if raw is None:
            continue
        # Inlined numeric coercion: skip anything float() rejects.
        try:
            ks = float(raw)
        except (TypeError, ValueError):
            continue
        pairs.append((str(name), ks))

    if not pairs:
        return dict(empty_summary)

    ks_values = [v for _, v in pairs]
    top_name, top_ks = max(pairs, key=lambda item: item[1])

    return {
        "feature_count": len(ks_values),
        "avg_ks": round(sum(ks_values) / len(ks_values), 6),
        "max_ks": round(float(top_ks), 6),
        "max_feature": top_name,
        "ks_values": [round(float(v), 6) for v in ks_values],
    }
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def classify_drift_taxonomy(
    feature_drift_results: Optional[dict] = None,
    prediction_drift_results: Optional[dict] = None,
) -> str:
    """
    Cross-classify feature drift against prediction drift.

    Returns:
        "STABLE"                - neither signal elevated
        "ROBUST_SHIFT"          - inputs drifted, behavior steady
        "SILENT_BEHAVIOR_DRIFT" - behavior drifted without input drift
        "DEGRADING_DRIFT"       - both drifted
    """
    feature_summary = summarize_feature_drift(feature_drift_results)
    worst_feature_ks = _safe_float(feature_summary.get("max_ks"), default=0.0)

    prediction_ks = 0.0
    if isinstance(prediction_drift_results, dict):
        prediction_ks = _safe_float(
            prediction_drift_results.get("ks_statistic"), default=0.0
        )

    inputs_drifted = worst_feature_ks >= FEATURE_KS_MEDIUM
    behavior_drifted = prediction_ks >= PRED_KS_WARNING

    if inputs_drifted and behavior_drifted:
        return "DEGRADING_DRIFT"
    if inputs_drifted:
        return "ROBUST_SHIFT"
    if behavior_drifted:
        return "SILENT_BEHAVIOR_DRIFT"
    return "STABLE"
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def evaluate_drift_state(
    feature_drift_results: Optional[dict] = None,
    prediction_drift_results: Optional[dict] = None,
) -> Dict[str, Any]:
    """
    Composite severity + status decision engine.

    Combines average feature KS, worst-feature KS, prediction KS and the
    entropy delta into a weighted composite score, then derives:
        severity, status, taxonomy, health_score, signals, thresholds.
    """
    feature_summary = summarize_feature_drift(feature_drift_results)
    avg_ks = _safe_float(feature_summary.get("avg_ks"), default=0.0)
    max_ks = _safe_float(feature_summary.get("max_ks"), default=0.0)

    pred_ks, entropy_delta = 0.0, 0.0
    if isinstance(prediction_drift_results, dict):
        pred_ks = _safe_float(prediction_drift_results.get("ks_statistic"), default=0.0)
        entropy_delta = _safe_float(
            prediction_drift_results.get("entropy_change"), default=0.0
        )

    # Each raw signal is squashed into [0, 1] against its reference threshold.
    avg_component = min(1.0, avg_ks / FEATURE_KS_MEDIUM)            # avg drift
    max_component = min(1.0, max_ks / FEATURE_KS_HIGH)              # worst feature
    pred_component = min(1.0, pred_ks / PRED_KS_CRITICAL)           # behavior drift
    entropy_component = min(1.0, abs(entropy_delta) / ENTROPY_DELTA_CRITICAL)

    # Weighted composite in [0, 1]; prediction behavior weighs heaviest.
    composite_score = (
        0.30 * avg_component +
        0.25 * max_component +
        0.35 * pred_component +
        0.10 * entropy_component
    )

    severity = _classify_composite_severity(composite_score)

    # Status favors prediction drift (behavior-centric monitoring).
    if pred_ks >= PRED_KS_CRITICAL or max_ks >= FEATURE_KS_HIGH:
        status = "CRITICAL_DRIFT"
    elif pred_ks >= PRED_KS_WARNING or max_ks >= FEATURE_KS_MEDIUM or avg_ks >= FEATURE_KS_LOW:
        status = "WARNING_DRIFT"
    else:
        status = "STABLE"

    taxonomy = classify_drift_taxonomy(feature_drift_results, prediction_drift_results)

    # Health score is only meaningful when feature drift exists.
    health_score = (
        compute_health_score(feature_drift_results)
        if feature_summary["feature_count"] > 0
        else None
    )

    return {
        "severity": severity,
        "status": status,
        "taxonomy": taxonomy,
        "health_score": health_score,
        "signals": {
            "avg_feature_ks": round(avg_ks, 6),
            "max_feature_ks": round(max_ks, 6),
            "max_feature_name": feature_summary.get("max_feature"),
            "prediction_ks": round(pred_ks, 6),
            "entropy_change": round(entropy_delta, 6),
            "composite_score": round(float(composite_score), 6),
            "feature_count": int(feature_summary.get("feature_count", 0)),
        },
        "thresholds": {
            "feature_ks_low": FEATURE_KS_LOW,
            "feature_ks_medium": FEATURE_KS_MEDIUM,
            "feature_ks_high": FEATURE_KS_HIGH,
            "pred_ks_warning": PRED_KS_WARNING,
            "pred_ks_critical": PRED_KS_CRITICAL,
            "entropy_delta_warning": ENTROPY_DELTA_WARNING,
            "entropy_delta_critical": ENTROPY_DELTA_CRITICAL,
        },
    }
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
# ----------------------------
|
|
216
|
+
# Internal helpers
|
|
217
|
+
# ----------------------------
|
|
218
|
+
def _classify_composite_severity(score: float) -> str:
|
|
219
|
+
"""
|
|
220
|
+
Composite score is already normalized 0..1.
|
|
221
|
+
"""
|
|
222
|
+
s = max(0.0, min(1.0, _safe_float(score, default=0.0)))
|
|
223
|
+
|
|
224
|
+
if s < 0.20:
|
|
225
|
+
return "LOW"
|
|
226
|
+
if s < 0.45:
|
|
227
|
+
return "MEDIUM"
|
|
228
|
+
if s < 0.70:
|
|
229
|
+
return "HIGH"
|
|
230
|
+
return "CRITICAL"
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _safe_float(value: Any, default: Optional[float] = 0.0) -> Optional[float]:
|
|
234
|
+
try:
|
|
235
|
+
if value is None:
|
|
236
|
+
return default
|
|
237
|
+
return float(value)
|
|
238
|
+
except (TypeError, ValueError):
|
|
239
|
+
return default
|
modelshift/monitor.py
ADDED
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import uuid
|
|
4
|
+
import requests
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from typing import Any, Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
import pandas as pd
|
|
10
|
+
|
|
11
|
+
from modelshift.baseline import BaselineWindow
|
|
12
|
+
from modelshift.drift.feature_drift import compute_feature_drift
|
|
13
|
+
from modelshift.drift.prediction_drift import compute_prediction_drift
|
|
14
|
+
from modelshift.drift.severity import (
|
|
15
|
+
classify_severity,
|
|
16
|
+
compute_health_score,
|
|
17
|
+
evaluate_drift_state,
|
|
18
|
+
summarize_feature_drift,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# -------------------------------------------------------------------
# Phase 2: Cloud SDK Configuration
# -------------------------------------------------------------------
# Module-level mutable config shared by init() and ModelMonitor.push().
# "api_key" stays None until init() is called; "endpoint" defaults to a
# local FastAPI dev server URL.
_CLOUD_CONFIG = {
    "api_key": None,
    "endpoint": "http://127.0.0.1:8000/api/v1/track"
}
|
|
28
|
+
|
|
29
|
+
def init(api_key: str, endpoint: str = "http://127.0.0.1:8000/api/v1/track"):
    """
    Initialize the ModelShift-Lite SDK with your cloud API key.

    Stores the key and endpoint in the module-level _CLOUD_CONFIG so that
    later ModelMonitor.push() calls can authenticate against the cloud
    dashboard.

    Args:
        api_key: API key issued by the cloud dashboard.
        endpoint: Ingest URL of the tracking API.
    """
    _CLOUD_CONFIG["api_key"] = api_key
    _CLOUD_CONFIG["endpoint"] = endpoint
    # Was an f-string with no placeholders (lint F541); plain literal now.
    print("[β] ModelShift SDK Authenticated. Cloud sync enabled.")
|
|
37
|
+
|
|
38
|
+
# -------------------------------------------------------------------
|
|
39
|
+
# Core Engine
|
|
40
|
+
# -------------------------------------------------------------------
|
|
41
|
+
class ModelMonitor:
    """
    Main interface for ModelShift-Lite monitoring.

    Handles:
        - baseline/live feature drift
        - baseline/live prediction drift
        - composite status/severity/taxonomy summary
        - optional cloud sync via push()
    """

    def __init__(self, reference_data: pd.DataFrame):
        """
        Initialize monitor with reference baseline data.

        Raises:
            TypeError: if reference_data is not a pandas DataFrame.
            ValueError: if reference_data is empty.
        """
        if not isinstance(reference_data, pd.DataFrame):
            raise TypeError("Reference data must be a pandas DataFrame")
        if reference_data.empty:
            raise ValueError("Reference data cannot be empty")

        self.baseline = BaselineWindow(reference_data.copy())

        # Data containers
        self.live_data: Optional[pd.DataFrame] = None

        # Feature drift
        self.feature_drift_results: Optional[Dict[str, Any]] = None

        # Prediction drift
        self.baseline_predictions: Optional[np.ndarray] = None
        self.live_predictions: Optional[np.ndarray] = None
        self.prediction_drift_results: Optional[Dict[str, Any]] = None

    # -----------------------
    # Data Update
    # -----------------------
    def update(self, live_data: pd.DataFrame):
        """
        Update monitor with new live data.

        The live frame must contain exactly the baseline columns; it is
        reordered to baseline column order for deterministic behavior.
        """
        if not isinstance(live_data, pd.DataFrame):
            raise TypeError("Live data must be a pandas DataFrame")
        if live_data.empty:
            raise ValueError("Live data cannot be empty")

        baseline_df = self.baseline.get_data()
        baseline_cols = list(baseline_df.columns)
        live_cols = list(live_data.columns)

        if set(live_cols) != set(baseline_cols):
            missing = [c for c in baseline_cols if c not in live_cols]
            extra = [c for c in live_cols if c not in baseline_cols]
            raise ValueError(
                f"Live data columns must match baseline columns. Missing={missing}, Extra={extra}"
            )

        # Reorder to baseline column order for deterministic behavior
        self.live_data = live_data[baseline_cols].copy()

    # -----------------------
    # Feature Drift
    # -----------------------
    def compute_feature_drift(self) -> dict:
        """
        Compute feature-level drift between baseline and live data.

        Raises:
            RuntimeError: if update() has not been called yet.
        """
        if self.live_data is None:
            raise RuntimeError("Live data not set. Call update() first.")

        self.feature_drift_results = compute_feature_drift(
            self.baseline.get_data(),
            self.live_data
        )
        return self.feature_drift_results

    def get_latest_feature_drift(self) -> dict:
        """Return the last computed feature drift results."""
        if self.feature_drift_results is None:
            raise RuntimeError("No feature drift computed yet.")
        return self.feature_drift_results

    def get_feature_severity(self) -> dict:
        """Return per-feature severity labels derived from KS statistics."""
        if self.feature_drift_results is None:
            raise RuntimeError("No feature drift computed yet.")

        severity = {}
        for feature, values in self.feature_drift_results.items():
            if not isinstance(values, dict):
                continue  # skip malformed entries rather than crash
            severity[feature] = classify_severity(values.get("ks_statistic", 0.0))

        return severity

    def get_model_health_score(self) -> float:
        """Return the 0-100 health score from feature drift results."""
        if self.feature_drift_results is None:
            raise RuntimeError("No feature drift computed yet.")
        return compute_health_score(self.feature_drift_results)

    def get_top_drifted_features(self, k: int = 5) -> List[Dict[str, Any]]:
        """
        Return the k features with the largest KS statistics, sorted
        descending, each with ks_statistic, p_value and severity.
        """
        if self.feature_drift_results is None:
            raise RuntimeError("No feature drift computed yet.")
        if not isinstance(k, int) or k <= 0:
            raise ValueError("k must be a positive integer")

        rows: List[Dict[str, Any]] = []
        for feature, values in self.feature_drift_results.items():
            if not isinstance(values, dict):
                continue
            ks = _safe_float(values.get("ks_statistic"), 0.0)
            pv = _safe_float(values.get("p_value"), None)
            rows.append({
                "feature": str(feature),
                "ks_statistic": round(ks, 6),
                "p_value": None if pv is None else round(pv, 6),
                "severity": classify_severity(ks),
            })

        rows.sort(key=lambda x: x["ks_statistic"], reverse=True)
        return rows[:k]

    def get_most_drifted_feature(self) -> Optional[Dict[str, Any]]:
        """Return the single worst-drifted feature, or None if there are none."""
        top = self.get_top_drifted_features(k=1)
        return top[0] if top else None

    # -----------------------
    # Prediction Drift
    # -----------------------
    def set_baseline_predictions(self, predictions):
        """Store the baseline prediction array (validated/flattened)."""
        self.baseline_predictions = _prepare_prediction_array(predictions, "baseline")

    def update_predictions(self, live_predictions):
        """Store the live prediction array (validated/flattened)."""
        self.live_predictions = _prepare_prediction_array(live_predictions, "live")

    def compute_prediction_drift(self) -> dict:
        """
        Compute prediction drift between stored baseline and live
        predictions.

        Raises:
            RuntimeError: if either prediction window has not been set.
        """
        if self.baseline_predictions is None:
            raise RuntimeError("Baseline predictions not set.")
        if self.live_predictions is None:
            raise RuntimeError("Live predictions not set.")

        self.prediction_drift_results = compute_prediction_drift(
            self.baseline_predictions,
            self.live_predictions
        )
        return self.prediction_drift_results

    def get_latest_prediction_drift(self) -> dict:
        """Return the last computed prediction drift results."""
        if self.prediction_drift_results is None:
            raise RuntimeError("No prediction drift computed yet.")
        return self.prediction_drift_results

    # -----------------------
    # Composite Summary
    # -----------------------
    def evaluate_health(self) -> Dict[str, Any]:
        """
        Combine feature and prediction drift into one decision payload
        (status, severity, taxonomy, health score, signals, thresholds).
        """
        if self.feature_drift_results is None:
            raise RuntimeError("No feature drift computed yet.")
        if self.prediction_drift_results is None:
            raise RuntimeError("No prediction drift computed yet.")

        decision = evaluate_drift_state(
            feature_drift_results=self.feature_drift_results,
            prediction_drift_results=self.prediction_drift_results,
        )

        feature_summary = summarize_feature_drift(self.feature_drift_results)
        top_features = self.get_top_drifted_features(k=5)
        most_feature = top_features[0] if top_features else None

        return {
            "status": decision.get("status"),
            "severity": decision.get("severity"),
            "taxonomy": decision.get("taxonomy"),
            "health_score": decision.get("health_score"),
            "feature_summary": feature_summary,
            "prediction_drift": self.prediction_drift_results,
            "top_drifted_features": top_features,
            "most_drifted_feature": most_feature,
            "signals": decision.get("signals", {}),
            "thresholds": decision.get("thresholds", {}),
        }

    def build_snapshot(self) -> Dict[str, Any]:
        """
        Build a best-effort snapshot of everything computed so far.
        Sections that have not been computed are simply omitted/None.
        """
        snapshot: Dict[str, Any] = {
            "feature_drift": self.feature_drift_results,
            "prediction_drift": self.prediction_drift_results,
        }

        if self.feature_drift_results is not None:
            snapshot["feature_severity"] = self.get_feature_severity()
            snapshot["health_score"] = self.get_model_health_score()
            snapshot["top_drifted_features"] = self.get_top_drifted_features(k=5)
            snapshot["most_drifted_feature"] = self.get_most_drifted_feature()

        if self.feature_drift_results is not None and self.prediction_drift_results is not None:
            snapshot["decision"] = self.evaluate_health()

        return snapshot

    # -----------------------
    # Phase 2: Cloud Sync Method
    # -----------------------
    def push(self) -> Optional[Dict[str, Any]]:
        """
        Send the local drift snapshot to the configured cloud dashboard.

        Returns the server's JSON response, or None when the API key is
        missing or the request fails.
        """
        if not _CLOUD_CONFIG["api_key"]:
            print("[!] API Key Missing. Please add 'modelshift.init(api_key=\"YOUR_KEY\")' at the top of your script.")
            return None

        snapshot = self.build_snapshot()
        decision = snapshot.get("decision", {})

        mdf = snapshot.get("most_drifted_feature") or {}
        pred_drift = snapshot.get("prediction_drift") or {}

        # Package the data exactly how the dashboard expects it
        run_id = f"run_{uuid.uuid4().hex[:8]}"
        payload = {
            "run_id": run_id,
            "generated_at": datetime.now(timezone.utc).isoformat(),
            "status": decision.get("status", "UNKNOWN"),
            "window_size": len(self.live_data) if self.live_data is not None else 0,

            # Dashboard Graph Metrics
            "clean_health": 100.0,
            "drifted_health": decision.get("health_score", 0.0),
            "drifted_pred_ks": pred_drift.get("ks_statistic", 0.0),
            # FIX: compute_prediction_drift emits "entropy_change", not
            # "delta_entropy" — the old key always yielded the 0.0 default.
            "drifted_entropy_change": pred_drift.get("entropy_change", 0.0),
            "drifted_last_window_feature": mdf.get("feature"),
            "drifted_last_window_ks": mdf.get("ks_statistic"),

            "evaluation": snapshot
        }

        # The Security Checkpoint
        headers = {
            "Content-Type": "application/json",
            "X-API-Key": _CLOUD_CONFIG["api_key"]
        }

        try:
            print(f"[~] Beaming data to {_CLOUD_CONFIG['endpoint']}...")
            # FIX: bounded timeout so a dead endpoint cannot hang the caller.
            response = requests.post(
                _CLOUD_CONFIG["endpoint"], json=payload, headers=headers, timeout=10
            )
            response.raise_for_status()
            print(f"[β] Successfully synced run '{run_id}' to ModelShift Cloud.")
            return response.json()
        except requests.exceptions.RequestException as e:
            print(f"[!] ModelShift Cloud Sync Failed: {e}")
            if hasattr(e, 'response') and e.response is not None:
                print(f"[!] Server Context: {e.response.text}")
            return None
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
# -----------------------
|
|
294
|
+
# Internal helpers
|
|
295
|
+
# -----------------------
|
|
296
|
+
def _prepare_prediction_array(values, name: str) -> np.ndarray:
|
|
297
|
+
if values is None:
|
|
298
|
+
raise ValueError(f"{name.capitalize()} predictions cannot be None")
|
|
299
|
+
try:
|
|
300
|
+
arr = np.asarray(values, dtype=float).reshape(-1)
|
|
301
|
+
except Exception as exc:
|
|
302
|
+
raise TypeError(f"{name.capitalize()} predictions must be numeric array-like") from exc
|
|
303
|
+
|
|
304
|
+
if arr.size == 0:
|
|
305
|
+
raise ValueError(f"{name.capitalize()} predictions cannot be empty")
|
|
306
|
+
if not np.all(np.isfinite(arr)):
|
|
307
|
+
raise ValueError(f"{name.capitalize()} predictions contain NaN/Inf")
|
|
308
|
+
return arr
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _safe_float(value, default=None):
|
|
312
|
+
try:
|
|
313
|
+
if value is None:
|
|
314
|
+
return default
|
|
315
|
+
return float(value)
|
|
316
|
+
except (TypeError, ValueError):
|
|
317
|
+
return default
|
modelshift/selftest.py
ADDED
|
@@ -0,0 +1,398 @@
|
|
|
1
|
+
# selftest.py (repo root)
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
6
|
+
import math
|
|
7
|
+
import time
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
12
|
+
from modelshift.drift.feature_drift import compute_feature_drift
|
|
13
|
+
from modelshift.drift.prediction_drift import compute_prediction_drift
|
|
14
|
+
from modelshift.drift.severity import compute_health_score
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# -----------------------------
|
|
18
|
+
# Helpers (robust schema)
|
|
19
|
+
# -----------------------------
|
|
20
|
+
def _to_float(x: Any, default: float = 0.0) -> float:
|
|
21
|
+
try:
|
|
22
|
+
v = float(x)
|
|
23
|
+
if np.isfinite(v):
|
|
24
|
+
return float(v)
|
|
25
|
+
except Exception:
|
|
26
|
+
pass
|
|
27
|
+
return float(default)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _extract_pred_map(pd_: Any) -> Dict[str, Any]:
|
|
31
|
+
if not isinstance(pd_, dict):
|
|
32
|
+
return {}
|
|
33
|
+
for k in ("prediction_drift", "prediction_drift_results", "results"):
|
|
34
|
+
v = pd_.get(k)
|
|
35
|
+
if isinstance(v, dict):
|
|
36
|
+
return v
|
|
37
|
+
return pd_
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _extract_fd_map(fd: Any) -> Dict[str, Any]:
|
|
41
|
+
if not isinstance(fd, dict):
|
|
42
|
+
return {}
|
|
43
|
+
for k in ("feature_drift_results", "feature_drift", "results"):
|
|
44
|
+
v = fd.get(k)
|
|
45
|
+
if isinstance(v, dict):
|
|
46
|
+
return v
|
|
47
|
+
return fd
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _adapt_pred(pd_: Any) -> Dict[str, Any]:
    """Normalize a prediction-drift payload so it always carries float
    'ks_statistic' and 'p_value' entries, tolerating legacy key names.

    All other keys of the unwrapped mapping are preserved.
    """
    mapping = _extract_pred_map(pd_)
    if not isinstance(mapping, dict):
        return {}

    def first_present(keys, fallback):
        # First key actually present wins (mirrors the nested .get chain).
        for key in keys:
            if key in mapping:
                return mapping[key]
        return fallback

    ks_raw = first_present(("ks_statistic", "ks", "ks_stat", "statistic"), 0.0)
    pv_raw = first_present(("p_value", "p", "pvalue"), 1.0)

    normalized = dict(mapping)
    normalized["ks_statistic"] = _to_float(ks_raw, 0.0)
    normalized["p_value"] = _to_float(pv_raw, 1.0)
    return normalized
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _adapt_fd(fd: Any) -> Dict[str, Dict[str, float]]:
    """Normalize per-feature drift stats to a uniform schema.

    Output shape: {feature_name: {"ks_statistic": float, "p_value": float}}.
    Legacy key names are accepted; non-dict per-feature entries are skipped.
    """
    mapping = _extract_fd_map(fd)
    if not isinstance(mapping, dict):
        return {}

    def pick(entry, keys, fallback):
        # First key actually present wins (mirrors the nested .get chain).
        for key in keys:
            if key in entry:
                return entry[key]
        return fallback

    normalized: Dict[str, Dict[str, float]] = {}
    for feature, stats in mapping.items():
        if not isinstance(stats, dict):
            continue
        ks_raw = pick(stats, ("ks_statistic", "ks", "ks_stat", "statistic", "D"), 0.0)
        pv_raw = pick(stats, ("p_value", "p", "pvalue", "p_val"), 1.0)
        normalized[str(feature)] = {
            "ks_statistic": _to_float(ks_raw, 0.0),
            "p_value": _to_float(pv_raw, 1.0),
        }
    return normalized
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _call_health(fd: Any, pd_: Any) -> Tuple[float, str]:
    """Compute a (health_score, mode) pair from drift results.

    Tries both known compute_health_score() call signatures (newer
    single-dict payload, then the older two-argument form); if neither
    yields a usable score, falls back to a local weighted-KS estimate.
    """
    fd_fixed = _adapt_fd(fd)
    pd_fixed = _adapt_pred(pd_)

    def interpret(result):
        # Pull (score, mode) out of either a dict or a bare number;
        # None means "unusable, try the next call form".
        if isinstance(result, dict):
            score = result.get("health_score", result.get("score", result.get("health", None)))
            mode = result.get("mode", result.get("health_compute_mode", "severity"))
            if score is not None:
                return float(score), str(mode)
        if isinstance(result, (int, float)):
            return float(result), "severity"
        return None

    # Newer API takes one payload dict; older API takes two positionals.
    attempts = (
        lambda: compute_health_score({"feature_drift": fd_fixed, "prediction_drift": pd_fixed}),
        lambda: compute_health_score(fd_fixed, pd_fixed),
    )
    for attempt in attempts:
        try:
            parsed = interpret(attempt())
        except Exception:
            continue
        if parsed is not None:
            return parsed

    # Fallback: blend prediction KS (70%) with mean feature KS (30%),
    # clamp to [0, 1], and map onto a 0-100 health scale.
    ks_values = [v["ks_statistic"] for v in fd_fixed.values()] if fd_fixed else []
    avg_ks = float(np.mean(ks_values)) if ks_values else 0.0
    pred_ks = _to_float(pd_fixed.get("ks_statistic"), 0.0)
    blended = min(max(0.70 * pred_ks + 0.30 * avg_ks, 0.0), 1.0)
    score = 100.0 * (1.0 - blended)
    return float(np.clip(score, 0.0, 100.0)), "fallback"
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _entropy(probs: np.ndarray, bins: int = 24) -> float:
|
|
118
|
+
p = np.clip(np.asarray(probs, dtype=float), 0.0, 1.0)
|
|
119
|
+
h, _ = np.histogram(p, bins=bins, range=(0.0, 1.0), density=False)
|
|
120
|
+
h = h.astype(float)
|
|
121
|
+
if h.sum() <= 0:
|
|
122
|
+
return 0.0
|
|
123
|
+
q = h / h.sum()
|
|
124
|
+
q = q[q > 0]
|
|
125
|
+
return float(-np.sum(q * np.log2(q)))
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _hist(probs: np.ndarray, bins: int = 32) -> Dict[str, Any]:
|
|
129
|
+
p = np.clip(np.asarray(probs, dtype=float), 0.0, 1.0)
|
|
130
|
+
h, edges = np.histogram(p, bins=bins, range=(0.0, 1.0), density=False)
|
|
131
|
+
return {
|
|
132
|
+
"bins": [float(x) for x in edges.tolist()],
|
|
133
|
+
"counts": [int(x) for x in h.tolist()],
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _top_features(fd: Any, k: int = 8) -> List[Dict[str, Any]]:
    """Return up to *k* features ranked by KS statistic (descending).

    Each row carries the feature name, its KS statistic, p-value, and a
    coarse severity label derived from fixed KS thresholds.
    """
    # (threshold, label) pairs checked from most to least severe.
    bands = ((0.35, "CRITICAL"), (0.20, "HIGH"), (0.10, "MEDIUM"))

    rows: List[Dict[str, Any]] = []
    for feature, stats in _adapt_fd(fd).items():
        ks = _to_float(stats.get("ks_statistic"), 0.0)
        severity = next((label for cutoff, label in bands if ks >= cutoff), "LOW")
        rows.append(
            {
                "feature": feature,
                "ks_statistic": ks,
                "p_value": _to_float(stats.get("p_value"), 1.0),
                "severity": severity,
            }
        )
    return sorted(rows, key=lambda row: row["ks_statistic"], reverse=True)[:k]
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
# -----------------------------
|
|
157
|
+
# Synthetic scenario generator
|
|
158
|
+
# -----------------------------
|
|
159
|
+
def _make_synthetic(seed: int, n: int = 2400, d: int = 14) -> Dict[str, Any]:
|
|
160
|
+
rng = np.random.default_rng(int(seed))
|
|
161
|
+
|
|
162
|
+
# baseline features
|
|
163
|
+
base = rng.normal(0, 1.0, size=(n, d))
|
|
164
|
+
base_df = pd.DataFrame(base, columns=[f"f{i}" for i in range(d)])
|
|
165
|
+
|
|
166
|
+
# clean ~ baseline (small noise)
|
|
167
|
+
clean = base + rng.normal(0, 0.08, size=(n, d))
|
|
168
|
+
clean_df = pd.DataFrame(clean, columns=base_df.columns)
|
|
169
|
+
|
|
170
|
+
# drifted (shift subset of features)
|
|
171
|
+
drift = base.copy()
|
|
172
|
+
drift[:, 0] += 2.0
|
|
173
|
+
drift[:, 1] += 1.3
|
|
174
|
+
drift[:, 2] *= 1.8
|
|
175
|
+
drift[:, 3] += rng.normal(0, 2.2, size=n)
|
|
176
|
+
drift_df = pd.DataFrame(drift, columns=base_df.columns)
|
|
177
|
+
|
|
178
|
+
# synthetic "prediction probs"
|
|
179
|
+
base_p = rng.beta(2.2, 2.6, size=n) # mild center
|
|
180
|
+
clean_p = np.clip(base_p + rng.normal(0, 0.02, size=n), 0, 1)
|
|
181
|
+
drift_p = np.clip(1.0 - base_p + rng.normal(0, 0.03, size=n), 0, 1) # strong invert shift
|
|
182
|
+
|
|
183
|
+
# FIX: Cleaned up the dictionary return to prevent syntax errors
|
|
184
|
+
return {
|
|
185
|
+
"base_X": base_df,
|
|
186
|
+
"clean_X": clean_df,
|
|
187
|
+
"drift_X": drift_df,
|
|
188
|
+
"base_p": base_p,
|
|
189
|
+
"clean_p": clean_p,
|
|
190
|
+
"drift_p": drift_p,
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
# -----------------------------
|
|
195
|
+
# Public API
|
|
196
|
+
# -----------------------------
|
|
197
|
+
def run_selftest(seed: int = 7, test: str = "suite") -> Dict[str, Any]:
    """
    Run a synthetic end-to-end drift-detection self-test.

    Returns a payload designed for BOTH:
    - readable JSON
    - rich UI animations (histograms, gauges, feature bars)

    test options:
    - "prediction"
    - "feature"
    - "pipeline"
    - "suite" (default, runs 3 scenarios)
    - "concept"

    Args:
        seed: Base RNG seed for the synthetic scenarios.
        test: Which check set to enforce (see options above); unknown
            values fall back to "suite".

    Returns:
        A dict with the overall verdict ("ok"), per-check pass/fail
        entries, and per-case metrics plus visualization data. On any
        internal error a dict with ok=False, the error string, and a
        traceback is returned instead of raising.
    """
    t0 = time.time()
    # Normalize the requested test name; None/empty means the full suite.
    test = (test or "suite").strip().lower()

    try:
        if test not in {"prediction", "feature", "pipeline", "suite", "concept"}:
            test = "suite"

        cases: List[Dict[str, Any]] = []
        checks: List[Dict[str, Any]] = []

        # "suite" runs 3 scenarios (varied seeds) so results look
        # realistic; single-test modes use just the provided seed.
        seeds = [seed] if test != "suite" else [seed, seed + 11, seed + 23]

        for idx, s in enumerate(seeds, start=1):
            sim = _make_synthetic(seed=s)
            base_X = sim["base_X"]
            clean_X = sim["clean_X"]
            drift_X = sim["drift_X"]
            base_p = sim["base_p"]
            clean_p = sim["clean_p"]
            drift_p = sim["drift_p"]

            # Concept drift: input features stay identical to the
            # baseline while the prediction distribution shifts.
            if test == "concept":
                drift_X = base_X.copy()
                drift_p = np.clip(base_p + 0.35, 0, 1)

            # Feature drift vs. the clean (control) and drifted windows.
            fd_clean = compute_feature_drift(base_X, clean_X)
            fd_drift = compute_feature_drift(base_X, drift_X)

            pd_clean = compute_prediction_drift(base_p, clean_p)
            pd_drift = compute_prediction_drift(base_p, drift_p)

            # Normalize result schemas (key names vary across versions).
            pd_clean_m = _adapt_pred(pd_clean)
            pd_drift_m = _adapt_pred(pd_drift)

            pred_ks_clean = _to_float(pd_clean_m.get("ks_statistic"), 0.0)
            pred_ks_drift = _to_float(pd_drift_m.get("ks_statistic"), 0.0)

            # Entropy deltas capture how much the shape of the
            # prediction-probability distribution changed vs. baseline.
            ent_base = _entropy(base_p)
            ent_clean = _entropy(clean_p)
            ent_drift = _entropy(drift_p)
            delta_ent_clean = float(ent_clean - ent_base)
            delta_ent_drift = float(ent_drift - ent_base)

            # Health score via the severity pipeline when available
            # (_call_health falls back to a local estimate otherwise).
            health_clean, mode_clean = _call_health(fd_clean, pd_clean)
            health_drift, mode_drift = _call_health(fd_drift, pd_drift)

            # Histograms feed the dashboard visualizations.
            h_base = _hist(base_p, bins=40)
            h_clean = _hist(clean_p, bins=40)
            h_drift = _hist(drift_p, bins=40)

            top_feat = _top_features(fd_drift, k=8)

            cases.append(
                {
                    "case_id": f"C{idx}",
                    "seed": int(s),
                    "name": (
                        "Prediction Drift Test" if test == "prediction"
                        else "Feature Drift Test" if test == "feature"
                        else "Pipeline Health Test" if test == "pipeline"
                        else "Concept Drift Test" if test == "concept"
                        else f"Suite Scenario {idx}"
                    ),
                    "metrics": {
                        "pred_ks_clean": float(pred_ks_clean),
                        "pred_ks_drifted": float(pred_ks_drift),
                        "delta_entropy_clean": float(delta_ent_clean),
                        "delta_entropy_drifted": float(delta_ent_drift),
                        "health_clean": float(health_clean),
                        "health_drifted": float(health_drift),
                        "health_mode_clean": str(mode_clean),
                        "health_mode_drifted": str(mode_drift),
                    },
                    "viz": {
                        "pred_hist": {
                            "bins": h_base["bins"],
                            "baseline": h_base["counts"],
                            "clean": h_clean["counts"],
                            "drifted": h_drift["counts"],
                        },
                        "top_drifted_features": top_feat,
                    },
                }
            )

        # Decide which checks to enforce based on the selected test.
        # Only the FIRST case determines pass/fail.
        c0 = cases[0]
        m0 = c0["metrics"]
        pred_clean = float(m0["pred_ks_clean"])
        pred_drift = float(m0["pred_ks_drifted"])
        # NOTE: these reuse the h_clean/h_drift names from the loop above
        # but now hold health scores, not histograms.
        h_clean = float(m0["health_clean"])
        h_drift = float(m0["health_drifted"])

        if test in {"prediction", "pipeline", "suite"}:
            checks.append(
                {
                    "name": "Prediction drift should be low for clean",
                    "pass": bool(pred_clean < 0.08),
                    "value": pred_clean,
                    "threshold": "< 0.08",
                }
            )
            checks.append(
                {
                    "name": "Prediction drift should be high for drifted",
                    "pass": bool(pred_drift > 0.10),
                    "value": pred_drift,
                    "threshold": "> 0.10",
                }
            )

        if test in {"feature", "pipeline", "suite"}:
            # Strongest single-feature KS statistic (0.0 when no features).
            top = c0["viz"]["top_drifted_features"]
            mx = float(top[0]["ks_statistic"]) if top else 0.0
            checks.append(
                {
                    "name": "At least one feature should show strong shift",
                    "pass": bool(mx > 0.20),
                    "value": mx,
                    "threshold": "> 0.20",
                }
            )

        if test in {"pipeline", "suite"}:
            checks.append(
                {
                    "name": "Health should degrade under drift",
                    "pass": bool(h_drift < h_clean),
                    "value": {"clean": h_clean, "drifted": h_drift},
                    "threshold": "drifted < clean",
                }
            )

        # Concept drift: inputs were unchanged, so feature drift must be
        # near zero while prediction drift is large.
        if test == "concept":
            top = c0["viz"]["top_drifted_features"]
            mx = float(top[0]["ks_statistic"]) if top else 0.0
            checks.append(
                {
                    "name": "Feature drift should be ZERO (Inputs didn't change)",
                    "pass": bool(mx < 0.05),
                    "value": mx,
                    "threshold": "< 0.05",
                }
            )
            checks.append(
                {
                    "name": "Prediction drift should be MASSIVE (Concept flipped)",
                    "pass": bool(pred_drift > 0.30),
                    "value": pred_drift,
                    "threshold": "> 0.30",
                }
            )

        # Overall verdict: all enforced checks must pass (vacuously true
        # when no checks apply).
        ok = all(bool(x.get("pass")) for x in checks) if checks else True

        payload = {
            "ok": ok,
            "test": test,
            "seed": int(seed),
            "started_at": time.strftime("%Y-%m-%d %H:%M:%S"),
            "elapsed_ms": int((time.time() - t0) * 1000),
            "cases": len(cases),
            "passed": int(sum(1 for x in checks if x.get("pass"))),
            "failed": int(sum(1 for x in checks if not x.get("pass"))),
            "summary": cases[0]["metrics"] if cases else {},
            "checks": checks,
            "case_results": cases,
        }
        return payload

    except Exception as e:
        # Never raise out of the selftest: surface failures as data so
        # callers (CLI / dashboard) can render them.
        import traceback
        return {
            "ok": False,
            "test": test,
            "seed": int(seed),
            "error": str(e),
            "trace": traceback.format_exc(),
        }
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: modelshift
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A lightweight machine learning drift monitoring and alerting engine.
|
|
5
|
+
Author: Krishna
|
|
6
|
+
Author-email: ryomensukuna2530@gmail.com
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Requires-Python: >=3.8
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
Requires-Dist: pandas
|
|
12
|
+
Requires-Dist: numpy
|
|
13
|
+
Requires-Dist: scipy
|
|
14
|
+
Requires-Dist: requests
|
|
15
|
+
Dynamic: author
|
|
16
|
+
Dynamic: author-email
|
|
17
|
+
Dynamic: classifier
|
|
18
|
+
Dynamic: description
|
|
19
|
+
Dynamic: description-content-type
|
|
20
|
+
Dynamic: requires-dist
|
|
21
|
+
Dynamic: requires-python
|
|
22
|
+
Dynamic: summary
|
|
23
|
+
|
|
24
|
+
# π¦ ModelShift-Lite
|
|
25
|
+
### Label-Free Monitoring for Deployed Machine Learning Models
|
|
26
|
+
|
|
27
|
+
> A lightweight, behavior-centric system to detect **silent reliability degradation** in deployed machine learning models β without requiring ground-truth labels.
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## π Why ModelShift-Lite?
|
|
32
|
+
|
|
33
|
+
Machine learning models rarely fail loudly after deployment.
|
|
34
|
+
Instead, they **silently degrade** as real-world data changes β while true labels are unavailable for continuous evaluation.
|
|
35
|
+
|
|
36
|
+
**ModelShift-Lite addresses this blind spot.**
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## π§© Problem Statement
|
|
41
|
+
|
|
42
|
+
Deployed machine learning models often degrade silently over time due to changing data distributions, while ground-truth labels are unavailable for continuous performance evaluation.
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## π― Project Objective
|
|
47
|
+
|
|
48
|
+
Design a **label-free, post-deployment monitoring system** that tracks:
|
|
49
|
+
|
|
50
|
+
- Data distribution shifts
|
|
51
|
+
- Prediction behavior instability
|
|
52
|
+
- Model reliability trends
|
|
53
|
+
|
|
54
|
+
to provide **early warning signals** of degradation **without modifying the deployed model**.
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## π« What This Project Does *Not* Do
|
|
59
|
+
|
|
60
|
+
To maintain clarity of scope, ModelShift-Lite explicitly does **not**:
|
|
61
|
+
|
|
62
|
+
- ❌ Retrain models
- ❌ Correct predictions
- ❌ Compute accuracy on production data
|
|
65
|
+
|
|
66
|
+
It focuses solely on **monitoring and interpretability**.
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## π§ Core Idea (In Simple Terms)
|
|
71
|
+
|
|
72
|
+
> *If we cannot measure correctness, we can still monitor behavior.*
|
|
73
|
+
|
|
74
|
+
ModelShift-Lite observes how a model **reacts** to changing data and identifies signs of instability before failures become obvious.
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## π οΈ Key Components
|
|
79
|
+
|
|
80
|
+
- **Reference Baseline Handling**
|
|
81
|
+
Captures normal model behavior from historical or validation data
|
|
82
|
+
|
|
83
|
+
- **Live Inference Monitoring**
|
|
84
|
+
Tracks incoming production data and predictions
|
|
85
|
+
|
|
86
|
+
- **Feature Drift Detection**
|
|
87
|
+
Identifies changes in input distributions
|
|
88
|
+
|
|
89
|
+
- **Prediction Behavior Analysis**
|
|
90
|
+
Monitors confidence, stability, and output distribution shifts
|
|
91
|
+
|
|
92
|
+
- **Model Health Scoring**
|
|
93
|
+
Aggregates drift signals into an interpretable reliability indicator
|
|
94
|
+
|
|
95
|
+
- **Visualization Dashboard**
|
|
96
|
+
Displays trends, drift severity, and degradation warnings
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
Reference Data ──┐
                 ├──→ Drift Detection ──→ Health Scoring ──→ Monitoring Dashboard
Live Inference ──┘
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
*(Detailed architecture diagrams are provided in `/docs`)*
|
|
105
|
+
|
|
106
|
+
---
|
|
107
|
+
|
|
108
|
+
## π» Technology Stack
|
|
109
|
+
|
|
110
|
+
- **Language:** Python
|
|
111
|
+
- **Data Processing:** NumPy, Pandas
|
|
112
|
+
- **Statistical Analysis:** SciPy
|
|
113
|
+
- **Visualization:** Streamlit, Matplotlib
|
|
114
|
+
- **Storage:** SQLite (local, replaceable)
|
|
115
|
+
|
|
116
|
+
---
|
|
117
|
+
|
|
118
|
+
## π Repository Structure
|
|
119
|
+
|
|
120
|
+
```text
|
|
121
|
+
modelshift-lite/
├── modelshift/       # Core monitoring logic
├── dashboard/        # Streamlit visualization app
├── experiments/      # Drift simulation & analysis
├── data/             # Reference & live data
├── docs/             # Architecture and design docs
└── README.md
```

## 🏗️ High-Level Architecture
|
|
129
|
+
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
modelshift/__init__.py,sha256=_I2AIeZgUPSI1iAoeqE0BW3Lvdx1HNp4kFUt59OnWTs,51
|
|
2
|
+
modelshift/baseline.py,sha256=gdk_dMUc3bq7AigxBOIEW2PGSLpZw8twzMdeQcv65qg,1038
|
|
3
|
+
modelshift/monitor.py,sha256=Kt7c7jv48auJ2cGnGdSo01G5y8iI-ve0bgaSKXdAO1w,12421
|
|
4
|
+
modelshift/selftest.py,sha256=EdgQd-IROhYagll4QTJ83uRBTvBhtrahrxLa_nuJl9M,14274
|
|
5
|
+
modelshift/drift/__init__.py,sha256=TEkGw_vO--HzOCqu4i014te5Vru7yghgeJh2cJjle_Y,36
|
|
6
|
+
modelshift/drift/feature_drift.py,sha256=nU6auZWatpdjgdleU1ZIms2tGE-4iUfs4skv1VdOnME,1413
|
|
7
|
+
modelshift/drift/prediction_drift.py,sha256=v_vNA3JLPmUU4_f2vi05FVzpNCEWu1we8pXG5BGL1qA,3842
|
|
8
|
+
modelshift/drift/severity.py,sha256=OOEzoisoPLjvFTvU0O6wwK1-GTA-KuUyAWxmP4ihTD0,7878
|
|
9
|
+
modelshift/storage/__init__.py,sha256=N-Ydih1vXsRhj6pkXgkaFesGM_4CrnPlEDWyteRAxoA,48
|
|
10
|
+
modelshift/storage/sqlite_store.py,sha256=TD0aMI5IEBI9zQr8OpJOY-H9i_urURTR5UkCzSd-dJE,272
|
|
11
|
+
modelshift/utils/__init__.py,sha256=NBECA9wFkkwn4-O2NJsDwuZB_SciP_pkl5AhfWGe6tE,30
|
|
12
|
+
modelshift/utils/helpers.py,sha256=NGLv7R82B7xnquKLZw-RhChQVQnfrVCuNund4f5tle8,89
|
|
13
|
+
modelshift-0.1.0.dist-info/METADATA,sha256=obDrdvG6gYhRI0Vdx45WKNSIZXFMpLbhXFyUcZi4XoQ,3688
|
|
14
|
+
modelshift-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
15
|
+
modelshift-0.1.0.dist-info/top_level.txt,sha256=3d2NcfPXrOeovneJake07097D7ZHHjKHkykxCDbFoe4,11
|
|
16
|
+
modelshift-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
modelshift
|