ai-critic 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_critic/evaluators/explainability.py +64 -0
- ai_critic/evaluators/scoring.py +14 -0
- ai_critic/evaluators/summary.py +30 -2
- {ai_critic-1.1.0.dist-info → ai_critic-1.2.0.dist-info}/METADATA +2 -1
- {ai_critic-1.1.0.dist-info → ai_critic-1.2.0.dist-info}/RECORD +7 -6
- {ai_critic-1.1.0.dist-info → ai_critic-1.2.0.dist-info}/WHEEL +0 -0
- {ai_critic-1.1.0.dist-info → ai_critic-1.2.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# explainability.py
|
|
2
|
+
import numpy as np
|
|
3
|
+
from sklearn.model_selection import cross_val_score
|
|
4
|
+
from sklearn.base import clone
|
|
5
|
+
|
|
6
|
+
from .validation import make_cv
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def evaluate(model, X, y, max_features=10, random_state=None):
    """
    Model-agnostic feature sensitivity analysis.

    For every feature column, the column is shuffled, a fresh clone of
    ``model`` is cross-validated on the modified data, and the drop in mean
    cross-validation score relative to the unmodified baseline is recorded.

    Parameters
    ----------
    model : estimator
        Estimator accepted by ``sklearn.base.clone`` and
        ``sklearn.model_selection.cross_val_score``. It is never fitted
        itself; only clones are.
    X : array-like with two dimensions
        Feature matrix. NumPy arrays and objects exposing ``.iloc``
        (e.g. pandas DataFrames) are supported; inputs without a ``shape``
        attribute are converted with ``np.asarray``.
    y : array-like
        Target values, also passed to ``make_cv`` to build the CV strategy.
    max_features : int, default 10
        Maximum number of entries kept in ``top_sensitive_features``.
        It only truncates the reported list; the verdict always uses the
        largest measured drop.
    random_state : int, numpy.random.RandomState or None, default None
        Seed or generator used for shuffling. ``None`` uses NumPy's global
        random state.

    Returns
    -------
    dict
        ``baseline_score``, ``top_sensitive_features`` (dicts holding
        ``feature_index`` and ``performance_drop``, sorted by decreasing
        drop), ``max_performance_drop``, ``verdict`` and ``message``.
        ``verdict`` is ``"feature_leakage_risk"`` when the largest drop
        exceeds 0.30, ``"feature_dependency"`` when it exceeds 0.15, and
        ``"stable"`` otherwise.
    """
    # Nested lists and similar inputs have neither ``shape`` nor ``copy``
    # with column slicing; normalise them once up front.
    if not hasattr(X, "shape"):
        X = np.asarray(X)

    if random_state is None:
        rng = np.random  # module-level functions share the global state
    elif isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    cv = make_cv(y)
    base_score = cross_val_score(clone(model), X, y, cv=cv).mean()

    sensitivities = []
    for i in range(X.shape[1]):
        X_permuted = X.copy()
        if hasattr(X_permuted, "iloc"):
            # DataFrame-like input: ``X[:, i]`` is not valid positional
            # indexing there, so assign a permuted copy of the column.
            column = X_permuted.iloc[:, i].to_numpy()
            X_permuted.iloc[:, i] = rng.permutation(column)
        else:
            # Column slice of an ndarray is a view, so this shuffles in place.
            rng.shuffle(X_permuted[:, i])

        # The clone is re-fitted on the shuffled data inside each CV fold.
        score = cross_val_score(clone(model), X_permuted, y, cv=cv).mean()

        sensitivities.append({
            "feature_index": int(i),
            "performance_drop": float(base_score - score),
        })

    sensitivities.sort(key=lambda entry: entry["performance_drop"], reverse=True)
    top = sensitivities[:max_features]

    # Derive the verdict from the full ranking so that truncating the report
    # (e.g. max_features=0) cannot hide a dominant feature.
    max_drop = sensitivities[0]["performance_drop"] if sensitivities else 0.0

    if max_drop > 0.30:
        verdict = "feature_leakage_risk"
        message = (
            "Model is highly sensitive to a single feature, "
            "which may indicate leakage or shortcut learning."
        )
    elif max_drop > 0.15:
        verdict = "feature_dependency"
        message = "Model depends strongly on a small subset of features."
    else:
        verdict = "stable"
        message = "No single feature dominates model behavior."

    return {
        "baseline_score": float(base_score),
        "top_sensitive_features": top,
        "max_performance_drop": float(max_drop),
        "verdict": verdict,
        "message": message,
    }
|
ai_critic/evaluators/scoring.py
CHANGED
|
@@ -11,6 +11,10 @@ def compute_scores(report: dict) -> dict:
|
|
|
11
11
|
robustness = report["details"]["robustness"]["verdict"]
|
|
12
12
|
structural = report["details"]["config"]["structural_warnings"]
|
|
13
13
|
|
|
14
|
+
explainability = report["details"].get("explainability", {})
|
|
15
|
+
explain_verdict = explainability.get("verdict")
|
|
16
|
+
max_feature_drop = explainability.get("max_performance_drop", 0)
|
|
17
|
+
|
|
14
18
|
if data_leakage:
|
|
15
19
|
score -= 30
|
|
16
20
|
|
|
@@ -25,6 +29,11 @@ def compute_scores(report: dict) -> dict:
|
|
|
25
29
|
if structural:
|
|
26
30
|
score -= 10
|
|
27
31
|
|
|
32
|
+
if explain_verdict == "feature_leakage_risk":
|
|
33
|
+
score -= 20
|
|
34
|
+
elif explain_verdict == "feature_dependency":
|
|
35
|
+
score -= 10
|
|
36
|
+
|
|
28
37
|
return {
|
|
29
38
|
"global": max(0, min(100, score)),
|
|
30
39
|
"components": {
|
|
@@ -35,5 +44,10 @@ def compute_scores(report: dict) -> dict:
|
|
|
35
44
|
"fragile": 65,
|
|
36
45
|
"misleading": 40
|
|
37
46
|
}.get(robustness, 100),
|
|
47
|
+
"explainability": (
|
|
48
|
+
40 if explain_verdict == "feature_leakage_risk"
|
|
49
|
+
else 70 if explain_verdict == "feature_dependency"
|
|
50
|
+
else 100
|
|
51
|
+
)
|
|
38
52
|
}
|
|
39
53
|
}
|
ai_critic/evaluators/summary.py
CHANGED
|
@@ -10,6 +10,10 @@ class HumanSummary:
|
|
|
10
10
|
robustness_verdict = report["robustness"].get("verdict")
|
|
11
11
|
structural_warnings = report["config"]["structural_warnings"]
|
|
12
12
|
|
|
13
|
+
explainability = report.get("explainability", {})
|
|
14
|
+
explain_verdict = explainability.get("verdict")
|
|
15
|
+
max_feature_drop = explainability.get("max_performance_drop", 0)
|
|
16
|
+
|
|
13
17
|
# =========================
|
|
14
18
|
# Executive summary
|
|
15
19
|
# =========================
|
|
@@ -18,11 +22,19 @@ class HumanSummary:
|
|
|
18
22
|
risk_level = "high"
|
|
19
23
|
deploy = False
|
|
20
24
|
main_reason = "Strong evidence of data leakage inflating model performance."
|
|
25
|
+
elif explain_verdict == "feature_leakage_risk":
|
|
26
|
+
verdict = "❌ Unreliable"
|
|
27
|
+
risk_level = "high"
|
|
28
|
+
deploy = False
|
|
29
|
+
main_reason = (
|
|
30
|
+
"Model behavior is dominated by a single feature, "
|
|
31
|
+
"suggesting shortcut learning or leakage."
|
|
32
|
+
)
|
|
21
33
|
elif robustness_verdict in ("fragile", "misleading") or structural_warnings:
|
|
22
34
|
verdict = "⚠️ Risky"
|
|
23
35
|
risk_level = "medium"
|
|
24
36
|
deploy = False
|
|
25
|
-
main_reason = "Structural or
|
|
37
|
+
main_reason = "Structural, robustness, or dependency-related risks detected."
|
|
26
38
|
else:
|
|
27
39
|
verdict = "✅ Acceptable"
|
|
28
40
|
risk_level = "low"
|
|
@@ -71,6 +83,21 @@ class HumanSummary:
|
|
|
71
83
|
"Reduce model complexity or adjust hyperparameters."
|
|
72
84
|
)
|
|
73
85
|
|
|
86
|
+
if explain_verdict == "feature_leakage_risk":
|
|
87
|
+
key_risks.append(
|
|
88
|
+
f"Single feature causes a {max_feature_drop:.2f} performance drop when permuted."
|
|
89
|
+
)
|
|
90
|
+
recommendations.append(
|
|
91
|
+
"Remove or heavily regularize the dominant feature and retrain."
|
|
92
|
+
)
|
|
93
|
+
elif explain_verdict == "feature_dependency":
|
|
94
|
+
key_risks.append(
|
|
95
|
+
"Model relies disproportionately on a small subset of features."
|
|
96
|
+
)
|
|
97
|
+
recommendations.append(
|
|
98
|
+
"Increase regularization or collect more diverse data."
|
|
99
|
+
)
|
|
100
|
+
|
|
74
101
|
if robustness_verdict == "misleading":
|
|
75
102
|
key_risks.append(
|
|
76
103
|
"Robustness metrics are misleading due to inflated baseline performance."
|
|
@@ -92,7 +119,8 @@ class HumanSummary:
|
|
|
92
119
|
"data_leakage": leakage,
|
|
93
120
|
"suspicious_cv": perfect_cv,
|
|
94
121
|
"structural_risk": bool(structural_warnings),
|
|
95
|
-
"robustness_verdict": robustness_verdict
|
|
122
|
+
"robustness_verdict": robustness_verdict,
|
|
123
|
+
"explainability_verdict": explain_verdict
|
|
96
124
|
},
|
|
97
125
|
"recommendations": recommendations
|
|
98
126
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ai-critic
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: Fast AI evaluator for scikit-learn models
|
|
5
5
|
Author-email: Luiz Seabra <filipedemarco@yahoo.com>
|
|
6
6
|
Requires-Python: >=3.9
|
|
@@ -91,6 +91,7 @@ Each pillar contributes signals used later in the **deployment gate**.
|
|
|
91
91
|
|
|
92
92
|
---
|
|
93
93
|
|
|
94
|
+
|
|
94
95
|
### Full Technical & Visual Analysis
|
|
95
96
|
|
|
96
97
|
To access **all internal diagnostics**, including plots and recommendations, use `view="all"`.
|
|
@@ -4,14 +4,15 @@ ai_critic/evaluators/__init__.py,sha256=ri6InmL8_LIcO-JZpU_gEFKLO4URdqo3z6rh7fV6
|
|
|
4
4
|
ai_critic/evaluators/adapters.py,sha256=8Xw9Ccg1iGVNwVQDGVIqhWj5-Sg6evqCZhg21u8EP20,3068
|
|
5
5
|
ai_critic/evaluators/config.py,sha256=gBXaS8Qxl14f40JnvMWgA0Z0SGEtbCuCHpTOPem0H90,1163
|
|
6
6
|
ai_critic/evaluators/data.py,sha256=YAK5NkwCeJOny_UueZ5ALwvEcRDIbEck404eV2oqWnc,1871
|
|
7
|
+
ai_critic/evaluators/explainability.py,sha256=UWbcb5uVI78d1ljfdrWd2DrjlwEz1y9CeVtkukefEfA,1759
|
|
7
8
|
ai_critic/evaluators/performance.py,sha256=1CQx5DueK0XkelYyJnAGRJ3AjQtjsKeW8_1JQZqKVOI,1973
|
|
8
9
|
ai_critic/evaluators/robustness.py,sha256=mfVQ67Z6t6aRvtIq-XQEQYbwvyf8UefM1myeOGVrnAE,1869
|
|
9
|
-
ai_critic/evaluators/scoring.py,sha256=
|
|
10
|
-
ai_critic/evaluators/summary.py,sha256=
|
|
10
|
+
ai_critic/evaluators/scoring.py,sha256=9rgkCXKKm9G1Lfwn5i9HcsJTN5OUjxMycOUzhWkp_2g,1576
|
|
11
|
+
ai_critic/evaluators/summary.py,sha256=H9rU9tXAXqyQ34L6bOOOHrdIapSq71gcjjc8jfyJMq4,5003
|
|
11
12
|
ai_critic/evaluators/validation.py,sha256=rnzRwD78Cugey33gl9geE8JoBURsKEEnqrIOhBZv0LY,904
|
|
12
13
|
ai_critic/sessions/__init__.py,sha256=Yp7mphSPJwt8a4cJgcQNErqwqHVuP_xAJODrs0y0Abw,72
|
|
13
14
|
ai_critic/sessions/store.py,sha256=65m9WXFVFWv4pPzvXV4l8zLHoHWMfCGe6eHh4X-8agY,947
|
|
14
|
-
ai_critic-1.
|
|
15
|
-
ai_critic-1.
|
|
16
|
-
ai_critic-1.
|
|
17
|
-
ai_critic-1.
|
|
15
|
+
ai_critic-1.2.0.dist-info/METADATA,sha256=s0XYw_E7ZoVBhF74lyhQsFk_bcyJWY3eo8Yk5E97tZ4,8115
|
|
16
|
+
ai_critic-1.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
17
|
+
ai_critic-1.2.0.dist-info/top_level.txt,sha256=TRyZkm1vyLLcFDg_80yeg5cHvPis_oW1Ti170417jkw,10
|
|
18
|
+
ai_critic-1.2.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|