ai-critic 1.1.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_critic/ai_suggestions/predictor.py +5 -0
- ai_critic/ai_suggestions/rules.py +3 -0
- ai_critic/cli.py +141 -0
- ai_critic/critic.py +81 -201
- ai_critic/evaluators/config.py +29 -5
- ai_critic/evaluators/explainability.py +64 -0
- ai_critic/evaluators/scoring.py +14 -0
- ai_critic/evaluators/summary.py +30 -2
- ai_critic/feedback/__init__.py +3 -0
- ai_critic/feedback/store.py +23 -0
- ai_critic/learning/__init__.py +13 -0
- ai_critic/learning/critic_model.py +25 -0
- ai_critic/learning/features.py +15 -0
- ai_critic/learning/policy.py +20 -0
- ai_critic/learning/recommender.py +26 -0
- ai_critic/learning/trainer.py +16 -0
- ai_critic/ml/suggester.py +63 -0
- ai_critic/telemetry/__init__.py +0 -0
- ai_critic/telemetry/anonymizer.py +9 -0
- ai_critic/telemetry/client.py +6 -0
- ai_critic/telemetry/event.py +15 -0
- ai_critic/telemetry/local_store.py +9 -0
- ai_critic/telemetry/schema.py +11 -0
- ai_critic/telemetry/sender.py +9 -0
- ai_critic-2.0.0.dist-info/METADATA +390 -0
- ai_critic-2.0.0.dist-info/RECORD +37 -0
- ai_critic-1.1.0.dist-info/METADATA +0 -289
- ai_critic-1.1.0.dist-info/RECORD +0 -17
- {ai_critic-1.1.0.dist-info → ai_critic-2.0.0.dist-info}/WHEEL +0 -0
- {ai_critic-1.1.0.dist-info → ai_critic-2.0.0.dist-info}/top_level.txt +0 -0
ai_critic/evaluators/summary.py
CHANGED
|
@@ -10,6 +10,10 @@ class HumanSummary:
|
|
|
10
10
|
robustness_verdict = report["robustness"].get("verdict")
|
|
11
11
|
structural_warnings = report["config"]["structural_warnings"]
|
|
12
12
|
|
|
13
|
+
explainability = report.get("explainability", {})
|
|
14
|
+
explain_verdict = explainability.get("verdict")
|
|
15
|
+
max_feature_drop = explainability.get("max_performance_drop", 0)
|
|
16
|
+
|
|
13
17
|
# =========================
|
|
14
18
|
# Executive summary
|
|
15
19
|
# =========================
|
|
@@ -18,11 +22,19 @@ class HumanSummary:
|
|
|
18
22
|
risk_level = "high"
|
|
19
23
|
deploy = False
|
|
20
24
|
main_reason = "Strong evidence of data leakage inflating model performance."
|
|
25
|
+
elif explain_verdict == "feature_leakage_risk":
|
|
26
|
+
verdict = "❌ Unreliable"
|
|
27
|
+
risk_level = "high"
|
|
28
|
+
deploy = False
|
|
29
|
+
main_reason = (
|
|
30
|
+
"Model behavior is dominated by a single feature, "
|
|
31
|
+
"suggesting shortcut learning or leakage."
|
|
32
|
+
)
|
|
21
33
|
elif robustness_verdict in ("fragile", "misleading") or structural_warnings:
|
|
22
34
|
verdict = "⚠️ Risky"
|
|
23
35
|
risk_level = "medium"
|
|
24
36
|
deploy = False
|
|
25
|
-
main_reason = "Structural or
|
|
37
|
+
main_reason = "Structural, robustness, or dependency-related risks detected."
|
|
26
38
|
else:
|
|
27
39
|
verdict = "✅ Acceptable"
|
|
28
40
|
risk_level = "low"
|
|
@@ -71,6 +83,21 @@ class HumanSummary:
|
|
|
71
83
|
"Reduce model complexity or adjust hyperparameters."
|
|
72
84
|
)
|
|
73
85
|
|
|
86
|
+
if explain_verdict == "feature_leakage_risk":
|
|
87
|
+
key_risks.append(
|
|
88
|
+
f"Single feature causes a {max_feature_drop:.2f} performance drop when permuted."
|
|
89
|
+
)
|
|
90
|
+
recommendations.append(
|
|
91
|
+
"Remove or heavily regularize the dominant feature and retrain."
|
|
92
|
+
)
|
|
93
|
+
elif explain_verdict == "feature_dependency":
|
|
94
|
+
key_risks.append(
|
|
95
|
+
"Model relies disproportionately on a small subset of features."
|
|
96
|
+
)
|
|
97
|
+
recommendations.append(
|
|
98
|
+
"Increase regularization or collect more diverse data."
|
|
99
|
+
)
|
|
100
|
+
|
|
74
101
|
if robustness_verdict == "misleading":
|
|
75
102
|
key_risks.append(
|
|
76
103
|
"Robustness metrics are misleading due to inflated baseline performance."
|
|
@@ -92,7 +119,8 @@ class HumanSummary:
|
|
|
92
119
|
"data_leakage": leakage,
|
|
93
120
|
"suspicious_cv": perfect_cv,
|
|
94
121
|
"structural_risk": bool(structural_warnings),
|
|
95
|
-
"robustness_verdict": robustness_verdict
|
|
122
|
+
"robustness_verdict": robustness_verdict,
|
|
123
|
+
"explainability_verdict": explain_verdict
|
|
96
124
|
},
|
|
97
125
|
"recommendations": recommendations
|
|
98
126
|
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
class FeedbackStore:
    """In-memory store for per-session critic feedback (no persistence)."""

    def __init__(self):
        # Records kept in insertion order.
        self.storage = []

    def add(self, session_id, report, success: bool):
        """Append one feedback record for the given session."""
        entry = {
            "session": session_id,
            "success": success,
            "report": report,
        }
        self.storage.append(entry)

    def stats(self):
        """Return aggregate counts of stored feedback outcomes."""
        total = len(self.storage)
        positives = sum(1 for entry in self.storage if entry["success"])
        # Every record is either truthy or falsy, so negatives is the remainder.
        return {
            "total": total,
            "positives": positives,
            "negatives": total - positives,
        }

    def all(self):
        """Return the raw list of stored records."""
        return self.storage
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from .features import extract_features
|
|
2
|
+
from .critic_model import CriticModel
|
|
3
|
+
from .trainer import CriticTrainer
|
|
4
|
+
from .policy import policy_decision
|
|
5
|
+
from .recommender import recommend_changes
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"extract_features",
|
|
9
|
+
"CriticModel",
|
|
10
|
+
"CriticTrainer",
|
|
11
|
+
"policy_decision",
|
|
12
|
+
"recommend_changes",
|
|
13
|
+
]
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import joblib
|
|
2
|
+
from sklearn.linear_model import LogisticRegression
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
class CriticModel:
    """Logistic-regression wrapper that scores critic reports.

    The fitted estimator is persisted to ``self.path`` via joblib so a
    trained model survives across sessions.
    """

    def __init__(self, path="critic_model.joblib"):
        # Filesystem location where the fitted model is dumped/loaded.
        self.path = path
        self.model = LogisticRegression()
        # False until train() or load() completes.
        self.is_trained = False

    def train(self, X, y):
        """Fit on feature matrix ``X`` / labels ``y`` and persist to disk."""
        self.model.fit(X, y)
        self.is_trained = True
        joblib.dump(self.model, self.path)

    def load(self):
        """Load a previously persisted model from ``self.path``.

        Propagates joblib/IO errors (e.g. FileNotFoundError) if no model
        has been saved yet.
        """
        self.model = joblib.load(self.path)
        self.is_trained = True

    def predict_proba(self, features: dict) -> float:
        """Return the probability of the positive class for one feature dict.

        NOTE(review): relies on ``features`` having the same key order as
        the vectors used at training time (presumably produced by
        ``extract_features``) — confirm against callers.
        """
        if not self.is_trained:
            return 0.5  # neutral: an untrained model expresses no preference

        X = np.array([list(features.values())])
        return float(self.model.predict_proba(X)[0][1])
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
def extract_features(report: dict) -> dict:
    """Flatten a critic report into a numeric feature dict.

    Key insertion order is fixed, so ``list(result.values())`` yields a
    stable feature vector for downstream models.
    """
    meta = report["meta"]
    details = report["details"]
    robustness_verdict = details["robustness"]["verdict"]

    features = {}
    features["n_samples"] = meta["n_samples"]
    features["n_features"] = meta["n_features"]
    features["data_leakage"] = int(details["data"]["data_leakage"]["suspected"])
    features["perfect_cv"] = int(details["performance"]["suspiciously_perfect"])
    features["robustness_fragile"] = int(robustness_verdict == "fragile")
    features["robustness_misleading"] = int(robustness_verdict == "misleading")
    features["structural_risk_high"] = int(details["config"]["risk_level"] == "high")
    return features
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
def policy_decision(rule_decision: dict, ml_score: float):
    """Combine the rule verdict and the ML score into a deploy decision.

    Rules veto first; the ML threshold (0.4) only applies when the rules
    did not already block the deployment.
    """
    if rule_decision["risk_level"] == "high":
        deploy, reason = False, "Blocked by rules"
    elif ml_score < 0.4:
        deploy, reason = False, "ML predicts failure"
    else:
        deploy, reason = True, "Approved by ML + rules"

    return {
        "deploy": deploy,
        "reason": reason,
        "ml_score": ml_score,
    }
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
def recommend_changes(report):
    """Derive actionable recommendations from a critic report.

    Returns a non-empty list of recommendation strings; a placeholder
    message is returned when no risk signal fires.
    """
    details = report["details"]

    # (trigger, message) pairs evaluated in a fixed order.
    checks = [
        (details["config"]["risk_level"] == "high",
         "Reduce model complexity (e.g., lower max_depth, fewer estimators)."),
        (details["performance"]["suspiciously_perfect"],
         "Suspiciously perfect performance detected — verify data leakage."),
        (details["data"]["data_leakage"]["suspected"],
         "Potential target leakage — review feature engineering pipeline."),
    ]
    recs = [message for triggered, message in checks if triggered]
    return recs or ["No critical changes recommended."]
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from .features import extract_features
|
|
2
|
+
|
|
3
|
+
class CriticTrainer:
    """Accumulates feedback vectors and trains the critic model.

    Once at least ``min_samples`` labelled examples are buffered, every
    new feedback triggers a full retrain over the whole buffer.
    """

    def __init__(self, critic_model, min_samples=10):
        self.model = critic_model
        self.min_samples = min_samples
        # Parallel lists: feature vectors and their 0/1 success labels.
        self.X = []
        self.y = []

    def add_feedback(self, report, success: bool):
        """Record one (report, outcome) pair and retrain when ready."""
        vector = extract_features(report)
        self.X.append(list(vector.values()))
        self.y.append(int(success))

        # Not enough data yet — defer training.
        if len(self.y) < self.min_samples:
            return
        self.model.train(self.X, self.y)
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# ai_critic/ml/suggester.py

def suggest_fix(event: dict) -> dict:
    """Map a telemetry event to a verdict plus a human-readable suggestion.

    Currently rule-based; designed to be swapped for a model trained on
    global telemetry later. Verdicts: critical > warning > ok > review.
    """
    signals = event["signals"]
    score = event["score"]

    # Critical: leakage combined with perfect CV is near-certain leakage.
    if signals["leakage"] and signals["perfect_cv"]:
        suggestion = (
            "Strong evidence of data leakage. "
            "Audit features highly correlated with the target, "
            "remove shortcuts and re-run validation."
        )
        return {"verdict": "critical", "suggestion": suggestion}

    # Warning: model degrades badly under perturbation.
    if signals["robustness"] == "fragile":
        suggestion = (
            "Model is fragile under noise. "
            "Consider stronger regularization, "
            "simpler architecture or more data."
        )
        return {"verdict": "warning", "suggestion": suggestion}

    # Warning: structurally heavy model relative to the dataset.
    if signals["structural"] == "high":
        suggestion = (
            "Model complexity may be too high for dataset size. "
            "Reduce depth, number of parameters or features."
        )
        return {"verdict": "warning", "suggestion": suggestion}

    # Healthy: high score with no blocking signal.
    if score >= 85:
        suggestion = (
            "Model behavior looks consistent. "
            "No critical risks detected at this stage."
        )
        return {"verdict": "ok", "suggestion": suggestion}

    # Default: nothing critical, but not clearly healthy either.
    suggestion = (
        "No critical failures detected, "
        "but model could benefit from further validation "
        "and robustness checks."
    )
    return {"verdict": "review", "suggestion": suggestion}
|
|
File without changes
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
def anonymize(report: dict) -> dict:
    """Strip a critic report down to non-identifying aggregate fields."""
    details = report["details"]
    # NOTE(review): this reads report["score"]["global"] while build_event
    # reads report["scores"]["global"] — confirm which key the report
    # schema actually uses.
    return {
        "model_type": report["meta"]["model_type"],
        "score": report["score"]["global"],
        "signals": {
            "leakage": details["data"]["data_leakage"]["suspected"],
            "robustness": details["robustness"]["verdict"],
        },
    }
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
def build_event(report: dict) -> dict:
    """Build a flat telemetry event dict from a full critic report."""
    meta = report["meta"]
    details = report["details"]

    event = {
        "model_type": meta["model_type"],
        "framework": meta["framework"],
        "n_samples": meta["n_samples"],
        "n_features": meta["n_features"],
        "score": report["scores"]["global"],
        "risk_level": report["executive"]["risk_level"],
    }
    event["signals"] = {
        "leakage": details["data"]["data_leakage"]["suspected"],
        "perfect_cv": details["performance"]["suspiciously_perfect"],
        "robustness": details["robustness"]["verdict"],
        "structural": details["config"]["risk_level"],
    }
    return event
|