ai-critic 1.2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_critic/ai_suggestions/predictor.py +5 -0
- ai_critic/ai_suggestions/rules.py +3 -0
- ai_critic/cli.py +141 -0
- ai_critic/critic.py +81 -201
- ai_critic/evaluators/config.py +29 -5
- ai_critic/feedback/__init__.py +3 -0
- ai_critic/feedback/store.py +23 -0
- ai_critic/learning/__init__.py +13 -0
- ai_critic/learning/critic_model.py +25 -0
- ai_critic/learning/features.py +15 -0
- ai_critic/learning/policy.py +20 -0
- ai_critic/learning/recommender.py +26 -0
- ai_critic/learning/trainer.py +16 -0
- ai_critic/ml/suggester.py +63 -0
- ai_critic/telemetry/__init__.py +0 -0
- ai_critic/telemetry/anonymizer.py +9 -0
- ai_critic/telemetry/client.py +6 -0
- ai_critic/telemetry/event.py +15 -0
- ai_critic/telemetry/local_store.py +9 -0
- ai_critic/telemetry/schema.py +11 -0
- ai_critic/telemetry/sender.py +9 -0
- ai_critic-2.0.0.dist-info/METADATA +390 -0
- ai_critic-2.0.0.dist-info/RECORD +37 -0
- ai_critic-1.2.0.dist-info/METADATA +0 -290
- ai_critic-1.2.0.dist-info/RECORD +0 -18
- {ai_critic-1.2.0.dist-info → ai_critic-2.0.0.dist-info}/WHEEL +0 -0
- {ai_critic-1.2.0.dist-info → ai_critic-2.0.0.dist-info}/top_level.txt +0 -0
ai_critic/cli.py
ADDED
@@ -0,0 +1,141 @@
+# ai_critic/critic.py
+
+from ai_critic.evaluators import (
+    robustness,
+    config,
+    data,
+    performance,
+    adapters
+)
+from ai_critic.evaluators.summary import HumanSummary
+from ai_critic.sessions import CriticSessionStore
+from ai_critic.evaluators.scoring import compute_scores
+
+from ai_critic.learning import (
+    extract_features,
+    CriticModel,
+    CriticTrainer,
+    policy_decision,
+    recommend_changes
+)
+from ai_critic.feedback import FeedbackStore
+
+
+class AICritic:
+    def __init__(
+        self,
+        model,
+        X,
+        y,
+        random_state=None,
+        session=None,
+        framework="sklearn",
+        adapter_kwargs=None
+    ):
+        adapter_kwargs = adapter_kwargs or {}
+        self.framework = framework.lower()
+
+        self.model = (
+            adapters.ModelAdapter(model, framework=self.framework, **adapter_kwargs)
+            if self.framework != "sklearn"
+            else model
+        )
+
+        self.X = X
+        self.y = y
+        self.session = session
+
+        self.ml_model = CriticModel()
+        try:
+            self.ml_model.load()
+        except Exception:
+            pass
+
+        self.trainer = CriticTrainer(self.ml_model)
+        self.feedback = FeedbackStore()
+        self._store = CriticSessionStore() if session else None
+
+    def evaluate(self, view="all", plot=False):
+        details = {}
+
+        details["data"] = data.evaluate(self.X, self.y, plot=plot)
+        details["config"] = config.evaluate(
+            self.model,
+            n_samples=details["data"]["n_samples"],
+            n_features=details["data"]["n_features"]
+        )
+        details["performance"] = performance.evaluate(
+            self.model, self.X, self.y, plot=plot
+        )
+        details["robustness"] = robustness.evaluate(
+            self.model,
+            self.X,
+            self.y,
+            leakage_suspected=details["data"]["data_leakage"]["suspected"],
+            plot=plot
+        )
+
+        human = HumanSummary().generate(details)
+
+        payload = {
+            "executive": human["executive_summary"],
+            "technical": human["technical_summary"],
+            "details": details,
+            "meta": {
+                "framework": self.framework,
+                "n_samples": details["data"]["n_samples"],
+                "n_features": details["data"]["n_features"],
+            }
+        }
+
+        payload["scores"] = compute_scores(payload)
+
+        if self.session:
+            self._store.save(self.session, payload)
+
+        return payload if view == "all" else payload.get(view)
+
+    def deploy_decision(self, success_feedback=None):
+        report = self.evaluate(view="all", plot=False)
+
+        rule_decision = self._rule_based_decision(report)
+        features = extract_features(report)
+        ml_score = self.ml_model.predict_proba(features)
+
+        decision = policy_decision(rule_decision, ml_score)
+        recommendations = recommend_changes(report)
+
+        # 🔁 Automatic feedback loop
+        if success_feedback is not None:
+            self.feedback.add(self.session, report, success_feedback)
+            self.trainer.add_feedback(report, success_feedback)
+
+        return {
+            "deploy": decision["deploy"],
+            "risk_level": rule_decision["risk_level"],
+            "ml_score": round(ml_score, 3),
+            "recommendations": recommendations,
+            "feedback_stats": self.feedback.stats()
+        }
+
+    def _rule_based_decision(self, report):
+        blocking = []
+        risk = "low"
+
+        if report["details"]["data"]["data_leakage"]["suspected"]:
+            blocking.append("Data leakage suspected")
+            risk = "high"
+
+        if report["details"]["performance"]["suspiciously_perfect"]:
+            blocking.append("Suspiciously perfect CV score")
+            risk = "medium"
+
+        if report["details"]["config"]["risk_level"] == "high":
+            blocking.append("High structural complexity")
+            risk = "medium"
+
+        return {
+            "deploy": len(blocking) == 0,
+            "risk_level": risk,
+            "blocking_issues": blocking
+        }
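A minimal usage sketch of the AICritic API shown above, for orientation only: the dataset, estimator, and the "from ai_critic.critic import AICritic" path are assumptions inferred from this diff, not from published documentation.

    # Illustrative sketch, not part of the package diff.
    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier
    from ai_critic.critic import AICritic  # assumed import path per the module layout

    X, y = make_classification(n_samples=500, n_features=10, random_state=0)
    critic = AICritic(RandomForestClassifier(random_state=0), X, y, session="baseline")
    report = critic.evaluate(view="all", plot=False)  # full payload: summaries, details, meta, scores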
ai_critic/critic.py
CHANGED
@@ -1,112 +1,72 @@
+# ai_critic/critic.py
+
 from ai_critic.evaluators import (
     robustness,
     config,
     data,
     performance,
-    adapters
+    adapters
 )
 from ai_critic.evaluators.summary import HumanSummary
 from ai_critic.sessions import CriticSessionStore
 from ai_critic.evaluators.scoring import compute_scores
 
+from ai_critic.learning import (
+    extract_features,
+    CriticModel,
+    CriticTrainer,
+    policy_decision,
+    recommend_changes
+)
+from ai_critic.feedback import FeedbackStore
 
-class AICritic:
-    """
-    Automated reviewer for scikit-learn, PyTorch, or TensorFlow models.
-
-    Produces a multi-layered risk assessment including:
-    - Data integrity analysis
-    - Model configuration sanity checks
-    - Performance evaluation (CV + learning curves)
-    - Robustness & leakage heuristics
-    - Human-readable executive and technical summaries
-    """
 
-
-
-
-
-
-
-
-
-
-
-        Global seed for reproducibility (optional)
-    session : str or None
-        Optional session name for longitudinal comparison
-    framework : str
-        "sklearn" (default), "torch", or "tensorflow"
-    adapter_kwargs : dict
-        Extra kwargs for the adapter (e.g., epochs, lr, batch_size)
-    """
+class AICritic:
+    def __init__(
+        self,
+        model,
+        X,
+        y,
+        random_state=None,
+        session=None,
+        framework="sklearn",
+        adapter_kwargs=None
+    ):
         adapter_kwargs = adapter_kwargs or {}
         self.framework = framework.lower()
-
-
-
-        self.
+
+        self.model = (
+            adapters.ModelAdapter(model, framework=self.framework, **adapter_kwargs)
+            if self.framework != "sklearn"
+            else model
+        )
 
         self.X = X
         self.y = y
-        self.random_state = random_state
         self.session = session
-        self._store = CriticSessionStore() if session else None
 
-
-
-
+        self.ml_model = CriticModel()
+        try:
+            self.ml_model.load()
+        except Exception:
+            pass
 
-
-
-
-        - "all" : full payload (default)
-        - "executive" : executive summary only
-        - "technical" : technical summary only
-        - "details" : low-level evaluator outputs
-        - list : subset of views
-    plot : bool
-        - True : generate plots
-        - False : no plots
-    """
+        self.trainer = CriticTrainer(self.ml_model)
+        self.feedback = FeedbackStore()
+        self._store = CriticSessionStore() if session else None
 
-
-        # Low-level evaluator outputs
-        # =========================
+    def evaluate(self, view="all", plot=False):
         details = {}
 
-
-        # Data analysis
-        # -------------------------
-        details["data"] = data.evaluate(
-            self.X,
-            self.y,
-            plot=plot
-        )
-
-        # -------------------------
-        # Model configuration sanity
-        # -------------------------
+        details["data"] = data.evaluate(self.X, self.y, plot=plot)
         details["config"] = config.evaluate(
             self.model,
             n_samples=details["data"]["n_samples"],
             n_features=details["data"]["n_features"]
         )
-
-        # -------------------------
-        # Performance evaluation
-        # -------------------------
         details["performance"] = performance.evaluate(
-            self.model,
-            self.X,
-            self.y,
-            plot=plot
+            self.model, self.X, self.y, plot=plot
         )
-
-        # -------------------------
-        # Robustness evaluation
-        # -------------------------
         details["robustness"] = robustness.evaluate(
             self.model,
             self.X,
@@ -115,147 +75,67 @@ class AICritic:
             plot=plot
         )
 
-
-        # Human summaries
-        # =========================
-        human_summary = HumanSummary().generate(details)
+        human = HumanSummary().generate(details)
 
         payload = {
-            "executive":
-            "technical":
+            "executive": human["executive_summary"],
+            "technical": human["technical_summary"],
             "details": details,
-            "
+            "meta": {
+                "framework": self.framework,
+                "n_samples": details["data"]["n_samples"],
+                "n_features": details["data"]["n_features"],
+            }
         }
 
-
-
-        # =========================
+        payload["scores"] = compute_scores(payload)
+
         if self.session:
-            scores = compute_scores(payload)
-            payload["scores"] = scores
             self._store.save(self.session, payload)
 
-
-        # View selector
-        # =========================
-        if view == "all":
-            return payload
-
-        if isinstance(view, list):
-            return {k: payload[k] for k in view if k in payload}
+        return payload if view == "all" else payload.get(view)
 
-
-
-    def compare_with(self, previous_session: str) -> dict:
-        """
-        Compare current session with a previous one.
-        """
-
-        if not self.session:
-            raise ValueError("Current session name not set.")
-
-        current = self._store.load(self.session)
-        previous = self._store.load(previous_session)
+    def deploy_decision(self, success_feedback=None):
+        report = self.evaluate(view="all", plot=False)
 
-
-
-
-        )
+        rule_decision = self._rule_based_decision(report)
+        features = extract_features(report)
+        ml_score = self.ml_model.predict_proba(features)
 
-
-
-            "current": current["scores"]["global"],
-            "previous": previous["scores"]["global"],
-            "delta": current["scores"]["global"] - previous["scores"]["global"],
-        },
-        "components": {}
-    }
+        decision = policy_decision(rule_decision, ml_score)
+        recommendations = recommend_changes(report)
 
-
-
-
-
-            "current": value,
-            "previous": prev_value,
-            "delta": value - prev_value
-        }
+        # 🔁 Automatic feedback loop
+        if success_feedback is not None:
+            self.feedback.add(self.session, report, success_feedback)
+            self.trainer.add_feedback(report, success_feedback)
 
         return {
-            "
-            "
-            "
-            "
-
-            "not absolute model quality."
-        )
+            "deploy": decision["deploy"],
+            "risk_level": rule_decision["risk_level"],
+            "ml_score": round(ml_score, 3),
+            "recommendations": recommendations,
+            "feedback_stats": self.feedback.stats()
         }
 
-    def
-
-
-        """
-
-        report = self.evaluate(view="all", plot=False)
-
-        data_risk = report["details"]["data"]["data_leakage"]["suspected"]
-        perfect_cv = report["details"]["performance"]["suspiciously_perfect"]
-        robustness_verdict = report["details"]["robustness"]["verdict"]
-        structural_warnings = report["details"]["config"]["structural_warnings"]
-
-        blocking_issues = []
-        risk_level = "low"
-
-        # Hard blockers
-        if data_risk and perfect_cv:
-            blocking_issues.append(
-                "Data leakage combined with suspiciously perfect CV score"
-            )
-            risk_level = "high"
-
-        if robustness_verdict == "misleading":
-            blocking_issues.append(
-                "Robustness results are misleading due to inflated baseline performance"
-            )
-            risk_level = "high"
-
-        if data_risk:
-            blocking_issues.append(
-                "Suspected target leakage in feature set"
-            )
-            risk_level = "high"
-
-        # Soft blockers
-        if risk_level != "high":
-            if robustness_verdict == "fragile":
-                blocking_issues.append(
-                    "Model performance degrades significantly under noise"
-                )
-                risk_level = "medium"
-
-            if perfect_cv:
-                blocking_issues.append(
-                    "Suspiciously perfect cross-validation score"
-                )
-                risk_level = "medium"
+    def _rule_based_decision(self, report):
+        blocking = []
+        risk = "low"
 
-
-
-
-            )
-            risk_level = "medium"
+        if report["details"]["data"]["data_leakage"]["suspected"]:
+            blocking.append("Data leakage suspected")
+            risk = "high"
 
-
+        if report["details"]["performance"]["suspiciously_perfect"]:
+            blocking.append("Suspiciously perfect CV score")
+            risk = "medium"
 
-
-
-
-        confidence -= 0.25 if robustness_verdict in ("fragile", "misleading") else 0
-        confidence -= 0.15 if structural_warnings else 0
-        confidence = max(0.0, round(confidence, 2))
+        if report["details"]["config"]["risk_level"] == "high":
+            blocking.append("High structural complexity")
+            risk = "medium"
 
         return {
-            "deploy":
-            "risk_level":
-            "blocking_issues":
-            "confidence": confidence
+            "deploy": len(blocking) == 0,
+            "risk_level": risk,
+            "blocking_issues": blocking
        }
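The new deploy gate combines the rule-based decision with the learned model's score; a sketch of how the loop would be driven, assuming the critic instance from the earlier example:

    decision = critic.deploy_decision()
    print(decision["deploy"], decision["risk_level"], decision["ml_score"])

    # Once the real-world outcome is known, passing success_feedback records it
    # in the FeedbackStore and hands it to the trainer for the next retrain.
    critic.deploy_decision(success_feedback=True)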
ai_critic/evaluators/config.py
CHANGED
@@ -1,22 +1,33 @@
+# evaluators/config.py
 import math
 
+
 def evaluate(model, n_samples=None, n_features=None):
-
+    """
+    Evaluates model configuration for structural risks and complexity.
+    Outputs only metadata-safe signals (telemetry-ready).
+    """
+
+    params = model.get_params() if hasattr(model, "get_params") else {}
     model_type = type(model).__name__
 
     report = {
         "model_type": model_type,
         "n_params": len(params),
-        "uses_random_state": "random_state" in params
+        "uses_random_state": "random_state" in params,
+        "complexity_score": 0,
+        "risk_level": "low",
     }
 
-    # 🧠 Structural overfitting heuristics
     warnings = []
 
-
+    # =========================
+    # Tree depth heuristic
+    # =========================
+    if n_samples and "max_depth" in params:
         max_depth = params.get("max_depth")
         if max_depth is not None:
-            recommended_depth = math.log2(n_samples)
+            recommended_depth = math.log2(max(2, n_samples))
             if max_depth > recommended_depth:
                 warnings.append({
                     "issue": "structural_overfitting_risk",
@@ -24,12 +35,25 @@ def evaluate(model, n_samples=None, n_features=None):
                     "recommended_max_depth": int(recommended_depth),
                     "message": "Tree depth may be too high for dataset size."
                 })
+                report["complexity_score"] += 1
 
+    # =========================
+    # Feature / sample ratio
+    # =========================
     if n_samples and n_features and n_features > n_samples:
         warnings.append({
             "issue": "high_feature_sample_ratio",
             "message": "More features than samples can cause instability."
         })
+        report["complexity_score"] += 1
+
+    # =========================
+    # Risk aggregation
+    # =========================
+    if report["complexity_score"] >= 2:
+        report["risk_level"] = "high"
+    elif report["complexity_score"] == 1:
+        report["risk_level"] = "medium"
 
     report["structural_warnings"] = warnings
     return report
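As a sanity check of the new aggregation: a tree deeper than log2(n_samples) trips one heuristic, so complexity_score lands at 1 and risk_level at "medium". A sketch against the diffed function:

    from sklearn.tree import DecisionTreeClassifier
    from ai_critic.evaluators import config

    # log2(100) ~ 6.6, so max_depth=20 triggers structural_overfitting_risk
    report = config.evaluate(DecisionTreeClassifier(max_depth=20),
                             n_samples=100, n_features=5)
    print(report["complexity_score"], report["risk_level"])  # 1 "medium"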
ai_critic/feedback/store.py
ADDED
@@ -0,0 +1,23 @@
+class FeedbackStore:
+    def __init__(self):
+        self.storage = []
+
+    def add(self, session_id, report, success: bool):
+        self.storage.append({
+            "session": session_id,
+            "success": success,
+            "report": report
+        })
+
+    def stats(self):
+        positives = sum(1 for x in self.storage if x["success"])
+        negatives = sum(1 for x in self.storage if not x["success"])
+
+        return {
+            "total": len(self.storage),
+            "positives": positives,
+            "negatives": negatives
+        }
+
+    def all(self):
+        return self.storage
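Note that the store is purely in-memory, so feedback only persists for the lifetime of the process; a quick sketch of the counters:

    store = FeedbackStore()
    store.add("baseline", report={}, success=True)
    store.add("baseline", report={}, success=False)
    print(store.stats())  # {'total': 2, 'positives': 1, 'negatives': 1}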
ai_critic/learning/__init__.py
ADDED
@@ -0,0 +1,13 @@
+from .features import extract_features
+from .critic_model import CriticModel
+from .trainer import CriticTrainer
+from .policy import policy_decision
+from .recommender import recommend_changes
+
+__all__ = [
+    "extract_features",
+    "CriticModel",
+    "CriticTrainer",
+    "policy_decision",
+    "recommend_changes",
+]
ai_critic/learning/critic_model.py
ADDED
@@ -0,0 +1,25 @@
+import joblib
+from sklearn.linear_model import LogisticRegression
+import numpy as np
+
+class CriticModel:
+    def __init__(self, path="critic_model.joblib"):
+        self.path = path
+        self.model = LogisticRegression()
+        self.is_trained = False
+
+    def train(self, X, y):
+        self.model.fit(X, y)
+        self.is_trained = True
+        joblib.dump(self.model, self.path)
+
+    def load(self):
+        self.model = joblib.load(self.path)
+        self.is_trained = True
+
+    def predict_proba(self, features: dict) -> float:
+        if not self.is_trained:
+            return 0.5  # neutral
+
+        X = np.array([list(features.values())])
+        return float(self.model.predict_proba(X)[0][1])
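predict_proba flattens the feature dict in insertion order, so training rows must use the same column order as extract_features produces. A toy sketch of the train/predict cycle (the feature values are invented for illustration; the joblib file lands in the working directory per the default path):

    model = CriticModel()            # path defaults to critic_model.joblib
    X = [[500, 10, 0, 0, 0, 0, 0],   # clean report, deploy succeeded
         [500, 10, 1, 1, 0, 1, 1]]   # leaky report, deploy failed
    model.train(X, [1, 0])           # fits and dumps the joblib file
    score = model.predict_proba({
        "n_samples": 500, "n_features": 10, "data_leakage": 0,
        "perfect_cv": 0, "robustness_fragile": 0,
        "robustness_misleading": 0, "structural_risk_high": 0,
    })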
ai_critic/learning/features.py
ADDED
@@ -0,0 +1,15 @@
+def extract_features(report: dict) -> dict:
+    data = report["details"]["data"]
+    perf = report["details"]["performance"]
+    robust = report["details"]["robustness"]
+    config = report["details"]["config"]
+
+    return {
+        "n_samples": report["meta"]["n_samples"],
+        "n_features": report["meta"]["n_features"],
+        "data_leakage": int(data["data_leakage"]["suspected"]),
+        "perfect_cv": int(perf["suspiciously_perfect"]),
+        "robustness_fragile": int(robust["verdict"] == "fragile"),
+        "robustness_misleading": int(robust["verdict"] == "misleading"),
+        "structural_risk_high": int(config["risk_level"] == "high"),
+    }
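The function flattens the nested report into the seven numeric signals the CriticModel trains on; with a hand-built toy report (values invented for illustration):

    toy_report = {
        "meta": {"n_samples": 500, "n_features": 10},
        "details": {
            "data": {"data_leakage": {"suspected": True}},
            "performance": {"suspiciously_perfect": False},
            "robustness": {"verdict": "fragile"},
            "config": {"risk_level": "medium"},
        },
    }
    print(extract_features(toy_report))
    # {'n_samples': 500, 'n_features': 10, 'data_leakage': 1, 'perfect_cv': 0,
    #  'robustness_fragile': 1, 'robustness_misleading': 0, 'structural_risk_high': 0}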
ai_critic/learning/policy.py
ADDED
@@ -0,0 +1,20 @@
+def policy_decision(rule_decision: dict, ml_score: float):
+    if rule_decision["risk_level"] == "high":
+        return {
+            "deploy": False,
+            "reason": "Blocked by rules",
+            "ml_score": ml_score
+        }
+
+    if ml_score < 0.4:
+        return {
+            "deploy": False,
+            "reason": "ML predicts failure",
+            "ml_score": ml_score
+        }
+
+    return {
+        "deploy": True,
+        "reason": "Approved by ML + rules",
+        "ml_score": ml_score
+    }
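Rules dominate the policy: a high rule-based risk blocks regardless of the ML score, and otherwise the 0.4 threshold decides. A sketch of the three branches:

    rule = {"risk_level": "medium"}
    policy_decision(rule, ml_score=0.35)                    # deploy False, "ML predicts failure"
    policy_decision(rule, ml_score=0.80)                    # deploy True, "Approved by ML + rules"
    policy_decision({"risk_level": "high"}, ml_score=0.99)  # deploy False, "Blocked by rules"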