ai-critic 1.2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
+ def predict_improvements(report_embedding):
+     return {
+         "risk_reduction": 0.23,
+         "suggested_action": "Remove top correlated feature"
+     }
@@ -0,0 +1,3 @@
+ def suggest(report):
+     if report["score"]["global"] < 60:
+         return "Reduce model complexity or audit features for leakage."
ai_critic/cli.py ADDED
@@ -0,0 +1,141 @@
+ # ai_critic/critic.py
+
+ from ai_critic.evaluators import (
+     robustness,
+     config,
+     data,
+     performance,
+     adapters
+ )
+ from ai_critic.evaluators.summary import HumanSummary
+ from ai_critic.sessions import CriticSessionStore
+ from ai_critic.evaluators.scoring import compute_scores
+
+ from ai_critic.learning import (
+     extract_features,
+     CriticModel,
+     CriticTrainer,
+     policy_decision,
+     recommend_changes
+ )
+ from ai_critic.feedback import FeedbackStore
+
+
+ class AICritic:
+     def __init__(
+         self,
+         model,
+         X,
+         y,
+         random_state=None,
+         session=None,
+         framework="sklearn",
+         adapter_kwargs=None
+     ):
+         adapter_kwargs = adapter_kwargs or {}
+         self.framework = framework.lower()
+
+         self.model = (
+             adapters.ModelAdapter(model, framework=self.framework, **adapter_kwargs)
+             if self.framework != "sklearn"
+             else model
+         )
+
+         self.X = X
+         self.y = y
+         self.session = session
+
+         self.ml_model = CriticModel()
+         try:
+             self.ml_model.load()
+         except Exception:
+             pass
+
+         self.trainer = CriticTrainer(self.ml_model)
+         self.feedback = FeedbackStore()
+         self._store = CriticSessionStore() if session else None
+
+     def evaluate(self, view="all", plot=False):
+         details = {}
+
+         details["data"] = data.evaluate(self.X, self.y, plot=plot)
+         details["config"] = config.evaluate(
+             self.model,
+             n_samples=details["data"]["n_samples"],
+             n_features=details["data"]["n_features"]
+         )
+         details["performance"] = performance.evaluate(
+             self.model, self.X, self.y, plot=plot
+         )
+         details["robustness"] = robustness.evaluate(
+             self.model,
+             self.X,
+             self.y,
+             leakage_suspected=details["data"]["data_leakage"]["suspected"],
+             plot=plot
+         )
+
+         human = HumanSummary().generate(details)
+
+         payload = {
+             "executive": human["executive_summary"],
+             "technical": human["technical_summary"],
+             "details": details,
+             "meta": {
+                 "framework": self.framework,
+                 "n_samples": details["data"]["n_samples"],
+                 "n_features": details["data"]["n_features"],
+             }
+         }
+
+         payload["scores"] = compute_scores(payload)
+
+         if self.session:
+             self._store.save(self.session, payload)
+
+         return payload if view == "all" else payload.get(view)
+
+     def deploy_decision(self, success_feedback=None):
+         report = self.evaluate(view="all", plot=False)
+
+         rule_decision = self._rule_based_decision(report)
+         features = extract_features(report)
+         ml_score = self.ml_model.predict_proba(features)
+
+         decision = policy_decision(rule_decision, ml_score)
+         recommendations = recommend_changes(report)
+
+         # 🔁 Automatic feedback loop
+         if success_feedback is not None:
+             self.feedback.add(self.session, report, success_feedback)
+             self.trainer.add_feedback(report, success_feedback)
+
+         return {
+             "deploy": decision["deploy"],
+             "risk_level": rule_decision["risk_level"],
+             "ml_score": round(ml_score, 3),
+             "recommendations": recommendations,
+             "feedback_stats": self.feedback.stats()
+         }
+
+     def _rule_based_decision(self, report):
+         blocking = []
+         risk = "low"
+
+         if report["details"]["data"]["data_leakage"]["suspected"]:
+             blocking.append("Data leakage suspected")
+             risk = "high"
+
+         if report["details"]["performance"]["suspiciously_perfect"]:
+             blocking.append("Suspiciously perfect CV score")
+             risk = "medium"
+
+         if report["details"]["config"]["risk_level"] == "high":
+             blocking.append("High structural complexity")
+             risk = "medium"
+
+         return {
+             "deploy": len(blocking) == 0,
+             "risk_level": risk,
+             "blocking_issues": blocking
+         }
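
A minimal usage sketch of the AICritic API added above (the same class also ships in ai_critic/critic.py below). The dataset, estimator, session name, and feedback flag are illustrative assumptions; whether the estimator must be pre-fitted is not visible in this diff.

    # Illustrative sketch, not documented usage from the package itself.
    from sklearn.datasets import make_classification
    from sklearn.tree import DecisionTreeClassifier

    from ai_critic.critic import AICritic

    X, y = make_classification(n_samples=500, n_features=20, random_state=0)
    model = DecisionTreeClassifier(max_depth=4)

    critic = AICritic(model, X, y, session="baseline")
    report = critic.evaluate(view="all")                   # payload with "scores", "meta", "details"
    gate = critic.deploy_decision(success_feedback=True)   # rule + ML gate, also records feedback
    print(gate["deploy"], gate["risk_level"], gate["ml_score"])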
ai_critic/critic.py CHANGED
@@ -1,112 +1,72 @@
+ # ai_critic/critic.py
+
  from ai_critic.evaluators import (
      robustness,
      config,
      data,
      performance,
-     adapters  # <- new import
+     adapters
  )
  from ai_critic.evaluators.summary import HumanSummary
  from ai_critic.sessions import CriticSessionStore
  from ai_critic.evaluators.scoring import compute_scores

+ from ai_critic.learning import (
+     extract_features,
+     CriticModel,
+     CriticTrainer,
+     policy_decision,
+     recommend_changes
+ )
+ from ai_critic.feedback import FeedbackStore

- class AICritic:
-     """
-     Automated reviewer for scikit-learn, PyTorch, or TensorFlow models.
-
-     Produces a multi-layered risk assessment including:
-     - Data integrity analysis
-     - Model configuration sanity checks
-     - Performance evaluation (CV + learning curves)
-     - Robustness & leakage heuristics
-     - Human-readable executive and technical summaries
-     """

-     def __init__(self, model, X, y, random_state=None, session=None, framework="sklearn", adapter_kwargs=None):
-         """
-         Parameters
-         ----------
-         model : object
-             scikit-learn estimator, torch.nn.Module, or tf.keras.Model
-         X : np.ndarray
-             Feature matrix
-         y : np.ndarray
-             Target vector
-         random_state : int or None
-             Global seed for reproducibility (optional)
-         session : str or None
-             Optional session name for longitudinal comparison
-         framework : str
-             "sklearn" (default), "torch", or "tensorflow"
-         adapter_kwargs : dict
-             Extra kwargs for the adapter (e.g. epochs, lr, batch_size)
-         """
+ class AICritic:
+     def __init__(
+         self,
+         model,
+         X,
+         y,
+         random_state=None,
+         session=None,
+         framework="sklearn",
+         adapter_kwargs=None
+     ):
          adapter_kwargs = adapter_kwargs or {}
          self.framework = framework.lower()
-         if self.framework != "sklearn":
-             self.model = adapters.ModelAdapter(model, framework=self.framework, **adapter_kwargs)
-         else:
-             self.model = model
+
+         self.model = (
+             adapters.ModelAdapter(model, framework=self.framework, **adapter_kwargs)
+             if self.framework != "sklearn"
+             else model
+         )

          self.X = X
          self.y = y
-         self.random_state = random_state
          self.session = session
-         self._store = CriticSessionStore() if session else None

-     def evaluate(self, view="all", plot=False):
-         """
-         Evaluate the model.
+         self.ml_model = CriticModel()
+         try:
+             self.ml_model.load()
+         except Exception:
+             pass

-         Parameters
-         ----------
-         view : str or list
-             - "all" : full payload (default)
-             - "executive" : executive summary only
-             - "technical" : technical summary only
-             - "details" : low-level evaluator outputs
-             - list : subset of views
-         plot : bool
-             - True : generate plots
-             - False : no plots
-         """
+         self.trainer = CriticTrainer(self.ml_model)
+         self.feedback = FeedbackStore()
+         self._store = CriticSessionStore() if session else None

-         # =========================
-         # Low-level evaluator outputs
-         # =========================
+     def evaluate(self, view="all", plot=False):
          details = {}

-         # -------------------------
-         # Data analysis
-         # -------------------------
-         details["data"] = data.evaluate(
-             self.X,
-             self.y,
-             plot=plot
-         )
-
-         # -------------------------
-         # Model configuration sanity
-         # -------------------------
+         details["data"] = data.evaluate(self.X, self.y, plot=plot)
          details["config"] = config.evaluate(
              self.model,
              n_samples=details["data"]["n_samples"],
              n_features=details["data"]["n_features"]
          )
-
-         # -------------------------
-         # Performance evaluation
-         # -------------------------
          details["performance"] = performance.evaluate(
-             self.model,
-             self.X,
-             self.y,
-             plot=plot
+             self.model, self.X, self.y, plot=plot
          )
-
-         # -------------------------
-         # Robustness evaluation
-         # -------------------------
          details["robustness"] = robustness.evaluate(
              self.model,
              self.X,
@@ -115,147 +75,67 @@ class AICritic:
              plot=plot
          )

-         # =========================
-         # Human summaries
-         # =========================
-         human_summary = HumanSummary().generate(details)
+         human = HumanSummary().generate(details)

          payload = {
-             "executive": human_summary["executive_summary"],
-             "technical": human_summary["technical_summary"],
+             "executive": human["executive_summary"],
+             "technical": human["technical_summary"],
              "details": details,
-             "performance": details["performance"],
+             "meta": {
+                 "framework": self.framework,
+                 "n_samples": details["data"]["n_samples"],
+                 "n_features": details["data"]["n_features"],
+             }
          }

-         # =========================
-         # Session persistence (optional)
-         # =========================
+         payload["scores"] = compute_scores(payload)
+
          if self.session:
-             scores = compute_scores(payload)
-             payload["scores"] = scores
              self._store.save(self.session, payload)

-         # =========================
-         # View selector
-         # =========================
-         if view == "all":
-             return payload
-
-         if isinstance(view, list):
-             return {k: payload[k] for k in view if k in payload}
+         return payload if view == "all" else payload.get(view)

-         return payload.get(view)
-
-     def compare_with(self, previous_session: str) -> dict:
-         """
-         Compare current session with a previous one.
-         """
-
-         if not self.session:
-             raise ValueError("Current session name not set.")
-
-         current = self._store.load(self.session)
-         previous = self._store.load(previous_session)
+     def deploy_decision(self, success_feedback=None):
+         report = self.evaluate(view="all", plot=False)

-         if not previous:
-             raise FileNotFoundError(
-                 f"Session '{previous_session}' not found."
-             )
+         rule_decision = self._rule_based_decision(report)
+         features = extract_features(report)
+         ml_score = self.ml_model.predict_proba(features)

-         diff = {
-             "global_score": {
-                 "current": current["scores"]["global"],
-                 "previous": previous["scores"]["global"],
-                 "delta": current["scores"]["global"] - previous["scores"]["global"],
-             },
-             "components": {}
-         }
+         decision = policy_decision(rule_decision, ml_score)
+         recommendations = recommend_changes(report)

-         for key, value in current["scores"]["components"].items():
-             prev_value = previous["scores"]["components"].get(key)
-             if prev_value is not None:
-                 diff["components"][key] = {
-                     "current": value,
-                     "previous": prev_value,
-                     "delta": value - prev_value
-                 }
+         # 🔁 Automatic feedback loop
+         if success_feedback is not None:
+             self.feedback.add(self.session, report, success_feedback)
+             self.trainer.add_feedback(report, success_feedback)

          return {
-             "current_session": self.session,
-             "previous_session": previous_session,
-             "score_diff": diff,
-             "note": (
-                 "Score deltas indicate changes in risk profile, "
-                 "not absolute model quality."
-             )
+             "deploy": decision["deploy"],
+             "risk_level": rule_decision["risk_level"],
+             "ml_score": round(ml_score, 3),
+             "recommendations": recommendations,
+             "feedback_stats": self.feedback.stats()
          }

-     def deploy_decision(self):
-         """
-         Final deployment gate.
-         """
-
-         report = self.evaluate(view="all", plot=False)
-
-         data_risk = report["details"]["data"]["data_leakage"]["suspected"]
-         perfect_cv = report["details"]["performance"]["suspiciously_perfect"]
-         robustness_verdict = report["details"]["robustness"]["verdict"]
-         structural_warnings = report["details"]["config"]["structural_warnings"]
-
-         blocking_issues = []
-         risk_level = "low"
-
-         # Hard blockers
-         if data_risk and perfect_cv:
-             blocking_issues.append(
-                 "Data leakage combined with suspiciously perfect CV score"
-             )
-             risk_level = "high"
-
-         if robustness_verdict == "misleading":
-             blocking_issues.append(
-                 "Robustness results are misleading due to inflated baseline performance"
-             )
-             risk_level = "high"
-
-         if data_risk:
-             blocking_issues.append(
-                 "Suspected target leakage in feature set"
-             )
-             risk_level = "high"
-
-         # Soft blockers
-         if risk_level != "high":
-             if robustness_verdict == "fragile":
-                 blocking_issues.append(
-                     "Model performance degrades significantly under noise"
-                 )
-                 risk_level = "medium"
-
-             if perfect_cv:
-                 blocking_issues.append(
-                     "Suspiciously perfect cross-validation score"
-                 )
-                 risk_level = "medium"
+     def _rule_based_decision(self, report):
+         blocking = []
+         risk = "low"

-             if structural_warnings:
-                 blocking_issues.append(
-                     "Structural complexity risks detected in model configuration"
-                 )
-                 risk_level = "medium"
+         if report["details"]["data"]["data_leakage"]["suspected"]:
+             blocking.append("Data leakage suspected")
+             risk = "high"

-         deploy = len(blocking_issues) == 0
+         if report["details"]["performance"]["suspiciously_perfect"]:
+             blocking.append("Suspiciously perfect CV score")
+             risk = "medium"

-         confidence = 1.0
-         confidence -= 0.35 if data_risk else 0
-         confidence -= 0.25 if perfect_cv else 0
-         confidence -= 0.25 if robustness_verdict in ("fragile", "misleading") else 0
-         confidence -= 0.15 if structural_warnings else 0
-         confidence = max(0.0, round(confidence, 2))
+         if report["details"]["config"]["risk_level"] == "high":
+             blocking.append("High structural complexity")
+             risk = "medium"

          return {
-             "deploy": deploy,
-             "risk_level": risk_level,
-             "blocking_issues": blocking_issues,
-             "confidence": confidence
+             "deploy": len(blocking) == 0,
+             "risk_level": risk,
+             "blocking_issues": blocking
          }
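
Continuing the usage sketch above, the reworked evaluate() payload can be read as follows; the keys come from the diff, the values are whatever the evaluators return.

    # "scores" is now attached on every call (in 1.2.0 it was only computed when a
    # session was set), and "meta" carries the framework and dataset shape.
    report = critic.evaluate(view="all")
    report["scores"]["global"]
    report["meta"]["framework"], report["meta"]["n_samples"]

    critic.evaluate(view="executive")   # single-view lookup via payload.get("executive")
    # The 1.2.0 list form, evaluate(view=["executive", "details"]), is no longer
    # special-cased; payload.get([...]) raises TypeError because a list is unhashable.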
@@ -1,22 +1,33 @@
+ # evaluators/config.py
  import math

+
  def evaluate(model, n_samples=None, n_features=None):
-     params = model.get_params()
+     """
+     Evaluates model configuration for structural risks and complexity.
+     Outputs only metadata-safe signals (telemetry-ready).
+     """
+
+     params = model.get_params() if hasattr(model, "get_params") else {}
      model_type = type(model).__name__

      report = {
          "model_type": model_type,
          "n_params": len(params),
-         "uses_random_state": "random_state" in params
+         "uses_random_state": "random_state" in params,
+         "complexity_score": 0,
+         "risk_level": "low",
      }

-     # 🧠 Structural overfitting heuristics
      warnings = []

-     if n_samples and hasattr(model, "max_depth"):
+     # =========================
+     # Tree depth heuristic
+     # =========================
+     if n_samples and "max_depth" in params:
          max_depth = params.get("max_depth")
          if max_depth is not None:
-             recommended_depth = math.log2(n_samples)
+             recommended_depth = math.log2(max(2, n_samples))
              if max_depth > recommended_depth:
                  warnings.append({
                      "issue": "structural_overfitting_risk",
@@ -24,12 +35,25 @@ def evaluate(model, n_samples=None, n_features=None):
                      "recommended_max_depth": int(recommended_depth),
                      "message": "Tree depth may be too high for dataset size."
                  })
+                 report["complexity_score"] += 1

+     # =========================
+     # Feature / sample ratio
+     # =========================
      if n_samples and n_features and n_features > n_samples:
          warnings.append({
              "issue": "high_feature_sample_ratio",
              "message": "More features than samples can cause instability."
          })
+         report["complexity_score"] += 1
+
+     # =========================
+     # Risk aggregation
+     # =========================
+     if report["complexity_score"] >= 2:
+         report["risk_level"] = "high"
+     elif report["complexity_score"] == 1:
+         report["risk_level"] = "medium"

      report["structural_warnings"] = warnings
      return report
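
A short sketch of how the new complexity_score / risk_level aggregation behaves, assuming the module is importable as ai_critic.evaluators.config (as the imports in critic.py suggest) and using an ordinary scikit-learn tree; the numbers are illustrative.

    from sklearn.tree import DecisionTreeClassifier
    from ai_critic.evaluators import config

    # depth 12 > log2(200) ~= 7.6  -> structural_overfitting_risk (+1)
    # 300 features > 200 samples   -> high_feature_sample_ratio   (+1)
    model = DecisionTreeClassifier(max_depth=12)
    report = config.evaluate(model, n_samples=200, n_features=300)
    print(report["complexity_score"])                            # 2
    print(report["risk_level"])                                  # "high"
    print([w["issue"] for w in report["structural_warnings"]])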
@@ -0,0 +1,3 @@
+ from .store import FeedbackStore
+
+ __all__ = ["FeedbackStore"]
@@ -0,0 +1,23 @@
+ class FeedbackStore:
+     def __init__(self):
+         self.storage = []
+
+     def add(self, session_id, report, success: bool):
+         self.storage.append({
+             "session": session_id,
+             "success": success,
+             "report": report
+         })
+
+     def stats(self):
+         positives = sum(1 for x in self.storage if x["success"])
+         negatives = sum(1 for x in self.storage if not x["success"])
+
+         return {
+             "total": len(self.storage),
+             "positives": positives,
+             "negatives": negatives
+         }
+
+     def all(self):
+         return self.storage
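
A minimal sketch of the FeedbackStore added above; the session name and report payloads are placeholders.

    from ai_critic.feedback import FeedbackStore

    store = FeedbackStore()
    store.add("baseline", report={"scores": {"global": 72}}, success=True)
    store.add("baseline", report={"scores": {"global": 41}}, success=False)
    print(store.stats())   # {'total': 2, 'positives': 1, 'negatives': 1}

Note that the store is an in-memory list; nothing in this diff persists feedback to disk, so it is lost when the process exits.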
@@ -0,0 +1,13 @@
+ from .features import extract_features
+ from .critic_model import CriticModel
+ from .trainer import CriticTrainer
+ from .policy import policy_decision
+ from .recommender import recommend_changes
+
+ __all__ = [
+     "extract_features",
+     "CriticModel",
+     "CriticTrainer",
+     "policy_decision",
+     "recommend_changes",
+ ]
@@ -0,0 +1,25 @@
+ import joblib
+ from sklearn.linear_model import LogisticRegression
+ import numpy as np
+
+ class CriticModel:
+     def __init__(self, path="critic_model.joblib"):
+         self.path = path
+         self.model = LogisticRegression()
+         self.is_trained = False
+
+     def train(self, X, y):
+         self.model.fit(X, y)
+         self.is_trained = True
+         joblib.dump(self.model, self.path)
+
+     def load(self):
+         self.model = joblib.load(self.path)
+         self.is_trained = True
+
+     def predict_proba(self, features: dict) -> float:
+         if not self.is_trained:
+             return 0.5  # neutral
+
+         X = np.array([list(features.values())])
+         return float(self.model.predict_proba(X)[0][1])
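
A sketch of CriticModel's train/score cycle; the two training rows mimic the seven signals produced by extract_features and are invented for illustration.

    import numpy as np
    from ai_critic.learning import CriticModel

    # columns: n_samples, n_features, data_leakage, perfect_cv,
    #          robustness_fragile, robustness_misleading, structural_risk_high
    X = np.array([
        [500, 20, 0, 0, 0, 0, 0],   # clean report, deployment succeeded
        [500, 20, 1, 1, 0, 1, 1],   # leaky / inflated report, deployment failed
    ])
    y = np.array([1, 0])

    model = CriticModel(path="critic_model.joblib")
    model.train(X, y)   # fits LogisticRegression and dumps it to the given path

    features = {"n_samples": 500, "n_features": 20, "data_leakage": 0, "perfect_cv": 0,
                "robustness_fragile": 0, "robustness_misleading": 0, "structural_risk_high": 0}
    print(round(model.predict_proba(features), 3))   # P(success); 0.5 if never trained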
@@ -0,0 +1,15 @@
+ def extract_features(report: dict) -> dict:
+     data = report["details"]["data"]
+     perf = report["details"]["performance"]
+     robust = report["details"]["robustness"]
+     config = report["details"]["config"]
+
+     return {
+         "n_samples": report["meta"]["n_samples"],
+         "n_features": report["meta"]["n_features"],
+         "data_leakage": int(data["data_leakage"]["suspected"]),
+         "perfect_cv": int(perf["suspiciously_perfect"]),
+         "robustness_fragile": int(robust["verdict"] == "fragile"),
+         "robustness_misleading": int(robust["verdict"] == "misleading"),
+         "structural_risk_high": int(config["risk_level"] == "high"),
+     }
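
A sketch of the feature dict extract_features builds from an evaluation report; only the keys the function reads are filled in, with invented values.

    from ai_critic.learning import extract_features

    report = {
        "meta": {"n_samples": 500, "n_features": 20},
        "details": {
            "data": {"data_leakage": {"suspected": True}},
            "performance": {"suspiciously_perfect": True},
            "robustness": {"verdict": "fragile"},
            "config": {"risk_level": "medium"},
        },
    }
    print(extract_features(report))
    # {'n_samples': 500, 'n_features': 20, 'data_leakage': 1, 'perfect_cv': 1,
    #  'robustness_fragile': 1, 'robustness_misleading': 0, 'structural_risk_high': 0}

CriticModel.predict_proba flattens this dict with list(features.values()), so the insertion order shown here is also the column order the logistic model sees at training and scoring time.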
@@ -0,0 +1,20 @@
+ def policy_decision(rule_decision: dict, ml_score: float):
+     if rule_decision["risk_level"] == "high":
+         return {
+             "deploy": False,
+             "reason": "Blocked by rules",
+             "ml_score": ml_score
+         }
+
+     if ml_score < 0.4:
+         return {
+             "deploy": False,
+             "reason": "ML predicts failure",
+             "ml_score": ml_score
+         }
+
+     return {
+         "deploy": True,
+         "reason": "Approved by ML + rules",
+         "ml_score": ml_score
+     }
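
The three branches of policy_decision, exercised directly; the rule_decision dicts are hand-built to match the shape returned by _rule_based_decision.

    from ai_critic.learning import policy_decision

    high_risk = {"deploy": False, "risk_level": "high", "blocking_issues": ["Data leakage suspected"]}
    low_risk = {"deploy": True, "risk_level": "low", "blocking_issues": []}

    policy_decision(high_risk, ml_score=0.9)    # {"deploy": False, "reason": "Blocked by rules", ...}
    policy_decision(low_risk, ml_score=0.25)    # {"deploy": False, "reason": "ML predicts failure", ...}
    policy_decision(low_risk, ml_score=0.5)     # {"deploy": True, "reason": "Approved by ML + rules", ...}

An untrained CriticModel returns the neutral 0.5, which clears the 0.4 threshold, so until feedback has been used to train the model the gate effectively reduces to the rule layer.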