ai-critic 1.0.0-py3-none-any.whl → 1.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ai_critic/critic.py CHANGED
@@ -2,7 +2,8 @@ from ai_critic.evaluators import (
     robustness,
     config,
     data,
-    performance
+    performance,
+    adapters  # <- new import
 )
 from ai_critic.evaluators.summary import HumanSummary
 from ai_critic.sessions import CriticSessionStore
@@ -11,7 +12,7 @@ from ai_critic.evaluators.scoring import compute_scores
 
 class AICritic:
     """
-    Automated reviewer for scikit-learn models.
+    Automated reviewer for scikit-learn, PyTorch, or TensorFlow models.
 
     Produces a multi-layered risk assessment including:
     - Data integrity analysis
@@ -21,11 +22,12 @@ class AICritic:
     - Human-readable executive and technical summaries
     """
 
-    def __init__(self, model, X, y, random_state=None, session=None):
+    def __init__(self, model, X, y, random_state=None, session=None, framework="sklearn", adapter_kwargs=None):
        """
        Parameters
        ----------
-        model : sklearn-compatible estimator
+        model : object
+            scikit-learn estimator, torch.nn.Module, or tf.keras.Model
        X : np.ndarray
            Feature matrix
        y : np.ndarray
@@ -34,8 +36,18 @@
            Global seed for reproducibility (optional)
        session : str or None
            Optional session name for longitudinal comparison
+       framework : str
+           "sklearn" (default), "torch", or "tensorflow"
+       adapter_kwargs : dict
+           Extra kwargs for the adapter (e.g. epochs, lr, batch_size)
        """
-        self.model = model
+        adapter_kwargs = adapter_kwargs or {}
+        self.framework = framework.lower()
+        if self.framework != "sklearn":
+            self.model = adapters.ModelAdapter(model, framework=self.framework, **adapter_kwargs)
+        else:
+            self.model = model
+
        self.X = X
        self.y = y
        self.random_state = random_state
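
Illustration (not part of the published diff): the new constructor keeps the scikit-learn path untouched and routes any other framework through the adapter added below. Data shapes and hyperparameters here are purely illustrative.

```python
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from ai_critic import AICritic

X = np.random.rand(200, 6)
y = np.random.randint(0, 2, size=200)

# Default framework="sklearn": the estimator is stored unchanged.
critic = AICritic(RandomForestClassifier(), X, y)

# Any non-sklearn framework value wraps the model in adapters.ModelAdapter, e.g.:
# AICritic(torch_model, X, y, framework="torch",
#          adapter_kwargs={"epochs": 5, "lr": 1e-3})
```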
ai_critic/evaluators/adapters.py ADDED
@@ -0,0 +1,84 @@
+# evaluators/adapters.py
+import numpy as np
+
+try:
+    import torch
+    import torch.nn as nn
+except ImportError:
+    torch = None
+
+try:
+    import tensorflow as tf
+except ImportError:
+    tf = None
+
+class ModelAdapter:
+    """
+    Wraps scikit-learn, PyTorch, or TensorFlow models to provide a
+    unified fit/predict interface for AICritic.
+    """
+
+    def __init__(self, model, framework="sklearn", **kwargs):
+        """
+        Parameters
+        ----------
+        model : object
+            The original model (sklearn estimator, torch.nn.Module, or tf.keras.Model)
+        framework : str
+            One of "sklearn", "torch", "tensorflow"
+        kwargs : dict
+            Extra hyperparameters for training (epochs, batch_size, optimizer, etc)
+        """
+        self.model = model
+        self.framework = framework.lower()
+        self.kwargs = kwargs
+
+        if self.framework not in ("sklearn", "torch", "tensorflow"):
+            raise ValueError(f"Unsupported framework: {framework}")
+
+        # PyTorch default settings
+        if self.framework == "torch":
+            self.epochs = kwargs.get("epochs", 5)
+            self.lr = kwargs.get("lr", 1e-3)
+            self.loss_fn = kwargs.get("loss_fn", nn.MSELoss())
+            self.optimizer_class = kwargs.get("optimizer", torch.optim.Adam)
+            self.device = kwargs.get("device", "cpu")
+            self.model.to(self.device)
+
+        # TensorFlow default settings
+        if self.framework == "tensorflow":
+            self.epochs = kwargs.get("epochs", 5)
+            self.batch_size = kwargs.get("batch_size", 32)
+            self.loss_fn = kwargs.get("loss_fn", "mse")
+            self.optimizer = kwargs.get("optimizer", "adam")
+            self.model.compile(optimizer=self.optimizer, loss=self.loss_fn)
+
+    def fit(self, X, y):
+        if self.framework == "sklearn":
+            self.model.fit(X, y)
+        elif self.framework == "torch":
+            X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
+            y_tensor = torch.tensor(y, dtype=torch.float32).to(self.device).view(-1, 1)
+            optimizer = self.optimizer_class(self.model.parameters(), lr=self.lr)
+
+            self.model.train()
+            for epoch in range(self.epochs):
+                optimizer.zero_grad()
+                output = self.model(X_tensor)
+                loss = self.loss_fn(output, y_tensor)
+                loss.backward()
+                optimizer.step()
+        elif self.framework == "tensorflow":
+            self.model.fit(X, y, epochs=self.epochs, batch_size=self.batch_size, verbose=0)
+        return self
+
+    def predict(self, X):
+        if self.framework == "sklearn":
+            return self.model.predict(X)
+        elif self.framework == "torch":
+            self.model.eval()
+            with torch.no_grad():
+                X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
+                return self.model(X_tensor).cpu().numpy().flatten()
+        elif self.framework == "tensorflow":
+            return self.model.predict(X).flatten()
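
A minimal usage sketch of the adapter in isolation (illustration, not part of the diff). It assumes PyTorch is installed, uses the defaults defined above, and takes the import path from the RECORD entry for `ai_critic/evaluators/adapters.py`; the data is random and illustrative.

```python
import numpy as np
import torch.nn as nn
from ai_critic.evaluators.adapters import ModelAdapter

X = np.random.rand(200, 4).astype("float32")
y = np.random.rand(200).astype("float32")  # regression-style targets, matching the default MSE loss

net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
adapter = ModelAdapter(net, framework="torch", epochs=10, lr=1e-2)

adapter.fit(X, y)            # full-batch training loop defined in fit() above
preds = adapter.predict(X)   # 1-D numpy array, same call shape as a sklearn estimator
```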
ai_critic/evaluators/explainability.py ADDED
@@ -0,0 +1,64 @@
+# explainability.py
+import numpy as np
+from sklearn.model_selection import cross_val_score
+from sklearn.base import clone
+
+from .validation import make_cv
+
+
+def evaluate(model, X, y, max_features=10):
+    """
+    Model-agnostic feature sensitivity analysis.
+    Measures how much performance drops when each feature is permuted.
+    """
+
+    cv = make_cv(y)
+
+    base_model = clone(model)
+    base_score = cross_val_score(base_model, X, y, cv=cv).mean()
+
+    sensitivities = []
+
+    for i in range(X.shape[1]):
+        X_permuted = X.copy()
+        np.random.shuffle(X_permuted[:, i])
+
+        permuted_model = clone(model)
+        score = cross_val_score(permuted_model, X_permuted, y, cv=cv).mean()
+
+        drop = base_score - score
+
+        sensitivities.append({
+            "feature_index": int(i),
+            "performance_drop": float(drop)
+        })
+
+    sensitivities.sort(
+        key=lambda x: x["performance_drop"],
+        reverse=True
+    )
+
+    top = sensitivities[:max_features]
+
+    verdict = "stable"
+    message = "No single feature dominates model behavior."
+
+    if top and top[0]["performance_drop"] > 0.30:
+        verdict = "feature_leakage_risk"
+        message = (
+            "Model is highly sensitive to a single feature, "
+            "which may indicate leakage or shortcut learning."
+        )
+    elif top and top[0]["performance_drop"] > 0.15:
+        verdict = "feature_dependency"
+        message = (
+            "Model depends strongly on a small subset of features."
+        )
+
+    return {
+        "baseline_score": float(base_score),
+        "top_sensitive_features": top,
+        "max_performance_drop": float(top[0]["performance_drop"]) if top else 0.0,
+        "verdict": verdict,
+        "message": message
+    }
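
A small sketch of the permutation-sensitivity check in use (illustration, not part of the diff; import path follows the RECORD entry). The synthetic label leaks directly from one feature, so the verdict should typically come back as `feature_leakage_risk`; note that `evaluate()` relies on `clone()` and `cross_val_score()`, so it expects a scikit-learn estimator.

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from ai_critic.evaluators import explainability

rng = np.random.default_rng(0)
X = rng.normal(size=(300, 5))
y = (X[:, 0] > 0).astype(int)  # label is a direct function of feature 0 (deliberate leakage)

result = explainability.evaluate(LogisticRegression(), X, y)
print(result["verdict"], result["max_performance_drop"])
print(result["top_sensitive_features"][:3])
```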
ai_critic/evaluators/scoring.py CHANGED
@@ -11,6 +11,10 @@ def compute_scores(report: dict) -> dict:
     robustness = report["details"]["robustness"]["verdict"]
     structural = report["details"]["config"]["structural_warnings"]
 
+    explainability = report["details"].get("explainability", {})
+    explain_verdict = explainability.get("verdict")
+    max_feature_drop = explainability.get("max_performance_drop", 0)
+
     if data_leakage:
         score -= 30
 
@@ -25,6 +29,11 @@ def compute_scores(report: dict) -> dict:
     if structural:
         score -= 10
 
+    if explain_verdict == "feature_leakage_risk":
+        score -= 20
+    elif explain_verdict == "feature_dependency":
+        score -= 10
+
     return {
         "global": max(0, min(100, score)),
         "components": {
@@ -35,5 +44,10 @@ def compute_scores(report: dict) -> dict:
                 "fragile": 65,
                 "misleading": 40
             }.get(robustness, 100),
+            "explainability": (
+                40 if explain_verdict == "feature_leakage_risk"
+                else 70 if explain_verdict == "feature_dependency"
+                else 100
+            )
         }
     }
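
The scoring effect of the new explainability verdict, restated as standalone arithmetic (illustration only; the helper functions below are hypothetical and simply mirror the constants in the hunks above).

```python
def explainability_penalty(verdict):
    # Deducted from the global score (which compute_scores clamps to 0..100).
    return {"feature_leakage_risk": 20, "feature_dependency": 10}.get(verdict, 0)

def explainability_component(verdict):
    # Value reported under components["explainability"].
    return {"feature_leakage_risk": 40, "feature_dependency": 70}.get(verdict, 100)

assert explainability_penalty("feature_leakage_risk") == 20
assert explainability_component("feature_dependency") == 70
assert explainability_component(None) == 100  # no explainability section leaves the component untouched
```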
ai_critic/evaluators/summary.py CHANGED
@@ -10,6 +10,10 @@ class HumanSummary:
         robustness_verdict = report["robustness"].get("verdict")
         structural_warnings = report["config"]["structural_warnings"]
 
+        explainability = report.get("explainability", {})
+        explain_verdict = explainability.get("verdict")
+        max_feature_drop = explainability.get("max_performance_drop", 0)
+
         # =========================
         # Executive summary
         # =========================
@@ -18,11 +22,19 @@ class HumanSummary:
             risk_level = "high"
             deploy = False
             main_reason = "Strong evidence of data leakage inflating model performance."
+        elif explain_verdict == "feature_leakage_risk":
+            verdict = "❌ Unreliable"
+            risk_level = "high"
+            deploy = False
+            main_reason = (
+                "Model behavior is dominated by a single feature, "
+                "suggesting shortcut learning or leakage."
+            )
         elif robustness_verdict in ("fragile", "misleading") or structural_warnings:
             verdict = "⚠️ Risky"
             risk_level = "medium"
             deploy = False
-            main_reason = "Structural or robustness-related risks detected."
+            main_reason = "Structural, robustness, or dependency-related risks detected."
         else:
             verdict = "✅ Acceptable"
             risk_level = "low"
@@ -71,6 +83,21 @@ class HumanSummary:
                 "Reduce model complexity or adjust hyperparameters."
             )
 
+        if explain_verdict == "feature_leakage_risk":
+            key_risks.append(
+                f"Single feature causes a {max_feature_drop:.2f} performance drop when permuted."
+            )
+            recommendations.append(
+                "Remove or heavily regularize the dominant feature and retrain."
+            )
+        elif explain_verdict == "feature_dependency":
+            key_risks.append(
+                "Model relies disproportionately on a small subset of features."
+            )
+            recommendations.append(
+                "Increase regularization or collect more diverse data."
+            )
+
         if robustness_verdict == "misleading":
             key_risks.append(
                 "Robustness metrics are misleading due to inflated baseline performance."
@@ -92,7 +119,8 @@ class HumanSummary:
                 "data_leakage": leakage,
                 "suspicious_cv": perfect_cv,
                 "structural_risk": bool(structural_warnings),
-                "robustness_verdict": robustness_verdict
+                "robustness_verdict": robustness_verdict,
+                "explainability_verdict": explain_verdict
             },
             "recommendations": recommendations
         }
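
The branch order above determines which verdict wins when several risks are present. A condensed restatement (illustration with placeholder inputs, not the real report plumbing; the first condition is shortened to `leakage` since its full form lies outside the hunk):

```python
def executive_verdict(leakage, explain_verdict, robustness_verdict, structural_warnings):
    # Precedence mirrors HumanSummary after this change: data leakage first,
    # then the new feature-leakage verdict, then robustness/structural issues.
    if leakage:
        return "❌ Unreliable", "high"
    if explain_verdict == "feature_leakage_risk":
        return "❌ Unreliable", "high"
    if robustness_verdict in ("fragile", "misleading") or structural_warnings:
        return "⚠️ Risky", "medium"
    return "✅ Acceptable", "low"

print(executive_verdict(False, "feature_leakage_risk", "stable", []))  # ('❌ Unreliable', 'high')
```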
ai_critic-1.2.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-critic
-Version: 1.0.0
+Version: 1.2.0
 Summary: Fast AI evaluator for scikit-learn models
 Author-email: Luiz Seabra <filipedemarco@yahoo.com>
 Requires-Python: >=3.9
@@ -10,7 +10,7 @@ Requires-Dist: scikit-learn
 
 # ai-critic 🧠: The Quality Gate for Machine Learning Models
 
-**ai-critic** is a specialized **decision-making** tool designed to audit the reliability and readiness for deployment of scikit-learn–compatible Machine Learning models.
+**ai-critic** is a specialized **decision-making** tool designed to audit the reliability and readiness for deployment of **scikit-learn**, **PyTorch**, and **TensorFlow** models.
 
 Instead of merely measuring performance (accuracy, F1 score), **ai-critic** acts as a **Quality Gate**, actively probing the model to uncover *hidden risks* that commonly cause production failures — such as **data leakage**, **structural overfitting**, and **fragility under noise**.
 
@@ -19,11 +19,11 @@ Instead of merely measuring performance (accuracy, F1 score), **ai-critic** acts
 
 ---
 
-## 🚀 Getting Started (The Basics)
+## 🚀 Getting Started (The Basics)
 
 This section is ideal for beginners who need a **fast and reliable verdict** on a trained model.
 
-### Installation
+### Installation
 
 Install directly from PyPI:
 
@@ -33,7 +33,7 @@ pip install ai-critic
 
 ---
 
-### The Quick Verdict
+### The Quick Verdict
 
 With just a few lines of code, you obtain an **executive-level assessment** and a **deployment recommendation**.
 
@@ -70,13 +70,13 @@ If **ai-critic** recommends deployment, it means meaningful risks were *not* det
 
 ---
 
-## 💡 Understanding the Critique (The Intermediary)
+## 💡 Understanding the Critique (The Intermediary)
 
 For data scientists who want to understand **why** the model received a given verdict and **how to improve it**.
 
 ---
 
-### The Four Pillars of the Audit
+### The Four Pillars of the Audit
 
 **ai-critic** evaluates models across four independent risk dimensions:
 
@@ -91,7 +91,8 @@ Each pillar contributes signals used later in the **deployment gate**.
 
 ---
 
-### Full Technical & Visual Analysis
+
+### Full Technical & Visual Analysis
 
 To access **all internal diagnostics**, including plots and recommendations, use `view="all"`.
 
@@ -117,7 +118,7 @@ Generated plots may include:
 
 ---
 
-### Robustness Test (Noise Injection)
+### Robustness Test (Noise Injection)
 
 A model that collapses under small perturbations is **not production-safe**.
 
@@ -139,13 +140,52 @@ print(f"Verdict: {robustness['verdict']}")
 
 ---
 
-## ⚙️ Integration and Governance (The Advanced)
+## ⚙️ Integration and Governance (The Advanced)
 
 This section targets **MLOps engineers**, **architects**, and teams operating automated pipelines.
 
 ---
 
-### The Deployment Gate (`deploy_decision`)
+### Multi-Framework Support
+
+**ai-critic 1.0+** supports models from multiple frameworks with the **same API**:
+
+```python
+# PyTorch Example
+import torch
+import torch.nn as nn
+from ai_critic import AICritic
+
+X = torch.randn(1000, 20)
+y = torch.randint(0, 2, (1000,))
+
+model = nn.Sequential(
+    nn.Linear(20, 32),
+    nn.ReLU(),
+    nn.Linear(32, 2)
+)
+
+critic = AICritic(model, X, y, framework="torch", adapter_kwargs={"epochs":5, "batch_size":64})
+report = critic.evaluate(view="executive")
+print(report)
+
+# TensorFlow Example
+import tensorflow as tf
+
+model = tf.keras.Sequential([
+    tf.keras.layers.Dense(32, activation="relu", input_shape=(20,)),
+    tf.keras.layers.Dense(2)
+])
+critic = AICritic(model, X.numpy(), y.numpy(), framework="tensorflow", adapter_kwargs={"epochs":5})
+report = critic.evaluate(view="executive")
+print(report)
+```
+
+> No need to rewrite evaluation code — **one Critic API works for sklearn, PyTorch, or TensorFlow**.
+
+---
+
+### The Deployment Gate (`deploy_decision`)
 
 The `deploy_decision()` method aggregates *all detected risks* and produces a final gate decision.
 
@@ -173,7 +213,7 @@
 
 ---
 
-### Modes & Views (API Design)
+### Modes & Views (API Design)
 
 The `evaluate()` method supports **multiple modes** via the `view` parameter:
 
@@ -193,7 +233,7 @@ critic.evaluate(view=["executive", "performance"])
 
 ---
 
-### Session Tracking & Model Comparison (New in 1.0.0)
+### Session Tracking & Model Comparison
 
 You can persist evaluations and compare model versions over time.
 
@@ -216,7 +256,7 @@ This enables:
 
 ---
 
-### Best Practices & Use Cases
+### Best Practices & Use Cases
 
 | Scenario | Recommended Usage |
 | ----------------------- | -------------------------------------- |
@@ -226,11 +266,14 @@ This enables:
 | **Stakeholder Reports** | Share executive summaries |
 
 ---
+
 ## 🔒 API Stability
 
 Starting from version **1.0.0**, the public API of **ai-critic** follows semantic versioning.
 Breaking changes will only occur in major releases.
 
+---
+
 ## 📄 License
 
 Distributed under the **MIT License**.
@@ -245,13 +288,3 @@ Distributed under the **MIT License**.
 A failed audit does **not** mean the model is bad — it means the model **is not ready to be trusted**.
 
 The purpose of **ai-critic** is to introduce *structured skepticism* into machine learning workflows — exactly where it belongs.
-
----
-
-If you want, as a next step I can:
-
-* generate the **official CHANGELOG.md for 1.0.0**
-* review this README as an **external reviewer**
-* write the **launch post** (GitHub / PyPI / Reddit)
-
-This README is already at a **genuinely professional level**.
ai_critic-1.2.0.dist-info/RECORD CHANGED
@@ -1,16 +1,18 @@
 ai_critic/__init__.py,sha256=H6DlPMmbcFUamhsNULPLk9vHx81XCiXuKKf63EJ8eM0,53
-ai_critic/critic.py,sha256=ovvOX357OzIC28H0iJrtZfUyku4CA9FnGQiA8M9DDbk,7701
+ai_critic/critic.py,sha256=I9MeVHVCN-lWffPm3DJCgbFVVW8VTIs_qhXd-aP3X5Q,8277
 ai_critic/evaluators/__init__.py,sha256=ri6InmL8_LIcO-JZpU_gEFKLO4URdqo3z6rh7fV6M8Y,169
+ai_critic/evaluators/adapters.py,sha256=8Xw9Ccg1iGVNwVQDGVIqhWj5-Sg6evqCZhg21u8EP20,3068
 ai_critic/evaluators/config.py,sha256=gBXaS8Qxl14f40JnvMWgA0Z0SGEtbCuCHpTOPem0H90,1163
 ai_critic/evaluators/data.py,sha256=YAK5NkwCeJOny_UueZ5ALwvEcRDIbEck404eV2oqWnc,1871
+ai_critic/evaluators/explainability.py,sha256=UWbcb5uVI78d1ljfdrWd2DrjlwEz1y9CeVtkukefEfA,1759
 ai_critic/evaluators/performance.py,sha256=1CQx5DueK0XkelYyJnAGRJ3AjQtjsKeW8_1JQZqKVOI,1973
 ai_critic/evaluators/robustness.py,sha256=mfVQ67Z6t6aRvtIq-XQEQYbwvyf8UefM1myeOGVrnAE,1869
-ai_critic/evaluators/scoring.py,sha256=GBkmDa5Q6RZY4hJfzrCbxbBopsOsRjsNtzyoQHqgWHA,1046
-ai_critic/evaluators/summary.py,sha256=O9ZCrph93VV6pFcMIx2a7DizPIccRUqbGcUZ6oDmOLs,3791
+ai_critic/evaluators/scoring.py,sha256=9rgkCXKKm9G1Lfwn5i9HcsJTN5OUjxMycOUzhWkp_2g,1576
+ai_critic/evaluators/summary.py,sha256=H9rU9tXAXqyQ34L6bOOOHrdIapSq71gcjjc8jfyJMq4,5003
 ai_critic/evaluators/validation.py,sha256=rnzRwD78Cugey33gl9geE8JoBURsKEEnqrIOhBZv0LY,904
 ai_critic/sessions/__init__.py,sha256=Yp7mphSPJwt8a4cJgcQNErqwqHVuP_xAJODrs0y0Abw,72
 ai_critic/sessions/store.py,sha256=65m9WXFVFWv4pPzvXV4l8zLHoHWMfCGe6eHh4X-8agY,947
-ai_critic-1.0.0.dist-info/METADATA,sha256=_3VxXuMYnt2LoCrUw8AhTb2UMm934lcxgWV2Bw0l3eg,7426
-ai_critic-1.0.0.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
-ai_critic-1.0.0.dist-info/top_level.txt,sha256=TRyZkm1vyLLcFDg_80yeg5cHvPis_oW1Ti170417jkw,10
-ai_critic-1.0.0.dist-info/RECORD,,
+ai_critic-1.2.0.dist-info/METADATA,sha256=s0XYw_E7ZoVBhF74lyhQsFk_bcyJWY3eo8Yk5E97tZ4,8115
+ai_critic-1.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ai_critic-1.2.0.dist-info/top_level.txt,sha256=TRyZkm1vyLLcFDg_80yeg5cHvPis_oW1Ti170417jkw,10
+ai_critic-1.2.0.dist-info/RECORD,,
ai_critic-1.2.0.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.10.1)
+Generator: setuptools (80.10.2)
 Root-Is-Purelib: true
 Tag: py3-none-any