ai-critic 0.2.4__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ai_critic/critic.py CHANGED
@@ -5,6 +5,8 @@ from ai_critic.evaluators import (
5
5
  performance
6
6
  )
7
7
  from ai_critic.evaluators.summary import HumanSummary
8
+ from ai_critic.sessions import CriticSessionStore
9
+ from ai_critic.evaluators.scoring import compute_scores
8
10
 
9
11
 
10
12
  class AICritic:
@@ -19,7 +21,7 @@ class AICritic:
19
21
  - Human-readable executive and technical summaries
20
22
  """
21
23
 
22
- def __init__(self, model, X, y, random_state=None):
24
+ def __init__(self, model, X, y, random_state=None, session=None):
23
25
  """
24
26
  Parameters
25
27
  ----------
@@ -30,11 +32,15 @@ class AICritic:
30
32
  Target vector
31
33
  random_state : int or None
32
34
  Global seed for reproducibility (optional)
35
+ session : str or None
36
+ Optional session name for longitudinal comparison
33
37
  """
34
38
  self.model = model
35
39
  self.X = X
36
40
  self.y = y
37
41
  self.random_state = random_state
42
+ self.session = session
43
+ self._store = CriticSessionStore() if session else None
38
44
 
39
45
  def evaluate(self, view="all", plot=False):
40
46
  """
@@ -47,15 +53,10 @@ class AICritic:
47
53
  - "executive" : executive summary only
48
54
  - "technical" : technical summary only
49
55
  - "details" : low-level evaluator outputs
50
- - list : subset of views (e.g. ["executive", "details"])
56
+ - list : subset of views
51
57
  plot : bool
52
- - True : generate plots (learning curve, heatmap, robustness)
58
+ - True : generate plots
53
59
  - False : no plots
54
-
55
- Returns
56
- -------
57
- dict
58
- Evaluation payload according to selected view
59
60
  """
60
61
 
61
62
  # =========================
@@ -66,25 +67,23 @@ class AICritic:
66
67
  # -------------------------
67
68
  # Data analysis
68
69
  # -------------------------
69
- data_report = data.evaluate(
70
+ details["data"] = data.evaluate(
70
71
  self.X,
71
72
  self.y,
72
73
  plot=plot
73
74
  )
74
- details["data"] = data_report
75
75
 
76
76
  # -------------------------
77
77
  # Model configuration sanity
78
78
  # -------------------------
79
79
  details["config"] = config.evaluate(
80
80
  self.model,
81
- n_samples=data_report["n_samples"],
82
- n_features=data_report["n_features"]
81
+ n_samples=details["data"]["n_samples"],
82
+ n_features=details["data"]["n_features"]
83
83
  )
84
84
 
85
85
  # -------------------------
86
86
  # Performance evaluation
87
- # (CV strategy inferred automatically)
88
87
  # -------------------------
89
88
  details["performance"] = performance.evaluate(
90
89
  self.model,
@@ -94,32 +93,36 @@ class AICritic:
94
93
  )
95
94
 
96
95
  # -------------------------
97
- # Robustness & leakage analysis
96
+ # Robustness evaluation
98
97
  # -------------------------
99
98
  details["robustness"] = robustness.evaluate(
100
99
  self.model,
101
100
  self.X,
102
101
  self.y,
103
- leakage_suspected=data_report["data_leakage"]["suspected"],
102
+ leakage_suspected=details["data"]["data_leakage"]["suspected"],
104
103
  plot=plot
105
104
  )
106
105
 
107
106
  # =========================
108
- # Human-centered summaries
107
+ # Human summaries
109
108
  # =========================
110
109
  human_summary = HumanSummary().generate(details)
111
110
 
112
- # =========================
113
- # Full payload (PUBLIC API)
114
- # =========================
115
111
  payload = {
116
112
  "executive": human_summary["executive_summary"],
117
113
  "technical": human_summary["technical_summary"],
118
114
  "details": details,
119
- # Convenience shortcut (prevents KeyError in user code)
120
- "performance": details["performance"]
115
+ "performance": details["performance"],
121
116
  }
122
117
 
118
+ # =========================
119
+ # Session persistence (optional)
120
+ # =========================
121
+ if self.session:
122
+ scores = compute_scores(payload)
123
+ payload["scores"] = scores
124
+ self._store.save(self.session, payload)
125
+
123
126
  # =========================
124
127
  # View selector
125
128
  # =========================
@@ -130,3 +133,117 @@ class AICritic:
130
133
  return {k: payload[k] for k in view if k in payload}
131
134
 
132
135
  return payload.get(view)
136
+
137
+ def compare_with(self, previous_session: str) -> dict:
138
+ """
139
+ Compare current session with a previous one.
140
+ """
141
+
142
+ if not self.session:
143
+ raise ValueError("Current session name not set.")
144
+
145
+ current = self._store.load(self.session)
146
+ previous = self._store.load(previous_session)
147
+
148
+ if not previous:
149
+ raise FileNotFoundError(
150
+ f"Session '{previous_session}' not found."
151
+ )
152
+
153
+ diff = {
154
+ "global_score": {
155
+ "current": current["scores"]["global"],
156
+ "previous": previous["scores"]["global"],
157
+ "delta": current["scores"]["global"] - previous["scores"]["global"],
158
+ },
159
+ "components": {}
160
+ }
161
+
162
+ for key, value in current["scores"]["components"].items():
163
+ prev_value = previous["scores"]["components"].get(key)
164
+ if prev_value is not None:
165
+ diff["components"][key] = {
166
+ "current": value,
167
+ "previous": prev_value,
168
+ "delta": value - prev_value
169
+ }
170
+
171
+ return {
172
+ "current_session": self.session,
173
+ "previous_session": previous_session,
174
+ "score_diff": diff,
175
+ "note": (
176
+ "Score deltas indicate changes in risk profile, "
177
+ "not absolute model quality."
178
+ )
179
+ }
180
+
181
+ def deploy_decision(self):
182
+ """
183
+ Final deployment gate.
184
+ """
185
+
186
+ report = self.evaluate(view="all", plot=False)
187
+
188
+ data_risk = report["details"]["data"]["data_leakage"]["suspected"]
189
+ perfect_cv = report["details"]["performance"]["suspiciously_perfect"]
190
+ robustness_verdict = report["details"]["robustness"]["verdict"]
191
+ structural_warnings = report["details"]["config"]["structural_warnings"]
192
+
193
+ blocking_issues = []
194
+ risk_level = "low"
195
+
196
+ # Hard blockers
197
+ if data_risk and perfect_cv:
198
+ blocking_issues.append(
199
+ "Data leakage combined with suspiciously perfect CV score"
200
+ )
201
+ risk_level = "high"
202
+
203
+ if robustness_verdict == "misleading":
204
+ blocking_issues.append(
205
+ "Robustness results are misleading due to inflated baseline performance"
206
+ )
207
+ risk_level = "high"
208
+
209
+ if data_risk:
210
+ blocking_issues.append(
211
+ "Suspected target leakage in feature set"
212
+ )
213
+ risk_level = "high"
214
+
215
+ # Soft blockers
216
+ if risk_level != "high":
217
+ if robustness_verdict == "fragile":
218
+ blocking_issues.append(
219
+ "Model performance degrades significantly under noise"
220
+ )
221
+ risk_level = "medium"
222
+
223
+ if perfect_cv:
224
+ blocking_issues.append(
225
+ "Suspiciously perfect cross-validation score"
226
+ )
227
+ risk_level = "medium"
228
+
229
+ if structural_warnings:
230
+ blocking_issues.append(
231
+ "Structural complexity risks detected in model configuration"
232
+ )
233
+ risk_level = "medium"
234
+
235
+ deploy = len(blocking_issues) == 0
236
+
237
+ confidence = 1.0
238
+ confidence -= 0.35 if data_risk else 0
239
+ confidence -= 0.25 if perfect_cv else 0
240
+ confidence -= 0.25 if robustness_verdict in ("fragile", "misleading") else 0
241
+ confidence -= 0.15 if structural_warnings else 0
242
+ confidence = max(0.0, round(confidence, 2))
243
+
244
+ return {
245
+ "deploy": deploy,
246
+ "risk_level": risk_level,
247
+ "blocking_issues": blocking_issues,
248
+ "confidence": confidence
249
+ }
@@ -0,0 +1,39 @@
1
+ def compute_scores(report: dict) -> dict:
2
+ """
3
+ Converts critic signals into a coarse 0–100 score.
4
+ Score is NOT an objective metric.
5
+ """
6
+
7
+ score = 100
8
+
9
+ data_leakage = report["details"]["data"]["data_leakage"]["suspected"]
10
+ perfect_cv = report["details"]["performance"]["suspiciously_perfect"]
11
+ robustness = report["details"]["robustness"]["verdict"]
12
+ structural = report["details"]["config"]["structural_warnings"]
13
+
14
+ if data_leakage:
15
+ score -= 30
16
+
17
+ if perfect_cv:
18
+ score -= 20
19
+
20
+ if robustness == "fragile":
21
+ score -= 15
22
+ elif robustness == "misleading":
23
+ score -= 25
24
+
25
+ if structural:
26
+ score -= 10
27
+
28
+ return {
29
+ "global": max(0, min(100, score)),
30
+ "components": {
31
+ "data_integrity": 0 if data_leakage else 100,
32
+ "validation": 70 if perfect_cv else 100,
33
+ "robustness": {
34
+ "stable": 100,
35
+ "fragile": 65,
36
+ "misleading": 40
37
+ }.get(robustness, 100),
38
+ }
39
+ }
@@ -0,0 +1,3 @@
1
+ from .store import CriticSessionStore
2
+
3
+ __all__ = ["CriticSessionStore"]
@@ -0,0 +1,33 @@
1
+ import json
2
+ from pathlib import Path
3
+ from datetime import datetime
4
+
5
+
6
+ class CriticSessionStore:
7
+ """
8
+ Simple local persistence layer for ai-critic sessions.
9
+ """
10
+
11
+ def __init__(self, base_dir: str | None = None):
12
+ self.base_dir = Path(
13
+ base_dir or Path.home() / ".ai_critic_sessions"
14
+ )
15
+ self.base_dir.mkdir(parents=True, exist_ok=True)
16
+
17
+ def _session_path(self, name: str) -> Path:
18
+ return self.base_dir / f"{name}.json"
19
+
20
+ def save(self, name: str, payload: dict):
21
+ data = {
22
+ "timestamp": datetime.utcnow().isoformat(),
23
+ "payload": payload
24
+ }
25
+ with open(self._session_path(name), "w") as f:
26
+ json.dump(data, f, indent=2)
27
+
28
+ def load(self, name: str) -> dict | None:
29
+ path = self._session_path(name)
30
+ if not path.exists():
31
+ return None
32
+ with open(path) as f:
33
+ return json.load(f)["payload"]
@@ -0,0 +1,257 @@
1
+ Metadata-Version: 2.4
2
+ Name: ai-critic
3
+ Version: 1.0.0
4
+ Summary: Fast AI evaluator for scikit-learn models
5
+ Author-email: Luiz Seabra <filipedemarco@yahoo.com>
6
+ Requires-Python: >=3.9
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: numpy
9
+ Requires-Dist: scikit-learn
10
+
11
+ # ai-critic 🧠: The Quality Gate for Machine Learning Models
12
+
13
+ **ai-critic** is a specialized **decision-making** tool designed to audit the reliability and readiness for deployment of scikit-learn–compatible Machine Learning models.
14
+
15
+ Instead of merely measuring performance (accuracy, F1 score), **ai-critic** acts as a **Quality Gate**, actively probing the model to uncover *hidden risks* that commonly cause production failures — such as **data leakage**, **structural overfitting**, and **fragility under noise**.
16
+
17
+ > **ai-critic does not ask “How good is this model?”**
18
+ > It asks **“Can this model be trusted?”**
19
+
20
+ ---
21
+
22
+ ## 🚀 Getting Started (The Basics)
23
+
24
+ This section is ideal for beginners who need a **fast and reliable verdict** on a trained model.
25
+
26
+ ### Installation
27
+
28
+ Install directly from PyPI:
29
+
30
+ ```bash
31
+ pip install ai-critic
32
+ ```
33
+
34
+ ---
35
+
36
+ ### The Quick Verdict
37
+
38
+ With just a few lines of code, you obtain an **executive-level assessment** and a **deployment recommendation**.
39
+
40
+ ```python
41
+ from ai_critic import AICritic
42
+ from sklearn.ensemble import RandomForestClassifier
43
+ from sklearn.datasets import make_classification
44
+
45
+ # 1. Prepare data and model
46
+ X, y = make_classification(n_samples=1000, n_features=20, random_state=42)
47
+ model = RandomForestClassifier(max_depth=5, random_state=42)
48
+
49
+ # 2. Initialize the Critic
50
+ critic = AICritic(model, X, y)
51
+
52
+ # 3. Run the audit (executive mode)
53
+ report = critic.evaluate(view="executive")
54
+
55
+ print(f"Verdict: {report['verdict']}")
56
+ print(f"Risk Level: {report['risk_level']}")
57
+ print(f"Main Reason: {report['main_reason']}")
58
+ ```
59
+
60
+ **Expected Output (example):**
61
+
62
+ ```text
63
+ Verdict: ⚠️ Risky
64
+ Risk Level: medium
65
+ Main Reason: Structural or robustness-related risks detected.
66
+ ```
67
+
68
+ This output is intentionally **conservative**.
69
+ If **ai-critic** recommends deployment, it means meaningful risks were *not* detected.
70
+
71
+ ---
72
+
73
+ ## 💡 Understanding the Critique (The Intermediary)
74
+
75
+ For data scientists who want to understand **why** the model received a given verdict and **how to improve it**.
76
+
77
+ ---
78
+
79
+ ### The Four Pillars of the Audit
80
+
81
+ **ai-critic** evaluates models across four independent risk dimensions:
82
+
83
+ | Pillar | Main Risk Detected | Internal Module |
84
+ | ---------------------- | -------------------------------------- | ------------------------ |
85
+ | 📊 **Data Integrity** | Target Leakage & Correlation Artifacts | `evaluators.data` |
86
+ | 🧠 **Model Structure** | Over-complexity & Misconfiguration | `evaluators.config` |
87
+ | 📈 **Performance** | Suspicious CV or Learning Curves | `evaluators.performance` |
88
+ | 🧪 **Robustness** | Sensitivity to Noise | `evaluators.robustness` |
89
+
90
+ Each pillar contributes signals used later in the **deployment gate**.
91
+
92
+ ---
93
+
94
+ ### Full Technical & Visual Analysis
95
+
96
+ To access **all internal diagnostics**, including plots and recommendations, use `view="all"`.
97
+
98
+ ```python
99
+ full_report = critic.evaluate(view="all", plot=True)
100
+
101
+ technical_summary = full_report["technical"]
102
+
103
+ print("\n--- Key Risks Detected ---")
104
+ for i, risk in enumerate(technical_summary["key_risks"], start=1):
105
+ print(f"{i}. {risk}")
106
+
107
+ print("\n--- Recommendations ---")
108
+ for rec in technical_summary["recommendations"]:
109
+ print(f"- {rec}")
110
+ ```
111
+
112
+ Generated plots may include:
113
+
114
+ * Feature correlation heatmaps
115
+ * Learning curves
116
+ * Robustness degradation charts
117
+
118
+ ---
119
+
120
+ ### Robustness Test (Noise Injection)
121
+
122
+ A model that collapses under small perturbations is **not production-safe**.
123
+
124
+ ```python
125
+ robustness = full_report["details"]["robustness"]
126
+
127
+ print("\n--- Robustness Analysis ---")
128
+ print(f"Original CV Score: {robustness['cv_score_original']:.4f}")
129
+ print(f"Noisy CV Score: {robustness['cv_score_noisy']:.4f}")
130
+ print(f"Performance Drop: {robustness['performance_drop']:.4f}")
131
+ print(f"Verdict: {robustness['verdict']}")
132
+ ```
133
+
134
+ **Possible Verdicts:**
135
+
136
+ * `stable` → acceptable degradation
137
+ * `fragile` → high sensitivity to noise
138
+ * `misleading` → performance likely inflated by leakage
139
+
140
+ ---
141
+
142
+ ## ⚙️ Integration and Governance (The Advanced)
143
+
144
+ This section targets **MLOps engineers**, **architects**, and teams operating automated pipelines.
145
+
146
+ ---
147
+
148
+ ### The Deployment Gate (`deploy_decision`)
149
+
150
+ The `deploy_decision()` method aggregates *all detected risks* and produces a final gate decision.
151
+
152
+ ```python
153
+ decision = critic.deploy_decision()
154
+
155
+ if decision["deploy"]:
156
+ print("✅ Deployment Approved")
157
+ else:
158
+ print("❌ Deployment Blocked")
159
+
160
+ print(f"Risk Level: {decision['risk_level']}")
161
+ print(f"Confidence Score: {decision['confidence']:.2f}")
162
+
163
+ print("\nBlocking Issues:")
164
+ for issue in decision["blocking_issues"]:
165
+ print(f"- {issue}")
166
+ ```
167
+
168
+ **Conceptual model:**
169
+
170
+ * **Hard Blockers** → deployment denied
171
+ * **Soft Blockers** → deployment discouraged
172
+ * **Confidence Score (0–1)** → heuristic trust level
173
+
174
+ ---
175
+
176
+ ### Modes & Views (API Design)
177
+
178
+ The `evaluate()` method supports **multiple modes** via the `view` parameter:
179
+
180
+ | View | Description |
181
+ | ------------- | ---------------------------------- |
182
+ | `"executive"` | High-level verdict (non-technical) |
183
+ | `"technical"` | Risks & recommendations |
184
+ | `"details"` | Raw evaluator outputs |
185
+ | `"all"` | Complete payload |
186
+
187
+ Example:
188
+
189
+ ```python
190
+ critic.evaluate(view="technical")
191
+ critic.evaluate(view=["executive", "performance"])
192
+ ```
193
+
194
+ ---
195
+
196
+ ### Session Tracking & Model Comparison (New in 1.0.0)
197
+
198
+ You can persist evaluations and compare model versions over time.
199
+
200
+ ```python
201
+ critic_v1 = AICritic(model, X, y, session="v1")
202
+ critic_v1.evaluate()
203
+
204
+ critic_v2 = AICritic(model, X, y, session="v2")
205
+ critic_v2.evaluate()
206
+
207
+ comparison = critic_v2.compare_with("v1")
208
+ print(comparison["score_diff"])
209
+ ```
210
+
211
+ This enables:
212
+
213
+ * Regression tracking
214
+ * Risk drift detection
215
+ * Governance & audit trails
216
+
217
+ ---
218
+
219
+ ### Best Practices & Use Cases
220
+
221
+ | Scenario | Recommended Usage |
222
+ | ----------------------- | -------------------------------------- |
223
+ | **CI/CD** | Block merges using `deploy_decision()` |
224
+ | **Model Tuning** | Use technical view for guidance |
225
+ | **Governance** | Persist session outputs |
226
+ | **Stakeholder Reports** | Share executive summaries |
227
+
228
+ ---
229
+ ## 🔒 API Stability
230
+
231
+ Starting from version **1.0.0**, the public API of **ai-critic** follows semantic versioning.
232
+ Breaking changes will only occur in major releases.
233
+
234
+ ## 📄 License
235
+
236
+ Distributed under the **MIT License**.
237
+
238
+ ---
239
+
240
+ ## 🧠 Final Note
241
+
242
+ > **ai-critic is not a benchmarking tool.**
243
+ > It is a *decision-making system*.
244
+
245
+ A failed audit does **not** mean the model is bad — it means the model **is not ready to be trusted**.
246
+
247
+ The purpose of **ai-critic** is to introduce *structured skepticism* into machine learning workflows — exactly where it belongs.
248
+
249
+ ---
250
+
251
+ Se quiser, próximo passo posso:
252
+
253
+ * gerar o **CHANGELOG.md oficial do 1.0.0**
254
+ * revisar esse README como um **reviewer externo**
255
+ * escrever o **post de lançamento** (GitHub / PyPI / Reddit)
256
+
257
+ Esse README já está em **nível profissional real**.
@@ -1,13 +1,16 @@
1
1
  ai_critic/__init__.py,sha256=H6DlPMmbcFUamhsNULPLk9vHx81XCiXuKKf63EJ8eM0,53
2
- ai_critic/critic.py,sha256=0fsMpvvV4JSp59vsj4ie9xUSJcTpzM1P8MBRtYKHzxc,3785
2
+ ai_critic/critic.py,sha256=ovvOX357OzIC28H0iJrtZfUyku4CA9FnGQiA8M9DDbk,7701
3
3
  ai_critic/evaluators/__init__.py,sha256=ri6InmL8_LIcO-JZpU_gEFKLO4URdqo3z6rh7fV6M8Y,169
4
4
  ai_critic/evaluators/config.py,sha256=gBXaS8Qxl14f40JnvMWgA0Z0SGEtbCuCHpTOPem0H90,1163
5
5
  ai_critic/evaluators/data.py,sha256=YAK5NkwCeJOny_UueZ5ALwvEcRDIbEck404eV2oqWnc,1871
6
6
  ai_critic/evaluators/performance.py,sha256=1CQx5DueK0XkelYyJnAGRJ3AjQtjsKeW8_1JQZqKVOI,1973
7
7
  ai_critic/evaluators/robustness.py,sha256=mfVQ67Z6t6aRvtIq-XQEQYbwvyf8UefM1myeOGVrnAE,1869
8
+ ai_critic/evaluators/scoring.py,sha256=GBkmDa5Q6RZY4hJfzrCbxbBopsOsRjsNtzyoQHqgWHA,1046
8
9
  ai_critic/evaluators/summary.py,sha256=O9ZCrph93VV6pFcMIx2a7DizPIccRUqbGcUZ6oDmOLs,3791
9
10
  ai_critic/evaluators/validation.py,sha256=rnzRwD78Cugey33gl9geE8JoBURsKEEnqrIOhBZv0LY,904
10
- ai_critic-0.2.4.dist-info/METADATA,sha256=ldrqxE_VPP5IqXOrpiwB65WP5OTILQ4rizx_IfWuGBQ,1615
11
- ai_critic-0.2.4.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
12
- ai_critic-0.2.4.dist-info/top_level.txt,sha256=TRyZkm1vyLLcFDg_80yeg5cHvPis_oW1Ti170417jkw,10
13
- ai_critic-0.2.4.dist-info/RECORD,,
11
+ ai_critic/sessions/__init__.py,sha256=Yp7mphSPJwt8a4cJgcQNErqwqHVuP_xAJODrs0y0Abw,72
12
+ ai_critic/sessions/store.py,sha256=65m9WXFVFWv4pPzvXV4l8zLHoHWMfCGe6eHh4X-8agY,947
13
+ ai_critic-1.0.0.dist-info/METADATA,sha256=_3VxXuMYnt2LoCrUw8AhTb2UMm934lcxgWV2Bw0l3eg,7426
14
+ ai_critic-1.0.0.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
15
+ ai_critic-1.0.0.dist-info/top_level.txt,sha256=TRyZkm1vyLLcFDg_80yeg5cHvPis_oW1Ti170417jkw,10
16
+ ai_critic-1.0.0.dist-info/RECORD,,
@@ -1,76 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: ai-critic
3
- Version: 0.2.4
4
- Summary: Fast AI evaluator for scikit-learn models
5
- Author-email: Luiz Seabra <filipedemarco@yahoo.com>
6
- Requires-Python: >=3.9
7
- Description-Content-Type: text/markdown
8
- Requires-Dist: numpy
9
- Requires-Dist: scikit-learn
10
-
11
- Performance under noise
12
-
13
- > Visualizations are optional and do not affect the decision logic.
14
-
15
- ---
16
-
17
- ## ⚙️ Main API
18
-
19
- ### `AICritic(model, X, y)`
20
-
21
- * `model`: scikit-learn compatible estimator
22
- * `X`: feature matrix
23
- * `y`: target vector
24
-
25
- ### `evaluate(view="all", plot=False)`
26
-
27
- * `view`: `"executive"`, `"technical"`, `"details"`, `"all"` or custom list
28
- * `plot`: generates graphs when `True`
29
-
30
- ---
31
-
32
- ## 🧠 What ai-critic Detects
33
-
34
- | Category | Risks |
35
-
36
- | ------------ | ---------------------------------------- |
37
-
38
- | 🔍 Data | Target Leakage, NaNs, Imbalance |
39
-
40
- | 🧱 Structure | Excessive Complexity, Overfitting |
41
-
42
- | 📈 Validation | Perfect or Statistically Suspicious CV |
43
-
44
- | 🧪 Robustness | Stable, Fragile, or Misleading |
45
-
46
- ---
47
-
48
- ## 🛡️ Best Practices
49
-
50
- * **CI/CD:** Use executive output as a *quality gate*
51
- * **Iteration:** Use technical output during tuning
52
- * **Governance:** Log detailed output
53
- * **Skepticism:** Never blindly trust a perfect CV
54
-
55
- ---
56
-
57
- ## 🧭 Use Cases
58
-
59
- * Pre-deployment Audit
60
- * ML Governance
61
- * CI/CD Pipelines
62
- * Risk Communication for Non-Technical Users
63
-
64
- ---
65
-
66
- ## 📄 License
67
-
68
- Distributed under the **MIT License**.
69
-
70
- ---
71
-
72
- ## 🧠 Final Note
73
-
74
- **ai-critic** is not a *benchmarking* tool. It's a **decision-making tool**.
75
-
76
- If a model fails here, it doesn't mean it's bad—it means it **shouldn't be trusted yet**.