ai-critic 0.2.1__tar.gz → 0.2.3__tar.gz
This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- ai_critic-0.2.3/PKG-INFO +76 -0
- ai_critic-0.2.3/README.md +66 -0
- ai_critic-0.2.3/ai_critic/critic.py +132 -0
- ai_critic-0.2.3/ai_critic/evaluators/__init__.py +11 -0
- ai_critic-0.2.3/ai_critic/evaluators/performance.py +72 -0
- {ai_critic-0.2.1 → ai_critic-0.2.3}/ai_critic/evaluators/robustness.py +7 -2
- {ai_critic-0.2.1 → ai_critic-0.2.3}/ai_critic/evaluators/summary.py +7 -0
- ai_critic-0.2.3/ai_critic/evaluators/validation.py +41 -0
- ai_critic-0.2.3/ai_critic.egg-info/PKG-INFO +76 -0
- {ai_critic-0.2.1 → ai_critic-0.2.3}/ai_critic.egg-info/SOURCES.txt +1 -0
- {ai_critic-0.2.1 → ai_critic-0.2.3}/pyproject.toml +1 -1
- ai_critic-0.2.3/test/test_in_ia.py +24 -0
- ai_critic-0.2.1/PKG-INFO +0 -258
- ai_critic-0.2.1/README.md +0 -248
- ai_critic-0.2.1/ai_critic/critic.py +0 -87
- ai_critic-0.2.1/ai_critic/evaluators/__init__.py +0 -4
- ai_critic-0.2.1/ai_critic/evaluators/performance.py +0 -43
- ai_critic-0.2.1/ai_critic.egg-info/PKG-INFO +0 -258
- ai_critic-0.2.1/test/test_in_ia.py +0 -56
- {ai_critic-0.2.1 → ai_critic-0.2.3}/ai_critic/__init__.py +0 -0
- {ai_critic-0.2.1 → ai_critic-0.2.3}/ai_critic/evaluators/config.py +0 -0
- {ai_critic-0.2.1 → ai_critic-0.2.3}/ai_critic/evaluators/data.py +0 -0
- {ai_critic-0.2.1 → ai_critic-0.2.3}/ai_critic.egg-info/dependency_links.txt +0 -0
- {ai_critic-0.2.1 → ai_critic-0.2.3}/ai_critic.egg-info/requires.txt +0 -0
- {ai_critic-0.2.1 → ai_critic-0.2.3}/ai_critic.egg-info/top_level.txt +0 -0
- {ai_critic-0.2.1 → ai_critic-0.2.3}/setup.cfg +0 -0
- {ai_critic-0.2.1 → ai_critic-0.2.3}/test/test_model.py +0 -0
ai_critic-0.2.3/PKG-INFO ADDED
@@ -0,0 +1,76 @@
+Metadata-Version: 2.4
+Name: ai-critic
+Version: 0.2.3
+Summary: Fast AI evaluator for scikit-learn models
+Author-email: Luiz Seabra <filipedemarco@yahoo.com>
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+Requires-Dist: numpy
+Requires-Dist: scikit-learn
+
+Performance under noise
+
+> Visualizations are optional and do not affect the decision logic.
+
+---
+
+## ⚙️ Main API
+
+### `AICritic(model, X, y)`
+
+* `model`: scikit-learn compatible estimator
+* `X`: feature matrix
+* `y`: target vector
+
+### `evaluate(view="all", plot=False)`
+
+* `view`: `"executive"`, `"technical"`, `"details"`, `"all"` or custom list
+* `plot`: generates graphs when `True`
+
+---
+
+## 🧠 What ai-critic Detects
+
+| Category | Risks |
+
+| ------------ | ---------------------------------------- |
+
+| 🔍 Data | Target Leakage, NaNs, Imbalance |
+
+| 🧱 Structure | Excessive Complexity, Overfitting |
+
+| 📈 Validation | Perfect or Statistically Suspicious CV |
+
+| 🧪 Robustness | Stable, Fragile, or Misleading |
+
+---
+
+## 🛡️ Best Practices
+
+* **CI/CD:** Use executive output as a *quality gate*
+* **Iteration:** Use technical output during tuning
+* **Governance:** Log detailed output
+* **Skepticism:** Never blindly trust a perfect CV
+
+---
+
+## 🧭 Use Cases
+
+* Pre-deployment Audit
+* ML Governance
+* CI/CD Pipelines
+* Risk Communication for Non-Technical Users
+
+---
+
+## 📄 License
+
+Distributed under the **MIT License**.
+
+---
+
+## 🧠 Final Note
+
+**ai-critic** is not a *benchmarking* tool. It's a **decision-making tool**.
+
+If a model fails here, it doesn't mean it's bad—it means it **shouldn't be trusted yet**.
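The Main API summary above is terse, so here is a minimal usage sketch of the constructor and the `view` argument. The synthetic dataset and the `LogisticRegression` choice are illustrative, not taken from the package:

    from sklearn.linear_model import LogisticRegression
    import numpy as np
    from ai_critic import AICritic

    # Small synthetic binary-classification problem
    X = np.random.rand(120, 6)
    y = (X[:, 0] > 0.5).astype(int)

    critic = AICritic(LogisticRegression(max_iter=1000), X, y)

    full = critic.evaluate()                               # view="all": full payload
    execs = critic.evaluate(view="executive")              # executive summary only
    some = critic.evaluate(view=["executive", "details"])  # list -> dict subset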
ai_critic-0.2.3/README.md ADDED
@@ -0,0 +1,66 @@
+Performance under noise
+
+> Visualizations are optional and do not affect the decision logic.
+
+---
+
+## ⚙️ Main API
+
+### `AICritic(model, X, y)`
+
+* `model`: scikit-learn compatible estimator
+* `X`: feature matrix
+* `y`: target vector
+
+### `evaluate(view="all", plot=False)`
+
+* `view`: `"executive"`, `"technical"`, `"details"`, `"all"` or custom list
+* `plot`: generates graphs when `True`
+
+---
+
+## 🧠 What ai-critic Detects
+
+| Category | Risks |
+
+| ------------ | ---------------------------------------- |
+
+| 🔍 Data | Target Leakage, NaNs, Imbalance |
+
+| 🧱 Structure | Excessive Complexity, Overfitting |
+
+| 📈 Validation | Perfect or Statistically Suspicious CV |
+
+| 🧪 Robustness | Stable, Fragile, or Misleading |
+
+---
+
+## 🛡️ Best Practices
+
+* **CI/CD:** Use executive output as a *quality gate*
+* **Iteration:** Use technical output during tuning
+* **Governance:** Log detailed output
+* **Skepticism:** Never blindly trust a perfect CV
+
+---
+
+## 🧭 Use Cases
+
+* Pre-deployment Audit
+* ML Governance
+* CI/CD Pipelines
+* Risk Communication for Non-Technical Users
+
+---
+
+## 📄 License
+
+Distributed under the **MIT License**.
+
+---
+
+## 🧠 Final Note
+
+**ai-critic** is not a *benchmarking* tool. It's a **decision-making tool**.
+
+If a model fails here, it doesn't mean it's bad—it means it **shouldn't be trusted yet**.
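The Best Practices section suggests using the executive output as a CI/CD quality gate. A minimal sketch of such a gate, continuing the example above; it assumes the `"No significant risks detected."` sentinel that the summary.py hunk further down actually shows:

    import sys

    technical = critic.evaluate(view="technical")  # dict built by HumanSummary
    risks = technical.get("key_risks", [])

    # Fail the build unless the "no risks" sentinel is the only entry
    if risks != ["No significant risks detected."]:
        print("Quality gate FAILED:", risks)
        sys.exit(1)
    print("Quality gate passed.")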
ai_critic-0.2.3/ai_critic/critic.py ADDED
@@ -0,0 +1,132 @@
+from ai_critic.evaluators import (
+    robustness,
+    config,
+    data,
+    performance
+)
+from ai_critic.evaluators.summary import HumanSummary
+
+
+class AICritic:
+    """
+    Automated reviewer for scikit-learn models.
+
+    Produces a multi-layered risk assessment including:
+    - Data integrity analysis
+    - Model configuration sanity checks
+    - Performance evaluation (CV + learning curves)
+    - Robustness & leakage heuristics
+    - Human-readable executive and technical summaries
+    """
+
+    def __init__(self, model, X, y, random_state=None):
+        """
+        Parameters
+        ----------
+        model : sklearn-compatible estimator
+        X : np.ndarray
+            Feature matrix
+        y : np.ndarray
+            Target vector
+        random_state : int or None
+            Global seed for reproducibility (optional)
+        """
+        self.model = model
+        self.X = X
+        self.y = y
+        self.random_state = random_state
+
+    def evaluate(self, view="all", plot=False):
+        """
+        Evaluate the model.
+
+        Parameters
+        ----------
+        view : str or list
+            - "all" : full payload (default)
+            - "executive" : executive summary only
+            - "technical" : technical summary only
+            - "details" : low-level evaluator outputs
+            - list : subset of views (e.g. ["executive", "details"])
+        plot : bool
+            - True : generate plots (learning curve, heatmap, robustness)
+            - False : no plots
+
+        Returns
+        -------
+        dict
+            Evaluation payload according to selected view
+        """
+
+        # =========================
+        # Low-level evaluator outputs
+        # =========================
+        details = {}
+
+        # -------------------------
+        # Data analysis
+        # -------------------------
+        data_report = data.evaluate(
+            self.X,
+            self.y,
+            plot=plot
+        )
+        details["data"] = data_report
+
+        # -------------------------
+        # Model configuration sanity
+        # -------------------------
+        details["config"] = config.evaluate(
+            self.model,
+            n_samples=data_report["n_samples"],
+            n_features=data_report["n_features"]
+        )
+
+        # -------------------------
+        # Performance evaluation
+        # (CV strategy inferred automatically)
+        # -------------------------
+        details["performance"] = performance.evaluate(
+            self.model,
+            self.X,
+            self.y,
+            plot=plot
+        )
+
+        # -------------------------
+        # Robustness & leakage analysis
+        # -------------------------
+        details["robustness"] = robustness.evaluate(
+            self.model,
+            self.X,
+            self.y,
+            leakage_suspected=data_report["data_leakage"]["suspected"],
+            plot=plot
+        )
+
+        # =========================
+        # Human-centered summaries
+        # =========================
+        human_summary = HumanSummary().generate(details)
+
+        # =========================
+        # Full payload (PUBLIC API)
+        # =========================
+        payload = {
+            "executive": human_summary["executive_summary"],
+            "technical": human_summary["technical_summary"],
+            "details": details,
+            # Convenience shortcut (prevents KeyError in user code)
+            "performance": details["performance"]
+        }
+
+        # =========================
+        # View selector
+        # =========================
+        if view == "all":
+            return payload
+
+        if isinstance(view, list):
+            return {k: payload[k] for k in view if k in payload}
+
+        return payload.get(view)
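Since `view="details"` returns the raw `details` dict and unknown names fall through `payload.get(view)`, callers can drill into individual evaluator outputs. Reusing the `critic` from the earlier sketch, and touching only keys this diff itself shows (`n_samples`, `n_features`, `data_leakage` from the data evaluator, `cv_mean_score` from the performance evaluator):

    details = critic.evaluate(view="details")

    print(details["data"]["n_samples"], details["data"]["n_features"])
    print(details["data"]["data_leakage"]["suspected"])  # also feeds robustness.evaluate
    print(details["performance"]["cv_mean_score"])

    # Unrecognized view names return None rather than raising
    assert critic.evaluate(view="nonexistent") is None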
ai_critic-0.2.3/ai_critic/evaluators/performance.py ADDED
@@ -0,0 +1,72 @@
+from sklearn.model_selection import cross_val_score, learning_curve
+import matplotlib.pyplot as plt
+import numpy as np
+
+from .validation import make_cv
+
+
+def evaluate(model, X, y, plot=False):
+    """
+    Evaluate model performance using an automatically chosen
+    cross-validation strategy (StratifiedKFold or KFold).
+    """
+
+    # =========================
+    # Adaptive cross-validation
+    # =========================
+    cv = make_cv(y)
+
+    scores = cross_val_score(model, X, y, cv=cv)
+    mean = float(scores.mean())
+    std = float(scores.std())
+    suspicious = mean > 0.995
+
+    result = {
+        "cv_mean_score": mean,
+        "cv_std": std,
+        "suspiciously_perfect": suspicious,
+        "validation_strategy": type(cv).__name__,
+        "message": (
+            "Perfect CV score detected — possible data leakage."
+            if suspicious
+            else "CV performance within expected range."
+        )
+    }
+
+    # =========================
+    # Learning curve
+    # =========================
+    if plot:
+        train_sizes, train_scores, test_scores = learning_curve(
+            model,
+            X,
+            y,
+            cv=cv,  # <- the SAME validation strategy
+            train_sizes=np.linspace(0.1, 1.0, 5)
+        )
+
+        plt.figure(figsize=(6, 4))
+        plt.plot(
+            train_sizes,
+            np.mean(train_scores, axis=1),
+            label="Train"
+        )
+        plt.plot(
+            train_sizes,
+            np.mean(test_scores, axis=1),
+            label="Validation"
+        )
+        plt.fill_between(
+            train_sizes,
+            np.mean(test_scores, axis=1) - np.std(test_scores, axis=1),
+            np.mean(test_scores, axis=1) + np.std(test_scores, axis=1),
+            alpha=0.2
+        )
+        plt.xlabel("Training set size")
+        plt.ylabel("Score")
+        plt.title("Learning Curve")
+        plt.legend()
+        plt.tight_layout()
+        plt.show()
+
+    return result
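The `suspicious = mean > 0.995` heuristic is easy to exercise: copy the target into the feature matrix and the flag should trip on essentially any classifier. A self-contained sketch (the dataset construction is illustrative):

    import numpy as np
    from sklearn.tree import DecisionTreeClassifier
    from ai_critic.evaluators import performance

    rng = np.random.default_rng(0)
    X = rng.random((200, 5))
    y = (X[:, 0] > 0.5).astype(int)
    X_leaky = np.column_stack([X, y])  # target leaked into the features

    result = performance.evaluate(DecisionTreeClassifier(), X_leaky, y)
    print(result["suspiciously_perfect"])  # expected: True (mean CV score ~ 1.0)
    print(result["message"])               # warns about possible data leakage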
{ai_critic-0.2.1 → ai_critic-0.2.3}/ai_critic/evaluators/robustness.py CHANGED
@@ -12,8 +12,13 @@ def evaluate(model, X, y, leakage_suspected=False, plot=False):
     model_clean = clone(model)
     model_noisy = clone(model)
 
-
-
+    from .validation import make_cv
+
+    cv = make_cv(y)
+
+    score_clean = cross_val_score(model_clean, X, y, cv=cv, n_jobs=1).mean()
+    score_noisy = cross_val_score(model_noisy, X_noisy, y, cv=cv, n_jobs=1).mean()
+
     drop = score_clean - score_noisy
 
     # =========================
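The point of this change is that the clean and noisy scores now come from the same `make_cv` strategy rather than two independent splits. `X_noisy` is built earlier in the function and is not part of this hunk, so the Gaussian perturbation below is an assumption for illustration, not necessarily what robustness.py does:

    import numpy as np
    from sklearn.base import clone
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import cross_val_score
    from ai_critic.evaluators.validation import make_cv

    rng = np.random.default_rng(42)
    X = rng.random((200, 10))
    y = (X[:, 0] > 0.5).astype(int)
    X_noisy = X + rng.normal(0.0, 0.1, size=X.shape)  # assumed noise model

    model = LogisticRegression(max_iter=1000)
    cv = make_cv(y)
    score_clean = cross_val_score(clone(model), X, y, cv=cv, n_jobs=1).mean()
    score_noisy = cross_val_score(clone(model), X_noisy, y, cv=cv, n_jobs=1).mean()

    drop = score_clean - score_noisy  # large drop => fragile under noise
    print(f"clean={score_clean:.3f}  noisy={score_noisy:.3f}  drop={drop:.3f}")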
{ai_critic-0.2.1 → ai_critic-0.2.3}/ai_critic/evaluators/summary.py CHANGED
@@ -78,6 +78,13 @@ class HumanSummary:
             recommendations.append(
                 "Fix baseline performance issues before trusting robustness metrics."
             )
+        elif robustness_verdict == "fragile":
+            key_risks.append(
+                "Model is fragile under noise perturbations."
+            )
+            recommendations.append(
+                "Consider regularization or simpler model architecture."
+            )
 
         technical_summary = {
             "key_risks": key_risks or ["No significant risks detected."],
ai_critic-0.2.3/ai_critic/evaluators/validation.py ADDED
@@ -0,0 +1,41 @@
+# validation.py
+import numpy as np
+from sklearn.model_selection import KFold, StratifiedKFold
+
+def infer_problem_type(y):
+    """
+    Infer whether the task is classification or regression.
+    """
+    y = np.asarray(y)
+
+    unique_values = np.unique(y)
+    n_unique = len(unique_values)
+
+    # Conservative heuristic
+    if (
+        np.issubdtype(y.dtype, np.integer)
+        or n_unique <= 20
+    ):
+        return "classification"
+
+    return "regression"
+
+
+def make_cv(y, n_splits=3, random_state=42):
+    """
+    Automatically selects the correct CV strategy.
+    """
+    problem_type = infer_problem_type(y)
+
+    if problem_type == "classification":
+        return StratifiedKFold(
+            n_splits=n_splits,
+            shuffle=True,
+            random_state=random_state
+        )
+
+    return KFold(
+        n_splits=n_splits,
+        shuffle=True,
+        random_state=random_state
+    )
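A quick sketch of the heuristic's behavior. Note the conservative trade-off: a continuous target with 20 or fewer unique values would also be labeled classification:

    import numpy as np
    from ai_critic.evaluators.validation import infer_problem_type, make_cv

    y_cls = np.array([0, 1, 0, 1, 1, 0])  # integer dtype -> classification
    y_reg = np.random.rand(100)           # >20 unique floats -> regression

    print(infer_problem_type(y_cls))      # "classification"
    print(infer_problem_type(y_reg))      # "regression"
    print(type(make_cv(y_cls)).__name__)  # "StratifiedKFold"
    print(type(make_cv(y_reg)).__name__)  # "KFold"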
ai_critic-0.2.3/ai_critic.egg-info/PKG-INFO ADDED
@@ -0,0 +1,76 @@ (content identical to ai_critic-0.2.3/PKG-INFO above)
ai_critic-0.2.3/test/test_in_ia.py ADDED
@@ -0,0 +1,24 @@
+from ai_critic import AICritic
+from sklearn.ensemble import RandomForestClassifier
+import numpy as np
+
+# Deliberately imbalanced dataset
+X = np.random.rand(200, 10)
+y = np.array([0] * 180 + [1] * 20)
+
+model = RandomForestClassifier(
+    max_depth=12,
+    random_state=42
+)
+
+critic = AICritic(model, X, y)
+report = critic.evaluate(plot=False)
+
+print("\n=== EXECUTIVE SUMMARY ===")
+print(report["executive"])
+
+print("\n=== TECHNICAL SUMMARY ===")
+print(report["technical"])
+
+print("\n=== PERFORMANCE DETAILS ===")
+print(report["performance"])