ai-critic 0.2.1__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. ai_critic-0.2.3/PKG-INFO +76 -0
  2. ai_critic-0.2.3/README.md +66 -0
  3. ai_critic-0.2.3/ai_critic/critic.py +132 -0
  4. ai_critic-0.2.3/ai_critic/evaluators/__init__.py +11 -0
  5. ai_critic-0.2.3/ai_critic/evaluators/performance.py +72 -0
  6. {ai_critic-0.2.1 → ai_critic-0.2.3}/ai_critic/evaluators/robustness.py +7 -2
  7. {ai_critic-0.2.1 → ai_critic-0.2.3}/ai_critic/evaluators/summary.py +7 -0
  8. ai_critic-0.2.3/ai_critic/evaluators/validation.py +41 -0
  9. ai_critic-0.2.3/ai_critic.egg-info/PKG-INFO +76 -0
  10. {ai_critic-0.2.1 → ai_critic-0.2.3}/ai_critic.egg-info/SOURCES.txt +1 -0
  11. {ai_critic-0.2.1 → ai_critic-0.2.3}/pyproject.toml +1 -1
  12. ai_critic-0.2.3/test/test_in_ia.py +24 -0
  13. ai_critic-0.2.1/PKG-INFO +0 -258
  14. ai_critic-0.2.1/README.md +0 -248
  15. ai_critic-0.2.1/ai_critic/critic.py +0 -87
  16. ai_critic-0.2.1/ai_critic/evaluators/__init__.py +0 -4
  17. ai_critic-0.2.1/ai_critic/evaluators/performance.py +0 -43
  18. ai_critic-0.2.1/ai_critic.egg-info/PKG-INFO +0 -258
  19. ai_critic-0.2.1/test/test_in_ia.py +0 -56
  20. {ai_critic-0.2.1 → ai_critic-0.2.3}/ai_critic/__init__.py +0 -0
  21. {ai_critic-0.2.1 → ai_critic-0.2.3}/ai_critic/evaluators/config.py +0 -0
  22. {ai_critic-0.2.1 → ai_critic-0.2.3}/ai_critic/evaluators/data.py +0 -0
  23. {ai_critic-0.2.1 → ai_critic-0.2.3}/ai_critic.egg-info/dependency_links.txt +0 -0
  24. {ai_critic-0.2.1 → ai_critic-0.2.3}/ai_critic.egg-info/requires.txt +0 -0
  25. {ai_critic-0.2.1 → ai_critic-0.2.3}/ai_critic.egg-info/top_level.txt +0 -0
  26. {ai_critic-0.2.1 → ai_critic-0.2.3}/setup.cfg +0 -0
  27. {ai_critic-0.2.1 → ai_critic-0.2.3}/test/test_model.py +0 -0
@@ -0,0 +1,76 @@
+ Metadata-Version: 2.4
+ Name: ai-critic
+ Version: 0.2.3
+ Summary: Fast AI evaluator for scikit-learn models
+ Author-email: Luiz Seabra <filipedemarco@yahoo.com>
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown
+ Requires-Dist: numpy
+ Requires-Dist: scikit-learn
+
+ Performance under noise
+
+ > Visualizations are optional and do not affect the decision logic.
+
+ ---
+
+ ## ⚙️ Main API
+
+ ### `AICritic(model, X, y)`
+
+ * `model`: scikit-learn compatible estimator
+ * `X`: feature matrix
+ * `y`: target vector
+
+ ### `evaluate(view="all", plot=False)`
+
+ * `view`: `"executive"`, `"technical"`, `"details"`, `"all"` or custom list
+ * `plot`: generates graphs when `True`
+
+ ---
+
+ ## 🧠 What ai-critic Detects
+
+ | Category | Risks |
+
+ | ------------ | ---------------------------------------- |
+
+ | 🔍 Data | Target Leakage, NaNs, Imbalance |
+
+ | 🧱 Structure | Excessive Complexity, Overfitting |
+
+ | 📈 Validation | Perfect or Statistically Suspicious CV |
+
+ | 🧪 Robustness | Stable, Fragile, or Misleading |
+
+ ---
+
+ ## 🛡️ Best Practices
+
+ * **CI/CD:** Use executive output as a *quality gate*
+ * **Iteration:** Use technical output during tuning
+ * **Governance:** Log detailed output
+ * **Skepticism:** Never blindly trust a perfect CV
+
+ ---
+
+ ## 🧭 Use Cases
+
+ * Pre-deployment Audit
+ * ML Governance
+ * CI/CD Pipelines
+ * Risk Communication for Non-Technical Users
+
+ ---
+
+ ## 📄 License
+
+ Distributed under the **MIT License**.
+
+ ---
+
+ ## 🧠 Final Note
+
+ **ai-critic** is not a *benchmarking* tool. It's a **decision-making tool**.
+
+ If a model fails here, it doesn't mean it's bad—it means it **shouldn't be trusted yet**.
@@ -0,0 +1,66 @@
+ Performance under noise
+
+ > Visualizations are optional and do not affect the decision logic.
+
+ ---
+
+ ## ⚙️ Main API
+
+ ### `AICritic(model, X, y)`
+
+ * `model`: scikit-learn compatible estimator
+ * `X`: feature matrix
+ * `y`: target vector
+
+ ### `evaluate(view="all", plot=False)`
+
+ * `view`: `"executive"`, `"technical"`, `"details"`, `"all"` or custom list
+ * `plot`: generates graphs when `True`
+
+ ---
+
+ ## 🧠 What ai-critic Detects
+
+ | Category | Risks |
+
+ | ------------ | ---------------------------------------- |
+
+ | 🔍 Data | Target Leakage, NaNs, Imbalance |
+
+ | 🧱 Structure | Excessive Complexity, Overfitting |
+
+ | 📈 Validation | Perfect or Statistically Suspicious CV |
+
+ | 🧪 Robustness | Stable, Fragile, or Misleading |
+
+ ---
+
+ ## 🛡️ Best Practices
+
+ * **CI/CD:** Use executive output as a *quality gate*
+ * **Iteration:** Use technical output during tuning
+ * **Governance:** Log detailed output
+ * **Skepticism:** Never blindly trust a perfect CV
+
+ ---
+
+ ## 🧭 Use Cases
+
+ * Pre-deployment Audit
+ * ML Governance
+ * CI/CD Pipelines
+ * Risk Communication for Non-Technical Users
+
+ ---
+
+ ## 📄 License
+
+ Distributed under the **MIT License**.
+
+ ---
+
+ ## 🧠 Final Note
+
+ **ai-critic** is not a *benchmarking* tool. It's a **decision-making tool**.
+
+ If a model fails here, it doesn't mean it's bad—it means it **shouldn't be trusted yet**.
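
As context for the API documented in the README above, a minimal end-to-end sketch (editor's illustration, not part of the package diff; the dataset and model here are arbitrary stand-ins):

```python
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

from ai_critic import AICritic

# Any sklearn-compatible estimator and dataset will do.
X, y = make_classification(n_samples=300, n_features=10, random_state=0)
model = RandomForestClassifier(random_state=0)

critic = AICritic(model, X, y)

# Executive view only, e.g. as a lightweight CI quality gate.
print(critic.evaluate(view="executive"))

# Full payload with plots enabled.
report = critic.evaluate(view="all", plot=True)
```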
@@ -0,0 +1,132 @@
+ from ai_critic.evaluators import (
+     robustness,
+     config,
+     data,
+     performance
+ )
+ from ai_critic.evaluators.summary import HumanSummary
+
+
+ class AICritic:
+     """
+     Automated reviewer for scikit-learn models.
+
+     Produces a multi-layered risk assessment including:
+     - Data integrity analysis
+     - Model configuration sanity checks
+     - Performance evaluation (CV + learning curves)
+     - Robustness & leakage heuristics
+     - Human-readable executive and technical summaries
+     """
+
+     def __init__(self, model, X, y, random_state=None):
+         """
+         Parameters
+         ----------
+         model : sklearn-compatible estimator
+         X : np.ndarray
+             Feature matrix
+         y : np.ndarray
+             Target vector
+         random_state : int or None
+             Global seed for reproducibility (optional)
+         """
+         self.model = model
+         self.X = X
+         self.y = y
+         self.random_state = random_state
+
+     def evaluate(self, view="all", plot=False):
+         """
+         Evaluate the model.
+
+         Parameters
+         ----------
+         view : str or list
+             - "all" : full payload (default)
+             - "executive" : executive summary only
+             - "technical" : technical summary only
+             - "details" : low-level evaluator outputs
+             - list : subset of views (e.g. ["executive", "details"])
+         plot : bool
+             - True : generate plots (learning curve, heatmap, robustness)
+             - False : no plots
+
+         Returns
+         -------
+         dict
+             Evaluation payload according to selected view
+         """
+
+         # =========================
+         # Low-level evaluator outputs
+         # =========================
+         details = {}
+
+         # -------------------------
+         # Data analysis
+         # -------------------------
+         data_report = data.evaluate(
+             self.X,
+             self.y,
+             plot=plot
+         )
+         details["data"] = data_report
+
+         # -------------------------
+         # Model configuration sanity
+         # -------------------------
+         details["config"] = config.evaluate(
+             self.model,
+             n_samples=data_report["n_samples"],
+             n_features=data_report["n_features"]
+         )
+
+         # -------------------------
+         # Performance evaluation
+         # (CV strategy inferred automatically)
+         # -------------------------
+         details["performance"] = performance.evaluate(
+             self.model,
+             self.X,
+             self.y,
+             plot=plot
+         )
+
+         # -------------------------
+         # Robustness & leakage analysis
+         # -------------------------
+         details["robustness"] = robustness.evaluate(
+             self.model,
+             self.X,
+             self.y,
+             leakage_suspected=data_report["data_leakage"]["suspected"],
+             plot=plot
+         )
+
+         # =========================
+         # Human-centered summaries
+         # =========================
+         human_summary = HumanSummary().generate(details)
+
+         # =========================
+         # Full payload (PUBLIC API)
+         # =========================
+         payload = {
+             "executive": human_summary["executive_summary"],
+             "technical": human_summary["technical_summary"],
+             "details": details,
+             # Convenience shortcut (prevents KeyError in user code)
+             "performance": details["performance"]
+         }
+
+         # =========================
+         # View selector
+         # =========================
+         if view == "all":
+             return payload
+
+         if isinstance(view, list):
+             return {k: payload[k] for k in view if k in payload}
+
+         return payload.get(view)
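
A short sketch of how the view selector above behaves for the three argument shapes (editor's illustration; the model and data are placeholders):

```python
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

from ai_critic import AICritic

X, y = make_classification(n_samples=200, n_features=8, random_state=0)
critic = AICritic(RandomForestClassifier(random_state=0), X, y)

full = critic.evaluate()                       # "all": the whole payload dict
executive = critic.evaluate(view="executive")  # single view: just that entry
subset = critic.evaluate(view=["technical", "performance"])  # list: dict subset

print(sorted(full.keys()))    # ['details', 'executive', 'performance', 'technical']
print(sorted(subset.keys()))  # ['performance', 'technical']
```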
@@ -0,0 +1,11 @@
+ from . import data
+ from . import performance
+ from . import robustness
+ from . import config
+
+ __all__ = [
+     "data",
+     "performance",
+     "robustness",
+     "config",
+ ]
@@ -0,0 +1,72 @@
+ from sklearn.model_selection import cross_val_score, learning_curve
+ import matplotlib.pyplot as plt
+ import numpy as np
+
+ from .validation import make_cv
+
+
+ def evaluate(model, X, y, plot=False):
+     """
+     Evaluate model performance using an automatically chosen
+     cross-validation strategy (StratifiedKFold or KFold).
+     """
+
+     # =========================
+     # Adaptive cross-validation
+     # =========================
+     cv = make_cv(y)
+
+     scores = cross_val_score(model, X, y, cv=cv)
+     mean = float(scores.mean())
+     std = float(scores.std())
+     suspicious = mean > 0.995
+
+     result = {
+         "cv_mean_score": mean,
+         "cv_std": std,
+         "suspiciously_perfect": suspicious,
+         "validation_strategy": type(cv).__name__,
+         "message": (
+             "Perfect CV score detected — possible data leakage."
+             if suspicious
+             else "CV performance within expected range."
+         )
+     }
+
+     # =========================
+     # Learning curve
+     # =========================
+     if plot:
+         train_sizes, train_scores, test_scores = learning_curve(
+             model,
+             X,
+             y,
+             cv=cv,  # <- SAME validation strategy as above
+             train_sizes=np.linspace(0.1, 1.0, 5)
+         )
+
+         plt.figure(figsize=(6, 4))
+         plt.plot(
+             train_sizes,
+             np.mean(train_scores, axis=1),
+             label="Train"
+         )
+         plt.plot(
+             train_sizes,
+             np.mean(test_scores, axis=1),
+             label="Validation"
+         )
+         plt.fill_between(
+             train_sizes,
+             np.mean(test_scores, axis=1) - np.std(test_scores, axis=1),
+             np.mean(test_scores, axis=1) + np.std(test_scores, axis=1),
+             alpha=0.2
+         )
+         plt.xlabel("Training set size")
+         plt.ylabel("Score")
+         plt.title("Learning Curve")
+         plt.legend()
+         plt.tight_layout()
+         plt.show()
+
+     return result
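
To illustrate the `suspiciously_perfect` heuristic above, a small sketch (editor's illustration; the synthetic data and choice of classifier are arbitrary): leaking the target into the feature matrix pushes the CV mean past the 0.995 threshold.

```python
import numpy as np
from sklearn.tree import DecisionTreeClassifier

from ai_critic.evaluators import performance

rng = np.random.default_rng(0)
X = rng.normal(size=(300, 5))
# Noisy target: a clean model should score well below 0.995.
y = (X[:, 0] + rng.normal(scale=1.0, size=300) > 0).astype(int)

# Leaky variant: the target itself appended as an extra feature.
X_leaky = np.column_stack([X, y])

clean = performance.evaluate(DecisionTreeClassifier(random_state=0), X, y)
leaky = performance.evaluate(DecisionTreeClassifier(random_state=0), X_leaky, y)

print(clean["cv_mean_score"], clean["suspiciously_perfect"])  # e.g. ~0.7, False
print(leaky["cv_mean_score"], leaky["suspiciously_perfect"])  # ~1.0, True
```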
@@ -12,8 +12,13 @@ def evaluate(model, X, y, leakage_suspected=False, plot=False):
model_clean = clone(model)
model_noisy = clone(model)

- score_clean = cross_val_score(model_clean, X, y, cv=3, n_jobs=1).mean()
- score_noisy = cross_val_score(model_noisy, X_noisy, y, cv=3, n_jobs=1).mean()
+ from .validation import make_cv
+
+ cv = make_cv(y)
+
+ score_clean = cross_val_score(model_clean, X, y, cv=cv, n_jobs=1).mean()
+ score_noisy = cross_val_score(model_noisy, X_noisy, y, cv=cv, n_jobs=1).mean()
+

drop = score_clean - score_noisy

# =========================
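
The hunk above only shows the scoring lines; for context, a standalone sketch of the clean-vs-noisy comparison they belong to. The Gaussian noise injection is an assumption (that part of robustness.py is not in this diff); only `make_cv` and the drop computation come from the code shown.

```python
import numpy as np
from sklearn.base import clone
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

from ai_critic.evaluators.validation import make_cv

X, y = make_classification(n_samples=300, n_features=10, random_state=0)
model = RandomForestClassifier(random_state=0)

# Assumed perturbation: additive Gaussian noise on the features.
rng = np.random.default_rng(0)
X_noisy = X + rng.normal(scale=0.5, size=X.shape)

cv = make_cv(y)  # same adaptive CV strategy for both runs
score_clean = cross_val_score(clone(model), X, y, cv=cv, n_jobs=1).mean()
score_noisy = cross_val_score(clone(model), X_noisy, y, cv=cv, n_jobs=1).mean()

drop = score_clean - score_noisy
print(f"clean={score_clean:.3f} noisy={score_noisy:.3f} drop={drop:.3f}")
```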
@@ -78,6 +78,13 @@ class HumanSummary:
recommendations.append(
    "Fix baseline performance issues before trusting robustness metrics."
)
+ elif robustness_verdict == "fragile":
+     key_risks.append(
+         "Model is fragile under noise perturbations."
+     )
+     recommendations.append(
+         "Consider regularization or simpler model architecture."
+     )

technical_summary = {
    "key_risks": key_risks or ["No significant risks detected."],
@@ -0,0 +1,41 @@
+ # validation.py
+ import numpy as np
+ from sklearn.model_selection import KFold, StratifiedKFold
+
+ def infer_problem_type(y):
+     """
+     Infer whether the task is classification or regression.
+     """
+     y = np.asarray(y)
+
+     unique_values = np.unique(y)
+     n_unique = len(unique_values)
+
+     # Conservative heuristic
+     if (
+         np.issubdtype(y.dtype, np.integer)
+         or n_unique <= 20
+     ):
+         return "classification"
+
+     return "regression"
+
+
+ def make_cv(y, n_splits=3, random_state=42):
+     """
+     Automatically selects the correct CV strategy.
+     """
+     problem_type = infer_problem_type(y)
+
+     if problem_type == "classification":
+         return StratifiedKFold(
+             n_splits=n_splits,
+             shuffle=True,
+             random_state=random_state
+         )
+
+     return KFold(
+         n_splits=n_splits,
+         shuffle=True,
+         random_state=random_state
+     )
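
A quick check of the two helpers above (editor's illustration): integer labels select `StratifiedKFold`, while a continuous target with many unique values falls through to `KFold`. Note the conservative heuristic: a float target with 20 or fewer unique values is still treated as classification.

```python
import numpy as np

from ai_critic.evaluators.validation import infer_problem_type, make_cv

y_class = np.array([0, 1, 0, 1, 1, 0])             # integer labels
y_reg = np.random.default_rng(0).normal(size=100)  # 100 unique floats

print(infer_problem_type(y_class), type(make_cv(y_class)).__name__)
# classification StratifiedKFold
print(infer_problem_type(y_reg), type(make_cv(y_reg)).__name__)
# regression KFold
```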
@@ -0,0 +1,76 @@
+ Metadata-Version: 2.4
+ Name: ai-critic
+ Version: 0.2.3
+ Summary: Fast AI evaluator for scikit-learn models
+ Author-email: Luiz Seabra <filipedemarco@yahoo.com>
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown
+ Requires-Dist: numpy
+ Requires-Dist: scikit-learn
+
+ Performance under noise
+
+ > Visualizations are optional and do not affect the decision logic.
+
+ ---
+
+ ## ⚙️ Main API
+
+ ### `AICritic(model, X, y)`
+
+ * `model`: scikit-learn compatible estimator
+ * `X`: feature matrix
+ * `y`: target vector
+
+ ### `evaluate(view="all", plot=False)`
+
+ * `view`: `"executive"`, `"technical"`, `"details"`, `"all"` or custom list
+ * `plot`: generates graphs when `True`
+
+ ---
+
+ ## 🧠 What ai-critic Detects
+
+ | Category | Risks |
+
+ | ------------ | ---------------------------------------- |
+
+ | 🔍 Data | Target Leakage, NaNs, Imbalance |
+
+ | 🧱 Structure | Excessive Complexity, Overfitting |
+
+ | 📈 Validation | Perfect or Statistically Suspicious CV |
+
+ | 🧪 Robustness | Stable, Fragile, or Misleading |
+
+ ---
+
+ ## 🛡️ Best Practices
+
+ * **CI/CD:** Use executive output as a *quality gate*
+ * **Iteration:** Use technical output during tuning
+ * **Governance:** Log detailed output
+ * **Skepticism:** Never blindly trust a perfect CV
+
+ ---
+
+ ## 🧭 Use Cases
+
+ * Pre-deployment Audit
+ * ML Governance
+ * CI/CD Pipelines
+ * Risk Communication for Non-Technical Users
+
+ ---
+
+ ## 📄 License
+
+ Distributed under the **MIT License**.
+
+ ---
+
+ ## 🧠 Final Note
+
+ **ai-critic** is not a *benchmarking* tool. It's a **decision-making tool**.
+
+ If a model fails here, it doesn't mean it's bad—it means it **shouldn't be trusted yet**.
@@ -13,5 +13,6 @@ ai_critic/evaluators/data.py
ai_critic/evaluators/performance.py
ai_critic/evaluators/robustness.py
ai_critic/evaluators/summary.py
+ ai_critic/evaluators/validation.py
test/test_in_ia.py
test/test_model.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "ai-critic"
- version = "0.2.1"
+ version = "0.2.3"
description = "Fast AI evaluator for scikit-learn models"
readme = "README.md"
authors = [
@@ -0,0 +1,24 @@
+ from ai_critic import AICritic
+ from sklearn.ensemble import RandomForestClassifier
+ import numpy as np
+
+ # Intentionally imbalanced dataset
+ X = np.random.rand(200, 10)
+ y = np.array([0] * 180 + [1] * 20)
+
+ model = RandomForestClassifier(
+     max_depth=12,
+     random_state=42
+ )
+
+ critic = AICritic(model, X, y)
+ report = critic.evaluate(plot=False)
+
+ print("\n=== EXECUTIVE SUMMARY ===")
+ print(report["executive"])
+
+ print("\n=== TECHNICAL SUMMARY ===")
+ print(report["technical"])
+
+ print("\n=== PERFORMANCE DETAILS ===")
+ print(report["performance"])