ai-critic 1.0.0-py3-none-any.whl → 1.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ai_critic/critic.py CHANGED
@@ -2,7 +2,8 @@ from ai_critic.evaluators import (
     robustness,
     config,
     data,
-    performance
+    performance,
+    adapters  # <- new import
 )
 from ai_critic.evaluators.summary import HumanSummary
 from ai_critic.sessions import CriticSessionStore
@@ -11,7 +12,7 @@ from ai_critic.evaluators.scoring import compute_scores
 
 class AICritic:
     """
-    Automated reviewer for scikit-learn models.
+    Automated reviewer for scikit-learn, PyTorch, or TensorFlow models.
 
     Produces a multi-layered risk assessment including:
     - Data integrity analysis
@@ -21,11 +22,12 @@ class AICritic:
     - Human-readable executive and technical summaries
     """
 
-    def __init__(self, model, X, y, random_state=None, session=None):
+    def __init__(self, model, X, y, random_state=None, session=None, framework="sklearn", adapter_kwargs=None):
        """
        Parameters
        ----------
-        model : sklearn-compatible estimator
+        model : object
+            scikit-learn estimator, torch.nn.Module, or tf.keras.Model
        X : np.ndarray
            Feature matrix
        y : np.ndarray
@@ -34,8 +36,18 @@
            Global seed for reproducibility (optional)
        session : str or None
            Optional session name for longitudinal comparison
+       framework : str
+           "sklearn" (default), "torch", or "tensorflow"
+       adapter_kwargs : dict
+           Extra kwargs for the adapter (e.g. epochs, lr, batch_size)
        """
-        self.model = model
+        adapter_kwargs = adapter_kwargs or {}
+        self.framework = framework.lower()
+        if self.framework != "sklearn":
+            self.model = adapters.ModelAdapter(model, framework=self.framework, **adapter_kwargs)
+        else:
+            self.model = model
+
        self.X = X
        self.y = y
        self.random_state = random_state
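
Illustration (not part of the published diff): the new constructor keeps the scikit-learn path untouched and routes any other framework through the adapter added below. Data shapes and hyperparameters here are purely illustrative.

```python
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from ai_critic import AICritic

X = np.random.rand(200, 6)
y = np.random.randint(0, 2, size=200)

# Default framework="sklearn": the estimator is stored unchanged.
critic = AICritic(RandomForestClassifier(), X, y)

# Any non-sklearn framework value wraps the model in adapters.ModelAdapter, e.g.:
# AICritic(torch_model, X, y, framework="torch",
#          adapter_kwargs={"epochs": 5, "lr": 1e-3})
```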
ai_critic/evaluators/adapters.py ADDED
@@ -0,0 +1,84 @@
+# evaluators/adapters.py
+import numpy as np
+
+try:
+    import torch
+    import torch.nn as nn
+except ImportError:
+    torch = None
+
+try:
+    import tensorflow as tf
+except ImportError:
+    tf = None
+
+class ModelAdapter:
+    """
+    Wraps scikit-learn, PyTorch, or TensorFlow models to provide a
+    unified fit/predict interface for AICritic.
+    """
+
+    def __init__(self, model, framework="sklearn", **kwargs):
+        """
+        Parameters
+        ----------
+        model : object
+            The original model (sklearn estimator, torch.nn.Module, or tf.keras.Model)
+        framework : str
+            One of "sklearn", "torch", "tensorflow"
+        kwargs : dict
+            Extra hyperparameters for training (epochs, batch_size, optimizer, etc)
+        """
+        self.model = model
+        self.framework = framework.lower()
+        self.kwargs = kwargs
+
+        if self.framework not in ("sklearn", "torch", "tensorflow"):
+            raise ValueError(f"Unsupported framework: {framework}")
+
+        # PyTorch default settings
+        if self.framework == "torch":
+            self.epochs = kwargs.get("epochs", 5)
+            self.lr = kwargs.get("lr", 1e-3)
+            self.loss_fn = kwargs.get("loss_fn", nn.MSELoss())
+            self.optimizer_class = kwargs.get("optimizer", torch.optim.Adam)
+            self.device = kwargs.get("device", "cpu")
+            self.model.to(self.device)
+
+        # TensorFlow default settings
+        if self.framework == "tensorflow":
+            self.epochs = kwargs.get("epochs", 5)
+            self.batch_size = kwargs.get("batch_size", 32)
+            self.loss_fn = kwargs.get("loss_fn", "mse")
+            self.optimizer = kwargs.get("optimizer", "adam")
+            self.model.compile(optimizer=self.optimizer, loss=self.loss_fn)
+
+    def fit(self, X, y):
+        if self.framework == "sklearn":
+            self.model.fit(X, y)
+        elif self.framework == "torch":
+            X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
+            y_tensor = torch.tensor(y, dtype=torch.float32).to(self.device).view(-1, 1)
+            optimizer = self.optimizer_class(self.model.parameters(), lr=self.lr)
+
+            self.model.train()
+            for epoch in range(self.epochs):
+                optimizer.zero_grad()
+                output = self.model(X_tensor)
+                loss = self.loss_fn(output, y_tensor)
+                loss.backward()
+                optimizer.step()
+        elif self.framework == "tensorflow":
+            self.model.fit(X, y, epochs=self.epochs, batch_size=self.batch_size, verbose=0)
+        return self
+
+    def predict(self, X):
+        if self.framework == "sklearn":
+            return self.model.predict(X)
+        elif self.framework == "torch":
+            self.model.eval()
+            with torch.no_grad():
+                X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
+                return self.model(X_tensor).cpu().numpy().flatten()
+        elif self.framework == "tensorflow":
+            return self.model.predict(X).flatten()
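
A minimal usage sketch of the adapter in isolation (illustration, not part of the diff). It assumes PyTorch is installed, uses the defaults defined above, and takes the import path from the RECORD entry for `ai_critic/evaluators/adapters.py`; the data is random and illustrative.

```python
import numpy as np
import torch.nn as nn
from ai_critic.evaluators.adapters import ModelAdapter

X = np.random.rand(200, 4).astype("float32")
y = np.random.rand(200).astype("float32")  # regression-style targets, matching the default MSE loss

net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
adapter = ModelAdapter(net, framework="torch", epochs=10, lr=1e-2)

adapter.fit(X, y)            # full-batch training loop defined in fit() above
preds = adapter.predict(X)   # 1-D numpy array, same call shape as a sklearn estimator
```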
ai_critic/evaluators/explainability.py ADDED
@@ -0,0 +1,64 @@
+# explainability.py
+import numpy as np
+from sklearn.model_selection import cross_val_score
+from sklearn.base import clone
+
+from .validation import make_cv
+
+
+def evaluate(model, X, y, max_features=10):
+    """
+    Model-agnostic feature sensitivity analysis.
+    Measures how much performance drops when each feature is permuted.
+    """
+
+    cv = make_cv(y)
+
+    base_model = clone(model)
+    base_score = cross_val_score(base_model, X, y, cv=cv).mean()
+
+    sensitivities = []
+
+    for i in range(X.shape[1]):
+        X_permuted = X.copy()
+        np.random.shuffle(X_permuted[:, i])
+
+        permuted_model = clone(model)
+        score = cross_val_score(permuted_model, X_permuted, y, cv=cv).mean()
+
+        drop = base_score - score
+
+        sensitivities.append({
+            "feature_index": int(i),
+            "performance_drop": float(drop)
+        })
+
+    sensitivities.sort(
+        key=lambda x: x["performance_drop"],
+        reverse=True
+    )
+
+    top = sensitivities[:max_features]
+
+    verdict = "stable"
+    message = "No single feature dominates model behavior."
+
+    if top and top[0]["performance_drop"] > 0.30:
+        verdict = "feature_leakage_risk"
+        message = (
+            "Model is highly sensitive to a single feature, "
+            "which may indicate leakage or shortcut learning."
+        )
+    elif top and top[0]["performance_drop"] > 0.15:
+        verdict = "feature_dependency"
+        message = (
+            "Model depends strongly on a small subset of features."
+        )
+
+    return {
+        "baseline_score": float(base_score),
+        "top_sensitive_features": top,
+        "max_performance_drop": float(top[0]["performance_drop"]) if top else 0.0,
+        "verdict": verdict,
+        "message": message
+    }
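
A small sketch of the permutation-sensitivity check in use (illustration, not part of the diff; import path follows the RECORD entry). The synthetic label leaks directly from one feature, so the verdict should typically come back as `feature_leakage_risk`; note that `evaluate()` relies on `clone()` and `cross_val_score()`, so it expects a scikit-learn estimator.

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from ai_critic.evaluators import explainability

rng = np.random.default_rng(0)
X = rng.normal(size=(300, 5))
y = (X[:, 0] > 0).astype(int)  # label is a direct function of feature 0 (deliberate leakage)

result = explainability.evaluate(LogisticRegression(), X, y)
print(result["verdict"], result["max_performance_drop"])
print(result["top_sensitive_features"][:3])
```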
ai_critic/evaluators/scoring.py CHANGED
@@ -11,6 +11,10 @@ def compute_scores(report: dict) -> dict:
     robustness = report["details"]["robustness"]["verdict"]
     structural = report["details"]["config"]["structural_warnings"]
 
+    explainability = report["details"].get("explainability", {})
+    explain_verdict = explainability.get("verdict")
+    max_feature_drop = explainability.get("max_performance_drop", 0)
+
     if data_leakage:
         score -= 30
 
@@ -25,6 +29,11 @@ def compute_scores(report: dict) -> dict:
     if structural:
         score -= 10
 
+    if explain_verdict == "feature_leakage_risk":
+        score -= 20
+    elif explain_verdict == "feature_dependency":
+        score -= 10
+
     return {
         "global": max(0, min(100, score)),
         "components": {
@@ -35,5 +44,10 @@ def compute_scores(report: dict) -> dict:
                 "fragile": 65,
                 "misleading": 40
             }.get(robustness, 100),
+            "explainability": (
+                40 if explain_verdict == "feature_leakage_risk"
+                else 70 if explain_verdict == "feature_dependency"
+                else 100
+            )
         }
     }
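
The scoring effect of the new explainability verdict, restated as standalone arithmetic (illustration only; the helper functions below are hypothetical and simply mirror the constants in the hunks above).

```python
def explainability_penalty(verdict):
    # Deducted from the global score (which compute_scores clamps to 0..100).
    return {"feature_leakage_risk": 20, "feature_dependency": 10}.get(verdict, 0)

def explainability_component(verdict):
    # Value reported under components["explainability"].
    return {"feature_leakage_risk": 40, "feature_dependency": 70}.get(verdict, 100)

assert explainability_penalty("feature_leakage_risk") == 20
assert explainability_component("feature_dependency") == 70
assert explainability_component(None) == 100  # no explainability section leaves the component untouched
```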
ai_critic/evaluators/summary.py CHANGED
@@ -10,6 +10,10 @@ class HumanSummary:
         robustness_verdict = report["robustness"].get("verdict")
         structural_warnings = report["config"]["structural_warnings"]
 
+        explainability = report.get("explainability", {})
+        explain_verdict = explainability.get("verdict")
+        max_feature_drop = explainability.get("max_performance_drop", 0)
+
         # =========================
         # Executive summary
         # =========================
@@ -18,11 +22,19 @@ class HumanSummary:
             risk_level = "high"
             deploy = False
             main_reason = "Strong evidence of data leakage inflating model performance."
+        elif explain_verdict == "feature_leakage_risk":
+            verdict = "❌ Unreliable"
+            risk_level = "high"
+            deploy = False
+            main_reason = (
+                "Model behavior is dominated by a single feature, "
+                "suggesting shortcut learning or leakage."
+            )
         elif robustness_verdict in ("fragile", "misleading") or structural_warnings:
             verdict = "⚠️ Risky"
             risk_level = "medium"
             deploy = False
-            main_reason = "Structural or robustness-related risks detected."
+            main_reason = "Structural, robustness, or dependency-related risks detected."
         else:
             verdict = "✅ Acceptable"
             risk_level = "low"
@@ -71,6 +83,21 @@ class HumanSummary:
                 "Reduce model complexity or adjust hyperparameters."
             )
 
+        if explain_verdict == "feature_leakage_risk":
+            key_risks.append(
+                f"Single feature causes a {max_feature_drop:.2f} performance drop when permuted."
+            )
+            recommendations.append(
+                "Remove or heavily regularize the dominant feature and retrain."
+            )
+        elif explain_verdict == "feature_dependency":
+            key_risks.append(
+                "Model relies disproportionately on a small subset of features."
+            )
+            recommendations.append(
+                "Increase regularization or collect more diverse data."
+            )
+
         if robustness_verdict == "misleading":
             key_risks.append(
                 "Robustness metrics are misleading due to inflated baseline performance."
@@ -92,7 +119,8 @@ class HumanSummary:
                 "data_leakage": leakage,
                 "suspicious_cv": perfect_cv,
                 "structural_risk": bool(structural_warnings),
-                "robustness_verdict": robustness_verdict
+                "robustness_verdict": robustness_verdict,
+                "explainability_verdict": explain_verdict
             },
             "recommendations": recommendations
         }
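
The branch order above determines which verdict wins when several risks are present. A condensed restatement (illustration with placeholder inputs, not the real report plumbing; the first condition is shortened to `leakage` since its full form lies outside the hunk):

```python
def executive_verdict(leakage, explain_verdict, robustness_verdict, structural_warnings):
    # Precedence mirrors HumanSummary after this change: data leakage first,
    # then the new feature-leakage verdict, then robustness/structural issues.
    if leakage:
        return "❌ Unreliable", "high"
    if explain_verdict == "feature_leakage_risk":
        return "❌ Unreliable", "high"
    if robustness_verdict in ("fragile", "misleading") or structural_warnings:
        return "⚠️ Risky", "medium"
    return "✅ Acceptable", "low"

print(executive_verdict(False, "feature_leakage_risk", "stable", []))  # ('❌ Unreliable', 'high')
```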
ai_critic-1.2.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-critic
-Version: 1.0.0
+Version: 1.2.0
 Summary: Fast AI evaluator for scikit-learn models
 Author-email: Luiz Seabra <filipedemarco@yahoo.com>
 Requires-Python: >=3.9
@@ -10,7 +10,7 @@ Requires-Dist: scikit-learn
 
 # ai-critic 🧠: The Quality Gate for Machine Learning Models
 
-**ai-critic** is a specialized **decision-making** tool designed to audit the reliability and readiness for deployment of scikit-learn–compatible Machine Learning models.
+**ai-critic** is a specialized **decision-making** tool designed to audit the reliability and readiness for deployment of **scikit-learn**, **PyTorch**, and **TensorFlow** models.
 
 Instead of merely measuring performance (accuracy, F1 score), **ai-critic** acts as a **Quality Gate**, actively probing the model to uncover *hidden risks* that commonly cause production failures — such as **data leakage**, **structural overfitting**, and **fragility under noise**.
 
@@ -19,11 +19,11 @@ Instead of merely measuring performance (accuracy, F1 score), **ai-critic** acts
 
 ---
 
-## 🚀 Getting Started (The Basics)
+## 🚀 Getting Started (The Basics)
 
 This section is ideal for beginners who need a **fast and reliable verdict** on a trained model.
 
-### Installation
+### Installation
 
 Install directly from PyPI:
 
@@ -33,7 +33,7 @@ pip install ai-critic
 
 ---
 
-### The Quick Verdict
+### The Quick Verdict
 
 With just a few lines of code, you obtain an **executive-level assessment** and a **deployment recommendation**.
 
@@ -70,13 +70,13 @@ If **ai-critic** recommends deployment, it means meaningful risks were *not* det
 
 ---
 
-## 💡 Understanding the Critique (The Intermediary)
+## 💡 Understanding the Critique (The Intermediary)
 
 For data scientists who want to understand **why** the model received a given verdict and **how to improve it**.
 
 ---
 
-### The Four Pillars of the Audit
+### The Four Pillars of the Audit
 
 **ai-critic** evaluates models across four independent risk dimensions:
 
@@ -91,7 +91,8 @@ Each pillar contributes signals used later in the **deployment gate**.
 
 ---
 
-### Full Technical & Visual Analysis
+
+### Full Technical & Visual Analysis
 
 To access **all internal diagnostics**, including plots and recommendations, use `view="all"`.
 
@@ -117,7 +118,7 @@ Generated plots may include:
 
 ---
 
-### Robustness Test (Noise Injection)
+### Robustness Test (Noise Injection)
 
 A model that collapses under small perturbations is **not production-safe**.
 
@@ -139,13 +140,52 @@ print(f"Verdict: {robustness['verdict']}")
 
 ---
 
-## ⚙️ Integration and Governance (The Advanced)
+## ⚙️ Integration and Governance (The Advanced)
 
 This section targets **MLOps engineers**, **architects**, and teams operating automated pipelines.
 
 ---
 
-### The Deployment Gate (`deploy_decision`)
+### Multi-Framework Support
+
+**ai-critic 1.0+** supports models from multiple frameworks with the **same API**:
+
+```python
+# PyTorch Example
+import torch
+import torch.nn as nn
+from ai_critic import AICritic
+
+X = torch.randn(1000, 20)
+y = torch.randint(0, 2, (1000,))
+
+model = nn.Sequential(
+    nn.Linear(20, 32),
+    nn.ReLU(),
+    nn.Linear(32, 2)
+)
+
+critic = AICritic(model, X, y, framework="torch", adapter_kwargs={"epochs":5, "batch_size":64})
+report = critic.evaluate(view="executive")
+print(report)
+
+# TensorFlow Example
+import tensorflow as tf
+
+model = tf.keras.Sequential([
+    tf.keras.layers.Dense(32, activation="relu", input_shape=(20,)),
+    tf.keras.layers.Dense(2)
+])
+critic = AICritic(model, X.numpy(), y.numpy(), framework="tensorflow", adapter_kwargs={"epochs":5})
+report = critic.evaluate(view="executive")
+print(report)
+```
+
+> No need to rewrite evaluation code — **one Critic API works for sklearn, PyTorch, or TensorFlow**.
+
+---
+
+### The Deployment Gate (`deploy_decision`)
 
 The `deploy_decision()` method aggregates *all detected risks* and produces a final gate decision.
 
@@ -173,7 +213,7 @@
 
 ---
 
-### Modes & Views (API Design)
+### Modes & Views (API Design)
 
 The `evaluate()` method supports **multiple modes** via the `view` parameter:
 
@@ -193,7 +233,7 @@ critic.evaluate(view=["executive", "performance"])
 
 ---
 
-### Session Tracking & Model Comparison (New in 1.0.0)
+### Session Tracking & Model Comparison
 
 You can persist evaluations and compare model versions over time.
 
@@ -216,7 +256,7 @@ This enables:
 
 ---
 
-### Best Practices & Use Cases
+### Best Practices & Use Cases
 
 | Scenario | Recommended Usage |
 | ----------------------- | -------------------------------------- |
@@ -226,11 +266,14 @@ This enables:
 | **Stakeholder Reports** | Share executive summaries |
 
 ---
+
 ## 🔒 API Stability
 
 Starting from version **1.0.0**, the public API of **ai-critic** follows semantic versioning.
 Breaking changes will only occur in major releases.
 
+---
+
 ## 📄 License
 
 Distributed under the **MIT License**.
@@ -245,13 +288,3 @@ Distributed under the **MIT License**.
 A failed audit does **not** mean the model is bad — it means the model **is not ready to be trusted**.
 
 The purpose of **ai-critic** is to introduce *structured skepticism* into machine learning workflows — exactly where it belongs.
-
----
-
-If you want, as a next step I can:
-
-* generate the **official CHANGELOG.md for 1.0.0**
-* review this README as an **external reviewer**
-* write the **launch post** (GitHub / PyPI / Reddit)
-
-This README is already at a **genuinely professional level**.
ai_critic-1.2.0.dist-info/RECORD CHANGED
@@ -1,16 +1,18 @@
 ai_critic/__init__.py,sha256=H6DlPMmbcFUamhsNULPLk9vHx81XCiXuKKf63EJ8eM0,53
-ai_critic/critic.py,sha256=ovvOX357OzIC28H0iJrtZfUyku4CA9FnGQiA8M9DDbk,7701
+ai_critic/critic.py,sha256=I9MeVHVCN-lWffPm3DJCgbFVVW8VTIs_qhXd-aP3X5Q,8277
 ai_critic/evaluators/__init__.py,sha256=ri6InmL8_LIcO-JZpU_gEFKLO4URdqo3z6rh7fV6M8Y,169
+ai_critic/evaluators/adapters.py,sha256=8Xw9Ccg1iGVNwVQDGVIqhWj5-Sg6evqCZhg21u8EP20,3068
 ai_critic/evaluators/config.py,sha256=gBXaS8Qxl14f40JnvMWgA0Z0SGEtbCuCHpTOPem0H90,1163
 ai_critic/evaluators/data.py,sha256=YAK5NkwCeJOny_UueZ5ALwvEcRDIbEck404eV2oqWnc,1871
+ai_critic/evaluators/explainability.py,sha256=UWbcb5uVI78d1ljfdrWd2DrjlwEz1y9CeVtkukefEfA,1759
 ai_critic/evaluators/performance.py,sha256=1CQx5DueK0XkelYyJnAGRJ3AjQtjsKeW8_1JQZqKVOI,1973
 ai_critic/evaluators/robustness.py,sha256=mfVQ67Z6t6aRvtIq-XQEQYbwvyf8UefM1myeOGVrnAE,1869
-ai_critic/evaluators/scoring.py,sha256=GBkmDa5Q6RZY4hJfzrCbxbBopsOsRjsNtzyoQHqgWHA,1046
-ai_critic/evaluators/summary.py,sha256=O9ZCrph93VV6pFcMIx2a7DizPIccRUqbGcUZ6oDmOLs,3791
+ai_critic/evaluators/scoring.py,sha256=9rgkCXKKm9G1Lfwn5i9HcsJTN5OUjxMycOUzhWkp_2g,1576
+ai_critic/evaluators/summary.py,sha256=H9rU9tXAXqyQ34L6bOOOHrdIapSq71gcjjc8jfyJMq4,5003
 ai_critic/evaluators/validation.py,sha256=rnzRwD78Cugey33gl9geE8JoBURsKEEnqrIOhBZv0LY,904
 ai_critic/sessions/__init__.py,sha256=Yp7mphSPJwt8a4cJgcQNErqwqHVuP_xAJODrs0y0Abw,72
 ai_critic/sessions/store.py,sha256=65m9WXFVFWv4pPzvXV4l8zLHoHWMfCGe6eHh4X-8agY,947
-ai_critic-1.0.0.dist-info/METADATA,sha256=_3VxXuMYnt2LoCrUw8AhTb2UMm934lcxgWV2Bw0l3eg,7426
-ai_critic-1.0.0.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
-ai_critic-1.0.0.dist-info/top_level.txt,sha256=TRyZkm1vyLLcFDg_80yeg5cHvPis_oW1Ti170417jkw,10
-ai_critic-1.0.0.dist-info/RECORD,,
+ai_critic-1.2.0.dist-info/METADATA,sha256=s0XYw_E7ZoVBhF74lyhQsFk_bcyJWY3eo8Yk5E97tZ4,8115
+ai_critic-1.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ai_critic-1.2.0.dist-info/top_level.txt,sha256=TRyZkm1vyLLcFDg_80yeg5cHvPis_oW1Ti170417jkw,10
+ai_critic-1.2.0.dist-info/RECORD,,
ai_critic-1.2.0.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.10.1)
+Generator: setuptools (80.10.2)
 Root-Is-Purelib: true
 Tag: py3-none-any