gpclarity 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gpclarity/__init__.py +190 -0
- gpclarity/_version.py +3 -0
- gpclarity/data_influence.py +501 -0
- gpclarity/exceptions.py +46 -0
- gpclarity/hyperparam_tracker.py +718 -0
- gpclarity/kernel_summary.py +285 -0
- gpclarity/model_complexity.py +619 -0
- gpclarity/plotting.py +337 -0
- gpclarity/uncertainty_analysis.py +647 -0
- gpclarity/utils.py +411 -0
- gpclarity-0.0.2.dist-info/METADATA +248 -0
- gpclarity-0.0.2.dist-info/RECORD +14 -0
- gpclarity-0.0.2.dist-info/WHEEL +4 -0
- gpclarity-0.0.2.dist-info/licenses/LICENSE +37 -0
gpclarity/utils.py
ADDED
|
@@ -0,0 +1,411 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Model complexity quantification for Gaussian Processes.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import logging
from typing import Any, Dict, Union

import GPy
import numpy as np
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ComplexityError(Exception):
    """Raised when complexity computation fails."""
|
|
17
|
+
|
|
18
|
+
class LinAlgError(Exception):
    """Linear algebra computation error."""
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def count_kernel_components(kern: GPy.kern.Kern) -> int:
    """
    Recursively count total kernel components in composite kernels.

    Args:
        kern: GPy kernel object

    Returns:
        Total number of kernel components

    Raises:
        ComplexityError: If kernel traversal fails unexpectedly
    """
    try:
        # A kernel with no `parts` attribute, or an empty one, is a single
        # leaf component.
        if not getattr(kern, "parts", None):
            return 1

        # Guard against malformed composite kernels whose `parts` cannot
        # be iterated over.
        if not hasattr(kern.parts, "__iter__"):
            raise ComplexityError(f"Kernel 'parts' is not iterable: {type(kern.parts)}")

        total = 0
        for child in kern.parts:
            total += count_kernel_components(child)
        return total

    except ComplexityError:
        raise
    except RecursionError as e:
        logger.error(f"Recursion limit hit in kernel counting (circular reference?): {e}")
        raise ComplexityError("Kernel structure too deep or circular") from e
    except Exception as e:
        logger.error(f"Unexpected error counting kernel components: {e}")
        raise ComplexityError(f"Failed to count kernel components: {e}") from e
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def compute_roughness_score(kern: GPy.kern.Kern) -> float:
    """
    Compute overall function roughness as inverse lengthscale.

    Args:
        kern: GPy kernel object

    Returns:
        Roughness score (higher = more wiggly)

    Raises:
        ComplexityError: If roughness computation fails
    """
    # Inverse mean lengthscale of every leaf kernel that exposes one.
    inverse_scales = []

    def _visit(node):
        try:
            if not hasattr(node, "parts"):
                # Leaf kernel: record its contribution, if it has a lengthscale.
                if hasattr(node, "lengthscale"):
                    ls = node.lengthscale
                    ls_mean = np.mean(ls) if hasattr(ls, "__iter__") else ls

                    if not np.isfinite(ls_mean):
                        logger.warning(f"Non-finite lengthscale encountered: {ls_mean}")
                        return

                    # Small epsilon keeps the inverse finite for tiny lengthscales.
                    inverse_scales.append(1.0 / (ls_mean + 1e-10))
                return

            if node.parts:
                for i, part in enumerate(node.parts):
                    try:
                        _visit(part)
                    except Exception as e:
                        logger.warning(f"Failed to traverse kernel part {i}: {e}")

        except Exception as e:
            logger.warning(f"Error traversing kernel: {e}")

    try:
        _visit(kern)
    except Exception as e:
        logger.error(f"Roughness score computation failed: {e}")
        raise ComplexityError(f"Failed to compute roughness: {e}") from e

    if not inverse_scales:
        logger.warning("No lengthscales found in kernel, returning zero roughness")
        return 0.0

    return sum(inverse_scales) / len(inverse_scales)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def compute_noise_ratio(model: GPy.models.GPRegression) -> float:
    """
    Compute signal-to-noise ratio (SNR) for the model.

    Args:
        model: Trained GPy model

    Returns:
        SNR = signal_variance / noise_variance (returns 1.0 if indeterminate)

    Raises:
        ComplexityError: If SNR computation fails unexpectedly
    """
    try:
        # Structural checks: a usable model needs a kernel and a Gaussian
        # likelihood that both expose a variance.
        if not hasattr(model, "kern"):
            raise ComplexityError("Model has no kernel")

        if not hasattr(model.kern, "variance"):
            # Some kernels don't have variance (e.g., combination kernels)
            logger.debug("Kernel has no variance attribute, assuming SNR=1.0")
            return 1.0

        sig = float(model.kern.variance)

        if not hasattr(model, "Gaussian_noise"):
            raise ComplexityError("Model has no Gaussian_noise attribute")
        if not hasattr(model.Gaussian_noise, "variance"):
            raise ComplexityError("Gaussian_noise has no variance attribute")

        noise = float(model.Gaussian_noise.variance)

        # Degenerate variances make the ratio meaningless; fall back to 1.0.
        if not (np.isfinite(sig) and np.isfinite(noise)):
            logger.warning(f"Non-finite variance values: signal={sig}, noise={noise}")
            return 1.0

        if noise < 0:
            logger.warning(f"Negative noise variance: {noise}")
            return 1.0

        # Epsilon guards against division by an exactly-zero noise variance.
        return float(sig / (noise + 1e-10))

    except (AttributeError, TypeError, ValueError) as e:
        # Expected failures for non-standard model structures
        logger.debug(f"Could not compute noise ratio (expected for some models): {e}")
        return 1.0
    except ComplexityError:
        raise
    except Exception as e:
        logger.error(f"Unexpected error computing noise ratio: {e}")
        raise ComplexityError(f"Failed to compute noise ratio: {e}") from e
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def compute_complexity_score(
    model: GPy.models.GPRegression, X: np.ndarray
) -> Dict[str, Any]:
    """
    Comprehensive model complexity quantification.

    Combines multiple metrics: kernel components, roughness, noise ratio,
    and effective degrees of freedom.

    Args:
        model: Trained GPy model
        X: Training data for degrees of freedom calculation

    Returns:
        Dictionary with complexity score and detailed breakdown

    Raises:
        ComplexityError: If computation fails
        ValueError: If inputs are invalid
    """
    if X is None or not hasattr(X, "shape"):
        raise ValueError("X must be a numpy array")

    if X.shape[0] == 0:
        raise ValueError("X cannot be empty")

    n = X.shape[0]

    try:
        parts_total = count_kernel_components(model.kern)
        wiggliness = compute_roughness_score(model.kern)
        snr = compute_noise_ratio(model)

        # Effective degrees of freedom (approximation)
        eff_dof = n * 0.5  # default fallback

        try:
            gram = model.kern.K(X, X)
            sigma2 = float(model.Gaussian_noise.variance)

            if not np.isfinite(sigma2):
                logger.warning(f"Non-finite noise variance: {sigma2}")
            else:
                tr = np.trace(gram)
                if np.isfinite(tr) and tr >= 0:
                    # Fraction of variance explained by the kernel vs noise.
                    eff_dof = (tr / (tr + sigma2 * n + 1e-10)) * n

        except (AttributeError, ValueError, np.linalg.LinAlgError) as e:
            logger.debug(f"Could not compute effective DOF: {e}")
        except Exception as e:
            logger.warning(f"Unexpected error in effective DOF computation: {e}")

        # Composite complexity score (0 = simple, ∞ = complex)
        dof_fraction = eff_dof / n
        raw_score = parts_total * wiggliness * snr / (dof_fraction + 1e-10)

        # Interpretation thresholds (adaptive)
        log_score = np.log10(raw_score + 1)

        if log_score < 0.5:
            verdict = "Simple model (low risk of overfitting)"
            advice = [
                "Model is likely underfitting",
                "Consider more expressive kernel",
            ]
        elif log_score < 1.5:
            verdict = "Moderate complexity (well-balanced)"
            advice = ["Good complexity for most applications"]
        else:
            verdict = "High complexity (monitor for overfitting)"
            advice = [
                "Consider simplifying kernel",
                "Add regularization",
                "Collect more data",
            ]

        return {
            "score": float(raw_score),
            "log_score": float(log_score),
            "interpretation": verdict,
            "suggestions": advice,
            "components": {
                "n_kernel_parts": parts_total,
                "roughness_score": float(wiggliness),
                "noise_ratio": float(snr),
                "effective_degrees_of_freedom": float(eff_dof),
            },
            "risk_factors": {
                "too_complex": log_score > 1.5,
                "too_simple": log_score < 0.5,
                "high_noise": snr < 0.1,
            },
        }

    except ComplexityError:
        raise
    except Exception as e:
        logger.error(f"Unexpected error in complexity score computation: {e}")
        raise ComplexityError(f"Failed to compute complexity score: {e}") from e
|
|
270
|
+
|
|
271
|
+
def _validate_kernel_matrix(K: np.ndarray) -> None:
|
|
272
|
+
"""
|
|
273
|
+
Validate kernel matrix for numerical issues.
|
|
274
|
+
|
|
275
|
+
Args:
|
|
276
|
+
K: Kernel matrix to validate
|
|
277
|
+
|
|
278
|
+
Raises:
|
|
279
|
+
LinAlgError: If matrix is invalid
|
|
280
|
+
"""
|
|
281
|
+
if not np.all(np.isfinite(K)):
|
|
282
|
+
n_nonfinite = np.sum(~np.isfinite(K))
|
|
283
|
+
raise LinAlgError(
|
|
284
|
+
f"Kernel matrix contains {n_nonfinite} non-finite values"
|
|
285
|
+
)
|
|
286
|
+
|
|
287
|
+
if K.shape[0] != K.shape[1]:
|
|
288
|
+
raise LinAlgError(f"Kernel matrix must be square, got {K.shape}")
|
|
289
|
+
|
|
290
|
+
# Check symmetry
|
|
291
|
+
if not np.allclose(K, K.T, rtol=1e-5, atol=1e-8):
|
|
292
|
+
max_asym = np.max(np.abs(K - K.T))
|
|
293
|
+
logger.warning(f"Kernel matrix asymmetric (max diff: {max_asym:.2e})")
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def _cholesky_with_jitter(
|
|
297
|
+
K: np.ndarray,
|
|
298
|
+
max_attempts: int = 5,
|
|
299
|
+
initial_jitter: float = 1e-6,
|
|
300
|
+
jitter_growth: float = 10.0,
|
|
301
|
+
) -> np.ndarray:
|
|
302
|
+
"""
|
|
303
|
+
Compute Cholesky decomposition with progressive jitter.
|
|
304
|
+
|
|
305
|
+
Args:
|
|
306
|
+
K: Positive semi-definite matrix
|
|
307
|
+
max_attempts: Maximum jitter attempts
|
|
308
|
+
initial_jitter: Starting jitter magnitude
|
|
309
|
+
jitter_growth: Multiplicative factor for jitter increase
|
|
310
|
+
|
|
311
|
+
Returns:
|
|
312
|
+
Lower triangular Cholesky factor
|
|
313
|
+
|
|
314
|
+
Raises:
|
|
315
|
+
LinAlgError: If decomposition fails after all attempts
|
|
316
|
+
"""
|
|
317
|
+
try:
|
|
318
|
+
return np.linalg.cholesky(K)
|
|
319
|
+
except np.linalg.LinAlgError:
|
|
320
|
+
pass
|
|
321
|
+
|
|
322
|
+
K_work = K.copy()
|
|
323
|
+
jitter = initial_jitter
|
|
324
|
+
|
|
325
|
+
for attempt in range(max_attempts):
|
|
326
|
+
K_work = K_work + np.eye(K.shape[0]) * jitter
|
|
327
|
+
try:
|
|
328
|
+
L = np.linalg.cholesky(K_work)
|
|
329
|
+
logger.debug(f"Cholesky succeeded with jitter {jitter:.2e}")
|
|
330
|
+
return L
|
|
331
|
+
except np.linalg.LinAlgError:
|
|
332
|
+
jitter *= jitter_growth
|
|
333
|
+
|
|
334
|
+
raise LinAlgError(
|
|
335
|
+
f"Cholesky decomposition failed after {max_attempts} attempts "
|
|
336
|
+
f"with max jitter {jitter/jitter_growth:.2e}"
|
|
337
|
+
)
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def _extract_param_value(param: Any) -> Union[float, np.ndarray]:
|
|
341
|
+
"""
|
|
342
|
+
Safely extract scalar or array value from GPy parameter.
|
|
343
|
+
|
|
344
|
+
Args:
|
|
345
|
+
param: GPy parameter object
|
|
346
|
+
|
|
347
|
+
Returns:
|
|
348
|
+
Scalar float or numpy array
|
|
349
|
+
"""
|
|
350
|
+
val = param.param_array
|
|
351
|
+
|
|
352
|
+
if val is None:
|
|
353
|
+
return 0.0
|
|
354
|
+
|
|
355
|
+
arr = np.atleast_1d(val)
|
|
356
|
+
|
|
357
|
+
if len(arr) == 1:
|
|
358
|
+
return float(arr[0])
|
|
359
|
+
else:
|
|
360
|
+
return arr.copy()
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def _validate_convergence_window(window: int, history_length: int) -> None:
|
|
364
|
+
"""
|
|
365
|
+
Validate window size for convergence analysis.
|
|
366
|
+
|
|
367
|
+
Args:
|
|
368
|
+
window: Requested window size
|
|
369
|
+
history_length: Available history length
|
|
370
|
+
|
|
371
|
+
Raises:
|
|
372
|
+
ValueError: If window invalid
|
|
373
|
+
"""
|
|
374
|
+
if window <= 0:
|
|
375
|
+
raise ValueError(f"Window must be positive, got {window}")
|
|
376
|
+
if window > history_length // 2:
|
|
377
|
+
raise ValueError(
|
|
378
|
+
f"Window ({window}) too large for history length ({history_length}). "
|
|
379
|
+
f"Max allowed: {history_length // 2}"
|
|
380
|
+
)
|
|
381
|
+
|
|
382
|
+
def _validate_array(arr: Any, name: str = "array") -> np.ndarray:
|
|
383
|
+
"""
|
|
384
|
+
Validate and convert input to numpy array.
|
|
385
|
+
|
|
386
|
+
Args:
|
|
387
|
+
arr: Input array-like
|
|
388
|
+
name: Name for error messages
|
|
389
|
+
|
|
390
|
+
Returns:
|
|
391
|
+
Validated numpy array
|
|
392
|
+
|
|
393
|
+
Raises:
|
|
394
|
+
ValueError: If invalid
|
|
395
|
+
"""
|
|
396
|
+
if arr is None:
|
|
397
|
+
raise ValueError(f"{name} cannot be None")
|
|
398
|
+
|
|
399
|
+
try:
|
|
400
|
+
arr = np.asarray(arr)
|
|
401
|
+
except Exception as e:
|
|
402
|
+
raise ValueError(f"{name} must be array-like: {e}") from e
|
|
403
|
+
|
|
404
|
+
if arr.size == 0:
|
|
405
|
+
raise ValueError(f"{name} cannot be empty")
|
|
406
|
+
|
|
407
|
+
if not np.all(np.isfinite(arr)):
|
|
408
|
+
n_invalid = np.sum(~np.isfinite(arr))
|
|
409
|
+
raise ValueError(f"{name} contains {n_invalid} non-finite values")
|
|
410
|
+
|
|
411
|
+
return arr
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: gpclarity
|
|
3
|
+
Version: 0.0.2
|
|
4
|
+
Summary: Interpretability and Diagnostics Tools for Gaussian Processes
|
|
5
|
+
Project-URL: Homepage, https://github.com/AngadKumar16/gpclarity
|
|
6
|
+
Project-URL: Issues, https://github.com/AngadKumar16/gpclarity/issues
|
|
7
|
+
Project-URL: Documentation, https://gpclarity.readthedocs.io
|
|
8
|
+
Author-email: Angad Kumar <angadkumar16ak@gmail.com>
|
|
9
|
+
License: BSD 3-Clause License
|
|
10
|
+
|
|
11
|
+
Copyright (c) 2026, Angad Kumar
|
|
12
|
+
All rights reserved.
|
|
13
|
+
|
|
14
|
+
Redistribution and use in source and binary forms, with or without
|
|
15
|
+
modification, are permitted provided that the following conditions are met:
|
|
16
|
+
|
|
17
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
18
|
+
list of conditions and the following disclaimer.
|
|
19
|
+
|
|
20
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
21
|
+
this list of conditions and the following disclaimer in the documentation
|
|
22
|
+
and/or other materials provided with the distribution.
|
|
23
|
+
|
|
24
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
25
|
+
contributors may be used to endorse or promote products derived from
|
|
26
|
+
this software without specific prior written permission.
|
|
27
|
+
|
|
28
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
29
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
30
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
31
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
32
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
33
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
34
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
35
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
36
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
37
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
Academic Citation Request (Non-Binding)
|
|
41
|
+
|
|
42
|
+
If you use GPClarity in academic research, publications, or derived
|
|
43
|
+
scientific work, we kindly request that you cite the software. Citation
|
|
44
|
+
helps support continued development and enables recognition of open
|
|
45
|
+
scientific infrastructure. See the CITATION.cff file for details.
|
|
46
|
+
License-File: LICENSE
|
|
47
|
+
Keywords: gaussian-process,interpretability,machine-learning,uncertainty
|
|
48
|
+
Classifier: Development Status :: 4 - Beta
|
|
49
|
+
Classifier: Intended Audience :: Science/Research
|
|
50
|
+
Classifier: License :: OSI Approved :: BSD License
|
|
51
|
+
Classifier: Programming Language :: Python :: 3
|
|
52
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
53
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
54
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
55
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
56
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
57
|
+
Requires-Python: >=3.9
|
|
58
|
+
Requires-Dist: emukit>=0.4.0
|
|
59
|
+
Requires-Dist: gpy>=1.10.0
|
|
60
|
+
Requires-Dist: matplotlib>=3.4.0
|
|
61
|
+
Requires-Dist: numpy>=1.20.0
|
|
62
|
+
Requires-Dist: scipy>=1.7.0
|
|
63
|
+
Provides-Extra: dev
|
|
64
|
+
Requires-Dist: black>=23.0; extra == 'dev'
|
|
65
|
+
Requires-Dist: isort>=5.12; extra == 'dev'
|
|
66
|
+
Requires-Dist: mypy>=1.0; extra == 'dev'
|
|
67
|
+
Requires-Dist: pre-commit>=3.0; extra == 'dev'
|
|
68
|
+
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
69
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
70
|
+
Provides-Extra: docs
|
|
71
|
+
Requires-Dist: myst-parser>=1.0; extra == 'docs'
|
|
72
|
+
Requires-Dist: nbsphinx>=0.9; extra == 'docs'
|
|
73
|
+
Requires-Dist: sphinx-rtd-theme>=1.2; extra == 'docs'
|
|
74
|
+
Requires-Dist: sphinx>=5.0; extra == 'docs'
|
|
75
|
+
Description-Content-Type: text/markdown
|
|
76
|
+
|
|
77
|
+
# GPClarity: Gaussian Process Interpretability Toolkit
|
|
78
|
+

|
|
79
|
+

|
|
80
|
+

|
|
81
|
+
|
|
82
|
+
**GPClarity** is a production-ready library that transforms black-box Gaussian Process models into interpretable, debuggable, and trustworthy tools. Built on GPy and emukit, it provides human-readable insights into kernel behavior, uncertainty patterns, and model complexity.
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## 🎯 Features
|
|
87
|
+
|
|
88
|
+
- 🔍 **Kernel Interpretation**: Translate raw kernel math into human meaning
|
|
89
|
+
- 📊 **Uncertainty Profiling**: Visualize and diagnose uncertainty behavior
|
|
90
|
+
- 📈 **Hyperparameter Tracking**: Monitor optimization dynamics in real-time
|
|
91
|
+
- 🧮 **Complexity Quantification**: Measure and prevent overfitting
|
|
92
|
+
- 🎯 **Data Influence Analysis**: Identify impactful training points
|
|
93
|
+
- 🔗 **Emukit Integration**: Seamless Bayesian optimization support
|
|
94
|
+
|
|
95
|
+
---
|
|
96
|
+
|
|
97
|
+
## 🚀 Quick Start
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
import gpclarity
|
|
101
|
+
import GPy
|
|
102
|
+
import numpy as np
|
|
103
|
+
|
|
104
|
+
# Train a Gaussian Process
|
|
105
|
+
X = np.linspace(0, 10, 50).reshape(-1, 1)
|
|
106
|
+
y = np.sin(X).flatten() + 0.1 * np.random.randn(50)
|
|
107
|
+
|
|
108
|
+
kernel = GPy.kern.RBF(1) + GPy.kern.White(1)
|
|
109
|
+
model = GPy.models.GPRegression(X, y[:, None], kernel)
|
|
110
|
+
model.optimize()
|
|
111
|
+
|
|
112
|
+
summary = gpclarity.summarize_kernel(model)
|
|
113
|
+
|
|
114
|
+
profiler = gpclarity.UncertaintyProfiler(model)
|
|
115
|
+
X_test = np.linspace(-2, 12, 200).reshape(-1, 1)
|
|
116
|
+
profiler.plot(X_test, X_train=X, y_train=y)
|
|
117
|
+
|
|
118
|
+
tracker = gpclarity.HyperparameterTracker(model)
|
|
119
|
+
history = tracker.wrapped_optimize(max_iters=50)
|
|
120
|
+
tracker.plot_evolution()
|
|
121
|
+
|
|
122
|
+
complexity = gpclarity.compute_complexity_score(model, X)
|
|
123
|
+
print(f"Complexity: {complexity['score']:.2f} - {complexity['interpretation']}")
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
---
|
|
127
|
+
|
|
128
|
+
## 📦 Installation
|
|
129
|
+
|
|
130
|
+
### Stable Release
|
|
131
|
+
```bash
|
|
132
|
+
pip install gpclarity
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### Development Version
|
|
136
|
+
```bash
|
|
137
|
+
git clone https://github.com/AngadKumar16/gpclarity.git
|
|
138
|
+
cd gpclarity
|
|
139
|
+
pip install -e ".[dev]"
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
### Conda (coming soon)
|
|
143
|
+
```bash
|
|
144
|
+
conda install -c conda-forge gpclarity
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
149
|
+
## 🏗️ Architecture
|
|
150
|
+
|
|
151
|
+
```
|
|
152
|
+
gpclarity/
|
|
153
|
+
├── kernel_summary
|
|
154
|
+
├── uncertainty_analysis
|
|
155
|
+
├── hyperparam_tracker
|
|
156
|
+
├── model_complexity
|
|
157
|
+
├── data_influence
|
|
158
|
+
└── utils
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
## 🔬 Advanced Usage
|
|
164
|
+
|
|
165
|
+
### Emukit Integration
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
from gpclarity import ClarityBayesianOptimizationLoop
|
|
169
|
+
|
|
170
|
+
loop = ClarityBayesianOptimizationLoop(model, space)
|
|
171
|
+
loop.run_loop(user_function, stopping_condition)
|
|
172
|
+
loop.plot_diagnostics()
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Batch Processing
|
|
176
|
+
|
|
177
|
+
```python
|
|
178
|
+
models = [model1, model2, model3]
|
|
179
|
+
reports = [gpclarity.summarize_kernel(m, verbose=False) for m in models]
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
---
|
|
183
|
+
|
|
184
|
+
## 📊 Example Outputs
|
|
185
|
+
|
|
186
|
+
### Kernel Summary
|
|
187
|
+
|
|
188
|
+
```
|
|
189
|
+
🔍 KERNEL SUMMARY
|
|
190
|
+
Structure: ['RBF', 'White']
|
|
191
|
+
Components: 2
|
|
192
|
+
|
|
193
|
+
📦 RBF (lengthscale)
|
|
194
|
+
└─ lengthscale: 1.23
|
|
195
|
+
💡 Moderate flexibility
|
|
196
|
+
|
|
197
|
+
📦 White (variance)
|
|
198
|
+
└─ variance: 0.01
|
|
199
|
+
💡 Low observation noise
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
### Complexity Report
|
|
203
|
+
|
|
204
|
+
```json
|
|
205
|
+
{
|
|
206
|
+
"score": 2.34,
|
|
207
|
+
"interpretation": "Moderate complexity (well-balanced)",
|
|
208
|
+
"components": {
|
|
209
|
+
"n_kernel_parts": 2,
|
|
210
|
+
"roughness_score": 0.81,
|
|
211
|
+
"noise_ratio": 4.5
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
---
|
|
217
|
+
|
|
218
|
+
## 🎓 Citation
|
|
219
|
+
|
|
220
|
+
```bibtex
|
|
221
|
+
@software{gpclarity2026,
|
|
222
|
+
title={gpclarity: Gaussian Process Interpretability Toolkit},
|
|
223
|
+
author={Angad Kumar},
|
|
224
|
+
year={2026},
|
|
225
|
+
url={https://github.com/AngadKumar16/gpclarity},
|
|
226
|
+
version={0.0.2}
|
|
227
|
+
}
|
|
228
|
+
```
|
|
229
|
+
## 📝 License
|
|
230
|
+
|
|
231
|
+
GPClarity is licensed under the **BSD 3-Clause License**. See [LICENSE](LICENSE) for details.
|
|
232
|
+
|
|
233
|
+
## 🤝 Contributing
|
|
234
|
+
|
|
235
|
+
Contributions are welcome!
|
|
236
|
+
|
|
237
|
+
- Report bugs or request features via [GitHub Issues](https://github.com/AngadKumar16/gpclarity/issues)
|
|
238
|
+
- Submit pull requests for fixes or enhancements
|
|
239
|
+
- Make sure to follow the code style and write tests for new features
|
|
240
|
+
|
|
241
|
+
**Author:** Angad Kumar ([GitHub](https://github.com/AngadKumar16), [Email](mailto:angadkumar16ak@gmail.com))
|
|
242
|
+
|
|
243
|
+
## 🛣️ Roadmap
|
|
244
|
+
|
|
245
|
+
- Conda package support
|
|
246
|
+
- More visualization tools for kernel decomposition
|
|
247
|
+
- Automated tutorials / example notebooks
|
|
248
|
+
- More features overall
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
gpclarity/__init__.py,sha256=3ZmuI3pMR76IAac5VfeYjsj3XZoiiGl5IoyFsb7DWyQ,5965
|
|
2
|
+
gpclarity/_version.py,sha256=TqmJTwRFFMcQ_an21EbHMvk9y7bfvBnH7YUaMsYzvR8,49
|
|
3
|
+
gpclarity/data_influence.py,sha256=Cm45O8twbzQG2JP5LqMzKG_Sulz4v7yuO4Dsy4jEDOA,17552
|
|
4
|
+
gpclarity/exceptions.py,sha256=AGGTsVOQr4VQ84P7qjqsKTEAKNBOyVRRtS8eUHmcesw,950
|
|
5
|
+
gpclarity/hyperparam_tracker.py,sha256=vjx4PZLKjaQBUx79G5ym6s6DKqPxHg4YZCPynJ6Y6K8,24873
|
|
6
|
+
gpclarity/kernel_summary.py,sha256=a8sXs_qEmG2K3m0oYU2fUV9CjTz6jre_P5gk-Wn5s7U,9272
|
|
7
|
+
gpclarity/model_complexity.py,sha256=SRATXpdKX_uIwqk1_RZvS4jvHi7mmBpLpBz0MbkhgQM,22324
|
|
8
|
+
gpclarity/plotting.py,sha256=ZMIvGLqXykD4WmXYoDEeuStY5YEaaMQ9EMun4UAB0Ro,10356
|
|
9
|
+
gpclarity/uncertainty_analysis.py,sha256=XOQv98khDWSMDZb76uVJ6V3e4nsjUlLgyeUNFY2Q-ZI,22294
|
|
10
|
+
gpclarity/utils.py,sha256=rbg0vpUt29_eYOwcXVJ3wvTArd73xfFvll67GSB9NJ8,12834
|
|
11
|
+
gpclarity-0.0.2.dist-info/METADATA,sha256=M0hbeP9xpt9m1YlgHr4dAj8ji-OKFc3tQ8OSDff8MLs,7825
|
|
12
|
+
gpclarity-0.0.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
13
|
+
gpclarity-0.0.2.dist-info/licenses/LICENSE,sha256=Hcj35trYFMm1phdiTIEHqP-5kxrQGZPD-Uis2NkC1N8,1836
|
|
14
|
+
gpclarity-0.0.2.dist-info/RECORD,,
|