pearsonify 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pearsonify/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ __version__ = "0.1.0"
2
+
3
+ from .wrapper import Pearsonify
4
+
5
+ __all__ = ["__version__", "Pearsonify"]
pearsonify/utils.py ADDED
@@ -0,0 +1,22 @@
1
+ """Utilities for Pearsonify: Pearson residuals, confidence intervals, coverage."""
2
+
3
+ import numpy as np
4
+
5
+
6
def compute_pearson_residuals(y_true, y_pred_proba, eps=1e-10):
    """Compute Pearson residuals for binary classification.

    The residual of an observation is ``(y - p) / sqrt(p * (1 - p))``, where
    ``y`` is the observed label and ``p`` the predicted probability.

    Parameters:
    - y_true: Array-like of observed labels (expected to be 0/1).
    - y_pred_proba: Array-like of predicted probabilities for the positive class.
    - eps: Clipping margin; probabilities are clipped to ``[eps, 1 - eps]`` so
      the denominator never becomes zero. Must satisfy ``0 < eps < 0.5``.

    Returns:
    - The element-wise Pearson residuals.

    Raises:
    - ValueError: If ``eps`` is not strictly between 0 and 0.5.
    """
    if not 0 < eps < 0.5:
        raise ValueError("eps must be strictly between 0 and 0.5.")

    # Clip away exact 0/1 probabilities: they would make p * (1 - p) zero
    # and the division below blow up.
    y_pred_proba = np.clip(y_pred_proba, eps, 1 - eps)
    return (y_true - y_pred_proba) / np.sqrt(y_pred_proba * (1 - y_pred_proba))
10
+
11
+
12
def compute_confidence_intervals(y_pred_proba, q_alpha):
    """Compute confidence intervals based on Pearson residuals.

    Each interval is ``p ± q_alpha * sqrt(p * (1 - p))``, truncated to the
    unit interval.

    Parameters:
    - y_pred_proba: Array-like of predicted probabilities for the positive class.
    - q_alpha: Multiplier applied to the per-sample standard error (the
      calibration quantile of the absolute Pearson residuals).

    Returns:
    - Tuple ``(lower_bounds, upper_bounds)`` of NumPy arrays, with lower
      bounds floored at 0 and upper bounds capped at 1.
    """
    # Coerce to an array so plain lists/tuples support the arithmetic below
    # (``1 - [..]`` raises TypeError on a Python list).
    y_pred_proba = np.asarray(y_pred_proba, dtype=float)
    std_error = np.sqrt(y_pred_proba * (1 - y_pred_proba))
    margin = q_alpha * std_error
    lower_bounds = np.maximum(0, y_pred_proba - margin)
    upper_bounds = np.minimum(1, y_pred_proba + margin)
    return lower_bounds, upper_bounds
18
+
19
+
20
def calculate_coverage(y_true, lower_bounds, upper_bounds):
    """Calculate the empirical coverage of confidence intervals.

    Parameters:
    - y_true: Array-like of observed values.
    - lower_bounds: Array-like of interval lower bounds.
    - upper_bounds: Array-like of interval upper bounds.

    Returns:
    - Fraction of observations with ``lower <= y <= upper`` (bounds inclusive).
    """
    # Coerce to arrays first: comparing two plain Python lists is a single
    # lexicographic comparison, not an element-wise one, and would silently
    # yield a coverage of 0.0 or 1.0.
    y_true = np.asarray(y_true)
    lower_bounds = np.asarray(lower_bounds)
    upper_bounds = np.asarray(upper_bounds)
    covered = (y_true >= lower_bounds) & (y_true <= upper_bounds)
    return np.mean(covered)
pearsonify/wrapper.py ADDED
@@ -0,0 +1,93 @@
1
+ import matplotlib.pyplot as plt
2
+ import numpy as np
3
+ from sklearn.base import BaseEstimator
4
+ from sklearn.utils.validation import NotFittedError, check_is_fitted
5
+
6
+ from .utils import (
7
+ calculate_coverage,
8
+ compute_confidence_intervals,
9
+ compute_pearson_residuals,
10
+ )
11
+
12
+
13
class Pearsonify:
    """Wrap a probabilistic binary classifier with Pearson-residual intervals.

    A quantile of the absolute Pearson residuals is computed on a calibration
    set and then used to scale an interval around each predicted probability.
    """

    def __init__(self, estimator: BaseEstimator, alpha=0.05):
        """
        Initialize with a model that implements `fit` and `predict_proba`.

        Parameters:
        - estimator: A scikit-learn-like classifier with `fit` and `predict_proba` methods.
        - alpha: Significance level (e.g., 0.05 for 95% intervals).
        """
        self.estimator = estimator
        self.alpha = alpha
        # Calibration quantile; stays None until `fit` has been called.
        self.q_alpha = None

    def fit(self, X_train, y_train, X_cal, y_cal):
        """Fit the model and compute Pearson residual-based quantile from calibration data.

        Parameters:
        - X_train, y_train: Training data; only used when the estimator is
          not already fitted.
        - X_cal, y_cal: Calibration data used to compute the residual quantile.

        Returns:
        - The ``1 - alpha`` quantile of the absolute Pearson residuals on the
          calibration set (also stored as ``self.q_alpha``).

        Raises:
        - TypeError: If the estimator fails scikit-learn's fitted-check
          validation or has no callable `predict_proba` method.
        """
        # Train the model only if it is not already fitted.
        try:
            check_is_fitted(self.estimator)
        except TypeError as e:
            # Raised by check_is_fitted when the object is not a valid estimator.
            raise TypeError(f"Estimator validation failed: {e}") from e
        except NotFittedError:
            self.estimator.fit(X_train, y_train)

        # Validate after the fit-if-needed step so that freshly fitted
        # estimators are checked too, instead of failing later with an
        # opaque AttributeError.
        if not callable(getattr(self.estimator, "predict_proba", None)):
            raise TypeError(
                "Estimator validation failed: "
                "The estimator must have a callable 'predict_proba' method."
            )

        # Compute residuals on the calibration set; column 1 is taken as the
        # positive-class probability (binary classification is assumed).
        y_cal_pred_proba = self.estimator.predict_proba(X_cal)[:, 1]
        residuals = compute_pearson_residuals(y_cal, y_cal_pred_proba)
        self.q_alpha = np.quantile(np.abs(residuals), 1 - self.alpha)
        return self.q_alpha

    def predict_intervals(self, X_test):
        """Generate prediction intervals for new data.

        Returns:
        - Tuple ``(y_test_pred_proba, lower_bounds, upper_bounds)``.

        Raises:
        - ValueError: If `fit` has not been called yet.
        """
        if self.q_alpha is None:
            raise ValueError(
                "The model needs to be fitted before predicting intervals."
            )

        # Predicted positive-class probabilities for the test set.
        y_test_pred_proba = self.estimator.predict_proba(X_test)[:, 1]
        lower_bounds, upper_bounds = compute_confidence_intervals(
            y_test_pred_proba, self.q_alpha
        )
        return y_test_pred_proba, lower_bounds, upper_bounds

    def evaluate_coverage(self, y_test, lower_bounds, upper_bounds):
        """Evaluate the empirical coverage."""
        return calculate_coverage(y_test, lower_bounds, upper_bounds)

    def plot_intervals(
        self, y_test_pred_proba, lower_bounds, upper_bounds, y_test=None
    ):
        """Plot the predicted probabilities with their confidence intervals.

        Samples are drawn in ascending order of predicted probability. When
        `y_test` is given, the empirical coverage is appended to the legend label.
        """
        # Coerce to arrays so the integer-array indexing below is positional
        # and also works for lists or pandas Series.
        y_test_pred_proba = np.asarray(y_test_pred_proba)
        lower_bounds = np.asarray(lower_bounds)
        upper_bounds = np.asarray(upper_bounds)

        label = "Predicted Probability"
        if y_test is not None:
            coverage = self.evaluate_coverage(y_test, lower_bounds, upper_bounds)
            label += f" (Coverage: {coverage:.0%})"

        sorted_indices = np.argsort(y_test_pred_proba)
        plt.figure(figsize=(10, 6))
        plt.plot(
            y_test_pred_proba[sorted_indices],
            color="dodgerblue",
            label=label,
        )
        plt.fill_between(
            range(len(y_test_pred_proba)),
            lower_bounds[sorted_indices],
            upper_bounds[sorted_indices],
            color="lightblue",
            alpha=0.4,
        )
        plt.title(
            f"Confidence Intervals with Pearsonify\n"
            f"Confidence Level: {(1 - self.alpha):.0%}"
        )
        plt.xlabel("Sorted Test Sample Index")
        plt.ylabel("Probability")
        plt.legend()
        plt.show()
@@ -0,0 +1,104 @@
1
+ Metadata-Version: 2.4
2
+ Name: pearsonify
3
+ Version: 0.1.0
4
+ Summary: A lightweight package for computing confidence intervals for classification tasks using conformal prediction and Pearson residuals.
5
+ Project-URL: Repository, https://github.com/xRiskLab/pearsonify
6
+ Project-URL: Homepage, https://github.com/xRiskLab/pearsonify
7
+ Author-email: xRiskLab <contact@xrisklab.ai>
8
+ License-Expression: MIT
9
+ License-File: LICENSE.md
10
+ Keywords: classification,confidence intervals,conformal prediction,machine learning,pearson residuals
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: >=3.9
22
+ Requires-Dist: matplotlib<4.0.0,>=3.7.0
23
+ Requires-Dist: numpy<2.0.0,>=1.24.0
24
+ Requires-Dist: scikit-learn<2.0.0,>=1.2.0
25
+ Description-Content-Type: text/markdown
26
+
27
+ # 💡 Pearsonify
28
+ ## Probabilistic Classification with Conformalized Intervals
29
+
30
+ **Pearsonify** is a lightweight 🐍 Python package for generating **classification intervals** around predicted probabilities in binary classification tasks.
31
+
32
+ It uses **Pearson residuals** and **principles of conformal prediction** to quantify uncertainty without making strong distributional assumptions.
33
+
34
+ ![Image](ims/Slide_1.jpeg)
35
+
36
+ ### 🚀 Why Pearsonify?
37
+
38
+ * 📊 **Intuitive Classification Intervals**: Get reliable intervals for binary classification predictions.
39
+ * 🧠 **Statistically Grounded**: Uses Pearson residuals, a well-established metric from classical statistics.
40
+ * ⚡ **Model-Agnostic**: Works with any model that provides probability estimates.
41
+ * 🛠️ **Lightweight**: Minimal dependencies, easy to integrate into existing projects.
42
+
43
+ ### 📦 How to install?
44
+
45
+ Use `pip` to install the package from PyPI, or directly from GitHub:
46
+
47
+ ```bash
48
+ pip install pearsonify
49
+ # or from GitHub:
50
+ pip install git+https://github.com/xRiskLab/pearsonify.git
51
+ ```
52
+
53
+ ### 💻 How to use?
54
+
55
+ ```python
56
+ import numpy as np
57
+ from pearsonify import Pearsonify
58
+ from sklearn.svm import SVC
59
+ from sklearn.datasets import make_classification
60
+ from sklearn.model_selection import train_test_split
61
+
62
+ # Generate synthetic classification data
63
+ np.random.seed(42)
64
+ X, y = make_classification(
65
+ n_samples=1000, n_features=20, n_informative=10, n_classes=2, random_state=42
66
+ )
67
+
68
+ # Split data into train, calibration, and test sets
69
+ X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
70
+ X_cal, X_test, y_cal, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)
71
+
72
+ # Initialize Pearsonify with an SVC model
73
+ clf = SVC(probability=True, random_state=42)
74
+ model = Pearsonify(estimator=clf, alpha=0.05)
75
+
76
+ # Fit the model on training and calibration sets
77
+ model.fit(X_train, y_train, X_cal, y_cal)
78
+
79
+ # Generate prediction intervals for test set
80
+ y_test_pred_proba, lower_bounds, upper_bounds = model.predict_intervals(X_test)
81
+
82
+ # Calculate coverage
83
+ coverage = model.evaluate_coverage(y_test, lower_bounds, upper_bounds)
84
+ print(f"Coverage: {coverage:.2%}")
85
+
86
+ # Plot the intervals
87
+ model.plot_intervals(y_test_pred_proba, lower_bounds, upper_bounds)
88
+ ```
89
+
90
+ Running `example.py` will generate the following plot:
91
+
92
+ ![Image](ims/Figure_1.png)
93
+
94
+ This plot shows predicted probabilities with 95% confidence intervals, sorted by prediction score.
95
+
96
+ ### 📖 References
97
+
98
+ Hosmer, D. W., Lemeshow, S., & Sturdivant, R. X. (2013). Applied Logistic Regression. John Wiley & Sons.
99
+
100
+ Tibshirani, R. (2023). Conformal Prediction. Advanced Topics in Statistical Learning, Spring 2023.
101
+
102
+ ### 📝 License
103
+
104
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE.md) file for details.
@@ -0,0 +1,7 @@
1
+ pearsonify/__init__.py,sha256=Xh8BsTaKUxBvRKO0KsLmLgqc0uhf-wD-brpH3htXu6c,96
2
+ pearsonify/utils.py,sha256=2M5bFg19PpTx8nkVZwhckN8My1r4TJ6bijRoekF8Rgs,923
3
+ pearsonify/wrapper.py,sha256=XlhXCsK1CKeZTdRCge7xx0vryakWtiL-fzZUkPUpZN0,3571
4
+ pearsonify-0.1.0.dist-info/METADATA,sha256=neNsmEZTHd9XP7Y7iBjBv4nuYsiT3WkG_BEB9hkyEX0,3980
5
+ pearsonify-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
6
+ pearsonify-0.1.0.dist-info/licenses/LICENSE.md,sha256=u_UGp1lzWTU8z40NWFG1EkMBuw5Z4OdPcoFrxjupouk,1073
7
+ pearsonify-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,9 @@
1
+ MIT License
2
+
3
+ Copyright (c) [2026] [Denis Burakov]
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6
+
7
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8
+
9
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.