pauc 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pauc-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,41 @@
1
+ Metadata-Version: 2.4
2
+ Name: pauc
3
+ Version: 0.1.0
4
+ Summary: Compute ROC AUC and confidence intervals using DeLong’s method
5
+ Home-page: https://github.com/srijitseal/pauc
6
+ Author: Srijit Seal
7
+ Author-email: srijit@understanding.bio
8
+ License: MIT
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Python: >=3.6
13
+ Description-Content-Type: text/markdown
14
+ Requires-Dist: numpy
15
+ Requires-Dist: scipy
16
+ Requires-Dist: pandas
17
+ Dynamic: author
18
+ Dynamic: author-email
19
+ Dynamic: classifier
20
+ Dynamic: description
21
+ Dynamic: description-content-type
22
+ Dynamic: home-page
23
+ Dynamic: license
24
+ Dynamic: requires-dist
25
+ Dynamic: requires-python
26
+ Dynamic: summary
27
+
28
+ # pAUC
29
+
30
+ Simple Python package to calculate ROC AUC with confidence intervals using DeLong’s method.
31
+
32
+ ## Installation
33
+
34
+ pip install pauc
35
+
36
+ ## Usage
37
+
38
+ from pauc import roc_auc_ci_score
39
+
40
+ auc, (lb, ub) = roc_auc_ci_score(y_true, y_pred)
41
+ print(f'AUC: {auc}, 95% CI: ({lb}, {ub})')
pauc-0.1.0/README.md ADDED
@@ -0,0 +1,14 @@
1
+ # pAUC
2
+
3
+ Simple Python package to calculate ROC AUC with confidence intervals using DeLong’s method.
4
+
5
+ ## Installation
6
+
7
+ pip install pauc
8
+
9
+ ## Usage
10
+
11
+ from pauc import roc_auc_ci_score
12
+
13
+ auc, (lb, ub) = roc_auc_ci_score(y_true, y_pred)
14
+ print(f'AUC: {auc}, 95% CI: ({lb}, {ub})')
@@ -0,0 +1,2 @@
1
+ # __init__.py
2
+ from .roc_auc_ci import roc_auc_ci_score
@@ -0,0 +1,186 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ import scipy.stats as stats
4
+
5
+ # from https://github.com/PatWalters/comparing_classifiers/blob/master/delong_ci.py
6
+ # from https://github.com/yandexdataschool/roc_comparison/blob/master/compare_auc_delong_xu.py
7
+
8
+ # AUC comparison adapted from
9
+ # https://github.com/Netflix/vmaf/
10
def compute_midrank(x):
    """Compute 1-based midranks of a 1D array (ties share their average rank).

    Args:
        x: a 1D numpy array of scores.
    Returns:
        1D numpy array of midranks, aligned with the input order of ``x``.
    """
    order = np.argsort(x)
    sorted_vals = x[order]
    n = len(x)
    ranks = np.zeros(n, dtype=float)
    start = 0
    while start < n:
        # Advance `stop` past the run of values equal to sorted_vals[start].
        stop = start
        while stop < n and sorted_vals[stop] == sorted_vals[start]:
            stop += 1
        # Every member of the tie run gets the average of its positions.
        ranks[start:stop] = 0.5 * (start + stop - 1)
        start = stop
    result = np.empty(n, dtype=float)
    # Note(kazeevn) +1 is due to Python using 0-based indexing
    # instead of 1-based in the AUC formula in the paper
    result[order] = ranks + 1
    return result
33
+
34
+
35
def compute_midrank_weight(x, sample_weight):
    """Compute weighted midranks of a 1D array.

    Args:
        x: a 1D numpy array of scores.
        sample_weight: a 1D numpy array of per-example weights, same length as ``x``.
    Returns:
        1D numpy array of weighted midranks, aligned with the input order of ``x``.
    """
    order = np.argsort(x)
    sorted_vals = x[order]
    cumulative_weight = np.cumsum(sample_weight[order])
    n = len(x)
    ranks = np.zeros(n, dtype=float)
    start = 0
    while start < n:
        # Advance `stop` past the run of values equal to sorted_vals[start].
        stop = start
        while stop < n and sorted_vals[stop] == sorted_vals[start]:
            stop += 1
        # A run of ties shares the mean cumulative weight over the run.
        ranks[start:stop] = cumulative_weight[start:stop].mean()
        start = stop
    result = np.empty(n, dtype=float)
    result[order] = ranks
    return result
57
+
58
+
59
def fastDeLong(predictions_sorted_transposed, label_1_count):
    """
    The fast version of DeLong's method for computing the covariance of
    unadjusted AUC.
    Args:
        predictions_sorted_transposed: a 2D numpy.array[n_classifiers, n_examples]
            sorted such as the examples with label "1" are first
        label_1_count: number of examples with label "1" (they occupy the
            leading columns of ``predictions_sorted_transposed``)
    Returns:
        (AUC value, DeLong covariance)
    Reference:
     @article{sun2014fast,
       title={Fast Implementation of DeLong's Algorithm for
              Comparing the Areas Under Correlated Receiver Operating Characteristic Curves},
       author={Xu Sun and Weichao Xu},
       journal={IEEE Signal Processing Letters},
       volume={21},
       number={11},
       pages={1389--1393},
       year={2014},
       publisher={IEEE}
     }
    """
    # Short variable names follow the notation of the paper:
    # m positives, n negatives, k classifiers.
    m = label_1_count
    n = predictions_sorted_transposed.shape[1] - m
    k = predictions_sorted_transposed.shape[0]
    positive_examples = predictions_sorted_transposed[:, :m]
    negative_examples = predictions_sorted_transposed[:, m:]

    # Midranks within positives (tx), within negatives (ty), and overall (tz).
    tx = np.empty([k, m], dtype=float)
    ty = np.empty([k, n], dtype=float)
    tz = np.empty([k, m + n], dtype=float)
    for classifier_idx in range(k):
        tx[classifier_idx, :] = compute_midrank(positive_examples[classifier_idx, :])
        ty[classifier_idx, :] = compute_midrank(negative_examples[classifier_idx, :])
        tz[classifier_idx, :] = compute_midrank(predictions_sorted_transposed[classifier_idx, :])

    # AUC via the rank-sum formula, then the structural components V01/V10
    # whose covariances combine into the DeLong covariance estimate.
    aucs = tz[:, :m].sum(axis=1) / m / n - float(m + 1.0) / 2.0 / n
    v01 = (tz[:, :m] - tx[:, :]) / n
    v10 = 1.0 - (tz[:, m:] - ty[:, :]) / m
    sx = np.cov(v01)
    sy = np.cov(v10)
    delongcov = sx / m + sy / n
    return aucs, delongcov
102
+
103
+
104
def calc_pvalue(aucs, sigma):
    """Computes log10 of the p-value for the hypothesis that two AUCs differ.

    Args:
        aucs: 1D array of two AUCs
        sigma: 2x2 DeLong covariance matrix of the AUCs
    Returns:
        log10(pvalue) of the two-sided z-test
    """
    # Contrast vector selecting the AUC difference.
    contrast = np.array([[1, -1]])
    # z statistic: |AUC1 - AUC2| / stderr of the difference.
    z = np.abs(np.diff(aucs)) / np.sqrt(np.dot(np.dot(contrast, sigma), contrast.T))
    # Bug fix: the module imports `scipy.stats as stats`, so the bare name
    # `scipy` was never bound and `scipy.stats.norm.logsf` raised NameError.
    # Two-sided p-value in log10 space: log10(2 * sf(z)).
    return np.log10(2) + stats.norm.logsf(z, loc=0, scale=1) / np.log(10)
115
+
116
+
117
def compute_ground_truth_statistics(ground_truth, sample_weight=None):
    """Order examples so that label-1 examples come first and count them.

    Args:
        ground_truth: np.array containing only 0s and 1s (both must be present).
        sample_weight: optional np.array of per-example weights.
    Returns:
        (order, label_1_count, ordered_sample_weight): ``order`` is an index
        array placing the positives before the negatives; the weights are
        reordered accordingly (or None when no weights were given).
    """
    assert np.array_equal(np.unique(ground_truth), [0, 1])
    # Negating the labels makes argsort put the 1s first (stable sort).
    order = (-ground_truth).argsort()
    label_1_count = int(ground_truth.sum())
    ordered_sample_weight = None if sample_weight is None else sample_weight[order]
    return order, label_1_count, ordered_sample_weight
127
+
128
+
129
def delong_roc_variance(ground_truth, predictions):
    """
    Computes ROC AUC variance for a single set of predictions
    Args:
        ground_truth: np.array of 0 and 1
        predictions: np.array of floats of the probability of being class 1
    Returns:
        (auc, delong_covariance) for the single classifier
    """
    # No sample weighting for the single-classifier variance computation.
    order, label_1_count, _ = compute_ground_truth_statistics(ground_truth, None)
    # Shape the scores as [1, n_examples] with positives first, as fastDeLong expects.
    sorted_predictions = predictions[np.newaxis, order]
    aucs, delongcov = fastDeLong(sorted_predictions, label_1_count)
    assert len(aucs) == 1, "There is a bug in the code, please forward this to the developers"
    return aucs[0], delongcov
143
+
144
+
145
def delong_roc_test(ground_truth, predictions_one, predictions_two):
    """
    Computes log(p-value) for hypothesis that two ROC AUCs are different
    Args:
        ground_truth: np.array of 0 and 1
        predictions_one: predictions of the first model,
            np.array of floats of the probability of being class 1
        predictions_two: predictions of the second model,
            np.array of floats of the probability of being class 1
    Returns:
        log10(p-value) of the two-sided DeLong test
    """
    # Bug fix: compute_ground_truth_statistics returns THREE values
    # (order, label_1_count, ordered_sample_weight); the original unpacked
    # only two, so every call raised ValueError. The weight is unused here.
    order, label_1_count, _ = compute_ground_truth_statistics(ground_truth)
    # Stack both models' scores as rows, positives-first column order.
    predictions_sorted_transposed = np.vstack((predictions_one, predictions_two))[:, order]
    aucs, delongcov = fastDeLong(predictions_sorted_transposed, label_1_count)
    return calc_pvalue(aucs, delongcov)
160
+
161
+
162
def roc_auc_ci_score(y_true, y_pred, alpha=0.95):
    """Return the ROC AUC and its DeLong confidence interval.

    Args:
        y_true: np.array of 0 and 1 labels.
        y_pred: np.array of predicted scores/probabilities for class 1.
        alpha: confidence level of the interval (default 0.95).
    Returns:
        (auc, ci) where ``ci`` is a length-2 numpy array [lower, upper]
        clipped to the valid AUC range [0, 1].
    """
    auc, auc_cov = delong_roc_variance(y_true, y_pred)
    auc_std = np.sqrt(auc_cov)

    if auc_std < 1e-10:
        # Degenerate case: (near-)zero variance collapses the interval
        # to the point estimate itself.
        if auc == 1.0:
            ci = np.array([1.0, 1.0])
        elif auc == 0.0:
            ci = np.array([0.0, 0.0])
        else:
            ci = np.array([auc, auc])
    else:
        # Two-sided normal quantiles around the point estimate.
        tail = (1 - alpha) / 2
        lower_upper_q = np.abs(np.array([0, 1]) - tail)
        ci = stats.norm.ppf(lower_upper_q, loc=auc, scale=auc_std)

    # Clip the interval endpoints into [0, 1].
    ci[ci > 1] = 1
    ci[ci < 0] = 0

    return auc, ci
@@ -0,0 +1,41 @@
1
+ Metadata-Version: 2.4
2
+ Name: pauc
3
+ Version: 0.1.0
4
+ Summary: Compute ROC AUC and confidence intervals using DeLong’s method
5
+ Home-page: https://github.com/srijitseal/pauc
6
+ Author: Srijit Seal
7
+ Author-email: srijit@understanding.bio
8
+ License: MIT
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Python: >=3.6
13
+ Description-Content-Type: text/markdown
14
+ Requires-Dist: numpy
15
+ Requires-Dist: scipy
16
+ Requires-Dist: pandas
17
+ Dynamic: author
18
+ Dynamic: author-email
19
+ Dynamic: classifier
20
+ Dynamic: description
21
+ Dynamic: description-content-type
22
+ Dynamic: home-page
23
+ Dynamic: license
24
+ Dynamic: requires-dist
25
+ Dynamic: requires-python
26
+ Dynamic: summary
27
+
28
+ # pAUC
29
+
30
+ Simple Python package to calculate ROC AUC with confidence intervals using DeLong’s method.
31
+
32
+ ## Installation
33
+
34
+ pip install pauc
35
+
36
+ ## Usage
37
+
38
+ from pauc import roc_auc_ci_score
39
+
40
+ auc, (lb, ub) = roc_auc_ci_score(y_true, y_pred)
41
+ print(f'AUC: {auc}, 95% CI: ({lb}, {ub})')
@@ -0,0 +1,11 @@
1
+ README.md
2
+ setup.py
3
+ pauc/__init__.py
4
+ pauc/roc_auc_ci.py
5
+ pauc.egg-info/PKG-INFO
6
+ pauc.egg-info/SOURCES.txt
7
+ pauc.egg-info/dependency_links.txt
8
+ pauc.egg-info/requires.txt
9
+ pauc.egg-info/top_level.txt
10
+ tests/__init__.py
11
+ tests/test_roc_auc_ci.py
@@ -0,0 +1,3 @@
1
+ numpy
2
+ scipy
3
+ pandas
@@ -0,0 +1,2 @@
1
+ pauc
2
+ tests
pauc-0.1.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
pauc-0.1.0/setup.py ADDED
@@ -0,0 +1,28 @@
1
from setuptools import setup, find_packages

# Read the long description with a context manager and an explicit encoding:
# the original `open('README.md').read()` leaked the file handle and relied
# on the platform default encoding, which breaks on non-UTF-8 locales since
# the README contains a non-ASCII apostrophe ("DeLong’s").
with open('README.md', encoding='utf-8') as readme_file:
    long_description = readme_file.read()

setup(
    name='pauc',
    version='0.1.0',
    packages=find_packages(),
    description='Compute ROC AUC and confidence intervals using DeLong’s method',
    long_description=long_description,
    long_description_content_type='text/markdown',
    install_requires=[
        'numpy',
        'scipy',
        'pandas'
    ],
    tests_require=[
        'pytest',
    ],
    python_requires='>=3.6',
    author='Srijit Seal',
    author_email='srijit@understanding.bio',
    url='https://github.com/srijitseal/pauc',
    license='MIT',
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
)
File without changes
@@ -0,0 +1,30 @@
1
+ import numpy as np
2
+ from pauc import roc_auc_ci_score
3
+
4
def test_roc_auc_ci_score_basic():
    # Mixed-quality predictions: the AUC must be a valid probability and
    # the DeLong interval must bracket the point estimate.
    labels = np.array([0, 0, 1, 1])
    scores = np.array([0.1, 0.4, 0.35, 0.8])

    auc, (lower, upper) = roc_auc_ci_score(labels, scores)

    assert 0 <= auc <= 1
    assert lower <= auc <= upper
12
+
13
def test_roc_auc_ci_perfect_classifier():
    # Perfectly separated classes: AUC is exactly 1 and the interval
    # must bracket it without exceeding the [0, 1] range.
    labels = np.array([0, 0, 1, 1])
    scores = np.array([0.1, 0.2, 0.9, 0.95])

    auc, (lower, upper) = roc_auc_ci_score(labels, scores)

    assert auc == 1.0
    assert lower <= auc <= upper
    assert upper <= 1.0
22
+
23
def test_roc_auc_ci_worst_classifier():
    # Perfectly anti-correlated scores: AUC is exactly 0 and the interval
    # must still bracket the point estimate.
    labels = np.array([0, 0, 1, 1])
    scores = np.array([0.9, 0.95, 0.1, 0.2])

    auc, (lower, upper) = roc_auc_ci_score(labels, scores)

    assert auc == 0.0
    assert lower <= auc <= upper