score-select 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- SCoRE/SCoRE.py +497 -0
- SCoRE/__init__.py +46 -0
- SCoRE/utility.py +246 -0
- score_select-0.1.1.dist-info/METADATA +125 -0
- score_select-0.1.1.dist-info/RECORD +8 -0
- score_select-0.1.1.dist-info/WHEEL +5 -0
- score_select-0.1.1.dist-info/licenses/LICENSE +21 -0
- score_select-0.1.1.dist-info/top_level.txt +1 -0
SCoRE/SCoRE.py
ADDED
|
@@ -0,0 +1,497 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from .utility import BH, eBH
|
|
3
|
+
|
|
4
|
+
# implementation of SCoRE procedures
|
|
5
|
+
|
|
6
|
+
def _uniform_random(random_state, size=None):
|
|
7
|
+
if random_state is None:
|
|
8
|
+
return np.random.uniform(0, 1, size)
|
|
9
|
+
if isinstance(random_state, np.random.Generator):
|
|
10
|
+
return random_state.uniform(0, 1, size)
|
|
11
|
+
return np.random.default_rng(random_state).uniform(0, 1, size)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _as_index_array(sel):
|
|
15
|
+
return np.asarray(sel, dtype=int)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _as_1d_array(name, values):
|
|
19
|
+
arr = np.asarray(values)
|
|
20
|
+
if arr.ndim != 1:
|
|
21
|
+
raise ValueError(f"{name} must be a one-dimensional array.")
|
|
22
|
+
return arr
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _split_calib(Dcalib):
    """Unpack the calibration pair into one-dimensional loss and score arrays.

    Args:
        Dcalib: A tuple or list ``(Lcalib, Scalib)`` of exactly two array-likes.

    Returns:
        tuple: ``(Lcalib, Scalib)`` as one-dimensional NumPy arrays.

    Raises:
        ValueError: If ``Dcalib`` is not a two-element tuple/list, if either
            element is not one-dimensional, or if the two lengths differ.
    """
    is_pair = isinstance(Dcalib, (tuple, list)) and len(Dcalib) == 2
    if not is_pair:
        raise ValueError("Dcalib must be a tuple or list of losses and scores (Lcalib, Scalib).")

    raw_losses, raw_scores = Dcalib[0], Dcalib[1]
    losses = _as_1d_array("Lcalib", raw_losses)
    scores = _as_1d_array("Scalib", raw_scores)
    if len(losses) == len(scores):
        return losses, scores
    raise ValueError("The losses and scores (Lcalib, Scalib) must have the same length.")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _is_legacy_dtest(Dtest):
|
|
37
|
+
if not isinstance(Dtest, (tuple, list)) or len(Dtest) != 2:
|
|
38
|
+
return False
|
|
39
|
+
if np.ndim(Dtest[1]) == 0:
|
|
40
|
+
return False
|
|
41
|
+
return Dtest[0] is None or np.ndim(Dtest[0]) > 0
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _get_stest(Dtest):
    """Extract the test scores as a one-dimensional array.

    A legacy ``(ignored, Stest)`` pair contributes only its second element;
    any other input is taken to be the scores themselves.
    """
    scores = Dtest[1] if _is_legacy_dtest(Dtest) else Dtest
    return _as_1d_array("Dtest", scores)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _validate_binary_loss(Lcalib):
|
|
51
|
+
if not np.all(np.isin(Lcalib, [0, 1])):
|
|
52
|
+
raise ValueError("Conformal selection requires binary calibration losses in {0, 1}.")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _validate_alpha(alpha):
|
|
56
|
+
alpha = float(alpha)
|
|
57
|
+
if not np.isfinite(alpha) or alpha <= 0 or alpha > 1:
|
|
58
|
+
raise ValueError("alpha must be in (0, 1]")
|
|
59
|
+
return alpha
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _validate_gamma(gamma):
|
|
63
|
+
gamma = float(gamma)
|
|
64
|
+
if not np.isfinite(gamma) or gamma < 0 or gamma > 1:
|
|
65
|
+
raise ValueError("gamma must be in [0, 1]")
|
|
66
|
+
return gamma
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _validate_prune(prune):
|
|
70
|
+
if prune not in (None, "hete", "homo"):
|
|
71
|
+
raise ValueError("prune must be one of None, 'hete', or 'homo'")
|
|
72
|
+
return prune
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def CS(Dcalib, Dtest, alpha, mult_test=True, return_pvals=False):
    """Conformal Selection (CS) procedure for binary losses that controls the marginal deployment risk (MDR) or selective deployment risk (SDR).
    Here, MDR reduces to the average type-I error and SDR reduces to the usual false discovery rate (FDR).

    The function applies only when the loss function evaluates strictly to {0,1}.

    Args:
        Dcalib (tuple): A tuple containing losses and scores (Lcalib, Scalib) for the calibration set.
        Dtest (array-like): Test scores Stest. A legacy tuple/list (ignored, Stest) is also accepted.
        alpha (float): The target error margin.
        mult_test (bool): Whether to perform multiple testing correction using the Benjamini-Hochberg (BH) procedure. If False, MDR is controlled; otherwise SDR is controlled.
        return_pvals (bool): If True, returns the calculated p-values alongside the selected indices.

    Returns:
        Union[np.ndarray, tuple]: Selected indices, or (selected indices, p-values) if return_pvals is True.

    Raises:
        ValueError: If alpha is outside (0, 1], the inputs are malformed, or the calibration losses are not binary.
    """
    alpha = _validate_alpha(alpha)
    Lcalib, Scalib = _split_calib(Dcalib)
    _validate_binary_loss(Lcalib)
    Stest = _get_stest(Dtest)
    Ncalib, Ntest = len(Scalib), len(Stest)

    # Only calibration points with loss 1 may count against a test point.
    # Masking them out explicitly (instead of shifting loss-0 scores by a
    # fixed constant) keeps this correct for scores of any magnitude.
    bad_scores = Scalib[Lcalib != 0]

    pvals = np.zeros(Ntest)
    for j in range(Ntest):
        pvals[j] = (1 + np.count_nonzero(bad_scores <= Stest[j])) / (Ncalib + 1)

    if mult_test:
        sel = BH(pvals, alpha)
    else:
        sel = np.flatnonzero(pvals <= alpha)

    # Normalise the index array on both return paths.
    sel = _as_index_array(sel)
    if not return_pvals:
        return sel
    return sel, pvals
|
|
112
|
+
|
|
113
|
+
def SCoRE_MDR_bf(Dcalib, Dtest, alpha, gamma, return_evals=False):
    """Brute-force algorithm for SCoRE testing with Marginal Deployment Risk (MDR) control. The algorithm manually searches for a suitable cutoff t.
    Compared to SCoRE_MDR, this brute-force computation enables computing the SCoRE e-values explicitly.

    Args:
        Dcalib (tuple): A tuple containing losses and scores (Lcalib, Scalib) for the calibration set.
        Dtest (array-like): Test scores Stest. A legacy tuple/list (ignored, Stest) is also accepted.
        alpha (float): The target error margin.
        gamma (float): A tuning parameter spanning [0, 1]. Recommended value is gamma=alpha.
        return_evals (bool): Whether to output the computed e-values. Defaults to False.

    Returns:
        Union[np.ndarray, tuple]: Selected indices, or (selected indices, e-values) if return_evals is True.

    Raises:
        ValueError: If alpha or gamma is out of range or the inputs are malformed.
    """
    alpha = _validate_alpha(alpha)
    gamma = _validate_gamma(gamma)
    Lcalib, Scalib = _split_calib(Dcalib)
    Stest = _get_stest(Dtest)
    Ncalib, Ntest = len(Scalib), len(Stest)

    # Candidate cutoffs: every calibration and test score.
    M = np.concatenate([Scalib, Stest])

    # Calibration loss mass at or below each candidate cutoff. It does not
    # depend on the test point, so it is computed once up front.
    calib_mass = np.array([np.sum(Lcalib * (Scalib <= t)) for t in M], dtype=float)

    sel = []
    evalues = np.zeros(Ntest)

    for j in range(Ntest):
        below = Stest[j] <= M  # below[k]: the test score is at or under cutoff M[k]
        evalue = np.inf
        for l in (0, 1):  # hypothesised loss of the test point
            mass = calib_mass + l * below
            feasible = np.flatnonzero(mass / (Ncalib + 1) <= gamma)
            if feasible.size == 0:
                # No admissible cutoff (t = -inf): the test score cannot lie
                # below it, so this hypothesis contributes an e-value of 0.
                evalue = 0.0
                continue

            k = feasible[np.argmax(M[feasible])]  # index of the largest admissible cutoff
            if not below[k]:
                # Test score above the cutoff: the numerator is 0. Handled
                # explicitly because 0/0 would give nan, which min() ignores,
                # leaving an infinite e-value.
                cur = 0.0
            elif mass[k] == 0:
                cur = np.inf
            else:
                cur = (Ncalib + 1) / mass[k]
            evalue = min(evalue, cur)

        evalues[j] = evalue
        if evalue >= 1 / alpha:
            sel.append(j)

    if not return_evals:
        return _as_index_array(sel)
    return _as_index_array(sel), evalues
|
|
165
|
+
|
|
166
|
+
def SCoRE_MDR(Dcalib, Dtest, alpha, gamma):
    """SCoRE testing procedure with Marginal Deployment Risk (MDR) control, implemented using the computational shortcut. Note the e-values are not directly available with this shortcut.

    Args:
        Dcalib (tuple): A tuple containing losses and scores (Lcalib, Scalib) for the calibration set.
        Dtest (array-like): Test scores Stest. A legacy tuple/list (ignored, Stest) is also accepted.
        alpha (float): The target error margin.
        gamma (float): A tuning parameter spanning [0, 1]. Recommended value is gamma=alpha.

    Returns:
        np.ndarray: Integer indices of the selected test instances (low risk, deemed safe to deploy).

    Raises:
        ValueError: If alpha or gamma is out of range or the inputs are malformed.
    """
    alpha = _validate_alpha(alpha)
    gamma = _validate_gamma(gamma)
    Lcalib, Scalib = _split_calib(Dcalib)
    Stest = _get_stest(Dtest)
    Ncalib, Ntest = len(Scalib), len(Stest)

    # First condition: the calibration loss mass at or below the test score,
    # plus one for the test point itself, must stay within gamma.
    sel = [
        j for j in range(Ntest)
        if (1 + np.sum(Lcalib * (Scalib <= Stest[j]))) / (Ncalib + 1) <= gamma
    ]

    # Second condition (only when gamma > alpha). The check involves only the
    # candidate cutoffs, not the individual test point, so it is evaluated
    # once: if any cutoff overlaps, every candidate is rejected.
    if gamma > alpha and sel:
        for t in np.concatenate([Scalib, Stest]):
            upp = (1 + np.sum(Lcalib * (Scalib <= t))) / (Ncalib + 1)
            low = upp - 1 / (Ncalib + 1)

            # check whether (alpha, gamma] and [low, upp] overlap
            if not ((upp <= alpha) or (low > gamma)):
                sel = []
                break

    return _as_index_array(sel)
|
|
204
|
+
|
|
205
|
+
def SCoRE_MDR_w(Dcalib, Dtest, wcalib, wtest, alpha, gamma):
    """SCoRE testing procedure with Marginal Deployment Risk (MDR) control under the covariate shift case, implemented using the computational shortcut.

    Args:
        Dcalib (tuple): A tuple containing losses and scores (Lcalib, Scalib) for the calibration set.
        Dtest (array-like): Test scores Stest. A legacy tuple/list (ignored, Stest) is also accepted.
        wcalib (np.ndarray): The covariate shift weights for the calibration data.
        wtest (np.ndarray): The covariate shift weights for the test data.
        alpha (float): The target error margin.
        gamma (float): A tuning parameter spanning [0, 1]. Recommended value is gamma=alpha.

    Returns:
        np.ndarray: Integer indices of the selected test instances (low risk, deemed safe to deploy).

    Raises:
        ValueError: If alpha or gamma is out of range, the inputs are malformed, or the weight lengths do not match the data.
    """
    alpha = _validate_alpha(alpha)
    gamma = _validate_gamma(gamma)
    Lcalib, Scalib = _split_calib(Dcalib)
    Stest = _get_stest(Dtest)
    wcalib = _as_1d_array("wcalib", wcalib)
    wtest = _as_1d_array("wtest", wtest)
    Ncalib, Ntest = len(Scalib), len(Stest)
    if len(wcalib) != Ncalib:
        raise ValueError("wcalib must have the same length as Lcalib and Scalib.")
    if len(wtest) != Ntest:
        raise ValueError("wtest must have the same length as Stest.")

    sel = []

    calib_w_sum = np.sum(wcalib)
    M = np.concatenate([Scalib, Stest])
    # Weighted calibration loss mass at each candidate cutoff. It is the same
    # for every test point, so it is built lazily once and reused.
    weighted_mass = None

    for i_itr in range(Ntest):
        w = wtest[i_itr]
        total = w + calib_w_sum
        phi = (w + np.sum(wcalib * Lcalib * (Scalib <= Stest[i_itr]))) / total <= gamma
        if not phi:
            continue

        if gamma > alpha:  # need to check the 2nd condition
            if weighted_mass is None:
                weighted_mass = np.array(
                    [np.sum(wcalib * Lcalib * (Scalib <= t)) for t in M]
                )
            upp = (w + weighted_mass) / total
            low = upp - w / total

            # check whether (alpha, gamma] and [low, upp] overlap for any cutoff
            if np.any(~((upp <= alpha) | (low > gamma))):
                continue

        sel.append(i_itr)

    return _as_index_array(sel)
|
|
252
|
+
|
|
253
|
+
######## SDR ########
|
|
254
|
+
|
|
255
|
+
def SCoRE_SDR(Dcalib, Dtest, alpha, gamma, prune=None, return_evals=False, random_state=None):
    """SCoRE testing procedure for Selective Deployment Risk (SDR) control. Optimized implementation with time complexity $O(m(n+m) + (n+m)\\log(n+m))$.

    One e-value is computed per test point from prefix sums over the merged,
    sorted calibration and test scores; the e-values are optionally divided by
    uniform random draws (``prune``) and then passed to ``eBH`` at level ``alpha``.

    Args:
        Dcalib (tuple): losses and scores (Lcalib, Scalib) for the calibration set.
        Dtest (array-like): Test scores Stest. A legacy tuple/list (ignored, Stest) is also accepted.
        alpha (float): The target error margin.
        gamma (float): A tuning parameter spanning [0, 1]. Recommended value is gamma=alpha.
        prune (str, optional): Optional boosting strategy (either 'hete' or 'homo'). Use of 'homo' is generally recommended.
        return_evals (bool, optional): Returns computed e-values if True.
        random_state (int or np.random.Generator, optional): Random seed or generator used when pruning is enabled. Randomization is only needed for the boosting strategies.

    Returns:
        Union[np.ndarray, tuple]: Selected indices as an integer array, or
        ``(sel, evalues)`` when ``return_evals`` is True. In the tuple case
        ``sel`` is returned exactly as produced by ``eBH``.

    Raises:
        ValueError: If alpha, gamma or prune is invalid, or the inputs are malformed.
    """
    alpha = _validate_alpha(alpha)
    gamma = _validate_gamma(gamma)
    prune = _validate_prune(prune)
    Lcalib, Scalib = _split_calib(Dcalib)
    Stest = _get_stest(Dtest)
    Ncalib, Ntest = len(Scalib), len(Stest)

    # Tag every score with its loss and origin so the merged list can be sorted.
    Scalib_tagged = [(lp, l, 'calib') for lp, l in zip(Scalib, Lcalib)]
    Stest_tagged = [(lp, 0, 'test') for lp in Stest] # 0 is dummy value

    M_tagged = Scalib_tagged + Stest_tagged
    M_tagged.sort()  # tuple ordering: score first, then loss, then tag

    M = np.array([a[0] for a in M_tagged])  # sorted scores only

    evalues = np.zeros(Ntest)  # test points skipped below keep an e-value of 0

    # some intermediate prefix sums
    NUMER = np.zeros(Ncalib + Ntest) # for np.sum(Lcalib * (Scalib <= t)) with t being the i-th ranked value in M
    DENOM = np.zeros(Ncalib + Ntest) # for 1 + np.sum(Stest <= t).
    for i, (t, L, l_type) in enumerate(M_tagged):
        NUMER[i] = (NUMER[i-1] if i != 0 else 0)
        DENOM[i] = (DENOM[i-1] if i != 0 else 1)
        if l_type == 'calib':
            NUMER[i] += L
        else:
            DENOM[i] += 1

    # above will have a bug when there are ties in M_tagged.
    # for example, if M_tagged = [(0.5, 0, 'calib'), (0.5, 1, 'calib')], then NUMER[0] = 0, NUMER[1] = 1.
    # But for t = 0.5, we should have NUMER = 1. So we need to correct for ties.
    # Sweeping from the right copies the last entry of each tied run onto the
    # earlier entries of that run.
    for i in range(len(M_tagged) - 2, -1, -1):
        if M_tagged[i][0] == M_tagged[i+1][0]:
            NUMER[i] = NUMER[i+1]
            DENOM[i] = DENOM[i+1]

    for j in range(Ntest):
        # we precompute all FR, t_gamma, and ell
        # FR_0 / FR_1: the ratio at each cutoff with test point j's own loss
        # taken as 0 / 1; test point j is removed from the test count.
        FR_0 = np.zeros(Ncalib + Ntest)
        FR_1 = np.zeros(Ncalib + Ntest)

        ELL = np.zeros(Ncalib + Ntest)

        # pairs of (i, t)
        # (-1, -inf) marks "no cutoff satisfied the bound".
        t_0, t_1 = (-1, -np.inf), (-1, -np.inf)

        # compute FR and ell
        for i, (t, _, _) in enumerate(M_tagged):
            FR_0[i] = NUMER[i] / (DENOM[i] - (Stest[j] <= t)) / (Ncalib + 1) * Ntest
            FR_1[i] = (NUMER[i] + (Stest[j] <= t)) / (DENOM[i] - (Stest[j] <= t)) / (Ncalib + 1) * Ntest

            ELL[i] = (Ncalib + 1) * gamma / Ntest * (DENOM[i] - (Stest[j] <= t)) - NUMER[i]

        # compute t_gamma. Also store the original ranking i
        # The last (largest-ranked) cutoff satisfying each bound wins.
        for i, t in enumerate(M):
            if FR_0[i] <= gamma:
                t_0 = (i, t)
            if FR_1[i] <= gamma:
                t_1 = (i, t)

        if Stest[j] > t_1[1]:
            continue # e-value is zero

        if t_1[1] == t_0[1]:
            evalues[j] = (Ncalib + 1) / (1 + NUMER[t_1[0]])
            continue # same upper/lower bound case

        max_ell = np.zeros(Ntest + Ncalib) # max_ell[rank(t)]: max of l(t') with t' > t, t' in M, and FR(t', 0) <= gamma.
        # max_ell[0] correspond to the smallest t in M, max_ell[-1] correspond to the largest t in M.
        last_max = -np.inf
        for i, t in zip(range(Ntest + Ncalib - 1, -1, -1), reversed(M)): # n+m iterations
            max_ell[i] = last_max  # running max over strictly later ranks only

            if FR_0[i] <= gamma:
                last_max = max(last_max, ELL[i]) # both O(n+m)

        M_star = [] # store pairs of (i, t)
        for i, t in enumerate(M):
            if t < max(Stest[j], t_1[1]):
                continue # this is to keep the index i
            if t > t_0[1]:
                break  # M is sorted, so every later cutoff also exceeds t_0

            # Keep cutoffs meeting the bound whose ELL beats all later feasible ones.
            if FR_0[i] <= gamma and ELL[i] > max_ell[i]:
                M_star.append((i, t))

        # The e-value is the smallest candidate value; it stays inf if M_star is empty.
        evalue = np.inf
        for i, t in M_star:
            cur_val = (Ncalib + 1) / (ELL[i] + NUMER[i])
            evalue = min(evalue, cur_val)

        evalues[j] = evalue

    # Optional boosting: 'hete' divides each e-value by its own uniform draw,
    # 'homo' divides all of them by one shared draw.
    if prune == 'hete':
        evalues /= _uniform_random(random_state, len(evalues))
    if prune == 'homo':
        evalues /= _uniform_random(random_state)
    sel = eBH(evalues, alpha)

    if not return_evals:
        return _as_index_array(sel)
    return sel, evalues
|
|
372
|
+
|
|
373
|
+
def SCoRE_SDR_w(Dcalib, Dtest, wcalib, wtest, alpha, gamma, prune=None, return_evals=False, random_state=None):
    """SCoRE testing procedure for Selective Deployment Risk (SDR) control under the covariate shift case. Optimized implementation with time complexity $O(m(n+m) + (n+m)\\log(n+m))$.

    Weighted counterpart of the unweighted SDR routine: calibration losses are
    multiplied by ``wcalib`` in the prefix sums, and each test point's own
    weight ``wtest[j]`` enters its e-value.

    Args:
        Dcalib (tuple): losses and scores (Lcalib, Scalib) for the calibration set.
        Dtest (array-like): Test scores Stest. A legacy tuple/list (ignored, Stest) is also accepted.
        wcalib (np.ndarray): The covariate shift weights for the calibration data.
        wtest (np.ndarray): The covariate shift weights for the test data.
        alpha (float): The target error margin.
        gamma (float): A tuning parameter spanning [0, 1]. Recommended value is gamma=alpha.
        prune (str, optional): Optional boosting strategy (either 'hete' or 'homo'). Use of 'homo' is generally recommended.
        return_evals (bool, optional): Returns computed e-values if True.
        random_state (int or np.random.Generator, optional): Random seed or generator used when pruning is enabled.

    Returns:
        Union[np.ndarray, tuple]: Selected indices as an integer array, or
        ``(sel, evalues)`` when ``return_evals`` is True. In the tuple case
        ``sel`` is returned exactly as produced by ``eBH``.

    Raises:
        ValueError: If alpha, gamma or prune is invalid, the inputs are malformed,
            or the weight lengths do not match the data.
    """
    alpha = _validate_alpha(alpha)
    gamma = _validate_gamma(gamma)
    prune = _validate_prune(prune)
    Lcalib, Scalib = _split_calib(Dcalib)
    Stest = _get_stest(Dtest)
    wcalib = _as_1d_array("wcalib", wcalib)
    wtest = _as_1d_array("wtest", wtest)
    Ncalib, Ntest = len(Scalib), len(Stest)
    if len(wcalib) != Ncalib:
        raise ValueError("wcalib must have the same length as Lcalib and Scalib.")
    if len(wtest) != Ntest:
        raise ValueError("wtest must have the same length as Stest.")

    # Tag every score with its loss, weight and origin so the merged list can be sorted.
    Scalib_tagged = [(lp, l, w, 'calib') for lp, l, w in zip(Scalib, Lcalib, wcalib)]
    Stest_tagged = [(lp, 0, w, 'test') for lp, w in zip(Stest, wtest)] # 0 is dummy value

    M_tagged = Scalib_tagged + Stest_tagged
    M_tagged.sort()  # tuple ordering: score first, then loss, weight, tag

    M = np.array([a[0] for a in M_tagged])  # sorted scores only

    evalues = np.zeros(Ntest)  # test points skipped below keep an e-value of 0

    calib_w_sum = np.sum(wcalib)

    # some intermediate prefix sums
    NUMER = np.zeros(Ncalib + Ntest) # for np.sum(wcalib * Lcalib * (Scalib <= t)) with t being the i-th ranked value in M
    DENOM = np.zeros(Ncalib + Ntest) # for 1 + np.sum(Stest <= t).
    for i, (t, L, w, l_type) in enumerate(M_tagged):
        NUMER[i] = (NUMER[i-1] if i != 0 else 0)
        DENOM[i] = (DENOM[i-1] if i != 0 else 1)
        if l_type == 'calib':
            NUMER[i] += w * L
        else:
            DENOM[i] += 1  # the test count is unweighted

    # Correction for ties
    # Sweeping from the right copies the last entry of each tied run onto the
    # earlier entries of that run.
    for i in range(len(M_tagged) - 2, -1, -1):
        if M_tagged[i][0] == M_tagged[i+1][0]:
            NUMER[i] = NUMER[i+1]
            DENOM[i] = DENOM[i+1]

    for j in range(Ntest):
        # we precompute all FR, t_gamma, and ell
        # FR_0 / FR_1: the ratio at each cutoff with test point j's own loss
        # taken as 0 / 1 (weighted by wtest[j]); test point j is removed from
        # the test count.
        FR_0 = np.zeros(Ncalib + Ntest)
        FR_1 = np.zeros(Ncalib + Ntest)

        ELL = np.zeros(Ncalib + Ntest)

        # pairs of (i, t)
        # (-1, -inf) marks "no cutoff satisfied the bound".
        t_0, t_1 = (-1, -np.inf), (-1, -np.inf)

        # compute FR and ell
        for i, (t, _, _, _) in enumerate(M_tagged):
            FR_0[i] = NUMER[i] / (DENOM[i] - (Stest[j] <= t)) / (calib_w_sum + wtest[j]) * Ntest
            FR_1[i] = (NUMER[i] + wtest[j] * (Stest[j] <= t)) / (DENOM[i] - (Stest[j] <= t)) / (calib_w_sum + wtest[j]) * Ntest

            # NOTE(review): divides by wtest[j]; a zero test weight would divide
            # by zero here — confirm callers always pass strictly positive weights.
            ELL[i] = (calib_w_sum + wtest[j]) / wtest[j] * gamma / Ntest * (DENOM[i] - (Stest[j] <= t)) - NUMER[i] / wtest[j]

        # compute t_gamma. Also store the original ranking i
        # The last (largest-ranked) cutoff satisfying each bound wins.
        for i, t in enumerate(M):
            if FR_0[i] <= gamma:
                t_0 = (i, t)
            if FR_1[i] <= gamma:
                t_1 = (i, t)

        if Stest[j] > t_1[1]:
            continue # e-value is zero

        if t_1[1] == t_0[1]:
            evalues[j] = (calib_w_sum + wtest[j]) / (wtest[j] + NUMER[t_1[0]])
            continue # same upper/lower bound case

        max_ell = np.zeros(Ntest + Ncalib) # max_ell[rank(t)]: max of l(t') with t' > t, t' in M, and FR(t', 0) <= gamma.
        # max_ell[0] correspond to the smallest t in M, max_ell[-1] correspond to the largest t in M.
        last_max = -np.inf
        for i, t in zip(range(Ntest + Ncalib - 1, -1, -1), reversed(M)): # n+m iterations
            max_ell[i] = last_max  # running max over strictly later ranks only

            if FR_0[i] <= gamma:
                last_max = max(last_max, ELL[i]) # both O(n+m)

        M_star = [] # store pairs of (i, t)
        for i, t in enumerate(M):
            if t < max(Stest[j], t_1[1]):
                continue # this is to keep the index i
            if t > t_0[1]:
                break  # M is sorted, so every later cutoff also exceeds t_0

            # Keep cutoffs meeting the bound whose ELL beats all later feasible ones.
            if FR_0[i] <= gamma and ELL[i] > max_ell[i]:
                M_star.append((i, t))

        # The e-value is the smallest candidate value; it stays inf if M_star is empty.
        evalue = np.inf
        for i, t in M_star:
            cur_val = (calib_w_sum + wtest[j]) / (wtest[j] * ELL[i] + NUMER[i])
            evalue = min(evalue, cur_val)

        evalues[j] = evalue

    # Optional boosting: 'hete' divides each e-value by its own uniform draw,
    # 'homo' divides all of them by one shared draw.
    if prune == 'hete':
        evalues /= _uniform_random(random_state, len(evalues))
    if prune == 'homo':
        evalues /= _uniform_random(random_state)
    sel = eBH(evalues, alpha)

    if not return_evals:
        return _as_index_array(sel)
    return sel, evalues
|
SCoRE/__init__.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Public API for SCoRE."""
|
|
2
|
+
|
|
3
|
+
from .SCoRE import (
|
|
4
|
+
CS,
|
|
5
|
+
SCoRE_MDR,
|
|
6
|
+
SCoRE_MDR_bf,
|
|
7
|
+
SCoRE_MDR_w,
|
|
8
|
+
SCoRE_SDR,
|
|
9
|
+
SCoRE_SDR_w,
|
|
10
|
+
)
|
|
11
|
+
from .utility import (
|
|
12
|
+
BH,
|
|
13
|
+
Lpredictor,
|
|
14
|
+
eBH,
|
|
15
|
+
eval_MDR,
|
|
16
|
+
eval_SDR,
|
|
17
|
+
gen_data_1,
|
|
18
|
+
gen_data_2,
|
|
19
|
+
gen_data_Jin2023,
|
|
20
|
+
loss_1,
|
|
21
|
+
loss_2,
|
|
22
|
+
loss_Jin2023,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
__version__ = "0.1.1"
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
"__version__",
|
|
29
|
+
"BH",
|
|
30
|
+
"CS",
|
|
31
|
+
"Lpredictor",
|
|
32
|
+
"SCoRE_MDR",
|
|
33
|
+
"SCoRE_MDR_bf",
|
|
34
|
+
"SCoRE_MDR_w",
|
|
35
|
+
"SCoRE_SDR",
|
|
36
|
+
"SCoRE_SDR_w",
|
|
37
|
+
"eBH",
|
|
38
|
+
"eval_MDR",
|
|
39
|
+
"eval_SDR",
|
|
40
|
+
"gen_data_1",
|
|
41
|
+
"gen_data_2",
|
|
42
|
+
"gen_data_Jin2023",
|
|
43
|
+
"loss_1",
|
|
44
|
+
"loss_2",
|
|
45
|
+
"loss_Jin2023",
|
|
46
|
+
]
|
SCoRE/utility.py
ADDED
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def _expit(x):
|
|
5
|
+
x = np.asarray(x, dtype=float)
|
|
6
|
+
out = np.empty_like(x, dtype=float)
|
|
7
|
+
positive = x >= 0
|
|
8
|
+
out[positive] = 1.0 / (1.0 + np.exp(-x[positive]))
|
|
9
|
+
exp_x = np.exp(x[~positive])
|
|
10
|
+
out[~positive] = exp_x / (1.0 + exp_x)
|
|
11
|
+
return out
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _get_rng(random_state):
|
|
15
|
+
if random_state is None:
|
|
16
|
+
return np.random
|
|
17
|
+
if isinstance(random_state, np.random.Generator):
|
|
18
|
+
return random_state
|
|
19
|
+
return np.random.default_rng(random_state)
|
|
20
|
+
|
|
21
|
+
def loss_Jin2023(Y, tau):
    """Smoothed indicator loss in the style of the Jin and Candes (2023) data generation process.

    The loss is sigmoid(-tau * Y); for tau = np.inf the hard indicator
    1{Y <= 0} is returned instead.

    Args:
        Y (np.ndarray): The target values.
        tau (float): Smoothing hyperparameter. Larger tau means closer to 1{Y <= 0}.

    Returns:
        np.ndarray: The computed loss values (a boolean array when tau is np.inf).
    """
    if tau == np.inf:
        # Limiting case: the exact indicator.
        return (Y <= 0)
    return _expit(-Y * tau) # L = smoothened indicator of <= 0
|
|
38
|
+
|
|
39
|
+
def gen_data_Jin2023(setting, n, sig, dim=20, random_state=None):
    """Generate synthetic data following the process in Jin and Candes (2023).

    Args:
        setting (int): The data generation setting (1 or 2).
        n (int): Number of samples to generate.
        sig (float): Noise scaling factor.
        dim (int, optional): Dimensionality of the feature space. Defaults to 20.
        random_state (int or np.random.Generator, optional): Random seed or generator for reproducible samples.

    Returns:
        tuple: A tuple (X, mu_x, eps, Y) representing the generated data and components.

    Raises:
        ValueError: If setting is neither 1 nor 2.
    """
    rng = _get_rng(random_state)

    first_setting = setting == 1
    if not first_setting and not (setting == 2):
        raise ValueError("setting must be 1 or 2")

    # Features are drawn identically in both settings; only the mean differs.
    X = rng.uniform(low=-1, high=1, size=n*dim).reshape((n,dim))
    if first_setting:
        x0, x1, x3 = X[:,0], X[:,1], X[:,3]
        mu_x = (x0 * x1 > 0) * (x3 > 0.5) * (0.25 + x3) + (x0 * x1 <= 0) * (x3 < -0.5) * (x3 - 0.25)
    else:
        mu_x = (X[:,0] * X[:,1] + X[:,2] ** 2 + np.exp(X[:,3] - 1) - 1) * 2

    # Noise scale shrinks as |mu_x| grows.
    eps = rng.normal(size=n) * (5.5 - abs(mu_x)) / 2 * sig
    Y = mu_x + eps
    return X, mu_x, eps, Y
|
|
69
|
+
|
|
70
|
+
def loss_1(Y):
    """Expected-shortfall-like loss.

    The loss has the form L(f, x, y) = y * 1{y > c}; this implementation uses
    the threshold c = 2 and scales the result by 1/6.

    Args:
        Y (np.ndarray): The target values.

    Returns:
        np.ndarray: The computed loss, zero wherever Y <= 2.
    """
    scale = 1/6
    exceeds = Y > 2
    return scale * Y * exceeds
|
|
82
|
+
|
|
83
|
+
def gen_data_1(setting, n, sig, dim=20, random_state=None):
    """Generate synthetic data for the first case.

    Args:
        setting (int): The data generation setting (1 or 2).
        n (int): Number of samples to generate.
        sig (float): Noise scaling factor.
        dim (int, optional): Dimensionality of the features. Defaults to 20.
        random_state (int or np.random.Generator, optional): Random seed or generator for reproducible samples.

    Returns:
        tuple: A tuple (X, mu_x, eps, Y) containing the covariates and responses.

    Raises:
        ValueError: If setting is neither 1 nor 2.
    """
    rng = _get_rng(random_state)

    first_setting = setting == 1
    if not first_setting and not (setting == 2):
        raise ValueError("setting must be 1 or 2")

    # Features are drawn identically in both settings.
    X = rng.uniform(low=-1, high=1, size=n*dim).reshape((n,dim))
    if first_setting:
        x0, x1, x3 = X[:,0], X[:,1], X[:,3]
        mu_x = (x0 * x1 > 0) * (x3 > 0.5) * (0.5 + x3) + (x0 * x1 <= 0) * (x3 < -0.5) * (x3 - 0.5) + 3 # now in (1.5, 4.5)
        noise = rng.normal(size=n) * sig * (5.5 - mu_x)
        eps = np.clip(noise, -1.5, 1.5) # clip the noise to be in (-1.5, 1.5)
    else:
        mu_x = X[:,0] * X[:,1] + X[:,2] ** 2 + np.exp(X[:,3] - 1) + 2 # in (1, 5)
        noise = rng.normal(size=n) * sig * (6 - mu_x) * 0.5
        eps = np.clip(noise, -1, 1) # clip the noise to be in (-1, 1)

    Y = mu_x + eps # (0, 6)
    return X, mu_x, eps, Y
|
|
113
|
+
|
|
114
|
+
def loss_2(Y, f, X, clip_const):
    """Clipped, normalized squared prediction error.

    Evaluates L(f, x, y) = (y - f(x))^2, clips it to [0, clip_const], and
    divides by clip_const.

    Args:
        Y (np.ndarray): The true target values.
        f (object): The regression model to use for prediction (must have `.predict()`).
        X (np.ndarray): The feature matrix to run predictions against.
        clip_const (float): The clipping boundary applied before normalization.

    Returns:
        np.ndarray: The computed normalized prediction error loss over X.
    """
    squared_error = (Y - f.predict(X)) ** 2
    clipped = np.clip(squared_error, 0, clip_const)
    return clipped / clip_const
|
|
129
|
+
|
|
130
|
+
def gen_data_2(setting, n, sig, dim=20, random_state=None):
    """Generates artificial data for the second case.

    The generating process is the same as the one used for the first case, so
    this function delegates to ``gen_data_1`` rather than duplicating it.

    Args:
        setting (int): The data generation setting (1 or 2).
        n (int): Number of samples to generate.
        sig (float): Noise scaling factor.
        dim (int, optional): Dimensionality. Defaults to 20.
        random_state (int or np.random.Generator, optional): Random seed or generator for reproducible samples.

    Returns:
        tuple: A tuple (X, mu_x, eps, Y) with covariates and label values.

    Raises:
        ValueError: If setting is neither 1 nor 2.
    """
    return gen_data_1(setting, n, sig, dim=dim, random_state=random_state)
|
|
160
|
+
|
|
161
|
+
def BH(pvals, q):
    """Run the Benjamini-Hochberg (BH) step-up procedure on p-values.

    The p-values are ranked in ascending order (stable sort, so ties keep
    their input order) and the k-th smallest is compared with q * k / m,
    where m is the number of hypotheses. Everything up to and including the
    largest rank that meets its threshold is rejected.

    Args:
        pvals (array-like): The p-values to test.
        q (float): The nominal False Discovery Rate (FDR) level.

    Returns:
        np.ndarray: Integer indices (into `pvals`) of the rejected
        hypotheses, ordered by increasing p-value; empty if nothing is
        rejected or no p-values were supplied.
    """
    p = np.asarray(pvals, dtype=float)
    m = p.size
    nothing_rejected = np.array([], dtype=int)

    if m == 0:
        return nothing_rejected

    ranking = np.argsort(p, kind="mergesort")
    below_line = p[ranking] <= q * np.arange(1, m + 1) / m

    if not below_line.any():
        return nothing_rejected

    # Step-up rule: reject through the LAST rank that clears its threshold,
    # even if some earlier ranks individually did not.
    last_rank = np.flatnonzero(below_line)[-1]
    return ranking[: last_rank + 1]
|
|
186
|
+
|
|
187
|
+
def eBH(evals, q):
    """Applies the base e-BH procedure to a list of e-values.

    Each e-value e is converted to the surrogate p-value 1 / e (an e-value of
    zero maps to +inf, so it can never be rejected) and the result is passed
    to the BH procedure at level q.

    Args:
        evals (array-like): List or array of e-values. Lists and integer
            arrays are accepted; the input is coerced to a float array.
        q (float): The nominal False Discovery Rate (FDR) level.

    Returns:
        np.ndarray: The indices forming the rejection set.
    """
    # Coerce up front: a list would make `evals != 0` a scalar comparison,
    # and an integer dtype could not hold inf or the float quotient.
    evals = np.asarray(evals, dtype=float)

    # Pre-fill with inf so entries skipped by `where` (zero e-values) stay inf.
    pvals = np.full(evals.shape, np.inf)
    np.divide(1.0, evals, out=pvals, where=(evals != 0))
    return BH(pvals, q)
|
|
198
|
+
|
|
199
|
+
def eval_MDR(L, R, sel):
    """Score a selection set under the MDR criterion.

    The risk is the total loss over the selected instances divided by the
    total number of instances (selected or not); the reward is the summed
    reward over the selected instances.

    Args:
        L (np.ndarray): The true loss of every instance.
        R (np.ndarray): The true reward of every instance.
        sel (array-like): Indices chosen by the selection procedure.

    Returns:
        tuple: (risk_acc, reward_acc); both are 0 when `sel` is empty.
    """
    if len(sel) == 0:
        return 0, 0

    picked_loss = L[sel]
    picked_reward = R[sel]
    # Normalize by the full population size, not by the selection size.
    return np.sum(picked_loss) / len(L), np.sum(picked_reward)
|
|
215
|
+
|
|
216
|
+
def eval_SDR(L, R, sel):
    """Score a selection set under the SDR criterion.

    Args:
        L (np.ndarray): The true loss of every instance.
        R (np.ndarray): The true reward of every instance.
        sel (array-like): Indices chosen by the selection procedure.

    Returns:
        tuple: (sdr, bin_power, reward). `sdr` is the average loss among the
        selected instances. `bin_power` is (number selected - their total
        loss) divided by (number of instances - total loss), which is the
        usual power only for 0-1 losses; it is 0 when that denominator is 0.
        `reward` is the summed reward of the selected instances. All three
        are 0 when `sel` is empty.
    """
    n_selected = len(sel)
    if n_selected == 0:
        return 0, 0, 0

    selected_loss = np.sum(L[sel])
    # With a 0-1 loss this counts the instances whose loss is zero.
    n_zero_loss = len(L) - np.sum(L)

    if n_zero_loss != 0:
        bin_power = (n_selected - selected_loss) / n_zero_loss
    else:
        bin_power = 0

    return selected_loss / n_selected, bin_power, np.sum(R[sel])
|
|
234
|
+
|
|
235
|
+
class Lpredictor:
    """Turn a target predictor plus a loss mapping into a loss predictor.

    The wrapper exposes a `.predict()` method that first predicts the target
    with the wrapped model and then maps that prediction (together with the
    inputs) through the loss function.
    """

    def __init__(self, Ypred, loss_fn):
        """Store the wrapped model and the loss mapping.

        Args:
            Ypred (object): Target predictor; must provide `.predict(X)`.
            loss_fn (callable): Called as `loss_fn(Y_hat, X)`.
        """
        self.loss_fn = loss_fn
        self.Ypred = Ypred

    def predict(self, X):
        """Return `loss_fn(Ypred.predict(X), X)` for the inputs `X`."""
        return self.loss_fn(self.Ypred.predict(X), X)
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: score-select
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Conformal selective prediction with general risk control.
|
|
5
|
+
Author-email: Tian Bai <20.t.bai.05@gmail.com>, Ying Jin <yjinstat@wharton.upenn.edu>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Repository, https://github.com/Tian-Bai/SCoRE
|
|
8
|
+
Project-URL: Paper, https://arxiv.org/abs/2603.24704
|
|
9
|
+
Keywords: conformal prediction,selective prediction,risk control
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
21
|
+
Requires-Python: >=3.9
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: numpy>=1.23
|
|
25
|
+
Provides-Extra: experiments
|
|
26
|
+
Requires-Dist: pandas>=1.5; extra == "experiments"
|
|
27
|
+
Requires-Dist: scikit-learn>=1.1; extra == "experiments"
|
|
28
|
+
Requires-Dist: scipy>=1.9; extra == "experiments"
|
|
29
|
+
Requires-Dist: tqdm>=4.64; extra == "experiments"
|
|
30
|
+
Provides-Extra: dev
|
|
31
|
+
Requires-Dist: build>=1.2; extra == "dev"
|
|
32
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
33
|
+
Requires-Dist: twine>=5; extra == "dev"
|
|
34
|
+
Dynamic: license-file
|
|
35
|
+
|
|
36
|
+
# SCoRE
|
|
37
|
+
|
|
38
|
+
SCoRE implements conformal selective prediction procedures for marginal
|
|
39
|
+
deployment risk (MDR) and selective deployment risk (SDR) control.
|
|
40
|
+
|
|
41
|
+
This repository also contains the simulation and application code used for the
|
|
42
|
+
paper [Conformal Selective Prediction with General Risk Control](https://arxiv.org/abs/2603.24704).
|
|
43
|
+
|
|
44
|
+
## Installation
|
|
45
|
+
|
|
46
|
+
Install the package from a local checkout:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
python -m pip install -e .
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Install optional dependencies for the research scripts:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
python -m pip install -e ".[experiments]"
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
After the package is published, install it with:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
python -m pip install score-select
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Quickstart
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
import numpy as np
|
|
68
|
+
from SCoRE import SCoRE_MDR, SCoRE_SDR
|
|
69
|
+
|
|
70
|
+
lcalib = np.array([0, 1, 0, 1])
|
|
71
|
+
scalib = np.array([0.1, 0.4, 0.2, 0.8])
|
|
72
|
+
stest = np.array([0.15, 0.5, 0.9])
|
|
73
|
+
|
|
74
|
+
dcalib = (lcalib, scalib)
|
|
75
|
+
dtest = stest
|
|
76
|
+
|
|
77
|
+
mdr_selected = SCoRE_MDR(dcalib, dtest, alpha=0.5, gamma=0.5)
|
|
78
|
+
sdr_selected = SCoRE_SDR(dcalib, dtest, alpha=0.5, gamma=0.5)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Functions return NumPy integer index arrays, so selections can be used directly
|
|
82
|
+
to index NumPy arrays.
|
|
83
|
+
|
|
84
|
+
When using randomized pruning, pass `random_state` for reproducible results:
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
selected = SCoRE_SDR(
|
|
88
|
+
dcalib,
|
|
89
|
+
dtest,
|
|
90
|
+
alpha=0.5,
|
|
91
|
+
gamma=1.0,
|
|
92
|
+
prune="hete",
|
|
93
|
+
random_state=123,
|
|
94
|
+
)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Public API
|
|
98
|
+
|
|
99
|
+
The top-level package exports the main procedures and utilities:
|
|
100
|
+
|
|
101
|
+
Recommended package entry points:
|
|
102
|
+
|
|
103
|
+
- `SCoRE_MDR`
|
|
104
|
+
- `SCoRE_SDR`
|
|
105
|
+
|
|
106
|
+
Additional utilities:
|
|
107
|
+
|
|
108
|
+
- `CS`
|
|
109
|
+
- `SCoRE_MDR_bf`, `SCoRE_MDR_w`, `SCoRE_SDR_w`
|
|
110
|
+
- `BH`, `eBH`
|
|
111
|
+
- `eval_MDR`, `eval_SDR`
|
|
112
|
+
- `loss_Jin2023`, `loss_1`, `loss_2`
|
|
113
|
+
- `gen_data_Jin2023`, `gen_data_1`, `gen_data_2`
|
|
114
|
+
- `Lpredictor`
|
|
115
|
+
|
|
116
|
+
## Repository Layout
|
|
117
|
+
|
|
118
|
+
- `SCoRE/`: installable Python package
|
|
119
|
+
- `tests/`: package tests
|
|
120
|
+
- `applications/`: real-data applications
|
|
121
|
+
- `applications/drug/`: efficient, cost-aware drug discovery
|
|
122
|
+
- `applications/icu/`: clinical prediction error management
|
|
123
|
+
- `applications/llm/`: flexible LLM abstention
|
|
124
|
+
- `simulation/`: simulation experiments
|
|
125
|
+
- `simulation_w/`: simulation experiments with covariate shifts
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
SCoRE/SCoRE.py,sha256=k5fXzFSJc1Z2gDJtLk9wI4l8RpRPxoVfH_2n0NHaBo4,20148
|
|
2
|
+
SCoRE/__init__.py,sha256=mmKP3rku_aJiinVXYZZkGDb59U78d0djtmhjyMuN3rI,662
|
|
3
|
+
SCoRE/utility.py,sha256=m3BoIqcJu8FFvXyYJpPFzlMt7dSMKM8_iwQnfmFXn5o,9466
|
|
4
|
+
score_select-0.1.1.dist-info/licenses/LICENSE,sha256=SYINFQb7BU86dVJs0T25JhoG-4wH7LmMU5I5tP5XEAE,1107
|
|
5
|
+
score_select-0.1.1.dist-info/METADATA,sha256=iKFB6_mRYi6RzmlUZzgW90RuwN-gbSFwGSgn6B3a_gc,3717
|
|
6
|
+
score_select-0.1.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
7
|
+
score_select-0.1.1.dist-info/top_level.txt,sha256=tQ8qYOEVtrqK2raAkVMXWvJs75exBWDMZ8NiN1DTCS0,6
|
|
8
|
+
score_select-0.1.1.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Tian Bai and Ying Jin
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
SCoRE
|