score-select 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
SCoRE/SCoRE.py ADDED
@@ -0,0 +1,497 @@
1
+ import numpy as np
2
+ from .utility import BH, eBH
3
+
4
+ # implementation of SCoRE procedures
5
+
6
def _uniform_random(random_state, size=None):
    """Draw Uniform(0, 1) variates from the source selected by ``random_state``.

    Args:
        random_state: ``None`` (use NumPy's global random state), a
            ``np.random.Generator`` (used as-is), or a seed that is passed to
            ``np.random.default_rng``.
        size: Output shape forwarded to ``uniform``; ``None`` yields a scalar.

    Returns:
        A float or an array of floats drawn from Uniform(0, 1).
    """
    if isinstance(random_state, np.random.Generator):
        source = random_state
    elif random_state is None:
        source = np.random
    else:
        # Anything else is treated as a seed for a fresh generator.
        source = np.random.default_rng(random_state)
    return source.uniform(0, 1, size)
12
+
13
+
14
def _as_index_array(sel):
    """Return the selection ``sel`` as a NumPy array with integer dtype."""
    indices = np.asarray(sel, dtype=int)
    return indices
16
+
17
+
18
def _as_1d_array(name, values):
    """Convert ``values`` to a NumPy array and require it to be 1-D.

    Args:
        name: Label used in the error message.
        values: Array-like input.

    Returns:
        np.ndarray: The converted one-dimensional array.

    Raises:
        ValueError: If the converted array does not have exactly one dimension.
    """
    converted = np.asarray(values)
    if converted.ndim == 1:
        return converted
    raise ValueError(f"{name} must be a one-dimensional array.")
23
+
24
+
25
def _split_calib(Dcalib):
    """Unpack and validate the calibration pair ``(Lcalib, Scalib)``.

    Args:
        Dcalib: A tuple or list of length two holding the calibration losses
            and the calibration scores.

    Returns:
        tuple: ``(Lcalib, Scalib)`` as one-dimensional arrays of equal length.

    Raises:
        ValueError: If ``Dcalib`` is not a two-element tuple/list, if either
            entry is not one-dimensional, or if the lengths differ.
    """
    is_pair = isinstance(Dcalib, (tuple, list)) and len(Dcalib) == 2
    if not is_pair:
        raise ValueError("Dcalib must be a tuple or list of losses and scores (Lcalib, Scalib).")

    losses = _as_1d_array("Lcalib", Dcalib[0])
    scores = _as_1d_array("Scalib", Dcalib[1])
    if len(losses) == len(scores):
        return losses, scores
    raise ValueError("The losses and scores (Lcalib, Scalib) must have the same length.")
34
+
35
+
36
def _is_legacy_dtest(Dtest):
    """Tell whether ``Dtest`` uses the legacy ``(ignored, Stest)`` layout.

    The input is considered legacy when it is a two-element tuple/list whose
    second entry is not a scalar and whose first entry is either ``None`` or
    itself not a scalar. A plain pair of scalar scores is therefore *not*
    treated as legacy.
    """
    if isinstance(Dtest, (tuple, list)) and len(Dtest) == 2:
        ignored, scores = Dtest[0], Dtest[1]
        if np.ndim(scores) != 0:
            return ignored is None or np.ndim(ignored) > 0
    return False
42
+
43
+
44
def _get_stest(Dtest):
    """Extract the test scores from ``Dtest`` as a one-dimensional array.

    For the legacy ``(ignored, Stest)`` layout the second entry is used;
    otherwise ``Dtest`` itself is taken to be the scores.
    """
    scores = Dtest[1] if _is_legacy_dtest(Dtest) else Dtest
    return _as_1d_array("Dtest", scores)
48
+
49
+
50
def _validate_binary_loss(Lcalib):
    """Raise ``ValueError`` unless every calibration loss is 0 or 1."""
    is_binary = np.isin(Lcalib, [0, 1])
    if np.all(is_binary):
        return
    raise ValueError("Conformal selection requires binary calibration losses in {0, 1}.")
53
+
54
+
55
def _validate_alpha(alpha):
    """Coerce ``alpha`` to ``float`` and require it to lie in (0, 1].

    Raises:
        ValueError: If the value is non-finite, non-positive, or above 1.
    """
    level = float(alpha)
    in_range = np.isfinite(level) and 0 < level <= 1
    if not in_range:
        raise ValueError("alpha must be in (0, 1]")
    return level
60
+
61
+
62
def _validate_gamma(gamma):
    """Coerce ``gamma`` to ``float`` and require it to lie in [0, 1].

    Raises:
        ValueError: If the value is non-finite, negative, or above 1.
    """
    value = float(gamma)
    in_range = np.isfinite(value) and 0 <= value <= 1
    if not in_range:
        raise ValueError("gamma must be in [0, 1]")
    return value
67
+
68
+
69
def _validate_prune(prune):
    """Return ``prune`` unchanged if it is ``None``, ``'hete'`` or ``'homo'``.

    Raises:
        ValueError: For any other value.
    """
    allowed = (None, "hete", "homo")
    if prune in allowed:
        return prune
    raise ValueError("prune must be one of None, 'hete', or 'homo'")
73
+
74
+
75
def CS(Dcalib, Dtest, alpha, mult_test=True, return_pvals=False):
    """Conformal Selection (CS) procedure for binary losses.

    Controls the marginal deployment risk (MDR) or the selective deployment
    risk (SDR). For binary losses, MDR reduces to the average type-I error and
    SDR reduces to the usual false discovery rate (FDR).

    The function applies only when the loss function evaluates strictly to
    {0, 1}.

    Args:
        Dcalib (tuple): A tuple containing losses and scores (Lcalib, Scalib)
            for the calibration set.
        Dtest (array-like): Test scores Stest. A legacy tuple/list
            (ignored, Stest) is also accepted.
        alpha (float): The target error margin, in (0, 1].
        mult_test (bool): Whether to perform multiple testing correction using
            the Benjamini-Hochberg (BH) procedure. If False, MDR is
            controlled; otherwise SDR is controlled.
        return_pvals (bool): If True, returns the calculated p-values
            alongside the selected indices.

    Returns:
        Union[np.ndarray, tuple]: Selected indices as an integer array, or
        (selected indices, p-values) if return_pvals is True.

    Raises:
        ValueError: If alpha, Dcalib or Dtest are malformed, or if the
            calibration losses are not binary.
    """
    alpha = _validate_alpha(alpha)
    Lcalib, Scalib = _split_calib(Dcalib)
    _validate_binary_loss(Lcalib)
    Stest = _get_stest(Dtest)
    Ncalib = len(Scalib)

    # Only calibration points with loss 1 may count against a test point.
    # Filtering them explicitly (instead of adding a large constant offset to
    # the loss-0 scores) stays correct for scores of any magnitude.
    null_scores = np.sort(Scalib[Lcalib == 1])

    # With side="right", searchsorted returns, for every test score, the
    # number of sorted null scores that are <= that test score.
    n_below = np.searchsorted(null_scores, Stest, side="right")
    pvals = (1 + n_below) / (Ncalib + 1)

    if mult_test:
        sel = BH(pvals, alpha)
    else:
        sel = np.flatnonzero(pvals <= alpha)

    sel = _as_index_array(sel)
    if return_pvals:
        return sel, pvals
    return sel
112
+
113
def SCoRE_MDR_bf(Dcalib, Dtest, alpha, gamma, return_evals=False):
    """Brute-force SCoRE test with Marginal Deployment Risk (MDR) control.

    The algorithm manually searches for a suitable cutoff t among all observed
    scores. Compared to SCoRE_MDR, this brute-force computation makes the
    SCoRE e-values available explicitly.

    Args:
        Dcalib (tuple): A tuple containing losses and scores (Lcalib, Scalib)
            for the calibration set.
        Dtest (array-like): Test scores Stest. A legacy tuple/list
            (ignored, Stest) is also accepted.
        alpha (float): The target error margin, in (0, 1].
        gamma (float): A tuning parameter in [0, 1]. Recommended value is
            gamma=alpha.
        return_evals (bool): Whether to output the computed e-values.
            Defaults to False.

    Returns:
        Union[np.ndarray, tuple]: Selected indices, or
        (selected indices, e-values) if return_evals is True.
    """
    alpha = _validate_alpha(alpha)
    gamma = _validate_gamma(gamma)
    Lcalib, Scalib = _split_calib(Dcalib)
    Stest = _get_stest(Dtest)
    Ncalib, Ntest = len(Scalib), len(Stest)

    # Candidate cutoffs: every observed calibration and test score.
    M = list(np.concatenate([Scalib, Stest]))

    def F(j, t, l):
        return (np.sum(Lcalib * (Scalib <= t)) + l * (Stest[j] <= t)) / (Ncalib + 1)

    def t_gamma(j, l):
        # Largest candidate cutoff whose F value does not exceed gamma;
        # -inf when no candidate qualifies.
        max_t = -np.inf
        for cur_t in M:
            if F(j, cur_t, l) <= gamma:
                max_t = max(max_t, cur_t)
        return max_t

    sel = []
    evalues = np.zeros(Ntest)

    for i_itr in range(Ntest):
        evalue = np.inf
        for l in (0, 1):
            t_l = t_gamma(i_itr, l)
            below_cutoff = Stest[i_itr] <= t_l
            if not below_cutoff:
                # The numerator is zero, so the e-value is zero whatever the
                # denominator is. Handling this explicitly avoids 0/0 = NaN,
                # which `min` would silently ignore and which could leave the
                # e-value at +inf.
                evalue = 0.0
                break

            denom = np.sum(Lcalib * (Scalib <= t_l)) + l
            # A zero denominator with a positive numerator is an infinite
            # ratio; spell it out instead of relying on a NumPy warning path.
            cur_val = (Ncalib + 1) / denom if denom != 0 else np.inf
            evalue = min(evalue, cur_val)
        evalues[i_itr] = evalue

        if evalue >= 1 / alpha:
            sel.append(i_itr)

    if not return_evals:
        return _as_index_array(sel)
    return _as_index_array(sel), evalues
165
+
166
def SCoRE_MDR(Dcalib, Dtest, alpha, gamma):
    """SCoRE test with Marginal Deployment Risk (MDR) control (shortcut form).

    Implemented using the computational shortcut; the e-values are not
    directly available with this shortcut.

    Args:
        Dcalib (tuple): A tuple containing losses and scores (Lcalib, Scalib)
            for the calibration set.
        Dtest (array-like): Test scores Stest. A legacy tuple/list
            (ignored, Stest) is also accepted.
        alpha (float): The target error margin, in (0, 1].
        gamma (float): A tuning parameter in [0, 1]. Recommended value is
            gamma=alpha.

    Returns:
        np.ndarray: Integer indices of the selected test instances (those
        with low risk, deemed safe to deploy).
    """
    alpha = _validate_alpha(alpha)
    gamma = _validate_gamma(gamma)
    Lcalib, Scalib = _split_calib(Dcalib)
    Stest = _get_stest(Dtest)
    Ncalib = len(Scalib)

    # Second condition (only relevant when gamma > alpha): no candidate cutoff
    # may have its interval [low, upp] overlap (alpha, gamma]. The intervals
    # depend on the calibration data and the candidate cutoffs only, not on
    # the test point under consideration, so the scan is done once.
    if gamma > alpha:
        for t in np.concatenate([Scalib, Stest]):
            upp = (1 + np.sum(Lcalib * (Scalib <= t))) / (Ncalib + 1)
            low = upp - 1 / (Ncalib + 1)

            # check whether (alpha, gamma] and [low, upp] overlap
            if not ((upp <= alpha) or (low > gamma)):
                # An overlap rejects every test point.
                return _as_index_array([])

    # First condition, evaluated per test point.
    sel = [
        i_itr
        for i_itr, score in enumerate(Stest)
        if (1 + np.sum(Lcalib * (Scalib <= score))) / (Ncalib + 1) <= gamma
    ]
    return _as_index_array(sel)
204
+
205
def SCoRE_MDR_w(Dcalib, Dtest, wcalib, wtest, alpha, gamma):
    """Weighted SCoRE test with MDR control under covariate shift.

    Implemented using the computational shortcut.

    Args:
        Dcalib (tuple): A tuple containing losses and scores (Lcalib, Scalib)
            for the calibration set.
        Dtest (array-like): Test scores Stest. A legacy tuple/list
            (ignored, Stest) is also accepted.
        wcalib (np.ndarray): The covariate shift weights for the calibration
            data.
        wtest (np.ndarray): The covariate shift weights for the test data.
        alpha (float): The target error margin, in (0, 1].
        gamma (float): A tuning parameter in [0, 1]. Recommended value is
            gamma=alpha.

    Returns:
        np.ndarray: Integer indices of the selected test instances (those
        with low risk, deemed safe to deploy).

    Raises:
        ValueError: If the inputs are malformed or the weight arrays do not
            match the lengths of the calibration / test data.
    """
    alpha = _validate_alpha(alpha)
    gamma = _validate_gamma(gamma)
    Lcalib, Scalib = _split_calib(Dcalib)
    Stest = _get_stest(Dtest)
    wcalib = _as_1d_array("wcalib", wcalib)
    wtest = _as_1d_array("wtest", wtest)
    Ncalib, Ntest = len(Scalib), len(Stest)
    if len(wcalib) != Ncalib:
        raise ValueError("wcalib must have the same length as Lcalib and Scalib.")
    if len(wtest) != Ntest:
        raise ValueError("wtest must have the same length as Stest.")

    calib_w_sum = np.sum(wcalib)
    weighted_loss = wcalib * Lcalib

    # The second condition is only needed when gamma > alpha. The weighted
    # loss mass below each candidate cutoff does not depend on the test point,
    # so it is computed once instead of once per selected candidate.
    check_overlap = gamma > alpha
    if check_overlap:
        M = np.concatenate([Scalib, Stest])
        loss_below = np.array([np.sum(weighted_loss * (Scalib <= t)) for t in M])

    sel = []
    for i_itr in range(Ntest):
        w_i = wtest[i_itr]
        total_w = w_i + calib_w_sum

        phi = (w_i + np.sum(weighted_loss * (Scalib <= Stest[i_itr]))) / total_w <= gamma
        if not phi:
            continue

        if check_overlap:
            upp = (w_i + loss_below) / total_w
            low = upp - w_i / total_w

            # check whether (alpha, gamma] and [low, upp] overlap for any cutoff
            overlap = ~((upp <= alpha) | (low > gamma))
            if np.any(overlap):
                continue

        sel.append(i_itr)

    return _as_index_array(sel)
252
+
253
+ ######## SDR ########
254
+
255
def SCoRE_SDR(Dcalib, Dtest, alpha, gamma, prune=None, return_evals=False, random_state=None):
    """SCoRE testing procedure for Selective Deployment Risk (SDR) control.

    Optimized implementation with time complexity
    O(m(n+m) + (n+m) log(n+m)), where n and m are the calibration and test
    sample sizes.

    Args:
        Dcalib (tuple): losses and scores (Lcalib, Scalib) for the calibration
            set.
        Dtest (array-like): Test scores Stest. A legacy tuple/list
            (ignored, Stest) is also accepted.
        alpha (float): The target error margin, in (0, 1].
        gamma (float): A tuning parameter in [0, 1]. Recommended value is
            gamma=alpha.
        prune (str, optional): Optional boosting strategy (either 'hete' or
            'homo'). Use of 'homo' is generally recommended.
        return_evals (bool, optional): Returns computed e-values if True.
        random_state (int or np.random.Generator, optional): Random seed or
            generator used when pruning is enabled. Randomization is only
            needed for the boosting strategies.

    Returns:
        Union[np.ndarray, tuple]: Selected indices as an integer array, or
        (selected indices, e-values) if `return_evals` is True. With pruning
        enabled the returned e-values are the randomized (boosted) ones.
    """
    alpha = _validate_alpha(alpha)
    gamma = _validate_gamma(gamma)
    prune = _validate_prune(prune)
    Lcalib, Scalib = _split_calib(Dcalib)
    Stest = _get_stest(Dtest)
    Ncalib, Ntest = len(Scalib), len(Stest)
    Ntotal = Ncalib + Ntest

    Scalib_tagged = [(lp, l, 'calib') for lp, l in zip(Scalib, Lcalib)]
    Stest_tagged = [(lp, 0, 'test') for lp in Stest]  # 0 is dummy value

    # All candidate cutoffs, sorted by score.
    M_tagged = sorted(Scalib_tagged + Stest_tagged)
    M = np.array([a[0] for a in M_tagged])

    # Prefix sums over the sorted candidates:
    #   NUMER[i] = np.sum(Lcalib * (Scalib <= t)), t being the i-th ranked value in M
    #   DENOM[i] = 1 + np.sum(Stest <= t)
    NUMER = np.zeros(Ntotal)
    DENOM = np.zeros(Ntotal)
    for i, (t, L, l_type) in enumerate(M_tagged):
        NUMER[i] = (NUMER[i-1] if i != 0 else 0)
        DENOM[i] = (DENOM[i-1] if i != 0 else 1)
        if l_type == 'calib':
            NUMER[i] += L
        else:
            DENOM[i] += 1

    # Tied scores must all see the totals of the last tied entry. For example,
    # with M_tagged = [(0.5, 0, 'calib'), (0.5, 1, 'calib')] the raw prefix
    # sum gives NUMER = [0, 1], but for t = 0.5 both entries should be 1.
    for i in range(Ntotal - 2, -1, -1):
        if M_tagged[i][0] == M_tagged[i+1][0]:
            NUMER[i] = NUMER[i+1]
            DENOM[i] = DENOM[i+1]

    evalues = np.zeros(Ntest)

    for j in range(Ntest):
        # FR, t_gamma and ell for every candidate cutoff, evaluated
        # element-wise with NumPy instead of a Python loop per candidate.
        at_or_above = Stest[j] <= M
        others = DENOM - at_or_above  # drop test point j from the test count

        FR_0 = NUMER / others / (Ncalib + 1) * Ntest
        FR_1 = (NUMER + at_or_above) / others / (Ncalib + 1) * Ntest
        ELL = (Ncalib + 1) * gamma / Ntest * others - NUMER

        ok_0 = FR_0 <= gamma
        ok_1 = FR_1 <= gamma

        # t_gamma: the highest-ranked candidate passing each constraint.
        # When nothing passes, the cutoff is -inf and the rank is -1.
        idx_0 = np.flatnonzero(ok_0)
        idx_1 = np.flatnonzero(ok_1)
        t_0 = M[idx_0[-1]] if idx_0.size else -np.inf
        rank_1 = idx_1[-1] if idx_1.size else -1
        t_1 = M[rank_1] if idx_1.size else -np.inf

        if Stest[j] > t_1:
            continue  # e-value is zero

        if t_1 == t_0:
            evalues[j] = (Ncalib + 1) / (1 + NUMER[rank_1])
            continue  # same upper/lower bound case

        # max_ell[rank(t)]: max of ell(t') over t' ranked strictly after t
        # with FR(t', 0) <= gamma (-inf if there is none). Computed as an
        # exclusive suffix maximum.
        admissible_ell = np.where(ok_0, ELL, -np.inf)
        max_ell = np.full(Ntotal, -np.inf)
        max_ell[:-1] = np.maximum.accumulate(admissible_ell[::-1])[::-1][1:]

        # Candidates between max(Stest[j], t_1) and t_0 that are admissible
        # and whose ell exceeds that of every later admissible candidate.
        lower = max(Stest[j], t_1)
        in_M_star = (M >= lower) & (M <= t_0) & ok_0 & (ELL > max_ell)

        if in_M_star.any():
            evalues[j] = np.min((Ncalib + 1) / (ELL[in_M_star] + NUMER[in_M_star]))
        else:
            evalues[j] = np.inf

    if prune == 'hete':
        evalues /= _uniform_random(random_state, len(evalues))
    if prune == 'homo':
        evalues /= _uniform_random(random_state)
    sel = _as_index_array(eBH(evalues, alpha))

    if not return_evals:
        return sel
    return sel, evalues
372
+
373
def SCoRE_SDR_w(Dcalib, Dtest, wcalib, wtest, alpha, gamma, prune=None, return_evals=False, random_state=None):
    """Weighted SCoRE test for SDR control under covariate shift.

    Optimized implementation with time complexity
    O(m(n+m) + (n+m) log(n+m)), where n and m are the calibration and test
    sample sizes.

    Args:
        Dcalib (tuple): losses and scores (Lcalib, Scalib) for the calibration
            set.
        Dtest (array-like): Test scores Stest. A legacy tuple/list
            (ignored, Stest) is also accepted.
        wcalib (np.ndarray): The covariate shift weights for the calibration
            data.
        wtest (np.ndarray): The covariate shift weights for the test data.
        alpha (float): The target error margin, in (0, 1].
        gamma (float): A tuning parameter in [0, 1]. Recommended value is
            gamma=alpha.
        prune (str, optional): Optional boosting strategy (either 'hete' or
            'homo'). Use of 'homo' is generally recommended.
        return_evals (bool, optional): Returns computed e-values if True.
        random_state (int or np.random.Generator, optional): Random seed or
            generator used when pruning is enabled.

    Returns:
        Union[np.ndarray, tuple]: Selected indices as an integer array, or
        (selected indices, e-values) if `return_evals` is True. With pruning
        enabled the returned e-values are the randomized (boosted) ones.

    Raises:
        ValueError: If the inputs are malformed or the weight arrays do not
            match the lengths of the calibration / test data.
    """
    alpha = _validate_alpha(alpha)
    gamma = _validate_gamma(gamma)
    prune = _validate_prune(prune)
    Lcalib, Scalib = _split_calib(Dcalib)
    Stest = _get_stest(Dtest)
    wcalib = _as_1d_array("wcalib", wcalib)
    wtest = _as_1d_array("wtest", wtest)
    Ncalib, Ntest = len(Scalib), len(Stest)
    if len(wcalib) != Ncalib:
        raise ValueError("wcalib must have the same length as Lcalib and Scalib.")
    if len(wtest) != Ntest:
        raise ValueError("wtest must have the same length as Stest.")
    Ntotal = Ncalib + Ntest

    Scalib_tagged = [(lp, l, w, 'calib') for lp, l, w in zip(Scalib, Lcalib, wcalib)]
    Stest_tagged = [(lp, 0, w, 'test') for lp, w in zip(Stest, wtest)]  # 0 is dummy value

    # All candidate cutoffs, sorted by score.
    M_tagged = sorted(Scalib_tagged + Stest_tagged)
    M = np.array([a[0] for a in M_tagged])

    calib_w_sum = np.sum(wcalib)

    # Prefix sums over the sorted candidates:
    #   NUMER[i] = np.sum(wcalib * Lcalib * (Scalib <= t)), t being the i-th ranked value in M
    #   DENOM[i] = 1 + np.sum(Stest <= t)
    NUMER = np.zeros(Ntotal)
    DENOM = np.zeros(Ntotal)
    for i, (t, L, w, l_type) in enumerate(M_tagged):
        NUMER[i] = (NUMER[i-1] if i != 0 else 0)
        DENOM[i] = (DENOM[i-1] if i != 0 else 1)
        if l_type == 'calib':
            NUMER[i] += w * L
        else:
            DENOM[i] += 1

    # Correction for ties: tied scores all take the totals of the last tied entry.
    for i in range(Ntotal - 2, -1, -1):
        if M_tagged[i][0] == M_tagged[i+1][0]:
            NUMER[i] = NUMER[i+1]
            DENOM[i] = DENOM[i+1]

    evalues = np.zeros(Ntest)

    for j in range(Ntest):
        w_j = wtest[j]
        total_w = calib_w_sum + w_j

        # FR, t_gamma and ell for every candidate cutoff, evaluated
        # element-wise with NumPy instead of a Python loop per candidate.
        at_or_above = Stest[j] <= M
        others = DENOM - at_or_above  # drop test point j from the test count

        FR_0 = NUMER / others / total_w * Ntest
        FR_1 = (NUMER + w_j * at_or_above) / others / total_w * Ntest
        ELL = total_w / w_j * gamma / Ntest * others - NUMER / w_j

        ok_0 = FR_0 <= gamma
        ok_1 = FR_1 <= gamma

        # t_gamma: the highest-ranked candidate passing each constraint.
        # When nothing passes, the cutoff is -inf and the rank is -1.
        idx_0 = np.flatnonzero(ok_0)
        idx_1 = np.flatnonzero(ok_1)
        t_0 = M[idx_0[-1]] if idx_0.size else -np.inf
        rank_1 = idx_1[-1] if idx_1.size else -1
        t_1 = M[rank_1] if idx_1.size else -np.inf

        if Stest[j] > t_1:
            continue  # e-value is zero

        if t_1 == t_0:
            evalues[j] = total_w / (w_j + NUMER[rank_1])
            continue  # same upper/lower bound case

        # max_ell[rank(t)]: max of ell(t') over t' ranked strictly after t
        # with FR(t', 0) <= gamma (-inf if there is none). Computed as an
        # exclusive suffix maximum.
        admissible_ell = np.where(ok_0, ELL, -np.inf)
        max_ell = np.full(Ntotal, -np.inf)
        max_ell[:-1] = np.maximum.accumulate(admissible_ell[::-1])[::-1][1:]

        # Candidates between max(Stest[j], t_1) and t_0 that are admissible
        # and whose ell exceeds that of every later admissible candidate.
        lower = max(Stest[j], t_1)
        in_M_star = (M >= lower) & (M <= t_0) & ok_0 & (ELL > max_ell)

        if in_M_star.any():
            evalues[j] = np.min(total_w / (w_j * ELL[in_M_star] + NUMER[in_M_star]))
        else:
            evalues[j] = np.inf

    if prune == 'hete':
        evalues /= _uniform_random(random_state, len(evalues))
    if prune == 'homo':
        evalues /= _uniform_random(random_state)
    sel = _as_index_array(eBH(evalues, alpha))

    if not return_evals:
        return sel
    return sel, evalues
SCoRE/__init__.py ADDED
@@ -0,0 +1,46 @@
1
+ """Public API for SCoRE."""
2
+
3
+ from .SCoRE import (
4
+ CS,
5
+ SCoRE_MDR,
6
+ SCoRE_MDR_bf,
7
+ SCoRE_MDR_w,
8
+ SCoRE_SDR,
9
+ SCoRE_SDR_w,
10
+ )
11
+ from .utility import (
12
+ BH,
13
+ Lpredictor,
14
+ eBH,
15
+ eval_MDR,
16
+ eval_SDR,
17
+ gen_data_1,
18
+ gen_data_2,
19
+ gen_data_Jin2023,
20
+ loss_1,
21
+ loss_2,
22
+ loss_Jin2023,
23
+ )
24
+
25
# Version string of the package.
__version__ = "0.1.1"

# Public names of the package: the version string plus everything imported
# above from the .SCoRE and .utility modules.
__all__ = [
    "__version__",
    "BH",
    "CS",
    "Lpredictor",
    "SCoRE_MDR",
    "SCoRE_MDR_bf",
    "SCoRE_MDR_w",
    "SCoRE_SDR",
    "SCoRE_SDR_w",
    "eBH",
    "eval_MDR",
    "eval_SDR",
    "gen_data_1",
    "gen_data_2",
    "gen_data_Jin2023",
    "loss_1",
    "loss_2",
    "loss_Jin2023",
]
SCoRE/utility.py ADDED
@@ -0,0 +1,246 @@
1
+ import numpy as np
2
+
3
+
4
def _expit(x):
    """Evaluate the logistic sigmoid 1 / (1 + exp(-x)) element-wise.

    Non-negative and negative inputs use different but algebraically equal
    formulas so that ``exp`` is only ever called on non-positive arguments,
    which avoids overflow for inputs of large magnitude.

    Args:
        x: Array-like of real numbers.

    Returns:
        np.ndarray: Float array with the same shape as ``x``.
    """
    values = np.asarray(x, dtype=float)
    nonneg = values >= 0
    neg = ~nonneg

    result = np.empty_like(values, dtype=float)
    grown = np.exp(values[neg])
    result[neg] = grown / (1.0 + grown)
    result[nonneg] = 1.0 / (1.0 + np.exp(-values[nonneg]))
    return result
12
+
13
+
14
def _get_rng(random_state):
    """Resolve ``random_state`` into an object offering NumPy sampling methods.

    Returns the ``np.random`` module itself for ``None``, the generator
    unchanged when one is passed, and ``np.random.default_rng(random_state)``
    for anything else (treated as a seed).
    """
    if isinstance(random_state, np.random.Generator):
        return random_state
    return np.random if random_state is None else np.random.default_rng(random_state)
20
+
21
def loss_Jin2023(Y, tau):
    """Smoothed indicator loss, similar to the setup in Jin and Candes (2023).

    The loss is sigmoid(-tau * Y).

    Args:
        Y (np.ndarray): The target values.
        tau (float): Smoothing hyperparameter. Larger tau brings the loss
            closer to 1{Y <= 0}; tau = np.inf returns exactly 1{Y <= 0}.

    Returns:
        np.ndarray: The computed loss values (a boolean array when
        tau = np.inf).
    """
    if tau == np.inf:
        # Limiting case: the hard indicator of Y <= 0.
        return (Y <= 0)
    return _expit(-Y * tau)
38
+
39
def gen_data_Jin2023(setting, n, sig, dim=20, random_state=None):
    """Generate synthetic data following Jin and Candes (2023).

    Args:
        setting (int): The data generation setting (1 or 2).
        n (int): Number of samples to generate.
        sig (float): Noise scaling factor.
        dim (int, optional): Dimensionality of the feature space. Defaults to
            20. The regression functions read feature columns 0 to 3.
        random_state (int or np.random.Generator, optional): Random seed or
            generator for reproducible samples.

    Returns:
        tuple: (X, mu_x, eps, Y) with features, noiseless mean, noise, and
        response Y = mu_x + eps.

    Raises:
        ValueError: If setting is neither 1 nor 2.
    """
    rng = _get_rng(random_state)

    if setting != 1 and setting != 2:
        raise ValueError("setting must be 1 or 2")

    # Features are i.i.d. Uniform(-1, 1).
    X = rng.uniform(low=-1, high=1, size=n*dim).reshape((n, dim))
    x0, x1, x2, x3 = X[:, 0], X[:, 1], X[:, 2], X[:, 3]

    if setting == 1:
        mu_x = (x0 * x1 > 0) * (x3 > 0.5) * (0.25 + x3) + (x0 * x1 <= 0) * (x3 < -0.5) * (x3 - 0.25)
    else:
        mu_x = (x0 * x1 + x2 ** 2 + np.exp(x3 - 1) - 1) * 2

    # Heteroscedastic Gaussian noise, shared by both settings.
    eps = rng.normal(size=n) * (5.5 - abs(mu_x)) / 2 * sig
    Y = mu_x + eps
    return X, mu_x, eps, Y
69
+
70
def loss_1(Y, c=2, scale=6):
    """Calculates the expected shortfall-like loss function.

    The loss takes the form L(f, x, y) = (1 / scale) * y * 1{y > c},
    evaluated against Y. With the defaults this is y * 1{y > 2} / 6.

    Args:
        Y (np.ndarray): The target values.
        c (float, optional): Threshold above which the response incurs a
            loss. Defaults to 2.
        scale (float, optional): Normalizing constant that the loss is
            divided by. Defaults to 6.

    Returns:
        np.ndarray: The computed expected shortfall loss.
    """
    return 1 / scale * Y * (Y > c)
82
+
83
def gen_data_1(setting, n, sig, dim=20, random_state=None):
    """Generate synthetic data for the first case.

    Args:
        setting (int): The data generation setting (1 or 2).
        n (int): Number of samples to generate.
        sig (float): Noise scaling factor.
        dim (int, optional): Dimensionality of the features. Defaults to 20.
            The regression functions read feature columns 0 to 3.
        random_state (int or np.random.Generator, optional): Random seed or
            generator for reproducible samples.

    Returns:
        tuple: (X, mu_x, eps, Y) containing the covariates, noiseless mean,
        clipped noise, and responses Y = mu_x + eps.

    Raises:
        ValueError: If setting is neither 1 nor 2.
    """
    rng = _get_rng(random_state)

    if setting != 1 and setting != 2:
        raise ValueError("setting must be 1 or 2")

    X = rng.uniform(low=-1, high=1, size=n*dim).reshape((n, dim))
    x0, x1, x2, x3 = X[:, 0], X[:, 1], X[:, 2], X[:, 3]
    gauss = rng.normal(size=n)

    if setting == 1:
        # mean is now in (1.5, 4.5)
        mu_x = (x0 * x1 > 0) * (x3 > 0.5) * (0.5 + x3) + (x0 * x1 <= 0) * (x3 < -0.5) * (x3 - 0.5) + 3
        # clip the noise to be in (-1.5, 1.5)
        eps = np.clip(gauss * sig * (5.5 - mu_x), -1.5, 1.5)
    else:
        # mean is in (1, 5)
        mu_x = x0 * x1 + x2 ** 2 + np.exp(x3 - 1) + 2
        # clip the noise to be in (-1, 1)
        eps = np.clip(gauss * sig * (6 - mu_x) * 0.5, -1, 1)

    Y = mu_x + eps  # in (0, 6)
    return X, mu_x, eps, Y
113
+
114
def loss_2(Y, f, X, clip_const):
    """Clipped and normalized squared prediction error.

    Computes (y - f(x))^2, clips it to [0, clip_const], and divides by
    clip_const.

    Args:
        Y (np.ndarray): The true target values.
        f (object): The regression model used for prediction (must provide
            `.predict()`).
        X (np.ndarray): The feature matrix to run predictions against.
        clip_const (float): The clipping boundary applied before
            normalization.

    Returns:
        np.ndarray: The normalized prediction error loss over X.
    """
    residual = Y - f.predict(X)
    squared_error = residual ** 2
    return np.clip(squared_error, 0, clip_const) / clip_const
129
+
130
def gen_data_2(setting, n, sig, dim=20, random_state=None):
    """Generate synthetic data for the second case.

    Args:
        setting (int): The data generation setting (1 or 2).
        n (int): Number of samples to generate.
        sig (float): Noise scaling factor.
        dim (int, optional): Dimensionality. Defaults to 20. The regression
            functions read feature columns 0 to 3.
        random_state (int or np.random.Generator, optional): Random seed or
            generator for reproducible samples.

    Returns:
        tuple: (X, mu_x, eps, Y) with covariates, noiseless mean, clipped
        noise, and label values Y = mu_x + eps.

    Raises:
        ValueError: If setting is neither 1 nor 2.
    """
    rng = _get_rng(random_state)

    if not (setting == 1 or setting == 2):
        raise ValueError("setting must be 1 or 2")

    X = rng.uniform(low=-1, high=1, size=n*dim).reshape((n, dim))
    interaction = X[:, 0] * X[:, 1]
    fourth = X[:, 3]

    if setting == 1:
        # Piecewise mean, shifted by 3 so that it lies in (1.5, 4.5).
        upper_part = (interaction > 0) * (fourth > 0.5) * (0.5 + fourth)
        lower_part = (interaction <= 0) * (fourth < -0.5) * (fourth - 0.5)
        mu_x = upper_part + lower_part + 3
        noise_scale, bound = sig * (5.5 - mu_x), 1.5
        raw_noise = rng.normal(size=n) * noise_scale
    else:
        # Smooth mean, in (1, 5).
        mu_x = interaction + X[:, 2] ** 2 + np.exp(fourth - 1) + 2
        bound = 1
        raw_noise = rng.normal(size=n) * sig * (6 - mu_x) * 0.5

    # Clip the noise to (-bound, bound) so that Y stays in (0, 6).
    eps = np.clip(raw_noise, -bound, bound)
    Y = mu_x + eps
    return X, mu_x, eps, Y
160
+
161
def BH(pvals, q):
    """Apply the Benjamini-Hochberg (BH) procedure to a set of p-values.

    Args:
        pvals (array-like): List or array of p-values.
        q (float): The nominal False Discovery Rate (FDR) level.

    Returns:
        np.ndarray: Indices of the rejected hypotheses, ordered by increasing
        p-value (empty integer array when nothing is rejected).
    """
    p = np.asarray(pvals, dtype=float)
    m = p.size
    nothing = np.array([], dtype=int)

    if m == 0:
        return nothing

    # Stable sort keeps tied p-values in their input order.
    ranking = np.argsort(p, kind="mergesort")
    cutoffs = q * np.arange(1, m + 1) / m
    passing = np.flatnonzero(p[ranking] <= cutoffs)

    if passing.size == 0:
        return nothing

    # Step-up rule: reject everything up to the last rank that passes.
    n_rejected = passing[-1] + 1
    return ranking[:n_rejected]
186
+
187
def eBH(evals, q):
    """Apply the base e-BH procedure to a set of e-values.

    Each e-value e is converted to the p-value 1 / e (with an e-value of zero
    mapped to +inf, which can never be rejected) and the Benjamini-Hochberg
    procedure is run on the result.

    Args:
        evals (array-like): List or array of e-values.
        q (float): The nominal False Discovery Rate (FDR) level.

    Returns:
        np.ndarray: The indices forming the rejection set.
    """
    # Convert up front so that lists and integer arrays work: the `where`
    # mask must be an array, and the output buffer must be able to hold inf.
    evals = np.asarray(evals, dtype=float)
    pvals = np.full(evals.shape, np.inf)
    np.divide(1.0, evals, out=pvals, where=(evals != 0))
    return BH(pvals, q)
198
+
199
def eval_MDR(L, R, sel):
    """Evaluate a selection set in the MDR sense.

    Args:
        L (np.ndarray): The true loss corresponding to every instance.
        R (np.ndarray): The true rewards for each instance.
        sel (array-like): The selection set generated by the test procedure.

    Returns:
        tuple: (risk_acc, reward_acc), where risk_acc is the total loss of the
        selected instances divided by the number of all instances, and
        reward_acc is the total reward of the selected instances. An empty
        selection yields (0, 0).
    """
    if len(sel) == 0:
        return 0, 0
    selected_loss = np.sum(L[sel])
    selected_reward = np.sum(R[sel])
    return selected_loss / len(L), selected_reward
215
+
216
def eval_SDR(L, R, sel):
    """Evaluate a selection set in the SDR sense.

    Args:
        L (np.ndarray): The true loss corresponding to every instance.
        R (np.ndarray): The true rewards for each instance.
        sel (array-like): The selection set generated by the test procedure.

    Returns:
        tuple: (sdr, bin_power, reward). sdr is the average loss among the
        selected instances; bin_power is the power in the binary loss case
        (defined only for 0-1 loss, and 0 when L contains no zeros); reward is
        the total reward of the selected instances. An empty selection yields
        (0, 0, 0).
    """
    n_selected = len(sel)
    if n_selected == 0:
        return 0, 0, 0

    selected_loss = np.sum(L[sel])
    n_safe = len(L) - np.sum(L)  # number of zeros in L (for 0-1 loss)

    sdr = selected_loss / n_selected
    if n_safe != 0:
        bin_power = (n_selected - selected_loss) / n_safe
    else:
        bin_power = 0
    reward = np.sum(R[sel])
    return sdr, bin_power, reward
234
+
235
class Lpredictor:
    """Combine a target predictor with a loss mapping into a loss predictor.

    The wrapper's `.predict()` first obtains the wrapped model's prediction
    and then passes that prediction, together with the inputs, to the loss
    function.
    """

    def __init__(self, Ypred, loss_fn):
        # Ypred: object providing `.predict(X)`.
        # loss_fn: callable invoked as loss_fn(prediction, X).
        self.Ypred, self.loss_fn = Ypred, loss_fn

    def predict(self, X):
        """Return `loss_fn(Ypred.predict(X), X)`."""
        predicted_target = self.Ypred.predict(X)
        return self.loss_fn(predicted_target, X)
@@ -0,0 +1,125 @@
1
+ Metadata-Version: 2.4
2
+ Name: score-select
3
+ Version: 0.1.1
4
+ Summary: Conformal selective prediction with general risk control.
5
+ Author-email: Tian Bai <20.t.bai.05@gmail.com>, Ying Jin <yjinstat@wharton.upenn.edu>
6
+ License-Expression: MIT
7
+ Project-URL: Repository, https://github.com/Tian-Bai/SCoRE
8
+ Project-URL: Paper, https://arxiv.org/abs/2603.24704
9
+ Keywords: conformal prediction,selective prediction,risk control
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3 :: Only
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Classifier: Topic :: Scientific/Engineering :: Mathematics
21
+ Requires-Python: >=3.9
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: numpy>=1.23
25
+ Provides-Extra: experiments
26
+ Requires-Dist: pandas>=1.5; extra == "experiments"
27
+ Requires-Dist: scikit-learn>=1.1; extra == "experiments"
28
+ Requires-Dist: scipy>=1.9; extra == "experiments"
29
+ Requires-Dist: tqdm>=4.64; extra == "experiments"
30
+ Provides-Extra: dev
31
+ Requires-Dist: build>=1.2; extra == "dev"
32
+ Requires-Dist: pytest>=7; extra == "dev"
33
+ Requires-Dist: twine>=5; extra == "dev"
34
+ Dynamic: license-file
35
+
36
+ # SCoRE
37
+
38
+ SCoRE implements conformal selective prediction procedures for marginal
39
+ deployment risk (MDR) and selective deployment risk (SDR) control.
40
+
41
+ This repository also contains the simulation and application code used for the
42
+ paper [Conformal Selective Prediction with General Risk Control](https://arxiv.org/abs/2603.24704).
43
+
44
+ ## Installation
45
+
46
+ Install the package from a local checkout:
47
+
48
+ ```bash
49
+ python -m pip install -e .
50
+ ```
51
+
52
+ Install optional dependencies for the research scripts:
53
+
54
+ ```bash
55
+ python -m pip install -e ".[experiments]"
56
+ ```
57
+
58
+ After the package is published, install it with:
59
+
60
+ ```bash
61
+ python -m pip install score-select
62
+ ```
63
+
64
+ ## Quickstart
65
+
66
+ ```python
67
+ import numpy as np
68
+ from SCoRE import SCoRE_MDR, SCoRE_SDR
69
+
70
+ lcalib = np.array([0, 1, 0, 1])
71
+ scalib = np.array([0.1, 0.4, 0.2, 0.8])
72
+ stest = np.array([0.15, 0.5, 0.9])
73
+
74
+ dcalib = (lcalib, scalib)
75
+ dtest = stest
76
+
77
+ mdr_selected = SCoRE_MDR(dcalib, dtest, alpha=0.5, gamma=0.5)
78
+ sdr_selected = SCoRE_SDR(dcalib, dtest, alpha=0.5, gamma=0.5)
79
+ ```
80
+
81
+ Functions return NumPy integer index arrays, so selections can be used directly
82
+ to index NumPy arrays.
83
+
84
+ When using randomized pruning, pass `random_state` for reproducible results:
85
+
86
+ ```python
87
+ selected = SCoRE_SDR(
88
+ dcalib,
89
+ dtest,
90
+ alpha=0.5,
91
+ gamma=1.0,
92
+ prune="hete",
93
+ random_state=123,
94
+ )
95
+ ```
96
+
97
+ ## Public API
98
+
99
+ The top-level package exports the main procedures and utilities, grouped below.
100
+
101
+ Recommended package entry points:
102
+
103
+ - `SCoRE_MDR`
104
+ - `SCoRE_SDR`
105
+
106
+ Additional utilities:
107
+
108
+ - `CS`
109
+ - `SCoRE_MDR_bf`, `SCoRE_MDR_w`, `SCoRE_SDR_w`
110
+ - `BH`, `eBH`
111
+ - `eval_MDR`, `eval_SDR`
112
+ - `loss_Jin2023`, `loss_1`, `loss_2`
113
+ - `gen_data_Jin2023`, `gen_data_1`, `gen_data_2`
114
+ - `Lpredictor`
115
+
116
+ ## Repository Layout
117
+
118
+ - `SCoRE/`: installable Python package
119
+ - `tests/`: package tests
120
+ - `applications/`: real-data applications
121
+ - `applications/drug/`: efficient, cost-aware drug discovery
122
+ - `applications/icu/`: clinical prediction error management
123
+ - `applications/llm/`: flexible LLM abstention
124
+ - `simulation/`: simulation experiments
125
+ - `simulation_w/`: simulation experiments with covariate shifts
@@ -0,0 +1,8 @@
1
+ SCoRE/SCoRE.py,sha256=k5fXzFSJc1Z2gDJtLk9wI4l8RpRPxoVfH_2n0NHaBo4,20148
2
+ SCoRE/__init__.py,sha256=mmKP3rku_aJiinVXYZZkGDb59U78d0djtmhjyMuN3rI,662
3
+ SCoRE/utility.py,sha256=m3BoIqcJu8FFvXyYJpPFzlMt7dSMKM8_iwQnfmFXn5o,9466
4
+ score_select-0.1.1.dist-info/licenses/LICENSE,sha256=SYINFQb7BU86dVJs0T25JhoG-4wH7LmMU5I5tP5XEAE,1107
5
+ score_select-0.1.1.dist-info/METADATA,sha256=iKFB6_mRYi6RzmlUZzgW90RuwN-gbSFwGSgn6B3a_gc,3717
6
+ score_select-0.1.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
7
+ score_select-0.1.1.dist-info/top_level.txt,sha256=tQ8qYOEVtrqK2raAkVMXWvJs75exBWDMZ8NiN1DTCS0,6
8
+ score_select-0.1.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2026 Tian Bai and Ying Jin
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1 @@
1
+ SCoRE