cutpointpy 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cutpointpy/__init__.py ADDED
File without changes
cutpointpy/core.py ADDED
@@ -0,0 +1,457 @@
1
+ from abc import ABC, abstractmethod
2
+ from itertools import product
3
+ import numpy as np
4
+ from sklearn.model_selection import StratifiedShuffleSplit
5
+
6
+ from cutpointpy.utils import auc as area_under_curve,\
7
+ check_same_length, cm, cm_performance_metrics
8
+
9
+ class CutpointCalculator():
10
+ """
11
+ Performs optimal cut-point calculation and bootstrapping.
12
+ """
13
+
14
+ def __init__(self, target='youdenj', polarity=True,
15
+ interpolation=None, num_points=100):
16
+ """
17
+ Parameters
18
+ ----------
19
+ target : str
20
+ The target function to maximise or minimise.
21
+ Possible values:
22
+ 'youdenj' -> see `Youdenj` class documentation.
23
+ 'eucdist' -> see `Eucdist` class documentation.
24
+ polarity : bool
25
+ The direction of the inequality. If True (False) the
26
+ datapoints with feature value greater (less) than or equal
27
+ to the threshold are flagged as positive and the other ones
28
+ as negative.
29
+ interpolation : str [optional]
30
+ Interpolation method for generating the set of thresholds
31
+ to test. Possible values:
32
+ None -> No interpolation is used. The optimal
33
+ cut-point is chosen among the feature
34
+ values.
35
+ 'linear' -> Thresholds are generated by linear
36
+ interpolation between the min and max value
37
+ of `features`.
38
+ num_points : unsigned int [optional]
39
+ Number of thresholds generated. Has no effect if `interpolation`
40
+ is None.
41
+ """
42
+
43
+ match target:
44
+ case 'eucdist':
45
+ self.optimal_cpcalculator = Eucdist()
46
+ case 'youdenj':
47
+ self.optimal_cpcalculator = Youdenj()
48
+ case _:
49
+ raise ValueError(f'Target `{target}` not recognised.')
50
+
51
+ self.above = polarity
52
+ self.interpolation = interpolation
53
+ self.num_points = num_points
54
+
55
+ def find(self, features, labels):
56
+ """
57
+ Determine the optimal cut-point value of a predictor variable
58
+ (feature) for a binary classification task.
59
+
60
+ Parameters
61
+ ----------
62
+ features : array-like of numeric (n_samples)
63
+ Value of the predictor variable for each datapoint.
64
+ It is converted to an ndarray of float internally.
65
+ labels : array-like of numeric (n_samples)
66
+ Class label of each datapoint, where 0 indicates negative
67
+ and any other value positive. It is converted to an ndarray of
68
+ bool internally.
69
+
70
+ Returns
71
+ -------
72
+ cutpoint : float
73
+ The optimal cut-point value.
74
+ cutpoint_idx : int
75
+ The index corresponding to the optimal cut-point value
76
+ among the thresholds tested.
77
+ thresholds : ndarray of numeric (N,1)
78
+ The thresholds tested. N = len(features) if
79
+ `self.interpolation` is None, otherwise N = num_points.
80
+ acc : ndarray of numeric (N,1)
81
+ Accuracy as a function of the thresholds.
82
+ se : ndarray of numeric (N,1)
83
+ Sensitivity as a function of the thresholds.
84
+ sp : ndarray of numeric (N,1)
85
+ Specificity as a function of the thresholds.
86
+ auc : float
87
+ The area under the receiver-operating characteristic (ROC)
88
+ curve.
89
+
90
+ References
91
+ ----------
92
+ 1. Hassanzad and Haijan-Tilaki (2024)
93
+ doi:10.1186/s12874-024-02198-2
94
+ """
95
+
96
+ if not check_same_length(features, labels):
97
+ raise ValueError('There should be as many features as '
98
+ 'labels')
99
+
100
+ #Labels and features: cast and reshape
101
+ features, labels = self._cast_and_reshape(features, labels)
102
+
103
+ #Sort datapoints in ascending order by feature value
104
+ sorted_idxs = np.argsort(features, axis=0)
105
+ features, labels =\
106
+ [np.take_along_axis(x, sorted_idxs, axis=0) for x in
107
+ [features, labels]]
108
+
109
+ #Define the set of thresholds to test
110
+ if self.interpolation:
111
+
112
+ lingrid = np.linspace(start=np.min(features[:, 0]),
113
+ stop=np.max(features[:, 0]),
114
+ num=self.num_points)
115
+
116
+ match self.interpolation:
117
+ case 'linear':
118
+ thresholds = np.interp(
119
+ x=lingrid, xp=features[:, 0], fp=features[:, 0])
120
+ case _:
121
+ raise ValueError(f'Interpolation "{interpolation}" '
122
+ f'not recognised')
123
+
124
+ thresholds.shape = (thresholds.size, 1)
125
+ else:
126
+ thresholds = features
127
+
128
+ acc, se, sp, cutpoint, cutpoint_idx, auc =\
129
+ self._test_cutoff_values(
130
+ features=features,
131
+ labels=labels,
132
+ thresholds=thresholds,
133
+ )
134
+
135
+ return cutpoint, cutpoint_idx, thresholds, acc, se, sp, auc
136
+
137
+ def bootstrap(self, features, labels, method='sss', train_ratio=0.7,
138
+ num_reps=30, random_state=0):
139
+ """
140
+ Compute/validate optimal cut-point through bootstrapping.
141
+
142
+ Parameters
143
+ ----------
144
+ features : array-like of numeric (n_samples)
145
+ Value of the predictor variable for each datapoint. It is
146
+ converted to ndarray of float internally.
147
+ labels : array-like of numeric (n_samples)
148
+ Class label of each datapoint, where 0 indicates negative
149
+ and any other value positive. It is converted to ndarray of
150
+ bool internally.
151
+ method : str
152
+ The strategy for generating the bootstratp repetitions-i.e.,
153
+ the subdivisions of the original data into train (in-bag
154
+ data) and test set (out-of-bag data).
155
+ Possible values:
156
+ `sss` -> Stratified shuffle split
157
+ train_ratio : float [0.0, 1.0]
158
+ The proportion of the original data used to generate the
159
+ train set at each repetition.
160
+ num_reps : int
161
+ Number of bootstrap repetitions (i.e., number of
162
+ subdivisions into train and test set).
163
+ random_state : int
164
+ Controls the randomness of the repetitions produced. Pass
165
+ an int for reproducible output across multiple function
166
+ calls, None for non-reproducible output.
167
+
168
+ Returns
169
+ -------
170
+ cutpoints : ndarray of float (num_reps, 1)
171
+ The optimal cut-point value for each repetition estimated
172
+ on the train set.
173
+ cutpoints_idxs : ndarray of float (num_reps, 1)
174
+ For each repetition the index of the `thresholds` array that
175
+ corresponds to the optimal cut-point.
176
+ thresholds : ndarray of float (num_reps, N)
177
+ The values of the thresholds tested on the train set at each
178
+ repetition. Element [i,j] of the matrix represents the j-th
179
+ threshold value for repetition i.
180
+ N = floor(len(`features`) * `train_ratio`) if
181
+ `self.interpolation` is None, otherwise N =
182
+ `self.num_points`.
183
+ accs : ndarray of float (num_reps, N)
184
+ Accuracy on the train set for each repetition and threshold
185
+ value tested.
186
+ ses : ndarray of float (num_reps, N)
187
+ Sensitivity on the train set for each repetition and
188
+ threshold value tested.
189
+ sps : ndarray of float (num_reps, N)
190
+ Specificity on the train set for each repetition and
191
+ threshold value tested.
192
+ aucs_train : ndarray of float (num_reps, 1)
193
+ The area under the curve estimated on the train set for each
194
+ repetition .
195
+ aucs_test : ndarray of float (num_reps, 1)
196
+ The area under the curve estimated on the test set for each
197
+ repetition .
198
+ performance_train : ndarray of float (num_reps, 3)
199
+ For each repetition, in column-wise order, respectively
200
+ accuracy, sensitivity and specificity yielded by the optimal
201
+ cut-point value when applied to the train set.
202
+ performance_test : ndarray of float (num_reps, 3)
203
+ For each repetition, in column-wise order, respectively
204
+ accuracy, sensitivity and specificity yielded by the optimal
205
+ cut-point value when applied to the test set.
206
+ performance_whole : ndarray of float (num_reps, 3)
207
+ For each repetition, in column-wise order, respectively
208
+ accuracy, sensitivity and specificity yielded by the optimal
209
+ cut-point value when applied to the whole dataset.
210
+ """
211
+
212
+ features, labels = self._cast_and_reshape(features, labels)
213
+
214
+ #===============================================================
215
+ #================ Initialise the output ========================
216
+ #===============================================================
217
+ cutpoints, cutpoints_idxs, aucs_train, aucs_test =\
218
+ [np.zeros(shape=(num_reps,1), dtype=float) for _ in range(4)]
219
+ performance_train, performance_test, performance_whole =\
220
+ [np.zeros(shape=(num_reps,3), dtype=float) for _ in range(3)]
221
+
222
+ if self.interpolation:
223
+ num_thresholds = self.num_points
224
+ else:
225
+ num_thresholds = np.floor(
226
+ len(features) * train_ratio,
227
+ dtype=int, casting='unsafe'
228
+ )
229
+
230
+ thresholds, accs, ses, sps = \
231
+ [np.zeros(shape=(num_reps, num_thresholds), dtype=float)
232
+ for _ in range(4)]
233
+ #===============================================================
234
+ #===============================================================
235
+ #===============================================================
236
+
237
+
238
+ match method:
239
+ case 'sss':
240
+ splitter = StratifiedShuffleSplit(
241
+ n_splits=num_reps,
242
+ train_size=train_ratio,
243
+ random_state=random_state
244
+ )
245
+ case _:
246
+ raise ValueError(f'Unrecognised method `{method}`.')
247
+
248
+ for split_idx, (train_idxs, test_idxs) in enumerate(
249
+ splitter.split(X=features, y=labels)):
250
+
251
+ train_features, test_features, train_labels, test_labels = \
252
+ [container[idxs] for container, idxs in
253
+ product((features, labels), (train_idxs, test_idxs))]
254
+
255
+ #===========================================================
256
+ #==== Return values computed on the train (in bag) data ====
257
+ #===========================================================
258
+ cutpoint, cutpoint_idx, thresholds_, acc, se, sp, auc =\
259
+ self.find(train_features, train_labels)
260
+
261
+ for out_, in_ in zip((cutpoints, cutpoints_idxs),
262
+ (cutpoint, cutpoint_idx)):
263
+ out_[split_idx] = in_
264
+
265
+ for out_, in_ in zip((thresholds, accs, ses, sps),
266
+ (thresholds_, acc, se, sp)):
267
+ out_[split_idx,:] = in_.flat
268
+
269
+ for i, in_ in enumerate([acc, se, sp]):
270
+ performance_train[split_idx, i] = in_[cutpoint_idx,0]
271
+
272
+ aucs_train[split_idx,0] = auc
273
+ #===========================================================
274
+ #===========================================================
275
+ #===========================================================
276
+
277
+ #Compute auc on the test set
278
+ _, _, _, _, _, _, aucs_test[split_idx,0] =\
279
+ self.find(test_features, test_labels)
280
+
281
+ #Compute performance parameters on the test set using the
282
+ #cut-point value estimated on the train set
283
+ acc, se, sp, _, _, _ = self._test_cutoff_values(
284
+ features=test_features,
285
+ labels=test_labels,
286
+ thresholds = np.array(cutpoint, ndmin=2),
287
+ )
288
+ for i, in_ in enumerate([acc, se, sp]):
289
+ performance_test[split_idx, i] = in_[0,0]
290
+
291
+ #Compute performance parameters on the whole dataset using
292
+ #the cut-point value estimated on the train set
293
+ acc, se, sp, _, _, _ = self._test_cutoff_values(
294
+ features=features,
295
+ labels=labels,
296
+ thresholds = np.array(cutpoint, ndmin=2),
297
+ )
298
+ for i, in_ in enumerate([acc, se, sp]):
299
+ performance_whole[split_idx, i] = in_[0,0]
300
+
301
+ return cutpoints, cutpoints_idxs, thresholds, accs, ses, sps,\
302
+ aucs_train, aucs_test, performance_train,\
303
+ performance_test, performance_whole
304
+
305
+ def _test_cutoff_values(self, features, labels, thresholds):
306
+ """
307
+ Classification performance of a set of cut-off values
308
+ (thresholds) when applied to a predictor variable (feature) for
309
+ a binary classification task.
310
+
311
+ Parameters
312
+ ----------
313
+ features : ndarray of float (n_features, 1)
314
+ Value of the predictor variable for each datapoint.
315
+ labels : ndarray of bool (n_features, 1)
316
+ Class label of each datapoint, where True indicates the
317
+ positive class.
318
+ thresholds : ndarray of float (n_thresholds, 1)
319
+ The cut-off values to be tested, sorted in ascending order.
320
+
321
+ Returns
322
+ -------
323
+ acc, se, sp : ndarrays of float, each of shape (n_thresholds, 1)
324
+ Accuracy, sensitivity and pecificity as a function of the
325
+ cut-off value.
326
+ cutpoint : float
327
+ The optimal cut-off value.
328
+ cutpoint_idx : int [0, (n_thresholds - 1)]
329
+ Index of the optimal cut-off value.
330
+ auc : float
331
+ The area under the curve.
332
+ """
333
+
334
+ #Reshape features and thresholds for vectorisation.
335
+ #Prepend 'r_' to indicate the reshaped versions
336
+ r_features = np.tile(features.T, reps=(thresholds.size, 1))
337
+ r_thresholds = np.tile(thresholds, reps=(1, features.size))
338
+
339
+ #Predict labels as a function of threshold
340
+ if self.above:
341
+ predicted = (r_features >= r_thresholds)
342
+ else:
343
+ predicted = (r_features <= r_thresholds)
344
+
345
+ #Compute sensitivity and specificity as a function of threshold
346
+ confmat = cm(predicted=predicted,
347
+ target=np.tile(labels.T, reps=(thresholds.size, 1)))
348
+ acc, se, sp = cm_performance_metrics(confmat)
349
+
350
+ #Compute optimal cutpoint
351
+ cutpoint, cutpoint_idx = self.optimal_cpcalculator.find(
352
+ thresholds=thresholds, se=se, sp=sp
353
+ )
354
+
355
+ #Compute AUC
356
+ auc = area_under_curve(se=se.T, sp=sp.T).flatten()[0]
357
+
358
+ return acc, se, sp, cutpoint, cutpoint_idx, auc
359
+
360
+ @staticmethod
361
+ def _cast_and_reshape(features, labels):
362
+ """
363
+ Convert and reshape features and labels to ndarray.
364
+
365
+ Parameters
366
+ ----------
367
+ features : array-like of numeric (n_samples)
368
+ Value of the predictor variable for each datapoint.
369
+ labels : array-like of numeric (n_samples)
370
+ Class label of each datapoint, where 0 indicates negative
371
+ and any other value positive.
372
+
373
+ Returns
374
+ -------
375
+ features : ndarray of float (n_samples, 1)
376
+ Cast and reshaped features.
377
+ labels : ndarray of bool (n_samples, 1)
378
+ Cast and reshaped labels.
379
+ """
380
+
381
+ check_same_length(features, labels)
382
+
383
+ features = np.array(features, dtype=float, ndmin=2)
384
+ labels = np.array(labels, dtype=bool, ndmin=2)
385
+
386
+ for item in [features, labels]:
387
+ item.shape = (features.size, 1)
388
+
389
+ return features, labels
390
+
391
+
392
+
393
class OptimalCutpointCalculator(ABC):
    """
    Abstract interface for the target function to maximise or minimise.

    Concrete subclasses implement `find()` with their own optimality
    criterion.
    """

    @staticmethod
    @abstractmethod
    def find(thresholds, se, sp):
        """
        Select the optimal cut-point from sensitivity and specificity
        expressed as a function of the thresholds tested.

        Parameters
        ----------
        thresholds : ndarray of float (N,1)
            The set of thresholds to test, sorted from smallest to
            largest.
        se : ndarray of float (N,1)
            Sensitivity as a function of the threshold.
        sp : ndarray of float (N,1)
            Specificity as a function of the threshold.

        Returns
        -------
        cutpoint : numeric
            The optimal cut-point value.
        cutpoint_idx : int
            The index corresponding to the optimal cut-point value.
        """
423
+
424
class Youdenj(OptimalCutpointCalculator):
    """
    Maximises Youden's J - i.e.: Sensitivity + Specificity - 1.
    """

    @staticmethod
    def find(thresholds, se, sp):
        """
        Return the threshold with the highest Youden's J and its index.

        Note
        ----
        If the maximum occurs on multiple values of threshold the first
        (smallest) occurrence is returned.
        """
        youden_j = se + sp - 1
        # argmax along the rows reports the first row holding the maximum
        best_per_column = np.argmax(youden_j, axis=0)
        idx = best_per_column.flatten()[0]
        return thresholds[idx, 0], idx
440
+
441
class Eucdist(OptimalCutpointCalculator):
    """
    Minimises the Euclidean distance between the (0,1) point and the ROC
    curve in the FPR (1 - sp) vs. TPR (se) space.
    """

    @staticmethod
    def find(thresholds, se, sp):
        """
        Return the threshold closest to the (0,1) corner and its index.

        Parameters
        ----------
        thresholds : ndarray of float (N,1)
            The set of thresholds to test, sorted from smallest to
            largest.
        se : ndarray of float (N,1)
            Sensitivity as a function of the threshold.
        sp : ndarray of float (N,1)
            Specificity as a function of the threshold.

        Returns
        -------
        cutpoint : numeric
            The optimal cut-point value.
        cutpoint_idx : int
            The index corresponding to the optimal cut-point value.

        Note
        ----
        If the minimum occurs on multiple values of threshold the last
        (largest) occurrence is returned.
        """
        distances = np.sqrt((1 - se)**2 + (1 - sp)**2)[:, 0]

        # np.argmin reports the FIRST occurrence of the minimum, so the
        # search runs on the reversed array and the position is mapped
        # back to obtain the LAST occurrence, as documented.
        idx = distances.size - 1 - np.argmin(distances[::-1])
        return thresholds[idx, 0], idx
cutpointpy/utils.py ADDED
@@ -0,0 +1,143 @@
1
+ import numpy as np
2
+
3
def check_same_length(*args):
    """
    Tell whether all the given iterables have the same length.

    Parameters
    ----------
    args :
        The iterables to be compared for equal length.

    Returns
    -------
    same_length : bool
        True if all `args` have the same length, False otherwise.

    Raises
    ------
    ValueError
        If fewer than two iterables are given.
    """
    if len(args) <= 1:
        raise ValueError('At least two iterables should be given.')

    # Every argument is compared against the length of the first one;
    # the scan stops at the first mismatch.
    reference = len(args[0])
    for candidate in args:
        if len(candidate) != reference:
            return False

    return True
24
+
25
def cm(predicted, target):
    """
    Confusion matrix for a binary outcome.

    Parameters
    ----------
    predicted : ndarray of bool (n_tests, n_samples)
        The predicted labels.
    target : ndarray of bool (n_tests, n_samples)
        The target labels (ground truth).

    Returns
    -------
    cm : ndarray of unsigned int (n_tests, 4)
        The confusion matrices. Each row represents one matrix; columns
        0 to 3 respectively report the number of true positives, false
        negatives, false positives and true negatives.

    Raises
    ------
    ValueError
        If `predicted` and `target` do not have the same shape.

    Notes
    -----
    Vectorised function - computes n_tests confusion matrices at once.
    Convention for labels: True denotes the positive class.
    """

    if predicted.shape != target.shape:
        raise ValueError('Predicted and target values must have the '
                         'same shape')

    # Boolean masks computed once and combined for the four outcomes
    predicted_pos = np.equal(predicted, True)
    predicted_neg = np.equal(predicted, False)
    target_pos = np.equal(target, True)
    target_neg = np.equal(target, False)

    confmat = np.zeros(shape=(predicted.shape[0], 4), dtype=np.uint)
    confmat[:, 0] = np.sum(predicted_pos & target_pos, axis=1)  # TP
    confmat[:, 1] = np.sum(predicted_neg & target_pos, axis=1)  # FN
    confmat[:, 2] = np.sum(predicted_pos & target_neg, axis=1)  # FP
    confmat[:, 3] = np.sum(predicted_neg & target_neg, axis=1)  # TN

    return confmat
65
+
66
def cm_performance_metrics(cm):
    """
    Accuracy, sensitivity and specificity from confusion matrices.

    Parameters
    ----------
    cm : ndarray of numeric (n_matrices, 4)
        The confusion matrices. Each row represents one matrix; columns
        0 to 3 respectively report the number of true positives, false
        negatives, false positives and true negatives.

    Returns
    -------
    acc : ndarray of float (n_matrices, 1)
        Accuracy.
    se : ndarray of float (n_matrices, 1)
        Sensitivity.
    sp : ndarray of float (n_matrices, 1)
        Specificity.

    Notes
    -----
    Returned values are for each confusion matrix. Accuracy, sensitivity
    and specificity range between 0.0 and 1.0.
    """

    true_pos, false_neg, false_pos, true_neg = (
        cm[:, column] for column in range(4))

    accuracy = (true_pos + true_neg) / np.sum(cm, axis=1)
    sensitivity = true_pos / (true_pos + false_neg)
    specificity = true_neg / (true_neg + false_pos)

    # Each metric is returned as a column vector of shape (n_matrices, 1)
    return [np.array(metric, ndmin=2).T
            for metric in (accuracy, sensitivity, specificity)]
101
+
102
def auc(se, sp):
    """
    Compute the area under the Receiver-operating characteristic (ROC)
    curve.

    Parameters
    ----------
    se : ndarray of numeric (n_tests, n_thresholds)
        Sensitivity as a function of the thresholds.
    sp : ndarray of numeric (n_tests, n_thresholds)
        Specificity as a function of the thresholds.

    Returns
    -------
    auc : ndarray of float (n_tests, 1)
        The areas under the curve.

    Raises
    ------
    ValueError
        If `se` and `sp` do not have the same shape.

    Notes
    -----
    1. Vectorised function, computes n_tests AUCs at once.
    2. It is assumed that in each row `se` and `sp` are matched by
       threshold value. That is, for each row of `se` and `sp`
       the same column index corresponds to the same threshold value.
    3. Area calculation is based on the trapezoidal rule. Only the
       points given are integrated; no end points are added.
    """

    if se.shape != sp.shape:
        raise ValueError('`se` and `sp` must have the same shape')

    # Compute true positive rate (TPR) and false positive rate (FPR)
    tpr = se
    fpr = 1 - sp

    # Sort by FPR in increasing order, breaking ties by increasing TPR.
    # Sorting on FPR alone leaves points with equal FPR in arbitrary
    # order, which changes the TPR value at which the curve leaves a
    # vertical segment and therefore the computed area.
    order = np.lexsort((tpr, fpr), axis=-1)
    tpr = np.take_along_axis(tpr, order, axis=-1)
    fpr = np.take_along_axis(fpr, order, axis=-1)

    # `np.trapezoid` was introduced in NumPy 2.0; fall back to the
    # older name on previous releases.
    trapezoid = getattr(np, 'trapezoid', None) or np.trapz

    # One area per row, returned as a column vector
    return np.array(trapezoid(y=tpr, x=fpr), ndmin=2).T
@@ -0,0 +1,48 @@
1
+ Metadata-Version: 2.4
2
+ Name: cutpointpy
3
+ Version: 1.0.0
4
+ Summary: Optimal cut-point calculation of a predictor variable for binary classification tasks
5
+ Author-email: Francesco Bianconi <bianco@ieee.org>
6
+ License-Expression: GPL-3.0-or-later
7
+ Project-URL: home, https://github.com/bianconif/cutpointpy
8
+ Project-URL: repository, https://github.com/bianconif/cutpointpy
9
+ Keywords: binary-classification,cut-point,roc-analysis,sensitivity,specificity
10
+ Requires-Python: >=3.14
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Requires-Dist: numpy
14
+ Requires-Dist: scikit-learn
15
+ Dynamic: license-file
16
+
17
+ # cutpointpy
18
+ ____________
19
+ A Python package for estimating the optimal cut-point of a predictor variable (feature) for a binary classification task. It is loosely inspired by [`cutpointr`](https://cran.r-project.org/web/packages/cutpointr/index.html), an optimal cut-point calculation package for [R](https://www.r-project.org/).
20
+
21
+ Main usage:
22
+
23
+ - Optimal cut-point estimation
24
+ - Stability analysis of the estimated cut-points through bootstrapping
25
+ - Receiver-operating characteristic curve (ROC) analysis
26
+
27
+ ## Installation
28
+ `pip install cutpointpy`
29
+
30
+ ## Structure
31
+ - `cutpointpy.core`: contains the main class (`CutpointCalculator`) with functions `find()` and `bootstrap()` respectively for optimal cut-point estimation and stability analysis/validation through bootstrapping.
32
+ - `cutpointpy.utils`: contains ancillary functions including methods for computing performance parameters on binary classification tasks (e.g. confusion matrices, accuracy, sensitivity, specificity and AUC)
33
+
34
+ ## Usage
35
+ We recommend the following [marimo](https://marimo.io/) notebooks to get started with `cutpointpy`.
36
+ - Optimal cut-point estimation without bootstrapping
37
+ * [cutpointfind__glucose_cutoff_for_diabetes.py](https://molab.marimo.io/notebooks/nb_P81opF6FjJpcwDeycTAVDb)
38
+ * [cutpointfind__ibmi_cutoff_for_diabetes.py](https://molab.marimo.io/notebooks/nb_D5qnT3WHLxrNVpFtwxBHpc)
39
+ - Optimal cut-point estimation with bootstrapping
40
+ * [cutpointboot__glucose_cutoff_for_diabetes.py](https://molab.marimo.io/notebooks/nb_ZxGQFfmRsEBb5LRq8hqrXo)
41
+
42
+
43
+ ## References
44
+ 1. Baratloo, A., Hosseini, M., Negida, A., El Ashal, G. [Part 1: simple definition and calculation of accuracy, sensitivity and specificity](https://pmc.ncbi.nlm.nih.gov/articles/PMC4614595/) (2015) Emergency 3(2):48-49
45
+ 2. Hassanzad M., Hajian-Tilaki K. [Methods of determining optimal cut-point of diagnostic biomarkers with application of clinical data in ROC analysis: an update review](https://doi.org/10.1186/s12874-024-02198-2) (2024) BMC Medical Research Methodology, 24(1), art. no. 84
46
+
47
+ ## Contacts
48
+ [Francesco Bianconi](https://www.bianconif.net) - [bianco@ieee.org](mailto:bianco@ieee.org).
@@ -0,0 +1,10 @@
1
+ cutpointpy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ cutpointpy/core.py,sha256=ezQJ8kNqm9d4oihMCcLcasY5dYTq8mYl5fgDYpxfcU4,19069
3
+ cutpointpy/utils.py,sha256=cxTrtTNczIlR0bN7k00_NUyk8WzVX1iM6Sdn80afRhc,4388
4
+ cutpointpy-1.0.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
5
+ cutpointpy-1.0.0.dist-info/METADATA,sha256=F_8i-E2Jbuv40y-If6dT4GuF76wGX8BQiixXGrqMJJg,2793
6
+ cutpointpy-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
7
+ cutpointpy-1.0.0.dist-info/scm_file_list.json,sha256=_HvuVUKfgFObfnhdLVfPaIk0jTiA5d3Ltm6-PexJj3s,309
8
+ cutpointpy-1.0.0.dist-info/scm_version.json,sha256=TxlYJP0Qs8Unv1JgWlv37F6xrY7Vxxg4YztqiSm6_z4,170
9
+ cutpointpy-1.0.0.dist-info/top_level.txt,sha256=kHAUPkGYJmDf_OsRAsBsIOC6VYr2nDvSPEk5fjPZbLw,11
10
+ cutpointpy-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+