scikit-survival 0.24.0__cp313-cp313-macosx_11_0_arm64.whl → 0.25.0__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scikit_survival-0.25.0.dist-info/METADATA +185 -0
- scikit_survival-0.25.0.dist-info/RECORD +58 -0
- {scikit_survival-0.24.0.dist-info → scikit_survival-0.25.0.dist-info}/WHEEL +2 -1
- sksurv/__init__.py +51 -6
- sksurv/base.py +12 -2
- sksurv/bintrees/_binarytrees.cpython-313-darwin.so +0 -0
- sksurv/column.py +33 -29
- sksurv/compare.py +22 -22
- sksurv/datasets/base.py +45 -20
- sksurv/docstrings.py +99 -0
- sksurv/ensemble/_coxph_loss.cpython-313-darwin.so +0 -0
- sksurv/ensemble/boosting.py +116 -168
- sksurv/ensemble/forest.py +94 -151
- sksurv/functions.py +29 -29
- sksurv/io/arffread.py +34 -3
- sksurv/io/arffwrite.py +38 -2
- sksurv/kernels/_clinical_kernel.cpython-313-darwin.so +0 -0
- sksurv/kernels/clinical.py +33 -13
- sksurv/linear_model/_coxnet.cpython-313-darwin.so +0 -0
- sksurv/linear_model/aft.py +14 -11
- sksurv/linear_model/coxnet.py +138 -89
- sksurv/linear_model/coxph.py +102 -83
- sksurv/meta/ensemble_selection.py +91 -9
- sksurv/meta/stacking.py +47 -26
- sksurv/metrics.py +257 -224
- sksurv/nonparametric.py +150 -81
- sksurv/preprocessing.py +55 -27
- sksurv/svm/_minlip.cpython-313-darwin.so +0 -0
- sksurv/svm/_prsvm.cpython-313-darwin.so +0 -0
- sksurv/svm/minlip.py +160 -79
- sksurv/svm/naive_survival_svm.py +63 -34
- sksurv/svm/survival_svm.py +104 -104
- sksurv/tree/_criterion.cpython-313-darwin.so +0 -0
- sksurv/tree/tree.py +170 -84
- sksurv/util.py +80 -26
- scikit_survival-0.24.0.dist-info/METADATA +0 -888
- scikit_survival-0.24.0.dist-info/RECORD +0 -57
- {scikit_survival-0.24.0.dist-info → scikit_survival-0.25.0.dist-info/licenses}/COPYING +0 -0
- {scikit_survival-0.24.0.dist-info → scikit_survival-0.25.0.dist-info}/top_level.txt +0 -0
sksurv/metrics.py
CHANGED
|
@@ -148,19 +148,20 @@ def _estimate_concordance_index(event_indicator, event_time, estimate, weights,
|
|
|
148
148
|
|
|
149
149
|
|
|
150
150
|
def concordance_index_censored(event_indicator, event_time, estimate, tied_tol=1e-8):
|
|
151
|
-
"""
|
|
151
|
+
"""Measures the agreement between a predicted risk score and the actual time-to-event.
|
|
152
152
|
|
|
153
|
-
The concordance index is
|
|
154
|
-
|
|
153
|
+
The concordance index is a measure of rank correlation between predicted risk
|
|
154
|
+
scores and observed time points. It is defined as the proportion of all comparable
|
|
155
|
+
pairs in which the predictions and outcomes are concordant.
|
|
156
|
+
A pair of samples is concordant if the sample with a higher risk score has a
|
|
157
|
+
shorter time-to-event. A higher concordance index indicates better model performance.
|
|
155
158
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
the
|
|
159
|
-
|
|
159
|
+
A pair of samples is considered comparable if the sample with a shorter
|
|
160
|
+
survival time experienced an event. This means we can confidently say that
|
|
161
|
+
the individual with the shorter time had a worse outcome. If both samples
|
|
162
|
+
are censored, or if they experienced an event at the same time, they are
|
|
163
|
+
not comparable.
|
|
160
164
|
|
|
161
|
-
Concordance intuitively means that two samples were ordered correctly by the model.
|
|
162
|
-
More specifically, two samples are concordant, if the one with a higher estimated
|
|
163
|
-
risk score has a shorter actual survival time.
|
|
164
165
|
When predicted risks are identical for a pair, 0.5 rather than 1 is added to the count
|
|
165
166
|
of concordant pairs.
|
|
166
167
|
|
|
@@ -170,40 +171,40 @@ def concordance_index_censored(event_indicator, event_time, estimate, tied_tol=1
|
|
|
170
171
|
Parameters
|
|
171
172
|
----------
|
|
172
173
|
event_indicator : array-like, shape = (n_samples,)
|
|
173
|
-
|
|
174
|
-
|
|
174
|
+
A boolean array where ``True`` indicates an event and ``False`` indicates
|
|
175
|
+
censoring.
|
|
175
176
|
event_time : array-like, shape = (n_samples,)
|
|
176
|
-
Array containing the time of an event or time of censoring
|
|
177
|
-
|
|
177
|
+
Array containing the time of an event or time of censoring.
|
|
178
178
|
estimate : array-like, shape = (n_samples,)
|
|
179
|
-
|
|
180
|
-
|
|
179
|
+
The predicted risk score for each sample (e.g., from ``estimator.predict(X)``).
|
|
180
|
+
A higher value indicates a higher risk of experiencing an event.
|
|
181
181
|
tied_tol : float, optional, default: 1e-8
|
|
182
|
-
The tolerance value for considering ties.
|
|
183
|
-
|
|
184
|
-
|
|
182
|
+
The tolerance value for considering ties in risk scores. If the
|
|
183
|
+
absolute difference between two risk scores is smaller than or equal to
|
|
184
|
+
``tied_tol``, they are considered tied.
|
|
185
185
|
|
|
186
186
|
Returns
|
|
187
187
|
-------
|
|
188
188
|
cindex : float
|
|
189
|
-
|
|
190
|
-
|
|
189
|
+
The concordance index.
|
|
191
190
|
concordant : int
|
|
192
|
-
|
|
193
|
-
|
|
191
|
+
The number of concordant pairs.
|
|
194
192
|
discordant : int
|
|
195
|
-
|
|
196
|
-
|
|
193
|
+
The number of discordant pairs.
|
|
197
194
|
tied_risk : int
|
|
198
|
-
|
|
199
|
-
|
|
195
|
+
The number of pairs with tied risk scores.
|
|
200
196
|
tied_time : int
|
|
201
|
-
|
|
197
|
+
The number of comparable pairs with tied survival times.
|
|
198
|
+
|
|
199
|
+
Notes
|
|
200
|
+
-----
|
|
201
|
+
This metric expects risk scores, which are typically returned by ``estimator.predict(X)``.
|
|
202
|
+
It *does not accept* survival probabilities.
|
|
202
203
|
|
|
203
204
|
See also
|
|
204
205
|
--------
|
|
205
206
|
concordance_index_ipcw
|
|
206
|
-
|
|
207
|
+
A less biased estimator of the concordance index.
|
|
207
208
|
|
|
208
209
|
References
|
|
209
210
|
----------
|
|
@@ -220,25 +221,29 @@ def concordance_index_censored(event_indicator, event_time, estimate, tied_tol=1
|
|
|
220
221
|
|
|
221
222
|
|
|
222
223
|
def concordance_index_ipcw(survival_train, survival_test, estimate, tau=None, tied_tol=1e-8):
|
|
223
|
-
"""Concordance index for right-censored data based on inverse probability of censoring weights.
|
|
224
|
+
r"""Concordance index for right-censored data based on inverse probability of censoring weights.
|
|
224
225
|
|
|
225
226
|
This is an alternative to the estimator in :func:`concordance_index_censored`
|
|
226
227
|
that does not depend on the distribution of censoring times in the test data.
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
228
|
+
By using inverse probability of censoring weights (IPCW), it provides an unbiased
|
|
229
|
+
and consistent estimate of the population concordance measure.
|
|
230
|
+
|
|
231
|
+
This estimator requires access to survival times from the training data to
|
|
232
|
+
estimate the censoring distribution.
|
|
233
|
+
Note that survival times in `survival_test` must lie within the range of
|
|
234
|
+
survival times in `survival_train`. This can be achieved by specifying the
|
|
235
|
+
truncation time `tau`.
|
|
235
236
|
The resulting `cindex` tells how well the given prediction model works in
|
|
236
237
|
predicting events that occur in the time range from 0 to `tau`.
|
|
237
238
|
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
239
|
+
For time points in `survival_test` that lie outside of the range specified by
|
|
240
|
+
values in `survival_train`, the probability of censoring is unknown and an
|
|
241
|
+
exception will be raised::
|
|
242
|
+
|
|
243
|
+
ValueError: time must be smaller than largest observed time point
|
|
244
|
+
|
|
245
|
+
The censoring distribution is estimated using the Kaplan-Meier estimator, which
|
|
246
|
+
assumes that censoring is random and independent of the features.
|
|
242
247
|
|
|
243
248
|
See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb>`
|
|
244
249
|
and [1]_ for further description.
|
|
@@ -246,57 +251,54 @@ def concordance_index_ipcw(survival_train, survival_test, estimate, tau=None, ti
|
|
|
246
251
|
Parameters
|
|
247
252
|
----------
|
|
248
253
|
survival_train : structured array, shape = (n_train_samples,)
|
|
249
|
-
Survival times for training data to estimate the censoring
|
|
250
|
-
distribution
|
|
251
|
-
A structured array
|
|
252
|
-
|
|
253
|
-
second field.
|
|
254
|
-
|
|
254
|
+
Survival times for the training data, used to estimate the censoring
|
|
255
|
+
distribution.
|
|
256
|
+
A structured array with two fields. The first field is a boolean
|
|
257
|
+
where ``True`` indicates an event and ``False`` indicates right-censoring.
|
|
258
|
+
The second field is a float with the time of event or time of censoring.
|
|
255
259
|
survival_test : structured array, shape = (n_samples,)
|
|
256
|
-
Survival times
|
|
257
|
-
A structured array
|
|
258
|
-
|
|
259
|
-
second field.
|
|
260
|
-
|
|
260
|
+
Survival times for the test data.
|
|
261
|
+
A structured array with two fields. The first field is a boolean
|
|
262
|
+
where ``True`` indicates an event and ``False`` indicates right-censoring.
|
|
263
|
+
The second field is a float with the time of event or time of censoring.
|
|
261
264
|
estimate : array-like, shape = (n_samples,)
|
|
262
|
-
|
|
263
|
-
|
|
265
|
+
Predicted risk scores for the test data (e.g., from ``estimator.predict(X)``).
|
|
266
|
+
A higher value indicates a higher risk of experiencing an event.
|
|
264
267
|
tau : float, optional
|
|
265
268
|
Truncation time. The survival function for the underlying
|
|
266
269
|
censoring time distribution :math:`D` needs to be positive
|
|
267
270
|
at `tau`, i.e., `tau` should be chosen such that the
|
|
268
271
|
probability of being censored after time `tau` is non-zero:
|
|
269
|
-
:math:`P(D >
|
|
270
|
-
|
|
272
|
+
:math:`P(D > \tau) > 0`. If `None`, no truncation is performed.
|
|
271
273
|
tied_tol : float, optional, default: 1e-8
|
|
272
|
-
The tolerance value for considering ties.
|
|
273
|
-
If the absolute difference between risk scores is smaller
|
|
274
|
-
or equal
|
|
274
|
+
The tolerance value for considering ties in risk scores.
|
|
275
|
+
If the absolute difference between two risk scores is smaller than
|
|
276
|
+
or equal to ``tied_tol``, they are considered tied.
|
|
275
277
|
|
|
276
278
|
Returns
|
|
277
279
|
-------
|
|
278
280
|
cindex : float
|
|
279
|
-
|
|
280
|
-
|
|
281
|
+
The concordance index.
|
|
281
282
|
concordant : int
|
|
282
|
-
|
|
283
|
-
|
|
283
|
+
The number of concordant pairs.
|
|
284
284
|
discordant : int
|
|
285
|
-
|
|
286
|
-
|
|
285
|
+
The number of discordant pairs.
|
|
287
286
|
tied_risk : int
|
|
288
|
-
|
|
289
|
-
|
|
287
|
+
The number of pairs with tied risk scores.
|
|
290
288
|
tied_time : int
|
|
291
|
-
|
|
289
|
+
The number of comparable pairs with tied survival times.
|
|
290
|
+
|
|
291
|
+
Notes
|
|
292
|
+
-----
|
|
293
|
+
This metric expects risk scores, which are typically returned by ``estimator.predict(X)``.
|
|
294
|
+
It *does not accept* survival probabilities.
|
|
292
295
|
|
|
293
296
|
See also
|
|
294
297
|
--------
|
|
295
298
|
concordance_index_censored
|
|
296
|
-
|
|
297
|
-
|
|
299
|
+
A simpler, but potentially biased, estimator of the concordance index.
|
|
298
300
|
as_concordance_index_ipcw_scorer
|
|
299
|
-
|
|
301
|
+
A wrapper class that uses :func:`concordance_index_ipcw`
|
|
300
302
|
in its ``score`` method instead of the default
|
|
301
303
|
:func:`concordance_index_censored`.
|
|
302
304
|
|
|
@@ -331,56 +333,69 @@ def concordance_index_ipcw(survival_train, survival_test, estimate, tau=None, ti
|
|
|
331
333
|
|
|
332
334
|
|
|
333
335
|
def cumulative_dynamic_auc(survival_train, survival_test, estimate, times, tied_tol=1e-8):
|
|
334
|
-
"""
|
|
336
|
+
r"""Computes the cumulative/dynamic area under the ROC curve (AUC) for right-censored data.
|
|
337
|
+
|
|
338
|
+
This metric evaluates a model's performance at specific time points.
|
|
339
|
+
The cumulative/dynamic AUC at time :math:`t` quantifies how well a model can
|
|
340
|
+
distinguish subjects who experience an event by time :math:`t` (cases) from
|
|
341
|
+
those who do not (controls). A higher AUC indicates better model performance.
|
|
342
|
+
|
|
343
|
+
This function can also evaluate models with time-dependent predictions, such as
|
|
344
|
+
:class:`sksurv.ensemble.RandomSurvivalForest`
|
|
345
|
+
(see :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Using-Time-dependent-Risk-Scores>`).
|
|
346
|
+
In this case, ``estimate`` must be a 2D array where ``estimate[i, j]`` is the
|
|
347
|
+
predicted risk score for the :math:`i`-th instance at time point ``times[j]``.
|
|
335
348
|
|
|
336
349
|
The receiver operating characteristic (ROC) curve and the area under the
|
|
337
|
-
ROC curve (AUC)
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
Given an estimator of the :math:`i`-th individual's
|
|
347
|
-
:math
|
|
348
|
-
:math:`t` is defined as
|
|
350
|
+
ROC curve (AUC) are metrics to evaluate a binary classifier. Each point on
|
|
351
|
+
the ROC denotes the performance of a binary classifier at a specific
|
|
352
|
+
threshold with respect to the sensitivity (true positive rate) on the
|
|
353
|
+
y-axis and the false positive rate (1 - specificity) on the x-axis.
|
|
354
|
+
|
|
355
|
+
ROC and AUC can be extended to survival analysis by defining cases and
|
|
356
|
+
controls based on a time point :math:`t`. *Cumulative cases* are all
|
|
357
|
+
individuals that experienced an event prior to or at time
|
|
358
|
+
:math:`t` (:math:`t_i \leq t`), whereas *dynamic controls* are those
|
|
359
|
+
with :math:`t_i > t`. Given an estimator of the :math:`i`-th individual's
|
|
360
|
+
risk score :math:`\hat{f}(\mathbf{x}_i)`, the cumulative/dynamic AUC at
|
|
361
|
+
time :math:`t` is defined as
|
|
349
362
|
|
|
350
363
|
.. math::
|
|
351
364
|
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
I(
|
|
355
|
-
{(
|
|
365
|
+
\widehat{\mathrm{AUC}}(t) =
|
|
366
|
+
\frac{\sum_{i=1}^n \sum_{j=1}^n I(y_j > t) I(y_i \leq t) \omega_i
|
|
367
|
+
I(\hat{f}(\mathbf{x}_j) \leq \hat{f}(\mathbf{x}_i))}
|
|
368
|
+
{(\sum_{i=1}^n I(y_i > t)) (\sum_{i=1}^n I(y_i \leq t) \omega_i)}
|
|
356
369
|
|
|
357
|
-
where :math
|
|
370
|
+
where :math:`\omega_i` are inverse probability of censoring weights (IPCW).
|
|
358
371
|
|
|
359
|
-
To
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
restricted to situations where the random censoring assumption holds and
|
|
366
|
-
censoring is independent of the features.
|
|
372
|
+
To account for censoring, this metric uses inverse probability of censoring
|
|
373
|
+
weights (IPCW), which requires access to survival times from the training
|
|
374
|
+
data to estimate the censoring distribution. Note that survival times in
|
|
375
|
+
``survival_test`` must lie within the range of survival times in ``survival_train``.
|
|
376
|
+
This can be achieved by specifying ``times`` accordingly, e.g. by setting
|
|
377
|
+
``times[-1]`` slightly below the maximum expected follow-up time.
|
|
367
378
|
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
379
|
+
For time points in ``survival_test`` that lie outside of the range specified by
|
|
380
|
+
values in ``survival_train``, the probability of censoring is unknown and an
|
|
381
|
+
exception will be raised::
|
|
382
|
+
|
|
383
|
+
ValueError: time must be smaller than largest observed time point
|
|
373
384
|
|
|
374
|
-
|
|
375
|
-
|
|
385
|
+
The censoring distribution is estimated using the Kaplan-Meier estimator, which
|
|
386
|
+
assumes that censoring is random and independent of the features.
|
|
387
|
+
|
|
388
|
+
The function also returns a summary measure, which is the mean of the
|
|
389
|
+
:math:`\mathrm{AUC}(t)` over the specified time range, weighted by the
|
|
390
|
+
estimated survival function:
|
|
376
391
|
|
|
377
392
|
.. math::
|
|
378
393
|
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
394
|
+
\overline{\mathrm{AUC}}(\tau_1, \tau_2) =
|
|
395
|
+
\frac{1}{\hat{S}(\tau_1) - \hat{S}(\tau_2)}
|
|
396
|
+
\int_{\tau_1}^{\tau_2} \widehat{\mathrm{AUC}}(t)\,d \hat{S}(t)
|
|
382
397
|
|
|
383
|
-
where :math
|
|
398
|
+
where :math:`\hat{S}(t)` is the Kaplan-Meier estimator of the survival function.
|
|
384
399
|
|
|
385
400
|
See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Time-dependent-Area-under-the-ROC>`,
|
|
386
401
|
[1]_, [2]_, [3]_ for further description.
|
|
@@ -388,47 +403,47 @@ def cumulative_dynamic_auc(survival_train, survival_test, estimate, times, tied_
|
|
|
388
403
|
Parameters
|
|
389
404
|
----------
|
|
390
405
|
survival_train : structured array, shape = (n_train_samples,)
|
|
391
|
-
Survival times for training data to estimate the censoring
|
|
392
|
-
distribution
|
|
393
|
-
A structured array
|
|
394
|
-
|
|
395
|
-
second field.
|
|
396
|
-
|
|
406
|
+
Survival times for the training data, used to estimate the censoring
|
|
407
|
+
distribution.
|
|
408
|
+
A structured array with two fields. The first field is a boolean
|
|
409
|
+
where ``True`` indicates an event and ``False`` indicates right-censoring.
|
|
410
|
+
The second field is a float with the time of event or time of censoring.
|
|
397
411
|
survival_test : structured array, shape = (n_samples,)
|
|
398
|
-
Survival times
|
|
399
|
-
A structured array
|
|
400
|
-
|
|
401
|
-
second field.
|
|
402
|
-
|
|
412
|
+
Survival times for the test data.
|
|
413
|
+
A structured array with two fields. The first field is a boolean
|
|
414
|
+
where ``True`` indicates an event and ``False`` indicates right-censoring.
|
|
415
|
+
The second field is a float with the time of event or time of censoring.
|
|
403
416
|
estimate : array-like, shape = (n_samples,) or (n_samples, n_times)
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
j
|
|
408
|
-
|
|
417
|
+
Predicted risk scores for the test data (e.g., from ``estimator.predict(X)``).
|
|
418
|
+
A higher value indicates a higher risk of experiencing an event.
|
|
419
|
+
If a 1D array is provided, the same risk score is used for all time points.
|
|
420
|
+
If a 2D array is provided, ``estimate[:, j]`` is used for the :math:`j`-th
|
|
421
|
+
time point.
|
|
409
422
|
times : array-like, shape = (n_times,)
|
|
410
|
-
The time points
|
|
411
|
-
|
|
412
|
-
within the range of follow-up times of the test data
|
|
413
|
-
`survival_test`.
|
|
414
|
-
|
|
423
|
+
The time points at which to compute the AUC. Values must be within the
|
|
424
|
+
range of follow-up times in ``survival_test``.
|
|
415
425
|
tied_tol : float, optional, default: 1e-8
|
|
416
|
-
The tolerance value for considering ties.
|
|
417
|
-
|
|
418
|
-
|
|
426
|
+
The tolerance value for considering ties in risk scores. If the
|
|
427
|
+
absolute difference between two risk scores is smaller than or equal to
|
|
428
|
+
``tied_tol``, they are considered tied.
|
|
419
429
|
|
|
420
430
|
Returns
|
|
421
431
|
-------
|
|
422
|
-
auc :
|
|
423
|
-
The cumulative/dynamic AUC estimates
|
|
432
|
+
auc : ndarray, shape = (n_times,)
|
|
433
|
+
The cumulative/dynamic AUC estimates at each time point in ``times``.
|
|
424
434
|
mean_auc : float
|
|
425
|
-
|
|
426
|
-
|
|
435
|
+
The mean cumulative/dynamic AUC over the specified time range ``(times[0], times[-1])``.
|
|
436
|
+
|
|
437
|
+
Notes
|
|
438
|
+
-----
|
|
439
|
+
This metric expects risk scores, which are typically returned by ``estimator.predict(X)``
|
|
440
|
+
(for time-independent risks), or ``estimator.predict_cumulative_hazard_function(X)``
|
|
441
|
+
(for time-dependent risks). It *does not accept* survival probabilities.
|
|
427
442
|
|
|
428
443
|
See also
|
|
429
444
|
--------
|
|
430
445
|
as_cumulative_dynamic_auc_scorer
|
|
431
|
-
|
|
446
|
+
A wrapper class that uses :func:`cumulative_dynamic_auc`
|
|
432
447
|
in its ``score`` method instead of the default
|
|
433
448
|
:func:`concordance_index_censored`.
|
|
434
449
|
|
|
@@ -512,20 +527,41 @@ def cumulative_dynamic_auc(survival_train, survival_test, estimate, times, tied_
|
|
|
512
527
|
|
|
513
528
|
|
|
514
529
|
def brier_score(survival_train, survival_test, estimate, times):
|
|
515
|
-
"""
|
|
530
|
+
r"""The time-dependent Brier score for right-censored data.
|
|
531
|
+
|
|
532
|
+
The time-dependent Brier score measures the inaccuracy of
|
|
533
|
+
predicted survival probabilities at a given time point.
|
|
534
|
+
It is the mean squared error between the true survival status
|
|
535
|
+
and the predicted survival probability at time point :math:`t`.
|
|
536
|
+
A lower Brier score indicates better model performance.
|
|
516
537
|
|
|
517
|
-
|
|
538
|
+
To account for censoring, this metric uses inverse probability of censoring
|
|
539
|
+
weights (IPCW), which requires access to survival times from the training
|
|
540
|
+
data to estimate the censoring distribution. Note that survival times in
|
|
541
|
+
``survival_test`` must lie within the range of survival times in ``survival_train``.
|
|
542
|
+
This can be achieved by specifying ``times`` accordingly, e.g. by setting
|
|
543
|
+
``times[-1]`` slightly below the maximum expected follow-up time.
|
|
544
|
+
|
|
545
|
+
For time points in ``survival_test`` that lie outside of the range specified by
|
|
546
|
+
values in ``survival_train``, the probability of censoring is unknown and an
|
|
547
|
+
exception will be raised::
|
|
548
|
+
|
|
549
|
+
ValueError: time must be smaller than largest observed time point
|
|
550
|
+
|
|
551
|
+
The censoring distribution is estimated using the Kaplan-Meier estimator, which
|
|
552
|
+
assumes that censoring is random and independent of the features.
|
|
553
|
+
|
|
554
|
+
The time-dependent Brier score at time :math:`t` is defined as
|
|
518
555
|
|
|
519
556
|
.. math::
|
|
520
557
|
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
558
|
+
\mathrm{BS}^c(t) = \frac{1}{n} \sum_{i=1}^n I(y_i \leq t \land \delta_i = 1)
|
|
559
|
+
\frac{(0 - \hat{\pi}(t | \mathbf{x}_i))^2}{\hat{G}(y_i)} + I(y_i > t)
|
|
560
|
+
\frac{(1 - \hat{\pi}(t | \mathbf{x}_i))^2}{\hat{G}(t)} ,
|
|
524
561
|
|
|
525
|
-
where :math
|
|
526
|
-
|
|
527
|
-
and :math:`1
|
|
528
|
-
the Kaplan-Meier estimator.
|
|
562
|
+
where :math:`\hat{\pi}(t | \mathbf{x})` is the predicted survival probability
|
|
563
|
+
up to the time point :math:`t` for a feature vector :math:`\mathbf{x}`,
|
|
564
|
+
and :math:`1/\hat{G}(t)` is an inverse probability of censoring weight.
|
|
529
565
|
|
|
530
566
|
See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Time-dependent-Brier-Score>`
|
|
531
567
|
and [1]_ for details.
|
|
@@ -533,38 +569,38 @@ def brier_score(survival_train, survival_test, estimate, times):
|
|
|
533
569
|
Parameters
|
|
534
570
|
----------
|
|
535
571
|
survival_train : structured array, shape = (n_train_samples,)
|
|
536
|
-
Survival times for training data to estimate the censoring
|
|
537
|
-
distribution
|
|
538
|
-
A structured array
|
|
539
|
-
|
|
540
|
-
second field.
|
|
541
|
-
|
|
572
|
+
Survival times for the training data, used to estimate the censoring
|
|
573
|
+
distribution.
|
|
574
|
+
A structured array with two fields. The first field is a boolean
|
|
575
|
+
where ``True`` indicates an event and ``False`` indicates right-censoring.
|
|
576
|
+
The second field is a float with the time of event or time of censoring.
|
|
542
577
|
survival_test : structured array, shape = (n_samples,)
|
|
543
|
-
Survival times
|
|
544
|
-
A structured array
|
|
545
|
-
|
|
546
|
-
second field.
|
|
547
|
-
|
|
578
|
+
Survival times for the test data.
|
|
579
|
+
A structured array with two fields. The first field is a boolean
|
|
580
|
+
where ``True`` indicates an event and ``False`` indicates right-censoring.
|
|
581
|
+
The second field is a float with the time of event or time of censoring.
|
|
548
582
|
estimate : array-like, shape = (n_samples, n_times)
|
|
549
|
-
|
|
550
|
-
specified by
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
``predict_survival_function`` method.
|
|
555
|
-
|
|
583
|
+
Predicted survival probabilities for the test data at the time points
|
|
584
|
+
specified by ``times``, typically obtained from
|
|
585
|
+
``estimator.predict_survival_function(X)``. The value of ``estimate[:, i]``
|
|
586
|
+
must correspond to the estimated survival probability up to
|
|
587
|
+
the time point ``times[i]``.
|
|
556
588
|
times : array-like, shape = (n_times,)
|
|
557
|
-
The time points
|
|
558
|
-
|
|
559
|
-
the test data `survival_test`.
|
|
589
|
+
The time points at which to compute the Brier score. Values must be
|
|
590
|
+
within the range of follow-up times in ``survival_test``.
|
|
560
591
|
|
|
561
592
|
Returns
|
|
562
593
|
-------
|
|
563
|
-
times :
|
|
564
|
-
|
|
594
|
+
times : ndarray, shape = (n_times,)
|
|
595
|
+
The unique time points at which the Brier score was estimated.
|
|
596
|
+
brier_scores : ndarray, shape = (n_times,)
|
|
597
|
+
The Brier score at each time point in ``times``.
|
|
565
598
|
|
|
566
|
-
|
|
567
|
-
|
|
599
|
+
Notes
|
|
600
|
+
-----
|
|
601
|
+
This metric expects survival probabilities, which are typically returned by
|
|
602
|
+
``estimator.predict_survival_function(X)``.
|
|
603
|
+
It *does not accept* risk scores.
|
|
568
604
|
|
|
569
605
|
Examples
|
|
570
606
|
--------
|
|
@@ -576,7 +612,7 @@ def brier_score(survival_train, survival_test, estimate, times):
|
|
|
576
612
|
Load and prepare data.
|
|
577
613
|
|
|
578
614
|
>>> X, y = load_gbsg2()
|
|
579
|
-
>>> X
|
|
615
|
+
>>> X["tgrade"] = X.loc[:, "tgrade"].map(len).astype(int)
|
|
580
616
|
>>> Xt = OneHotEncoder().fit_transform(X)
|
|
581
617
|
|
|
582
618
|
Fit a Cox model.
|
|
@@ -636,17 +672,21 @@ def brier_score(survival_train, survival_test, estimate, times):
|
|
|
636
672
|
|
|
637
673
|
|
|
638
674
|
def integrated_brier_score(survival_train, survival_test, estimate, times):
|
|
639
|
-
"""
|
|
640
|
-
|
|
675
|
+
r"""Computes the integrated Brier score (IBS).
|
|
676
|
+
|
|
677
|
+
The IBS is an overall measure of the model's performance across all
|
|
678
|
+
available time points :math:`t_1 \leq t \leq t_\text{max}`.
|
|
679
|
+
It is the average Brier score, integrated over time.
|
|
680
|
+
A lower IBS indicates better model performance.
|
|
641
681
|
|
|
642
682
|
The integrated time-dependent Brier score over the interval
|
|
643
|
-
:math:`[t_1; t_
|
|
683
|
+
:math:`[t_1; t_\text{max}]` is defined as
|
|
644
684
|
|
|
645
685
|
.. math::
|
|
646
686
|
|
|
647
|
-
|
|
687
|
+
\mathrm{IBS} = \int_{t_1}^{t_\text{max}} \mathrm{BS}^c(t) d w(t)
|
|
648
688
|
|
|
649
|
-
where the weighting function is :math:`w(t) = t / t_
|
|
689
|
+
where the weighting function is :math:`w(t) = t / t_\text{max}`.
|
|
650
690
|
The integral is estimated via the trapezoidal rule.
|
|
651
691
|
|
|
652
692
|
See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Time-dependent-Brier-Score>`
|
|
@@ -655,36 +695,37 @@ def integrated_brier_score(survival_train, survival_test, estimate, times):
|
|
|
655
695
|
Parameters
|
|
656
696
|
----------
|
|
657
697
|
survival_train : structured array, shape = (n_train_samples,)
|
|
658
|
-
Survival times for training data to estimate the censoring
|
|
659
|
-
distribution
|
|
660
|
-
A structured array
|
|
661
|
-
|
|
662
|
-
second field.
|
|
663
|
-
|
|
698
|
+
Survival times for the training data, used to estimate the censoring
|
|
699
|
+
distribution.
|
|
700
|
+
A structured array with two fields. The first field is a boolean
|
|
701
|
+
where ``True`` indicates an event and ``False`` indicates right-censoring.
|
|
702
|
+
The second field is a float with the time of event or time of censoring.
|
|
664
703
|
survival_test : structured array, shape = (n_samples,)
|
|
665
|
-
Survival times
|
|
666
|
-
A structured array
|
|
667
|
-
|
|
668
|
-
second field.
|
|
669
|
-
|
|
704
|
+
Survival times for the test data.
|
|
705
|
+
A structured array with two fields. The first field is a boolean
|
|
706
|
+
where ``True`` indicates an event and ``False`` indicates right-censoring.
|
|
707
|
+
The second field is a float with the time of event or time of censoring.
|
|
670
708
|
estimate : array-like, shape = (n_samples, n_times)
|
|
671
|
-
|
|
672
|
-
specified by
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
``predict_survival_function`` method.
|
|
677
|
-
|
|
709
|
+
Predicted survival probabilities for the test data at the time points
|
|
710
|
+
specified by ``times``, typically obtained from
|
|
711
|
+
``estimator.predict_survival_function(X)``. The value of ``estimate[:, i]``
|
|
712
|
+
must correspond to the estimated survival probability up to
|
|
713
|
+
the time point ``times[i]``.
|
|
678
714
|
times : array-like, shape = (n_times,)
|
|
679
|
-
The time points
|
|
680
|
-
|
|
681
|
-
the test data `survival_test`.
|
|
715
|
+
The time points at which to compute the Brier score. Values must be
|
|
716
|
+
within the range of follow-up times in ``survival_test``.
|
|
682
717
|
|
|
683
718
|
Returns
|
|
684
719
|
-------
|
|
685
720
|
ibs : float
|
|
686
721
|
The integrated Brier score.
|
|
687
722
|
|
|
723
|
+
Notes
|
|
724
|
+
-----
|
|
725
|
+
This metric expects survival probabilities, which are typically returned by
|
|
726
|
+
``estimator.predict_survival_function(X)``.
|
|
727
|
+
It *does not accept* risk scores.
|
|
728
|
+
|
|
688
729
|
Examples
|
|
689
730
|
--------
|
|
690
731
|
>>> import numpy as np
|
|
@@ -696,7 +737,7 @@ def integrated_brier_score(survival_train, survival_test, estimate, times):
|
|
|
696
737
|
Load and prepare data.
|
|
697
738
|
|
|
698
739
|
>>> X, y = load_gbsg2()
|
|
699
|
-
>>> X
|
|
740
|
+
>>> X["tgrade"] = X.loc[:, "tgrade"].map(len).astype(int)
|
|
700
741
|
>>> Xt = OneHotEncoder().fit_transform(X)
|
|
701
742
|
|
|
702
743
|
Fit a Cox model.
|
|
@@ -713,8 +754,8 @@ def integrated_brier_score(survival_train, survival_test, estimate, times):
|
|
|
713
754
|
Compute the integrated Brier score from 1 to 5 years.
|
|
714
755
|
|
|
715
756
|
>>> score = integrated_brier_score(y, y, preds, times)
|
|
716
|
-
>>> print(score)
|
|
717
|
-
0.
|
|
757
|
+
>>> print(round(score, 4))
|
|
758
|
+
0.1816
|
|
718
759
|
|
|
719
760
|
See also
|
|
720
761
|
--------
|
|
@@ -788,11 +829,11 @@ class _ScoreOverrideMixin:
|
|
|
788
829
|
|
|
789
830
|
Parameters
|
|
790
831
|
----------
|
|
791
|
-
X : array-like
|
|
832
|
+
X : array-like, shape = (n_samples, n_features)
|
|
792
833
|
Input data, where n_samples is the number of samples and
|
|
793
834
|
n_features is the number of features.
|
|
794
835
|
|
|
795
|
-
y : array-like
|
|
836
|
+
y : array-like, shape = (n_samples,)
|
|
796
837
|
Target relative to X for classification or regression;
|
|
797
838
|
None for unsupervised learning.
|
|
798
839
|
|
|
@@ -867,17 +908,13 @@ class as_cumulative_dynamic_auc_scorer(_ScoreOverrideMixin, BaseEstimator):
|
|
|
867
908
|
----------
|
|
868
909
|
estimator : object
|
|
869
910
|
Instance of an estimator.
|
|
870
|
-
|
|
871
911
|
times : array-like, shape = (n_times,)
|
|
872
|
-
The time points
|
|
873
|
-
|
|
874
|
-
within the range of follow-up times of the test data
|
|
875
|
-
`survival_test`.
|
|
876
|
-
|
|
912
|
+
The time points at which to compute the AUC. Values must be within the
|
|
913
|
+
range of follow-up times of the test data.
|
|
877
914
|
tied_tol : float, optional, default: 1e-8
|
|
878
|
-
The tolerance value for considering ties.
|
|
879
|
-
|
|
880
|
-
|
|
915
|
+
The tolerance value for considering ties in risk scores. If the
|
|
916
|
+
absolute difference between two risk scores is smaller than or equal to
|
|
917
|
+
``tied_tol``, they are considered tied.
|
|
881
918
|
|
|
882
919
|
Attributes
|
|
883
920
|
----------
|
|
@@ -902,7 +939,7 @@ class as_cumulative_dynamic_auc_scorer(_ScoreOverrideMixin, BaseEstimator):
|
|
|
902
939
|
|
|
903
940
|
|
|
904
941
|
class as_concordance_index_ipcw_scorer(_ScoreOverrideMixin, BaseEstimator):
|
|
905
|
-
"""Wraps an estimator to use :func:`concordance_index_ipcw` as ``score`` function.
|
|
942
|
+
r"""Wraps an estimator to use :func:`concordance_index_ipcw` as ``score`` function.
|
|
906
943
|
|
|
907
944
|
See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Using-Metrics-in-Hyper-parameter-Search>`
|
|
908
945
|
for using it for hyper-parameter optimization.
|
|
@@ -911,18 +948,16 @@ class as_concordance_index_ipcw_scorer(_ScoreOverrideMixin, BaseEstimator):
|
|
|
911
948
|
----------
|
|
912
949
|
estimator : object
|
|
913
950
|
Instance of an estimator.
|
|
914
|
-
|
|
915
951
|
tau : float, optional
|
|
916
952
|
Truncation time. The survival function for the underlying
|
|
917
953
|
censoring time distribution :math:`D` needs to be positive
|
|
918
954
|
at `tau`, i.e., `tau` should be chosen such that the
|
|
919
955
|
probability of being censored after time `tau` is non-zero:
|
|
920
|
-
:math:`P(D >
|
|
921
|
-
|
|
956
|
+
:math:`P(D > \tau) > 0`. If `None`, no truncation is performed.
|
|
922
957
|
tied_tol : float, optional, default: 1e-8
|
|
923
|
-
The tolerance value for considering ties.
|
|
924
|
-
If the absolute difference between risk scores is smaller
|
|
925
|
-
or equal
|
|
958
|
+
The tolerance value for considering ties in risk scores.
|
|
959
|
+
If the absolute difference between two risk scores is smaller than
|
|
960
|
+
or equal to ``tied_tol``, they are considered tied.
|
|
926
961
|
|
|
927
962
|
Attributes
|
|
928
963
|
----------
|
|
@@ -959,11 +994,9 @@ class as_integrated_brier_score_scorer(_ScoreOverrideMixin, BaseEstimator):
|
|
|
959
994
|
----------
|
|
960
995
|
estimator : object
|
|
961
996
|
Instance of an estimator that provides ``predict_survival_function``.
|
|
962
|
-
|
|
963
997
|
times : array-like, shape = (n_times,)
|
|
964
|
-
The time points
|
|
965
|
-
|
|
966
|
-
the test data `survival_test`.
|
|
998
|
+
The time points at which to compute the Brier score. Values must be
|
|
999
|
+
within the range of follow-up times of the test data.
|
|
967
1000
|
|
|
968
1001
|
Attributes
|
|
969
1002
|
----------
|