scikit-survival 0.23.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. scikit_survival-0.23.1.dist-info/COPYING +674 -0
  2. scikit_survival-0.23.1.dist-info/METADATA +888 -0
  3. scikit_survival-0.23.1.dist-info/RECORD +55 -0
  4. scikit_survival-0.23.1.dist-info/WHEEL +5 -0
  5. scikit_survival-0.23.1.dist-info/top_level.txt +1 -0
  6. sksurv/__init__.py +138 -0
  7. sksurv/base.py +103 -0
  8. sksurv/bintrees/__init__.py +15 -0
  9. sksurv/bintrees/_binarytrees.cp313-win_amd64.pyd +0 -0
  10. sksurv/column.py +201 -0
  11. sksurv/compare.py +123 -0
  12. sksurv/datasets/__init__.py +10 -0
  13. sksurv/datasets/base.py +436 -0
  14. sksurv/datasets/data/GBSG2.arff +700 -0
  15. sksurv/datasets/data/actg320.arff +1169 -0
  16. sksurv/datasets/data/breast_cancer_GSE7390-metastasis.arff +283 -0
  17. sksurv/datasets/data/flchain.arff +7887 -0
  18. sksurv/datasets/data/veteran.arff +148 -0
  19. sksurv/datasets/data/whas500.arff +520 -0
  20. sksurv/ensemble/__init__.py +2 -0
  21. sksurv/ensemble/_coxph_loss.cp313-win_amd64.pyd +0 -0
  22. sksurv/ensemble/boosting.py +1610 -0
  23. sksurv/ensemble/forest.py +947 -0
  24. sksurv/ensemble/survival_loss.py +151 -0
  25. sksurv/exceptions.py +18 -0
  26. sksurv/functions.py +114 -0
  27. sksurv/io/__init__.py +2 -0
  28. sksurv/io/arffread.py +58 -0
  29. sksurv/io/arffwrite.py +145 -0
  30. sksurv/kernels/__init__.py +1 -0
  31. sksurv/kernels/_clinical_kernel.cp313-win_amd64.pyd +0 -0
  32. sksurv/kernels/clinical.py +328 -0
  33. sksurv/linear_model/__init__.py +3 -0
  34. sksurv/linear_model/_coxnet.cp313-win_amd64.pyd +0 -0
  35. sksurv/linear_model/aft.py +205 -0
  36. sksurv/linear_model/coxnet.py +543 -0
  37. sksurv/linear_model/coxph.py +618 -0
  38. sksurv/meta/__init__.py +4 -0
  39. sksurv/meta/base.py +35 -0
  40. sksurv/meta/ensemble_selection.py +642 -0
  41. sksurv/meta/stacking.py +349 -0
  42. sksurv/metrics.py +996 -0
  43. sksurv/nonparametric.py +588 -0
  44. sksurv/preprocessing.py +155 -0
  45. sksurv/svm/__init__.py +11 -0
  46. sksurv/svm/_minlip.cp313-win_amd64.pyd +0 -0
  47. sksurv/svm/_prsvm.cp313-win_amd64.pyd +0 -0
  48. sksurv/svm/minlip.py +606 -0
  49. sksurv/svm/naive_survival_svm.py +221 -0
  50. sksurv/svm/survival_svm.py +1228 -0
  51. sksurv/testing.py +108 -0
  52. sksurv/tree/__init__.py +1 -0
  53. sksurv/tree/_criterion.cp313-win_amd64.pyd +0 -0
  54. sksurv/tree/tree.py +703 -0
  55. sksurv/util.py +333 -0
@@ -0,0 +1,618 @@
1
+ # This program is free software: you can redistribute it and/or modify
2
+ # it under the terms of the GNU General Public License as published by
3
+ # the Free Software Foundation, either version 3 of the License, or
4
+ # (at your option) any later version.
5
+ #
6
+ # This program is distributed in the hope that it will be useful,
7
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
8
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9
+ # GNU General Public License for more details.
10
+ #
11
+ # You should have received a copy of the GNU General Public License
12
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
13
+ import numbers
14
+ import warnings
15
+
16
+ import numpy as np
17
+ from scipy.linalg import solve
18
+ from sklearn.base import BaseEstimator
19
+ from sklearn.exceptions import ConvergenceWarning
20
+ from sklearn.utils._param_validation import Interval, StrOptions
21
+ from sklearn.utils.validation import check_array, check_is_fitted
22
+
23
+ from ..base import SurvivalAnalysisMixin
24
+ from ..functions import StepFunction
25
+ from ..nonparametric import _compute_counts
26
+ from ..util import check_array_survival
27
+
28
+ __all__ = ["CoxPHSurvivalAnalysis"]
29
+
30
+
31
class BreslowEstimator:
    """Breslow's non-parametric estimator of the cumulative baseline hazard.

    Attributes
    ----------
    cum_baseline_hazard_ : :class:`sksurv.functions.StepFunction`
        Cumulative baseline hazard function.

    baseline_survival_ : :class:`sksurv.functions.StepFunction`
        Baseline survival function.

    unique_times_ : ndarray
        Unique event times.
    """

    def fit(self, linear_predictor, event, time):
        """Compute baseline cumulative hazard function.

        Parameters
        ----------
        linear_predictor : array-like, shape = (n_samples,)
            Linear predictor of risk: `X @ coef`.

        event : array-like, shape = (n_samples,)
            Contains binary event indicators.

        time : array-like, shape = (n_samples,)
            Contains event/censoring times.

        Returns
        -------
        self
        """
        exp_lp = np.exp(linear_predictor)
        idx_sorted = np.argsort(time, kind="mergesort")
        exp_lp = exp_lp[idx_sorted]
        uniq_times, n_events, n_at_risk, _ = _compute_counts(event, time, idx_sorted)

        # Denominator of Breslow's estimator: for each unique time point,
        # the sum of risk scores over all samples still at risk.  Samples
        # leave the risk set in ascending-time order, so subtract the risk
        # scores of those that dropped out since the previous time point.
        at_risk_sum = np.empty(n_at_risk.shape, dtype=float)
        remaining = np.sum(exp_lp)
        at_risk_sum[0] = remaining
        n_removed = 0
        for j in range(1, len(n_at_risk)):
            dropped = n_at_risk[j - 1] - n_at_risk[j]
            remaining -= exp_lp[n_removed : n_removed + dropped].sum()
            n_removed += dropped
            at_risk_sum[j] = remaining

        assert n_removed == n_at_risk[0] - n_at_risk[-1]

        # Cumulative hazard H0(t) = sum over event times <= t of d_i / sum-at-risk.
        cum_hazard = np.cumsum(n_events / at_risk_sum)
        self.cum_baseline_hazard_ = StepFunction(uniq_times, cum_hazard)
        # Baseline survival via S0(t) = exp(-H0(t)).
        self.baseline_survival_ = StepFunction(uniq_times, np.exp(-cum_hazard))
        self.unique_times_ = uniq_times
        return self

    def get_cumulative_hazard_function(self, linear_predictor):
        """Predict cumulative hazard function.

        Parameters
        ----------
        linear_predictor : array-like, shape = (n_samples,)
            Linear predictor of risk: `X @ coef`.

        Returns
        -------
        cum_hazard : ndarray, shape = (n_samples,)
            Predicted cumulative hazard functions.
        """
        exp_lp = np.exp(linear_predictor)
        base = self.cum_baseline_hazard_
        funcs = np.empty(exp_lp.shape[0], dtype=object)
        # Per-sample hazard is the baseline hazard scaled by the sample's risk score.
        for idx, risk in enumerate(exp_lp):
            funcs[idx] = StepFunction(x=base.x, y=base.y, a=risk)
        return funcs

    def get_survival_function(self, linear_predictor):
        """Predict survival function.

        Parameters
        ----------
        linear_predictor : array-like, shape = (n_samples,)
            Linear predictor of risk: `X @ coef`.

        Returns
        -------
        survival : ndarray, shape = (n_samples,)
            Predicted survival functions.
        """
        exp_lp = np.exp(linear_predictor)
        base = self.baseline_survival_
        funcs = np.empty(exp_lp.shape[0], dtype=object)
        # Per-sample survival is the baseline survival raised to the risk score.
        for idx, risk in enumerate(exp_lp):
            funcs[idx] = StepFunction(x=base.x, y=np.power(base.y, risk))
        return funcs
126
+
127
+
128
class CoxPHOptimizer:
    """Negative partial log-likelihood of Cox proportional hazards model.

    Supports Breslow's and Efron's method for handling tied event times.
    Samples are stored sorted by *descending* time so that tied groups can
    be consumed while growing the risk set incrementally.
    """

    def __init__(self, X, event, time, alpha, ties):
        # sort descending
        o = np.argsort(-time, kind="mergesort")
        self.x = X[o, :]
        self.event = event[o]
        self.time = time[o]
        # Per-feature ridge penalties; expected to be an ndarray of shape (n_features,).
        self.alpha = alpha
        # True if all penalties are (numerically) zero; lets update() skip the penalty term.
        self.no_alpha = np.all(self.alpha < np.finfo(self.alpha.dtype).eps)
        self._is_breslow = ties == "breslow"

    def nlog_likelihood(self, w):
        """Compute negative partial log-likelihood

        Parameters
        ----------
        w : array, shape = (n_features,)
            Estimate of coefficients

        Returns
        -------
        loss : float
            Average negative partial log-likelihood
        """
        time = self.time
        n_samples = self.x.shape[0]
        breslow = self._is_breslow
        xw = np.dot(self.x, w)

        loss = 0
        risk_set = 0
        k = 0
        # Iterate over groups of tied times (descending), accumulating the
        # risk-set denominator as later (larger-time) samples are passed.
        while k < n_samples:
            ti = time[k]
            numerator = 0
            n_events = 0
            risk_set2 = 0
            # Consume all samples tied at time ti; events are collected
            # separately (risk_set2) so ties can be handled per method below.
            while k < n_samples and ti == time[k]:
                if self.event[k]:
                    numerator += xw[k]
                    risk_set2 += np.exp(xw[k])
                    n_events += 1
                else:
                    risk_set += np.exp(xw[k])
                k += 1

            if n_events > 0:
                if breslow:
                    # Breslow: all tied events share the full risk set.
                    risk_set += risk_set2
                    loss -= (numerator - n_events * np.log(risk_set)) / n_samples
                else:
                    # Efron: each of the tied events sees the risk set grown
                    # by an average fraction of the tied events' risk scores.
                    numerator /= n_events
                    for _ in range(n_events):
                        risk_set += risk_set2 / n_events
                        loss -= (numerator - np.log(risk_set)) / n_samples

        # add regularization term to log-likelihood
        return loss + np.sum(self.alpha * np.square(w)) / (2.0 * n_samples)

    def update(self, w, offset=0):
        """Compute gradient and Hessian matrix with respect to `w`.

        Results are stored in ``self.gradient`` (shape (n_features,)) and
        ``self.hessian`` (shape (n_features, n_features)); nothing is returned.
        """
        time = self.time
        x = self.x
        breslow = self._is_breslow
        exp_xw = np.exp(offset + np.dot(x, w))
        n_samples, n_features = x.shape

        gradient = np.zeros((1, n_features), dtype=w.dtype)
        hessian = np.zeros((n_features, n_features), dtype=w.dtype)

        inv_n_samples = 1.0 / n_samples
        # Running sums over the current risk set: scalar, weighted features,
        # and weighted outer products (needed for gradient and Hessian).
        risk_set = 0
        risk_set_x = np.zeros((1, n_features), dtype=w.dtype)
        risk_set_xx = np.zeros((n_features, n_features), dtype=w.dtype)
        k = 0
        # iterate time in descending order
        while k < n_samples:
            ti = time[k]
            n_events = 0
            numerator = 0
            risk_set2 = 0
            risk_set_x2 = np.zeros_like(risk_set_x)
            risk_set_xx2 = np.zeros_like(risk_set_xx)
            while k < n_samples and ti == time[k]:
                # preserve 2D shape of row vector
                xk = x[k : k + 1]

                # outer product
                xx = np.dot(xk.T, xk)

                # Events tied at ti are accumulated separately so Breslow
                # and Efron can fold them into the risk set differently.
                if self.event[k]:
                    numerator += xk
                    risk_set2 += exp_xw[k]
                    risk_set_x2 += exp_xw[k] * xk
                    risk_set_xx2 += exp_xw[k] * xx
                    n_events += 1
                else:
                    risk_set += exp_xw[k]
                    risk_set_x += exp_xw[k] * xk
                    risk_set_xx += exp_xw[k] * xx
                k += 1

            if n_events > 0:
                if breslow:
                    # Breslow: fold all tied events into the risk set at once.
                    risk_set += risk_set2
                    risk_set_x += risk_set_x2
                    risk_set_xx += risk_set_xx2

                    z = risk_set_x / risk_set
                    gradient -= (numerator - n_events * z) * inv_n_samples

                    a = risk_set_xx / risk_set
                    # outer product
                    b = np.dot(z.T, z)

                    hessian += n_events * (a - b) * inv_n_samples
                else:
                    # Efron: grow the risk set in n_events equal steps,
                    # contributing one gradient/Hessian term per step.
                    numerator /= n_events
                    for _ in range(n_events):
                        risk_set += risk_set2 / n_events
                        risk_set_x += risk_set_x2 / n_events
                        risk_set_xx += risk_set_xx2 / n_events

                        z = risk_set_x / risk_set
                        gradient -= (numerator - z) * inv_n_samples

                        a = risk_set_xx / risk_set
                        # outer product
                        b = np.dot(z.T, z)

                        hessian += (a - b) * inv_n_samples

        if not self.no_alpha:
            # Ridge penalty: alpha/n * w on the gradient, alpha/n on the
            # Hessian diagonal (matches the penalty in nlog_likelihood).
            gradient += self.alpha * inv_n_samples * w

            diag_idx = np.diag_indices(n_features)
            hessian[diag_idx] += self.alpha * inv_n_samples

        self.gradient = gradient.ravel()
        self.hessian = hessian
270
+
271
+
272
class VerboseReporter:
    """Emit progress messages for the Newton-Raphson solver.

    Parameters
    ----------
    verbose : int
        Verbosity level; a message is printed only when ``verbose``
        exceeds the threshold associated with that message.
    """

    def __init__(self, verbose):
        self.verbose = verbose

    def _emit(self, threshold, message):
        # Central gate: print only when verbosity exceeds the threshold.
        if self.verbose > threshold:
            print(message)

    def end_max_iter(self, i):
        """Announce that the iteration limit was reached."""
        self._emit(0, f"iter {i + 1:>6d}: reached maximum number of iterations. Stopping.")

    def end_converged(self, i):
        """Announce that optimization converged."""
        self._emit(0, f"iter {i + 1:>6d}: optimization converged")

    def update(self, i, delta, loss_new):
        """Report the Newton update and the new loss for iteration ``i``."""
        self._emit(2, f"iter {i + 1:>6d}: update = {delta}")
        self._emit(1, f"iter {i + 1:>6d}: loss = {loss_new:.10f}")

    def step_halving(self, i, loss):
        """Report that step-halving was triggered at iteration ``i``."""
        self._emit(1, f"iter {i:>6d}: loss increased, performing step-halving. loss = {loss:.10f}")
293
+
294
+
295
class CoxPHSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
    """Cox proportional hazards model.

    There are two possible choices for handling tied event times.
    The default is Breslow's method, which considers each of the
    events at a given time as distinct. Efron's method is more
    accurate if there are a large number of ties. When the number
    of ties is small, the estimated coefficients by Breslow's and
    Efron's method are quite close. Uses Newton-Raphson optimization.

    See [1]_, [2]_, [3]_ for further description.

    Parameters
    ----------
    alpha : float, ndarray of shape (n_features,), optional, default: 0
        Regularization parameter for ridge regression penalty.
        If a single float, the same penalty is used for all features.
        If an array, there must be one penalty for each feature.
        If you want to include a subset of features without penalization,
        set the corresponding entries to 0.

    ties : {'breslow', 'efron'}, optional, default: 'breslow'
        The method to handle tied event times. If there are
        no tied event times all the methods are equivalent.

    n_iter : int, optional, default: 100
        Maximum number of iterations.

    tol : float, optional, default: 1e-9
        Convergence criteria. Convergence is based on the negative log-likelihood::

            |1 - (new neg. log-likelihood / old neg. log-likelihood) | < tol

    verbose : int, optional, default: 0
        Specifies the amount of additional debug information
        during optimization.

    Attributes
    ----------
    coef_ : ndarray, shape = (n_features,)
        Coefficients of the model

    cum_baseline_hazard_ : :class:`sksurv.functions.StepFunction`
        Estimated baseline cumulative hazard function.

    baseline_survival_ : :class:`sksurv.functions.StepFunction`
        Estimated baseline survival function.

    n_features_in_ : int
        Number of features seen during ``fit``.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during ``fit``. Defined only when `X`
        has feature names that are all strings.

    unique_times_ : array of shape = (n_unique_times,)
        Unique time points.

    See also
    --------
    sksurv.linear_model.CoxnetSurvivalAnalysis
        Cox proportional hazards model with l1 (LASSO) and l2 (ridge) penalty.

    References
    ----------
    .. [1] Cox, D. R. Regression models and life tables (with discussion).
        Journal of the Royal Statistical Society. Series B, 34, 187-220, 1972.
    .. [2] Breslow, N. E. Covariance Analysis of Censored Survival Data.
        Biometrics 30 (1974): 89–99.
    .. [3] Efron, B. The Efficiency of Cox’s Likelihood Function for Censored Data.
        Journal of the American Statistical Association 72 (1977): 557–565.
    """

    # Declarative parameter validation consumed by sklearn's _validate_params().
    _parameter_constraints: dict = {
        "alpha": [Interval(numbers.Real, 0, None, closed="left"), np.ndarray],
        "ties": [StrOptions({"breslow", "efron"})],
        "n_iter": [Interval(numbers.Integral, 1, None, closed="left")],
        "tol": [Interval(numbers.Real, 0, None, closed="left")],
        "verbose": ["verbose"],
    }

    def __init__(self, alpha=0, *, ties="breslow", n_iter=100, tol=1e-9, verbose=0):
        self.alpha = alpha
        self.ties = ties
        self.n_iter = n_iter
        self.tol = tol
        self.verbose = verbose

        # Breslow estimator fitted alongside the coefficients; backs the
        # cum_baseline_hazard_/baseline_survival_/unique_times_ properties.
        self._baseline_model = BreslowEstimator()

    @property
    def cum_baseline_hazard_(self):
        # Delegates to the fitted Breslow baseline model.
        return self._baseline_model.cum_baseline_hazard_

    @property
    def baseline_survival_(self):
        # Delegates to the fitted Breslow baseline model.
        return self._baseline_model.baseline_survival_

    @property
    def unique_times_(self):
        # Delegates to the fitted Breslow baseline model.
        return self._baseline_model.unique_times_

    def fit(self, X, y):
        """Minimize negative partial log-likelihood for provided data.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Data matrix

        y : structured array, shape = (n_samples,)
            A structured array containing the binary event indicator
            as first field, and time of event or time of censoring as
            second field.

        Returns
        -------
        self
        """
        self._validate_params()

        X = self._validate_data(X, ensure_min_samples=2, dtype=np.float64)
        event, time = check_array_survival(X, y)

        # Broadcast a scalar alpha to one ridge penalty per feature.
        if isinstance(self.alpha, (numbers.Real, numbers.Integral)):
            alphas = np.empty(X.shape[1], dtype=float)
            alphas[:] = self.alpha
        else:
            alphas = self.alpha

        alphas = check_array(alphas, ensure_2d=False, ensure_min_samples=0, estimator=self, input_name="alpha")
        if np.any(alphas < 0):
            raise ValueError(f"alpha must be positive, but was {self.alpha!r}")
        if alphas.shape[0] != X.shape[1]:
            raise ValueError(f"Length alphas ({alphas.shape[0]}) must match number of features ({X.shape[1]}).")

        optimizer = CoxPHOptimizer(X, event, time, alphas, self.ties)

        verbose_reporter = VerboseReporter(self.verbose)
        # Newton-Raphson with step-halving, starting from w = 0.
        w = np.zeros(X.shape[1])
        w_prev = w
        i = 0
        loss = float("inf")
        while True:
            if i >= self.n_iter:
                verbose_reporter.end_max_iter(i)
                warnings.warn(
                    ("Optimization did not converge: Maximum number of iterations has been exceeded."),
                    stacklevel=2,
                    category=ConvergenceWarning,
                )
                break

            # Newton step: solve H * delta = g for the update direction.
            optimizer.update(w)
            delta = solve(
                optimizer.hessian, optimizer.gradient, overwrite_a=False, overwrite_b=False, check_finite=False
            )

            if not np.all(np.isfinite(delta)):
                raise ValueError("search direction contains NaN or infinite values")

            w_new = w - delta
            loss_new = optimizer.nlog_likelihood(w_new)
            verbose_reporter.update(i, delta, loss_new)
            if loss_new > loss:
                # perform step-halving if negative log-likelihood does not decrease
                w = (w_prev + w) / 2
                loss = optimizer.nlog_likelihood(w)
                verbose_reporter.step_halving(i, loss)
                i += 1
                continue

            w_prev = w
            w = w_new

            # Relative change in the loss is the convergence criterion.
            res = np.abs(1 - (loss_new / loss))
            if res < self.tol:
                verbose_reporter.end_converged(i)
                break

            loss = loss_new
            i += 1

        self.coef_ = w
        # Fit the Breslow baseline hazard on the final linear predictor.
        self._baseline_model.fit(np.dot(X, self.coef_), event, time)
        return self

    def predict(self, X):
        """Predict risk scores.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Data matrix.

        Returns
        -------
        risk_score : array, shape = (n_samples,)
            Predicted risk scores.
        """
        check_is_fitted(self, "coef_")

        X = self._validate_data(X, reset=False)

        # Risk score is the linear predictor X @ coef (no exponentiation here).
        return np.dot(X, self.coef_)

    def predict_cumulative_hazard_function(self, X, return_array=False):
        """Predict cumulative hazard function.

        The cumulative hazard function for an individual
        with feature vector :math:`x` is defined as

        .. math::

            H(t \\mid x) = \\exp(x^\\top \\beta) H_0(t) ,

        where :math:`H_0(t)` is the baseline hazard function,
        estimated by Breslow's estimator.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Data matrix.

        return_array : boolean
            If set, return an array with the cumulative hazard rate
            for each `self.unique_times_`, otherwise an array of
            :class:`sksurv.functions.StepFunction`.

        Returns
        -------
        cum_hazard : ndarray
            If `return_array` is set, an array with the cumulative hazard rate
            for each `self.unique_times_`, otherwise an array of length `n_samples`
            of :class:`sksurv.functions.StepFunction` instances will be returned.

        Examples
        --------
        >>> import matplotlib.pyplot as plt
        >>> from sksurv.datasets import load_whas500
        >>> from sksurv.linear_model import CoxPHSurvivalAnalysis

        Load the data.

        >>> X, y = load_whas500()
        >>> X = X.astype(float)

        Fit the model.

        >>> estimator = CoxPHSurvivalAnalysis().fit(X, y)

        Estimate the cumulative hazard function for the first 10 samples.

        >>> chf_funcs = estimator.predict_cumulative_hazard_function(X.iloc[:10])

        Plot the estimated cumulative hazard functions.

        >>> for fn in chf_funcs:
        ...     plt.step(fn.x, fn(fn.x), where="post")
        ...
        >>> plt.ylim(0, 1)
        >>> plt.show()
        """
        return self._predict_cumulative_hazard_function(self._baseline_model, self.predict(X), return_array)

    def predict_survival_function(self, X, return_array=False):
        """Predict survival function.

        The survival function for an individual
        with feature vector :math:`x` is defined as

        .. math::

            S(t \\mid x) = S_0(t)^{\\exp(x^\\top \\beta)} ,

        where :math:`S_0(t)` is the baseline survival function,
        estimated by Breslow's estimator.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Data matrix.

        return_array : boolean, default: False
            If set, return an array with the probability
            of survival for each `self.unique_times_`,
            otherwise an array of :class:`sksurv.functions.StepFunction`.

        Returns
        -------
        survival : ndarray
            If `return_array` is set, an array with the probability of
            survival for each `self.unique_times_`, otherwise an array of
            length `n_samples` of :class:`sksurv.functions.StepFunction`
            instances will be returned.

        Examples
        --------
        >>> import matplotlib.pyplot as plt
        >>> from sksurv.datasets import load_whas500
        >>> from sksurv.linear_model import CoxPHSurvivalAnalysis

        Load the data.

        >>> X, y = load_whas500()
        >>> X = X.astype(float)

        Fit the model.

        >>> estimator = CoxPHSurvivalAnalysis().fit(X, y)

        Estimate the survival function for the first 10 samples.

        >>> surv_funcs = estimator.predict_survival_function(X.iloc[:10])

        Plot the estimated survival functions.

        >>> for fn in surv_funcs:
        ...     plt.step(fn.x, fn(fn.x), where="post")
        ...
        >>> plt.ylim(0, 1)
        >>> plt.show()
        """
        return self._predict_survival_function(self._baseline_model, self.predict(X), return_array)
@@ -0,0 +1,4 @@
1
+ from .ensemble_selection import EnsembleSelection, EnsembleSelectionRegressor, MeanEstimator
2
+ from .stacking import Stacking
3
+
4
+ __all__ = ["EnsembleSelection", "EnsembleSelectionRegressor", "MeanEstimator", "Stacking"]
sksurv/meta/base.py ADDED
@@ -0,0 +1,35 @@
1
+ # This program is free software: you can redistribute it and/or modify
2
+ # it under the terms of the GNU General Public License as published by
3
+ # the Free Software Foundation, either version 3 of the License, or
4
+ # (at your option) any later version.
5
+ #
6
+ # This program is distributed in the hope that it will be useful,
7
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
8
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9
+ # GNU General Public License for more details.
10
+ #
11
+ # You should have received a copy of the GNU General Public License
12
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
13
+ import numbers
14
+
15
+ from sklearn.utils.metaestimators import _safe_split
16
+
17
+
18
def _fit_and_score(est, x, y, scorer, train_index, test_index, parameters, fit_params, predict_params):
    """Train survival model on given data and return its score on test data"""
    # Split out the training fold and fit with the requested parameters.
    X_train, y_train = _safe_split(est, x, y, train_index)
    est.set_params(**parameters)
    est.fit(X_train, y_train, **dict(fit_params))

    # Split out the test fold (train_index is passed so pairwise kernels
    # are indexed correctly) and score the fitted estimator.
    X_test, y_test = _safe_split(est, x, y, test_index, train_index)
    score = scorer(est, X_test, y_test, **dict(predict_params))

    if not isinstance(score, numbers.Number):
        raise ValueError(f"scoring must return a number, got {score!s} ({type(score)}) instead.")
    return score