scikit_survival-0.25.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. scikit_survival-0.25.0.dist-info/METADATA +185 -0
  2. scikit_survival-0.25.0.dist-info/RECORD +58 -0
  3. scikit_survival-0.25.0.dist-info/WHEEL +6 -0
  4. scikit_survival-0.25.0.dist-info/licenses/COPYING +674 -0
  5. scikit_survival-0.25.0.dist-info/top_level.txt +1 -0
  6. sksurv/__init__.py +183 -0
  7. sksurv/base.py +115 -0
  8. sksurv/bintrees/__init__.py +15 -0
  9. sksurv/bintrees/_binarytrees.cpython-313-x86_64-linux-gnu.so +0 -0
  10. sksurv/column.py +205 -0
  11. sksurv/compare.py +123 -0
  12. sksurv/datasets/__init__.py +12 -0
  13. sksurv/datasets/base.py +614 -0
  14. sksurv/datasets/data/GBSG2.arff +700 -0
  15. sksurv/datasets/data/actg320.arff +1169 -0
  16. sksurv/datasets/data/bmt.arff +46 -0
  17. sksurv/datasets/data/breast_cancer_GSE7390-metastasis.arff +283 -0
  18. sksurv/datasets/data/cgvhd.arff +118 -0
  19. sksurv/datasets/data/flchain.arff +7887 -0
  20. sksurv/datasets/data/veteran.arff +148 -0
  21. sksurv/datasets/data/whas500.arff +520 -0
  22. sksurv/docstrings.py +99 -0
  23. sksurv/ensemble/__init__.py +2 -0
  24. sksurv/ensemble/_coxph_loss.cpython-313-x86_64-linux-gnu.so +0 -0
  25. sksurv/ensemble/boosting.py +1564 -0
  26. sksurv/ensemble/forest.py +902 -0
  27. sksurv/ensemble/survival_loss.py +151 -0
  28. sksurv/exceptions.py +18 -0
  29. sksurv/functions.py +114 -0
  30. sksurv/io/__init__.py +2 -0
  31. sksurv/io/arffread.py +89 -0
  32. sksurv/io/arffwrite.py +181 -0
  33. sksurv/kernels/__init__.py +1 -0
  34. sksurv/kernels/_clinical_kernel.cpython-313-x86_64-linux-gnu.so +0 -0
  35. sksurv/kernels/clinical.py +348 -0
  36. sksurv/linear_model/__init__.py +3 -0
  37. sksurv/linear_model/_coxnet.cpython-313-x86_64-linux-gnu.so +0 -0
  38. sksurv/linear_model/aft.py +208 -0
  39. sksurv/linear_model/coxnet.py +592 -0
  40. sksurv/linear_model/coxph.py +637 -0
  41. sksurv/meta/__init__.py +4 -0
  42. sksurv/meta/base.py +35 -0
  43. sksurv/meta/ensemble_selection.py +724 -0
  44. sksurv/meta/stacking.py +370 -0
  45. sksurv/metrics.py +1028 -0
  46. sksurv/nonparametric.py +911 -0
  47. sksurv/preprocessing.py +183 -0
  48. sksurv/svm/__init__.py +11 -0
  49. sksurv/svm/_minlip.cpython-313-x86_64-linux-gnu.so +0 -0
  50. sksurv/svm/_prsvm.cpython-313-x86_64-linux-gnu.so +0 -0
  51. sksurv/svm/minlip.py +690 -0
  52. sksurv/svm/naive_survival_svm.py +249 -0
  53. sksurv/svm/survival_svm.py +1236 -0
  54. sksurv/testing.py +108 -0
  55. sksurv/tree/__init__.py +1 -0
  56. sksurv/tree/_criterion.cpython-313-x86_64-linux-gnu.so +0 -0
  57. sksurv/tree/tree.py +790 -0
  58. sksurv/util.py +415 -0
sksurv/linear_model/coxph.py ADDED
@@ -0,0 +1,637 @@
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+ import numbers
+ import warnings
+
+ import numpy as np
+ from scipy.linalg import solve
+ from sklearn.base import BaseEstimator
+ from sklearn.exceptions import ConvergenceWarning
+ from sklearn.utils._param_validation import Interval, StrOptions
+ from sklearn.utils.validation import check_array, check_is_fitted, validate_data
+
+ from ..base import SurvivalAnalysisMixin
+ from ..docstrings import append_cumulative_hazard_example, append_survival_function_example
+ from ..functions import StepFunction
+ from ..nonparametric import _compute_counts
+ from ..util import check_array_survival
+
+ __all__ = ["CoxPHSurvivalAnalysis"]
+
+
+ class BreslowEstimator:
+     """Breslow's non-parametric estimator for the cumulative baseline hazard.
+
+     This class is used by :class:`CoxPHSurvivalAnalysis` to estimate the
+     cumulative baseline hazard and baseline survival function after the
+     coefficients of the Cox model have been fitted.
+
+     Attributes
+     ----------
+     cum_baseline_hazard_ : :class:`sksurv.functions.StepFunction`
+         Estimated cumulative baseline hazard function.
+
+     baseline_survival_ : :class:`sksurv.functions.StepFunction`
+         Estimated baseline survival function.
+
+     unique_times_ : ndarray, shape=(n_unique_times,)
+         Unique event times.
+     """
+
+     def fit(self, linear_predictor, event, time):
+         """Compute baseline cumulative hazard function.
+
+         Parameters
+         ----------
+         linear_predictor : array-like, shape = (n_samples,)
+             Linear predictor of risk: `X @ coef`.
+
+         event : array-like, shape = (n_samples,)
+             Contains binary event indicators.
+
+         time : array-like, shape = (n_samples,)
+             Contains event/censoring times.
+
+         Returns
+         -------
+         self
+         """
+         risk_score = np.exp(linear_predictor)
+         order = np.argsort(time, kind="mergesort")
+         risk_score = risk_score[order]
+         uniq_times, n_events, n_at_risk, _ = _compute_counts(event, time, order)
+
+         divisor = np.empty(n_at_risk.shape, dtype=float)
+         value = np.sum(risk_score)
+         divisor[0] = value
+         k = 0
+         for i in range(1, len(n_at_risk)):
+             d = n_at_risk[i - 1] - n_at_risk[i]
+             value -= risk_score[k : (k + d)].sum()
+             k += d
+             divisor[i] = value
+
+         assert k == n_at_risk[0] - n_at_risk[-1]
+
+         y = np.cumsum(n_events / divisor)
+         self.cum_baseline_hazard_ = StepFunction(uniq_times, y)
+         self.baseline_survival_ = StepFunction(uniq_times, np.exp(-y))
+         self.unique_times_ = uniq_times
+         return self
+
+     def get_cumulative_hazard_function(self, linear_predictor):
+         """Predict cumulative hazard function.
+
+         Parameters
+         ----------
+         linear_predictor : array-like, shape = (n_samples,)
+             Linear predictor of risk: `X @ coef`.
+
+         Returns
+         -------
+         cum_hazard : ndarray, shape = (n_samples,)
+             Predicted cumulative hazard functions.
+         """
+         risk_score = np.exp(linear_predictor)
+         n_samples = risk_score.shape[0]
+         funcs = np.empty(n_samples, dtype=object)
+         for i in range(n_samples):
+             funcs[i] = StepFunction(x=self.cum_baseline_hazard_.x, y=self.cum_baseline_hazard_.y, a=risk_score[i])
+         return funcs
+
+     def get_survival_function(self, linear_predictor):
+         """Predict survival function.
+
+         Parameters
+         ----------
+         linear_predictor : array-like, shape = (n_samples,)
+             Linear predictor of risk: `X @ coef`.
+
+         Returns
+         -------
+         survival : ndarray, shape = (n_samples,)
+             Predicted survival functions.
+         """
+         risk_score = np.exp(linear_predictor)
+         n_samples = risk_score.shape[0]
+         funcs = np.empty(n_samples, dtype=object)
+         for i in range(n_samples):
+             funcs[i] = StepFunction(x=self.baseline_survival_.x, y=np.power(self.baseline_survival_.y, risk_score[i]))
+         return funcs
+
+
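Editor's note (not part of the package source): written out, the quantity that `BreslowEstimator.fit` accumulates is Breslow's estimator of the cumulative baseline hazard, and the two getter methods rescale it per sample. With notation introduced here for illustration (:math:`d_i` events at time :math:`t_i`, :math:`R(t_i)` the set of samples still at risk at :math:`t_i`):

.. math::

    \hat{H}_0(t) = \sum_{t_i \leq t} \frac{d_i}{\sum_{j \in R(t_i)} \exp(x_j^\top \hat{\beta})}, \qquad
    \hat{S}_0(t) = \exp(-\hat{H}_0(t)), \qquad
    \hat{H}(t \mid x) = \exp(x^\top \hat{\beta}) \, \hat{H}_0(t)

In the code, `divisor[i]` holds the denominator at the i-th unique time and `np.cumsum(n_events / divisor)` forms the outer sum.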
+ class CoxPHOptimizer:
+     """Helper class for fitting the Cox proportional hazards model.
+
+     This class computes the negative log-likelihood, its gradient, and the
+     Hessian matrix for the Cox model. It is used internally by
+     :class:`CoxPHSurvivalAnalysis`.
+
+     Parameters
+     ----------
+     X : ndarray, shape=(n_samples, n_features)
+         The feature matrix.
+
+     event : ndarray, shape=(n_samples,)
+         The event indicator.
+
+     time : ndarray, shape=(n_samples,)
+         The event/censoring times.
+
+     alpha : ndarray, shape=(n_features,)
+         The regularization parameters.
+
+     ties : {'breslow', 'efron'}
+         The method to handle tied event times.
+     """
+
+     def __init__(self, X, event, time, alpha, ties):
+         # sort descending
+         o = np.argsort(-time, kind="mergesort")
+         self.x = X[o, :]
+         self.event = event[o]
+         self.time = time[o]
+         self.alpha = alpha
+         self.no_alpha = np.all(self.alpha < np.finfo(self.alpha.dtype).eps)
+         self._is_breslow = ties == "breslow"
+
+     def nlog_likelihood(self, w):
+         """Compute negative partial log-likelihood
+
+         Parameters
+         ----------
+         w : array, shape = (n_features,)
+             Estimate of coefficients
+
+         Returns
+         -------
+         loss : float
+             Average negative partial log-likelihood
+         """
+         time = self.time
+         n_samples = self.x.shape[0]
+         breslow = self._is_breslow
+         xw = np.dot(self.x, w)
+
+         loss = 0
+         risk_set = 0
+         k = 0
+         while k < n_samples:
+             ti = time[k]
+             numerator = 0
+             n_events = 0
+             risk_set2 = 0
+             while k < n_samples and ti == time[k]:
+                 if self.event[k]:
+                     numerator += xw[k]
+                     risk_set2 += np.exp(xw[k])
+                     n_events += 1
+                 else:
+                     risk_set += np.exp(xw[k])
+                 k += 1
+
+             if n_events > 0:
+                 if breslow:
+                     risk_set += risk_set2
+                     loss -= (numerator - n_events * np.log(risk_set)) / n_samples
+                 else:
+                     numerator /= n_events
+                     for _ in range(n_events):
+                         risk_set += risk_set2 / n_events
+                         loss -= (numerator - np.log(risk_set)) / n_samples
+
+         # add regularization term to log-likelihood
+         return loss + np.sum(self.alpha * np.square(w)) / (2.0 * n_samples)
+
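Editor's note (not part of the package source): for the default Breslow handling of ties, the value returned by `nlog_likelihood` above corresponds to the ridge-penalized, sample-averaged negative partial log-likelihood (notation as in the note after BreslowEstimator):

.. math::

    \ell(w) = -\frac{1}{n} \sum_{i:\, \delta_i = 1} \left[ x_i^\top w - \log \sum_{j \in R(t_i)} \exp(x_j^\top w) \right] + \frac{1}{2n} \sum_{k=1}^{p} \alpha_k w_k^2

With ties="efron", the inner loop instead adds the tied events' risk scores to the denominator in equal fractions, which reproduces Efron's correction for ties.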
+     def update(self, w, offset=0):
+         """Compute gradient and Hessian matrix with respect to `w`."""
+         time = self.time
+         x = self.x
+         breslow = self._is_breslow
+         exp_xw = np.exp(offset + np.dot(x, w))
+         n_samples, n_features = x.shape
+
+         gradient = np.zeros((1, n_features), dtype=w.dtype)
+         hessian = np.zeros((n_features, n_features), dtype=w.dtype)
+
+         inv_n_samples = 1.0 / n_samples
+         risk_set = 0
+         risk_set_x = np.zeros((1, n_features), dtype=w.dtype)
+         risk_set_xx = np.zeros((n_features, n_features), dtype=w.dtype)
+         k = 0
+         # iterate time in descending order
+         while k < n_samples:
+             ti = time[k]
+             n_events = 0
+             numerator = 0
+             risk_set2 = 0
+             risk_set_x2 = np.zeros_like(risk_set_x)
+             risk_set_xx2 = np.zeros_like(risk_set_xx)
+             while k < n_samples and ti == time[k]:
+                 # preserve 2D shape of row vector
+                 xk = x[k : k + 1]
+
+                 # outer product
+                 xx = np.dot(xk.T, xk)
+
+                 if self.event[k]:
+                     numerator += xk
+                     risk_set2 += exp_xw[k]
+                     risk_set_x2 += exp_xw[k] * xk
+                     risk_set_xx2 += exp_xw[k] * xx
+                     n_events += 1
+                 else:
+                     risk_set += exp_xw[k]
+                     risk_set_x += exp_xw[k] * xk
+                     risk_set_xx += exp_xw[k] * xx
+                 k += 1
+
+             if n_events > 0:
+                 if breslow:
+                     risk_set += risk_set2
+                     risk_set_x += risk_set_x2
+                     risk_set_xx += risk_set_xx2
+
+                     z = risk_set_x / risk_set
+                     gradient -= (numerator - n_events * z) * inv_n_samples
+
+                     a = risk_set_xx / risk_set
+                     # outer product
+                     b = np.dot(z.T, z)
+
+                     hessian += n_events * (a - b) * inv_n_samples
+                 else:
+                     numerator /= n_events
+                     for _ in range(n_events):
+                         risk_set += risk_set2 / n_events
+                         risk_set_x += risk_set_x2 / n_events
+                         risk_set_xx += risk_set_xx2 / n_events
+
+                         z = risk_set_x / risk_set
+                         gradient -= (numerator - z) * inv_n_samples
+
+                         a = risk_set_xx / risk_set
+                         # outer product
+                         b = np.dot(z.T, z)
+
+                         hessian += (a - b) * inv_n_samples
+
+         if not self.no_alpha:
+             gradient += self.alpha * inv_n_samples * w
+
+             diag_idx = np.diag_indices(n_features)
+             hessian[diag_idx] += self.alpha * inv_n_samples
+
+         self.gradient = gradient.ravel()
+         self.hessian = hessian
+
+
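Editor's note (not part of the package source): for Breslow ties, the gradient and Hessian assembled by `update` correspond to

.. math::

    g(w) = -\frac{1}{n} \sum_{i:\, \delta_i = 1} \bigl( x_i - \bar{x}_w(t_i) \bigr) + \frac{1}{n} \, \alpha \odot w, \qquad
    \bar{x}_w(t) = \frac{\sum_{j \in R(t)} \exp(x_j^\top w) \, x_j}{\sum_{j \in R(t)} \exp(x_j^\top w)}

.. math::

    H(w) = \frac{1}{n} \sum_{i:\, \delta_i = 1} \left( \frac{\sum_{j \in R(t_i)} \exp(x_j^\top w) \, x_j x_j^\top}{\sum_{j \in R(t_i)} \exp(x_j^\top w)} - \bar{x}_w(t_i) \bar{x}_w(t_i)^\top \right) + \frac{1}{n} \, \mathrm{diag}(\alpha)

These are exactly the quantities consumed by the Newton step in `CoxPHSurvivalAnalysis.fit` below; the Efron branch applies the same fractional-denominator correction as in `nlog_likelihood`.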
+ class VerboseReporter:
+     """Helper class to report optimization progress.
+
+     This class is used by :class:`CoxPHSurvivalAnalysis` to print
+     optimization progress depending on the verbosity level.
+
+     Parameters
+     ----------
+     verbose : int
+         The verbosity level.
+     """
+
+     def __init__(self, verbose):
+         self.verbose = verbose
+
+     def end_max_iter(self, i):
+         if self.verbose > 0:
+             print(f"iter {i + 1:>6d}: reached maximum number of iterations. Stopping.")
+
+     def end_converged(self, i):
+         if self.verbose > 0:
+             print(f"iter {i + 1:>6d}: optimization converged")
+
+     def update(self, i, delta, loss_new):
+         if self.verbose > 2:
+             print(f"iter {i + 1:>6d}: update = {delta}")
+         if self.verbose > 1:
+             print(f"iter {i + 1:>6d}: loss = {loss_new:.10f}")
+
+     def step_halving(self, i, loss):
+         if self.verbose > 1:
+             print(f"iter {i:>6d}: loss increased, performing step-halving. loss = {loss:.10f}")
+
+
+ class CoxPHSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
+     """The Cox proportional hazards model, also known as Cox regression.
+
+     This model is a semi-parametric model that can be used to model the
+     relationship between a set of features and the time to an event.
+     The model is fitted by maximizing the partial likelihood
+     using Newton-Raphson optimization.
+
+     There are two possible choices for handling tied event times.
+     The default is Breslow's method, which considers each of the
+     events at a given time as distinct. Efron's method is more
+     accurate if there are a large number of ties. When the number
+     of ties is small, the estimated coefficients by Breslow's and
+     Efron's method are quite close.
+
+     See [1]_, [2]_, [3]_ for further description.
+
+     Parameters
+     ----------
+     alpha : float or ndarray, shape = (n_features,), optional, default: 0
+         Regularization parameter for ridge regression penalty.
+         If a single float, the same penalty is used for all features.
+         If an array, there must be one penalty for each feature.
+         If you want to include a subset of features without penalization,
+         set the corresponding entries to 0.
+
+     ties : {'breslow', 'efron'}, optional, default: 'breslow'
+         The method to handle tied event times. If there are
+         no tied event times all the methods are equivalent.
+
+     n_iter : int, optional, default: 100
+         The maximum number of iterations taken for the solver to converge.
+
+     tol : float, optional, default: 1e-9
+         Convergence criteria. Convergence is based on the negative log-likelihood::
+
+             |1 - (new neg. log-likelihood / old neg. log-likelihood) | < tol
+
+     verbose : int, optional, default: 0
+         Specifies the amount of additional debug information
+         during optimization.
+
+     Attributes
+     ----------
+     coef_ : ndarray, shape = (n_features,)
+         Coefficients of the model.
+
+     cum_baseline_hazard_ : :class:`sksurv.functions.StepFunction`
+         Estimated baseline cumulative hazard function.
+
+     baseline_survival_ : :class:`sksurv.functions.StepFunction`
+         Estimated baseline survival function.
+
+     n_features_in_ : int
+         Number of features seen during ``fit``.
+
+     feature_names_in_ : ndarray, shape = (`n_features_in_`,)
+         Names of features seen during ``fit``. Defined only when `X`
+         has feature names that are all strings.
+
+     unique_times_ : ndarray, shape = (n_unique_times,)
+         Unique time points.
+
+     See also
+     --------
+     sksurv.linear_model.CoxnetSurvivalAnalysis
+         Cox proportional hazards model with l1 (LASSO) and l2 (ridge) penalty.
+
+     References
+     ----------
+     .. [1] Cox, D. R. Regression models and life tables (with discussion).
+            Journal of the Royal Statistical Society. Series B, 34, 187-220, 1972.
+     .. [2] Breslow, N. E. Covariance Analysis of Censored Survival Data.
+            Biometrics 30 (1974): 89–99.
+     .. [3] Efron, B. The Efficiency of Cox’s Likelihood Function for Censored Data.
+            Journal of the American Statistical Association 72 (1977): 557–565.
+     """
+
+     _parameter_constraints: dict = {
+         "alpha": [Interval(numbers.Real, 0, None, closed="left"), np.ndarray],
+         "ties": [StrOptions({"breslow", "efron"})],
+         "n_iter": [Interval(numbers.Integral, 1, None, closed="left")],
+         "tol": [Interval(numbers.Real, 0, None, closed="left")],
+         "verbose": ["verbose"],
+     }
+
+     def __init__(self, alpha=0, *, ties="breslow", n_iter=100, tol=1e-9, verbose=0):
+         self.alpha = alpha
+         self.ties = ties
+         self.n_iter = n_iter
+         self.tol = tol
+         self.verbose = verbose
+
+         self._baseline_model = BreslowEstimator()
+
+     @property
+     def cum_baseline_hazard_(self):
+         return self._baseline_model.cum_baseline_hazard_
+
+     @property
+     def baseline_survival_(self):
+         return self._baseline_model.baseline_survival_
+
+     @property
+     def unique_times_(self):
+         return self._baseline_model.unique_times_
+
+     def fit(self, X, y):
+         """Fit the model to the given data.
+
+         Parameters
+         ----------
+         X : array-like, shape = (n_samples, n_features)
+             Data matrix
+
+         y : structured array, shape = (n_samples,)
+             A structured array with two fields. The first field is a boolean
+             where ``True`` indicates an event and ``False`` indicates right-censoring.
+             The second field is a float with the time of event or time of censoring.
+
+         Returns
+         -------
+         self
+         """
+         self._validate_params()
+
+         X = validate_data(self, X, ensure_min_samples=2, dtype=np.float64)
+         event, time = check_array_survival(X, y)
+
+         if isinstance(self.alpha, numbers.Real | numbers.Integral):
+             alphas = np.empty(X.shape[1], dtype=float)
+             alphas[:] = self.alpha
+         else:
+             alphas = self.alpha
+
+         alphas = check_array(alphas, ensure_2d=False, ensure_min_samples=0, estimator=self, input_name="alpha")
+         if np.any(alphas < 0):
+             raise ValueError(f"alpha must be positive, but was {self.alpha!r}")
+         if alphas.shape[0] != X.shape[1]:
+             raise ValueError(f"Length alphas ({alphas.shape[0]}) must match number of features ({X.shape[1]}).")
+
+         optimizer = CoxPHOptimizer(X, event, time, alphas, self.ties)
+
+         verbose_reporter = VerboseReporter(self.verbose)
+         w = np.zeros(X.shape[1])
+         w_prev = w
+         i = 0
+         loss = float("inf")
+         while True:
+             if i >= self.n_iter:
+                 verbose_reporter.end_max_iter(i)
+                 warnings.warn(
+                     ("Optimization did not converge: Maximum number of iterations has been exceeded."),
+                     stacklevel=2,
+                     category=ConvergenceWarning,
+                 )
+                 break
+
+             optimizer.update(w)
+             delta = solve(
+                 optimizer.hessian, optimizer.gradient, overwrite_a=False, overwrite_b=False, check_finite=False
+             )
+
+             if not np.all(np.isfinite(delta)):
+                 raise ValueError("search direction contains NaN or infinite values")
+
+             w_new = w - delta
+             loss_new = optimizer.nlog_likelihood(w_new)
+             verbose_reporter.update(i, delta, loss_new)
+             if loss_new > loss:
+                 # perform step-halving if negative log-likelihood does not decrease
+                 w = (w_prev + w) / 2
+                 loss = optimizer.nlog_likelihood(w)
+                 verbose_reporter.step_halving(i, loss)
+                 i += 1
+                 continue
+
+             w_prev = w
+             w = w_new
+
+             res = np.abs(1 - (loss_new / loss))
+             if res < self.tol:
+                 verbose_reporter.end_converged(i)
+                 break
+
+             loss = loss_new
+             i += 1
+
+         self.coef_ = w
+         self._baseline_model.fit(np.dot(X, self.coef_), event, time)
+         return self
+
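Editor's note (not part of the package source): the loop above is a damped Newton–Raphson iteration. With :math:`g` and :math:`H` taken from `CoxPHOptimizer.update`, each accepted step is

.. math::

    w^{(k+1)} = w^{(k)} - H^{-1} g

If the penalized negative log-likelihood increases, the candidate is rejected and :math:`w` is pulled halfway back toward the last accepted iterate (step-halving); iteration stops once :math:`|1 - \ell_{\text{new}} / \ell_{\text{old}}| < \text{tol}` or `n_iter` is reached.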
+     def predict(self, X):
+         """Predict risk scores.
+
+         The risk score is the linear predictor of the model,
+         computed as the dot product of the input features `X` and the
+         estimated coefficients `coef_`. A higher score indicates a
+         higher risk of experiencing the event.
+
+         Parameters
+         ----------
+         X : array-like, shape = (n_samples, n_features)
+             Data matrix.
+
+         Returns
+         -------
+         risk_score : array, shape = (n_samples,)
+             Predicted risk scores.
+         """
+         check_is_fitted(self, "coef_")
+
+         X = validate_data(self, X, reset=False)
+
+         return np.dot(X, self.coef_)
+
+     @append_cumulative_hazard_example(estimator_mod="linear_model", estimator_class="CoxPHSurvivalAnalysis")
+     def predict_cumulative_hazard_function(self, X, return_array=False):
+         r"""Predict cumulative hazard function.
+
+         The cumulative hazard function for an individual
+         with feature vector :math:`x` is defined as
+
+         .. math::
+
+             H(t \mid x) = \exp(x^\top \beta) H_0(t) ,
+
+         where :math:`H_0(t)` is the baseline hazard function,
+         estimated by Breslow's estimator.
+
+         Parameters
+         ----------
+         X : array-like, shape = (n_samples, n_features)
+             Data matrix.
+
+         return_array : bool, default: False
+             Whether to return a single array of cumulative hazard values
+             or a list of step functions.
+
+             If `False`, a list of :class:`sksurv.functions.StepFunction`
+             objects is returned.
+
+             If `True`, a 2d-array of shape `(n_samples, n_unique_times)` is
+             returned, where `n_unique_times` is the number of unique
+             event times in the training data. Each row represents the cumulative
+             hazard function of an individual evaluated at `unique_times_`.
+
+         Returns
+         -------
+         cum_hazard : ndarray
+             If `return_array` is `False`, an array of `n_samples`
+             :class:`sksurv.functions.StepFunction` instances is returned.
+
+             If `return_array` is `True`, a numeric array of shape
+             `(n_samples, n_unique_times_)` is returned.
+
+         Examples
+         --------
+         """
+         return self._predict_cumulative_hazard_function(self._baseline_model, self.predict(X), return_array)
+
+     @append_survival_function_example(estimator_mod="linear_model", estimator_class="CoxPHSurvivalAnalysis")
+     def predict_survival_function(self, X, return_array=False):
+         r"""Predict survival function.
+
+         The survival function for an individual
+         with feature vector :math:`x` is defined as
+
+         .. math::
+
+             S(t \mid x) = S_0(t)^{\exp(x^\top \beta)} ,
+
+         where :math:`S_0(t)` is the baseline survival function,
+         estimated by Breslow's estimator.
+
+         Parameters
+         ----------
+         X : array-like, shape = (n_samples, n_features)
+             Data matrix.
+
+         return_array : bool, default: False
+             Whether to return a single array of survival probabilities
+             or a list of step functions.
+
+             If `False`, a list of :class:`sksurv.functions.StepFunction`
+             objects is returned.
+
+             If `True`, a 2d-array of shape `(n_samples, n_unique_times)` is
+             returned, where `n_unique_times` is the number of unique
+             event times in the training data. Each row represents the survival
+             function of an individual evaluated at `unique_times_`.
+
+         Returns
+         -------
+         survival : ndarray
+             If `return_array` is `False`, an array of `n_samples`
+             :class:`sksurv.functions.StepFunction` instances is returned.
+
+             If `return_array` is `True`, a numeric array of shape
+             `(n_samples, n_unique_times_)` is returned.
+
+         Examples
+         --------
+         """
+         return self._predict_survival_function(self._baseline_model, self.predict(X), return_array)
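Editor's note (not part of the diff): a minimal usage sketch of the estimator defined in this file, relying only on public helpers that this wheel also ships (sksurv.datasets.load_whas500, sksurv.preprocessing.OneHotEncoder); the dataset choice and the small ridge penalty are illustrative, not prescribed by the source::

    from sksurv.datasets import load_whas500
    from sksurv.linear_model import CoxPHSurvivalAnalysis
    from sksurv.preprocessing import OneHotEncoder

    # load a bundled example dataset and one-hot encode its categorical columns
    X, y = load_whas500()
    Xt = OneHotEncoder().fit_transform(X)

    # fit with a small ridge penalty; ties are handled by Breslow's method by default
    est = CoxPHSurvivalAnalysis(alpha=0.1).fit(Xt, y)

    # higher risk scores indicate a higher risk of experiencing the event
    risk_scores = est.predict(Xt.iloc[:5])

    # survival curves for the same samples, evaluated at the training event times
    surv = est.predict_survival_function(Xt.iloc[:5], return_array=True)
    print(risk_scores.shape, surv.shape)  # (5,) and (5, n_unique_times)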
sksurv/meta/__init__.py ADDED
@@ -0,0 +1,4 @@
+ from .ensemble_selection import EnsembleSelection, EnsembleSelectionRegressor, MeanEstimator
+ from .stacking import Stacking
+
+ __all__ = ["EnsembleSelection", "EnsembleSelectionRegressor", "MeanEstimator", "Stacking"]
sksurv/meta/base.py ADDED
@@ -0,0 +1,35 @@
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+ import numbers
+
+ from sklearn.utils.metaestimators import _safe_split
+
+
+ def _fit_and_score(est, x, y, scorer, train_index, test_index, parameters, fit_params, predict_params):
+     """Train survival model on given data and return its score on test data"""
+     X_train, y_train = _safe_split(est, x, y, train_index)
+     train_params = fit_params.copy()
+
+     # Training
+     est.set_params(**parameters)
+     est.fit(X_train, y_train, **train_params)
+
+     # Testing
+     test_predict_params = predict_params.copy()
+     X_test, y_test = _safe_split(est, x, y, test_index, train_index)
+
+     score = scorer(est, X_test, y_test, **test_predict_params)
+     if not isinstance(score, numbers.Number):
+         raise ValueError(f"scoring must return a number, got {score!s} ({type(score)}) instead.")
+
+     return score
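Editor's note (not part of the package source): _fit_and_score is a private helper; the hypothetical driver below only illustrates its calling convention, pairing it with scikit-learn's KFold and the estimator's built-in concordance-index score. Everything besides _fit_and_score itself and the sksurv/sklearn imports is illustrative::

    import numpy as np
    from sklearn.model_selection import KFold

    from sksurv.datasets import load_whas500
    from sksurv.linear_model import CoxPHSurvivalAnalysis
    from sksurv.meta.base import _fit_and_score
    from sksurv.preprocessing import OneHotEncoder

    X, y = load_whas500()
    X = OneHotEncoder().fit_transform(X).values  # plain ndarray for splitting

    def scorer(estimator, X_test, y_test):
        # SurvivalAnalysisMixin.score returns Harrell's concordance index
        return estimator.score(X_test, y_test)

    est = CoxPHSurvivalAnalysis(alpha=0.1)
    scores = [
        _fit_and_score(est, X, y, scorer, train_idx, test_idx,
                       parameters={}, fit_params={}, predict_params={})
        for train_idx, test_idx in KFold(n_splits=3).split(X)
    ]
    print(np.mean(scores))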