scikit-survival 0.23.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. scikit_survival-0.23.1.dist-info/COPYING +674 -0
  2. scikit_survival-0.23.1.dist-info/METADATA +888 -0
  3. scikit_survival-0.23.1.dist-info/RECORD +55 -0
  4. scikit_survival-0.23.1.dist-info/WHEEL +5 -0
  5. scikit_survival-0.23.1.dist-info/top_level.txt +1 -0
  6. sksurv/__init__.py +138 -0
  7. sksurv/base.py +103 -0
  8. sksurv/bintrees/__init__.py +15 -0
  9. sksurv/bintrees/_binarytrees.cp313-win_amd64.pyd +0 -0
  10. sksurv/column.py +201 -0
  11. sksurv/compare.py +123 -0
  12. sksurv/datasets/__init__.py +10 -0
  13. sksurv/datasets/base.py +436 -0
  14. sksurv/datasets/data/GBSG2.arff +700 -0
  15. sksurv/datasets/data/actg320.arff +1169 -0
  16. sksurv/datasets/data/breast_cancer_GSE7390-metastasis.arff +283 -0
  17. sksurv/datasets/data/flchain.arff +7887 -0
  18. sksurv/datasets/data/veteran.arff +148 -0
  19. sksurv/datasets/data/whas500.arff +520 -0
  20. sksurv/ensemble/__init__.py +2 -0
  21. sksurv/ensemble/_coxph_loss.cp313-win_amd64.pyd +0 -0
  22. sksurv/ensemble/boosting.py +1610 -0
  23. sksurv/ensemble/forest.py +947 -0
  24. sksurv/ensemble/survival_loss.py +151 -0
  25. sksurv/exceptions.py +18 -0
  26. sksurv/functions.py +114 -0
  27. sksurv/io/__init__.py +2 -0
  28. sksurv/io/arffread.py +58 -0
  29. sksurv/io/arffwrite.py +145 -0
  30. sksurv/kernels/__init__.py +1 -0
  31. sksurv/kernels/_clinical_kernel.cp313-win_amd64.pyd +0 -0
  32. sksurv/kernels/clinical.py +328 -0
  33. sksurv/linear_model/__init__.py +3 -0
  34. sksurv/linear_model/_coxnet.cp313-win_amd64.pyd +0 -0
  35. sksurv/linear_model/aft.py +205 -0
  36. sksurv/linear_model/coxnet.py +543 -0
  37. sksurv/linear_model/coxph.py +618 -0
  38. sksurv/meta/__init__.py +4 -0
  39. sksurv/meta/base.py +35 -0
  40. sksurv/meta/ensemble_selection.py +642 -0
  41. sksurv/meta/stacking.py +349 -0
  42. sksurv/metrics.py +996 -0
  43. sksurv/nonparametric.py +588 -0
  44. sksurv/preprocessing.py +155 -0
  45. sksurv/svm/__init__.py +11 -0
  46. sksurv/svm/_minlip.cp313-win_amd64.pyd +0 -0
  47. sksurv/svm/_prsvm.cp313-win_amd64.pyd +0 -0
  48. sksurv/svm/minlip.py +606 -0
  49. sksurv/svm/naive_survival_svm.py +221 -0
  50. sksurv/svm/survival_svm.py +1228 -0
  51. sksurv/testing.py +108 -0
  52. sksurv/tree/__init__.py +1 -0
  53. sksurv/tree/_criterion.cp313-win_amd64.pyd +0 -0
  54. sksurv/tree/tree.py +703 -0
  55. sksurv/util.py +333 -0
@@ -0,0 +1,1610 @@
1
+ # This program is free software: you can redistribute it and/or modify
2
+ # it under the terms of the GNU General Public License as published by
3
+ # the Free Software Foundation, either version 3 of the License, or
4
+ # (at your option) any later version.
5
+ #
6
+ # This program is distributed in the hope that it will be useful,
7
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
8
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9
+ # GNU General Public License for more details.
10
+ #
11
+ # You should have received a copy of the GNU General Public License
12
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
13
+ import numbers
14
+
15
+ import numpy as np
16
+ from scipy.sparse import csc_matrix, csr_matrix, issparse
17
+ from sklearn.base import BaseEstimator
18
+ from sklearn.ensemble._base import BaseEnsemble
19
+ from sklearn.ensemble._gb import BaseGradientBoosting, VerboseReporter
20
+ from sklearn.ensemble._gradient_boosting import _random_sample_mask
21
+ from sklearn.model_selection import train_test_split
22
+ from sklearn.tree import DecisionTreeRegressor
23
+ from sklearn.tree._tree import DTYPE
24
+ from sklearn.utils import check_random_state
25
+ from sklearn.utils._param_validation import Interval, StrOptions
26
+ from sklearn.utils.extmath import squared_norm
27
+ from sklearn.utils.validation import _check_sample_weight, check_array, check_is_fitted
28
+
29
+ from ..base import SurvivalAnalysisMixin
30
+ from ..linear_model.coxph import BreslowEstimator
31
+ from ..util import check_array_survival
32
+ from .survival_loss import LOSS_FUNCTIONS, CensoredSquaredLoss, CoxPH, IPCWLeastSquaresError
33
+
34
+ __all__ = ["ComponentwiseGradientBoostingSurvivalAnalysis", "GradientBoostingSurvivalAnalysis"]
35
+
36
+
37
+ def _sample_binomial_plus_one(p, size, random_state):
38
+ drop_model = random_state.binomial(1, p=p, size=size)
39
+ n_dropped = np.sum(drop_model)
40
+ if n_dropped == 0:
41
+ idx = random_state.randint(0, size)
42
+ drop_model[idx] = 1
43
+ n_dropped = 1
44
+ return drop_model, n_dropped
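`_sample_binomial_plus_one` draws a Bernoulli(p) drop mask over the first `size` base learners and, if the draw selects none, forces a single random drop. A minimal doctest-style sketch of that guarantee (illustrative only, not part of the module; assumes NumPy's `RandomState` with seed 0):

>>> import numpy as np
>>> rng = np.random.RandomState(0)
>>> mask, n_dropped = _sample_binomial_plus_one(p=0.0, size=5, random_state=rng)
>>> int(mask.sum()), n_dropped   # with p=0, one drop is still forced
(1, 1)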
45
+
46
+
47
+ class _ComponentwiseLeastSquares(BaseEstimator):
48
+ def __init__(self, component):
49
+ self.component = component
50
+
51
+ def fit(self, X, y, sample_weight):
52
+ xw = X[:, self.component] * sample_weight
53
+ b = np.dot(xw, y)
54
+ if b == 0:
55
+ self.coef_ = 0
56
+ else:
57
+ a = np.dot(xw, xw)
58
+ self.coef_ = b / a
59
+
60
+ return self
61
+
62
+ def predict(self, X):
63
+ return X[:, self.component] * self.coef_
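Each `_ComponentwiseLeastSquares` base learner is a univariate least-squares fit through the origin on a single weighted column: with `xw = X[:, component] * sample_weight`, the coefficient is `xw @ y / (xw @ xw)`. A small worked sketch with unit weights (illustrative, not part of the module):

>>> import numpy as np
>>> X = np.array([[1.0], [2.0], [3.0]])
>>> y = np.array([2.0, 4.0, 6.0])
>>> learner = _ComponentwiseLeastSquares(component=0).fit(X, y, np.ones(3))
>>> float(learner.coef_)   # (1*2 + 2*4 + 3*6) / (1 + 4 + 9)
2.0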
64
+
65
+
66
+ def _fit_stage_componentwise(X, residuals, sample_weight, **fit_params): # pylint: disable=unused-argument
67
+ """Fit component-wise weighted least squares model"""
68
+ n_features = X.shape[1]
69
+
70
+ base_learners = []
71
+ error = np.empty(n_features)
72
+ for component in range(n_features):
73
+ learner = _ComponentwiseLeastSquares(component).fit(X, residuals, sample_weight)
74
+ l_pred = learner.predict(X)
75
+ error[component] = squared_norm(residuals - l_pred)
76
+ base_learners.append(learner)
77
+
78
+ # TODO: could use bottleneck.nanargmin for speed
79
+ best_component = np.nanargmin(error)
80
+ best_learner = base_learners[best_component]
81
+ return best_learner
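`_fit_stage_componentwise` fits one such learner per column and keeps the one whose prediction leaves the smallest squared residual norm. A toy sketch where only the second column can explain the residuals (illustrative, not part of the module):

>>> import numpy as np
>>> X = np.column_stack([np.zeros(4), np.arange(4.0)])
>>> residuals = np.arange(4.0)
>>> _fit_stage_componentwise(X, residuals, np.ones(4)).component
1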
82
+
83
+
84
+ class ComponentwiseGradientBoostingSurvivalAnalysis(BaseEnsemble, SurvivalAnalysisMixin):
85
+ r"""Gradient boosting with component-wise least squares as base learner.
86
+
87
+ See the :ref:`User Guide </user_guide/boosting.ipynb>` and [1]_ for further description.
88
+
89
+ Parameters
90
+ ----------
91
+ loss : {'coxph', 'squared', 'ipcwls'}, optional, default: 'coxph'
92
+ loss function to be optimized. 'coxph' refers to partial likelihood loss
93
+ of Cox's proportional hazards model. The loss 'squared' minimizes a
94
+ squared regression loss that ignores predictions beyond the time of censoring,
95
+ and 'ipcwls' refers to inverse-probability of censoring weighted least squares error.
96
+
97
+ learning_rate : float, optional, default: 0.1
98
+ learning rate shrinks the contribution of each base learner by `learning_rate`.
99
+ There is a trade-off between `learning_rate` and `n_estimators`.
100
+ Values must be in the range `[0.0, inf)`.
101
+
102
+ n_estimators : int, default: 100
103
+ The number of boosting stages to perform. Gradient boosting
104
+ is fairly robust to over-fitting so a large number usually
105
+ results in better performance.
106
+ Values must be in the range `[1, inf)`.
107
+
108
+ subsample : float, optional, default: 1.0
109
+ The fraction of samples to be used for fitting the individual base
110
+ learners. If smaller than 1.0 this results in Stochastic Gradient
111
+ Boosting. `subsample` interacts with the parameter `n_estimators`.
112
+ Choosing `subsample < 1.0` leads to a reduction of variance
113
+ and an increase in bias.
114
+ Values must be in the range `(0.0, 1.0]`.
115
+
116
+ warm_start : bool, default: False
117
+ When set to ``True``, reuse the solution of the previous call to fit
118
+ and add more estimators to the ensemble, otherwise, just erase the
119
+ previous solution.
120
+
121
+ dropout_rate : float, optional, default: 0.0
122
+ If larger than zero, the residuals at each iteration are only computed
123
+ from a random subset of base learners. The value corresponds to the
124
+ percentage of base learners that are dropped. In each iteration,
125
+ at least one base learner is dropped. This is an alternative regularization
126
+ to shrinkage, i.e., setting `learning_rate < 1.0`.
127
+ Values must be in the range `[0.0, 1.0)`.
128
+
129
+ random_state : int seed, RandomState instance, or None, default: None
130
+ The seed of the pseudo random number generator to use when
131
+ shuffling the data.
132
+
133
+ verbose : int, default: 0
134
+ Enable verbose output. If 1 then it prints progress and performance
135
+ once in a while.
136
+ Values must be in the range `[0, inf)`.
137
+
138
+ Attributes
139
+ ----------
140
+ coef_ : array, shape = (n_features + 1,)
141
+ The aggregated coefficients. The first element `coef\_[0]` corresponds
142
+ to the intercept. If loss is `coxph`, the intercept will always be zero.
143
+
144
+ estimators_ : list of base learners
145
+ The collection of fitted sub-estimators.
146
+
147
+ train_score_ : ndarray, shape = (n_estimators,)
148
+ The i-th score ``train_score_[i]`` is the loss of the
149
+ model at iteration ``i`` on the in-bag sample.
150
+ If ``subsample == 1`` this is the loss on the training data.
151
+
152
+ oob_improvement_ : ndarray, shape = (n_estimators,)
153
+ The improvement in loss on the out-of-bag samples
154
+ relative to the previous iteration.
155
+ ``oob_improvement_[0]`` is the improvement in
156
+ loss of the first stage over the ``init`` estimator.
157
+ Only available if ``subsample < 1.0``.
158
+
159
+ oob_scores_ : ndarray of shape (n_estimators,)
160
+ The full history of the loss values on the out-of-bag
161
+ samples. Only available if ``subsample < 1.0``.
162
+
163
+ oob_score_ : float
164
+ The last value of the loss on the out-of-bag samples. It is
165
+ the same as ``oob_scores_[-1]``. Only available if ``subsample < 1.0``.
166
+
167
+ n_features_in_ : int
168
+ Number of features seen during ``fit``.
169
+
170
+ feature_names_in_ : ndarray of shape (`n_features_in_`,)
171
+ Names of features seen during ``fit``. Defined only when `X`
172
+ has feature names that are all strings.
173
+
174
+ unique_times_ : array of shape = (n_unique_times,)
175
+ Unique time points.
176
+
177
+ References
178
+ ----------
179
+ .. [1] Hothorn, T., Bühlmann, P., Dudoit, S., Molinaro, A., van der Laan, M. J.,
180
+ "Survival ensembles", Biostatistics, 7(3), 355-73, 2006
181
+ """
182
+
183
+ _parameter_constraints = {
184
+ "loss": [StrOptions(frozenset(LOSS_FUNCTIONS.keys()))],
185
+ "learning_rate": [Interval(numbers.Real, 0.0, None, closed="left")],
186
+ "n_estimators": [Interval(numbers.Integral, 1, None, closed="left")],
187
+ "subsample": [Interval(numbers.Real, 0.0, 1.0, closed="right")],
188
+ "warm_start": ["boolean"],
189
+ "dropout_rate": [Interval(numbers.Real, 0.0, 1.0, closed="left")],
190
+ "random_state": ["random_state"],
191
+ "verbose": ["verbose"],
192
+ }
193
+
194
+ def __init__(
195
+ self,
196
+ *,
197
+ loss="coxph",
198
+ learning_rate=0.1,
199
+ n_estimators=100,
200
+ subsample=1.0,
201
+ warm_start=False,
202
+ dropout_rate=0,
203
+ random_state=None,
204
+ verbose=0,
205
+ ):
206
+ self.loss = loss
207
+ self.n_estimators = n_estimators
208
+ self.learning_rate = learning_rate
209
+ self.subsample = subsample
210
+ self.warm_start = warm_start
211
+ self.dropout_rate = dropout_rate
212
+ self.random_state = random_state
213
+ self.verbose = verbose
214
+
215
+ @property
216
+ def _predict_risk_score(self):
217
+ return isinstance(self._loss, CoxPH)
218
+
219
+ def _is_fitted(self):
220
+ return len(getattr(self, "estimators_", [])) > 0
221
+
222
+ def _init_state(self):
223
+ self.estimators_ = np.empty(self.n_estimators, dtype=object)
224
+
225
+ self.train_score_ = np.zeros(self.n_estimators, dtype=np.float64)
226
+ # do oob?
227
+ if self.subsample < 1.0:
228
+ self.oob_improvement_ = np.zeros(self.n_estimators, dtype=np.float64)
229
+ self.oob_scores_ = np.zeros(self.n_estimators, dtype=np.float64)
230
+ self.oob_score_ = np.nan
231
+
232
+ if self.dropout_rate > 0:
233
+ self._scale = np.ones(int(self.n_estimators), dtype=float)
234
+
235
+ def _resize_state(self):
236
+ """Add additional ``n_estimators`` entries to all attributes."""
237
+ # self.n_estimators is the number of additional est to fit
238
+ total_n_estimators = self.n_estimators
239
+
240
+ self.estimators_ = np.resize(self.estimators_, total_n_estimators)
241
+ self.train_score_ = np.resize(self.train_score_, total_n_estimators)
242
+ if self.subsample < 1 or hasattr(self, "oob_improvement_"):
243
+ # if do oob resize arrays or create new if not available
244
+ if hasattr(self, "oob_improvement_"):
245
+ self.oob_improvement_ = np.resize(self.oob_improvement_, total_n_estimators)
246
+ self.oob_scores_ = np.resize(self.oob_scores_, total_n_estimators)
247
+ self.oob_score_ = np.nan
248
+ else:
249
+ self.oob_improvement_ = np.zeros(total_n_estimators, dtype=np.float64)
250
+ self.oob_scores_ = np.zeros((total_n_estimators,), dtype=np.float64)
251
+ self.oob_score_ = np.nan
252
+
253
+ if self.dropout_rate > 0:
254
+ if not hasattr(self, "_scale"):
255
+ raise ValueError(
256
+ "fitting with warm_start=True and dropout_rate > 0 is only "
257
+ "supported if the previous fit used dropout_rate > 0 too"
258
+ )
259
+
260
+ self._scale = np.resize(self._scale, total_n_estimators)
261
+ self._scale[self.n_estimators_ :] = 1
262
+
263
+ def _clear_state(self):
264
+ """Clear the state of the gradient boosting model."""
265
+ if hasattr(self, "estimators_"):
266
+ self.estimators_ = np.empty(0, dtype=object)
267
+ if hasattr(self, "train_score_"):
268
+ del self.train_score_
269
+ if hasattr(self, "oob_improvement_"):
270
+ del self.oob_improvement_
271
+ if hasattr(self, "oob_scores_"):
272
+ del self.oob_scores_
273
+ if hasattr(self, "oob_score_"):
274
+ del self.oob_score_
275
+ if hasattr(self, "_rng"):
276
+ del self._rng
277
+ if hasattr(self, "_scale"):
278
+ del self._scale
279
+
280
+ def _update_with_dropout(self, i, X, raw_predictions, scale, random_state):
281
+ # select base learners to be dropped for next iteration
282
+ drop_model, n_dropped = _sample_binomial_plus_one(self.dropout_rate, i + 1, random_state)
283
+
284
+ # adjust scaling factor of tree that is going to be trained in next iteration
285
+ scale[i + 1] = 1.0 / (n_dropped + 1.0)
286
+
287
+ raw_predictions[:] = 0
288
+ for m in range(i + 1):
289
+ if drop_model[m] == 1:
290
+ # adjust scaling factor of dropped trees
291
+ scale[m] *= n_dropped / (n_dropped + 1.0)
292
+ else:
293
+ # pseudoresponse of next iteration (without contribution of dropped trees)
294
+ raw_predictions += self.learning_rate * scale[m] * self.estimators_[m].predict(X)
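The loop above applies the DART-style rescaling of reference [5]: when `n_dropped` of the first `i + 1` learners are dropped, each dropped learner's weight is shrunk by `n_dropped / (n_dropped + 1)` and the learner trained in the next iteration enters with weight `1 / (n_dropped + 1)`. A numeric sketch of those two factors (illustrative values only):

>>> n_dropped = 2
>>> scale_next = 1.0 / (n_dropped + 1.0)             # weight of the learner fit next
>>> shrink_dropped = n_dropped / (n_dropped + 1.0)   # factor applied to each dropped learner
>>> round(scale_next, 3), round(shrink_dropped, 3)
(0.333, 0.667)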
295
+
296
+ def _fit(self, X, event, time, y_pred, sample_weight, random_state, begin_at_stage=0): # noqa: C901
297
+ n_samples = X.shape[0]
298
+ # account for intercept
299
+ y = np.fromiter(zip(event, time), dtype=[("event", bool), ("time", np.float64)])
300
+
301
+ do_oob = self.subsample < 1.0
302
+ if do_oob:
303
+ n_inbag = max(1, int(self.subsample * n_samples))
304
+
305
+ do_dropout = self.dropout_rate > 0
306
+ if do_dropout:
307
+ scale = self._scale
308
+
309
+ if self.verbose:
310
+ verbose_reporter = VerboseReporter(verbose=self.verbose)
311
+ verbose_reporter.init(self, 0)
312
+
313
+ # perform boosting iterations
314
+ i = begin_at_stage
315
+ for i in range(begin_at_stage, int(self.n_estimators)):
316
+ # subsampling
317
+ if do_oob:
318
+ sample_mask = _random_sample_mask(n_samples, n_inbag, random_state)
319
+ subsample_weight = sample_weight * sample_mask.astype(np.float64)
320
+
321
+ # OOB score before adding this stage
322
+ y_oob_masked = y[~sample_mask]
323
+ sample_weight_oob_masked = sample_weight[~sample_mask]
324
+ if i == 0: # store the initial loss to compute the OOB score
325
+ initial_loss = self._loss(
326
+ y_true=y_oob_masked,
327
+ raw_prediction=y_pred[~sample_mask],
328
+ sample_weight=sample_weight_oob_masked,
329
+ )
330
+ else:
331
+ subsample_weight = sample_weight
332
+
333
+ residuals = self._loss.gradient(y, y_pred, sample_weight=sample_weight)
334
+
335
+ best_learner = _fit_stage_componentwise(X, residuals, subsample_weight)
336
+ self.estimators_[i] = best_learner
337
+
338
+ if do_dropout and i < len(scale) - 1:
339
+ self._update_with_dropout(i, X, y_pred, scale, random_state)
340
+ else:
341
+ y_pred += self.learning_rate * best_learner.predict(X)
342
+
343
+ # track loss
344
+ if do_oob:
345
+ self.train_score_[i] = self._loss(
346
+ y_true=y[sample_mask],
347
+ raw_prediction=y_pred[sample_mask],
348
+ sample_weight=sample_weight[sample_mask],
349
+ )
350
+ self.oob_scores_[i] = self._loss(
351
+ y_true=y_oob_masked,
352
+ raw_prediction=y_pred[~sample_mask],
353
+ sample_weight=sample_weight_oob_masked,
354
+ )
355
+ previous_loss = initial_loss if i == 0 else self.oob_scores_[i - 1]
356
+ self.oob_improvement_[i] = previous_loss - self.oob_scores_[i]
357
+ self.oob_score_ = self.oob_scores_[-1]
358
+ else:
359
+ # no need to fancy index w/ no subsampling
360
+ self.train_score_[i] = self._loss(y_true=y, raw_prediction=y_pred, sample_weight=sample_weight)
361
+
362
+ if self.verbose > 0:
363
+ verbose_reporter.update(i, self)
364
+
365
+ return i + 1
366
+
367
+ def fit(self, X, y, sample_weight=None):
368
+ """Fit estimator.
369
+
370
+ Parameters
371
+ ----------
372
+ X : array-like, shape = (n_samples, n_features)
373
+ Data matrix
374
+
375
+ y : structured array, shape = (n_samples,)
376
+ A structured array containing the binary event indicator
377
+ as first field, and time of event or time of censoring as
378
+ second field.
379
+
380
+ sample_weight : array-like, shape = (n_samples,), optional
381
+ Weights given to each sample. If omitted, all samples have weight 1.
382
+
383
+ Returns
384
+ -------
385
+ self
386
+ """
387
+ self._validate_params()
388
+
389
+ if not self.warm_start:
390
+ self._clear_state()
391
+
392
+ X = self._validate_data(X, ensure_min_samples=2)
393
+ event, time = check_array_survival(X, y)
394
+
395
+ sample_weight = _check_sample_weight(sample_weight, X)
396
+
397
+ n_samples = X.shape[0]
398
+ Xi = np.column_stack((np.ones(n_samples), X))
399
+
400
+ self._loss = LOSS_FUNCTIONS[self.loss]()
401
+ if isinstance(self._loss, (CensoredSquaredLoss, IPCWLeastSquaresError)):
402
+ time = np.log(time)
403
+
404
+ if not self._is_fitted():
405
+ self._init_state()
406
+
407
+ y_pred = np.zeros(n_samples, dtype=np.float64)
408
+
409
+ begin_at_stage = 0
410
+
411
+ self._rng = check_random_state(self.random_state)
412
+ else:
413
+ # add more estimators to fitted model
414
+ # invariant: warm_start = True
415
+ if self.n_estimators < self.estimators_.shape[0]:
416
+ raise ValueError(
417
+ "n_estimators=%d must be larger or equal to "
418
+ "estimators_.shape[0]=%d when "
419
+ "warm_start==True" % (self.n_estimators, self.estimators_.shape[0])
420
+ )
421
+ begin_at_stage = self.estimators_.shape[0]
422
+ y_pred = self._raw_predict(Xi)
423
+ self._resize_state()
424
+
425
+ # apply dropout to last stage of previous fit
426
+ if hasattr(self, "_scale") and self.dropout_rate > 0:
427
+ # pylint: disable-next=access-member-before-definition
428
+ self._update_with_dropout(self.n_estimators_ - 1, Xi, y_pred, self._scale, self._rng)
429
+
430
+ self.n_estimators_ = self._fit(Xi, event, time, y_pred, sample_weight, self._rng, begin_at_stage)
431
+
432
+ self._set_baseline_model(X, event, time)
433
+ return self
434
+
435
+ def _set_baseline_model(self, X, event, time):
436
+ if isinstance(self._loss, CoxPH):
437
+ risk_scores = self._predict(X)
438
+ self._baseline_model = BreslowEstimator().fit(risk_scores, event, time)
439
+ else:
440
+ self._baseline_model = None
441
+
442
+ def _raw_predict(self, X):
443
+ pred = np.zeros(X.shape[0], dtype=float)
444
+ for estimator in self.estimators_:
445
+ pred += self.learning_rate * estimator.predict(X)
446
+ return pred
447
+
448
+ def _predict(self, X):
449
+ # account for intercept
450
+ Xi = np.column_stack((np.ones(X.shape[0]), X))
451
+ pred = self._raw_predict(Xi)
452
+ return self._loss._scale_raw_prediction(pred)
453
+
454
+ def predict(self, X):
455
+ """Predict risk scores.
456
+
457
+ If `loss='coxph'`, predictions can be interpreted as log hazard ratio
458
+ corresponding to the linear predictor of a Cox proportional hazards
459
+ model. If `loss='squared'` or `loss='ipcwls'`, predictions are the
460
+ time to event.
461
+
462
+ Parameters
463
+ ----------
464
+ X : array-like, shape = (n_samples, n_features)
465
+ Data matrix.
466
+
467
+ Returns
468
+ -------
469
+ risk_score : array, shape = (n_samples,)
470
+ Predicted risk scores.
471
+ """
472
+ check_is_fitted(self, "estimators_")
473
+ X = self._validate_data(X, reset=False)
474
+
475
+ return self._predict(X)
476
+
477
+ def _get_baseline_model(self):
478
+ if self._baseline_model is None:
479
+ raise ValueError("`fit` must be called with the loss option set to 'coxph'.")
480
+ return self._baseline_model
481
+
482
+ def predict_cumulative_hazard_function(self, X, return_array=False):
483
+ """Predict cumulative hazard function.
484
+
485
+ Only available if :meth:`fit` has been called with `loss = "coxph"`.
486
+
487
+ The cumulative hazard function for an individual
488
+ with feature vector :math:`x` is defined as
489
+
490
+ .. math::
491
+
492
+ H(t \\mid x) = \\exp(f(x)) H_0(t) ,
493
+
494
+ where :math:`f(\\cdot)` is the additive ensemble of base learners,
495
+ and :math:`H_0(t)` is the baseline hazard function,
496
+ estimated by Breslow's estimator.
497
+
498
+ Parameters
499
+ ----------
500
+ X : array-like, shape = (n_samples, n_features)
501
+ Data matrix.
502
+
503
+ return_array : boolean, default: False
504
+ If set, return an array with the cumulative hazard rate
505
+ for each `self.unique_times_`, otherwise an array of
506
+ :class:`sksurv.functions.StepFunction`.
507
+
508
+ Returns
509
+ -------
510
+ cum_hazard : ndarray
511
+ If `return_array` is set, an array with the cumulative hazard rate
512
+ for each `self.unique_times_`, otherwise an array of length `n_samples`
513
+ of :class:`sksurv.functions.StepFunction` instances will be returned.
514
+
515
+ Examples
516
+ --------
517
+ >>> import matplotlib.pyplot as plt
518
+ >>> from sksurv.datasets import load_whas500
519
+ >>> from sksurv.ensemble import ComponentwiseGradientBoostingSurvivalAnalysis
520
+
521
+ Load the data.
522
+
523
+ >>> X, y = load_whas500()
524
+ >>> X = X.astype(float)
525
+
526
+ Fit the model.
527
+
528
+ >>> estimator = ComponentwiseGradientBoostingSurvivalAnalysis(loss="coxph").fit(X, y)
529
+
530
+ Estimate the cumulative hazard function for the first 10 samples.
531
+
532
+ >>> chf_funcs = estimator.predict_cumulative_hazard_function(X.iloc[:10])
533
+
534
+ Plot the estimated cumulative hazard functions.
535
+
536
+ >>> for fn in chf_funcs:
537
+ ... plt.step(fn.x, fn(fn.x), where="post")
538
+ ...
539
+ >>> plt.ylim(0, 1)
540
+ >>> plt.show()
541
+ """
542
+ return self._predict_cumulative_hazard_function(self._get_baseline_model(), self.predict(X), return_array)
543
+
544
+ def predict_survival_function(self, X, return_array=False):
545
+ """Predict survival function.
546
+
547
+ Only available if :meth:`fit` has been called with `loss = "coxph"`.
548
+
549
+ The survival function for an individual
550
+ with feature vector :math:`x` is defined as
551
+
552
+ .. math::
553
+
554
+ S(t \\mid x) = S_0(t)^{\\exp(f(x))} ,
555
+
556
+ where :math:`f(\\cdot)` is the additive ensemble of base learners,
557
+ and :math:`S_0(t)` is the baseline survival function,
558
+ estimated by Breslow's estimator.
559
+
560
+ Parameters
561
+ ----------
562
+ X : array-like, shape = (n_samples, n_features)
563
+ Data matrix.
564
+
565
+ return_array : boolean, default: False
566
+ If set, return an array with the probability
567
+ of survival for each `self.unique_times_`,
568
+ otherwise an array of :class:`sksurv.functions.StepFunction`.
569
+
570
+ Returns
571
+ -------
572
+ survival : ndarray
573
+ If `return_array` is set, an array with the probability of
574
+ survival for each `self.unique_times_`, otherwise an array of
575
+ length `n_samples` of :class:`sksurv.functions.StepFunction`
576
+ instances will be returned.
577
+
578
+ Examples
579
+ --------
580
+ >>> import matplotlib.pyplot as plt
581
+ >>> from sksurv.datasets import load_whas500
582
+ >>> from sksurv.ensemble import ComponentwiseGradientBoostingSurvivalAnalysis
583
+
584
+ Load the data.
585
+
586
+ >>> X, y = load_whas500()
587
+ >>> X = X.astype(float)
588
+
589
+ Fit the model.
590
+
591
+ >>> estimator = ComponentwiseGradientBoostingSurvivalAnalysis(loss="coxph").fit(X, y)
592
+
593
+ Estimate the survival function for the first 10 samples.
594
+
595
+ >>> surv_funcs = estimator.predict_survival_function(X.iloc[:10])
596
+
597
+ Plot the estimated survival functions.
598
+
599
+ >>> for fn in surv_funcs:
600
+ ... plt.step(fn.x, fn(fn.x), where="post")
601
+ ...
602
+ >>> plt.ylim(0, 1)
603
+ >>> plt.show()
604
+ """
605
+ return self._predict_survival_function(self._get_baseline_model(), self.predict(X), return_array)
606
+
607
+ @property
608
+ def coef_(self):
609
+ coef = np.zeros(self.n_features_in_ + 1, dtype=float)
610
+
611
+ for estimator in self.estimators_:
612
+ coef[estimator.component] += self.learning_rate * estimator.coef_
613
+
614
+ return coef
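Because every base learner is linear in one column of `Xi = [1, X]`, the ensemble's raw prediction is itself linear, and `coef_` collapses it into a single vector: entry 0 is the intercept (the prepended column of ones) and entry `j` sums `learning_rate * coef_` over all stages that selected column `j`. A sketch of reading it back, where `est` and `X` are assumed to be a fitted `ComponentwiseGradientBoostingSurvivalAnalysis` and its training data:

>>> import numpy as np
>>> Xi = np.column_stack((np.ones(X.shape[0]), X))
>>> raw = Xi @ est.coef_   # same additive ensemble value that _raw_predict computes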
615
+
616
+ @property
617
+ def unique_times_(self):
618
+ return self._get_baseline_model().unique_times_
619
+
620
+ @property
621
+ def feature_importances_(self):
622
+ imp = np.empty(self.n_features_in_ + 1, dtype=object)
623
+ for i in range(imp.shape[0]):
624
+ imp[i] = []
625
+
626
+ for k, estimator in enumerate(self.estimators_):
627
+ imp[estimator.component].append(k + 1)
628
+
629
+ def _importance(x):
630
+ if len(x) > 0:
631
+ return np.min(x)
632
+ return np.nan
633
+
634
+ ret = np.array([_importance(x) for x in imp])
635
+ return ret
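For this component-wise model, `feature_importances_[j]` is the 1-based index of the first boosting iteration that selected column `j` of `Xi = [1, X]` (index 0 refers to the intercept), or NaN if the column was never selected, so smaller values mean a feature entered the ensemble earlier. A sketch, again assuming a fitted estimator `est`:

>>> import numpy as np
>>> imp = est.feature_importances_        # length est.n_features_in_ + 1; NaN = never selected
>>> earliest = np.nanargmin(imp[1:])      # column of X that was picked first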
636
+
637
+ def _make_estimator(self, append=True, random_state=None):
638
+ # we don't need _make_estimator
639
+ raise NotImplementedError()
640
+
641
+
642
+ class GradientBoostingSurvivalAnalysis(BaseGradientBoosting, SurvivalAnalysisMixin):
643
+ r"""Gradient-boosted Cox proportional hazard loss with
644
+ regression trees as base learner.
645
+
646
+ In each stage, a regression tree is fit on the negative gradient
647
+ of the loss function.
648
+
649
+ For more details on gradient boosting see [1]_ and [2]_. If `loss='coxph'`,
650
+ the partial likelihood of the proportional hazards model is optimized as
651
+ described in [3]_. If `loss='ipcwls'`, the accelerated failure time model with
652
+ inverse-probability of censoring weighted least squares error is optimized as
653
+ described in [4]_. When using a non-zero `dropout_rate`, regularization is
654
+ applied during training following [5]_.
655
+
656
+ See the :ref:`User Guide </user_guide/boosting.ipynb>` for examples.
657
+
658
+ Parameters
659
+ ----------
660
+ loss : {'coxph', 'squared', 'ipcwls'}, optional, default: 'coxph'
661
+ loss function to be optimized. 'coxph' refers to partial likelihood loss
662
+ of Cox's proportional hazards model. The loss 'squared' minimizes a
663
+ squared regression loss that ignores predictions beyond the time of censoring,
664
+ and 'ipcwls' refers to inverse-probability of censoring weighted least squares error.
665
+
666
+ learning_rate : float, optional, default: 0.1
667
+ learning rate shrinks the contribution of each tree by `learning_rate`.
668
+ There is a trade-off between `learning_rate` and `n_estimators`.
669
+ Values must be in the range `[0.0, inf)`.
670
+
671
+ n_estimators : int, default: 100
672
+ The number of regression trees to create. Gradient boosting
673
+ is fairly robust to over-fitting so a large number usually
674
+ results in better performance.
675
+ Values must be in the range `[1, inf)`.
676
+
677
+ subsample : float, optional, default: 1.0
678
+ The fraction of samples to be used for fitting the individual base
679
+ learners. If smaller than 1.0 this results in Stochastic Gradient
680
+ Boosting. `subsample` interacts with the parameter `n_estimators`.
681
+ Choosing `subsample < 1.0` leads to a reduction of variance
682
+ and an increase in bias.
683
+ Values must be in the range `(0.0, 1.0]`.
684
+
685
+ criterion : {'friedman_mse', 'squared_error'}, default: 'friedman_mse'
686
+ The function to measure the quality of a split. Supported criteria are
687
+ 'friedman_mse' for the mean squared error with improvement score by
688
+ Friedman, 'squared_error' for mean squared error. The default value of
689
+ 'friedman_mse' is generally the best as it can provide a better
690
+ approximation in some cases.
691
+
692
+ min_samples_split : int or float, optional, default: 2
693
+ The minimum number of samples required to split an internal node:
694
+
695
+ - If int, values must be in the range `[2, inf)`.
696
+ - If float, values must be in the range `(0.0, 1.0]` and `min_samples_split`
697
+ will be `ceil(min_samples_split * n_samples)`.
698
+
699
+ min_samples_leaf : int or float, default: 1
700
+ The minimum number of samples required to be at a leaf node.
701
+ A split point at any depth will only be considered if it leaves at
702
+ least ``min_samples_leaf`` training samples in each of the left and
703
+ right branches. This may have the effect of smoothing the model,
704
+ especially in regression.
705
+
706
+ - If int, values must be in the range `[1, inf)`.
707
+ - If float, values must be in the range `(0.0, 1.0)` and `min_samples_leaf`
708
+ will be `ceil(min_samples_leaf * n_samples)`.
709
+
710
+ min_weight_fraction_leaf : float, optional, default: 0.
711
+ The minimum weighted fraction of the sum total of weights (of all
712
+ the input samples) required to be at a leaf node. Samples have
713
+ equal weight when `sample_weight` is not provided.
714
+ Values must be in the range `[0.0, 0.5]`.
715
+
716
+ max_depth : int or None, optional, default: 3
717
+ Maximum depth of the individual regression estimators. The maximum
718
+ depth limits the number of nodes in the tree. Tune this parameter
719
+ for best performance; the best value depends on the interaction
720
+ of the input variables. If None, then nodes are expanded until
721
+ all leaves are pure or until all leaves contain less than
722
+ `min_samples_split` samples.
723
+ If int, values must be in the range `[1, inf)`.
724
+
725
+ min_impurity_decrease : float, optional, default: 0.
726
+ A node will be split if this split induces a decrease of the impurity
727
+ greater than or equal to this value.
728
+
729
+ The weighted impurity decrease equation is the following::
730
+
731
+ N_t / N * (impurity - N_t_R / N_t * right_impurity
732
+ - N_t_L / N_t * left_impurity)
733
+
734
+ where ``N`` is the total number of samples, ``N_t`` is the number of
735
+ samples at the current node, ``N_t_L`` is the number of samples in the
736
+ left child, and ``N_t_R`` is the number of samples in the right child.
737
+
738
+ ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,
739
+ if ``sample_weight`` is passed.
740
+
741
+ random_state : int seed, RandomState instance, or None, default: None
742
+ Controls the random seed given to each Tree estimator at each
743
+ boosting iteration.
744
+ In addition, it controls the random permutation of the features at
745
+ each split.
746
+ It also controls the random splitting of the training data to obtain a
747
+ validation set if `n_iter_no_change` is not None.
748
+ Pass an int for reproducible output across multiple function calls.
749
+
750
+ max_features : int, float, string or None, optional, default: None
751
+ The number of features to consider when looking for the best split:
752
+
753
+ - If int, values must be in the range `[1, inf)`.
754
+ - If float, values must be in the range `(0.0, 1.0]` and the features
755
+ considered at each split will be `max(1, int(max_features * n_features_in_))`.
756
+ - If 'sqrt', then `max_features=sqrt(n_features)`.
757
+ - If 'log2', then `max_features=log2(n_features)`.
758
+ - If None, then `max_features=n_features`.
759
+
760
+ Choosing `max_features < n_features` leads to a reduction of variance
761
+ and an increase in bias.
762
+
763
+ Note: the search for a split does not stop until at least one
764
+ valid partition of the node samples is found, even if it requires to
765
+ effectively inspect more than ``max_features`` features.
766
+
767
+ max_leaf_nodes : int or None, optional, default: None
768
+ Grow trees with ``max_leaf_nodes`` in best-first fashion.
769
+ Best nodes are defined as relative reduction in impurity.
770
+ Values must be in the range `[2, inf)`.
771
+ If `None`, then unlimited number of leaf nodes.
772
+
773
+ warm_start : bool, default: False
774
+ When set to ``True``, reuse the solution of the previous call to fit
775
+ and add more estimators to the ensemble, otherwise, just erase the
776
+ previous solution.
777
+
778
+ validation_fraction : float, default: 0.1
779
+ The proportion of training data to set aside as validation set for
780
+ early stopping. Values must be in the range `(0.0, 1.0)`.
781
+ Only used if ``n_iter_no_change`` is set to an integer.
782
+
783
+ n_iter_no_change : int, default: None
784
+ ``n_iter_no_change`` is used to decide if early stopping will be used
785
+ to terminate training when validation score is not improving. By
786
+ default it is set to None to disable early stopping. If set to a
787
+ number, it will set aside ``validation_fraction`` size of the training
788
+ data as validation and terminate training when validation score is not
789
+ improving in all of the previous ``n_iter_no_change`` numbers of
790
+ iterations. The split is stratified.
791
+ Values must be in the range `[1, inf)`.
792
+
793
+ tol : float, default: 1e-4
794
+ Tolerance for the early stopping. When the loss is not improving
795
+ by at least tol for ``n_iter_no_change`` iterations (if set to a
796
+ number), the training stops.
797
+ Values must be in the range `[0.0, inf)`.
798
+
799
+ dropout_rate : float, optional, default: 0.0
800
+ If larger than zero, the residuals at each iteration are only computed
801
+ from a random subset of base learners. The value corresponds to the
802
+ percentage of base learners that are dropped. In each iteration,
803
+ at least one base learner is dropped. This is an alternative regularization
804
+ to shrinkage, i.e., setting `learning_rate < 1.0`.
805
+ Values must be in the range `[0.0, 1.0)`.
806
+
807
+ verbose : int, default: 0
808
+ Enable verbose output. If 1 then it prints progress and performance
809
+ once in a while (the more trees the lower the frequency). If greater
810
+ than 1 then it prints progress and performance for every tree.
811
+ Values must be in the range `[0, inf)`.
812
+
813
+ ccp_alpha : non-negative float, optional, default: 0.0.
814
+ Complexity parameter used for Minimal Cost-Complexity Pruning. The
815
+ subtree with the largest cost complexity that is smaller than
816
+ ``ccp_alpha`` will be chosen. By default, no pruning is performed.
817
+ Values must be in the range `[0.0, inf)`.
818
+
819
+ Attributes
820
+ ----------
821
+ n_estimators_ : int
822
+ The number of estimators as selected by early stopping (if
823
+ ``n_iter_no_change`` is specified). Otherwise it is set to
824
+ ``n_estimators``.
825
+
826
+ feature_importances_ : ndarray, shape = (n_features,)
827
+ The feature importances (the higher, the more important the feature).
828
+
829
+ estimators_ : ndarray of DecisionTreeRegressor, shape = (n_estimators, 1)
830
+ The collection of fitted sub-estimators.
831
+
832
+ train_score_ : ndarray, shape = (n_estimators,)
833
+ The i-th score ``train_score_[i]`` is the loss of the
834
+ model at iteration ``i`` on the in-bag sample.
835
+ If ``subsample == 1`` this is the loss on the training data.
836
+
837
+ oob_improvement_ : ndarray, shape = (n_estimators,)
838
+ The improvement in loss on the out-of-bag samples
839
+ relative to the previous iteration.
840
+ ``oob_improvement_[0]`` is the improvement in
841
+ loss of the first stage over the ``init`` estimator.
842
+ Only available if ``subsample < 1.0``.
843
+
844
+ oob_scores_ : ndarray of shape (n_estimators,)
845
+ The full history of the loss values on the out-of-bag
846
+ samples. Only available if ``subsample < 1.0``.
847
+
848
+ oob_score_ : float
849
+ The last value of the loss on the out-of-bag samples. It is
850
+ the same as ``oob_scores_[-1]``. Only available if ``subsample < 1.0``.
851
+
852
+ n_features_in_ : int
853
+ Number of features seen during ``fit``.
854
+
855
+ feature_names_in_ : ndarray of shape (`n_features_in_`,)
856
+ Names of features seen during ``fit``. Defined only when `X`
857
+ has feature names that are all strings.
858
+
859
+ max_features_ : int
860
+ The inferred value of max_features.
861
+
862
+ unique_times_ : array of shape = (n_unique_times,)
863
+ Unique time points.
864
+
865
+ See also
866
+ --------
867
+ sksurv.ensemble.ComponentwiseGradientBoostingSurvivalAnalysis
868
+ Gradient boosting with component-wise least squares as base learner.
869
+
870
+ References
871
+ ----------
872
+ .. [1] J. H. Friedman, "Greedy function approximation: A gradient boosting machine,"
873
+ The Annals of Statistics, 29(5), 1189–1232, 2001.
874
+ .. [2] J. H. Friedman, "Stochastic gradient boosting,"
875
+ Computational Statistics & Data Analysis, 38(4), 367–378, 2002.
876
+ .. [3] G. Ridgeway, "The state of boosting,"
877
+ Computing Science and Statistics, 172–181, 1999.
878
+ .. [4] Hothorn, T., Bühlmann, P., Dudoit, S., Molinaro, A., van der Laan, M. J.,
879
+ "Survival ensembles", Biostatistics, 7(3), 355-73, 2006.
880
+ .. [5] K. V. Rashmi and R. Gilad-Bachrach,
881
+ "DART: Dropouts meet multiple additive regression trees,"
882
+ in 18th International Conference on Artificial Intelligence and Statistics,
883
+ 2015, 489–497.
884
+ """
885
+
886
+ _parameter_constraints = {
887
+ **BaseGradientBoosting._parameter_constraints,
888
+ "loss": [StrOptions(frozenset(LOSS_FUNCTIONS.keys()))],
889
+ "dropout_rate": [Interval(numbers.Real, 0.0, 1.0, closed="left")],
890
+ }
891
+
892
+ def __init__(
893
+ self,
894
+ *,
895
+ loss="coxph",
896
+ learning_rate=0.1,
897
+ n_estimators=100,
898
+ subsample=1.0,
899
+ criterion="friedman_mse",
900
+ min_samples_split=2,
901
+ min_samples_leaf=1,
902
+ min_weight_fraction_leaf=0.0,
903
+ max_depth=3,
904
+ min_impurity_decrease=0.0,
905
+ random_state=None,
906
+ max_features=None,
907
+ max_leaf_nodes=None,
908
+ warm_start=False,
909
+ validation_fraction=0.1,
910
+ n_iter_no_change=None,
911
+ tol=1e-4,
912
+ dropout_rate=0.0,
913
+ verbose=0,
914
+ ccp_alpha=0.0,
915
+ ):
916
+ super().__init__(
917
+ loss=loss,
918
+ learning_rate=learning_rate,
919
+ n_estimators=n_estimators,
920
+ criterion=criterion,
921
+ min_samples_split=min_samples_split,
922
+ min_samples_leaf=min_samples_leaf,
923
+ min_weight_fraction_leaf=min_weight_fraction_leaf,
924
+ max_depth=max_depth,
925
+ init="zero",
926
+ subsample=subsample,
927
+ max_features=max_features,
928
+ random_state=random_state,
929
+ verbose=verbose,
930
+ max_leaf_nodes=max_leaf_nodes,
931
+ warm_start=warm_start,
932
+ min_impurity_decrease=min_impurity_decrease,
933
+ validation_fraction=validation_fraction,
934
+ n_iter_no_change=n_iter_no_change,
935
+ tol=tol,
936
+ ccp_alpha=ccp_alpha,
937
+ )
938
+ self.dropout_rate = dropout_rate
939
+
940
+ def _encode_y(self, y, sample_weight):
941
+ self.n_trees_per_iteration_ = 1
942
+ return y
943
+
944
+ def _get_loss(self, sample_weight):
945
+ return LOSS_FUNCTIONS[self.loss]()
946
+
947
+ @property
948
+ def _predict_risk_score(self):
949
+ return isinstance(self._loss, CoxPH)
950
+
951
+ def _set_max_features(self):
952
+ """Set self.max_features_."""
953
+ if isinstance(self.max_features, str):
954
+ if self.max_features == "sqrt":
955
+ max_features = max(1, int(np.sqrt(self.n_features_in_)))
956
+ elif self.max_features == "log2":
957
+ max_features = max(1, int(np.log2(self.n_features_in_)))
958
+ elif self.max_features is None:
959
+ max_features = self.n_features_in_
960
+ elif isinstance(self.max_features, (numbers.Integral, np.integer)):
961
+ max_features = self.max_features
962
+ else: # float
963
+ max_features = max(1, int(self.max_features * self.n_features_in_))
964
+
965
+ self.max_features_ = max_features
966
+
967
+ def _update_with_dropout(self, i, X, raw_predictions, k, scale, random_state):
968
+ # select base learners to be dropped for next iteration
969
+ drop_model, n_dropped = _sample_binomial_plus_one(self.dropout_rate, i + 1, random_state)
970
+
971
+ # adjust scaling factor of tree that is going to be trained in next iteration
972
+ scale[i + 1] = 1.0 / (n_dropped + 1.0)
973
+
974
+ raw_predictions[:, k] = 0
975
+ for m in range(i + 1):
976
+ if drop_model[m] == 1:
977
+ # adjust scaling factor of dropped trees
978
+ scale[m] *= n_dropped / (n_dropped + 1.0)
979
+ else:
980
+ # pseudoresponse of next iteration (without contribution of dropped trees)
981
+ raw_predictions[:, k] += self.learning_rate * scale[m] * self.estimators_[m, k].predict(X).ravel()
982
+
983
+ def _fit_stage(
984
+ self,
985
+ i,
986
+ X,
987
+ y,
988
+ raw_predictions,
989
+ sample_weight,
990
+ sample_mask,
991
+ random_state,
992
+ scale,
993
+ X_csc=None,
994
+ X_csr=None,
995
+ ):
996
+ """Fit another stage of ``n_classes_`` trees to the boosting model."""
997
+
998
+ assert sample_mask.dtype == bool
999
+
1000
+ # whether to use dropout in next iteration
1001
+ do_dropout = self.dropout_rate > 0.0 and i < len(scale) - 1
1002
+
1003
+ # Need to pass a copy of raw_predictions to negative_gradient()
1004
+ # because raw_predictions is partially updated at the end of the loop
1005
+ # in update_terminal_regions(), and gradients need to be evaluated at
1006
+ # iteration i - 1.
1007
+ raw_predictions_copy = raw_predictions.copy()
1008
+
1009
+ neg_gradient = self._loss.gradient(
1010
+ y_true=y,
1011
+ raw_prediction=raw_predictions_copy,
1012
+ sample_weight=None, # We pass sample_weights to the tree directly.
1013
+ )
1014
+
1015
+ for k in range(self.n_trees_per_iteration_):
1016
+ # induce regression tree on the negative gradient
1017
+ tree = DecisionTreeRegressor(
1018
+ criterion=self.criterion,
1019
+ splitter="best",
1020
+ max_depth=self.max_depth,
1021
+ min_samples_split=self.min_samples_split,
1022
+ min_samples_leaf=self.min_samples_leaf,
1023
+ min_weight_fraction_leaf=self.min_weight_fraction_leaf,
1024
+ min_impurity_decrease=self.min_impurity_decrease,
1025
+ max_features=self.max_features,
1026
+ max_leaf_nodes=self.max_leaf_nodes,
1027
+ random_state=random_state,
1028
+ ccp_alpha=self.ccp_alpha,
1029
+ )
1030
+
1031
+ if self.subsample < 1.0:
1032
+ # no inplace multiplication!
1033
+ sample_weight = sample_weight * sample_mask.astype(np.float64)
1034
+
1035
+ X = X_csc if X_csc is not None else X
1036
+ tree.fit(X, neg_gradient, sample_weight=sample_weight, check_input=False)
1037
+
1038
+ # add tree to ensemble
1039
+ self.estimators_[i, k] = tree
1040
+
1041
+ # update tree leaves
1042
+ if do_dropout:
1043
+ self._update_with_dropout(i, X, raw_predictions, k, scale, random_state)
1044
+ else:
1045
+ # update tree leaves
1046
+ X_for_tree_update = X_csr if X_csr is not None else X
1047
+ self._loss.update_terminal_regions(
1048
+ tree.tree_,
1049
+ X_for_tree_update,
1050
+ y,
1051
+ neg_gradient,
1052
+ raw_predictions,
1053
+ sample_weight,
1054
+ sample_mask,
1055
+ learning_rate=self.learning_rate,
1056
+ k=k,
1057
+ )
1058
+
1059
+ return raw_predictions
1060
+
1061
+ def _fit_stages( # noqa: C901
1062
+ self,
1063
+ X,
1064
+ y,
1065
+ raw_predictions,
1066
+ sample_weight,
1067
+ random_state,
1068
+ X_val,
1069
+ y_val,
1070
+ sample_weight_val,
1071
+ scale,
1072
+ begin_at_stage=0,
1073
+ monitor=None,
1074
+ ):
1075
+ """Iteratively fits the stages.
1076
+
1077
+ For each stage it computes the progress (OOB, train score)
1078
+ and delegates to ``_fit_stage``.
1079
+ Returns the number of stages fit; might differ from ``n_estimators``
1080
+ due to early stopping.
1081
+ """
1082
+ n_samples = X.shape[0]
1083
+ do_oob = self.subsample < 1.0
1084
+ sample_mask = np.ones((n_samples,), dtype=bool)
1085
+ n_inbag = max(1, int(self.subsample * n_samples))
1086
+
1087
+ if self.verbose:
1088
+ verbose_reporter = VerboseReporter(verbose=self.verbose)
1089
+ verbose_reporter.init(self, begin_at_stage)
1090
+
1091
+ X_csc = csc_matrix(X) if issparse(X) else None
1092
+ X_csr = csr_matrix(X) if issparse(X) else None
1093
+
1094
+ if self.n_iter_no_change is not None:
1095
+ loss_history = np.full(self.n_iter_no_change, np.inf)
1096
+ # We create a generator to get the predictions for X_val after
1097
+ # the addition of each successive stage
1098
+ y_val_pred_iter = self._staged_raw_predict(X_val, check_input=False)
1099
+
1100
+ # perform boosting iterations
1101
+ i = begin_at_stage
1102
+ for i in range(begin_at_stage, self.n_estimators):
1103
+ # subsampling
1104
+ if do_oob:
1105
+ sample_mask = _random_sample_mask(n_samples, n_inbag, random_state)
1106
+ # OOB score before adding this stage
1107
+ y_oob_masked = y[~sample_mask]
1108
+ sample_weight_oob_masked = sample_weight[~sample_mask]
1109
+ if i == 0: # store the initial loss to compute the OOB score
1110
+ initial_loss = self._loss(
1111
+ y_true=y_oob_masked,
1112
+ raw_prediction=raw_predictions[~sample_mask],
1113
+ sample_weight=sample_weight_oob_masked,
1114
+ )
1115
+
1116
+ # fit next stage of trees
1117
+ raw_predictions = self._fit_stage(
1118
+ i,
1119
+ X,
1120
+ y,
1121
+ raw_predictions,
1122
+ sample_weight,
1123
+ sample_mask,
1124
+ random_state,
1125
+ scale,
1126
+ X_csc=X_csc,
1127
+ X_csr=X_csr,
1128
+ )
1129
+
1130
+ # track loss
1131
+ if do_oob:
1132
+ self.train_score_[i] = self._loss(
1133
+ y_true=y[sample_mask],
1134
+ raw_prediction=raw_predictions[sample_mask],
1135
+ sample_weight=sample_weight[sample_mask],
1136
+ )
1137
+ self.oob_scores_[i] = self._loss(
1138
+ y_true=y_oob_masked,
1139
+ raw_prediction=raw_predictions[~sample_mask],
1140
+ sample_weight=sample_weight_oob_masked,
1141
+ )
1142
+ previous_loss = initial_loss if i == 0 else self.oob_scores_[i - 1]
1143
+ self.oob_improvement_[i] = previous_loss - self.oob_scores_[i]
1144
+ self.oob_score_ = self.oob_scores_[-1]
1145
+ else:
1146
+ # no need to fancy index w/ no subsampling
1147
+ self.train_score_[i] = self._loss(y_true=y, raw_prediction=raw_predictions, sample_weight=sample_weight)
1148
+
1149
+ if self.verbose > 0:
1150
+ verbose_reporter.update(i, self)
1151
+
1152
+ if monitor is not None:
1153
+ early_stopping = monitor(i, self, locals())
1154
+ if early_stopping:
1155
+ break
1156
+
1157
+ # We also provide an early stopping based on the score from
1158
+ # validation set (X_val, y_val), if n_iter_no_change is set
1159
+ if self.n_iter_no_change is not None:
1160
+ # By calling next(y_val_pred_iter), we get the predictions
1161
+ # for X_val after the addition of the current stage
1162
+ validation_loss = self._loss(y_val, next(y_val_pred_iter), sample_weight_val)
1163
+
1164
+ # Require validation_score to be better (less) than at least
1165
+ # one of the last n_iter_no_change evaluations
1166
+ if np.any(validation_loss + self.tol < loss_history):
1167
+ loss_history[i % len(loss_history)] = validation_loss
1168
+ else:
1169
+ break
1170
+
1171
+ return i + 1
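The validation-based early stopping above keeps a ring buffer `loss_history` of the last `n_iter_no_change` validation losses and continues only while the new loss improves on at least one of them by more than `tol`. A minimal sketch of that test (illustrative values only):

>>> import numpy as np
>>> loss_history = np.array([0.52, 0.50, 0.51])
>>> new_loss, tol = 0.499, 1e-4
>>> bool(np.any(new_loss + tol < loss_history))   # improves on at least one entry -> keep boosting
True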
1172
+
1173
+ def _init_state(self):
1174
+ super()._init_state()
1175
+
1176
+ if self.dropout_rate > 0.0:
1177
+ self._scale = np.ones(self.n_estimators, dtype=float)
1178
+
1179
+ def _resize_state(self):
1180
+ super()._resize_state()
1181
+
1182
+ if self.dropout_rate > 0:
1183
+ if not hasattr(self, "_scale"):
1184
+ raise ValueError(
1185
+ "fitting with warm_start=True and dropout_rate > 0 is only "
1186
+ "supported if the previous fit used dropout_rate > 0 too"
1187
+ )
1188
+
1189
+ self._scale = np.resize(self._scale, self.n_estimators)
1190
+ self._scale[self.n_estimators_ :] = 1
1191
+
1192
+ def _shrink_state(self, n_stages):
1193
+ self.estimators_ = self.estimators_[:n_stages]
1194
+ self.train_score_ = self.train_score_[:n_stages]
1195
+ if hasattr(self, "oob_improvement_"):
1196
+ self.oob_improvement_ = self.oob_improvement_[:n_stages]
1197
+ self.oob_scores_ = self.oob_scores_[:n_stages]
1198
+ self.oob_score_ = self.oob_scores_[-1]
1199
+ if hasattr(self, "_scale"):
1200
+ self._scale = self._scale[:n_stages]
1201
+
1202
+ def fit(self, X, y, sample_weight=None, monitor=None):
1203
+ """Fit the gradient boosting model.
1204
+
1205
+ Parameters
1206
+ ----------
1207
+ X : array-like, shape = (n_samples, n_features)
1208
+ Data matrix
1209
+
1210
+ y : structured array, shape = (n_samples,)
1211
+ A structured array containing the binary event indicator
1212
+ as first field, and time of event or time of censoring as
1213
+ second field.
1214
+
1215
+ sample_weight : array-like, shape = (n_samples,), optional
1216
+ Weights given to each sample. If omitted, all samples have weight 1.
1217
+
1218
+ monitor : callable, optional
1219
+ The monitor is called after each iteration with the current
1220
+ iteration, a reference to the estimator and the local variables of
1221
+ ``_fit_stages`` as keyword arguments ``callable(i, self,
1222
+ locals())``. If the callable returns ``True`` the fitting procedure
1223
+ is stopped. The monitor can be used for various things such as
1224
+ computing held-out estimates, early stopping, model introspection, and
1225
+ snapshotting.
1226
+
1227
+ Returns
1228
+ -------
1229
+ self : object
1230
+ Returns self.
1231
+ """
1232
+ self._validate_params()
1233
+
1234
+ if not self.warm_start:
1235
+ self._clear_state()
1236
+
1237
+ X = self._validate_data(
1238
+ X,
1239
+ ensure_min_samples=2,
1240
+ order="C",
1241
+ accept_sparse=["csr", "csc", "coo"],
1242
+ dtype=DTYPE,
1243
+ )
1244
+ event, time = check_array_survival(X, y)
1245
+
1246
+ sample_weight_is_none = sample_weight is None
1247
+ sample_weight = _check_sample_weight(sample_weight, X)
1248
+
1249
+ if sample_weight_is_none:
1250
+ y = self._encode_y(y=y, sample_weight=None)
1251
+ else:
1252
+ y = self._encode_y(y=y, sample_weight=sample_weight)
1253
+
1254
+ self._set_max_features()
1255
+
1256
+ # self.loss is guaranteed to be a string
1257
+ self._loss = self._get_loss(sample_weight=sample_weight)
1258
+
1259
+ if isinstance(self._loss, (CensoredSquaredLoss, IPCWLeastSquaresError)):
1260
+ time = np.log(time)
1261
+
1262
+ if self.n_iter_no_change is not None:
1263
+ (
1264
+ X_train,
1265
+ X_val,
1266
+ event_train,
1267
+ event_val,
1268
+ time_train,
1269
+ time_val,
1270
+ sample_weight_train,
1271
+ sample_weight_val,
1272
+ ) = train_test_split(
1273
+ X,
1274
+ event,
1275
+ time,
1276
+ sample_weight,
1277
+ random_state=self.random_state,
1278
+ test_size=self.validation_fraction,
1279
+ stratify=event,
1280
+ )
1281
+ y_val = np.fromiter(zip(event_val, time_val), dtype=[("event", bool), ("time", np.float64)])
1282
+ else:
1283
+ X_train, sample_weight_train = X, sample_weight
1284
+ event_train, time_train = event, time
1285
+ X_val = y_val = sample_weight_val = None
1286
+
1287
+ y_train = np.fromiter(zip(event_train, time_train), dtype=[("event", bool), ("time", np.float64)])
1288
+ n_samples = X_train.shape[0]
1289
+
1290
+ # First time calling fit.
1291
+ if not self._is_fitted():
1292
+ # init state
1293
+ self._init_state()
1294
+
1295
+ raw_predictions = np.zeros(
1296
+ shape=(n_samples, self.n_trees_per_iteration_),
1297
+ dtype=np.float64,
1298
+ )
1299
+
1300
+ begin_at_stage = 0
1301
+
1302
+ # The rng state must be preserved if warm_start is True
1303
+ self._rng = check_random_state(self.random_state)
1304
+
1305
+ # warm start: this is not the first time fit was called
1306
+ else:
1307
+ # add more estimators to fitted model
1308
+ # invariant: warm_start = True
1309
+ if self.n_estimators < self.estimators_.shape[0]:
1310
+ raise ValueError(
1311
+ "n_estimators=%d must be larger or equal to "
1312
+ "estimators_.shape[0]=%d when "
1313
+ "warm_start==True" % (self.n_estimators, self.estimators_.shape[0])
1314
+ )
1315
+ begin_at_stage = self.estimators_.shape[0]
1316
+ # The requirements of _raw_predict
1317
+ # are more constrained than fit. It accepts only CSR
1318
+ # matrices. Finite values have already been checked in _validate_data.
1319
+ X_train = check_array(
1320
+ X_train,
1321
+ dtype=DTYPE,
1322
+ order="C",
1323
+ accept_sparse="csr",
1324
+ force_all_finite=False,
1325
+ )
1326
+ raw_predictions = self._raw_predict(X_train)
1327
+ self._resize_state()
1328
+
1329
+ # apply dropout to last stage of previous fit
1330
+ if hasattr(self, "_scale") and self.dropout_rate > 0:
1331
+ for k in range(self.n_trees_per_iteration_):
1332
+ self._update_with_dropout(
1333
+ # pylint: disable-next=access-member-before-definition
1334
+ self.n_estimators_ - 1,
1335
+ X_train,
1336
+ raw_predictions,
1337
+ k,
1338
+ self._scale,
1339
+ self._rng,
1340
+ )
1341
+
1342
+ scale = getattr(self, "_scale", None)
1343
+
1344
+ # fit the boosting stages
1345
+ n_stages = self._fit_stages(
1346
+ X_train,
1347
+ y_train,
1348
+ raw_predictions,
1349
+ sample_weight_train,
1350
+ self._rng,
1351
+ X_val,
1352
+ y_val,
1353
+ sample_weight_val,
1354
+ scale,
1355
+ begin_at_stage,
1356
+ monitor,
1357
+ )
1358
+ # change shape of arrays after fit (early-stopping or additional tests)
1359
+ if n_stages != self.estimators_.shape[0]:
1360
+ self._shrink_state(n_stages)
1361
+ self.n_estimators_ = n_stages
1362
+
1363
+ self._set_baseline_model(X_train, event_train, time_train)
1364
+
1365
+ return self
1366
+
1367
+ def _set_baseline_model(self, X, event, time):
1368
+ if isinstance(self._loss, CoxPH):
1369
+ X_pred = X
1370
+ if issparse(X):
1371
+ X_pred = X.asformat("csr")
1372
+ risk_scores = self._predict(X_pred)
1373
+ self._baseline_model = BreslowEstimator().fit(risk_scores, event, time)
1374
+ else:
1375
+ self._baseline_model = None
1376
+
1377
+ def _dropout_predict_stage(self, X, i, K, score):
1378
+ for k in range(K):
1379
+ tree = self.estimators_[i, k].tree_
1380
+ score += self.learning_rate * self._scale[i] * tree.predict(X).reshape((-1, 1))
1381
+ return score
1382
+
1383
+ def _dropout_raw_predict(self, X):
1384
+ raw_predictions = self._raw_predict_init(X)
1385
+
1386
+ n_estimators, K = self.estimators_.shape
1387
+ for i in range(n_estimators):
1388
+ self._dropout_predict_stage(X, i, K, raw_predictions)
1389
+
1390
+ return raw_predictions
1391
+
1392
+ def _dropout_staged_raw_predict(self, X):
1393
+ X = self._validate_data(X, dtype=DTYPE, order="C", accept_sparse="csr")
1394
+ raw_predictions = self._raw_predict_init(X)
1395
+
1396
+ n_estimators, K = self.estimators_.shape
1397
+ for i in range(n_estimators):
1398
+ self._dropout_predict_stage(X, i, K, raw_predictions)
1399
+ yield raw_predictions.copy()
1400
+
1401
+ def _raw_predict(self, X):
1402
+ # if dropout wasn't used during training, proceed as usual,
1403
+ # otherwise consider scaling factor of individual trees
1404
+ if not hasattr(self, "_scale"):
1405
+ return super()._raw_predict(X)
1406
+ return self._dropout_raw_predict(X)
1407
+
1408
+ def _init_decision_function(self, X): # pragma: no cover
1409
+ return super()._init_decision_function(X).reshape(-1, 1)
1410
+
1411
+ def _decision_function(self, X): # pragma: no cover
1412
+ return self._raw_predict(X)
1413
+
1414
+ def _predict(self, X):
1415
+ score = self._raw_predict(X)
1416
+ if score.shape[1] == 1:
1417
+ score = score.ravel()
1418
+
1419
+ return self._loss._scale_raw_prediction(score)
1420
+
1421
+ def predict(self, X):
1422
+ """Predict risk scores.
1423
+
1424
+ If `loss='coxph'`, predictions can be interpreted as log hazard ratio
1425
+ similar to the linear predictor of a Cox proportional hazards
1426
+ model. If `loss='squared'` or `loss='ipcwls'`, predictions are the
1427
+ time to event.
1428
+
1429
+ Parameters
1430
+ ----------
1431
+ X : array-like, shape = (n_samples, n_features)
1432
+ The input samples.
1433
+
1434
+ Returns
1435
+ -------
1436
+ y : ndarray, shape = (n_samples,)
1437
+ The risk scores.
1438
+ """
1439
+ check_is_fitted(self, "estimators_")
1440
+
1441
+ X = self._validate_data(X, reset=False, order="C", accept_sparse="csr", dtype=DTYPE)
1442
+ return self._predict(X)
1443
+
1444
+ def staged_predict(self, X):
1445
+ """Predict risk scores at each stage for X.
1446
+
1447
+ This method allows monitoring (i.e. determine error on testing set)
1448
+ after each stage.
1449
+
1450
+ If `loss='coxph'`, predictions can be interpreted as log hazard ratio
1451
+ similar to the linear predictor of a Cox proportional hazards
1452
+ model. If `loss='squared'` or `loss='ipcwls'`, predictions are the
1453
+ time to event.
1454
+
1455
+ Parameters
1456
+ ----------
1457
+ X : array-like, shape = (n_samples, n_features)
1458
+ The input samples.
1459
+
1460
+ Returns
1461
+ -------
1462
+ y : generator of array of shape = (n_samples,)
1463
+ The predicted value of the input samples.
1464
+ """
1465
+ check_is_fitted(self, "estimators_")
1466
+
1467
+ # if dropout wasn't used during training, proceed as usual,
1468
+ # otherwise consider scaling factor of individual trees
1469
+ if not hasattr(self, "_scale"):
1470
+ predictions_iter = self._staged_raw_predict(X)
1471
+ else:
1472
+ predictions_iter = self._dropout_staged_raw_predict(X)
1473
+
1474
+ for raw_predictions in predictions_iter:
1475
+ y = self._loss._scale_raw_prediction(raw_predictions)
1476
+ yield y.ravel()
1477
+
1478
+ def _get_baseline_model(self):
1479
+ if self._baseline_model is None:
1480
+ raise ValueError("`fit` must be called with the loss option set to 'coxph'.")
1481
+ return self._baseline_model
1482
+
1483
+ def predict_cumulative_hazard_function(self, X, return_array=False):
1484
+ """Predict cumulative hazard function.
1485
+
1486
+ Only available if :meth:`fit` has been called with `loss = "coxph"`.
1487
+
1488
+ The cumulative hazard function for an individual
1489
+ with feature vector :math:`x` is defined as
1490
+
1491
+ .. math::
1492
+
1493
+ H(t \\mid x) = \\exp(f(x)) H_0(t) ,
1494
+
1495
+ where :math:`f(\\cdot)` is the additive ensemble of base learners,
1496
+ and :math:`H_0(t)` is the baseline hazard function,
1497
+ estimated by Breslow's estimator.
1498
+
1499
+ Parameters
1500
+ ----------
1501
+ X : array-like, shape = (n_samples, n_features)
1502
+ Data matrix.
1503
+
1504
+ return_array : boolean, default: False
1505
+ If set, return an array with the cumulative hazard rate
1506
+ for each `self.unique_times_`, otherwise an array of
1507
+ :class:`sksurv.functions.StepFunction`.
1508
+
1509
+ Returns
1510
+ -------
1511
+ cum_hazard : ndarray
1512
+ If `return_array` is set, an array with the cumulative hazard rate
1513
+ for each `self.unique_times_`, otherwise an array of length `n_samples`
1514
+ of :class:`sksurv.functions.StepFunction` instances will be returned.
1515
+
1516
+ Examples
1517
+ --------
1518
+ >>> import matplotlib.pyplot as plt
1519
+ >>> from sksurv.datasets import load_whas500
1520
+ >>> from sksurv.ensemble import GradientBoostingSurvivalAnalysis
1521
+
1522
+ Load the data.
1523
+
1524
+ >>> X, y = load_whas500()
1525
+ >>> X = X.astype(float)
1526
+
1527
+ Fit the model.
1528
+
1529
+ >>> estimator = GradientBoostingSurvivalAnalysis(loss="coxph").fit(X, y)
1530
+
1531
+ Estimate the cumulative hazard function for the first 10 samples.
1532
+
1533
+ >>> chf_funcs = estimator.predict_cumulative_hazard_function(X.iloc[:10])
1534
+
1535
+ Plot the estimated cumulative hazard functions.
1536
+
1537
+ >>> for fn in chf_funcs:
1538
+ ... plt.step(fn.x, fn(fn.x), where="post")
1539
+ ...
1540
+ >>> plt.ylim(0, 1)
1541
+ >>> plt.show()
1542
+ """
1543
+ return self._predict_cumulative_hazard_function(self._get_baseline_model(), self.predict(X), return_array)
1544
+
1545
+ def predict_survival_function(self, X, return_array=False):
1546
+ """Predict survival function.
1547
+
1548
+ Only available if :meth:`fit` has been called with `loss = "coxph"`.
1549
+
1550
+ The survival function for an individual
1551
+ with feature vector :math:`x` is defined as
1552
+
1553
+ .. math::
1554
+
1555
+ S(t \\mid x) = S_0(t)^{\\exp(f(x))} ,
1556
+
1557
+ where :math:`f(\\cdot)` is the additive ensemble of base learners,
1558
+ and :math:`S_0(t)` is the baseline survival function,
1559
+ estimated by Breslow's estimator.
1560
+
1561
+ Parameters
1562
+ ----------
1563
+ X : array-like, shape = (n_samples, n_features)
1564
+ Data matrix.
1565
+
1566
+ return_array : boolean, default: False
1567
+ If set, return an array with the probability
1568
+ of survival for each `self.unique_times_`,
1569
+ otherwise an array of :class:`sksurv.functions.StepFunction`.
1570
+
1571
+ Returns
1572
+ -------
1573
+ survival : ndarray
1574
+ If `return_array` is set, an array with the probability of
1575
+ survival for each `self.unique_times_`, otherwise an array of
1576
+ length `n_samples` of :class:`sksurv.functions.StepFunction`
1577
+ instances will be returned.
1578
+
1579
+ Examples
1580
+ --------
1581
+ >>> import matplotlib.pyplot as plt
1582
+ >>> from sksurv.datasets import load_whas500
1583
+ >>> from sksurv.ensemble import GradientBoostingSurvivalAnalysis
1584
+
1585
+ Load the data.
1586
+
1587
+ >>> X, y = load_whas500()
1588
+ >>> X = X.astype(float)
1589
+
1590
+ Fit the model.
1591
+
1592
+ >>> estimator = GradientBoostingSurvivalAnalysis(loss="coxph").fit(X, y)
1593
+
1594
+ Estimate the survival function for the first 10 samples.
1595
+
1596
+ >>> surv_funcs = estimator.predict_survival_function(X.iloc[:10])
1597
+
1598
+ Plot the estimated survival functions.
1599
+
1600
+ >>> for fn in surv_funcs:
1601
+ ... plt.step(fn.x, fn(fn.x), where="post")
1602
+ ...
1603
+ >>> plt.ylim(0, 1)
1604
+ >>> plt.show()
1605
+ """
1606
+ return self._predict_survival_function(self._get_baseline_model(), self.predict(X), return_array)
1607
+
1608
+ @property
1609
+ def unique_times_(self):
1610
+ return self._get_baseline_model().unique_times_