scikit-survival 0.26.0__cp314-cp314-macosx_10_15_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. scikit_survival-0.26.0.dist-info/METADATA +185 -0
  2. scikit_survival-0.26.0.dist-info/RECORD +58 -0
  3. scikit_survival-0.26.0.dist-info/WHEEL +6 -0
  4. scikit_survival-0.26.0.dist-info/licenses/COPYING +674 -0
  5. scikit_survival-0.26.0.dist-info/top_level.txt +1 -0
  6. sksurv/__init__.py +183 -0
  7. sksurv/base.py +115 -0
  8. sksurv/bintrees/__init__.py +15 -0
  9. sksurv/bintrees/_binarytrees.cpython-314-darwin.so +0 -0
  10. sksurv/column.py +204 -0
  11. sksurv/compare.py +123 -0
  12. sksurv/datasets/__init__.py +12 -0
  13. sksurv/datasets/base.py +614 -0
  14. sksurv/datasets/data/GBSG2.arff +700 -0
  15. sksurv/datasets/data/actg320.arff +1169 -0
  16. sksurv/datasets/data/bmt.arff +46 -0
  17. sksurv/datasets/data/breast_cancer_GSE7390-metastasis.arff +283 -0
  18. sksurv/datasets/data/cgvhd.arff +118 -0
  19. sksurv/datasets/data/flchain.arff +7887 -0
  20. sksurv/datasets/data/veteran.arff +148 -0
  21. sksurv/datasets/data/whas500.arff +520 -0
  22. sksurv/docstrings.py +99 -0
  23. sksurv/ensemble/__init__.py +2 -0
  24. sksurv/ensemble/_coxph_loss.cpython-314-darwin.so +0 -0
  25. sksurv/ensemble/boosting.py +1564 -0
  26. sksurv/ensemble/forest.py +902 -0
  27. sksurv/ensemble/survival_loss.py +151 -0
  28. sksurv/exceptions.py +18 -0
  29. sksurv/functions.py +114 -0
  30. sksurv/io/__init__.py +2 -0
  31. sksurv/io/arffread.py +91 -0
  32. sksurv/io/arffwrite.py +181 -0
  33. sksurv/kernels/__init__.py +1 -0
  34. sksurv/kernels/_clinical_kernel.cpython-314-darwin.so +0 -0
  35. sksurv/kernels/clinical.py +348 -0
  36. sksurv/linear_model/__init__.py +3 -0
  37. sksurv/linear_model/_coxnet.cpython-314-darwin.so +0 -0
  38. sksurv/linear_model/aft.py +208 -0
  39. sksurv/linear_model/coxnet.py +592 -0
  40. sksurv/linear_model/coxph.py +637 -0
  41. sksurv/meta/__init__.py +4 -0
  42. sksurv/meta/base.py +35 -0
  43. sksurv/meta/ensemble_selection.py +724 -0
  44. sksurv/meta/stacking.py +370 -0
  45. sksurv/metrics.py +1028 -0
  46. sksurv/nonparametric.py +911 -0
  47. sksurv/preprocessing.py +195 -0
  48. sksurv/svm/__init__.py +11 -0
  49. sksurv/svm/_minlip.cpython-314-darwin.so +0 -0
  50. sksurv/svm/_prsvm.cpython-314-darwin.so +0 -0
  51. sksurv/svm/minlip.py +695 -0
  52. sksurv/svm/naive_survival_svm.py +249 -0
  53. sksurv/svm/survival_svm.py +1236 -0
  54. sksurv/testing.py +155 -0
  55. sksurv/tree/__init__.py +1 -0
  56. sksurv/tree/_criterion.cpython-314-darwin.so +0 -0
  57. sksurv/tree/tree.py +790 -0
  58. sksurv/util.py +416 -0
@@ -0,0 +1,1564 @@
1
+ # This program is free software: you can redistribute it and/or modify
2
+ # it under the terms of the GNU General Public License as published by
3
+ # the Free Software Foundation, either version 3 of the License, or
4
+ # (at your option) any later version.
5
+ #
6
+ # This program is distributed in the hope that it will be useful,
7
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
8
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9
+ # GNU General Public License for more details.
10
+ #
11
+ # You should have received a copy of the GNU General Public License
12
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
13
+ import numbers
14
+
15
+ import numpy as np
16
+ from scipy.sparse import csc_matrix, csr_matrix, issparse
17
+ from sklearn.base import BaseEstimator
18
+ from sklearn.ensemble._base import BaseEnsemble
19
+ from sklearn.ensemble._gb import BaseGradientBoosting, VerboseReporter
20
+ from sklearn.ensemble._gradient_boosting import _random_sample_mask
21
+ from sklearn.model_selection import train_test_split
22
+ from sklearn.tree import DecisionTreeRegressor
23
+ from sklearn.tree._tree import DTYPE
24
+ from sklearn.utils._param_validation import Interval, StrOptions
25
+ from sklearn.utils.extmath import squared_norm
26
+ from sklearn.utils.validation import (
27
+ _check_sample_weight,
28
+ check_array,
29
+ check_is_fitted,
30
+ check_random_state,
31
+ validate_data,
32
+ )
33
+
34
+ from ..base import SurvivalAnalysisMixin
35
+ from ..docstrings import append_cumulative_hazard_example, append_survival_function_example
36
+ from ..linear_model.coxph import BreslowEstimator
37
+ from ..util import check_array_survival
38
+ from .survival_loss import LOSS_FUNCTIONS, CensoredSquaredLoss, CoxPH, IPCWLeastSquaresError
39
+
40
+ __all__ = ["ComponentwiseGradientBoostingSurvivalAnalysis", "GradientBoostingSurvivalAnalysis"]
41
+
42
+
43
+ def _sample_binomial_plus_one(p, size, random_state):
44
+ drop_model = random_state.binomial(1, p=p, size=size)
45
+ n_dropped = np.sum(drop_model)
46
+ if n_dropped == 0:
47
+ idx = random_state.randint(0, size)
48
+ drop_model[idx] = 1
49
+ n_dropped = 1
50
+ return drop_model, n_dropped
51
+
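# Illustrative sketch (standalone annotation, not part of this file): what the
# helper above returns for a 30% dropout rate over 5 previously fitted base
# learners. Assumes the module is importable as ``sksurv.ensemble.boosting``.
import numpy as np
from sksurv.ensemble.boosting import _sample_binomial_plus_one

rng = np.random.RandomState(0)
drop_mask, n_dropped = _sample_binomial_plus_one(p=0.3, size=5, random_state=rng)
# drop_mask is a 0/1 array of length 5; n_dropped >= 1 always holds, because one
# randomly chosen learner is force-dropped whenever the binomial draw selects none.
print(drop_mask, n_dropped)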
52
+
53
+ class _ComponentwiseLeastSquares(BaseEstimator):
54
+ def __init__(self, component):
55
+ self.component = component
56
+
57
+ def fit(self, X, y, sample_weight):
58
+ xw = X[:, self.component] * sample_weight
59
+ b = np.dot(xw, y)
60
+ if b == 0:
61
+ self.coef_ = 0
62
+ else:
63
+ a = np.dot(xw, xw)
64
+ self.coef_ = b / a
65
+
66
+ return self
67
+
68
+ def predict(self, X):
69
+ return X[:, self.component] * self.coef_
70
+
71
+
72
+ def _fit_stage_componentwise(X, residuals, sample_weight, **fit_params): # pylint: disable=unused-argument
73
+ """Fit component-wise weighted least squares model"""
74
+ n_features = X.shape[1]
75
+
76
+ base_learners = []
77
+ error = np.empty(n_features)
78
+ for component in range(n_features):
79
+ learner = _ComponentwiseLeastSquares(component).fit(X, residuals, sample_weight)
80
+ l_pred = learner.predict(X)
81
+ error[component] = squared_norm(residuals - l_pred)
82
+ base_learners.append(learner)
83
+
84
+ # TODO: could use bottleneck.nanargmin for speed
85
+ best_component = np.nanargmin(error)
86
+ best_learner = base_learners[best_component]
87
+ return best_learner
88
+
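# Illustrative sketch (standalone annotation, not part of this file): each stage
# above fits a one-feature weighted least-squares model per column with the
# closed-form coefficient <w * x_j, r> / <w * x_j, w * x_j> and keeps the column
# whose fit leaves the smallest squared residual norm.
import numpy as np
from sksurv.ensemble.boosting import _fit_stage_componentwise

rng = np.random.RandomState(0)
X = rng.standard_normal((100, 4))
residuals = 2.0 * X[:, 2] + 0.1 * rng.standard_normal(100)  # column 2 dominates
weights = np.ones(100)

learner = _fit_stage_componentwise(X, residuals, weights)
print(learner.component, learner.coef_)  # expected: 2 and a coefficient close to 2.0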
89
+
90
+ class ComponentwiseGradientBoostingSurvivalAnalysis(BaseEnsemble, SurvivalAnalysisMixin):
91
+ r"""Gradient boosting with component-wise least squares as base learner.
92
+
93
+ See the :ref:`User Guide </user_guide/boosting.ipynb>` and [1]_ for further description.
94
+
95
+ Parameters
96
+ ----------
97
+ loss : {'coxph', 'squared', 'ipcwls'}, optional, default: 'coxph'
98
+ loss function to be optimized. 'coxph' refers to partial likelihood loss
99
+ of Cox's proportional hazards model. The loss 'squared' minimizes a
100
+ squared regression loss that ignores predictions beyond the time of censoring,
101
+ and 'ipcwls' refers to inverse-probability of censoring weighted least squares error.
102
+
103
+ learning_rate : float, optional, default: 0.1
104
+ learning rate shrinks the contribution of each base learner by `learning_rate`.
105
+ There is a trade-off between `learning_rate` and `n_estimators`.
106
+ Values must be in the range `[0.0, inf)`.
107
+
108
+ n_estimators : int, optional, default: 100
109
+ The number of boosting stages to perform. Gradient boosting
110
+ is fairly robust to over-fitting so a large number usually
111
+ results in better performance.
112
+ Values must be in the range `[1, inf)`.
113
+
114
+ subsample : float, optional, default: 1.0
115
+ The fraction of samples to be used for fitting the individual base
116
+ learners. If smaller than 1.0 this results in Stochastic Gradient
117
+ Boosting. `subsample` interacts with the parameter `n_estimators`.
118
+ Choosing `subsample < 1.0` leads to a reduction of variance
119
+ and an increase in bias.
120
+ Values must be in the range `(0.0, 1.0]`.
121
+
122
+ warm_start : bool, optional, default: False
123
+ When set to ``True``, reuse the solution of the previous call to fit
124
+ and add more estimators to the ensemble, otherwise, just erase the
125
+ previous solution.
126
+
127
+ dropout_rate : float, optional, default: 0.0
128
+ If larger than zero, the residuals at each iteration are only computed
129
+ from a random subset of base learners. The value corresponds to the
130
+ percentage of base learners that are dropped. In each iteration,
131
+ at least one base learner is dropped. This is an alternative regularization
132
+ to shrinkage, i.e., setting `learning_rate < 1.0`.
133
+ Values must be in the range `[0.0, 1.0)`.
134
+
135
+ random_state : int, RandomState instance or None, optional, default: None
136
+ Controls the randomness of the subsampling of the data if ``subsample < 1.0``,
137
+ and the random selection of base learners to drop if ``dropout_rate > 0``.
138
+ Pass an int for reproducible output across multiple function calls.
139
+
140
+ verbose : int, optional, default: 0
141
+ Enable verbose output. If 1 then it prints progress and performance
142
+ once in a while.
143
+ Values must be in the range `[0, inf)`.
144
+
145
+ Attributes
146
+ ----------
147
+ coef_ : ndarray, shape = (n_features + 1,), dtype = float
148
+ The aggregated coefficients. The first element `coef\_[0]` corresponds
149
+ to the intercept. If loss is `coxph`, the intercept will always be zero.
150
+
151
+ estimators_ : list of base learners
152
+ The collection of fitted sub-estimators.
153
+
154
+ train_score_ : ndarray, shape = (n_estimators,)
155
+ The i-th score ``train_score_[i]`` is the loss of the
156
+ model at iteration ``i`` on the in-bag sample.
157
+ If ``subsample == 1`` this is the loss on the training data.
158
+
159
+ oob_improvement_ : ndarray, shape = (n_estimators,)
160
+ The improvement in loss on the out-of-bag samples
161
+ relative to the previous iteration.
162
+ ``oob_improvement_[0]`` is the improvement in
163
+ loss of the first stage over the ``init`` estimator.
164
+ Only available if ``subsample < 1.0``.
165
+
166
+ oob_scores_ : ndarray, shape = (n_estimators,)
167
+ The full history of the loss values on the out-of-bag
168
+ samples. Only available if ``subsample < 1.0``.
169
+
170
+ oob_score_ : float
171
+ The last value of the loss on the out-of-bag samples. It is
172
+ the same as ``oob_scores_[-1]``. Only available if ``subsample < 1.0``.
173
+
174
+ n_features_in_ : int
175
+ Number of features seen during ``fit``.
176
+
177
+ feature_names_in_ : ndarray, shape = (`n_features_in_`,)
178
+ Names of features seen during ``fit``. Defined only when `X`
179
+ has feature names that are all strings.
180
+
181
+ unique_times_ : ndarray, shape = (n_unique_times,)
182
+ Unique time points.
183
+
184
+ References
185
+ ----------
186
+ .. [1] Hothorn, T., Bühlmann, P., Dudoit, S., Molinaro, A., van der Laan, M. J.,
187
+ "Survival ensembles", Biostatistics, 7(3), 355-73, 2006
188
+ """
189
+
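# Illustrative usage sketch (standalone annotation, not part of this file):
# fitting the estimator documented above on synthetic right-censored data built
# with sksurv.util.Surv, then reading off the aggregated coefficients. All
# variable names below are made up for the example.
import numpy as np
from sksurv.ensemble import ComponentwiseGradientBoostingSurvivalAnalysis
from sksurv.util import Surv

rng = np.random.RandomState(0)
X = rng.standard_normal((200, 6))
risk = X[:, 0] - 0.5 * X[:, 3]
time = rng.exponential(scale=np.exp(-risk))
event = rng.binomial(1, 0.7, size=200).astype(bool)
y = Surv.from_arrays(event=event, time=time)

est_cw = ComponentwiseGradientBoostingSurvivalAnalysis(n_estimators=200, random_state=0)
est_cw.fit(X, y)
# coef_[0] is the intercept (always zero for the default 'coxph' loss); only
# components selected in at least one boosting stage get a non-zero coefficient.
print(est_cw.coef_)
print(est_cw.predict(X[:3]))  # higher score = higher predicted risk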
190
+ _parameter_constraints = {
191
+ "loss": [StrOptions(frozenset(LOSS_FUNCTIONS.keys()))],
192
+ "learning_rate": [Interval(numbers.Real, 0.0, None, closed="left")],
193
+ "n_estimators": [Interval(numbers.Integral, 1, None, closed="left")],
194
+ "subsample": [Interval(numbers.Real, 0.0, 1.0, closed="right")],
195
+ "warm_start": ["boolean"],
196
+ "dropout_rate": [Interval(numbers.Real, 0.0, 1.0, closed="left")],
197
+ "random_state": ["random_state"],
198
+ "verbose": ["verbose"],
199
+ }
200
+
201
+ def __init__(
202
+ self,
203
+ *,
204
+ loss="coxph",
205
+ learning_rate=0.1,
206
+ n_estimators=100,
207
+ subsample=1.0,
208
+ warm_start=False,
209
+ dropout_rate=0,
210
+ random_state=None,
211
+ verbose=0,
212
+ ):
213
+ self.loss = loss
214
+ self.n_estimators = n_estimators
215
+ self.learning_rate = learning_rate
216
+ self.subsample = subsample
217
+ self.warm_start = warm_start
218
+ self.dropout_rate = dropout_rate
219
+ self.random_state = random_state
220
+ self.verbose = verbose
221
+
222
+ @property
223
+ def _predict_risk_score(self):
224
+ return isinstance(self._loss, CoxPH)
225
+
226
+ def _is_fitted(self):
227
+ return len(getattr(self, "estimators_", [])) > 0
228
+
229
+ def _init_state(self):
230
+ self.estimators_ = np.empty(self.n_estimators, dtype=object)
231
+
232
+ self.train_score_ = np.zeros(self.n_estimators, dtype=np.float64)
233
+ # do oob?
234
+ if self.subsample < 1.0:
235
+ self.oob_improvement_ = np.zeros(self.n_estimators, dtype=np.float64)
236
+ self.oob_scores_ = np.zeros(self.n_estimators, dtype=np.float64)
237
+ self.oob_score_ = np.nan
238
+
239
+ if self.dropout_rate > 0:
240
+ self._scale = np.ones(int(self.n_estimators), dtype=float)
241
+
242
+ def _resize_state(self):
243
+ """Add additional ``n_estimators`` entries to all attributes."""
244
+ # self.n_estimators is the number of additional est to fit
245
+ total_n_estimators = self.n_estimators
246
+
247
+ self.estimators_ = np.resize(self.estimators_, total_n_estimators)
248
+ self.train_score_ = np.resize(self.train_score_, total_n_estimators)
249
+ if self.subsample < 1 or hasattr(self, "oob_improvement_"):
250
+ # if do oob resize arrays or create new if not available
251
+ if hasattr(self, "oob_improvement_"):
252
+ self.oob_improvement_ = np.resize(self.oob_improvement_, total_n_estimators)
253
+ self.oob_scores_ = np.resize(self.oob_scores_, total_n_estimators)
254
+ self.oob_score_ = np.nan
255
+ else:
256
+ self.oob_improvement_ = np.zeros(total_n_estimators, dtype=np.float64)
257
+ self.oob_scores_ = np.zeros((total_n_estimators,), dtype=np.float64)
258
+ self.oob_score_ = np.nan
259
+
260
+ if self.dropout_rate > 0:
261
+ if not hasattr(self, "_scale"):
262
+ raise ValueError(
263
+ "fitting with warm_start=True and dropout_rate > 0 is only "
264
+ "supported if the previous fit used dropout_rate > 0 too"
265
+ )
266
+
267
+ self._scale = np.resize(self._scale, total_n_estimators)
268
+ self._scale[self.n_estimators_ :] = 1
269
+
270
+ def _clear_state(self):
271
+ """Clear the state of the gradient boosting model."""
272
+ if hasattr(self, "estimators_"):
273
+ self.estimators_ = np.empty(0, dtype=object)
274
+ if hasattr(self, "train_score_"):
275
+ del self.train_score_
276
+ if hasattr(self, "oob_improvement_"):
277
+ del self.oob_improvement_
278
+ if hasattr(self, "oob_scores_"):
279
+ del self.oob_scores_
280
+ if hasattr(self, "oob_score_"):
281
+ del self.oob_score_
282
+ if hasattr(self, "_rng"):
283
+ del self._rng
284
+ if hasattr(self, "_scale"):
285
+ del self._scale
286
+
287
+ def _update_with_dropout(self, i, X, raw_predictions, scale, random_state):
288
+ # select base learners to be dropped for next iteration
289
+ drop_model, n_dropped = _sample_binomial_plus_one(self.dropout_rate, i + 1, random_state)
290
+
291
+ # adjust scaling factor of tree that is going to be trained in next iteration
292
+ scale[i + 1] = 1.0 / (n_dropped + 1.0)
293
+
294
+ raw_predictions[:] = 0
295
+ for m in range(i + 1):
296
+ if drop_model[m] == 1:
297
+ # adjust scaling factor of dropped trees
298
+ scale[m] *= n_dropped / (n_dropped + 1.0)
299
+ else:
300
+ # pseudoresponse of next iteration (without contribution of dropped trees)
301
+ raw_predictions += self.learning_rate * scale[m] * self.estimators_[m].predict(X)
302
+
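# Worked example (annotation) of the rescaling above, following the DART scheme
# cited as reference [5] of GradientBoostingSurvivalAnalysis below: suppose the
# binomial draw drops 3 of the existing base learners in this iteration.
n_dropped = 3
scale_next_learner = 1.0 / (n_dropped + 1.0)          # 0.25 for the learner fitted next
dropped_scale_factor = n_dropped / (n_dropped + 1.0)  # 0.75 multiplies each dropped learner's scale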
303
+ def _fit(self, X, event, time, y_pred, sample_weight, random_state, begin_at_stage=0): # noqa: C901
304
+ n_samples = X.shape[0]
305
+ # account for intercept
306
+ y = np.fromiter(zip(event, time), dtype=[("event", bool), ("time", np.float64)])
307
+
308
+ do_oob = self.subsample < 1.0
309
+ if do_oob:
310
+ n_inbag = max(1, int(self.subsample * n_samples))
311
+
312
+ do_dropout = self.dropout_rate > 0
313
+ if do_dropout:
314
+ scale = self._scale
315
+
316
+ if self.verbose:
317
+ verbose_reporter = VerboseReporter(verbose=self.verbose)
318
+ verbose_reporter.init(self, 0)
319
+
320
+ # perform boosting iterations
321
+ i = begin_at_stage
322
+ for i in range(begin_at_stage, int(self.n_estimators)):
323
+ # subsampling
324
+ if do_oob:
325
+ sample_mask = _random_sample_mask(n_samples, n_inbag, random_state)
326
+ subsample_weight = sample_weight * sample_mask.astype(np.float64)
327
+
328
+ # OOB score before adding this stage
329
+ y_oob_masked = y[~sample_mask]
330
+ sample_weight_oob_masked = sample_weight[~sample_mask]
331
+ if i == 0: # store the initial loss to compute the OOB score
332
+ initial_loss = self._loss(
333
+ y_true=y_oob_masked,
334
+ raw_prediction=y_pred[~sample_mask],
335
+ sample_weight=sample_weight_oob_masked,
336
+ )
337
+ else:
338
+ subsample_weight = sample_weight
339
+
340
+ residuals = self._loss.gradient(y, y_pred, sample_weight=sample_weight)
341
+
342
+ best_learner = _fit_stage_componentwise(X, residuals, subsample_weight)
343
+ self.estimators_[i] = best_learner
344
+
345
+ if do_dropout and i < len(scale) - 1:
346
+ self._update_with_dropout(i, X, y_pred, scale, random_state)
347
+ else:
348
+ y_pred += self.learning_rate * best_learner.predict(X)
349
+
350
+ # track loss
351
+ if do_oob:
352
+ self.train_score_[i] = self._loss(
353
+ y_true=y[sample_mask],
354
+ raw_prediction=y_pred[sample_mask],
355
+ sample_weight=sample_weight[sample_mask],
356
+ )
357
+ self.oob_scores_[i] = self._loss(
358
+ y_true=y_oob_masked,
359
+ raw_prediction=y_pred[~sample_mask],
360
+ sample_weight=sample_weight_oob_masked,
361
+ )
362
+ previous_loss = initial_loss if i == 0 else self.oob_scores_[i - 1]
363
+ self.oob_improvement_[i] = previous_loss - self.oob_scores_[i]
364
+ self.oob_score_ = self.oob_scores_[-1]
365
+ else:
366
+ # no need to fancy index w/ no subsampling
367
+ self.train_score_[i] = self._loss(y_true=y, raw_prediction=y_pred, sample_weight=sample_weight)
368
+
369
+ if self.verbose > 0:
370
+ verbose_reporter.update(i, self)
371
+
372
+ return i + 1
373
+
374
+ def fit(self, X, y, sample_weight=None):
375
+ """Fit estimator.
376
+
377
+ Parameters
378
+ ----------
379
+ X : array-like, shape = (n_samples, n_features)
380
+ Data matrix
381
+
382
+ y : structured array, shape = (n_samples,)
383
+ A structured array with two fields. The first field is a boolean
384
+ where ``True`` indicates an event and ``False`` indicates right-censoring.
385
+ The second field is a float with the time of event or time of censoring.
386
+
387
+ sample_weight : array-like, shape = (n_samples,), optional
388
+ Weights given to each sample. If omitted, all samples have weight 1.
389
+
390
+ Returns
391
+ -------
392
+ self
393
+ """
394
+ self._validate_params()
395
+
396
+ if not self.warm_start:
397
+ self._clear_state()
398
+
399
+ X = validate_data(self, X, ensure_min_samples=2)
400
+ event, time = check_array_survival(X, y)
401
+
402
+ sample_weight = _check_sample_weight(sample_weight, X)
403
+
404
+ n_samples = X.shape[0]
405
+ Xi = np.column_stack((np.ones(n_samples), X))
406
+
407
+ self._loss = LOSS_FUNCTIONS[self.loss]()
408
+ if isinstance(self._loss, CensoredSquaredLoss | IPCWLeastSquaresError):
409
+ time = np.log(time)
410
+
411
+ if not self._is_fitted():
412
+ self._init_state()
413
+
414
+ y_pred = np.zeros(n_samples, dtype=np.float64)
415
+
416
+ begin_at_stage = 0
417
+
418
+ self._rng = check_random_state(self.random_state)
419
+ else:
420
+ # add more estimators to fitted model
421
+ # invariant: warm_start = True
422
+ if self.n_estimators < self.estimators_.shape[0]:
423
+ raise ValueError(
424
+ "n_estimators=%d must be larger or equal to "
425
+ "estimators_.shape[0]=%d when "
426
+ "warm_start==True" % (self.n_estimators, self.estimators_.shape[0])
427
+ )
428
+ begin_at_stage = self.estimators_.shape[0]
429
+ y_pred = self._raw_predict(Xi)
430
+ self._resize_state()
431
+
432
+ # apply dropout to last stage of previous fit
433
+ if hasattr(self, "_scale") and self.dropout_rate > 0:
434
+ # pylint: disable-next=access-member-before-definition
435
+ self._update_with_dropout(self.n_estimators_ - 1, Xi, y_pred, self._scale, self._rng)
436
+
437
+ self.n_estimators_ = self._fit(Xi, event, time, y_pred, sample_weight, self._rng, begin_at_stage)
438
+
439
+ self._set_baseline_model(X, event, time)
440
+ return self
441
+
442
+ def _set_baseline_model(self, X, event, time):
443
+ if isinstance(self._loss, CoxPH):
444
+ risk_scores = self._predict(X)
445
+ self._baseline_model = BreslowEstimator().fit(risk_scores, event, time)
446
+ else:
447
+ self._baseline_model = None
448
+
449
+ def _raw_predict(self, X):
450
+ pred = np.zeros(X.shape[0], dtype=float)
451
+ for estimator in self.estimators_:
452
+ pred += self.learning_rate * estimator.predict(X)
453
+ return pred
454
+
455
+ def _predict(self, X):
456
+ # account for intercept
457
+ Xi = np.column_stack((np.ones(X.shape[0]), X))
458
+ pred = self._raw_predict(Xi)
459
+ return self._loss._scale_raw_prediction(pred)
460
+
461
+ def predict(self, X):
462
+ """Predict risk scores.
463
+
464
+ If `loss='coxph'`, predictions can be interpreted as log hazard ratio
465
+ corresponding to the linear predictor of a Cox proportional hazards
466
+ model. If `loss='squared'` or `loss='ipcwls'`, predictions are the
467
+ time to event.
468
+
469
+ Parameters
470
+ ----------
471
+ X : array-like, shape = (n_samples, n_features)
472
+ Data matrix.
473
+
474
+ Returns
475
+ -------
476
+ risk_score : array, shape = (n_samples,)
477
+ Predicted risk scores.
478
+ """
479
+ check_is_fitted(self, "estimators_")
480
+ X = validate_data(self, X, reset=False)
481
+
482
+ return self._predict(X)
483
+
484
+ def _get_baseline_model(self):
485
+ if self._baseline_model is None:
486
+ raise ValueError("`fit` must be called with the loss option set to 'coxph'.")
487
+ return self._baseline_model
488
+
489
+ @append_cumulative_hazard_example(
490
+ estimator_mod="ensemble", estimator_class="ComponentwiseGradientBoostingSurvivalAnalysis"
491
+ )
492
+ def predict_cumulative_hazard_function(self, X, return_array=False):
493
+ r"""Predict cumulative hazard function.
494
+
495
+ Only available if :meth:`fit` has been called with `loss = "coxph"`.
496
+
497
+ The cumulative hazard function for an individual
498
+ with feature vector :math:`x` is defined as
499
+
500
+ .. math::
501
+
502
+ H(t \mid x) = \exp(f(x)) H_0(t) ,
503
+
504
+ where :math:`f(\cdot)` is the additive ensemble of base learners,
505
+ and :math:`H_0(t)` is the baseline hazard function,
506
+ estimated by Breslow's estimator.
507
+
508
+ Parameters
509
+ ----------
510
+ X : array-like, shape = (n_samples, n_features)
511
+ Data matrix.
512
+
513
+ return_array : bool, default: False
514
+ Whether to return a single array of cumulative hazard values
515
+ or a list of step functions.
516
+
517
+ If `False`, a list of :class:`sksurv.functions.StepFunction`
518
+ objects is returned.
519
+
520
+ If `True`, a 2d-array of shape `(n_samples, n_unique_times)` is
521
+ returned, where `n_unique_times` is the number of unique
522
+ event times in the training data. Each row represents the cumulative
523
+ hazard function of an individual evaluated at `unique_times_`.
524
+
525
+ Returns
526
+ -------
527
+ cum_hazard : ndarray
528
+ If `return_array` is `False`, an array of `n_samples`
529
+ :class:`sksurv.functions.StepFunction` instances is returned.
530
+
531
+ If `return_array` is `True`, a numeric array of shape
532
+ `(n_samples, n_unique_times_)` is returned.
533
+
534
+ Examples
535
+ --------
536
+ """
537
+ return self._predict_cumulative_hazard_function(self._get_baseline_model(), self.predict(X), return_array)
538
+
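# Illustrative sketch (standalone annotation): the two return modes described in
# the docstring above. Assumes `est_cw` and `X` from the usage sketch that follows
# the class docstring.
chf = est_cw.predict_cumulative_hazard_function(X[:3])  # 3 StepFunction objects
grid = est_cw.predict_cumulative_hazard_function(X[:3], return_array=True)
print(grid.shape)                        # (3, n_unique_times), evaluated at est_cw.unique_times_
print(chf[0](est_cw.unique_times_[:5]))  # evaluate the first step function at a few time points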
539
+ @append_survival_function_example(
540
+ estimator_mod="ensemble", estimator_class="ComponentwiseGradientBoostingSurvivalAnalysis"
541
+ )
542
+ def predict_survival_function(self, X, return_array=False):
543
+ r"""Predict survival function.
544
+
545
+ Only available if :meth:`fit` has been called with `loss = "coxph"`.
546
+
547
+ The survival function for an individual
548
+ with feature vector :math:`x` is defined as
549
+
550
+ .. math::
551
+
552
+ S(t \mid x) = S_0(t)^{\exp(f(x))} ,
553
+
554
+ where :math:`f(\cdot)` is the additive ensemble of base learners,
555
+ and :math:`S_0(t)` is the baseline survival function,
556
+ estimated by Breslow's estimator.
557
+
558
+ Parameters
559
+ ----------
560
+ X : array-like, shape = (n_samples, n_features)
561
+ Data matrix.
562
+
563
+ return_array : bool, default: False
564
+ Whether to return a single array of survival probabilities
565
+ or a list of step functions.
566
+
567
+ If `False`, a list of :class:`sksurv.functions.StepFunction`
568
+ objects is returned.
569
+
570
+ If `True`, a 2d-array of shape `(n_samples, n_unique_times)` is
571
+ returned, where `n_unique_times` is the number of unique
572
+ event times in the training data. Each row represents the survival
573
+ function of an individual evaluated at `unique_times_`.
574
+
575
+ Returns
576
+ -------
577
+ survival : ndarray
578
+ If `return_array` is `False`, an array of `n_samples`
579
+ :class:`sksurv.functions.StepFunction` instances is returned.
580
+
581
+ If `return_array` is `True`, a numeric array of shape
582
+ `(n_samples, n_unique_times_)` is returned.
583
+
584
+ Examples
585
+ --------
586
+ """
587
+ return self._predict_survival_function(self._get_baseline_model(), self.predict(X), return_array)
588
+
589
+ @property
590
+ def coef_(self):
591
+ coef = np.zeros(self.n_features_in_ + 1, dtype=float)
592
+
593
+ for estimator in self.estimators_:
594
+ coef[estimator.component] += self.learning_rate * estimator.coef_
595
+
596
+ return coef
597
+
598
+ @property
599
+ def unique_times_(self):
600
+ return self._get_baseline_model().unique_times_
601
+
602
+ @property
603
+ def feature_importances_(self):
604
+ imp = np.empty(self.n_features_in_ + 1, dtype=object)
605
+ for i in range(imp.shape[0]):
606
+ imp[i] = []
607
+
608
+ for k, estimator in enumerate(self.estimators_):
609
+ imp[estimator.component].append(k + 1)
610
+
611
+ def _importance(x):
612
+ if len(x) > 0:
613
+ return np.min(x)
614
+ return np.nan
615
+
616
+ ret = np.array([_importance(x) for x in imp])
617
+ return ret
618
+
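# Illustrative sketch (standalone annotation): for this component-wise model,
# feature_importances_ holds the first boosting iteration (1-based) at which each
# component was selected -- index 0 being the intercept -- and NaN for components
# never selected, so smaller values mark features that entered the ensemble earlier.
# Assumes `est_cw` from the usage sketch following the class docstring.
import numpy as np

imp = est_cw.feature_importances_
print(imp)              # length n_features_in_ + 1
print(np.argsort(imp))  # NaN (never selected) sorts last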
619
+ def _make_estimator(self, append=True, random_state=None):
620
+ # we don't need _make_estimator
621
+ raise NotImplementedError()
622
+
623
+
624
+ class GradientBoostingSurvivalAnalysis(BaseGradientBoosting, SurvivalAnalysisMixin):
625
+ r"""Gradient-boosted Cox proportional hazard loss with
626
+ regression trees as base learner.
627
+
628
+ In each stage, a regression tree is fit on the negative gradient
629
+ of the loss function.
630
+
631
+ For more details on gradient boosting see [1]_ and [2]_. If `loss='coxph'`,
632
+ the partial likelihood of the proportional hazards model is optimized as
633
+ described in [3]_. If `loss='ipcwls'`, the accelerated failure time model with
634
+ inverse-probability of censoring weighted least squares error is optimized as
635
+ described in [4]_. When using a non-zero `dropout_rate`, regularization is
636
+ applied during training following [5]_.
637
+
638
+ See the :ref:`User Guide </user_guide/boosting.ipynb>` for examples.
639
+
640
+ Parameters
641
+ ----------
642
+ loss : {'coxph', 'squared', 'ipcwls'}, optional, default: 'coxph'
643
+ loss function to be optimized. 'coxph' refers to partial likelihood loss
644
+ of Cox's proportional hazards model. The loss 'squared' minimizes a
645
+ squared regression loss that ignores predictions beyond the time of censoring,
646
+ and 'ipcwls' refers to inverse-probability of censoring weighted least squares error.
647
+
648
+ learning_rate : float, optional, default: 0.1
649
+ learning rate shrinks the contribution of each tree by `learning_rate`.
650
+ There is a trade-off between `learning_rate` and `n_estimators`.
651
+ Values must be in the range `[0.0, inf)`.
652
+
653
+ n_estimators : int, optional, default: 100
654
+ The number of regression trees to create. Gradient boosting
655
+ is fairly robust to over-fitting so a large number usually
656
+ results in better performance.
657
+ Values must be in the range `[1, inf)`.
658
+
659
+ subsample : float, optional, default: 1.0
660
+ The fraction of samples to be used for fitting the individual base
661
+ learners. If smaller than 1.0 this results in Stochastic Gradient
662
+ Boosting. `subsample` interacts with the parameter `n_estimators`.
663
+ Choosing `subsample < 1.0` leads to a reduction of variance
664
+ and an increase in bias.
665
+ Values must be in the range `(0.0, 1.0]`.
666
+
667
+ criterion : {'friedman_mse', 'squared_error'}, optional, default: 'friedman_mse'
668
+ The function to measure the quality of a split. Supported criteria are
669
+ 'friedman_mse' for the mean squared error with improvement score by
670
+ Friedman, 'squared_error' for mean squared error. The default value of
671
+ 'friedman_mse' is generally the best as it can provide a better
672
+ approximation in some cases.
673
+
674
+ min_samples_split : int or float, optional, default: 2
675
+ The minimum number of samples required to split an internal node:
676
+
677
+ - If int, values must be in the range `[2, inf)`.
678
+ - If float, values must be in the range `(0.0, 1.0]` and `min_samples_split`
679
+ will be `ceil(min_samples_split * n_samples)`.
680
+
681
+ min_samples_leaf : int or float, optional, default: 1
682
+ The minimum number of samples required to be at a leaf node.
683
+ A split point at any depth will only be considered if it leaves at
684
+ least ``min_samples_leaf`` training samples in each of the left and
685
+ right branches. This may have the effect of smoothing the model,
686
+ especially in regression.
687
+
688
+ - If int, values must be in the range `[1, inf)`.
689
+ - If float, values must be in the range `(0.0, 1.0)` and `min_samples_leaf`
690
+ will be `ceil(min_samples_leaf * n_samples)`.
691
+
692
+ min_weight_fraction_leaf : float, optional, default: 0.
693
+ The minimum weighted fraction of the sum total of weights (of all
694
+ the input samples) required to be at a leaf node. Samples have
695
+ equal weight when `sample_weight` is not provided.
696
+ Values must be in the range `[0.0, 0.5]`.
697
+
698
+ max_depth : int or None, optional, default: 3
699
+ Maximum depth of the individual regression estimators. The maximum
700
+ depth limits the number of nodes in the tree. Tune this parameter
701
+ for best performance; the best value depends on the interaction
702
+ of the input variables. If None, then nodes are expanded until
703
+ all leaves are pure or until all leaves contain less than
704
+ `min_samples_split` samples.
705
+ If int, values must be in the range `[1, inf)`.
706
+
707
+ min_impurity_decrease : float, optional, default: 0.
708
+ A node will be split if this split induces a decrease of the impurity
709
+ greater than or equal to this value.
710
+
711
+ The weighted impurity decrease equation is the following::
712
+
713
+ N_t / N * (impurity - N_t_R / N_t * right_impurity
714
+ - N_t_L / N_t * left_impurity)
715
+
716
+ where ``N`` is the total number of samples, ``N_t`` is the number of
717
+ samples at the current node, ``N_t_L`` is the number of samples in the
718
+ left child, and ``N_t_R`` is the number of samples in the right child.
719
+
720
+ ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,
721
+ if ``sample_weight`` is passed.
722
+
723
+ random_state : int, RandomState instance, or None, optional, default: None
724
+ Controls the random seed given to each Tree estimator at each
725
+ boosting iteration.
726
+ In addition, it controls the random permutation of the features at
727
+ each split.
728
+ It also controls the random splitting of the training data to obtain a
729
+ validation set if `n_iter_no_change` is not None.
730
+ Pass an int for reproducible output across multiple function calls.
731
+
732
+ max_features : int, float, {'sqrt', 'log2'} or None, optional, default: None
733
+ The number of features to consider when looking for the best split:
734
+
735
+ - If int, values must be in the range `[1, inf)`.
736
+ - If float, values must be in the range `(0.0, 1.0]` and the features
737
+ considered at each split will be `max(1, int(max_features * n_features_in_))`.
738
+ - If 'sqrt', then `max_features=sqrt(n_features)`.
739
+ - If 'log2', then `max_features=log2(n_features)`.
740
+ - If None, then `max_features=n_features`.
741
+
742
+ Choosing `max_features < n_features` leads to a reduction of variance
743
+ and an increase in bias.
744
+
745
+ Note: the search for a split does not stop until at least one
746
+ valid partition of the node samples is found, even if it requires
747
+ effectively inspecting more than ``max_features`` features.
748
+
749
+ max_leaf_nodes : int or None, optional, default: None
750
+ Grow trees with ``max_leaf_nodes`` in best-first fashion.
751
+ Best nodes are defined as relative reduction in impurity.
752
+ Values must be in the range `[2, inf)`.
753
+ If `None`, then the number of leaf nodes is unlimited.
754
+
755
+ warm_start : bool, optional, default: False
756
+ When set to ``True``, reuse the solution of the previous call to fit
757
+ and add more estimators to the ensemble, otherwise, just erase the
758
+ previous solution.
759
+
760
+ validation_fraction : float, optional, default: 0.1
761
+ The proportion of training data to set aside as validation set for
762
+ early stopping. Values must be in the range `(0.0, 1.0)`.
763
+ Only used if ``n_iter_no_change`` is set to an integer.
764
+
765
+ n_iter_no_change : int, optional, default: None
766
+ ``n_iter_no_change`` is used to decide if early stopping will be used
767
+ to terminate training when validation score is not improving. By
768
+ default it is set to None to disable early stopping. If set to a
769
+ number, it will set aside ``validation_fraction`` size of the training
770
+ data as validation and terminate training when validation score is not
771
+ improving in all of the previous ``n_iter_no_change``
772
+ iterations. The split is stratified.
773
+ Values must be in the range `[1, inf)`.
774
+
775
+ tol : float, optional, default: 1e-4
776
+ Tolerance for the early stopping. When the loss is not improving
777
+ by at least tol for ``n_iter_no_change`` iterations (if set to a
778
+ number), the training stops.
779
+ Values must be in the range `[0.0, inf)`.
780
+
781
+ dropout_rate : float, optional, default: 0.0
782
+ If larger than zero, the residuals at each iteration are only computed
783
+ from a random subset of base learners. The value corresponds to the
784
+ percentage of base learners that are dropped. In each iteration,
785
+ at least one base learner is dropped. This is an alternative regularization
786
+ to shrinkage, i.e., setting `learning_rate < 1.0`.
787
+ Values must be in the range `[0.0, 1.0)`.
788
+
789
+ verbose : int, optional, default: 0
790
+ Enable verbose output. If 1 then it prints progress and performance
791
+ once in a while (the more trees the lower the frequency). If greater
792
+ than 1 then it prints progress and performance for every tree.
793
+ Values must be in the range `[0, inf)`.
794
+
795
+ ccp_alpha : float, optional, default: 0.0
796
+ Complexity parameter used for Minimal Cost-Complexity Pruning. The
797
+ subtree with the largest cost complexity that is smaller than
798
+ ``ccp_alpha`` will be chosen. By default, no pruning is performed.
799
+ Values must be in the range `[0.0, inf)`.
800
+
801
+ Attributes
802
+ ----------
803
+ n_estimators_ : int
804
+ The number of estimators as selected by early stopping (if
805
+ ``n_iter_no_change`` is specified). Otherwise it is set to
806
+ ``n_estimators``.
807
+
808
+ feature_importances_ : ndarray, shape = (n_features,)
809
+ The feature importances (the higher, the more important the feature).
810
+
811
+ estimators_ : ndarray of DecisionTreeRegressor, shape = (n_estimators, 1)
812
+ The collection of fitted sub-estimators.
813
+
814
+ train_score_ : ndarray, shape = (n_estimators,)
815
+ The i-th score ``train_score_[i]`` is the loss of the
816
+ model at iteration ``i`` on the in-bag sample.
817
+ If ``subsample == 1`` this is the loss on the training data.
818
+
819
+ oob_improvement_ : ndarray, shape = (n_estimators,)
820
+ The improvement in loss on the out-of-bag samples
821
+ relative to the previous iteration.
822
+ ``oob_improvement_[0]`` is the improvement in
823
+ loss of the first stage over the ``init`` estimator.
824
+ Only available if ``subsample < 1.0``.
825
+
826
+ oob_scores_ : ndarray, shape = (n_estimators,)
827
+ The full history of the loss values on the out-of-bag
828
+ samples. Only available if ``subsample < 1.0``.
829
+
830
+ oob_score_ : float
831
+ The last value of the loss on the out-of-bag samples. It is
832
+ the same as ``oob_scores_[-1]``. Only available if ``subsample < 1.0``.
833
+
834
+ n_features_in_ : int
835
+ Number of features seen during ``fit``.
836
+
837
+ feature_names_in_ : ndarray, shape = (`n_features_in_`,)
838
+ Names of features seen during ``fit``. Defined only when `X`
839
+ has feature names that are all strings.
840
+
841
+ max_features_ : int
842
+ The inferred value of max_features.
843
+
844
+ unique_times_ : ndarray, shape = (n_unique_times,)
845
+ Unique time points.
846
+
847
+ See also
848
+ --------
849
+ sksurv.ensemble.ComponentwiseGradientBoostingSurvivalAnalysis
850
+ Gradient boosting with component-wise least squares as base learner.
851
+
852
+ References
853
+ ----------
854
+ .. [1] J. H. Friedman, "Greedy function approximation: A gradient boosting machine,"
855
+ The Annals of Statistics, 29(5), 1189–1232, 2001.
856
+ .. [2] J. H. Friedman, "Stochastic gradient boosting,"
857
+ Computational Statistics & Data Analysis, 38(4), 367–378, 2002.
858
+ .. [3] G. Ridgeway, "The state of boosting,"
859
+ Computing Science and Statistics, 172–181, 1999.
860
+ .. [4] Hothorn, T., Bühlmann, P., Dudoit, S., Molinaro, A., van der Laan, M. J.,
861
+ "Survival ensembles", Biostatistics, 7(3), 355-73, 2006.
862
+ .. [5] K. V. Rashmi and R. Gilad-Bachrach,
863
+ "DART: Dropouts meet multiple additive regression trees,"
864
+ in 18th International Conference on Artificial Intelligence and Statistics,
865
+ 2015, 489–497.
866
+ """
867
+
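# Illustrative usage sketch (standalone annotation, not part of this file):
# tree-based boosting with the default 'coxph' loss on synthetic right-censored
# data, followed by per-sample survival curves from the Breslow baseline
# estimator. All variable names below are made up for the example.
import numpy as np
from sklearn.model_selection import train_test_split
from sksurv.ensemble import GradientBoostingSurvivalAnalysis
from sksurv.util import Surv

rng = np.random.RandomState(0)
X = rng.standard_normal((400, 8))
risk = 0.8 * X[:, 0] + 0.4 * X[:, 1] ** 2
time = rng.exponential(scale=np.exp(-risk))
event = rng.binomial(1, 0.7, size=400).astype(bool)
y = Surv.from_arrays(event=event, time=time)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

est_gb = GradientBoostingSurvivalAnalysis(
    n_estimators=200, learning_rate=0.05, max_depth=2, subsample=0.8, random_state=0
)
est_gb.fit(X_train, y_train)

# Each entry is a StepFunction defined over est_gb.unique_times_.
surv_fns = est_gb.predict_survival_function(X_test[:2])
print(surv_fns[0](est_gb.unique_times_[:5]))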
868
+ _parameter_constraints = {
869
+ **BaseGradientBoosting._parameter_constraints,
870
+ "loss": [StrOptions(frozenset(LOSS_FUNCTIONS.keys()))],
871
+ "dropout_rate": [Interval(numbers.Real, 0.0, 1.0, closed="left")],
872
+ }
873
+
874
+ def __init__(
875
+ self,
876
+ *,
877
+ loss="coxph",
878
+ learning_rate=0.1,
879
+ n_estimators=100,
880
+ subsample=1.0,
881
+ criterion="friedman_mse",
882
+ min_samples_split=2,
883
+ min_samples_leaf=1,
884
+ min_weight_fraction_leaf=0.0,
885
+ max_depth=3,
886
+ min_impurity_decrease=0.0,
887
+ random_state=None,
888
+ max_features=None,
889
+ max_leaf_nodes=None,
890
+ warm_start=False,
891
+ validation_fraction=0.1,
892
+ n_iter_no_change=None,
893
+ tol=1e-4,
894
+ dropout_rate=0.0,
895
+ verbose=0,
896
+ ccp_alpha=0.0,
897
+ ):
898
+ super().__init__(
899
+ loss=loss,
900
+ learning_rate=learning_rate,
901
+ n_estimators=n_estimators,
902
+ criterion=criterion,
903
+ min_samples_split=min_samples_split,
904
+ min_samples_leaf=min_samples_leaf,
905
+ min_weight_fraction_leaf=min_weight_fraction_leaf,
906
+ max_depth=max_depth,
907
+ init="zero",
908
+ subsample=subsample,
909
+ max_features=max_features,
910
+ random_state=random_state,
911
+ verbose=verbose,
912
+ max_leaf_nodes=max_leaf_nodes,
913
+ warm_start=warm_start,
914
+ min_impurity_decrease=min_impurity_decrease,
915
+ validation_fraction=validation_fraction,
916
+ n_iter_no_change=n_iter_no_change,
917
+ tol=tol,
918
+ ccp_alpha=ccp_alpha,
919
+ )
920
+ self.dropout_rate = dropout_rate
921
+
922
+ def _encode_y(self, y, sample_weight):
923
+ self.n_trees_per_iteration_ = 1
924
+ return y
925
+
926
+ def _get_loss(self, sample_weight):
927
+ return LOSS_FUNCTIONS[self.loss]()
928
+
929
+ @property
930
+ def _predict_risk_score(self):
931
+ return isinstance(self._loss, CoxPH)
932
+
933
+ def _set_max_features(self):
934
+ """Set self.max_features_."""
935
+ if isinstance(self.max_features, str):
936
+ if self.max_features == "sqrt":
937
+ max_features = max(1, int(np.sqrt(self.n_features_in_)))
938
+ elif self.max_features == "log2":
939
+ max_features = max(1, int(np.log2(self.n_features_in_)))
940
+ elif self.max_features is None:
941
+ max_features = self.n_features_in_
942
+ elif isinstance(self.max_features, numbers.Integral):
943
+ max_features = self.max_features
944
+ else: # float
945
+ max_features = max(1, int(self.max_features * self.n_features_in_))
946
+
947
+ self.max_features_ = max_features
948
+
949
+ def _update_with_dropout(self, i, X, raw_predictions, k, scale, random_state):
950
+ # select base learners to be dropped for next iteration
951
+ drop_model, n_dropped = _sample_binomial_plus_one(self.dropout_rate, i + 1, random_state)
952
+
953
+ # adjust scaling factor of tree that is going to be trained in next iteration
954
+ scale[i + 1] = 1.0 / (n_dropped + 1.0)
955
+
956
+ raw_predictions[:, k] = 0
957
+ for m in range(i + 1):
958
+ if drop_model[m] == 1:
959
+ # adjust scaling factor of dropped trees
960
+ scale[m] *= n_dropped / (n_dropped + 1.0)
961
+ else:
962
+ # pseudoresponse of next iteration (without contribution of dropped trees)
963
+ raw_predictions[:, k] += self.learning_rate * scale[m] * self.estimators_[m, k].predict(X).ravel()
964
+
965
+ def _fit_stage(
966
+ self,
967
+ i,
968
+ X,
969
+ y,
970
+ raw_predictions,
971
+ sample_weight,
972
+ sample_mask,
973
+ random_state,
974
+ scale,
975
+ X_csc=None,
976
+ X_csr=None,
977
+ ):
978
+ """Fit another stage of ``n_classes_`` trees to the boosting model."""
979
+
980
+ assert sample_mask.dtype == bool
981
+
982
+ # whether to use dropout in next iteration
983
+ do_dropout = self.dropout_rate > 0.0 and i < len(scale) - 1
984
+
985
+ # Need to pass a copy of raw_predictions to negative_gradient()
986
+ # because raw_predictions is partially updated at the end of the loop
987
+ # in update_terminal_regions(), and gradients need to be evaluated at
988
+ # iteration i - 1.
989
+ raw_predictions_copy = raw_predictions.copy()
990
+
991
+ neg_gradient = self._loss.gradient(
992
+ y_true=y,
993
+ raw_prediction=raw_predictions_copy,
994
+ sample_weight=None, # We pass sample_weights to the tree directly.
995
+ )
996
+
997
+ for k in range(self.n_trees_per_iteration_):
998
+ # induce regression tree on the negative gradient
999
+ tree = DecisionTreeRegressor(
1000
+ criterion=self.criterion,
1001
+ splitter="best",
1002
+ max_depth=self.max_depth,
1003
+ min_samples_split=self.min_samples_split,
1004
+ min_samples_leaf=self.min_samples_leaf,
1005
+ min_weight_fraction_leaf=self.min_weight_fraction_leaf,
1006
+ min_impurity_decrease=self.min_impurity_decrease,
1007
+ max_features=self.max_features,
1008
+ max_leaf_nodes=self.max_leaf_nodes,
1009
+ random_state=random_state,
1010
+ ccp_alpha=self.ccp_alpha,
1011
+ )
1012
+
1013
+ if self.subsample < 1.0:
1014
+ # no inplace multiplication!
1015
+ sample_weight = sample_weight * sample_mask.astype(np.float64)
1016
+
1017
+ X = X_csc if X_csc is not None else X
1018
+ tree.fit(X, neg_gradient, sample_weight=sample_weight, check_input=False)
1019
+
1020
+ # add tree to ensemble
1021
+ self.estimators_[i, k] = tree
1022
+
1023
+ # update tree leaves
1024
+ if do_dropout:
1025
+ self._update_with_dropout(i, X, raw_predictions, k, scale, random_state)
1026
+ else:
1027
+ # update tree leaves
1028
+ X_for_tree_update = X_csr if X_csr is not None else X
1029
+ self._loss.update_terminal_regions(
1030
+ tree.tree_,
1031
+ X_for_tree_update,
1032
+ y,
1033
+ neg_gradient,
1034
+ raw_predictions,
1035
+ sample_weight,
1036
+ sample_mask,
1037
+ learning_rate=self.learning_rate,
1038
+ k=k,
1039
+ )
1040
+
1041
+ return raw_predictions
1042
+
1043
+ def _fit_stages( # noqa: C901
1044
+ self,
1045
+ X,
1046
+ y,
1047
+ raw_predictions,
1048
+ sample_weight,
1049
+ random_state,
1050
+ X_val,
1051
+ y_val,
1052
+ sample_weight_val,
1053
+ scale,
1054
+ begin_at_stage=0,
1055
+ monitor=None,
1056
+ ):
1057
+ """Iteratively fits the stages.
1058
+
1059
+ For each stage it computes the progress (OOB, train score)
1060
+ and delegates to ``_fit_stage``.
1061
+ Returns the number of stages fit; might differ from ``n_estimators``
1062
+ due to early stopping.
1063
+ """
1064
+ n_samples = X.shape[0]
1065
+ do_oob = self.subsample < 1.0
1066
+ sample_mask = np.ones((n_samples,), dtype=bool)
1067
+ n_inbag = max(1, int(self.subsample * n_samples))
1068
+
1069
+ if self.verbose:
1070
+ verbose_reporter = VerboseReporter(verbose=self.verbose)
1071
+ verbose_reporter.init(self, begin_at_stage)
1072
+
1073
+ X_csc = csc_matrix(X) if issparse(X) else None
1074
+ X_csr = csr_matrix(X) if issparse(X) else None
1075
+
1076
+ if self.n_iter_no_change is not None:
1077
+ loss_history = np.full(self.n_iter_no_change, np.inf)
1078
+ # We create a generator to get the predictions for X_val after
1079
+ # the addition of each successive stage
1080
+ y_val_pred_iter = self._staged_raw_predict(X_val, check_input=False)
1081
+
1082
+ # perform boosting iterations
1083
+ i = begin_at_stage
1084
+ for i in range(begin_at_stage, self.n_estimators):
1085
+ # subsampling
1086
+ if do_oob:
1087
+ sample_mask = _random_sample_mask(n_samples, n_inbag, random_state)
1088
+ # OOB score before adding this stage
1089
+ y_oob_masked = y[~sample_mask]
1090
+ sample_weight_oob_masked = sample_weight[~sample_mask]
1091
+ if i == 0: # store the initial loss to compute the OOB score
1092
+ initial_loss = self._loss(
1093
+ y_true=y_oob_masked,
1094
+ raw_prediction=raw_predictions[~sample_mask],
1095
+ sample_weight=sample_weight_oob_masked,
1096
+ )
1097
+
1098
+ # fit next stage of trees
1099
+ raw_predictions = self._fit_stage(
1100
+ i,
1101
+ X,
1102
+ y,
1103
+ raw_predictions,
1104
+ sample_weight,
1105
+ sample_mask,
1106
+ random_state,
1107
+ scale,
1108
+ X_csc=X_csc,
1109
+ X_csr=X_csr,
1110
+ )
1111
+
1112
+ # track loss
1113
+ if do_oob:
1114
+ self.train_score_[i] = self._loss(
1115
+ y_true=y[sample_mask],
1116
+ raw_prediction=raw_predictions[sample_mask],
1117
+ sample_weight=sample_weight[sample_mask],
1118
+ )
1119
+ self.oob_scores_[i] = self._loss(
1120
+ y_true=y_oob_masked,
1121
+ raw_prediction=raw_predictions[~sample_mask],
1122
+ sample_weight=sample_weight_oob_masked,
1123
+ )
1124
+ previous_loss = initial_loss if i == 0 else self.oob_scores_[i - 1]
1125
+ self.oob_improvement_[i] = previous_loss - self.oob_scores_[i]
1126
+ self.oob_score_ = self.oob_scores_[-1]
1127
+ else:
1128
+ # no need to fancy index w/ no subsampling
1129
+ self.train_score_[i] = self._loss(y_true=y, raw_prediction=raw_predictions, sample_weight=sample_weight)
1130
+
1131
+ if self.verbose > 0:
1132
+ verbose_reporter.update(i, self)
1133
+
1134
+ if monitor is not None:
1135
+ early_stopping = monitor(i, self, locals())
1136
+ if early_stopping:
1137
+ break
1138
+
1139
+ # We also provide an early stopping based on the score from
1140
+ # validation set (X_val, y_val), if n_iter_no_change is set
1141
+ if self.n_iter_no_change is not None:
1142
+ # By calling next(y_val_pred_iter), we get the predictions
1143
+ # for X_val after the addition of the current stage
1144
+ validation_loss = self._loss(y_val, next(y_val_pred_iter), sample_weight_val)
1145
+
1146
+ # Require validation_score to be better (less) than at least
1147
+ # one of the last n_iter_no_change evaluations
1148
+ if np.any(validation_loss + self.tol < loss_history):
1149
+ loss_history[i % len(loss_history)] = validation_loss
1150
+ else:
1151
+ break
1152
+
1153
+ return i + 1
1154
+
1155
+ def _init_state(self):
1156
+ super()._init_state()
1157
+
1158
+ if self.dropout_rate > 0.0:
1159
+ self._scale = np.ones(self.n_estimators, dtype=float)
1160
+
1161
+ def _resize_state(self):
1162
+ super()._resize_state()
1163
+
1164
+ if self.dropout_rate > 0:
1165
+ if not hasattr(self, "_scale"):
1166
+ raise ValueError(
1167
+ "fitting with warm_start=True and dropout_rate > 0 is only "
1168
+ "supported if the previous fit used dropout_rate > 0 too"
1169
+ )
1170
+
1171
+ self._scale = np.resize(self._scale, self.n_estimators)
1172
+ self._scale[self.n_estimators_ :] = 1
1173
+
1174
+ def _shrink_state(self, n_stages):
1175
+ self.estimators_ = self.estimators_[:n_stages]
1176
+ self.train_score_ = self.train_score_[:n_stages]
1177
+ if hasattr(self, "oob_improvement_"):
1178
+ self.oob_improvement_ = self.oob_improvement_[:n_stages]
1179
+ self.oob_scores_ = self.oob_scores_[:n_stages]
1180
+ self.oob_score_ = self.oob_scores_[-1]
1181
+ if hasattr(self, "_scale"):
1182
+ self._scale = self._scale[:n_stages]
1183
+
1184
+ def fit(self, X, y, sample_weight=None, monitor=None):
1185
+ """Fit the gradient boosting model.
1186
+
1187
+ Parameters
1188
+ ----------
1189
+ X : array-like, shape = (n_samples, n_features)
1190
+ Data matrix
1191
+
1192
+ y : structured array, shape = (n_samples,)
1193
+ A structured array with two fields. The first field is a boolean
1194
+ where ``True`` indicates an event and ``False`` indicates right-censoring.
1195
+ The second field is a float with the time of event or time of censoring.
1196
+
1197
+ sample_weight : array-like, shape = (n_samples,), optional
1198
+ Weights given to each sample. If omitted, all samples have weight 1.
1199
+
1200
+ monitor : callable, optional
1201
+ The monitor is called after each iteration with the current
1202
+ iteration, a reference to the estimator and the local variables of
1203
+ ``_fit_stages`` as arguments: ``callable(i, self,
1204
+ locals())``. If the callable returns ``True`` the fitting procedure
1205
+ is stopped. The monitor can be used for various things such as
1206
+ computing held-out estimates, early stopping, model introspection, and
1207
+ snapshotting.
1208
+
1209
+ Returns
1210
+ -------
1211
+ self : object
1212
+ Returns self.
1213
+ """
1214
+ self._validate_params()
1215
+
1216
+ if not self.warm_start:
1217
+ self._clear_state()
1218
+
1219
+ X = validate_data(
1220
+ self,
1221
+ X,
1222
+ ensure_min_samples=2,
1223
+ order="C",
1224
+ accept_sparse=["csr", "csc", "coo"],
1225
+ dtype=DTYPE,
1226
+ )
1227
+ event, time = check_array_survival(X, y)
1228
+
1229
+ sample_weight_is_none = sample_weight is None
1230
+ sample_weight = _check_sample_weight(sample_weight, X)
1231
+
1232
+ if sample_weight_is_none:
1233
+ y = self._encode_y(y=y, sample_weight=None)
1234
+ else:
1235
+ y = self._encode_y(y=y, sample_weight=sample_weight)
1236
+
1237
+ self._set_max_features()
1238
+
1239
+ # self.loss is guaranteed to be a string
1240
+ self._loss = self._get_loss(sample_weight=sample_weight)
1241
+
1242
+ if isinstance(self._loss, CensoredSquaredLoss | IPCWLeastSquaresError):
1243
+ time = np.log(time)
1244
+
1245
+ if self.n_iter_no_change is not None:
1246
+ (
1247
+ X_train,
1248
+ X_val,
1249
+ event_train,
1250
+ event_val,
1251
+ time_train,
1252
+ time_val,
1253
+ sample_weight_train,
1254
+ sample_weight_val,
1255
+ ) = train_test_split(
1256
+ X,
1257
+ event,
1258
+ time,
1259
+ sample_weight,
1260
+ random_state=self.random_state,
1261
+ test_size=self.validation_fraction,
1262
+ stratify=event,
1263
+ )
1264
+ y_val = np.fromiter(zip(event_val, time_val), dtype=[("event", bool), ("time", np.float64)])
1265
+ else:
1266
+ X_train, sample_weight_train = X, sample_weight
1267
+ event_train, time_train = event, time
1268
+ X_val = y_val = sample_weight_val = None
1269
+
1270
+ y_train = np.fromiter(zip(event_train, time_train), dtype=[("event", bool), ("time", np.float64)])
1271
+ n_samples = X_train.shape[0]
1272
+
1273
+ # First time calling fit.
1274
+ if not self._is_fitted():
1275
+ # init state
1276
+ self._init_state()
1277
+
1278
+ raw_predictions = np.zeros(
1279
+ shape=(n_samples, self.n_trees_per_iteration_),
1280
+ dtype=np.float64,
1281
+ )
1282
+
1283
+ begin_at_stage = 0
1284
+
1285
+ # The rng state must be preserved if warm_start is True
1286
+ self._rng = check_random_state(self.random_state)
1287
+
1288
+ # warm start: this is not the first time fit was called
1289
+ else:
1290
+ # add more estimators to fitted model
1291
+ # invariant: warm_start = True
1292
+ if self.n_estimators < self.estimators_.shape[0]:
1293
+ raise ValueError(
1294
+ "n_estimators=%d must be larger or equal to "
1295
+ "estimators_.shape[0]=%d when "
1296
+ "warm_start==True" % (self.n_estimators, self.estimators_.shape[0])
1297
+ )
1298
+ begin_at_stage = self.estimators_.shape[0]
1299
+ # The requirements of _raw_predict
1300
+ # are more constrained than fit. It accepts only CSR
1301
+ # matrices. Finite values have already been checked in validate_data.
1302
+ X_train = check_array(
1303
+ X_train,
1304
+ dtype=DTYPE,
1305
+ order="C",
1306
+ accept_sparse="csr",
1307
+ ensure_all_finite=False,
1308
+ )
1309
+ raw_predictions = self._raw_predict(X_train)
1310
+ self._resize_state()
1311
+
1312
+ # apply dropout to last stage of previous fit
1313
+ if hasattr(self, "_scale") and self.dropout_rate > 0:
1314
+ for k in range(self.n_trees_per_iteration_):
1315
+ self._update_with_dropout(
1316
+ # pylint: disable-next=access-member-before-definition
1317
+ self.n_estimators_ - 1,
1318
+ X_train,
1319
+ raw_predictions,
1320
+ k,
1321
+ self._scale,
1322
+ self._rng,
1323
+ )
1324
+
1325
+ scale = getattr(self, "_scale", None)
1326
+
1327
+ # fit the boosting stages
1328
+ n_stages = self._fit_stages(
1329
+ X_train,
1330
+ y_train,
1331
+ raw_predictions,
1332
+ sample_weight_train,
1333
+ self._rng,
1334
+ X_val,
1335
+ y_val,
1336
+ sample_weight_val,
1337
+ scale,
1338
+ begin_at_stage,
1339
+ monitor,
1340
+ )
1341
+ # change shape of arrays after fit (early-stopping or additional estimators)
1342
+ if n_stages != self.estimators_.shape[0]:
1343
+ self._shrink_state(n_stages)
1344
+ self.n_estimators_ = n_stages
1345
+
1346
+ self._set_baseline_model(X_train, event_train, time_train)
1347
+
1348
+ return self
1349
+
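# Illustrative sketch (standalone annotation): the two stopping mechanisms that
# `fit` supports -- a held-out validation split via n_iter_no_change, and a
# user-supplied monitor callback. Assumes the imports plus X_train and y_train
# from the usage sketch below the class docstring.
est_es = GradientBoostingSurvivalAnalysis(
    n_estimators=500, validation_fraction=0.2, n_iter_no_change=10, tol=1e-4, random_state=0
)
est_es.fit(X_train, y_train)
print(est_es.n_estimators_)  # stages actually fitted, possibly fewer than 500

def stop_after_25(i, est, locals_):
    return i >= 24  # returning True stops the boosting loop

est_mon = GradientBoostingSurvivalAnalysis(n_estimators=500, random_state=0)
est_mon.fit(X_train, y_train, monitor=stop_after_25)
print(est_mon.n_estimators_)  # 25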
1350
+ def _set_baseline_model(self, X, event, time):
1351
+ if isinstance(self._loss, CoxPH):
1352
+ X_pred = X
1353
+ if issparse(X):
1354
+ X_pred = X.asformat("csr")
1355
+ risk_scores = self._predict(X_pred)
1356
+ self._baseline_model = BreslowEstimator().fit(risk_scores, event, time)
1357
+ else:
1358
+ self._baseline_model = None
1359
+
1360
+ def _dropout_predict_stage(self, X, i, K, score):
1361
+ for k in range(K):
1362
+ tree = self.estimators_[i, k].tree_
1363
+ score += self.learning_rate * self._scale[i] * tree.predict(X).reshape((-1, 1))
1364
+ return score
1365
+
1366
+ def _dropout_raw_predict(self, X):
1367
+ raw_predictions = self._raw_predict_init(X)
1368
+
1369
+ n_estimators, K = self.estimators_.shape
1370
+ for i in range(n_estimators):
1371
+ self._dropout_predict_stage(X, i, K, raw_predictions)
1372
+
1373
+ return raw_predictions
1374
+
1375
+ def _dropout_staged_raw_predict(self, X):
1376
+ X = validate_data(self, X, dtype=DTYPE, order="C", accept_sparse="csr")
1377
+ raw_predictions = self._raw_predict_init(X)
1378
+
1379
+ n_estimators, K = self.estimators_.shape
1380
+ for i in range(n_estimators):
1381
+ self._dropout_predict_stage(X, i, K, raw_predictions)
1382
+ yield raw_predictions.copy()
1383
+
1384
+ def _raw_predict(self, X):
1385
+ # if dropout wasn't used during training, proceed as usual,
1386
+ # otherwise consider scaling factor of individual trees
1387
+ if not hasattr(self, "_scale"):
1388
+ return super()._raw_predict(X)
1389
+ return self._dropout_raw_predict(X)
1390
+
1391
+ def _init_decision_function(self, X): # pragma: no cover
1392
+ return super()._init_decision_function(X).reshape(-1, 1)
1393
+
1394
+ def _decision_function(self, X): # pragma: no cover
1395
+ return self._raw_predict(X)
1396
+
1397
+ def _predict(self, X):
1398
+ score = self._raw_predict(X)
1399
+ if score.shape[1] == 1:
1400
+ score = score.ravel()
1401
+
1402
+ return self._loss._scale_raw_prediction(score)
1403
+
1404
+ def predict(self, X):
1405
+ """Predict risk scores.
1406
+
1407
+ If `loss='coxph'`, predictions can be interpreted as log hazard ratio
1408
+ similar to the linear predictor of a Cox proportional hazards
1409
+ model. If `loss='squared'` or `loss='ipcwls'`, predictions are the
1410
+ time to event.
1411
+
1412
+ Parameters
1413
+ ----------
1414
+ X : array-like, shape = (n_samples, n_features)
1415
+ The input samples.
1416
+
1417
+ Returns
1418
+ -------
1419
+ y : ndarray, shape = (n_samples,)
1420
+ The risk scores.
1421
+ """
1422
+ check_is_fitted(self, "estimators_")
1423
+
1424
+ X = validate_data(self, X, reset=False, order="C", accept_sparse="csr", dtype=DTYPE)
1425
+ return self._predict(X)
1426
+
1427
+ def staged_predict(self, X):
1428
+ """Predict risk scores at each stage for X.
1429
+
1430
+ This method allows monitoring (i.e., determining the error on a testing set)
1431
+ after each stage.
1432
+
1433
+ If `loss='coxph'`, predictions can be interpreted as log hazard ratio
1434
+ similar to the linear predictor of a Cox proportional hazards
1435
+ model. If `loss='squared'` or `loss='ipcwls'`, predictions are the
1436
+ time to event.
1437
+
1438
+ Parameters
1439
+ ----------
1440
+ X : array-like, shape = (n_samples, n_features)
1441
+ The input samples.
1442
+
1443
+ Returns
1444
+ -------
1445
+ y : generator of array of shape = (n_samples,)
1446
+ The predicted value of the input samples.
1447
+ """
1448
+ check_is_fitted(self, "estimators_")
1449
+
1450
+ # if dropout wasn't used during training, proceed as usual,
1451
+ # otherwise consider scaling factor of individual trees
1452
+ if not hasattr(self, "_scale"):
1453
+ predictions_iter = self._staged_raw_predict(X)
1454
+ else:
1455
+ predictions_iter = self._dropout_staged_raw_predict(X)
1456
+
1457
+ for raw_predictions in predictions_iter:
1458
+ y = self._loss._scale_raw_prediction(raw_predictions)
1459
+ yield y.ravel()
1460
+
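# Illustrative sketch (standalone annotation): tracking held-out performance per
# boosting stage with staged_predict. Assumes `est_gb`, `X_test`, and `y_test`
# from the usage sketch below the class docstring, and uses
# sksurv.metrics.concordance_index_censored for evaluation.
from sksurv.metrics import concordance_index_censored

for stage, preds in enumerate(est_gb.staged_predict(X_test), start=1):
    if stage % 50 == 0:
        cindex = concordance_index_censored(y_test["event"], y_test["time"], preds)[0]
        print(f"stage {stage}: c-index {cindex:.3f}")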
1461
+ def _get_baseline_model(self):
1462
+ if self._baseline_model is None:
1463
+ raise ValueError("`fit` must be called with the loss option set to 'coxph'.")
1464
+ return self._baseline_model
1465
+
1466
+ @append_cumulative_hazard_example(estimator_mod="ensemble", estimator_class="GradientBoostingSurvivalAnalysis")
1467
+ def predict_cumulative_hazard_function(self, X, return_array=False):
1468
+ r"""Predict cumulative hazard function.
1469
+
1470
+ Only available if :meth:`fit` has been called with `loss = "coxph"`.
1471
+
1472
+ The cumulative hazard function for an individual
1473
+ with feature vector :math:`x` is defined as
1474
+
1475
+ .. math::
1476
+
1477
+ H(t \mid x) = \exp(f(x)) H_0(t) ,
1478
+
1479
+ where :math:`f(\cdot)` is the additive ensemble of base learners,
1480
+ and :math:`H_0(t)` is the baseline hazard function,
1481
+ estimated by Breslow's estimator.
1482
+
1483
+ Parameters
1484
+ ----------
1485
+ X : array-like, shape = (n_samples, n_features)
1486
+ Data matrix.
1487
+
1488
+ return_array : bool, default: False
1489
+ Whether to return a single array of cumulative hazard values
1490
+ or a list of step functions.
1491
+
1492
+ If `False`, a list of :class:`sksurv.functions.StepFunction`
1493
+ objects is returned.
1494
+
1495
+ If `True`, a 2d-array of shape `(n_samples, n_unique_times)` is
1496
+ returned, where `n_unique_times` is the number of unique
1497
+ event times in the training data. Each row represents the cumulative
1498
+ hazard function of an individual evaluated at `unique_times_`.
1499
+
1500
+ Returns
1501
+ -------
1502
+ cum_hazard : ndarray
1503
+ If `return_array` is `False`, an array of `n_samples`
1504
+ :class:`sksurv.functions.StepFunction` instances is returned.
1505
+
1506
+ If `return_array` is `True`, a numeric array of shape
1507
+ `(n_samples, n_unique_times_)` is returned.
1508
+
1509
+ Examples
1510
+ --------
1511
+ """
1512
+ return self._predict_cumulative_hazard_function(self._get_baseline_model(), self.predict(X), return_array)
1513
+
1514
+ @append_survival_function_example(estimator_mod="ensemble", estimator_class="GradientBoostingSurvivalAnalysis")
1515
+ def predict_survival_function(self, X, return_array=False):
1516
+ r"""Predict survival function.
1517
+
1518
+ Only available if :meth:`fit` has been called with `loss = "coxph"`.
1519
+
1520
+ The survival function for an individual
1521
+ with feature vector :math:`x` is defined as
1522
+
1523
+ .. math::
1524
+
1525
+ S(t \mid x) = S_0(t)^{\exp(f(x))} ,
1526
+
1527
+ where :math:`f(\cdot)` is the additive ensemble of base learners,
1528
+ and :math:`S_0(t)` is the baseline survival function,
1529
+ estimated by Breslow's estimator.
1530
+
1531
+ Parameters
1532
+ ----------
1533
+ X : array-like, shape = (n_samples, n_features)
1534
+ Data matrix.
1535
+
1536
+ return_array : bool, default: False
1537
+ Whether to return a single array of survival probabilities
1538
+ or a list of step functions.
1539
+
1540
+ If `False`, a list of :class:`sksurv.functions.StepFunction`
1541
+ objects is returned.
1542
+
1543
+ If `True`, a 2d-array of shape `(n_samples, n_unique_times)` is
1544
+ returned, where `n_unique_times` is the number of unique
1545
+ event times in the training data. Each row represents the survival
1546
+ function of an individual evaluated at `unique_times_`.
1547
+
1548
+ Returns
1549
+ -------
1550
+ survival : ndarray
1551
+ If `return_array` is `False`, an array of `n_samples`
1552
+ :class:`sksurv.functions.StepFunction` instances is returned.
1553
+
1554
+ If `return_array` is `True`, a numeric array of shape
1555
+ `(n_samples, n_unique_times_)` is returned.
1556
+
1557
+ Examples
1558
+ --------
1559
+ """
1560
+ return self._predict_survival_function(self._get_baseline_model(), self.predict(X), return_array)
1561
+
1562
+ @property
1563
+ def unique_times_(self):
1564
+ return self._get_baseline_model().unique_times_
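# Illustrative sketch (standalone annotation): growing an already fitted ensemble
# with warm_start, as described for the `warm_start` parameter in the class
# docstring. Assumes `est_gb`, `X_train`, and `y_train` from the usage sketch
# below the class docstring (fitted with n_estimators=200).
est_gb.set_params(warm_start=True, n_estimators=300)
est_gb.fit(X_train, y_train)  # only the 100 additional stages are fitted
print(est_gb.n_estimators_)   # 300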