scikit-survival 0.24.1__cp310-cp310-macosx_11_0_arm64.whl → 0.25.0__cp310-cp310-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scikit_survival-0.25.0.dist-info/METADATA +185 -0
- scikit_survival-0.25.0.dist-info/RECORD +58 -0
- {scikit_survival-0.24.1.dist-info → scikit_survival-0.25.0.dist-info}/WHEEL +1 -1
- sksurv/__init__.py +51 -6
- sksurv/base.py +12 -2
- sksurv/bintrees/_binarytrees.cpython-310-darwin.so +0 -0
- sksurv/column.py +33 -29
- sksurv/compare.py +22 -22
- sksurv/datasets/base.py +45 -20
- sksurv/docstrings.py +99 -0
- sksurv/ensemble/_coxph_loss.cpython-310-darwin.so +0 -0
- sksurv/ensemble/boosting.py +116 -168
- sksurv/ensemble/forest.py +94 -151
- sksurv/functions.py +29 -29
- sksurv/io/arffread.py +34 -3
- sksurv/io/arffwrite.py +38 -2
- sksurv/kernels/_clinical_kernel.cpython-310-darwin.so +0 -0
- sksurv/kernels/clinical.py +33 -13
- sksurv/linear_model/_coxnet.cpython-310-darwin.so +0 -0
- sksurv/linear_model/aft.py +14 -11
- sksurv/linear_model/coxnet.py +138 -89
- sksurv/linear_model/coxph.py +102 -83
- sksurv/meta/ensemble_selection.py +91 -9
- sksurv/meta/stacking.py +47 -26
- sksurv/metrics.py +257 -224
- sksurv/nonparametric.py +150 -81
- sksurv/preprocessing.py +55 -27
- sksurv/svm/_minlip.cpython-310-darwin.so +0 -0
- sksurv/svm/_prsvm.cpython-310-darwin.so +0 -0
- sksurv/svm/minlip.py +160 -79
- sksurv/svm/naive_survival_svm.py +63 -34
- sksurv/svm/survival_svm.py +103 -103
- sksurv/tree/_criterion.cpython-310-darwin.so +0 -0
- sksurv/tree/tree.py +170 -84
- sksurv/util.py +80 -26
- scikit_survival-0.24.1.dist-info/METADATA +0 -889
- scikit_survival-0.24.1.dist-info/RECORD +0 -57
- {scikit_survival-0.24.1.dist-info → scikit_survival-0.25.0.dist-info}/licenses/COPYING +0 -0
- {scikit_survival-0.24.1.dist-info → scikit_survival-0.25.0.dist-info}/top_level.txt +0 -0
sksurv/ensemble/boosting.py
CHANGED
|
@@ -32,6 +32,7 @@ from sklearn.utils.validation import (
|
|
|
32
32
|
)
|
|
33
33
|
|
|
34
34
|
from ..base import SurvivalAnalysisMixin
|
|
35
|
+
from ..docstrings import append_cumulative_hazard_example, append_survival_function_example
|
|
35
36
|
from ..linear_model.coxph import BreslowEstimator
|
|
36
37
|
from ..util import check_array_survival
|
|
37
38
|
from .survival_loss import LOSS_FUNCTIONS, CensoredSquaredLoss, CoxPH, IPCWLeastSquaresError
|
|
@@ -104,7 +105,7 @@ class ComponentwiseGradientBoostingSurvivalAnalysis(BaseEnsemble, SurvivalAnalys
|
|
|
104
105
|
There is a trade-off between `learning_rate` and `n_estimators`.
|
|
105
106
|
Values must be in the range `[0.0, inf)`.
|
|
106
107
|
|
|
107
|
-
n_estimators : int, default: 100
|
|
108
|
+
n_estimators : int, optional, default: 100
|
|
108
109
|
The number of boosting stages to perform. Gradient boosting
|
|
109
110
|
is fairly robust to over-fitting so a large number usually
|
|
110
111
|
results in better performance.
|
|
@@ -118,7 +119,7 @@ class ComponentwiseGradientBoostingSurvivalAnalysis(BaseEnsemble, SurvivalAnalys
|
|
|
118
119
|
and an increase in bias.
|
|
119
120
|
Values must be in the range `(0.0, 1.0]`.
|
|
120
121
|
|
|
121
|
-
warm_start : bool, default: False
|
|
122
|
+
warm_start : bool, optional, default: False
|
|
122
123
|
When set to ``True``, reuse the solution of the previous call to fit
|
|
123
124
|
and add more estimators to the ensemble, otherwise, just erase the
|
|
124
125
|
previous solution.
|
|
@@ -131,18 +132,19 @@ class ComponentwiseGradientBoostingSurvivalAnalysis(BaseEnsemble, SurvivalAnalys
|
|
|
131
132
|
to shrinkage, i.e., setting `learning_rate < 1.0`.
|
|
132
133
|
Values must be in the range `[0.0, 1.0)`.
|
|
133
134
|
|
|
134
|
-
random_state : int
|
|
135
|
-
|
|
136
|
-
|
|
135
|
+
random_state : int, RandomState instance or None, optional, default: None
|
|
136
|
+
Controls the randomness of the subsampling of the data if ``subsample < 1.0``,
|
|
137
|
+
and the random selection of base learners to drop if ``dropout_rate > 0``.
|
|
138
|
+
Pass an int for reproducible output across multiple function calls.
|
|
137
139
|
|
|
138
|
-
verbose : int, default: 0
|
|
140
|
+
verbose : int, optional, default: 0
|
|
139
141
|
Enable verbose output. If 1 then it prints progress and performance
|
|
140
142
|
once in a while.
|
|
141
143
|
Values must be in the range `[0, inf)`.
|
|
142
144
|
|
|
143
145
|
Attributes
|
|
144
146
|
----------
|
|
145
|
-
coef_ :
|
|
147
|
+
coef_ : ndarray, shape = (n_features + 1,), dtype = float
|
|
146
148
|
The aggregated coefficients. The first element `coef\_[0]` corresponds
|
|
147
149
|
to the intercept. If loss is `coxph`, the intercept will always be zero.
|
|
148
150
|
|
|
@@ -161,7 +163,7 @@ class ComponentwiseGradientBoostingSurvivalAnalysis(BaseEnsemble, SurvivalAnalys
|
|
|
161
163
|
loss of the first stage over the ``init`` estimator.
|
|
162
164
|
Only available if ``subsample < 1.0``.
|
|
163
165
|
|
|
164
|
-
oob_scores_ : ndarray
|
|
166
|
+
oob_scores_ : ndarray, shape = (n_estimators,)
|
|
165
167
|
The full history of the loss values on the out-of-bag
|
|
166
168
|
samples. Only available if ``subsample < 1.0``.
|
|
167
169
|
|
|
@@ -172,11 +174,11 @@ class ComponentwiseGradientBoostingSurvivalAnalysis(BaseEnsemble, SurvivalAnalys
|
|
|
172
174
|
n_features_in_ : int
|
|
173
175
|
Number of features seen during ``fit``.
|
|
174
176
|
|
|
175
|
-
feature_names_in_ : ndarray
|
|
177
|
+
feature_names_in_ : ndarray, shape = (`n_features_in_`,)
|
|
176
178
|
Names of features seen during ``fit``. Defined only when `X`
|
|
177
179
|
has feature names that are all strings.
|
|
178
180
|
|
|
179
|
-
unique_times_ :
|
|
181
|
+
unique_times_ : ndarray, shape = (n_unique_times,)
|
|
180
182
|
Unique time points.
|
|
181
183
|
|
|
182
184
|
References
|
|
@@ -378,9 +380,9 @@ class ComponentwiseGradientBoostingSurvivalAnalysis(BaseEnsemble, SurvivalAnalys
|
|
|
378
380
|
Data matrix
|
|
379
381
|
|
|
380
382
|
y : structured array, shape = (n_samples,)
|
|
381
|
-
A structured array
|
|
382
|
-
|
|
383
|
-
second field.
|
|
383
|
+
A structured array with two fields. The first field is a boolean
|
|
384
|
+
where ``True`` indicates an event and ``False`` indicates right-censoring.
|
|
385
|
+
The second field is a float with the time of event or time of censoring.
|
|
384
386
|
|
|
385
387
|
sample_weight : array-like, shape = (n_samples,), optional
|
|
386
388
|
Weights given to each sample. If omitted, all samples have weight 1.
|
|
@@ -484,8 +486,11 @@ class ComponentwiseGradientBoostingSurvivalAnalysis(BaseEnsemble, SurvivalAnalys
|
|
|
484
486
|
raise ValueError("`fit` must be called with the loss option set to 'coxph'.")
|
|
485
487
|
return self._baseline_model
|
|
486
488
|
|
|
489
|
+
@append_cumulative_hazard_example(
|
|
490
|
+
estimator_mod="ensemble", estimator_class="ComponentwiseGradientBoostingSurvivalAnalysis"
|
|
491
|
+
)
|
|
487
492
|
def predict_cumulative_hazard_function(self, X, return_array=False):
|
|
488
|
-
"""Predict cumulative hazard function.
|
|
493
|
+
r"""Predict cumulative hazard function.
|
|
489
494
|
|
|
490
495
|
Only available if :meth:`fit` has been called with `loss = "coxph"`.
|
|
491
496
|
|
|
@@ -494,9 +499,9 @@ class ComponentwiseGradientBoostingSurvivalAnalysis(BaseEnsemble, SurvivalAnalys
|
|
|
494
499
|
|
|
495
500
|
.. math::
|
|
496
501
|
|
|
497
|
-
H(t
|
|
502
|
+
H(t \mid x) = \exp(f(x)) H_0(t) ,
|
|
498
503
|
|
|
499
|
-
where :math:`f(
|
|
504
|
+
where :math:`f(\cdot)` is the additive ensemble of base learners,
|
|
500
505
|
and :math:`H_0(t)` is the baseline hazard function,
|
|
501
506
|
estimated by Breslow's estimator.
|
|
502
507
|
|
|
@@ -505,49 +510,37 @@ class ComponentwiseGradientBoostingSurvivalAnalysis(BaseEnsemble, SurvivalAnalys
|
|
|
505
510
|
X : array-like, shape = (n_samples, n_features)
|
|
506
511
|
Data matrix.
|
|
507
512
|
|
|
508
|
-
return_array :
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
513
|
+
return_array : bool, default: False
|
|
514
|
+
Whether to return a single array of cumulative hazard values
|
|
515
|
+
or a list of step functions.
|
|
516
|
+
|
|
517
|
+
If `False`, a list of :class:`sksurv.functions.StepFunction`
|
|
518
|
+
objects is returned.
|
|
519
|
+
|
|
520
|
+
If `True`, a 2d-array of shape `(n_samples, n_unique_times)` is
|
|
521
|
+
returned, where `n_unique_times` is the number of unique
|
|
522
|
+
event times in the training data. Each row represents the cumulative
|
|
523
|
+
hazard function of an individual evaluated at `unique_times_`.
|
|
512
524
|
|
|
513
525
|
Returns
|
|
514
526
|
-------
|
|
515
527
|
cum_hazard : ndarray
|
|
516
|
-
If `return_array` is
|
|
517
|
-
|
|
518
|
-
|
|
528
|
+
If `return_array` is `False`, an array of `n_samples`
|
|
529
|
+
:class:`sksurv.functions.StepFunction` instances is returned.
|
|
530
|
+
|
|
531
|
+
If `return_array` is `True`, a numeric array of shape
|
|
532
|
+
`(n_samples, n_unique_times)` is returned.
|
|
519
533
|
|
|
520
534
|
Examples
|
|
521
535
|
--------
|
|
522
|
-
>>> import matplotlib.pyplot as plt
|
|
523
|
-
>>> from sksurv.datasets import load_whas500
|
|
524
|
-
>>> from sksurv.ensemble import ComponentwiseGradientBoostingSurvivalAnalysis
|
|
525
|
-
|
|
526
|
-
Load the data.
|
|
527
|
-
|
|
528
|
-
>>> X, y = load_whas500()
|
|
529
|
-
>>> X = X.astype(float)
|
|
530
|
-
|
|
531
|
-
Fit the model.
|
|
532
|
-
|
|
533
|
-
>>> estimator = ComponentwiseGradientBoostingSurvivalAnalysis(loss="coxph").fit(X, y)
|
|
534
|
-
|
|
535
|
-
Estimate the cumulative hazard function for the first 10 samples.
|
|
536
|
-
|
|
537
|
-
>>> chf_funcs = estimator.predict_cumulative_hazard_function(X.iloc[:10])
|
|
538
|
-
|
|
539
|
-
Plot the estimated cumulative hazard functions.
|
|
540
|
-
|
|
541
|
-
>>> for fn in chf_funcs:
|
|
542
|
-
... plt.step(fn.x, fn(fn.x), where="post")
|
|
543
|
-
...
|
|
544
|
-
>>> plt.ylim(0, 1)
|
|
545
|
-
>>> plt.show()
|
|
546
536
|
"""
|
|
547
537
|
return self._predict_cumulative_hazard_function(self._get_baseline_model(), self.predict(X), return_array)
|
|
548
538
|
|
|
539
|
+
@append_survival_function_example(
|
|
540
|
+
estimator_mod="ensemble", estimator_class="ComponentwiseGradientBoostingSurvivalAnalysis"
|
|
541
|
+
)
|
|
549
542
|
def predict_survival_function(self, X, return_array=False):
|
|
550
|
-
"""Predict survival function.
|
|
543
|
+
r"""Predict survival function.
|
|
551
544
|
|
|
552
545
|
Only available if :meth:`fit` has been called with `loss = "coxph"`.
|
|
553
546
|
|
|
@@ -556,9 +549,9 @@ class ComponentwiseGradientBoostingSurvivalAnalysis(BaseEnsemble, SurvivalAnalys
|
|
|
556
549
|
|
|
557
550
|
.. math::
|
|
558
551
|
|
|
559
|
-
S(t
|
|
552
|
+
S(t \mid x) = S_0(t)^{\exp(f(x))} ,
|
|
560
553
|
|
|
561
|
-
where :math:`f(
|
|
554
|
+
where :math:`f(\cdot)` is the additive ensemble of base learners,
|
|
562
555
|
and :math:`S_0(t)` is the baseline survival function,
|
|
563
556
|
estimated by Breslow's estimator.
|
|
564
557
|
|
|
@@ -567,45 +560,29 @@ class ComponentwiseGradientBoostingSurvivalAnalysis(BaseEnsemble, SurvivalAnalys
|
|
|
567
560
|
X : array-like, shape = (n_samples, n_features)
|
|
568
561
|
Data matrix.
|
|
569
562
|
|
|
570
|
-
return_array :
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
563
|
+
return_array : bool, default: False
|
|
564
|
+
Whether to return a single array of survival probabilities
|
|
565
|
+
or a list of step functions.
|
|
566
|
+
|
|
567
|
+
If `False`, a list of :class:`sksurv.functions.StepFunction`
|
|
568
|
+
objects is returned.
|
|
569
|
+
|
|
570
|
+
If `True`, a 2d-array of shape `(n_samples, n_unique_times)` is
|
|
571
|
+
returned, where `n_unique_times` is the number of unique
|
|
572
|
+
event times in the training data. Each row represents the survival
|
|
573
|
+
function of an individual evaluated at `unique_times_`.
|
|
574
574
|
|
|
575
575
|
Returns
|
|
576
576
|
-------
|
|
577
577
|
survival : ndarray
|
|
578
|
-
If `return_array` is
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
578
|
+
If `return_array` is `False`, an array of `n_samples`
|
|
579
|
+
:class:`sksurv.functions.StepFunction` instances is returned.
|
|
580
|
+
|
|
581
|
+
If `return_array` is `True`, a numeric array of shape
|
|
582
|
+
`(n_samples, n_unique_times)` is returned.
|
|
582
583
|
|
|
583
584
|
Examples
|
|
584
585
|
--------
|
|
585
|
-
>>> import matplotlib.pyplot as plt
|
|
586
|
-
>>> from sksurv.datasets import load_whas500
|
|
587
|
-
>>> from sksurv.ensemble import ComponentwiseGradientBoostingSurvivalAnalysis
|
|
588
|
-
|
|
589
|
-
Load the data.
|
|
590
|
-
|
|
591
|
-
>>> X, y = load_whas500()
|
|
592
|
-
>>> X = X.astype(float)
|
|
593
|
-
|
|
594
|
-
Fit the model.
|
|
595
|
-
|
|
596
|
-
>>> estimator = ComponentwiseGradientBoostingSurvivalAnalysis(loss="coxph").fit(X, y)
|
|
597
|
-
|
|
598
|
-
Estimate the survival function for the first 10 samples.
|
|
599
|
-
|
|
600
|
-
>>> surv_funcs = estimator.predict_survival_function(X.iloc[:10])
|
|
601
|
-
|
|
602
|
-
Plot the estimated survival functions.
|
|
603
|
-
|
|
604
|
-
>>> for fn in surv_funcs:
|
|
605
|
-
... plt.step(fn.x, fn(fn.x), where="post")
|
|
606
|
-
...
|
|
607
|
-
>>> plt.ylim(0, 1)
|
|
608
|
-
>>> plt.show()
|
|
609
586
|
"""
|
|
610
587
|
return self._predict_survival_function(self._get_baseline_model(), self.predict(X), return_array)
|
|
611
588
|
|
|
@@ -673,7 +650,7 @@ class GradientBoostingSurvivalAnalysis(BaseGradientBoosting, SurvivalAnalysisMix
|
|
|
673
650
|
There is a trade-off between `learning_rate` and `n_estimators`.
|
|
674
651
|
Values must be in the range `[0.0, inf)`.
|
|
675
652
|
|
|
676
|
-
n_estimators : int, default: 100
|
|
653
|
+
n_estimators : int, optional, default: 100
|
|
677
654
|
The number of regression trees to create. Gradient boosting
|
|
678
655
|
is fairly robust to over-fitting so a large number usually
|
|
679
656
|
results in better performance.
|
|
@@ -687,7 +664,7 @@ class GradientBoostingSurvivalAnalysis(BaseGradientBoosting, SurvivalAnalysisMix
|
|
|
687
664
|
and an increase in bias.
|
|
688
665
|
Values must be in the range `(0.0, 1.0]`.
|
|
689
666
|
|
|
690
|
-
criterion : {'friedman_mse', 'squared_error'}, default: 'friedman_mse'
|
|
667
|
+
criterion : {'friedman_mse', 'squared_error'}, optional, default: 'friedman_mse'
|
|
691
668
|
The function to measure the quality of a split. Supported criteria are
|
|
692
669
|
'friedman_mse' for the mean squared error with improvement score by
|
|
693
670
|
Friedman, 'squared_error' for mean squared error. The default value of
|
|
@@ -701,7 +678,7 @@ class GradientBoostingSurvivalAnalysis(BaseGradientBoosting, SurvivalAnalysisMix
|
|
|
701
678
|
- If float, values must be in the range `(0.0, 1.0]` and `min_samples_split`
|
|
702
679
|
will be `ceil(min_samples_split * n_samples)`.
|
|
703
680
|
|
|
704
|
-
min_samples_leaf : int or float, default: 1
|
|
681
|
+
min_samples_leaf : int or float, optional, default: 1
|
|
705
682
|
The minimum number of samples required to be at a leaf node.
|
|
706
683
|
A split point at any depth will only be considered if it leaves at
|
|
707
684
|
least ``min_samples_leaf`` training samples in each of the left and
|
|
@@ -743,7 +720,7 @@ class GradientBoostingSurvivalAnalysis(BaseGradientBoosting, SurvivalAnalysisMix
|
|
|
743
720
|
``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,
|
|
744
721
|
if ``sample_weight`` is passed.
|
|
745
722
|
|
|
746
|
-
random_state : int
|
|
723
|
+
random_state : int, RandomState instance, or None, optional, default: None
|
|
747
724
|
Controls the random seed given to each Tree estimator at each
|
|
748
725
|
boosting iteration.
|
|
749
726
|
In addition, it controls the random permutation of the features at
|
|
@@ -752,7 +729,7 @@ class GradientBoostingSurvivalAnalysis(BaseGradientBoosting, SurvivalAnalysisMix
|
|
|
752
729
|
validation set if `n_iter_no_change` is not None.
|
|
753
730
|
Pass an int for reproducible output across multiple function calls.
|
|
754
731
|
|
|
755
|
-
max_features : int, float,
|
|
732
|
+
max_features : int, float, {'sqrt', 'log2'} or None, optional, default: None
|
|
756
733
|
The number of features to consider when looking for the best split:
|
|
757
734
|
|
|
758
735
|
- If int, values must be in the range `[1, inf)`.
|
|
@@ -775,17 +752,17 @@ class GradientBoostingSurvivalAnalysis(BaseGradientBoosting, SurvivalAnalysisMix
|
|
|
775
752
|
Values must be in the range `[2, inf)`.
|
|
776
753
|
If `None`, then unlimited number of leaf nodes.
|
|
777
754
|
|
|
778
|
-
warm_start : bool, default: False
|
|
755
|
+
warm_start : bool, optional, default: False
|
|
779
756
|
When set to ``True``, reuse the solution of the previous call to fit
|
|
780
757
|
and add more estimators to the ensemble, otherwise, just erase the
|
|
781
758
|
previous solution.
|
|
782
759
|
|
|
783
|
-
validation_fraction : float, default: 0.1
|
|
760
|
+
validation_fraction : float, optional, default: 0.1
|
|
784
761
|
The proportion of training data to set aside as validation set for
|
|
785
762
|
early stopping. Values must be in the range `(0.0, 1.0)`.
|
|
786
763
|
Only used if ``n_iter_no_change`` is set to an integer.
|
|
787
764
|
|
|
788
|
-
n_iter_no_change : int, default: None
|
|
765
|
+
n_iter_no_change : int, optional, default: None
|
|
789
766
|
``n_iter_no_change`` is used to decide if early stopping will be used
|
|
790
767
|
to terminate training when validation score is not improving. By
|
|
791
768
|
default it is set to None to disable early stopping. If set to a
|
|
@@ -795,7 +772,7 @@ class GradientBoostingSurvivalAnalysis(BaseGradientBoosting, SurvivalAnalysisMix
|
|
|
795
772
|
iterations. The split is stratified.
|
|
796
773
|
Values must be in the range `[1, inf)`.
|
|
797
774
|
|
|
798
|
-
tol : float, default: 1e-4
|
|
775
|
+
tol : float, optional, default: 1e-4
|
|
799
776
|
Tolerance for the early stopping. When the loss is not improving
|
|
800
777
|
by at least tol for ``n_iter_no_change`` iterations (if set to a
|
|
801
778
|
number), the training stops.
|
|
@@ -809,13 +786,13 @@ class GradientBoostingSurvivalAnalysis(BaseGradientBoosting, SurvivalAnalysisMix
|
|
|
809
786
|
to shrinkage, i.e., setting `learning_rate < 1.0`.
|
|
810
787
|
Values must be in the range `[0.0, 1.0)`.
|
|
811
788
|
|
|
812
|
-
verbose : int, default: 0
|
|
789
|
+
verbose : int, optional, default: 0
|
|
813
790
|
Enable verbose output. If 1 then it prints progress and performance
|
|
814
791
|
once in a while (the more trees the lower the frequency). If greater
|
|
815
792
|
than 1 then it prints progress and performance for every tree.
|
|
816
793
|
Values must be in the range `[0, inf)`.
|
|
817
794
|
|
|
818
|
-
ccp_alpha :
|
|
795
|
+
ccp_alpha : float, optional, default: 0.0
|
|
819
796
|
Complexity parameter used for Minimal Cost-Complexity Pruning. The
|
|
820
797
|
subtree with the largest cost complexity that is smaller than
|
|
821
798
|
``ccp_alpha`` will be chosen. By default, no pruning is performed.
|
|
@@ -846,7 +823,7 @@ class GradientBoostingSurvivalAnalysis(BaseGradientBoosting, SurvivalAnalysisMix
|
|
|
846
823
|
loss of the first stage over the ``init`` estimator.
|
|
847
824
|
Only available if ``subsample < 1.0``.
|
|
848
825
|
|
|
849
|
-
oob_scores_ : ndarray
|
|
826
|
+
oob_scores_ : ndarray, shape = (n_estimators,)
|
|
850
827
|
The full history of the loss values on the out-of-bag
|
|
851
828
|
samples. Only available if ``subsample < 1.0``.
|
|
852
829
|
|
|
@@ -857,14 +834,14 @@ class GradientBoostingSurvivalAnalysis(BaseGradientBoosting, SurvivalAnalysisMix
|
|
|
857
834
|
n_features_in_ : int
|
|
858
835
|
Number of features seen during ``fit``.
|
|
859
836
|
|
|
860
|
-
feature_names_in_ : ndarray
|
|
837
|
+
feature_names_in_ : ndarray, shape = (`n_features_in_`,)
|
|
861
838
|
Names of features seen during ``fit``. Defined only when `X`
|
|
862
839
|
has feature names that are all strings.
|
|
863
840
|
|
|
864
841
|
max_features_ : int
|
|
865
842
|
The inferred value of max_features.
|
|
866
843
|
|
|
867
|
-
unique_times_ :
|
|
844
|
+
unique_times_ : ndarray, shape = (n_unique_times,)
|
|
868
845
|
Unique time points.
|
|
869
846
|
|
|
870
847
|
See also
|
|
@@ -1213,9 +1190,9 @@ class GradientBoostingSurvivalAnalysis(BaseGradientBoosting, SurvivalAnalysisMix
|
|
|
1213
1190
|
Data matrix
|
|
1214
1191
|
|
|
1215
1192
|
y : structured array, shape = (n_samples,)
|
|
1216
|
-
A structured array
|
|
1217
|
-
|
|
1218
|
-
second field.
|
|
1193
|
+
A structured array with two fields. The first field is a boolean
|
|
1194
|
+
where ``True`` indicates an event and ``False`` indicates right-censoring.
|
|
1195
|
+
The second field is a float with the time of event or time of censoring.
|
|
1219
1196
|
|
|
1220
1197
|
sample_weight : array-like, shape = (n_samples,), optional
|
|
1221
1198
|
Weights given to each sample. If omitted, all samples have weight 1.
|
|
@@ -1486,8 +1463,9 @@ class GradientBoostingSurvivalAnalysis(BaseGradientBoosting, SurvivalAnalysisMix
|
|
|
1486
1463
|
raise ValueError("`fit` must be called with the loss option set to 'coxph'.")
|
|
1487
1464
|
return self._baseline_model
|
|
1488
1465
|
|
|
1466
|
+
@append_cumulative_hazard_example(estimator_mod="ensemble", estimator_class="GradientBoostingSurvivalAnalysis")
|
|
1489
1467
|
def predict_cumulative_hazard_function(self, X, return_array=False):
|
|
1490
|
-
"""Predict cumulative hazard function.
|
|
1468
|
+
r"""Predict cumulative hazard function.
|
|
1491
1469
|
|
|
1492
1470
|
Only available if :meth:`fit` has been called with `loss = "coxph"`.
|
|
1493
1471
|
|
|
@@ -1496,9 +1474,9 @@ class GradientBoostingSurvivalAnalysis(BaseGradientBoosting, SurvivalAnalysisMix
|
|
|
1496
1474
|
|
|
1497
1475
|
.. math::
|
|
1498
1476
|
|
|
1499
|
-
H(t
|
|
1477
|
+
H(t \mid x) = \exp(f(x)) H_0(t) ,
|
|
1500
1478
|
|
|
1501
|
-
where :math:`f(
|
|
1479
|
+
where :math:`f(\cdot)` is the additive ensemble of base learners,
|
|
1502
1480
|
and :math:`H_0(t)` is the baseline hazard function,
|
|
1503
1481
|
estimated by Breslow's estimator.
|
|
1504
1482
|
|
|
@@ -1507,49 +1485,35 @@ class GradientBoostingSurvivalAnalysis(BaseGradientBoosting, SurvivalAnalysisMix
|
|
|
1507
1485
|
X : array-like, shape = (n_samples, n_features)
|
|
1508
1486
|
Data matrix.
|
|
1509
1487
|
|
|
1510
|
-
return_array :
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
|
|
1488
|
+
return_array : bool, default: False
|
|
1489
|
+
Whether to return a single array of cumulative hazard values
|
|
1490
|
+
or a list of step functions.
|
|
1491
|
+
|
|
1492
|
+
If `False`, a list of :class:`sksurv.functions.StepFunction`
|
|
1493
|
+
objects is returned.
|
|
1494
|
+
|
|
1495
|
+
If `True`, a 2d-array of shape `(n_samples, n_unique_times)` is
|
|
1496
|
+
returned, where `n_unique_times` is the number of unique
|
|
1497
|
+
event times in the training data. Each row represents the cumulative
|
|
1498
|
+
hazard function of an individual evaluated at `unique_times_`.
|
|
1514
1499
|
|
|
1515
1500
|
Returns
|
|
1516
1501
|
-------
|
|
1517
1502
|
cum_hazard : ndarray
|
|
1518
|
-
If `return_array` is
|
|
1519
|
-
|
|
1520
|
-
|
|
1503
|
+
If `return_array` is `False`, an array of `n_samples`
|
|
1504
|
+
:class:`sksurv.functions.StepFunction` instances is returned.
|
|
1505
|
+
|
|
1506
|
+
If `return_array` is `True`, a numeric array of shape
|
|
1507
|
+
`(n_samples, n_unique_times)` is returned.
|
|
1521
1508
|
|
|
1522
1509
|
Examples
|
|
1523
1510
|
--------
|
|
1524
|
-
>>> import matplotlib.pyplot as plt
|
|
1525
|
-
>>> from sksurv.datasets import load_whas500
|
|
1526
|
-
>>> from sksurv.ensemble import GradientBoostingSurvivalAnalysis
|
|
1527
|
-
|
|
1528
|
-
Load the data.
|
|
1529
|
-
|
|
1530
|
-
>>> X, y = load_whas500()
|
|
1531
|
-
>>> X = X.astype(float)
|
|
1532
|
-
|
|
1533
|
-
Fit the model.
|
|
1534
|
-
|
|
1535
|
-
>>> estimator = GradientBoostingSurvivalAnalysis(loss="coxph").fit(X, y)
|
|
1536
|
-
|
|
1537
|
-
Estimate the cumulative hazard function for the first 10 samples.
|
|
1538
|
-
|
|
1539
|
-
>>> chf_funcs = estimator.predict_cumulative_hazard_function(X.iloc[:10])
|
|
1540
|
-
|
|
1541
|
-
Plot the estimated cumulative hazard functions.
|
|
1542
|
-
|
|
1543
|
-
>>> for fn in chf_funcs:
|
|
1544
|
-
... plt.step(fn.x, fn(fn.x), where="post")
|
|
1545
|
-
...
|
|
1546
|
-
>>> plt.ylim(0, 1)
|
|
1547
|
-
>>> plt.show()
|
|
1548
1511
|
"""
|
|
1549
1512
|
return self._predict_cumulative_hazard_function(self._get_baseline_model(), self.predict(X), return_array)
|
|
1550
1513
|
|
|
1514
|
+
@append_survival_function_example(estimator_mod="ensemble", estimator_class="GradientBoostingSurvivalAnalysis")
|
|
1551
1515
|
def predict_survival_function(self, X, return_array=False):
|
|
1552
|
-
"""Predict survival function.
|
|
1516
|
+
r"""Predict survival function.
|
|
1553
1517
|
|
|
1554
1518
|
Only available if :meth:`fit` has been called with `loss = "coxph"`.
|
|
1555
1519
|
|
|
@@ -1558,9 +1522,9 @@ class GradientBoostingSurvivalAnalysis(BaseGradientBoosting, SurvivalAnalysisMix
|
|
|
1558
1522
|
|
|
1559
1523
|
.. math::
|
|
1560
1524
|
|
|
1561
|
-
S(t
|
|
1525
|
+
S(t \mid x) = S_0(t)^{\exp(f(x))} ,
|
|
1562
1526
|
|
|
1563
|
-
where :math:`f(
|
|
1527
|
+
where :math:`f(\cdot)` is the additive ensemble of base learners,
|
|
1564
1528
|
and :math:`S_0(t)` is the baseline survival function,
|
|
1565
1529
|
estimated by Breslow's estimator.
|
|
1566
1530
|
|
|
@@ -1569,45 +1533,29 @@ class GradientBoostingSurvivalAnalysis(BaseGradientBoosting, SurvivalAnalysisMix
|
|
|
1569
1533
|
X : array-like, shape = (n_samples, n_features)
|
|
1570
1534
|
Data matrix.
|
|
1571
1535
|
|
|
1572
|
-
return_array :
|
|
1573
|
-
|
|
1574
|
-
|
|
1575
|
-
|
|
1536
|
+
return_array : bool, default: False
|
|
1537
|
+
Whether to return a single array of survival probabilities
|
|
1538
|
+
or a list of step functions.
|
|
1539
|
+
|
|
1540
|
+
If `False`, a list of :class:`sksurv.functions.StepFunction`
|
|
1541
|
+
objects is returned.
|
|
1542
|
+
|
|
1543
|
+
If `True`, a 2d-array of shape `(n_samples, n_unique_times)` is
|
|
1544
|
+
returned, where `n_unique_times` is the number of unique
|
|
1545
|
+
event times in the training data. Each row represents the survival
|
|
1546
|
+
function of an individual evaluated at `unique_times_`.
|
|
1576
1547
|
|
|
1577
1548
|
Returns
|
|
1578
1549
|
-------
|
|
1579
1550
|
survival : ndarray
|
|
1580
|
-
If `return_array` is
|
|
1581
|
-
|
|
1582
|
-
|
|
1583
|
-
|
|
1551
|
+
If `return_array` is `False`, an array of `n_samples`
|
|
1552
|
+
:class:`sksurv.functions.StepFunction` instances is returned.
|
|
1553
|
+
|
|
1554
|
+
If `return_array` is `True`, a numeric array of shape
|
|
1555
|
+
`(n_samples, n_unique_times)` is returned.
|
|
1584
1556
|
|
|
1585
1557
|
Examples
|
|
1586
1558
|
--------
|
|
1587
|
-
>>> import matplotlib.pyplot as plt
|
|
1588
|
-
>>> from sksurv.datasets import load_whas500
|
|
1589
|
-
>>> from sksurv.ensemble import GradientBoostingSurvivalAnalysis
|
|
1590
|
-
|
|
1591
|
-
Load the data.
|
|
1592
|
-
|
|
1593
|
-
>>> X, y = load_whas500()
|
|
1594
|
-
>>> X = X.astype(float)
|
|
1595
|
-
|
|
1596
|
-
Fit the model.
|
|
1597
|
-
|
|
1598
|
-
>>> estimator = GradientBoostingSurvivalAnalysis(loss="coxph").fit(X, y)
|
|
1599
|
-
|
|
1600
|
-
Estimate the survival function for the first 10 samples.
|
|
1601
|
-
|
|
1602
|
-
>>> surv_funcs = estimator.predict_survival_function(X.iloc[:10])
|
|
1603
|
-
|
|
1604
|
-
Plot the estimated survival functions.
|
|
1605
|
-
|
|
1606
|
-
>>> for fn in surv_funcs:
|
|
1607
|
-
... plt.step(fn.x, fn(fn.x), where="post")
|
|
1608
|
-
...
|
|
1609
|
-
>>> plt.ylim(0, 1)
|
|
1610
|
-
>>> plt.show()
|
|
1611
1559
|
"""
|
|
1612
1560
|
return self._predict_survival_function(self._get_baseline_model(), self.predict(X), return_array)
|
|
1613
1561
|
|