scikit-survival 0.24.1__cp310-cp310-win_amd64.whl → 0.25.0__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scikit_survival-0.25.0.dist-info/METADATA +185 -0
- scikit_survival-0.25.0.dist-info/RECORD +58 -0
- {scikit_survival-0.24.1.dist-info → scikit_survival-0.25.0.dist-info}/WHEEL +1 -1
- sksurv/__init__.py +51 -6
- sksurv/base.py +12 -2
- sksurv/bintrees/_binarytrees.cp310-win_amd64.pyd +0 -0
- sksurv/column.py +33 -29
- sksurv/compare.py +22 -22
- sksurv/datasets/base.py +45 -20
- sksurv/docstrings.py +99 -0
- sksurv/ensemble/_coxph_loss.cp310-win_amd64.pyd +0 -0
- sksurv/ensemble/boosting.py +116 -168
- sksurv/ensemble/forest.py +94 -151
- sksurv/functions.py +29 -29
- sksurv/io/arffread.py +34 -3
- sksurv/io/arffwrite.py +38 -2
- sksurv/kernels/_clinical_kernel.cp310-win_amd64.pyd +0 -0
- sksurv/kernels/clinical.py +33 -13
- sksurv/linear_model/_coxnet.cp310-win_amd64.pyd +0 -0
- sksurv/linear_model/aft.py +14 -11
- sksurv/linear_model/coxnet.py +138 -89
- sksurv/linear_model/coxph.py +102 -83
- sksurv/meta/ensemble_selection.py +91 -9
- sksurv/meta/stacking.py +47 -26
- sksurv/metrics.py +257 -224
- sksurv/nonparametric.py +150 -81
- sksurv/preprocessing.py +55 -27
- sksurv/svm/_minlip.cp310-win_amd64.pyd +0 -0
- sksurv/svm/_prsvm.cp310-win_amd64.pyd +0 -0
- sksurv/svm/minlip.py +160 -79
- sksurv/svm/naive_survival_svm.py +63 -34
- sksurv/svm/survival_svm.py +103 -103
- sksurv/tree/_criterion.cp310-win_amd64.pyd +0 -0
- sksurv/tree/tree.py +170 -84
- sksurv/util.py +80 -26
- scikit_survival-0.24.1.dist-info/METADATA +0 -889
- scikit_survival-0.24.1.dist-info/RECORD +0 -57
- {scikit_survival-0.24.1.dist-info → scikit_survival-0.25.0.dist-info}/licenses/COPYING +0 -0
- {scikit_survival-0.24.1.dist-info → scikit_survival-0.25.0.dist-info}/top_level.txt +0 -0
sksurv/svm/survival_svm.py
CHANGED
|
@@ -83,7 +83,8 @@ class OrderStatisticTreeSurvivalCounter(Counter):
|
|
|
83
83
|
Event indicator of samples.
|
|
84
84
|
|
|
85
85
|
tree_class : type
|
|
86
|
-
|
|
86
|
+
The class to use as an order statistic tree, either
|
|
87
|
+
:class:`sksurv.bintrees.AVLTree` or :class:`sksurv.bintrees.RBTree`.
|
|
87
88
|
|
|
88
89
|
time : array, shape = (n_samples,)
|
|
89
90
|
Survival times.
|
|
@@ -374,8 +375,8 @@ class LargeScaleOptimizer(RankSVMOptimizer):
|
|
|
374
375
|
Whether to fit an intercept. Only used if regression objective
|
|
375
376
|
is optimized (rank_ratio < 1.0).
|
|
376
377
|
|
|
377
|
-
counter :
|
|
378
|
-
|
|
378
|
+
counter : :class:`OrderStatisticTreeSurvivalCounter` or :class:`SurvivalCounter`
|
|
379
|
+
An instance of a :class:`Counter` subclass used for counting comparable pairs.
|
|
379
380
|
|
|
380
381
|
References
|
|
381
382
|
----------
|
|
@@ -511,8 +512,8 @@ class NonlinearLargeScaleOptimizer(RankSVMOptimizer):
|
|
|
511
512
|
rank_ratio : float
|
|
512
513
|
Trade-off between regression and ranking objectives.
|
|
513
514
|
|
|
514
|
-
counter :
|
|
515
|
-
|
|
515
|
+
counter : :class:`OrderStatisticTreeSurvivalCounter` or :class:`SurvivalCounter`
|
|
516
|
+
An instance of a :class:`Counter` subclass used for counting comparable pairs.
|
|
516
517
|
|
|
517
518
|
References
|
|
518
519
|
----------
|
|
@@ -730,7 +731,29 @@ class BaseSurvivalSVM(BaseEstimator, metaclass=ABCMeta):
|
|
|
730
731
|
|
|
731
732
|
@abstractmethod
|
|
732
733
|
def predict(self, X):
|
|
733
|
-
"""Predict risk
|
|
734
|
+
"""Predict risk scores or transformed survival times.
|
|
735
|
+
|
|
736
|
+
If the model has been fit only considering the ranking objective
|
|
737
|
+
(``rank_ratio = 1``), predictions are risk scores (i.e. higher values
|
|
738
|
+
indicate an increased risk of experiencing an event). The scores
|
|
739
|
+
have no unit and are only meaningful to rank samples by their risk
|
|
740
|
+
of experiencing an event.
|
|
741
|
+
|
|
742
|
+
If the regression objective has been used (``rank_ratio < 1``),
|
|
743
|
+
predictions are transformed survival times.
|
|
744
|
+
Lower scores indicate shorter survival, higher scores longer survival.
|
|
745
|
+
|
|
746
|
+
Parameters
|
|
747
|
+
----------
|
|
748
|
+
X : array-like, shape = (n_samples, n_features)
|
|
749
|
+
The input samples.
|
|
750
|
+
|
|
751
|
+
Returns
|
|
752
|
+
-------
|
|
753
|
+
y : ndarray, shape = (n_samples,), dtype=float
|
|
754
|
+
Risk scores (if ``rank_ratio = 1``), or transformed survival times
|
|
755
|
+
(if ``rank_ratio < 1``).
|
|
756
|
+
"""
|
|
734
757
|
|
|
735
758
|
def _validate_for_fit(self, X):
|
|
736
759
|
return validate_data(self, X, ensure_min_samples=2)
|
|
@@ -744,9 +767,9 @@ class BaseSurvivalSVM(BaseEstimator, metaclass=ABCMeta):
|
|
|
744
767
|
Data matrix.
|
|
745
768
|
|
|
746
769
|
y : structured array, shape = (n_samples,)
|
|
747
|
-
A structured array
|
|
748
|
-
|
|
749
|
-
second field.
|
|
770
|
+
A structured array with two fields. The first field is a boolean
|
|
771
|
+
where ``True`` indicates an event and ``False`` indicates right-censoring.
|
|
772
|
+
The second field is a float with the time of event or time of censoring.
|
|
750
773
|
|
|
751
774
|
Returns
|
|
752
775
|
-------
|
|
@@ -811,80 +834,82 @@ class BaseSurvivalSVM(BaseEstimator, metaclass=ABCMeta):
|
|
|
811
834
|
|
|
812
835
|
|
|
813
836
|
class FastSurvivalSVM(BaseSurvivalSVM, SurvivalAnalysisMixin):
|
|
814
|
-
"""
|
|
837
|
+
r"""Implements an efficient linear Support Vector Machine for survival analysis,
|
|
838
|
+
capable of optimizing both ranking and regression objectives.
|
|
815
839
|
|
|
816
|
-
Training data consists of *n* triplets :math:`(
|
|
817
|
-
where :math
|
|
818
|
-
the survival time or time of censoring, and :math
|
|
840
|
+
Training data consists of *n* triplets :math:`(\mathbf{x}_i, y_i, \delta_i)`,
|
|
841
|
+
where :math:`\mathbf{x}_i` is a *d*-dimensional feature vector, :math:`y_i > 0`
|
|
842
|
+
the survival time or time of censoring, and :math:`\delta_i \in \{0,1\}`
|
|
819
843
|
the binary event indicator. Using the training data, the objective is to
|
|
820
844
|
minimize the following function:
|
|
821
845
|
|
|
822
846
|
.. math::
|
|
823
847
|
|
|
824
|
-
|
|
825
|
-
+
|
|
826
|
-
|
|
827
|
-
+ (1 - r)
|
|
828
|
-
|
|
848
|
+
\arg \min_{\mathbf{w}, b} \frac{1}{2} \mathbf{w}^\top \mathbf{w}
|
|
849
|
+
+ \frac{\alpha}{2} \left[ r \sum_{i,j \in \mathcal{P}}
|
|
850
|
+
\max(0, 1 - (\mathbf{w}^\top \mathbf{x}_i - \mathbf{w}^\top \mathbf{x}_j))^2
|
|
851
|
+
+ (1 - r) \sum_{i=1}^n \left( \zeta_{\mathbf{w}, b} (y_i, \mathbf{x}_i, \delta_i)
|
|
852
|
+
\right)^2 \right]
|
|
829
853
|
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
y_i -
|
|
834
|
-
|
|
854
|
+
\zeta_{\mathbf{w},b} (y_i, \mathbf{x}_i, \delta_i) =
|
|
855
|
+
\begin{cases}
|
|
856
|
+
\max(0, y_i - \mathbf{w}^\top \mathbf{x}_i - b) \quad \text{if $\delta_i = 0$,} \\
|
|
857
|
+
y_i - \mathbf{w}^\top \mathbf{x}_i - b \quad \text{if $\delta_i = 1$,} \\
|
|
858
|
+
\end{cases}
|
|
835
859
|
|
|
836
|
-
|
|
860
|
+
\mathcal{P} = \{ (i, j) \mid y_i > y_j \land \delta_j = 1 \}_{i,j=1,\dots,n}
|
|
837
861
|
|
|
838
|
-
The hyper-parameter :math
|
|
862
|
+
The hyper-parameter :math:`\alpha > 0` determines the amount of regularization
|
|
839
863
|
to apply: a smaller value increases the amount of regularization and a
|
|
840
864
|
higher value reduces the amount of regularization. The hyper-parameter
|
|
841
|
-
:math:`r
|
|
865
|
+
:math:`r \in [0; 1]` determines the trade-off between the ranking objective
|
|
842
866
|
and the regression objective. If :math:`r = 1` it reduces to the ranking
|
|
843
867
|
objective, and if :math:`r = 0` to the regression objective. If the regression
|
|
844
|
-
objective is used, survival/censoring times are log-
|
|
868
|
+
objective is used, survival/censoring times are log-transformed and thus cannot be
|
|
845
869
|
zero or negative.
|
|
846
870
|
|
|
847
871
|
See the :ref:`User Guide </user_guide/survival-svm.ipynb>` and [1]_ for further description.
|
|
848
872
|
|
|
849
873
|
Parameters
|
|
850
874
|
----------
|
|
851
|
-
alpha : float,
|
|
852
|
-
Weight of penalizing the squared hinge loss in the objective function
|
|
875
|
+
alpha : float, default: 1
|
|
876
|
+
Weight of penalizing the squared hinge loss in the objective function. Must be greater than 0.
|
|
853
877
|
|
|
854
878
|
rank_ratio : float, optional, default: 1.0
|
|
855
|
-
Mixing parameter between regression and ranking
|
|
856
|
-
If ``rank_ratio = 1``, only ranking is performed
|
|
857
|
-
is performed. A
|
|
858
|
-
or 'direct-count'.
|
|
879
|
+
Mixing parameter between regression and ranking objectives, with ``0 <= rank_ratio <= 1``.
|
|
880
|
+
If ``rank_ratio = 1``, only ranking is performed. If ``rank_ratio = 0``, only regression
|
|
881
|
+
is performed. A ``rank_ratio`` less than 1.0 (i.e., including a regression objective) is
|
|
882
|
+
only supported if the ``optimizer`` is 'avltree', 'rbtree', or 'direct-count'.
|
|
859
883
|
|
|
860
|
-
fit_intercept :
|
|
884
|
+
fit_intercept : bool, optional, default: False
|
|
861
885
|
Whether to calculate an intercept for the regression model. If set to ``False``, no intercept
|
|
862
|
-
will be calculated.
|
|
886
|
+
will be calculated. This parameter has no effect if ``rank_ratio = 1``, i.e., only ranking is performed.
|
|
863
887
|
|
|
864
888
|
max_iter : int, optional, default: 20
|
|
865
|
-
Maximum number of iterations to perform in Newton optimization
|
|
889
|
+
Maximum number of iterations to perform in Newton optimization.
|
|
866
890
|
|
|
867
891
|
verbose : bool, optional, default: False
|
|
868
|
-
|
|
892
|
+
If ``True``, print messages during optimization.
|
|
869
893
|
|
|
870
894
|
tol : float or None, optional, default: None
|
|
871
|
-
Tolerance for termination.
|
|
872
|
-
|
|
895
|
+
Tolerance for termination. If ``None``, the solver's default tolerance is used.
|
|
896
|
+
See :func:`scipy.optimize.minimize`.
|
|
873
897
|
|
|
874
898
|
optimizer : {'avltree', 'direct-count', 'PRSVM', 'rbtree', 'simple'}, optional, default: 'avltree'
|
|
875
|
-
|
|
899
|
+
Specifies which optimizer to use.
|
|
876
900
|
|
|
877
|
-
random_state : int
|
|
878
|
-
|
|
901
|
+
random_state : int, :class:`numpy.random.RandomState` instance, or None, optional, default: None
|
|
902
|
+
Used to resolve ties in survival times. Pass an int for reproducible output across
|
|
903
|
+
multiple :meth:`fit` calls.
|
|
879
904
|
|
|
880
|
-
timeit :
|
|
881
|
-
If non-zero
|
|
882
|
-
|
|
883
|
-
``optimizer_result_`` attribute.
|
|
905
|
+
timeit : bool, int, or None, optional, default: False
|
|
906
|
+
If ``True`` or a non-zero integer, the time taken for optimization is measured.
|
|
907
|
+
If an integer is provided, the optimization is repeated that many times.
|
|
908
|
+
Results can be accessed from the ``optimizer_result_`` attribute.
|
|
884
909
|
|
|
885
910
|
Attributes
|
|
886
911
|
----------
|
|
887
|
-
coef_ : ndarray, shape = (n_features,)
|
|
912
|
+
coef_ : ndarray, shape = (n_features,), dtype = float
|
|
888
913
|
Coefficients of the features in the decision function.
|
|
889
914
|
|
|
890
915
|
optimizer_result_ : :class:`scipy.optimize.OptimizeResult`
|
|
@@ -893,7 +918,7 @@ class FastSurvivalSVM(BaseSurvivalSVM, SurvivalAnalysisMixin):
|
|
|
893
918
|
n_features_in_ : int
|
|
894
919
|
Number of features seen during ``fit``.
|
|
895
920
|
|
|
896
|
-
feature_names_in_ : ndarray
|
|
921
|
+
feature_names_in_ : ndarray, shape = (`n_features_in_`,), dtype = object
|
|
897
922
|
Names of features seen during ``fit``. Defined only when `X`
|
|
898
923
|
has feature names that are all strings.
|
|
899
924
|
|
|
@@ -953,20 +978,6 @@ class FastSurvivalSVM(BaseSurvivalSVM, SurvivalAnalysisMixin):
|
|
|
953
978
|
return opt_result
|
|
954
979
|
|
|
955
980
|
def predict(self, X):
|
|
956
|
-
"""Rank samples according to survival times
|
|
957
|
-
|
|
958
|
-
Lower ranks indicate shorter survival, higher ranks longer survival.
|
|
959
|
-
|
|
960
|
-
Parameters
|
|
961
|
-
----------
|
|
962
|
-
X : array-like, shape = (n_samples, n_features)
|
|
963
|
-
The input samples.
|
|
964
|
-
|
|
965
|
-
Returns
|
|
966
|
-
-------
|
|
967
|
-
y : ndarray, shape = (n_samples,)
|
|
968
|
-
Predicted ranks.
|
|
969
|
-
"""
|
|
970
981
|
check_is_fitted(self, "coef_")
|
|
971
982
|
X = validate_data(self, X, reset=False)
|
|
972
983
|
|
|
@@ -985,26 +996,28 @@ class FastSurvivalSVM(BaseSurvivalSVM, SurvivalAnalysisMixin):
|
|
|
985
996
|
|
|
986
997
|
|
|
987
998
|
class FastKernelSurvivalSVM(BaseSurvivalSVM, SurvivalAnalysisMixin):
|
|
988
|
-
"""
|
|
999
|
+
"""Implements an efficient kernel Support Vector Machine for survival analysis.
|
|
1000
|
+
|
|
1001
|
+
The model extends :class:`FastSurvivalSVM` to non-linear relationships through kernel functions.
|
|
989
1002
|
|
|
990
1003
|
See the :ref:`User Guide </user_guide/survival-svm.ipynb>` and [1]_ for further description.
|
|
991
1004
|
|
|
992
1005
|
Parameters
|
|
993
1006
|
----------
|
|
994
|
-
alpha : float,
|
|
995
|
-
Weight of penalizing the squared hinge loss in the objective function
|
|
1007
|
+
alpha : float, default: 1
|
|
1008
|
+
Weight of penalizing the squared hinge loss in the objective function. Must be greater than 0.
|
|
996
1009
|
|
|
997
1010
|
rank_ratio : float, optional, default: 1.0
|
|
998
|
-
Mixing parameter between regression and ranking
|
|
999
|
-
If ``rank_ratio = 1``, only ranking is performed
|
|
1000
|
-
is performed. A
|
|
1001
|
-
or 'rbtree'.
|
|
1011
|
+
Mixing parameter between regression and ranking objectives, with ``0 <= rank_ratio <= 1``.
|
|
1012
|
+
If ``rank_ratio = 1``, only ranking is performed. If ``rank_ratio = 0``, only regression
|
|
1013
|
+
is performed. A ``rank_ratio`` less than 1.0 (i.e., including a regression objective) is
|
|
1014
|
+
only supported if the ``optimizer`` is 'avltree' or 'rbtree'.
|
|
1002
1015
|
|
|
1003
|
-
fit_intercept :
|
|
1016
|
+
fit_intercept : bool, optional, default: False
|
|
1004
1017
|
Whether to calculate an intercept for the regression model. If set to ``False``, no intercept
|
|
1005
|
-
will be calculated.
|
|
1018
|
+
will be calculated. This parameter has no effect if ``rank_ratio = 1``, i.e., only ranking is performed.
|
|
1006
1019
|
|
|
1007
|
-
kernel : str or callable, default: '
|
|
1020
|
+
kernel : str or callable, default: 'rbf'
|
|
1008
1021
|
Kernel mapping used internally. This parameter is directly passed to
|
|
1009
1022
|
:func:`sklearn.metrics.pairwise.pairwise_kernels`.
|
|
1010
1023
|
If `kernel` is a string, it must be one of the metrics
|
|
@@ -1024,14 +1037,14 @@ class FastKernelSurvivalSVM(BaseSurvivalSVM, SurvivalAnalysisMixin):
|
|
|
1024
1037
|
the kernel; see the documentation for :mod:`sklearn.metrics.pairwise`.
|
|
1025
1038
|
Ignored by other kernels.
|
|
1026
1039
|
|
|
1027
|
-
degree : int, default: 3
|
|
1040
|
+
degree : int, optional, default: 3
|
|
1028
1041
|
Degree of the polynomial kernel. Ignored by other kernels.
|
|
1029
1042
|
|
|
1030
|
-
coef0 : float, optional
|
|
1043
|
+
coef0 : float, optional, default: 1
|
|
1031
1044
|
Zero coefficient for polynomial and sigmoid kernels.
|
|
1032
1045
|
Ignored by other kernels.
|
|
1033
1046
|
|
|
1034
|
-
kernel_params :
|
|
1047
|
+
kernel_params : dict or None, optional, default: None
|
|
1035
1048
|
Additional parameters (keyword arguments) for kernel function passed
|
|
1036
1049
|
as callable object.
|
|
1037
1050
|
|
|
@@ -1039,31 +1052,32 @@ class FastKernelSurvivalSVM(BaseSurvivalSVM, SurvivalAnalysisMixin):
|
|
|
1039
1052
|
Maximum number of iterations to perform in Newton optimization.
|
|
1040
1053
|
|
|
1041
1054
|
verbose : bool, optional, default: False
|
|
1042
|
-
|
|
1055
|
+
If ``True``, print messages during optimization.
|
|
1043
1056
|
|
|
1044
1057
|
tol : float or None, optional, default: None
|
|
1045
|
-
Tolerance for termination.
|
|
1046
|
-
|
|
1058
|
+
Tolerance for termination. If ``None``, the solver's default tolerance is used.
|
|
1059
|
+
See :func:`scipy.optimize.minimize`.
|
|
1047
1060
|
|
|
1048
1061
|
optimizer : {'avltree', 'rbtree'}, optional, default: 'rbtree'
|
|
1049
|
-
|
|
1062
|
+
Specifies which optimizer to use.
|
|
1050
1063
|
|
|
1051
|
-
random_state : int
|
|
1052
|
-
|
|
1064
|
+
random_state : int, :class:`numpy.random.RandomState` instance, or None, optional, default: None
|
|
1065
|
+
Used to resolve ties in survival times. Pass an int for reproducible output across
|
|
1066
|
+
multiple :meth:`fit` calls.
|
|
1053
1067
|
|
|
1054
|
-
timeit :
|
|
1055
|
-
If non-zero
|
|
1056
|
-
|
|
1057
|
-
``optimizer_result_`` attribute.
|
|
1068
|
+
timeit : bool, int, or None, optional, default: False
|
|
1069
|
+
If ``True`` or a non-zero integer, the time taken for optimization is measured.
|
|
1070
|
+
If an integer is provided, the optimization is repeated that many times.
|
|
1071
|
+
Results can be accessed from the ``optimizer_result_`` attribute.
|
|
1058
1072
|
|
|
1059
1073
|
Attributes
|
|
1060
1074
|
----------
|
|
1061
|
-
coef_ : ndarray, shape = (n_samples,)
|
|
1075
|
+
coef_ : ndarray, shape = (n_samples,), dtype = float
|
|
1062
1076
|
Weights assigned to the samples in training data to represent
|
|
1063
1077
|
the decision function in kernel space.
|
|
1064
1078
|
|
|
1065
|
-
fit_X_ : ndarray
|
|
1066
|
-
Training data.
|
|
1079
|
+
fit_X_ : ndarray, shape = (n_samples, `n_features_in_`), dtype = float
|
|
1080
|
+
Training data used for fitting. Used to compute the kernel matrix for prediction.
|
|
1067
1081
|
|
|
1068
1082
|
optimizer_result_ : :class:`scipy.optimize.OptimizeResult`
|
|
1069
1083
|
Stats returned by the optimizer. See :class:`scipy.optimize.OptimizeResult`.
|
|
@@ -1071,7 +1085,7 @@ class FastKernelSurvivalSVM(BaseSurvivalSVM, SurvivalAnalysisMixin):
|
|
|
1071
1085
|
n_features_in_ : int
|
|
1072
1086
|
Number of features seen during ``fit``.
|
|
1073
1087
|
|
|
1074
|
-
feature_names_in_ : ndarray
|
|
1088
|
+
feature_names_in_ : ndarray, shape = (`n_features_in_`,), dtype = object
|
|
1075
1089
|
Names of features seen during ``fit``. Defined only when `X`
|
|
1076
1090
|
has feature names that are all strings.
|
|
1077
1091
|
|
|
@@ -1205,20 +1219,6 @@ class FastKernelSurvivalSVM(BaseSurvivalSVM, SurvivalAnalysisMixin):
|
|
|
1205
1219
|
return opt_result
|
|
1206
1220
|
|
|
1207
1221
|
def predict(self, X):
|
|
1208
|
-
"""Rank samples according to survival times
|
|
1209
|
-
|
|
1210
|
-
Lower ranks indicate shorter survival, higher ranks longer survival.
|
|
1211
|
-
|
|
1212
|
-
Parameters
|
|
1213
|
-
----------
|
|
1214
|
-
X : array-like, shape = (n_samples, n_features)
|
|
1215
|
-
The input samples.
|
|
1216
|
-
|
|
1217
|
-
Returns
|
|
1218
|
-
-------
|
|
1219
|
-
y : ndarray, shape = (n_samples,)
|
|
1220
|
-
Predicted ranks.
|
|
1221
|
-
"""
|
|
1222
1222
|
X = validate_data(self, X, reset=False)
|
|
1223
1223
|
kernel_mat = self._get_kernel(X, self.fit_X_)
|
|
1224
1224
|
|
|
Binary file
|