scikit-survival 0.24.0__cp313-cp313-macosx_11_0_arm64.whl → 0.25.0__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. scikit_survival-0.25.0.dist-info/METADATA +185 -0
  2. scikit_survival-0.25.0.dist-info/RECORD +58 -0
  3. {scikit_survival-0.24.0.dist-info → scikit_survival-0.25.0.dist-info}/WHEEL +2 -1
  4. sksurv/__init__.py +51 -6
  5. sksurv/base.py +12 -2
  6. sksurv/bintrees/_binarytrees.cpython-313-darwin.so +0 -0
  7. sksurv/column.py +33 -29
  8. sksurv/compare.py +22 -22
  9. sksurv/datasets/base.py +45 -20
  10. sksurv/docstrings.py +99 -0
  11. sksurv/ensemble/_coxph_loss.cpython-313-darwin.so +0 -0
  12. sksurv/ensemble/boosting.py +116 -168
  13. sksurv/ensemble/forest.py +94 -151
  14. sksurv/functions.py +29 -29
  15. sksurv/io/arffread.py +34 -3
  16. sksurv/io/arffwrite.py +38 -2
  17. sksurv/kernels/_clinical_kernel.cpython-313-darwin.so +0 -0
  18. sksurv/kernels/clinical.py +33 -13
  19. sksurv/linear_model/_coxnet.cpython-313-darwin.so +0 -0
  20. sksurv/linear_model/aft.py +14 -11
  21. sksurv/linear_model/coxnet.py +138 -89
  22. sksurv/linear_model/coxph.py +102 -83
  23. sksurv/meta/ensemble_selection.py +91 -9
  24. sksurv/meta/stacking.py +47 -26
  25. sksurv/metrics.py +257 -224
  26. sksurv/nonparametric.py +150 -81
  27. sksurv/preprocessing.py +55 -27
  28. sksurv/svm/_minlip.cpython-313-darwin.so +0 -0
  29. sksurv/svm/_prsvm.cpython-313-darwin.so +0 -0
  30. sksurv/svm/minlip.py +160 -79
  31. sksurv/svm/naive_survival_svm.py +63 -34
  32. sksurv/svm/survival_svm.py +104 -104
  33. sksurv/tree/_criterion.cpython-313-darwin.so +0 -0
  34. sksurv/tree/tree.py +170 -84
  35. sksurv/util.py +80 -26
  36. scikit_survival-0.24.0.dist-info/METADATA +0 -888
  37. scikit_survival-0.24.0.dist-info/RECORD +0 -57
  38. {scikit_survival-0.24.0.dist-info → scikit_survival-0.25.0.dist-info/licenses}/COPYING +0 -0
  39. {scikit_survival-0.24.0.dist-info → scikit_survival-0.25.0.dist-info}/top_level.txt +0 -0
@@ -83,7 +83,8 @@ class OrderStatisticTreeSurvivalCounter(Counter):
83
83
  Event indicator of samples.
84
84
 
85
85
  tree_class : type
86
- Which class to use as order statistic tree
86
+ The class to use as an order statistic tree, either
87
+ :class:`sksurv.bintrees.AVLTree` or :class:`sksurv.bintrees.RBTree`.
87
88
 
88
89
  time : array, shape = (n_samples,)
89
90
  Survival times.
@@ -374,8 +375,8 @@ class LargeScaleOptimizer(RankSVMOptimizer):
374
375
  Whether to fit an intercept. Only used if regression objective
375
376
  is optimized (rank_ratio < 1.0).
376
377
 
377
- counter : object
378
- Instance of :class:`Counter` subclass.
378
+ counter : :class:`OrderStatisticTreeSurvivalCounter` or :class:`SurvivalCounter`
379
+ An instance of a :class:`Counter` subclass used for counting comparable pairs.
379
380
 
380
381
  References
381
382
  ----------
@@ -460,7 +461,7 @@ class LargeScaleOptimizer(RankSVMOptimizer):
460
461
  l_plus, xv_plus, l_minus, xv_minus = self._counter.calculate(wf) # pylint: disable=unused-variable
461
462
  x = self._counter.x
462
463
 
463
- xw = self._xw # noqa: F841; # pylint: disable=unused-variable
464
+ xw = self._xw # pylint: disable=unused-variable; # noqa: F841
464
465
  z = numexpr.evaluate("(l_plus + l_minus) * xw - xv_plus - xv_minus - l_minus + l_plus")
465
466
 
466
467
  grad = wf + self._rank_penalty * np.dot(x.T, z)
@@ -511,8 +512,8 @@ class NonlinearLargeScaleOptimizer(RankSVMOptimizer):
511
512
  rank_ratio : float
512
513
  Trade-off between regression and ranking objectives.
513
514
 
514
- counter : object
515
- Instance of :class:`Counter` subclass.
515
+ counter : :class:`OrderStatisticTreeSurvivalCounter` or :class:`SurvivalCounter`
516
+ An instance of a :class:`Counter` subclass used for counting comparable pairs.
516
517
 
517
518
  References
518
519
  ----------
@@ -730,7 +731,29 @@ class BaseSurvivalSVM(BaseEstimator, metaclass=ABCMeta):
730
731
 
731
732
  @abstractmethod
732
733
  def predict(self, X):
733
- """Predict risk score"""
734
+ """Predict risk scores or transformed survival times.
735
+
736
+ If the model has been fit only considering the ranking objective
737
+ (``rank_ratio = 1``), predictions are risk scores (i.e. higher values
738
+ indicate an increased risk of experiencing an event). The scores
739
+ have no unit and are only meaningful to rank samples by their risk
740
+ of experiencing an event.
741
+
742
+ If the regression objective has been used (``rank_ratio < 1``),
743
+ predictions are transformed survival times.
744
+ Lower scores indicate shorter survival, higher scores longer survival.
745
+
746
+ Parameters
747
+ ----------
748
+ X : array-like, shape = (n_samples, n_features)
749
+ The input samples.
750
+
751
+ Returns
752
+ -------
753
+ y : ndarray, shape = (n_samples,), dtype=float
754
+ Risk scores (if ``rank_ratio = 1``), or transformed survival times
755
+ (if ``rank_ratio < 1``).
756
+ """
734
757
 
735
758
  def _validate_for_fit(self, X):
736
759
  return validate_data(self, X, ensure_min_samples=2)
@@ -744,9 +767,9 @@ class BaseSurvivalSVM(BaseEstimator, metaclass=ABCMeta):
744
767
  Data matrix.
745
768
 
746
769
  y : structured array, shape = (n_samples,)
747
- A structured array containing the binary event indicator
748
- as first field, and time of event or time of censoring as
749
- second field.
770
+ A structured array with two fields. The first field is a boolean
771
+ where ``True`` indicates an event and ``False`` indicates right-censoring.
772
+ The second field is a float with the time of event or time of censoring.
750
773
 
751
774
  Returns
752
775
  -------
@@ -811,80 +834,82 @@ class BaseSurvivalSVM(BaseEstimator, metaclass=ABCMeta):
811
834
 
812
835
 
813
836
  class FastSurvivalSVM(BaseSurvivalSVM, SurvivalAnalysisMixin):
814
- """Efficient Training of linear Survival Support Vector Machine
837
+ r"""Implements an efficient linear Support Vector Machine for survival analysis,
838
+ capable of optimizing both ranking and regression objectives.
815
839
 
816
- Training data consists of *n* triplets :math:`(\\mathbf{x}_i, y_i, \\delta_i)`,
817
- where :math:`\\mathbf{x}_i` is a *d*-dimensional feature vector, :math:`y_i > 0`
818
- the survival time or time of censoring, and :math:`\\delta_i \\in \\{0,1\\}`
840
+ Training data consists of *n* triplets :math:`(\mathbf{x}_i, y_i, \delta_i)`,
841
+ where :math:`\mathbf{x}_i` is a *d*-dimensional feature vector, :math:`y_i > 0`
842
+ the survival time or time of censoring, and :math:`\delta_i \in \{0,1\}`
819
843
  the binary event indicator. Using the training data, the objective is to
820
844
  minimize the following function:
821
845
 
822
846
  .. math::
823
847
 
824
- \\arg \\min_{\\mathbf{w}, b} \\frac{1}{2} \\mathbf{w}^\\top \\mathbf{w}
825
- + \\frac{\\alpha}{2} \\left[ r \\sum_{i,j \\in \\mathcal{P}}
826
- \\max(0, 1 - (\\mathbf{w}^\\top \\mathbf{x}_i - \\mathbf{w}^\\top \\mathbf{x}_j))^2
827
- + (1 - r) \\sum_{i=0}^n \\left( \\zeta_{\\mathbf{w}, b} (y_i, x_i, \\delta_i)
828
- \\right)^2 \\right]
848
+ \arg \min_{\mathbf{w}, b} \frac{1}{2} \mathbf{w}^\top \mathbf{w}
849
+ + \frac{\alpha}{2} \left[ r \sum_{i,j \in \mathcal{P}}
850
+ \max(0, 1 - (\mathbf{w}^\top \mathbf{x}_i - \mathbf{w}^\top \mathbf{x}_j))^2
851
+ + (1 - r) \sum_{i=1}^n \left( \zeta_{\mathbf{w}, b} (y_i, \mathbf{x}_i, \delta_i)
852
+ \right)^2 \right]
829
853
 
830
- \\zeta_{\\mathbf{w},b} (y_i, \\mathbf{x}_i, \\delta_i) =
831
- \\begin{cases}
832
- \\max(0, y_i - \\mathbf{w}^\\top \\mathbf{x}_i - b) \\quad \\text{if $\\delta_i = 0$,} \\\\
833
- y_i - \\mathbf{w}^\\top \\mathbf{x}_i - b \\quad \\text{if $\\delta_i = 1$,} \\\\
834
- \\end{cases}
854
+ \zeta_{\mathbf{w},b} (y_i, \mathbf{x}_i, \delta_i) =
855
+ \begin{cases}
856
+ \max(0, y_i - \mathbf{w}^\top \mathbf{x}_i - b) \quad \text{if $\delta_i = 0$,} \\
857
+ y_i - \mathbf{w}^\top \mathbf{x}_i - b \quad \text{if $\delta_i = 1$,} \\
858
+ \end{cases}
835
859
 
836
- \\mathcal{P} = \\{ (i, j) \\mid y_i > y_j \\land \\delta_j = 1 \\}_{i,j=1,\\dots,n}
860
+ \mathcal{P} = \{ (i, j) \mid y_i > y_j \land \delta_j = 1 \}_{i,j=1,\dots,n}
837
861
 
838
- The hyper-parameter :math:`\\alpha > 0` determines the amount of regularization
862
+ The hyper-parameter :math:`\alpha > 0` determines the amount of regularization
839
863
  to apply: a smaller value increases the amount of regularization and a
840
864
  higher value reduces the amount of regularization. The hyper-parameter
841
- :math:`r \\in [0; 1]` determines the trade-off between the ranking objective
865
+ :math:`r \in [0; 1]` determines the trade-off between the ranking objective
842
866
  and the regression objective. If :math:`r = 1` it reduces to the ranking
843
867
  objective, and if :math:`r = 0` to the regression objective. If the regression
844
- objective is used, survival/censoring times are log-transform and thus cannot be
868
+ objective is used, survival/censoring times are log-transformed and thus cannot be
845
869
  zero or negative.
846
870
 
847
871
  See the :ref:`User Guide </user_guide/survival-svm.ipynb>` and [1]_ for further description.
848
872
 
849
873
  Parameters
850
874
  ----------
851
- alpha : float, positive, default: 1
852
- Weight of penalizing the squared hinge loss in the objective function
875
+ alpha : float, default: 1
876
+ Weight of penalizing the squared hinge loss in the objective function. Must be greater than 0.
853
877
 
854
878
  rank_ratio : float, optional, default: 1.0
855
- Mixing parameter between regression and ranking objective with ``0 <= rank_ratio <= 1``.
856
- If ``rank_ratio = 1``, only ranking is performed, if ``rank_ratio = 0``, only regression
857
- is performed. A non-zero value is only allowed if optimizer is one of 'avltree', 'rbtree',
858
- or 'direct-count'.
879
+ Mixing parameter between regression and ranking objectives, with ``0 <= rank_ratio <= 1``.
880
+ If ``rank_ratio = 1``, only ranking is performed. If ``rank_ratio = 0``, only regression
881
+ is performed. A ``rank_ratio`` less than 1.0 (i.e., including a regression objective) is
882
+ only supported if the ``optimizer`` is 'avltree', 'rbtree', or 'direct-count'.
859
883
 
860
- fit_intercept : boolean, optional, default: False
884
+ fit_intercept : bool, optional, default: False
861
885
  Whether to calculate an intercept for the regression model. If set to ``False``, no intercept
862
- will be calculated. Has no effect if ``rank_ratio = 1``, i.e., only ranking is performed.
886
+ will be calculated. This parameter has no effect if ``rank_ratio = 1``, i.e., only ranking is performed.
863
887
 
864
888
  max_iter : int, optional, default: 20
865
- Maximum number of iterations to perform in Newton optimization
889
+ Maximum number of iterations to perform in Newton optimization.
866
890
 
867
891
  verbose : bool, optional, default: False
868
- Whether to print messages during optimization
892
+ If ``True``, print messages during optimization.
869
893
 
870
894
  tol : float or None, optional, default: None
871
- Tolerance for termination. For detailed control, use solver-specific
872
- options.
895
+ Tolerance for termination. If ``None``, the solver's default tolerance is used.
896
+ See :func:`scipy.optimize.minimize`.
873
897
 
874
898
  optimizer : {'avltree', 'direct-count', 'PRSVM', 'rbtree', 'simple'}, optional, default: 'avltree'
875
- Which optimizer to use.
899
+ Specifies which optimizer to use.
876
900
 
877
- random_state : int or :class:`numpy.random.RandomState` instance, optional
878
- Random number generator (used to resolve ties in survival times).
901
+ random_state : int, :class:`numpy.random.RandomState` instance, or None, optional, default: None
902
+ Used to resolve ties in survival times. Pass an int for reproducible output across
903
+ multiple :meth:`fit` calls.
879
904
 
880
- timeit : False, int or None, default: None
881
- If non-zero value is provided the time it takes for optimization is measured.
882
- The given number of repetitions are performed. Results can be accessed from the
883
- ``optimizer_result_`` attribute.
905
+ timeit : bool, int, or None, optional, default: False
906
+ If ``True`` or a non-zero integer, the time taken for optimization is measured.
907
+ If an integer is provided, the optimization is repeated that many times.
908
+ Results can be accessed from the ``optimizer_result_`` attribute.
884
909
 
885
910
  Attributes
886
911
  ----------
887
- coef_ : ndarray, shape = (n_features,)
912
+ coef_ : ndarray, shape = (n_features,), dtype = float
888
913
  Coefficients of the features in the decision function.
889
914
 
890
915
  optimizer_result_ : :class:`scipy.optimize.OptimizeResult`
@@ -893,7 +918,7 @@ class FastSurvivalSVM(BaseSurvivalSVM, SurvivalAnalysisMixin):
893
918
  n_features_in_ : int
894
919
  Number of features seen during ``fit``.
895
920
 
896
- feature_names_in_ : ndarray of shape (`n_features_in_`,)
921
+ feature_names_in_ : ndarray, shape = (`n_features_in_`,), dtype = object
897
922
  Names of features seen during ``fit``. Defined only when `X`
898
923
  has feature names that are all strings.
899
924
 
@@ -953,20 +978,6 @@ class FastSurvivalSVM(BaseSurvivalSVM, SurvivalAnalysisMixin):
953
978
  return opt_result
954
979
 
955
980
  def predict(self, X):
956
- """Rank samples according to survival times
957
-
958
- Lower ranks indicate shorter survival, higher ranks longer survival.
959
-
960
- Parameters
961
- ----------
962
- X : array-like, shape = (n_samples, n_features)
963
- The input samples.
964
-
965
- Returns
966
- -------
967
- y : ndarray, shape = (n_samples,)
968
- Predicted ranks.
969
- """
970
981
  check_is_fitted(self, "coef_")
971
982
  X = validate_data(self, X, reset=False)
972
983
 
@@ -985,26 +996,28 @@ class FastSurvivalSVM(BaseSurvivalSVM, SurvivalAnalysisMixin):
985
996
 
986
997
 
987
998
  class FastKernelSurvivalSVM(BaseSurvivalSVM, SurvivalAnalysisMixin):
988
- """Efficient Training of kernel Survival Support Vector Machine.
999
+ """Implements an efficient kernel Support Vector Machine for survival analysis.
1000
+
1001
+ The model extends :class:`FastSurvivalSVM` to non-linear relationships through kernel functions.
989
1002
 
990
1003
  See the :ref:`User Guide </user_guide/survival-svm.ipynb>` and [1]_ for further description.
991
1004
 
992
1005
  Parameters
993
1006
  ----------
994
- alpha : float, positive, default: 1
995
- Weight of penalizing the squared hinge loss in the objective function
1007
+ alpha : float, default: 1
1008
+ Weight of penalizing the squared hinge loss in the objective function. Must be greater than 0.
996
1009
 
997
1010
  rank_ratio : float, optional, default: 1.0
998
- Mixing parameter between regression and ranking objective with ``0 <= rank_ratio <= 1``.
999
- If ``rank_ratio = 1``, only ranking is performed, if ``rank_ratio = 0``, only regression
1000
- is performed. A non-zero value is only allowed if optimizer is one of 'avltree', 'PRSVM',
1001
- or 'rbtree'.
1011
+ Mixing parameter between regression and ranking objectives, with ``0 <= rank_ratio <= 1``.
1012
+ If ``rank_ratio = 1``, only ranking is performed. If ``rank_ratio = 0``, only regression
1013
+ is performed. A ``rank_ratio`` less than 1.0 (i.e., including a regression objective) is
1014
+ only supported if the ``optimizer`` is 'avltree', 'PRSVM', or 'rbtree'.
1002
1015
 
1003
- fit_intercept : boolean, optional, default: False
1016
+ fit_intercept : bool, optional, default: False
1004
1017
  Whether to calculate an intercept for the regression model. If set to ``False``, no intercept
1005
- will be calculated. Has no effect if ``rank_ratio = 1``, i.e., only ranking is performed.
1018
+ will be calculated. This parameter has no effect if ``rank_ratio = 1``, i.e., only ranking is performed.
1006
1019
 
1007
- kernel : str or callable, default: 'linear'.
1020
+ kernel : str or callable, default: 'rbf'
1008
1021
  Kernel mapping used internally. This parameter is directly passed to
1009
1022
  :func:`sklearn.metrics.pairwise.pairwise_kernels`.
1010
1023
  If `kernel` is a string, it must be one of the metrics
@@ -1024,14 +1037,14 @@ class FastKernelSurvivalSVM(BaseSurvivalSVM, SurvivalAnalysisMixin):
1024
1037
  the kernel; see the documentation for :mod:`sklearn.metrics.pairwise`.
1025
1038
  Ignored by other kernels.
1026
1039
 
1027
- degree : int, default: 3
1040
+ degree : int, optional, default: 3
1028
1041
  Degree of the polynomial kernel. Ignored by other kernels.
1029
1042
 
1030
- coef0 : float, optional
1043
+ coef0 : float, optional, default: 1
1031
1044
  Zero coefficient for polynomial and sigmoid kernels.
1032
1045
  Ignored by other kernels.
1033
1046
 
1034
- kernel_params : mapping of string to any, optional
1047
+ kernel_params : dict or None, optional, default: None
1035
1048
  Additional parameters (keyword arguments) for kernel function passed
1036
1049
  as callable object.
1037
1050
 
@@ -1039,31 +1052,32 @@ class FastKernelSurvivalSVM(BaseSurvivalSVM, SurvivalAnalysisMixin):
1039
1052
  Maximum number of iterations to perform in Newton optimization.
1040
1053
 
1041
1054
  verbose : bool, optional, default: False
1042
- Whether to print messages during optimization
1055
+ If ``True``, print messages during optimization.
1043
1056
 
1044
1057
  tol : float or None, optional, default: None
1045
- Tolerance for termination. For detailed control, use solver-specific
1046
- options.
1058
+ Tolerance for termination. If ``None``, the solver's default tolerance is used.
1059
+ See :func:`scipy.optimize.minimize`.
1047
1060
 
1048
1061
  optimizer : {'avltree', 'rbtree'}, optional, default: 'rbtree'
1049
- Which optimizer to use.
1062
+ Specifies which optimizer to use.
1050
1063
 
1051
- random_state : int or :class:`numpy.random.RandomState` instance, optional
1052
- Random number generator (used to resolve ties in survival times).
1064
+ random_state : int, :class:`numpy.random.RandomState` instance, or None, optional, default: None
1065
+ Used to resolve ties in survival times. Pass an int for reproducible output across
1066
+ multiple :meth:`fit` calls.
1053
1067
 
1054
- timeit : False, int or None, default: None
1055
- If non-zero value is provided the time it takes for optimization is measured.
1056
- The given number of repetitions are performed. Results can be accessed from the
1057
- ``optimizer_result_`` attribute.
1068
+ timeit : bool, int, or None, optional, default: False
1069
+ If ``True`` or a non-zero integer, the time taken for optimization is measured.
1070
+ If an integer is provided, the optimization is repeated that many times.
1071
+ Results can be accessed from the ``optimizer_result_`` attribute.
1058
1072
 
1059
1073
  Attributes
1060
1074
  ----------
1061
- coef_ : ndarray, shape = (n_samples,)
1075
+ coef_ : ndarray, shape = (n_samples,), dtype = float
1062
1076
  Weights assigned to the samples in training data to represent
1063
1077
  the decision function in kernel space.
1064
1078
 
1065
- fit_X_ : ndarray
1066
- Training data.
1079
+ fit_X_ : ndarray, shape = (n_samples, `n_features_in_`), dtype = float
1080
+ Training data used for fitting. Used to compute the kernel matrix for prediction.
1067
1081
 
1068
1082
  optimizer_result_ : :class:`scipy.optimize.OptimizeResult`
1069
1083
  Stats returned by the optimizer. See :class:`scipy.optimize.OptimizeResult`.
@@ -1071,7 +1085,7 @@ class FastKernelSurvivalSVM(BaseSurvivalSVM, SurvivalAnalysisMixin):
1071
1085
  n_features_in_ : int
1072
1086
  Number of features seen during ``fit``.
1073
1087
 
1074
- feature_names_in_ : ndarray of shape (`n_features_in_`,)
1088
+ feature_names_in_ : ndarray, shape = (`n_features_in_`,), dtype = object
1075
1089
  Names of features seen during ``fit``. Defined only when `X`
1076
1090
  has feature names that are all strings.
1077
1091
 
@@ -1205,20 +1219,6 @@ class FastKernelSurvivalSVM(BaseSurvivalSVM, SurvivalAnalysisMixin):
1205
1219
  return opt_result
1206
1220
 
1207
1221
  def predict(self, X):
1208
- """Rank samples according to survival times
1209
-
1210
- Lower ranks indicate shorter survival, higher ranks longer survival.
1211
-
1212
- Parameters
1213
- ----------
1214
- X : array-like, shape = (n_samples, n_features)
1215
- The input samples.
1216
-
1217
- Returns
1218
- -------
1219
- y : ndarray, shape = (n_samples,)
1220
- Predicted ranks.
1221
- """
1222
1222
  X = validate_data(self, X, reset=False)
1223
1223
  kernel_mat = self._get_kernel(X, self.fit_X_)
1224
1224