PyPI - scikit-survival - Versions diffs - 0.24.1__cp311-cp311-win_amd64.whl → 0.25.0__cp311-cp311-win_amd64.whl - Mend

scikit-survival 0.24.1__cp311-cp311-win_amd64.whl → 0.25.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)

scikit_survival-0.25.0.dist-info/METADATA +185 -0
scikit_survival-0.25.0.dist-info/RECORD +58 -0
{scikit_survival-0.24.1.dist-info → scikit_survival-0.25.0.dist-info}/WHEEL +1 -1
sksurv/__init__.py +51 -6
sksurv/base.py +12 -2
sksurv/bintrees/_binarytrees.cp311-win_amd64.pyd +0 -0
sksurv/column.py +33 -29
sksurv/compare.py +22 -22
sksurv/datasets/base.py +45 -20
sksurv/docstrings.py +99 -0
sksurv/ensemble/_coxph_loss.cp311-win_amd64.pyd +0 -0
sksurv/ensemble/boosting.py +116 -168
sksurv/ensemble/forest.py +94 -151
sksurv/functions.py +29 -29
sksurv/io/arffread.py +34 -3
sksurv/io/arffwrite.py +38 -2
sksurv/kernels/_clinical_kernel.cp311-win_amd64.pyd +0 -0
sksurv/kernels/clinical.py +33 -13
sksurv/linear_model/_coxnet.cp311-win_amd64.pyd +0 -0
sksurv/linear_model/aft.py +14 -11
sksurv/linear_model/coxnet.py +138 -89
sksurv/linear_model/coxph.py +102 -83
sksurv/meta/ensemble_selection.py +91 -9
sksurv/meta/stacking.py +47 -26
sksurv/metrics.py +257 -224
sksurv/nonparametric.py +150 -81
sksurv/preprocessing.py +55 -27
sksurv/svm/_minlip.cp311-win_amd64.pyd +0 -0
sksurv/svm/_prsvm.cp311-win_amd64.pyd +0 -0
sksurv/svm/minlip.py +160 -79
sksurv/svm/naive_survival_svm.py +63 -34
sksurv/svm/survival_svm.py +103 -103
sksurv/tree/_criterion.cp311-win_amd64.pyd +0 -0
sksurv/tree/tree.py +170 -84
sksurv/util.py +80 -26
scikit_survival-0.24.1.dist-info/METADATA +0 -889
scikit_survival-0.24.1.dist-info/RECORD +0 -57
{scikit_survival-0.24.1.dist-info → scikit_survival-0.25.0.dist-info}/licenses/COPYING +0 -0
{scikit_survival-0.24.1.dist-info → scikit_survival-0.25.0.dist-info}/top_level.txt +0 -0

sksurv/linear_model/coxph.py CHANGED Viewed

@@ -21,6 +21,7 @@ from sklearn.utils._param_validation import Interval, StrOptions
 from sklearn.utils.validation import check_array, check_is_fitted, validate_data
 from ..base import SurvivalAnalysisMixin
+from ..docstrings import append_cumulative_hazard_example, append_survival_function_example
 from ..functions import StepFunction
 from ..nonparametric import _compute_counts
 from ..util import check_array_survival
@@ -29,17 +30,21 @@ __all__ = ["CoxPHSurvivalAnalysis"]
 class BreslowEstimator:
-    """Breslow's estimator of the cumulative hazard function.
+    """Breslow's non-parametric estimator for the cumulative baseline hazard.
+    This class is used by :class:`CoxPHSurvivalAnalysis` to estimate the
+    cumulative baseline hazard and baseline survival function after the
+    coefficients of the Cox model have been fitted.
     Attributes
     ----------
     cum_baseline_hazard_ : :class:`sksurv.functions.StepFunction`
-        Cumulative baseline hazard function.
+        Estimated cumulative baseline hazard function.
     baseline_survival_ : :class:`sksurv.functions.StepFunction`
-        Baseline survival function.
+        Estimated baseline survival function.
-    unique_times_ : ndarray
+    unique_times_ : ndarray, shape=(n_unique_times,)
         Unique event times.
     """
@@ -126,7 +131,29 @@ class BreslowEstimator:
 class CoxPHOptimizer:
-    """Negative partial log-likelihood of Cox proportional hazards model"""
+    """Helper class for fitting the Cox proportional hazards model.
+    This class computes the negative log-likelihood, its gradient, and the
+    Hessian matrix for the Cox model. It is used internally by
+    :class:`CoxPHSurvivalAnalysis`.
+    Parameters
+    ----------
+    X : ndarray, shape=(n_samples, n_features)
+        The feature matrix.
+    event : ndarray, shape=(n_samples,)
+        The event indicator.
+    time : ndarray, shape=(n_samples,)
+        The event/censoring times.
+    alpha : ndarray, shape=(n_features,)
+        The regularization parameters.
+    ties : {'breslow', 'efron'}
+        The method to handle tied event times.
+    """
     def __init__(self, X, event, time, alpha, ties):
         # sort descending
@@ -270,6 +297,17 @@ class CoxPHOptimizer:
 class VerboseReporter:
+    """Helper class to report optimization progress.
+    This class is used by :class:`CoxPHSurvivalAnalysis` to print
+    optimization progress depending on the verbosity level.
+    Parameters
+    ----------
+    verbose : int
+        The verbosity level.
+    """
     def __init__(self, verbose):
         self.verbose = verbose
@@ -293,20 +331,25 @@ class VerboseReporter:
 class CoxPHSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
-    """Cox proportional hazards model.
+    """The Cox proportional hazards model, also known as Cox regression.
+    This model is a semi-parametric model that can be used to model the
+    relationship between a set of features and the time to an event.
+    The model is fitted by maximizing the partial likelihood
+    using Newton-Raphson optimization.
     There are two possible choices for handling tied event times.
     The default is Breslow's method, which considers each of the
     events at a given time as distinct. Efron's method is more
     accurate if there are a large number of ties. When the number
     of ties is small, the estimated coefficients by Breslow's and
-    Efron's method are quite close. Uses Newton-Raphson optimization.
+    Efron's method are quite close.
     See [1]_, [2]_, [3]_ for further description.
     Parameters
     ----------
-    alpha : float, ndarray of shape (n_features,), optional, default: 0
+    alpha : float or ndarray, shape = (n_features,), optional, default: 0
         Regularization parameter for ridge regression penalty.
         If a single float, the same penalty is used for all features.
         If an array, there must be one penalty for each feature.
@@ -318,7 +361,7 @@ class CoxPHSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
         no tied event times all the methods are equivalent.
     n_iter : int, optional, default: 100
-        Maximum number of iterations.
+        The maximum number of iterations taken for the solver to converge.
     tol : float, optional, default: 1e-9
         Convergence criteria. Convergence is based on the negative log-likelihood::
@@ -332,7 +375,7 @@ class CoxPHSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
     Attributes
     ----------
     coef_ : ndarray, shape = (n_features,)
-        Coefficients of the model
+        Coefficients of the model.
     cum_baseline_hazard_ : :class:`sksurv.functions.StepFunction`
         Estimated baseline cumulative hazard function.
@@ -343,11 +386,11 @@ class CoxPHSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
     n_features_in_ : int
         Number of features seen during ``fit``.
-    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+    feature_names_in_ : ndarray, shape = (`n_features_in_`,)
         Names of features seen during ``fit``. Defined only when `X`
         has feature names that are all strings.
-    unique_times_ : array of shape = (n_unique_times,)
+    unique_times_ : ndarray, shape = (n_unique_times,)
         Unique time points.
     See also
@@ -395,7 +438,7 @@ class CoxPHSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
         return self._baseline_model.unique_times_
     def fit(self, X, y):
-        """Minimize negative partial log-likelihood for provided data.
+        """Fit the model to the given data.
         Parameters
         ----------
@@ -403,9 +446,9 @@ class CoxPHSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
             Data matrix
         y : structured array, shape = (n_samples,)
-            A structured array containing the binary event indicator
-            as first field, and time of event or time of censoring as
-            second field.
+            A structured array with two fields. The first field is a boolean
+            where ``True`` indicates an event and ``False`` indicates right-censoring.
+            The second field is a float with the time of event or time of censoring.
         Returns
         -------
@@ -482,6 +525,11 @@ class CoxPHSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
     def predict(self, X):
         """Predict risk scores.
+        The risk score is the linear predictor of the model,
+        computed as the dot product of the input features `X` and the
+        estimated coefficients `coef_`. A higher score indicates a
+        higher risk of experiencing the event.
         Parameters
         ----------
         X : array-like, shape = (n_samples, n_features)
@@ -498,15 +546,16 @@ class CoxPHSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
         return np.dot(X, self.coef_)
+    @append_cumulative_hazard_example(estimator_mod="linear_model", estimator_class="CoxPHSurvivalAnalysis")
     def predict_cumulative_hazard_function(self, X, return_array=False):
-        """Predict cumulative hazard function.
+        r"""Predict cumulative hazard function.
         The cumulative hazard function for an individual
         with feature vector :math:`x` is defined as
         .. math::
-            H(t \\mid x) = \\exp(x^\\top \\beta) H_0(t) ,
+            H(t \mid x) = \exp(x^\top \beta) H_0(t) ,
         where :math:`H_0(t)` is the baseline hazard function,
         estimated by Breslow's estimator.
@@ -516,56 +565,42 @@ class CoxPHSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
         X : array-like, shape = (n_samples, n_features)
             Data matrix.
-        return_array : boolean
-            If set, return an array with the cumulative hazard rate
-            for each `self.unique_times_`, otherwise an array of
-            :class:`sksurv.functions.StepFunction`.
+        return_array : bool, default: False
+            Whether to return a single array of cumulative hazard values
+            or a list of step functions.
+            If `False`, a list of :class:`sksurv.functions.StepFunction`
+            objects is returned.
+            If `True`, a 2d-array of shape `(n_samples, n_unique_times)` is
+            returned, where `n_unique_times` is the number of unique
+            event times in the training data. Each row represents the cumulative
+            hazard function of an individual evaluated at `unique_times_`.
         Returns
         -------
         cum_hazard : ndarray
-            If `return_array` is set, an array with the cumulative hazard rate
-            for each `self.unique_times_`, otherwise an array of length `n_samples`
-            of :class:`sksurv.functions.StepFunction` instances will be returned.
+            If `return_array` is `False`, an array of `n_samples`
+            :class:`sksurv.functions.StepFunction` instances is returned.
+            If `return_array` is `True`, a numeric array of shape
+            `(n_samples, n_unique_times_)` is returned.
         Examples
         --------
-        >>> import matplotlib.pyplot as plt
-        >>> from sksurv.datasets import load_whas500
-        >>> from sksurv.linear_model import CoxPHSurvivalAnalysis
-        Load the data.
-        >>> X, y = load_whas500()
-        >>> X = X.astype(float)
-        Fit the model.
-        >>> estimator = CoxPHSurvivalAnalysis().fit(X, y)
-        Estimate the cumulative hazard function for the first 10 samples.
-        >>> chf_funcs = estimator.predict_cumulative_hazard_function(X.iloc[:10])
-        Plot the estimated cumulative hazard functions.
-        >>> for fn in chf_funcs:
-        ...     plt.step(fn.x, fn(fn.x), where="post")
-        ...
-        >>> plt.ylim(0, 1)
-        >>> plt.show()
         """
         return self._predict_cumulative_hazard_function(self._baseline_model, self.predict(X), return_array)
+    @append_survival_function_example(estimator_mod="linear_model", estimator_class="CoxPHSurvivalAnalysis")
     def predict_survival_function(self, X, return_array=False):
-        """Predict survival function.
+        r"""Predict survival function.
         The survival function for an individual
         with feature vector :math:`x` is defined as
         .. math::
-            S(t \\mid x) = S_0(t)^{\\exp(x^\\top \\beta)} ,
+            S(t \mid x) = S_0(t)^{\exp(x^\top \beta)} ,
         where :math:`S_0(t)` is the baseline survival function,
         estimated by Breslow's estimator.
@@ -575,44 +610,28 @@ class CoxPHSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
         X : array-like, shape = (n_samples, n_features)
             Data matrix.
-        return_array : boolean, default: False
-            If set, return an array with the probability
-            of survival for each `self.unique_times_`,
-            otherwise an array of :class:`sksurv.functions.StepFunction`.
+        return_array : bool, default: False
+            Whether to return a single array of survival probabilities
+            or a list of step functions.
+            If `False`, a list of :class:`sksurv.functions.StepFunction`
+            objects is returned.
+            If `True`, a 2d-array of shape `(n_samples, n_unique_times)` is
+            returned, where `n_unique_times` is the number of unique
+            event times in the training data. Each row represents the survival
+            function of an individual evaluated at `unique_times_`.
         Returns
         -------
         survival : ndarray
-            If `return_array` is set, an array with the probability of
-            survival for each `self.unique_times_`, otherwise an array of
-            length `n_samples` of :class:`sksurv.functions.StepFunction`
-            instances will be returned.
+            If `return_array` is `False`, an array of `n_samples`
+            :class:`sksurv.functions.StepFunction` instances is returned.
+            If `return_array` is `True`, a numeric array of shape
+            `(n_samples, n_unique_times_)` is returned.
         Examples
         --------
-        >>> import matplotlib.pyplot as plt
-        >>> from sksurv.datasets import load_whas500
-        >>> from sksurv.linear_model import CoxPHSurvivalAnalysis
-        Load the data.
-        >>> X, y = load_whas500()
-        >>> X = X.astype(float)
-        Fit the model.
-        >>> estimator = CoxPHSurvivalAnalysis().fit(X, y)
-        Estimate the survival function for the first 10 samples.
-        >>> surv_funcs = estimator.predict_survival_function(X.iloc[:10])
-        Plot the estimated survival functions.
-        >>> for fn in surv_funcs:
-        ...     plt.step(fn.x, fn(fn.x), where="post")
-        ...
-        >>> plt.ylim(0, 1)
-        >>> plt.show()
         """
         return self._predict_survival_function(self._baseline_model, self.predict(X), return_array)

sksurv/meta/ensemble_selection.py CHANGED Viewed

@@ -37,18 +37,55 @@ def _corr_kendalltau(X):
 class EnsembleAverage(BaseEstimator):
+    """A meta-estimator that averages the predictions of base estimators.
+    This estimator is for internal use by :class:`BaseEnsembleSelection`.
+    It takes a list of estimators that have already been fitted and
+    averages their predictions.
+    Parameters
+    ----------
+    base_estimators : list of estimators
+        The base estimators to average. The estimators must be fitted.
+    name : str, optional, default: None
+        The name of the ensemble.
+    """
     def __init__(self, base_estimators, name=None):
         self.base_estimators = base_estimators
         self.name = name
         assert not hasattr(self.base_estimators[0], "classes_"), "base estimator cannot be a classifier"
     def get_base_params(self):
+        """Get parameters for this estimator's first base estimator.
+        Returns
+        -------
+        params : dict
+            Parameter names mapped to their values.
+        """
         return self.base_estimators[0].get_params()
     def fit(self, X, y=None, **kwargs):  # pragma: no cover; # pylint: disable=unused-argument
         return self
     def predict(self, X):
+        """Predict using the ensemble of estimators.
+        The prediction is the average of the predictions of all base
+        estimators.
+        Parameters
+        ----------
+        X : array-like, shape = (n_samples, n_features)
+            Data to predict on.
+        Returns
+        -------
+        y_pred : ndarray, shape = (n_samples,)
+            The predicted values.
+        """
         prediction = np.zeros(X.shape[0])
         for est in self.base_estimators:
             prediction += est.predict(X)
@@ -57,18 +94,59 @@ class EnsembleAverage(BaseEstimator):
 class MeanEstimator(BaseEstimator):
+    """A meta-estimator that averages predictions.
+    This estimator computes the mean of an array along its last axis.
+    It is intended to be used as a ``meta_estimator`` in an ensemble model,
+    where it averages the predictions of the base estimators.
+    """
     def fit(self, X, y=None, **kwargs):  # pragma: no cover; # pylint: disable=unused-argument
         return self
     def predict(self, X):  # pylint: disable=no-self-use
+        """Return the mean of an array along its last axis.
+        Parameters
+        ----------
+        X : array-like, shape = (n_samples, n_estimators)
+            The predictions of base estimators.
+        Returns
+        -------
+        y_pred : ndarray, shape = (n_samples,)
+            The averaged predictions.
+        """
         return X.mean(axis=X.ndim - 1)
 class MeanRankEstimator(BaseEstimator):
+    """A meta-estimator that averages the ranks of predictions of base estimators.
+    This estimator first converts the predictions of each base estimator
+    into ranks and then averages the ranks. It is intended to be used as
+    a ``meta_estimator`` in an ensemble model.
+    """
     def fit(self, X, y=None, **kwargs):  # pragma: no cover; # pylint: disable=unused-argument
         return self
     def predict(self, X):  # pylint: disable=no-self-use
+        """Return the mean of ranks.
+        The predictions of each base estimator are first converted into
+        ranks and then averaged.
+        Parameters
+        ----------
+        X : array-like, shape = (n_samples, n_estimators)
+            The predictions of base estimators.
+        Returns
+        -------
+        y_pred : ndarray, shape = (n_samples,)
+            The averaged ranks.
+        """
         # convert predictions of individual models into ranks
         ranks = np.apply_along_axis(rankdata, 0, X)
         # average predicted ranks
@@ -134,6 +212,7 @@ class BaseEnsembleSelection(Stacking):
         self._extra_params.extend(["scorer", "n_estimators", "min_score", "min_correlation", "cv", "n_jobs", "verbose"])
     def __len__(self):
+        """Return the number of fitted models."""
         if hasattr(self, "fitted_models_"):
             return len(self.fitted_models_)
         return 0
@@ -300,16 +379,19 @@ class BaseEnsembleSelection(Stacking):
         raise NotImplementedError()
     def fit(self, X, y=None, **fit_params):
-        """Fit ensemble of models
+        """Fit ensemble of models.
         Parameters
         ----------
         X : array-like, shape = (n_samples, n_features)
             Training data.
-        y : array-like, optional
+        y : array-like, shape = (n_samples,), optional
             Target data if base estimators are supervised.
+        **fit_params : dict
+            Parameters passed to the ``fit`` method of each base estimator.
         Returns
         -------
         self
@@ -347,7 +429,7 @@ class EnsembleSelection(BaseEnsembleSelection):
         If a float, the percentage of estimators in the ensemble to retain, if an int the
         absolute number of estimators to retain.
-    min_score : float, optional, default: 0.66
+    min_score : float, optional, default: 0.2
         Threshold for pruning estimators based on scoring metric. After `fit`, only estimators
         with a score above `min_score` are retained.
@@ -379,7 +461,7 @@ class EnsembleSelection(BaseEnsembleSelection):
     n_features_in_ : int
         Number of features seen during ``fit``.
-    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+    feature_names_in_ : ndarray, shape = (`n_features_in_`,)
         Names of features seen during ``fit``. Defined only when `X`
         has feature names that are all strings.
@@ -473,14 +555,14 @@ class EnsembleSelection(BaseEnsembleSelection):
 class EnsembleSelectionRegressor(BaseEnsembleSelection):
-    """Ensemble selection for regression that accounts for the accuracy and correlation of errors.
+    r"""Ensemble selection for regression that accounts for the accuracy and correlation of errors.
     The ensemble is pruned during training according to estimators' accuracy and the correlation
     between prediction errors per sample. The accuracy of the *i*-th estimator defined as
-    :math:`\\frac{ \\min_{i=1,\\ldots, n}(error_i) }{ error_i }`.
+    :math:`\frac{ \min_{i=1,\ldots, n}(error_i) }{ error_i }`.
     In addition to the accuracy, models are selected based on the correlation between residuals
     of different models (diversity). The diversity of the *i*-th estimator is defined as
-    :math:`\\frac{n-count}{n}`, where *count* is the number of estimators for whom the correlation
+    :math:`\frac{n-count}{n}`, where *count* is the number of estimators for whom the correlation
     of residuals exceeds `min_correlation`.
     The hillclimbing is based on cross-validation to avoid having to create a separate validation set.
@@ -504,7 +586,7 @@ class EnsembleSelectionRegressor(BaseEnsembleSelection):
     min_score : float, optional, default: 0.66
         Threshold for pruning estimators based on scoring metric. After `fit`, only estimators
-        with a accuracy above `min_score` are retained.
+        with an accuracy above `min_score` are retained.
     min_correlation : float, optional, default: 0.6
         Threshold for Pearson's correlation coefficient that determines when residuals of
@@ -534,7 +616,7 @@ class EnsembleSelectionRegressor(BaseEnsembleSelection):
     n_features_in_ : int
         Number of features seen during ``fit``.
-    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+    feature_names_in_ : ndarray, shape = (`n_features_in_`,)
         Names of features seen during ``fit``. Defined only when `X`
         has feature names that are all strings.