PyPI - scikit-survival - Versions diffs - 0.24.1__cp313-cp313-macosx_11_0_arm64.whl → 0.26.0__cp313-cp313-macosx_11_0_arm64.whl - Mend

scikit-survival 0.24.1__cp313-cp313-macosx_11_0_arm64.whl → 0.26.0__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

scikit_survival-0.26.0.dist-info/METADATA +185 -0
scikit_survival-0.26.0.dist-info/RECORD +58 -0
{scikit_survival-0.24.1.dist-info → scikit_survival-0.26.0.dist-info}/WHEEL +1 -1
sksurv/__init__.py +51 -6
sksurv/base.py +12 -2
sksurv/bintrees/_binarytrees.cpython-313-darwin.so +0 -0
sksurv/column.py +38 -35
sksurv/compare.py +23 -23
sksurv/datasets/base.py +52 -27
sksurv/docstrings.py +99 -0
sksurv/ensemble/_coxph_loss.cpython-313-darwin.so +0 -0
sksurv/ensemble/boosting.py +116 -168
sksurv/ensemble/forest.py +94 -151
sksurv/functions.py +29 -29
sksurv/io/arffread.py +37 -4
sksurv/io/arffwrite.py +41 -5
sksurv/kernels/_clinical_kernel.cpython-313-darwin.so +0 -0
sksurv/kernels/clinical.py +36 -16
sksurv/linear_model/_coxnet.cpython-313-darwin.so +0 -0
sksurv/linear_model/aft.py +14 -11
sksurv/linear_model/coxnet.py +138 -89
sksurv/linear_model/coxph.py +102 -83
sksurv/meta/ensemble_selection.py +91 -9
sksurv/meta/stacking.py +47 -26
sksurv/metrics.py +257 -224
sksurv/nonparametric.py +150 -81
sksurv/preprocessing.py +74 -34
sksurv/svm/_minlip.cpython-313-darwin.so +0 -0
sksurv/svm/_prsvm.cpython-313-darwin.so +0 -0
sksurv/svm/minlip.py +171 -85
sksurv/svm/naive_survival_svm.py +63 -34
sksurv/svm/survival_svm.py +103 -103
sksurv/testing.py +47 -0
sksurv/tree/_criterion.cpython-313-darwin.so +0 -0
sksurv/tree/tree.py +170 -84
sksurv/util.py +85 -30
scikit_survival-0.24.1.dist-info/METADATA +0 -889
scikit_survival-0.24.1.dist-info/RECORD +0 -57
{scikit_survival-0.24.1.dist-info → scikit_survival-0.26.0.dist-info}/licenses/COPYING +0 -0
{scikit_survival-0.24.1.dist-info → scikit_survival-0.26.0.dist-info}/top_level.txt +0 -0

sksurv/datasets/base.py CHANGED Viewed

@@ -36,10 +36,10 @@ def _get_x_y_survival(dataset, col_event, col_time, val_outcome, competing_risks
         event_type = np.int64 if competing_risks else bool
         y = np.empty(dtype=[(col_event, event_type), (col_time, np.float64)], shape=dataset.shape[0])
         if competing_risks:
-            y[col_event] = dataset[col_event].values
+            y[col_event] = dataset[col_event].to_numpy()
         else:
-            y[col_event] = (dataset[col_event] == val_outcome).values
-        y[col_time] = dataset[col_time].values
+            y[col_event] = (dataset[col_event] == val_outcome).to_numpy()
+        y[col_time] = dataset[col_time].to_numpy()
         x_frame = dataset.drop([col_event, col_time], axis=1)
@@ -82,18 +82,23 @@ def get_x_y(data_frame, attr_labels, pos_label=None, survival=True, competing_ri
         Whether to return `y` that can be used for survival analysis.
     competing_risks : bool, optional, default: False
-        Whether `y` refers to competing risks situation. Only used if `survival` is True
+        Whether `y` refers to competing risks situation. Only used if `survival` is `True`.
     Returns
     -------
     X : pandas.DataFrame, shape = (n_samples, n_columns - len(attr_labels))
         Data frame containing features.
-    y : None or pandas.DataFrame, shape = (n_samples, len(attr_labels))
-        Data frame containing columns with supervised information.
-        If `survival` was `True`, then the column denoting the event
-        indicator will be boolean and survival times will be float.
-        If `attr_labels` contains `None`, y is set to `None`.
+    y : structured array, shape = (n_samples,), or pandas.DataFrame, shape = (n_samples, len(attr_labels)), or None
+        If `survival` is `True`, a structured array with two fields.
+        The first field is a boolean where ``True`` indicates an event and ``False``
+        indicates right-censoring. The second field is a float with the time of
+        event or time of censoring.
+        If `survival` is `False` and `attr_labels` is not `None`, a :class:`pandas.DataFrame`
+        with columns specified by `attr_labels`.
+        If `survival` is `False` and `attr_labels` is `None`, `y` is set to `None`.
     """
     if survival:
         if len(attr_labels) != 2:
@@ -111,7 +116,7 @@ def _loadarff_with_index(filename):
         if isinstance(dataset["index"].dtype, CategoricalDtype):
             # concatenating categorical index may raise TypeError
             # see https://github.com/pandas-dev/pandas/issues/14586
-            dataset["index"] = dataset["index"].astype(object)
+            dataset = dataset.astype({"index": "str"})
         dataset.set_index("index", inplace=True)
     return dataset
@@ -154,7 +159,7 @@ def load_arff_files_standardized(
         Whether to standardize data to zero mean and unit variance.
         See :func:`sksurv.column.standardize`.
-    to_numeric : boo, optional, default: True
+    to_numeric : bool, optional, default: True
         Whether to convert categorical variables to numeric values.
         See :func:`sksurv.column.categorical_to_numeric`.
@@ -163,14 +168,34 @@ def load_arff_files_standardized(
     x_train : pandas.DataFrame, shape = (n_train, n_features)
         Training data.
-    y_train : pandas.DataFrame, shape = (n_train, n_labels)
+    y_train : structured array, shape = (n_train,), or pandas.DataFrame, shape = (n_train, len(attr_labels)), or None
         Dependent variables of training data.
-    x_test : None or pandas.DataFrame, shape = (n_train, n_features)
+        If `survival` is `True`, a structured array with two fields.
+        The first field is a boolean where ``True`` indicates an event and ``False``
+        indicates right-censoring. The second field is a float with the time of
+        event or time of censoring.
+        If `survival` is `False` and `attr_labels` is not `None`, a :class:`pandas.DataFrame`
+        with columns specified by `attr_labels`.
+        If `survival` is `False` and `attr_labels` is `None`, `y_train` is set to `None`.
+    x_test : None or pandas.DataFrame, shape = (n_test, n_features)
         Testing data if `path_testing` was provided.
-    y_test : None or pandas.DataFrame, shape = (n_train, n_labels)
+    y_test : None or structured array, shape = (n_test,), or pandas.DataFrame, shape = (n_test, len(attr_labels))
         Dependent variables of testing data if `path_testing` was provided.
+        If `survival` is `True`, a structured array with two fields.
+        The first field is a boolean where ``True`` indicates an event and ``False``
+        indicates right-censoring. The second field is a float with the time of
+        event or time of censoring.
+        If `survival` is `False` and `attr_labels` is not `None`, a :class:`pandas.DataFrame`
+        with columns specified by `attr_labels`.
+        If `survival` is `False` and `attr_labels` is `None`, `y_test` is set to `None`.
     """
     dataset = _loadarff_with_index(path_training)
@@ -237,7 +262,7 @@ def load_whas500():
     y : structured array with 2 fields
         *fstat*: boolean indicating whether the endpoint has been reached
-        or the event time is right censored.
+        or the event time is right-censored.
         *lenfol*: total length of follow-up (days from hospital admission date
         to date of last follow-up)
@@ -269,7 +294,7 @@ def load_gbsg2():
     y : structured array with 2 fields
         *cens*: boolean indicating whether the endpoint has been reached
-        or the event time is right censored.
+        or the event time is right-censored.
         *time*: total length of follow-up
@@ -302,7 +327,7 @@ def load_veterans_lung_cancer():
     y : structured array with 2 fields
         *Status*: boolean indicating whether the endpoint has been reached
-        or the event time is right censored.
+        or the event time is right-censored.
         *Survival_in_days*: total length of follow-up
@@ -328,8 +353,8 @@ def load_aids(endpoint="aids"):
     Parameters
     ----------
-    endpoint : aids|death
-        The endpoint
+    endpoint : {'aids', 'death'}, default: 'aids'
+        The endpoint.
     Returns
     -------
@@ -338,7 +363,7 @@ def load_aids(endpoint="aids"):
     y : structured array with 2 fields
         *censor*: boolean indicating whether the endpoint has been reached
-        or the event time is right censored.
+        or the event time is right-censored.
         *time*: total length of follow-up
@@ -384,7 +409,7 @@ def load_breast_cancer():
     y : structured array with 2 fields
         *e.tdm*: boolean indicating whether the endpoint has been reached
-        or the event time is right censored.
+        or the event time is right-censored.
         *t.tdm*: time to distant metastasis (days)
@@ -428,7 +453,7 @@ def load_flchain():
     y : structured array with 2 fields
         *death*: boolean indicating whether the subject died
-        or the event time is right censored.
+        or the event time is right-censored.
         *futime*: total length of follow-up or time of death.
@@ -473,7 +498,7 @@ def load_bmt():
         The measurements for each patient.
     y : structured array with 2 fields
-        *status*: Integer indicating the endpoint: 0-(survival i.e. right censored data), 1-(TRM), 2-(relapse)
+        *status*: Integer indicating the endpoint: 0-(survival i.e. right-censored data), 1-(TRM), 2-(relapse)
         *ftime*: total length of follow-up or time of event.
@@ -487,7 +512,7 @@ def load_bmt():
     """
     full_path = _get_data_path("bmt.arff")
     data = loadarff(full_path)
-    data["ftime"] = data["ftime"].astype(int)
+    data = data.astype({"ftime": int})
     return get_x_y(data, attr_labels=["status", "ftime"], competing_risks=True)
@@ -566,7 +591,7 @@ def load_cgvhd():
         The measurements for each patient.
     y : structured array with 2 fields
-        *status*: Integer indicating the endpoint: 0: right censored data; 1: CGVHD; 2: relapse; 3: death.
+        *status*: Integer indicating the endpoint: 0: right-censored data; 1: CGVHD; 2: relapse; 3: death.
         *ftime*: total length of follow-up or time of event.
@@ -578,8 +603,8 @@ def load_cgvhd():
     """
     full_path = _get_data_path("cgvhd.arff")
     data = loadarff(full_path)
-    data["ftime"] = data[["survtime", "reltime", "cgvhtime"]].min(axis=1)
-    data["status"] = (
+    data.loc[:, "ftime"] = data[["survtime", "reltime", "cgvhtime"]].min(axis=1)
+    data.loc[:, "status"] = (
         ((data["ftime"] == data["cgvhtime"]) & (data["cgvh"] == "1")).astype(int)
         + 2 * ((data["ftime"] == data["reltime"]) & (data["rcens"] == "1")).astype(int)
         + 3 * ((data["ftime"] == data["survtime"]) & (data["stat"] == "1")).astype(int)

sksurv/docstrings.py ADDED Viewed

@@ -0,0 +1,99 @@
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+# Template for the ``predict_survival_function`` usage example that
+# ``append_survival_function_example`` appends to a docstring.
+# ``{estimator_mod}`` and ``{estimator_class}`` are placeholders filled in
+# with ``str.format``.
+# NOTE(review): the 8-space base indent presumably matches the indentation of
+# the method docstrings this text is appended to -- confirm.
+_PRED_SURV_FN_EXAMPLE_DOC = """
+        .. plot::
+            >>> import matplotlib.pyplot as plt
+            >>> from sksurv.datasets import load_veterans_lung_cancer
+            >>> from sksurv.preprocessing import OneHotEncoder
+            >>> from sksurv.{estimator_mod} import {estimator_class}
+            Load the data and encode categorical features.
+            >>> X, y = load_veterans_lung_cancer()
+            >>> Xt = OneHotEncoder().fit_transform(X)
+            Fit the model.
+            >>> estimator = {estimator_class}().fit(Xt, y)
+            Estimate the survival function for the first 10 samples.
+            >>> surv_funcs = estimator.predict_survival_function(Xt.iloc[:10])
+            Plot the estimated survival functions.
+            >>> for fn in surv_funcs:
+            ...     plt.step(fn.x, fn(fn.x), where="post")
+            ...
+            [...]
+            >>> plt.ylim(0, 1)
+            (0.0, 1.0)
+            >>> plt.show()  # doctest: +SKIP
+"""
+# Template for the ``predict_cumulative_hazard_function`` usage example that
+# ``append_cumulative_hazard_example`` appends to a docstring.
+# ``{estimator_mod}`` and ``{estimator_class}`` are placeholders filled in
+# with ``str.format``.
+# NOTE(review): the 8-space base indent presumably matches the indentation of
+# the method docstrings this text is appended to -- confirm.
+_PRED_CUMHAZ_FN_EXAMPLE_DOC = """
+        .. plot::
+            >>> import matplotlib.pyplot as plt
+            >>> from sksurv.datasets import load_veterans_lung_cancer
+            >>> from sksurv.preprocessing import OneHotEncoder
+            >>> from sksurv.{estimator_mod} import {estimator_class}
+            Load the data and encode categorical features.
+            >>> X, y = load_veterans_lung_cancer()
+            >>> Xt = OneHotEncoder().fit_transform(X)
+            Fit the model.
+            >>> estimator = {estimator_class}().fit(Xt, y)
+            Estimate the cumulative hazard function for the first 10 samples.
+            >>> chf_funcs = estimator.predict_cumulative_hazard_function(Xt.iloc[:10])
+            Plot the estimated cumulative hazard functions.
+            >>> for fn in chf_funcs:
+            ...     plt.step(fn.x, fn(fn.x), where="post")
+            ...
+            [...]
+            >>> plt.show()  # doctest: +SKIP
+"""
+def append_survival_function_example(*, estimator_mod, estimator_class):
+    """Return a decorator appending a ``predict_survival_function`` example to a docstring.
+
+    Parameters
+    ----------
+    estimator_mod : str
+        Text substituted for ``{estimator_mod}`` in the example template,
+        i.e. the part after ``sksurv.`` in the example's import statement.
+
+    estimator_class : str
+        Text substituted for ``{estimator_class}`` in the example template,
+        i.e. the name of the estimator that is imported and instantiated.
+
+    Returns
+    -------
+    func : callable
+        Decorator that appends the formatted example to ``f.__doc__``
+        and returns ``f`` itself. Objects without a docstring are
+        returned unchanged.
+    """
+
+    def func(f):
+        # ``__doc__`` is None when Python runs with ``-OO`` or when ``f`` has no
+        # docstring; ``None += str`` would raise TypeError at decoration time,
+        # so there is nothing to append to and ``f`` is returned as is.
+        if f.__doc__ is None:
+            return f
+        f.__doc__ += _PRED_SURV_FN_EXAMPLE_DOC.format(
+            estimator_mod=estimator_mod,
+            estimator_class=estimator_class,
+        )
+        return f
+
+    return func
+def append_cumulative_hazard_example(*, estimator_mod, estimator_class):
+    """Return a decorator appending a ``predict_cumulative_hazard_function`` example to a docstring.
+
+    Parameters
+    ----------
+    estimator_mod : str
+        Text substituted for ``{estimator_mod}`` in the example template,
+        i.e. the part after ``sksurv.`` in the example's import statement.
+
+    estimator_class : str
+        Text substituted for ``{estimator_class}`` in the example template,
+        i.e. the name of the estimator that is imported and instantiated.
+
+    Returns
+    -------
+    func : callable
+        Decorator that appends the formatted example to ``f.__doc__``
+        and returns ``f`` itself. Objects without a docstring are
+        returned unchanged.
+    """
+
+    def func(f):
+        # ``__doc__`` is None when Python runs with ``-OO`` or when ``f`` has no
+        # docstring; ``None += str`` would raise TypeError at decoration time,
+        # so there is nothing to append to and ``f`` is returned as is.
+        if f.__doc__ is None:
+            return f
+        f.__doc__ += _PRED_CUMHAZ_FN_EXAMPLE_DOC.format(
+            estimator_mod=estimator_mod,
+            estimator_class=estimator_class,
+        )
+        return f
+
+    return func

sksurv/ensemble/_coxph_loss.cpython-313-darwin.so CHANGED Viewed

Binary file