PyPI - scikit-survival - Versions diffs - 0.24.1__cp312-cp312-win_amd64.whl → 0.26.0__cp312-cp312-win_amd64.whl - Mend

scikit-survival 0.24.1__cp312-cp312-win_amd64.whl → 0.26.0__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

scikit_survival-0.26.0.dist-info/METADATA +185 -0
scikit_survival-0.26.0.dist-info/RECORD +58 -0
{scikit_survival-0.24.1.dist-info → scikit_survival-0.26.0.dist-info}/WHEEL +1 -1
sksurv/__init__.py +51 -6
sksurv/base.py +12 -2
sksurv/bintrees/_binarytrees.cp312-win_amd64.pyd +0 -0
sksurv/column.py +38 -35
sksurv/compare.py +23 -23
sksurv/datasets/base.py +52 -27
sksurv/docstrings.py +99 -0
sksurv/ensemble/_coxph_loss.cp312-win_amd64.pyd +0 -0
sksurv/ensemble/boosting.py +116 -168
sksurv/ensemble/forest.py +94 -151
sksurv/functions.py +29 -29
sksurv/io/arffread.py +37 -4
sksurv/io/arffwrite.py +41 -5
sksurv/kernels/_clinical_kernel.cp312-win_amd64.pyd +0 -0
sksurv/kernels/clinical.py +36 -16
sksurv/linear_model/_coxnet.cp312-win_amd64.pyd +0 -0
sksurv/linear_model/aft.py +14 -11
sksurv/linear_model/coxnet.py +138 -89
sksurv/linear_model/coxph.py +102 -83
sksurv/meta/ensemble_selection.py +91 -9
sksurv/meta/stacking.py +47 -26
sksurv/metrics.py +257 -224
sksurv/nonparametric.py +150 -81
sksurv/preprocessing.py +74 -34
sksurv/svm/_minlip.cp312-win_amd64.pyd +0 -0
sksurv/svm/_prsvm.cp312-win_amd64.pyd +0 -0
sksurv/svm/minlip.py +171 -85
sksurv/svm/naive_survival_svm.py +63 -34
sksurv/svm/survival_svm.py +103 -103
sksurv/testing.py +47 -0
sksurv/tree/_criterion.cp312-win_amd64.pyd +0 -0
sksurv/tree/tree.py +170 -84
sksurv/util.py +85 -30
scikit_survival-0.24.1.dist-info/METADATA +0 -889
scikit_survival-0.24.1.dist-info/RECORD +0 -57
{scikit_survival-0.24.1.dist-info → scikit_survival-0.26.0.dist-info}/licenses/COPYING +0 -0
{scikit_survival-0.24.1.dist-info → scikit_survival-0.26.0.dist-info}/top_level.txt +0 -0

sksurv/ensemble/forest.py CHANGED Viewed

@@ -18,6 +18,7 @@ from sklearn.utils._tags import get_tags
 from sklearn.utils.validation import check_is_fitted, check_random_state, validate_data
 from ..base import SurvivalAnalysisMixin
+from ..docstrings import append_cumulative_hazard_example, append_survival_function_example
 from ..metrics import concordance_index_censored
 from ..tree import ExtraSurvivalTree, SurvivalTree
 from ..tree._criterion import get_unique_times
@@ -96,9 +97,9 @@ class _BaseSurvivalForest(BaseForest, metaclass=ABCMeta):
             Data matrix
         y : structured array, shape = (n_samples,)
-            A structured array containing the binary event indicator
-            as first field, and time of event or time of censoring as
-            second field.
+            A structured array with two fields. The first field is a boolean
+            where ``True`` indicates an event and ``False`` indicates right-censoring.
+            The second field is a float with the time of event or time of censoring.
         Returns
         -------
@@ -266,15 +267,15 @@ class _BaseSurvivalForest(BaseForest, metaclass=ABCMeta):
         return y_hat
     def predict(self, X):
-        """Predict risk score.
+        r"""Predict risk score.
         The ensemble risk score is the total number of events,
         which can be estimated by the sum of the estimated
-        ensemble cumulative hazard function :math:`\\hat{H}_e`.
+        ensemble cumulative hazard function :math:`\hat{H}_e`.
         .. math::
-            \\sum_{j=1}^{n} \\hat{H}_e(T_{j} \\mid x) ,
+            \sum_{j=1}^{n} \hat{H}_e(T_{j} \mid x) ,
         where :math:`n` denotes the total number of distinct
         event times in the training data.
@@ -322,7 +323,7 @@ class RandomSurvivalForest(SurvivalAnalysisMixin, _BaseSurvivalForest):
     Parameters
     ----------
-    n_estimators : integer, optional, default: 100
+    n_estimators : int, optional, default: 100
         The number of trees in the forest.
     max_depth : int or None, optional, default: None
@@ -355,7 +356,7 @@ class RandomSurvivalForest(SurvivalAnalysisMixin, _BaseSurvivalForest):
         the input samples) required to be at a leaf node. Samples have
         equal weight when sample_weight is not provided.
-    max_features : int, float, string or None, optional, default: None
+    max_features : int, float, {'sqrt', 'log2'} or None, optional, default: 'sqrt'
         The number of features to consider when looking for the best split:
         - If int, then consider `max_features` features at each split.
@@ -375,11 +376,11 @@ class RandomSurvivalForest(SurvivalAnalysisMixin, _BaseSurvivalForest):
         Best nodes are defined as relative reduction in impurity.
         If None then unlimited number of leaf nodes.
-    bootstrap : boolean, optional, default: True
+    bootstrap : bool, optional, default: True
         Whether bootstrap samples are used when building trees. If False, the
         whole dataset is used to build each tree.
-    oob_score : bool, default: False
+    oob_score : bool, optional, default: False
         Whether to use out-of-bag samples to estimate
         the generalization accuracy.
@@ -412,22 +413,22 @@ class RandomSurvivalForest(SurvivalAnalysisMixin, _BaseSurvivalForest):
         - If float, then draw `max_samples * X.shape[0]` samples. Thus,
           `max_samples` should be in the interval `(0.0, 1.0]`.
-    low_memory : boolean, default: False
-        If set, ``predict`` computations use reduced memory but ``predict_cumulative_hazard_function``
-        and ``predict_survival_function`` are not implemented.
+    low_memory : bool, optional, default: False
+        If set, :meth:`predict` computations use reduced memory but :meth:`predict_cumulative_hazard_function`
+        and :meth:`predict_survival_function` are not implemented.
     Attributes
     ----------
     estimators_ : list of SurvivalTree instances
         The collection of fitted sub-estimators.
-    unique_times_ : array of shape = (n_unique_times,)
+    unique_times_ : ndarray, shape = (n_unique_times,)
         Unique time points.
     n_features_in_ : int
         Number of features seen during ``fit``.
-    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+    feature_names_in_ : ndarray, shape = (`n_features_in_`,)
         Names of features seen during ``fit``. Defined only when `X`
         has feature names that are all strings.
@@ -527,6 +528,7 @@ class RandomSurvivalForest(SurvivalAnalysisMixin, _BaseSurvivalForest):
         self.max_leaf_nodes = max_leaf_nodes
         self.low_memory = low_memory
+    @append_cumulative_hazard_example(estimator_mod="ensemble", estimator_class="RandomSurvivalForest")
     def predict_cumulative_hazard_function(self, X, return_array=False):
         """Predict cumulative hazard function.
@@ -544,47 +546,33 @@ class RandomSurvivalForest(SurvivalAnalysisMixin, _BaseSurvivalForest):
         X : array-like, shape = (n_samples, n_features)
             Data matrix.
-        return_array : boolean, default: False
-            If set, return an array with the cumulative hazard rate
-            for each `self.unique_times_`, otherwise an array of
-            :class:`sksurv.functions.StepFunction`.
+        return_array : bool, default: False
+            Whether to return a single array of cumulative hazard values
+            or a list of step functions.
+            If `False`, a list of :class:`sksurv.functions.StepFunction`
+            objects is returned.
+            If `True`, a 2d-array of shape `(n_samples, n_unique_times)` is
+            returned, where `n_unique_times` is the number of unique
+            event times in the training data. Each row represents the cumulative
+            hazard function of an individual evaluated at `unique_times_`.
         Returns
         -------
         cum_hazard : ndarray
-            If `return_array` is set, an array with the cumulative hazard rate
-            for each `self.unique_times_`, otherwise an array of length `n_samples`
-            of :class:`sksurv.functions.StepFunction` instances will be returned.
+            If `return_array` is `False`, an array of `n_samples`
+            :class:`sksurv.functions.StepFunction` instances is returned.
+            If `return_array` is `True`, a numeric array of shape
+            `(n_samples, n_unique_times)` is returned.
         Examples
         --------
-        >>> import matplotlib.pyplot as plt
-        >>> from sksurv.datasets import load_whas500
-        >>> from sksurv.ensemble import RandomSurvivalForest
-        Load and prepare the data.
-        >>> X, y = load_whas500()
-        >>> X = X.astype(float)
-        Fit the model.
-        >>> estimator = RandomSurvivalForest().fit(X, y)
-        Estimate the cumulative hazard function for the first 5 samples.
-        >>> chf_funcs = estimator.predict_cumulative_hazard_function(X.iloc[:5])
-        Plot the estimated cumulative hazard functions.
-        >>> for fn in chf_funcs:
-        ...    plt.step(fn.x, fn(fn.x), where="post")
-        ...
-        >>> plt.ylim(0, 1)
-        >>> plt.show()
         """
         return super().predict_cumulative_hazard_function(X, return_array)
+    @append_survival_function_example(estimator_mod="ensemble", estimator_class="RandomSurvivalForest")
     def predict_survival_function(self, X, return_array=False):
         """Predict survival function.
@@ -602,45 +590,29 @@ class RandomSurvivalForest(SurvivalAnalysisMixin, _BaseSurvivalForest):
         X : array-like, shape = (n_samples, n_features)
             Data matrix.
-        return_array : boolean
-            If set, return an array with the probability
-            of survival for each `self.unique_times_`,
-            otherwise an array of :class:`sksurv.functions.StepFunction`.
+        return_array : bool, default: False
+            Whether to return a single array of survival probabilities
+            or a list of step functions.
+            If `False`, a list of :class:`sksurv.functions.StepFunction`
+            objects is returned.
+            If `True`, a 2d-array of shape `(n_samples, n_unique_times)` is
+            returned, where `n_unique_times` is the number of unique
+            event times in the training data. Each row represents the survival
+            function of an individual evaluated at `unique_times_`.
         Returns
         -------
         survival : ndarray
-            If `return_array` is set, an array with the probability
-            of survival for each `self.unique_times_`,
-            otherwise an array of :class:`sksurv.functions.StepFunction`
-            will be returned.
+            If `return_array` is `False`, an array of `n_samples`
+            :class:`sksurv.functions.StepFunction` instances is returned.
+            If `return_array` is `True`, a numeric array of shape
+            `(n_samples, n_unique_times)` is returned.
         Examples
         --------
-        >>> import matplotlib.pyplot as plt
-        >>> from sksurv.datasets import load_whas500
-        >>> from sksurv.ensemble import RandomSurvivalForest
-        Load and prepare the data.
-        >>> X, y = load_whas500()
-        >>> X = X.astype(float)
-        Fit the model.
-        >>> estimator = RandomSurvivalForest().fit(X, y)
-        Estimate the survival function for the first 5 samples.
-        >>> surv_funcs = estimator.predict_survival_function(X.iloc[:5])
-        Plot the estimated survival functions.
-        >>> for fn in surv_funcs:
-        ...    plt.step(fn.x, fn(fn.x), where="post")
-        ...
-        >>> plt.ylim(0, 1)
-        >>> plt.show()
         """
         return super().predict_survival_function(X, return_array)
@@ -667,7 +639,7 @@ class ExtraSurvivalTrees(SurvivalAnalysisMixin, _BaseSurvivalForest):
     Parameters
     ----------
-    n_estimators : integer, optional, default: 100
+    n_estimators : int, optional, default: 100
         The number of trees in the forest.
     max_depth : int or None, optional, default: None
@@ -700,7 +672,7 @@ class ExtraSurvivalTrees(SurvivalAnalysisMixin, _BaseSurvivalForest):
         the input samples) required to be at a leaf node. Samples have
         equal weight when sample_weight is not provided.
-    max_features : int, float, string or None, optional, default: None
+    max_features : int, float, {'sqrt', 'log2'} or None, optional, default: 'sqrt'
         The number of features to consider when looking for the best split:
         - If int, then consider `max_features` features at each split.
@@ -720,11 +692,11 @@ class ExtraSurvivalTrees(SurvivalAnalysisMixin, _BaseSurvivalForest):
         Best nodes are defined as relative reduction in impurity.
         If None then unlimited number of leaf nodes.
-    bootstrap : boolean, optional, default: True
+    bootstrap : bool, optional, default: True
         Whether bootstrap samples are used when building trees. If False, the
         whole dataset is used to build each tree.
-    oob_score : bool, default: False
+    oob_score : bool, optional, default: False
         Whether to use out-of-bag samples to estimate
         the generalization accuracy.
@@ -757,22 +729,22 @@ class ExtraSurvivalTrees(SurvivalAnalysisMixin, _BaseSurvivalForest):
         - If float, then draw `max_samples * X.shape[0]` samples. Thus,
           `max_samples` should be in the interval `(0.0, 1.0]`.
-    low_memory : boolean, default: False
-        If set, ``predict`` computations use reduced memory but ``predict_cumulative_hazard_function``
-        and ``predict_survival_function`` are not implemented.
+    low_memory : bool, optional, default: False
+        If set, :meth:`predict` computations use reduced memory but :meth:`predict_cumulative_hazard_function`
+        and :meth:`predict_survival_function` are not implemented.
     Attributes
     ----------
     estimators_ : list of SurvivalTree instances
         The collection of fitted sub-estimators.
-    unique_times_ : array of shape = (n_unique_times,)
+    unique_times_ : ndarray, shape = (n_unique_times,)
         Unique time points.
     n_features_in_ : int
-        The number of features when ``fit`` is performed.
+        Number of features seen during ``fit``.
-    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+    feature_names_in_ : ndarray, shape = (`n_features_in_`,)
         Names of features seen during ``fit``. Defined only when `X`
         has feature names that are all strings.
@@ -841,6 +813,7 @@ class ExtraSurvivalTrees(SurvivalAnalysisMixin, _BaseSurvivalForest):
         self.max_leaf_nodes = max_leaf_nodes
         self.low_memory = low_memory
+    @append_cumulative_hazard_example(estimator_mod="ensemble", estimator_class="ExtraSurvivalTrees")
     def predict_cumulative_hazard_function(self, X, return_array=False):
         """Predict cumulative hazard function.
@@ -858,47 +831,33 @@ class ExtraSurvivalTrees(SurvivalAnalysisMixin, _BaseSurvivalForest):
         X : array-like, shape = (n_samples, n_features)
             Data matrix.
-        return_array : boolean, default: False
-            If set, return an array with the cumulative hazard rate
-            for each `self.unique_times_`, otherwise an array of
-            :class:`sksurv.functions.StepFunction`.
+        return_array : bool, default: False
+            Whether to return a single array of cumulative hazard values
+            or a list of step functions.
+            If `False`, a list of :class:`sksurv.functions.StepFunction`
+            objects is returned.
+            If `True`, a 2d-array of shape `(n_samples, n_unique_times)` is
+            returned, where `n_unique_times` is the number of unique
+            event times in the training data. Each row represents the cumulative
+            hazard function of an individual evaluated at `unique_times_`.
         Returns
         -------
         cum_hazard : ndarray
-            If `return_array` is set, an array with the cumulative hazard rate
-            for each `self.unique_times_`, otherwise an array of length `n_samples`
-            of :class:`sksurv.functions.StepFunction` instances will be returned.
+            If `return_array` is `False`, an array of `n_samples`
+            :class:`sksurv.functions.StepFunction` instances is returned.
+            If `return_array` is `True`, a numeric array of shape
+            `(n_samples, n_unique_times)` is returned.
         Examples
         --------
-        >>> import matplotlib.pyplot as plt
-        >>> from sksurv.datasets import load_whas500
-        >>> from sksurv.ensemble import ExtraSurvivalTrees
-        Load and prepare the data.
-        >>> X, y = load_whas500()
-        >>> X = X.astype(float)
-        Fit the model.
-        >>> estimator = ExtraSurvivalTrees().fit(X, y)
-        Estimate the cumulative hazard function for the first 5 samples.
-        >>> chf_funcs = estimator.predict_cumulative_hazard_function(X.iloc[:5])
-        Plot the estimated cumulative hazard functions.
-        >>> for fn in chf_funcs:
-        ...    plt.step(fn.x, fn(fn.x), where="post")
-        ...
-        >>> plt.ylim(0, 1)
-        >>> plt.show()
         """
         return super().predict_cumulative_hazard_function(X, return_array)
+    @append_survival_function_example(estimator_mod="ensemble", estimator_class="ExtraSurvivalTrees")
     def predict_survival_function(self, X, return_array=False):
         """Predict survival function.
@@ -916,44 +875,28 @@ class ExtraSurvivalTrees(SurvivalAnalysisMixin, _BaseSurvivalForest):
         X : array-like, shape = (n_samples, n_features)
             Data matrix.
-        return_array : boolean, default: False
-            If set, return an array with the probability
-            of survival for each `self.unique_times_`,
-            otherwise an array of :class:`sksurv.functions.StepFunction`.
+        return_array : bool, default: False
+            Whether to return a single array of survival probabilities
+            or a list of step functions.
+            If `False`, a list of :class:`sksurv.functions.StepFunction`
+            objects is returned.
+            If `True`, a 2d-array of shape `(n_samples, n_unique_times)` is
+            returned, where `n_unique_times` is the number of unique
+            event times in the training data. Each row represents the survival
+            function of an individual evaluated at `unique_times_`.
         Returns
         -------
         survival : ndarray
-            If `return_array` is set, an array with the probability of
-            survival for each `self.unique_times_`, otherwise an array of
-            length `n_samples` of :class:`sksurv.functions.StepFunction`
-            instances will be returned.
+            If `return_array` is `False`, an array of `n_samples`
+            :class:`sksurv.functions.StepFunction` instances is returned.
+            If `return_array` is `True`, a numeric array of shape
+            `(n_samples, n_unique_times)` is returned.
         Examples
         --------
-        >>> import matplotlib.pyplot as plt
-        >>> from sksurv.datasets import load_whas500
-        >>> from sksurv.ensemble import ExtraSurvivalTrees
-        Load and prepare the data.
-        >>> X, y = load_whas500()
-        >>> X = X.astype(float)
-        Fit the model.
-        >>> estimator = ExtraSurvivalTrees().fit(X, y)
-        Estimate the survival function for the first 5 samples.
-        >>> surv_funcs = estimator.predict_survival_function(X.iloc[:5])
-        Plot the estimated survival functions.
-        >>> for fn in surv_funcs:
-        ...    plt.step(fn.x, fn(fn.x), where="post")
-        ...
-        >>> plt.ylim(0, 1)
-        >>> plt.show()
         """
         return super().predict_survival_function(X, return_array)

sksurv/functions.py CHANGED Viewed

@@ -18,31 +18,29 @@ __all__ = ["StepFunction"]
 class StepFunction:
-    """Callable step function.
+    r"""A callable step function.
+    The function is defined by a set of points :math:`(x_i, y_i)` and is
+    evaluated as:
     .. math::
-        f(z) = a * y_i + b,
-        x_i \\leq z < x_{i + 1}
+        f(z) = a \cdot y_i + b \quad \text{if} \quad x_i \leq z < x_{i + 1}
     Parameters
     ----------
     x : ndarray, shape = (n_points,)
-        Values on the x axis in ascending order.
+        The values on the x-axis, must be in ascending order.
     y : ndarray, shape = (n_points,)
-        Corresponding values on the y axis.
+        The corresponding values on the y-axis.
     a : float, optional, default: 1.0
-        Constant to multiply by.
+        A constant factor to scale ``y`` by.
     b : float, optional, default: 0.0
-        Constant offset term.
-    domain : tuple, optional
-        A tuple with two entries that sets the limits of the
-        domain of the step function.
-        If entry is `None`, use the first/last value of `x` as limit.
+        A constant offset term.
+    domain : tuple, optional, default: (0, None)
+        A tuple ``(lower, upper)`` that defines the domain of the step function.
+        If ``lower`` or ``upper`` is ``None``, the first or last value of ``x`` is
+        used as the limit, respectively.
     """
     def __init__(self, x, y, *, a=1.0, b=0.0, domain=(0, None)):
@@ -57,36 +55,38 @@ class StepFunction:
     @property
     def domain(self):
-        """Returns the domain of the function, that means
-        the range of values that the function accepts.
+        """The domain of the function.
+        The domain is the range of values that the function accepts.
         Returns
         -------
         lower_limit : float
-            Lower limit of domain.
+            Lower limit of the domain.
         upper_limit : float
-            Upper limit of domain.
+            Upper limit of the domain.
         """
         return self._domain
     def __call__(self, x):
-        """Evaluate step function.
-        Values outside the interval specified by `self.domain`
-        will raise an exception.
-        Values in `x` that are in the interval `[self.domain[0]; self.x[0]]`
-        get mapped to `self.y[0]`.
+        """Evaluate the step function at given values.
         Parameters
         ----------
-        x : float|array-like, shape=(n_values,)
-            Values to evaluate step function at.
+        x : float or array-like, shape=(n_values,)
+            The values at which to evaluate the step function.
+            Values must be within the function's ``domain``.
         Returns
         -------
-        y : float|array-like, shape=(n_values,)
-            Values of step function at `x`.
+        y : float or array-like, shape=(n_values,)
+            The value of the step function at ``x``.
+        Raises
+        ------
+        ValueError
+            If ``x`` contains values outside the function's ``domain``.
         """
         x = np.atleast_1d(x)
         if not np.isfinite(x).all():

sksurv/io/arffread.py CHANGED Viewed

@@ -12,6 +12,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 import numpy as np
 import pandas as pd
+from pandas.api.types import is_string_dtype
 from scipy.io.arff import loadarff as scipy_loadarff
 __all__ = ["loadarff"]
@@ -34,7 +35,8 @@ def _to_pandas(data, meta):
             data_dict[name] = pd.Categorical(raw, categories=attr_format, ordered=False)
         else:
             arr = data[name]
-            p = pd.Series(arr, dtype=arr.dtype)
+            dtype = "str" if is_string_dtype(arr.dtype) else arr.dtype
+            p = pd.Series(arr, dtype=dtype)
             data_dict[name] = p
     # currently, this step converts all pandas.Categorical columns back to pandas.Series
@@ -42,17 +44,48 @@ def _to_pandas(data, meta):
 def loadarff(filename):
-    """Load ARFF file
+    """Load ARFF file.
     Parameters
     ----------
-    filename : string
-        Path to ARFF file
+    filename : str or file-like
+        Path to ARFF file, or file-like object to read from.
     Returns
     -------
     data_frame : :class:`pandas.DataFrame`
         DataFrame containing data of ARFF file
+    See Also
+    --------
+    scipy.io.arff.loadarff : The underlying function that reads the ARFF file.
+    Examples
+    --------
+    >>> from io import StringIO
+    >>> from sksurv.io import loadarff
+    >>>
+    >>> # Create a dummy ARFF file
+    >>> arff_content = '''
+    ... @relation test_data
+    ... @attribute feature1 numeric
+    ... @attribute feature2 numeric
+    ... @attribute class {A,B,C}
+    ... @data
+    ... 1.0,2.0,A
+    ... 3.0,4.0,B
+    ... 5.0,6.0,C
+    ... '''
+    >>>
+    >>> # Load the ARFF file
+    >>> with StringIO(arff_content) as f:
+    ...     data = loadarff(f)
+    >>>
+    >>> print(data)
+      class  feature1  feature2
+    0     A       1.0       2.0
+    1     B       3.0       4.0
+    2     C       5.0       6.0
     """
     data, meta = scipy_loadarff(filename)
     return _to_pandas(data, meta)

sksurv/io/arffwrite.py CHANGED Viewed

@@ -15,7 +15,7 @@ import re
 import numpy as np
 import pandas as pd
-from pandas.api.types import CategoricalDtype, is_object_dtype
+from pandas.api.types import CategoricalDtype, is_string_dtype
 _ILLEGAL_CHARACTER_PAT = re.compile(r"[^-_=\w\d\(\)<>\.]")
@@ -28,15 +28,51 @@ def writearff(data, filename, relation_name=None, index=True):
     data : :class:`pandas.DataFrame`
         DataFrame containing data
-    filename : string or file-like object
+    filename : str or file-like object
         Path to ARFF file or file-like object. In the latter case,
         the handle is closed by calling this function.
-    relation_name : string, optional, default: "pandas"
+    relation_name : str, optional, default: 'pandas'
         Name of relation in ARFF file.
     index : boolean, optional, default: True
         Write row names (index)
+    See Also
+    --------
+    loadarff : Function to read ARFF files.
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import pandas as pd
+    >>> from sksurv.io import writearff
+    >>>
+    >>> # Create a dummy DataFrame
+    >>> data = pd.DataFrame({
+    ...     'feature1': [1.0, 3.0, 5.0],
+    ...     'feature2': [2.0, np.nan, 6.0],
+    ...     'class': ['A', 'B', 'C']
+    ... }, index=['One', 'Two', 'Three'])
+    >>>
+    >>> # Write to ARFF file
+    >>> writearff(data, 'test_output.arff', relation_name='test_data')
+    >>>
+    >>> # Read contents of ARFF file
+    >>> with open('test_output.arff') as f:
+    ...     arff_contents = "".join(f.readlines())
+    >>> print(arff_contents)
+    @relation test_data
+    <BLANKLINE>
+    @attribute index        {One,Three,Two}
+    @attribute feature1     real
+    @attribute feature2     real
+    @attribute class        {A,B,C}
+    <BLANKLINE>
+    @data
+    One,1.0,2.0,A
+    Two,3.0,?,B
+    Three,5.0,6.0,C
     """
     if isinstance(filename, str):
         fp = open(filename, "w")
@@ -70,7 +106,7 @@ def _write_header(data, fp, relation_name, index):
         name = attribute_names[column]
         fp.write(f"@attribute {name}\t")
-        if isinstance(series.dtype, CategoricalDtype) or is_object_dtype(series):
+        if isinstance(series.dtype, CategoricalDtype) or is_string_dtype(series.dtype):
             _write_attribute_categorical(series, fp)
         elif np.issubdtype(series.dtype, np.floating):
             fp.write("real")
@@ -132,7 +168,7 @@ def _write_data(data, fp):
     fp.write("@data\n")
     def to_str(x):
-        if pd.isnull(x):
+        if pd.isna(x):
             return "?"
         return str(x)

sksurv/kernels/_clinical_kernel.cp312-win_amd64.pyd CHANGED Viewed

Binary file