scikit-survival 0.25.0__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scikit_survival-0.25.0.dist-info/METADATA +185 -0
- scikit_survival-0.25.0.dist-info/RECORD +58 -0
- scikit_survival-0.25.0.dist-info/WHEEL +6 -0
- scikit_survival-0.25.0.dist-info/licenses/COPYING +674 -0
- scikit_survival-0.25.0.dist-info/top_level.txt +1 -0
- sksurv/__init__.py +183 -0
- sksurv/base.py +115 -0
- sksurv/bintrees/__init__.py +15 -0
- sksurv/bintrees/_binarytrees.cpython-312-x86_64-linux-gnu.so +0 -0
- sksurv/column.py +205 -0
- sksurv/compare.py +123 -0
- sksurv/datasets/__init__.py +12 -0
- sksurv/datasets/base.py +614 -0
- sksurv/datasets/data/GBSG2.arff +700 -0
- sksurv/datasets/data/actg320.arff +1169 -0
- sksurv/datasets/data/bmt.arff +46 -0
- sksurv/datasets/data/breast_cancer_GSE7390-metastasis.arff +283 -0
- sksurv/datasets/data/cgvhd.arff +118 -0
- sksurv/datasets/data/flchain.arff +7887 -0
- sksurv/datasets/data/veteran.arff +148 -0
- sksurv/datasets/data/whas500.arff +520 -0
- sksurv/docstrings.py +99 -0
- sksurv/ensemble/__init__.py +2 -0
- sksurv/ensemble/_coxph_loss.cpython-312-x86_64-linux-gnu.so +0 -0
- sksurv/ensemble/boosting.py +1564 -0
- sksurv/ensemble/forest.py +902 -0
- sksurv/ensemble/survival_loss.py +151 -0
- sksurv/exceptions.py +18 -0
- sksurv/functions.py +114 -0
- sksurv/io/__init__.py +2 -0
- sksurv/io/arffread.py +89 -0
- sksurv/io/arffwrite.py +181 -0
- sksurv/kernels/__init__.py +1 -0
- sksurv/kernels/_clinical_kernel.cpython-312-x86_64-linux-gnu.so +0 -0
- sksurv/kernels/clinical.py +348 -0
- sksurv/linear_model/__init__.py +3 -0
- sksurv/linear_model/_coxnet.cpython-312-x86_64-linux-gnu.so +0 -0
- sksurv/linear_model/aft.py +208 -0
- sksurv/linear_model/coxnet.py +592 -0
- sksurv/linear_model/coxph.py +637 -0
- sksurv/meta/__init__.py +4 -0
- sksurv/meta/base.py +35 -0
- sksurv/meta/ensemble_selection.py +724 -0
- sksurv/meta/stacking.py +370 -0
- sksurv/metrics.py +1028 -0
- sksurv/nonparametric.py +911 -0
- sksurv/preprocessing.py +183 -0
- sksurv/svm/__init__.py +11 -0
- sksurv/svm/_minlip.cpython-312-x86_64-linux-gnu.so +0 -0
- sksurv/svm/_prsvm.cpython-312-x86_64-linux-gnu.so +0 -0
- sksurv/svm/minlip.py +690 -0
- sksurv/svm/naive_survival_svm.py +249 -0
- sksurv/svm/survival_svm.py +1236 -0
- sksurv/testing.py +108 -0
- sksurv/tree/__init__.py +1 -0
- sksurv/tree/_criterion.cpython-312-x86_64-linux-gnu.so +0 -0
- sksurv/tree/tree.py +790 -0
- sksurv/util.py +415 -0
sksurv/preprocessing.py
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
# This program is free software: you can redistribute it and/or modify
|
|
2
|
+
# it under the terms of the GNU General Public License as published by
|
|
3
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
4
|
+
# (at your option) any later version.
|
|
5
|
+
#
|
|
6
|
+
# This program is distributed in the hope that it will be useful,
|
|
7
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
8
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
9
|
+
# GNU General Public License for more details.
|
|
10
|
+
#
|
|
11
|
+
# You should have received a copy of the GNU General Public License
|
|
12
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
13
|
+
from sklearn.base import BaseEstimator, TransformerMixin
|
|
14
|
+
from sklearn.utils.validation import _check_feature_names, _check_feature_names_in, _check_n_features, check_is_fitted
|
|
15
|
+
|
|
16
|
+
from .column import encode_categorical
|
|
17
|
+
|
|
18
|
+
__all__ = ["OneHotEncoder"]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def check_columns_exist(actual, expected):
    """Verify that a dataframe contains every expected column.

    Parameters
    ----------
    actual : pandas.Index
        Columns that are present in the dataframe.
    expected : pandas.Index
        Columns that are required to be present.

    Raises
    ------
    ValueError
        If one or more of the expected columns do not occur in `actual`.
        The message lists the number and the names of the absent columns.
    """
    absent = expected.difference(actual)
    n_absent = len(absent)
    if n_absent == 0:
        # Nothing is missing, nothing to report.
        return
    raise ValueError(f"{n_absent} features are missing from data: {absent.tolist()}")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class OneHotEncoder(BaseEstimator, TransformerMixin):
    """Encode categorical features using a one-hot scheme.

    This transformer only works on pandas DataFrames. It identifies columns
    with `category` or `object` data type as categorical features.
    The features are encoded using a one-hot (or dummy) encoding scheme, which
    creates a binary column for each category. By default, one category per feature
    is dropped: a column with `M` categories is encoded as `M-1` integer columns
    according to the one-hot scheme.

    The order of non-categorical columns is preserved. Encoded columns are inserted
    in place of the original column.

    Parameters
    ----------
    allow_drop : bool, optional, default: True
        Whether to allow dropping categorical columns that only consist
        of a single category.

    Attributes
    ----------
    feature_names_ : pandas.Index
        Names of categorical features that were encoded.

    categories_ : dict
        A dictionary mapping each categorical feature name to a
        :class:`pandas.Index` of its categories.

    encoded_columns_ : pandas.Index
        The full list of feature names in the transformed output.

    n_features_in_ : int
        Number of features seen during ``fit``.

    feature_names_in_ : ndarray, shape = (`n_features_in_`,)
        Names of features seen during ``fit``. Defined only when `X`
        has feature names that are all strings.
    """

    def __init__(self, *, allow_drop=True):
        self.allow_drop = allow_drop

    def fit(self, X, y=None):  # pylint: disable=unused-argument
        """Determine which features are categorical and should be one-hot encoded.

        Parameters
        ----------
        X : pandas.DataFrame
            The data to determine categorical features from.
        y : None
            Ignored. This parameter exists only for compatibility with
            :class:`sklearn.pipeline.Pipeline`.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        # Fitting is a by-product of fit_transform; the encoded frame is discarded.
        self.fit_transform(X)
        return self

    def _encode(self, X, columns_to_encode):
        """Encode the given columns of ``X``, forwarding ``allow_drop``."""
        return encode_categorical(X, columns=columns_to_encode, allow_drop=self.allow_drop)

    def fit_transform(self, X, y=None, **fit_params):  # pylint: disable=unused-argument
        """Fit to data, then transform it.

        Fits the transformer to ``X`` by identifying categorical features and
        then returns a transformed version of ``X`` with categorical features
        one-hot encoded.

        Parameters
        ----------
        X : pandas.DataFrame
            The data to fit and transform.
        y : None, optional
            Ignored. This parameter exists only for compatibility with
            :class:`sklearn.pipeline.Pipeline`.
        fit_params : dict, optional
            Ignored. This parameter exists only for compatibility with
            :class:`sklearn.pipeline.Pipeline`.

        Returns
        -------
        Xt : pandas.DataFrame
            The transformed data.
        """
        _check_feature_names(self, X, reset=True)
        _check_n_features(self, X, reset=True)
        columns_to_encode = X.select_dtypes(include=["object", "category"]).columns
        x_dummy = self._encode(X, columns_to_encode)

        self.feature_names_ = columns_to_encode
        # The ``.cat`` accessor only exists for categorical dtype, but
        # ``object`` columns are selected above as well. Converting via
        # ``astype("category")`` is a no-op for columns that already are
        # categorical and makes ``object`` columns work instead of raising
        # AttributeError.
        self.categories_ = {k: X[k].astype("category").cat.categories for k in columns_to_encode}
        self.encoded_columns_ = x_dummy.columns
        return x_dummy

    def transform(self, X):
        """Transform ``X`` by one-hot encoding categorical features.

        Parameters
        ----------
        X : pandas.DataFrame
            The data to transform. It must have the same number of columns
            as the data seen during ``fit`` and contain all columns that
            were encoded during ``fit``.

        Returns
        -------
        Xt : pandas.DataFrame
            The transformed data, with columns selected and ordered
            according to ``encoded_columns_``.

        Raises
        ------
        ValueError
            If any of the columns encoded during ``fit`` is missing from ``X``.
        """
        check_is_fitted(self, "encoded_columns_")
        _check_n_features(self, X, reset=False)
        check_columns_exist(X.columns, self.feature_names_)

        # Work on a copy so the caller's dataframe is not modified.
        Xt = X.copy()
        for col, cat in self.categories_.items():
            # Align categories with those seen during fit. ``object`` columns
            # are converted first because they lack the ``.cat`` accessor;
            # categorical columns pass through ``astype`` unchanged.
            Xt[col] = Xt[col].astype("category").cat.set_categories(cat)

        new_data = self._encode(Xt, self.feature_names_)
        return new_data.loc[:, self.encoded_columns_]

    def get_feature_names_out(self, input_features=None):
        """Get output feature names for transformation.

        Parameters
        ----------
        input_features : array-like of str or None, default: None
            Input features.

            - If `input_features` is `None`, then `feature_names_in_` is
              used as feature names in.
            - If `input_features` is an array-like, then `input_features` must
              match `feature_names_in_` if `feature_names_in_` is defined.

        Returns
        -------
        feature_names_out : ndarray of str objects
            Transformed feature names.
        """
        check_is_fitted(self, "encoded_columns_")
        # Called for validation only; the output names were fixed during fit.
        _check_feature_names_in(self, input_features)

        return self.encoded_columns_.values.copy()
|
sksurv/svm/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from .minlip import HingeLossSurvivalSVM, MinlipSurvivalAnalysis
|
|
2
|
+
from .naive_survival_svm import NaiveSurvivalSVM
|
|
3
|
+
from .survival_svm import FastKernelSurvivalSVM, FastSurvivalSVM
|
|
4
|
+
|
|
5
|
+
__all__ = [
|
|
6
|
+
"FastKernelSurvivalSVM",
|
|
7
|
+
"FastSurvivalSVM",
|
|
8
|
+
"HingeLossSurvivalSVM",
|
|
9
|
+
"MinlipSurvivalAnalysis",
|
|
10
|
+
"NaiveSurvivalSVM",
|
|
11
|
+
]
|
|
Binary file
|
|
Binary file
|