scikit-survival 0.23.1__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scikit_survival-0.23.1.dist-info/COPYING +674 -0
- scikit_survival-0.23.1.dist-info/METADATA +888 -0
- scikit_survival-0.23.1.dist-info/RECORD +55 -0
- scikit_survival-0.23.1.dist-info/WHEEL +5 -0
- scikit_survival-0.23.1.dist-info/top_level.txt +1 -0
- sksurv/__init__.py +138 -0
- sksurv/base.py +103 -0
- sksurv/bintrees/__init__.py +15 -0
- sksurv/bintrees/_binarytrees.cpython-313-darwin.so +0 -0
- sksurv/column.py +201 -0
- sksurv/compare.py +123 -0
- sksurv/datasets/__init__.py +10 -0
- sksurv/datasets/base.py +436 -0
- sksurv/datasets/data/GBSG2.arff +700 -0
- sksurv/datasets/data/actg320.arff +1169 -0
- sksurv/datasets/data/breast_cancer_GSE7390-metastasis.arff +283 -0
- sksurv/datasets/data/flchain.arff +7887 -0
- sksurv/datasets/data/veteran.arff +148 -0
- sksurv/datasets/data/whas500.arff +520 -0
- sksurv/ensemble/__init__.py +2 -0
- sksurv/ensemble/_coxph_loss.cpython-313-darwin.so +0 -0
- sksurv/ensemble/boosting.py +1610 -0
- sksurv/ensemble/forest.py +947 -0
- sksurv/ensemble/survival_loss.py +151 -0
- sksurv/exceptions.py +18 -0
- sksurv/functions.py +114 -0
- sksurv/io/__init__.py +2 -0
- sksurv/io/arffread.py +58 -0
- sksurv/io/arffwrite.py +145 -0
- sksurv/kernels/__init__.py +1 -0
- sksurv/kernels/_clinical_kernel.cpython-313-darwin.so +0 -0
- sksurv/kernels/clinical.py +328 -0
- sksurv/linear_model/__init__.py +3 -0
- sksurv/linear_model/_coxnet.cpython-313-darwin.so +0 -0
- sksurv/linear_model/aft.py +205 -0
- sksurv/linear_model/coxnet.py +543 -0
- sksurv/linear_model/coxph.py +618 -0
- sksurv/meta/__init__.py +4 -0
- sksurv/meta/base.py +35 -0
- sksurv/meta/ensemble_selection.py +642 -0
- sksurv/meta/stacking.py +349 -0
- sksurv/metrics.py +996 -0
- sksurv/nonparametric.py +588 -0
- sksurv/preprocessing.py +155 -0
- sksurv/svm/__init__.py +11 -0
- sksurv/svm/_minlip.cpython-313-darwin.so +0 -0
- sksurv/svm/_prsvm.cpython-313-darwin.so +0 -0
- sksurv/svm/minlip.py +606 -0
- sksurv/svm/naive_survival_svm.py +221 -0
- sksurv/svm/survival_svm.py +1228 -0
- sksurv/testing.py +108 -0
- sksurv/tree/__init__.py +1 -0
- sksurv/tree/_criterion.cpython-313-darwin.so +0 -0
- sksurv/tree/tree.py +703 -0
- sksurv/util.py +333 -0
sksurv/preprocessing.py
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
# This program is free software: you can redistribute it and/or modify
|
|
2
|
+
# it under the terms of the GNU General Public License as published by
|
|
3
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
4
|
+
# (at your option) any later version.
|
|
5
|
+
#
|
|
6
|
+
# This program is distributed in the hope that it will be useful,
|
|
7
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
8
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
9
|
+
# GNU General Public License for more details.
|
|
10
|
+
#
|
|
11
|
+
# You should have received a copy of the GNU General Public License
|
|
12
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
13
|
+
from sklearn.base import BaseEstimator, TransformerMixin
|
|
14
|
+
from sklearn.utils.validation import _check_feature_names_in, check_is_fitted
|
|
15
|
+
|
|
16
|
+
from .column import encode_categorical
|
|
17
|
+
|
|
18
|
+
# Public API of this module: the only name exported by a star-import.
__all__ = ["OneHotEncoder"]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def check_columns_exist(actual, expected):
    """Verify that every expected column name is present in the data.

    Parameters
    ----------
    actual : pandas.Index
        Column names found in the data.
    expected : pandas.Index
        Column names that must be present.

    Raises
    ------
    ValueError
        If at least one name in `expected` does not occur in `actual`.
        The message lists the number and the names of the absent columns.
    """
    absent = expected.difference(actual)
    n_absent = len(absent)
    if n_absent == 0:
        return
    raise ValueError(f"{n_absent} features are missing from data: {absent.tolist()}")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class OneHotEncoder(BaseEstimator, TransformerMixin):
    """Encode categorical columns with `M` categories into `M-1` columns according
    to the one-hot scheme.

    The order of non-categorical columns is preserved, encoded columns are inserted
    in place of the original column.

    Columns of dtype ``object`` and ``category`` are encoded. Columns of dtype
    ``object`` are cast to ``category`` when their categories are recorded.

    Parameters
    ----------
    allow_drop : boolean, optional, default: True
        Whether to allow dropping categorical columns that only consist
        of a single category.

    Attributes
    ----------
    feature_names_ : pandas.Index
        Names of the input columns that are encoded.

    categories_ : dict
        Categories of encoded columns, keyed by input column name.

    encoded_columns_ : pandas.Index
        Name of columns after encoding.
        Includes names of non-categorical columns.

    n_features_in_ : int
        Number of features seen during ``fit``.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during ``fit``. Defined only when `X`
        has feature names that are all strings.
    """

    def __init__(self, *, allow_drop=True):
        self.allow_drop = allow_drop

    def fit(self, X, y=None):  # pylint: disable=unused-argument
        """Retrieve categorical columns.

        Parameters
        ----------
        X : pandas.DataFrame
            Data to encode.
        y :
            Ignored. For compatibility with Pipeline.

        Returns
        -------
        self : object
            Returns self
        """
        # Fitting and transforming share all bookkeeping; the encoded
        # result is simply discarded here.
        self.fit_transform(X)
        return self

    @staticmethod
    def _as_categorical(series):
        """Return `series` with categorical dtype if it has dtype ``object``.

        The ``.cat`` accessor is only available on categorical data, whereas
        ``fit_transform`` also selects ``object`` columns for encoding.
        Series of any other dtype are returned unchanged, so that a
        non-categorical column still fails loudly at the ``.cat`` access.
        """
        if series.dtype == object:
            return series.astype("category")
        return series

    def _encode(self, X, columns_to_encode):
        """Encode the given columns of `X`, honoring ``allow_drop``."""
        return encode_categorical(X, columns=columns_to_encode, allow_drop=self.allow_drop)

    def fit_transform(self, X, y=None, **fit_params):  # pylint: disable=unused-argument
        """Convert categorical columns to numeric values.

        Parameters
        ----------
        X : pandas.DataFrame
            Data to encode.
        y :
            Ignored. For compatibility with TransformerMixin.
        fit_params :
            Ignored. For compatibility with TransformerMixin.

        Returns
        -------
        Xt : pandas.DataFrame
            Encoded data.
        """
        self._check_feature_names(X, reset=True)
        self._check_n_features(X, reset=True)
        columns_to_encode = X.select_dtypes(include=["object", "category"]).columns
        x_dummy = self._encode(X, columns_to_encode)

        self.feature_names_ = columns_to_encode
        # Object columns have no ``.cat`` accessor; cast them first.
        # NOTE(review): assumes encode_categorical derives the same category
        # levels from an object column as ``astype("category")`` does --
        # confirm against sksurv.column.
        self.categories_ = {k: self._as_categorical(X[k]).cat.categories for k in columns_to_encode}
        self.encoded_columns_ = x_dummy.columns
        return x_dummy

    def transform(self, X):
        """Convert categorical columns to numeric values.

        Parameters
        ----------
        X : pandas.DataFrame
            Data to encode.

        Returns
        -------
        Xt : pandas.DataFrame
            Encoded data.

        Raises
        ------
        ValueError
            If a column that was encoded during ``fit`` is missing from `X`.
        """
        check_is_fitted(self, "encoded_columns_")
        self._check_n_features(X, reset=False)
        check_columns_exist(X.columns, self.feature_names_)

        # Work on a copy so the caller's data frame is not modified.
        Xt = X.copy()
        for col, cat in self.categories_.items():
            # Align each column with the categories recorded during fit.
            Xt[col] = self._as_categorical(Xt[col]).cat.set_categories(cat)

        new_data = self._encode(Xt, self.feature_names_)
        # Restrict and order the output to the columns produced during fit.
        return new_data.loc[:, self.encoded_columns_]

    def get_feature_names_out(self, input_features=None):
        """Get output feature names for transformation.

        Parameters
        ----------
        input_features : array-like of str or None, default=None
            Input features.

            - If `input_features` is `None`, then `feature_names_in_` is
              used as feature names in.
            - If `input_features` is an array-like, then `input_features` must
              match `feature_names_in_` if `feature_names_in_` is defined.

        Returns
        -------
        feature_names_out : ndarray of str objects
            Transformed feature names.
        """
        check_is_fitted(self, "encoded_columns_")
        # Called only to validate `input_features`; the output names are
        # fully determined by the columns recorded during fit.
        _check_feature_names_in(self, input_features)

        return self.encoded_columns_.values.copy()
|
sksurv/svm/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from .minlip import HingeLossSurvivalSVM, MinlipSurvivalAnalysis
|
|
2
|
+
from .naive_survival_svm import NaiveSurvivalSVM
|
|
3
|
+
from .survival_svm import FastKernelSurvivalSVM, FastSurvivalSVM
|
|
4
|
+
|
|
5
|
+
# Public API of this package: the names imported above from the sibling
# modules, re-exported for star-imports.
__all__ = [
    "FastKernelSurvivalSVM",
    "FastSurvivalSVM",
    "HingeLossSurvivalSVM",
    "MinlipSurvivalAnalysis",
    "NaiveSurvivalSVM",
]
|
|
Binary file
|
|
Binary file
|