scikit-survival 0.23.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. scikit_survival-0.23.1.dist-info/COPYING +674 -0
  2. scikit_survival-0.23.1.dist-info/METADATA +888 -0
  3. scikit_survival-0.23.1.dist-info/RECORD +55 -0
  4. scikit_survival-0.23.1.dist-info/WHEEL +5 -0
  5. scikit_survival-0.23.1.dist-info/top_level.txt +1 -0
  6. sksurv/__init__.py +138 -0
  7. sksurv/base.py +103 -0
  8. sksurv/bintrees/__init__.py +15 -0
  9. sksurv/bintrees/_binarytrees.cp313-win_amd64.pyd +0 -0
  10. sksurv/column.py +201 -0
  11. sksurv/compare.py +123 -0
  12. sksurv/datasets/__init__.py +10 -0
  13. sksurv/datasets/base.py +436 -0
  14. sksurv/datasets/data/GBSG2.arff +700 -0
  15. sksurv/datasets/data/actg320.arff +1169 -0
  16. sksurv/datasets/data/breast_cancer_GSE7390-metastasis.arff +283 -0
  17. sksurv/datasets/data/flchain.arff +7887 -0
  18. sksurv/datasets/data/veteran.arff +148 -0
  19. sksurv/datasets/data/whas500.arff +520 -0
  20. sksurv/ensemble/__init__.py +2 -0
  21. sksurv/ensemble/_coxph_loss.cp313-win_amd64.pyd +0 -0
  22. sksurv/ensemble/boosting.py +1610 -0
  23. sksurv/ensemble/forest.py +947 -0
  24. sksurv/ensemble/survival_loss.py +151 -0
  25. sksurv/exceptions.py +18 -0
  26. sksurv/functions.py +114 -0
  27. sksurv/io/__init__.py +2 -0
  28. sksurv/io/arffread.py +58 -0
  29. sksurv/io/arffwrite.py +145 -0
  30. sksurv/kernels/__init__.py +1 -0
  31. sksurv/kernels/_clinical_kernel.cp313-win_amd64.pyd +0 -0
  32. sksurv/kernels/clinical.py +328 -0
  33. sksurv/linear_model/__init__.py +3 -0
  34. sksurv/linear_model/_coxnet.cp313-win_amd64.pyd +0 -0
  35. sksurv/linear_model/aft.py +205 -0
  36. sksurv/linear_model/coxnet.py +543 -0
  37. sksurv/linear_model/coxph.py +618 -0
  38. sksurv/meta/__init__.py +4 -0
  39. sksurv/meta/base.py +35 -0
  40. sksurv/meta/ensemble_selection.py +642 -0
  41. sksurv/meta/stacking.py +349 -0
  42. sksurv/metrics.py +996 -0
  43. sksurv/nonparametric.py +588 -0
  44. sksurv/preprocessing.py +155 -0
  45. sksurv/svm/__init__.py +11 -0
  46. sksurv/svm/_minlip.cp313-win_amd64.pyd +0 -0
  47. sksurv/svm/_prsvm.cp313-win_amd64.pyd +0 -0
  48. sksurv/svm/minlip.py +606 -0
  49. sksurv/svm/naive_survival_svm.py +221 -0
  50. sksurv/svm/survival_svm.py +1228 -0
  51. sksurv/testing.py +108 -0
  52. sksurv/tree/__init__.py +1 -0
  53. sksurv/tree/_criterion.cp313-win_amd64.pyd +0 -0
  54. sksurv/tree/tree.py +703 -0
  55. sksurv/util.py +333 -0
@@ -0,0 +1,221 @@
1
+ # This program is free software: you can redistribute it and/or modify
2
+ # it under the terms of the GNU General Public License as published by
3
+ # the Free Software Foundation, either version 3 of the License, or
4
+ # (at your option) any later version.
5
+ #
6
+ # This program is distributed in the hope that it will be useful,
7
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
8
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9
+ # GNU General Public License for more details.
10
+ #
11
+ # You should have received a copy of the GNU General Public License
12
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
13
+ import itertools
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+ from scipy.special import comb
18
+ from sklearn.svm import LinearSVC
19
+ from sklearn.utils import check_random_state
20
+ from sklearn.utils.validation import _get_feature_names
21
+
22
+ from ..base import SurvivalAnalysisMixin
23
+ from ..exceptions import NoComparablePairException
24
+ from ..util import check_array_survival
25
+
26
+
27
class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
    """Naive version of linear Survival Support Vector Machine.

    Uses regular linear support vector classifier (liblinear).
    A new set of samples is created by building the difference between any two feature
    vectors in the original data, thus this version requires :math:`O(\\text{n_samples}^2)` space.

    See :class:`sksurv.svm.HingeLossSurvivalSVM` for the kernel naive survival SVM.

    .. math::

          \\min_{\\mathbf{w}}\\quad
          \\frac{1}{2} \\lVert \\mathbf{w} \\rVert_2^2
          + \\gamma \\sum_{i = 1}^n \\xi_i \\\\
          \\text{subject to}\\quad
          \\mathbf{w}^\\top \\mathbf{x}_i - \\mathbf{w}^\\top \\mathbf{x}_j \\geq 1 - \\xi_{ij},\\quad
          \\forall (i, j) \\in \\mathcal{P}, \\\\
          \\xi_i \\geq 0,\\quad \\forall (i, j) \\in \\mathcal{P}.

          \\mathcal{P} = \\{ (i, j) \\mid y_i > y_j \\land \\delta_j = 1 \\}_{i,j=1,\\dots,n}.

    See [1]_, [2]_ for further description.

    Parameters
    ----------
    alpha : float, positive, default: 1.0
        Weight of penalizing the squared hinge loss in the objective function.
        Passed to liblinear as its ``C`` parameter when fitting.

    loss : {'hinge', 'squared_hinge'}, default: 'squared_hinge'
        Specifies the loss function. 'hinge' is the standard SVM loss
        (used e.g. by the SVC class) while 'squared_hinge' is the
        square of the hinge loss.

    penalty : {'l1', 'l2'}, default: 'l2'
        Specifies the norm used in the penalization. The 'l2'
        penalty is the standard used in SVC. The 'l1' leads to `coef_`
        vectors that are sparse.

    dual : bool, default: False
        Select the algorithm to either solve the dual or primal
        optimization problem. Prefer dual=False when n_samples > n_features.

    tol : float, optional, default: 1e-4
        Tolerance for stopping criteria.

    verbose : int, default: 0
        Enable verbose output. Note that this setting takes advantage of a
        per-process runtime setting in liblinear that, if enabled, may not work
        properly in a multithreaded context.

    random_state : int seed, RandomState instance, or None, default: None
        The seed of the pseudo random number generator to use when
        shuffling the data.

    max_iter : int, default: 1000
        The maximum number of iterations to be run.

    Attributes
    ----------
    n_iter_ : int
        Number of iterations run by the optimization routine to fit the model.

    See also
    --------
    sksurv.svm.FastSurvivalSVM
        Alternative implementation with reduced time complexity for training.

    References
    ----------
    .. [1] Van Belle, V., Pelckmans, K., Suykens, J. A., & Van Huffel, S.
           Support Vector Machines for Survival Analysis. In Proc. of the 3rd Int. Conf.
           on Computational Intelligence in Medicine and Healthcare (CIMED). 1-8. 2007

    .. [2] Evers, L., Messow, C.M.,
           "Sparse kernel methods for high-dimensional survival data",
           Bioinformatics 24(14), 1632-8, 2008.

    """

    # Reuse LinearSVC's constraint declarations, but expose liblinear's ``C``
    # under the name ``alpha`` (same valid range).
    _parameter_constraints = {
        "penalty": LinearSVC._parameter_constraints["penalty"],
        "loss": LinearSVC._parameter_constraints["loss"],
        "dual": LinearSVC._parameter_constraints["dual"],
        "tol": LinearSVC._parameter_constraints["tol"],
        "alpha": LinearSVC._parameter_constraints["C"],
        "verbose": LinearSVC._parameter_constraints["verbose"],
        "random_state": LinearSVC._parameter_constraints["random_state"],
        "max_iter": LinearSVC._parameter_constraints["max_iter"],
    }

    def __init__(
        self,
        penalty="l2",
        loss="squared_hinge",
        *,
        dual=False,
        tol=1e-4,
        alpha=1.0,
        verbose=0,
        random_state=None,
        max_iter=1000,
    ):
        # fit_intercept is fixed to False: the model is trained on pairwise
        # differences of samples, for which an intercept term cancels out.
        super().__init__(
            penalty=penalty,
            loss=loss,
            dual=dual,
            tol=tol,
            verbose=verbose,
            random_state=random_state,
            max_iter=max_iter,
            fit_intercept=False,
        )
        self.alpha = alpha

    def _get_survival_pairs(self, X, y, random_state):  # pylint: disable=no-self-use
        """Expand survival data into a binary classification problem over pairs.

        For every comparable pair of samples, the difference of their feature
        vectors becomes one training sample, labeled +1 or -1 depending on
        which member of the pair has the larger survival time.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Data matrix.

        y : structured array, shape = (n_samples,)
            Event indicator as first field, observed time as second field.

        random_state : RandomState instance
            Used to shuffle the sample order before enumerating pairs.

        Returns
        -------
        x_pairs : ndarray or DataFrame, shape = (n_comparable_pairs, n_features)
            Pairwise feature differences. Returned as a DataFrame with the
            original column names if ``X`` had feature names.

        y_pairs : ndarray of int8, shape = (n_comparable_pairs,)
            Pair labels in {-1, +1}.
        """
        # Capture feature names before validation converts X to an ndarray,
        # so they can be restored on the returned pair matrix.
        feature_names = _get_feature_names(X)

        X = self._validate_data(X, ensure_min_samples=2)
        event, time = check_array_survival(X, y)

        # Shuffle sample order so the pair enumeration below is randomized.
        idx = np.arange(X.shape[0], dtype=int)
        random_state.shuffle(idx)

        # Pre-allocate for the worst case of all n*(n-1)/2 pairs being
        # comparable; trimmed to the actual count afterwards.
        n_pairs = int(comb(X.shape[0], 2))
        x_pairs = np.empty((n_pairs, X.shape[1]), dtype=float)
        y_pairs = np.empty(n_pairs, dtype=np.int8)
        k = 0  # number of comparable pairs written so far
        for xi, xj in itertools.combinations(idx, 2):
            # A pair is comparable if the sample with the smaller observed
            # time experienced an event (its true survival time is known to
            # be smaller); the label encodes which member survived longer.
            if time[xi] > time[xj] and event[xj]:
                np.subtract(X[xi, :], X[xj, :], out=x_pairs[k, :])
                y_pairs[k] = 1
                k += 1
            elif time[xi] < time[xj] and event[xi]:
                np.subtract(X[xi, :], X[xj, :], out=x_pairs[k, :])
                y_pairs[k] = -1
                k += 1
            elif time[xi] == time[xj] and (event[xi] or event[xj]):
                # Tied times: keep the pair if at least one event occurred,
                # labeling by which sample had the event.
                np.subtract(X[xi, :], X[xj, :], out=x_pairs[k, :])
                y_pairs[k] = 1 if event[xj] else -1
                k += 1

        # Shrink the over-allocated buffers in place to the k pairs actually
        # filled; refcheck=False permits resizing despite existing references.
        x_pairs.resize((k, X.shape[1]), refcheck=False)
        y_pairs.resize(k, refcheck=False)

        if feature_names is not None:
            x_pairs = pd.DataFrame(x_pairs, columns=feature_names)
        return x_pairs, y_pairs

    def fit(self, X, y, sample_weight=None):
        """Build a survival support vector machine model from training data.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Data matrix.

        y : structured array, shape = (n_samples,)
            A structured array containing the binary event indicator
            as first field, and time of event or time of censoring as
            second field.

        sample_weight : array-like, shape = (n_samples,), optional
            Array of weights that are assigned to individual
            samples. If not provided,
            then each sample is given unit weight.

        Returns
        -------
        self

        Raises
        ------
        NoComparablePairException
            If the data contains no comparable pair of samples.
        """
        random_state = check_random_state(self.random_state)

        x_pairs, y_pairs = self._get_survival_pairs(X, y, random_state)
        if x_pairs.shape[0] == 0:
            raise NoComparablePairException("Data has no comparable pairs, cannot fit model.")

        # LinearSVC's regularization parameter is named C; expose it as alpha.
        self.C = self.alpha
        return super().fit(x_pairs, y_pairs, sample_weight=sample_weight)

    def predict(self, X):
        """Rank samples according to survival times

        Lower ranks indicate shorter survival, higher ranks longer survival.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features,)
            The input samples.

        Returns
        -------
        y : ndarray, shape = (n_samples,)
            Predicted ranks.
        """
        # Negate the decision function so that larger values correspond to
        # longer predicted survival, per the model's ranking convention.
        return -self.decision_function(X)