scikit-survival 0.23.1__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scikit_survival-0.23.1.dist-info/COPYING +674 -0
- scikit_survival-0.23.1.dist-info/METADATA +888 -0
- scikit_survival-0.23.1.dist-info/RECORD +55 -0
- scikit_survival-0.23.1.dist-info/WHEEL +5 -0
- scikit_survival-0.23.1.dist-info/top_level.txt +1 -0
- sksurv/__init__.py +138 -0
- sksurv/base.py +103 -0
- sksurv/bintrees/__init__.py +15 -0
- sksurv/bintrees/_binarytrees.cp313-win_amd64.pyd +0 -0
- sksurv/column.py +201 -0
- sksurv/compare.py +123 -0
- sksurv/datasets/__init__.py +10 -0
- sksurv/datasets/base.py +436 -0
- sksurv/datasets/data/GBSG2.arff +700 -0
- sksurv/datasets/data/actg320.arff +1169 -0
- sksurv/datasets/data/breast_cancer_GSE7390-metastasis.arff +283 -0
- sksurv/datasets/data/flchain.arff +7887 -0
- sksurv/datasets/data/veteran.arff +148 -0
- sksurv/datasets/data/whas500.arff +520 -0
- sksurv/ensemble/__init__.py +2 -0
- sksurv/ensemble/_coxph_loss.cp313-win_amd64.pyd +0 -0
- sksurv/ensemble/boosting.py +1610 -0
- sksurv/ensemble/forest.py +947 -0
- sksurv/ensemble/survival_loss.py +151 -0
- sksurv/exceptions.py +18 -0
- sksurv/functions.py +114 -0
- sksurv/io/__init__.py +2 -0
- sksurv/io/arffread.py +58 -0
- sksurv/io/arffwrite.py +145 -0
- sksurv/kernels/__init__.py +1 -0
- sksurv/kernels/_clinical_kernel.cp313-win_amd64.pyd +0 -0
- sksurv/kernels/clinical.py +328 -0
- sksurv/linear_model/__init__.py +3 -0
- sksurv/linear_model/_coxnet.cp313-win_amd64.pyd +0 -0
- sksurv/linear_model/aft.py +205 -0
- sksurv/linear_model/coxnet.py +543 -0
- sksurv/linear_model/coxph.py +618 -0
- sksurv/meta/__init__.py +4 -0
- sksurv/meta/base.py +35 -0
- sksurv/meta/ensemble_selection.py +642 -0
- sksurv/meta/stacking.py +349 -0
- sksurv/metrics.py +996 -0
- sksurv/nonparametric.py +588 -0
- sksurv/preprocessing.py +155 -0
- sksurv/svm/__init__.py +11 -0
- sksurv/svm/_minlip.cp313-win_amd64.pyd +0 -0
- sksurv/svm/_prsvm.cp313-win_amd64.pyd +0 -0
- sksurv/svm/minlip.py +606 -0
- sksurv/svm/naive_survival_svm.py +221 -0
- sksurv/svm/survival_svm.py +1228 -0
- sksurv/testing.py +108 -0
- sksurv/tree/__init__.py +1 -0
- sksurv/tree/_criterion.cp313-win_amd64.pyd +0 -0
- sksurv/tree/tree.py +703 -0
- sksurv/util.py +333 -0
sksurv/svm/minlip.py
ADDED
|
@@ -0,0 +1,606 @@
|
|
|
1
|
+
from abc import ABCMeta, abstractmethod
|
|
2
|
+
import numbers
|
|
3
|
+
import warnings
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
from scipy import linalg, sparse
|
|
7
|
+
from sklearn.base import BaseEstimator
|
|
8
|
+
from sklearn.exceptions import ConvergenceWarning
|
|
9
|
+
from sklearn.metrics.pairwise import PAIRWISE_KERNEL_FUNCTIONS, pairwise_kernels
|
|
10
|
+
from sklearn.utils._param_validation import Interval, StrOptions
|
|
11
|
+
|
|
12
|
+
from ..base import SurvivalAnalysisMixin
|
|
13
|
+
from ..exceptions import NoComparablePairException
|
|
14
|
+
from ..util import check_array_survival
|
|
15
|
+
from ._minlip import create_difference_matrix
|
|
16
|
+
|
|
17
|
+
__all__ = ["MinlipSurvivalAnalysis", "HingeLossSurvivalSVM"]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class QPSolver(metaclass=ABCMeta):
    """
    Solves a quadratic program::

        minimize (1/2)*x'*P*x + q'*x
        subject to G*x <= h
    """

    @abstractmethod
    def __init__(self, max_iter, verbose):
        # max_iter: iteration limit forwarded to the backend solver;
        #           None means "use the backend's default".
        # verbose: whether the backend prints progress output.
        self.max_iter = max_iter
        self.verbose = verbose

    @abstractmethod
    def solve(self, P, q, G, h):
        """Returns solution to QP."""
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class OsqpSolver(QPSolver):
    """Solve the quadratic program with the OSQP solver.

    The problem ``minimize (1/2)*x'*P*x + q'*x subject to G*x <= h``
    is handed to OSQP as ``A=G`` with upper bound ``u=h`` and no lower
    bound, using tightened tolerances and solution polishing.
    """

    def __init__(self, max_iter, verbose):
        super().__init__(
            max_iter=max_iter,
            verbose=verbose,
        )

    def solve(self, P, q, G, h):
        """Solve the QP and return ``(x, n_iter)``.

        ``x`` is returned with a leading singleton axis, i.e. with
        shape ``(1, n)``, matching the other solver backends.
        """
        import osqp

        # OSQP requires the quadratic term in sparse CSC format;
        # convert once and reuse (the original converted it twice).
        P = sparse.csc_matrix(P)

        solver_opts = self._get_options()
        m = osqp.OSQP()
        m.setup(P=P, q=q, A=G, u=h, **solver_opts)
        results = m.solve()

        if results.info.status_val == -2:  # max iter reached
            warnings.warn(
                (f"OSQP solver did not converge: {results.info.status}"),
                category=ConvergenceWarning,
                stacklevel=2,
            )
        elif results.info.status_val not in (1, 2):  # pragma: no cover
            # none of: solved, solved inaccurate
            raise RuntimeError(f"OSQP solver failed: {results.info.status}")

        n_iter = results.info.iter
        return results.x[np.newaxis], n_iter

    def _get_options(self):
        # Tighter-than-default tolerances; fall back to 4000 iterations
        # when the user did not specify a limit.
        solver_opts = {
            "eps_abs": 1e-5,
            "eps_rel": 1e-5,
            "max_iter": self.max_iter or 4000,
            "polish": True,
            "verbose": self.verbose,
        }
        return solver_opts
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class EcosSolver(QPSolver):
    """Solves QP by expressing it as second-order cone program::

        minimize c^T @ x
        subject to G @ x <=_K h

    where the last inequality is generalized, i.e. ``h - G*x``
    belongs to the cone ``K``. ECOS supports the positive orthant
    ``R_+`` and second-order cones ``Q_n``.
    """

    # Exit codes reported by ECOS in results["info"]["exitFlag"].
    EXIT_OPTIMAL = 0  # Optimal solution found
    EXIT_PINF = 1  # Certificate of primal infeasibility found
    EXIT_DINF = 2  # Certificate of dual infeasibility found
    EXIT_MAXIT = -1  # Maximum number of iterations reached
    EXIT_NUMERICS = -2  # Numerical problems (unreliable search direction)
    EXIT_OUTCONE = -3  # Numerical problems (slacks or multipliers outside cone)
    # Offset added to a code when the solution is only close to the
    # requested accuracy (see _check_success).
    EXIT_INACC_OFFSET = 10

    def __init__(self, max_iter, verbose, cond=None):
        super().__init__(
            max_iter=max_iter,
            verbose=verbose,
        )
        # Eigenvalue cutoff used in _decompose; if None it is derived
        # there from the matrix size and floating-point precision.
        self.cond = cond

    def solve(self, P, q, G, h):
        import ecos

        n_pairs = P.shape[0]
        # Factor P such that P / max_eigval == L.T @ L (see _decompose).
        L, max_eigval = self._decompose(P)

        # minimize wrt t,x
        # The quadratic objective (1/2) x'Px is replaced by an epigraph
        # variable t (prepended as component 0), constrained below via a
        # second-order cone.
        c = np.empty(n_pairs + 1)
        c[1:] = q
        c[0] = 0.5 * max_eigval

        zerorow = np.zeros((1, L.shape[1]))
        # With the h entries set below (1, 1, 0...), these rows encode
        # 1 + t >= ||(1 - t, 2*L@x)||_2, equivalent to t >= ||L@x||^2.
        G_quad = np.block(
            [
                [-1, zerorow],
                [1, zerorow],
                [np.zeros((L.shape[0], 1)), -2 * L],
            ]
        )
        # Linear inequalities G x <= h get a zero column for t.
        G_lin = sparse.hstack((sparse.csc_matrix((G.shape[0], 1)), G))
        G_all = sparse.vstack((G_lin, sparse.csc_matrix(G_quad)), format="csc")

        n_constraints = G.shape[0]
        h_all = np.empty(G_all.shape[0])
        h_all[:n_constraints] = h
        h_all[n_constraints : (n_constraints + 2)] = 1
        h_all[(n_constraints + 2) :] = 0

        dims = {
            "l": G.shape[0],  # scalar, dimension of positive orthant
            "q": [G_quad.shape[0]],  # vector with dimensions of second order cones
        }
        results = ecos.solve(c, G_all, h_all, dims, verbose=self.verbose, max_iters=self.max_iter or 1000)
        self._check_success(results)

        # drop solution for t
        x = results["x"][1:]
        n_iter = results["info"]["iter"]
        return x[np.newaxis], n_iter

    def _check_success(self, results):  # pylint: disable=no-self-use
        """Raise or warn depending on the ECOS exit flag."""
        exit_flag = results["info"]["exitFlag"]
        # Accept exact optimum and the "optimal but inaccurate" variant.
        if exit_flag in (EcosSolver.EXIT_OPTIMAL, EcosSolver.EXIT_OPTIMAL + EcosSolver.EXIT_INACC_OFFSET):
            return

        if exit_flag == EcosSolver.EXIT_MAXIT:
            warnings.warn(
                "ECOS solver did not converge: maximum iterations reached", category=ConvergenceWarning, stacklevel=3
            )
        elif exit_flag == EcosSolver.EXIT_PINF:  # pragma: no cover
            raise RuntimeError("Certificate of primal infeasibility found")
        elif exit_flag == EcosSolver.EXIT_DINF:  # pragma: no cover
            raise RuntimeError("Certificate of dual infeasibility found")
        else:  # pragma: no cover
            raise RuntimeError(f"Unknown problem in ECOS solver, exit status: {exit_flag}")

    def _decompose(self, P):
        """Return ``(L, lmax)`` such that ``P / lmax == L.T @ L``.

        Eigenvalues with magnitude below the cutoff are truncated,
        so ``L`` may have fewer rows than ``P``.
        """
        # from scipy.linalg.pinvh
        s, u = linalg.eigh(P)
        largest_eigenvalue = np.max(np.abs(s))

        cond = self.cond
        if cond is None:
            t = u.dtype
            cond = largest_eigenvalue * max(P.shape) * np.finfo(t).eps

        # NOTE(review): abs(s) > -cond is always true for cond > 0, so this
        # assertion can never fire; possibly `s > -cond` (reject matrices
        # with significantly negative eigenvalues) was intended — confirm.
        not_below_cutoff = abs(s) > -cond
        assert not_below_cutoff.all(), f"matrix has negative eigenvalues: {s.min()}"

        above_cutoff = abs(s) > cond
        u = u[:, above_cutoff]
        s = s[above_cutoff]

        # set maximum eigenvalue to 1
        decomposed = u * np.sqrt(s / largest_eigenvalue)
        return decomposed.T, largest_eigenvalue
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
class MinlipSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
|
|
184
|
+
"""Survival model related to survival SVM, using a minimal Lipschitz smoothness strategy
|
|
185
|
+
instead of a maximal margin strategy.
|
|
186
|
+
|
|
187
|
+
.. math::
|
|
188
|
+
|
|
189
|
+
\\min_{\\mathbf{w}}\\quad
|
|
190
|
+
\\frac{1}{2} \\lVert \\mathbf{w} \\rVert_2^2
|
|
191
|
+
+ \\gamma \\sum_{i = 1}^n \\xi_i \\\\
|
|
192
|
+
\\text{subject to}\\quad
|
|
193
|
+
\\mathbf{w}^\\top \\mathbf{x}_i - \\mathbf{w}^\\top \\mathbf{x}_j \\geq y_i - y_j - \\xi_i,\\quad
|
|
194
|
+
\\forall (i, j) \\in \\mathcal{P}_\\text{1-NN}, \\\\
|
|
195
|
+
\\xi_i \\geq 0,\\quad \\forall i = 1,\\dots,n.
|
|
196
|
+
|
|
197
|
+
\\mathcal{P}_\\text{1-NN} = \\{ (i, j) \\mid y_i > y_j \\land \\delta_j = 1
|
|
198
|
+
\\land \\nexists k : y_i > y_k > y_j \\land \\delta_k = 1 \\}_{i,j=1}^n.
|
|
199
|
+
|
|
200
|
+
See [1]_ for further description.
|
|
201
|
+
|
|
202
|
+
Parameters
|
|
203
|
+
----------
|
|
204
|
+
alpha : float, positive, default: 1
|
|
205
|
+
Weight of penalizing the hinge loss in the objective function.
|
|
206
|
+
|
|
207
|
+
solver : {'ecos', 'osqp'}, optional, default: 'ecos'
|
|
208
|
+
Which quadratic program solver to use.
|
|
209
|
+
|
|
210
|
+
kernel : str or callable, default: 'linear'.
|
|
211
|
+
Kernel mapping used internally. This parameter is directly passed to
|
|
212
|
+
:func:`sklearn.metrics.pairwise.pairwise_kernels`.
|
|
213
|
+
If `kernel` is a string, it must be one of the metrics
|
|
214
|
+
in `sklearn.pairwise.PAIRWISE_KERNEL_FUNCTIONS` or "precomputed".
|
|
215
|
+
If `kernel` is "precomputed", X is assumed to be a kernel matrix.
|
|
216
|
+
Alternatively, if `kernel` is a callable function, it is called on
|
|
217
|
+
each pair of instances (rows) and the resulting value recorded. The
|
|
218
|
+
callable should take two rows from X as input and return the
|
|
219
|
+
corresponding kernel value as a single number. This means that
|
|
220
|
+
callables from :mod:`sklearn.metrics.pairwise` are not allowed, as
|
|
221
|
+
they operate on matrices, not single samples. Use the string
|
|
222
|
+
identifying the kernel instead.
|
|
223
|
+
|
|
224
|
+
gamma : float, optional, default: None
|
|
225
|
+
Gamma parameter for the RBF, laplacian, polynomial, exponential chi2
|
|
226
|
+
and sigmoid kernels. Interpretation of the default value is left to
|
|
227
|
+
the kernel; see the documentation for :mod:`sklearn.metrics.pairwise`.
|
|
228
|
+
Ignored by other kernels.
|
|
229
|
+
|
|
230
|
+
degree : int, default: 3
|
|
231
|
+
Degree of the polynomial kernel. Ignored by other kernels.
|
|
232
|
+
|
|
233
|
+
coef0 : float, optional
|
|
234
|
+
Zero coefficient for polynomial and sigmoid kernels.
|
|
235
|
+
Ignored by other kernels.
|
|
236
|
+
|
|
237
|
+
kernel_params : mapping of string to any, optional
|
|
238
|
+
Additional parameters (keyword arguments) for kernel function passed
|
|
239
|
+
as callable object.
|
|
240
|
+
|
|
241
|
+
pairs : {'all', 'nearest', 'next'}, optional, default: 'nearest'
|
|
242
|
+
Which constraints to use in the optimization problem.
|
|
243
|
+
|
|
244
|
+
- all: Use all comparable pairs. Scales quadratic in number of samples
|
|
245
|
+
(cf. :class:`sksurv.svm.HingeLossSurvivalSVM`).
|
|
246
|
+
- nearest: Only considers comparable pairs :math:`(i, j)` where :math:`j` is the
|
|
247
|
+
uncensored sample with highest survival time smaller than :math:`y_i`.
|
|
248
|
+
Scales linear in number of samples.
|
|
249
|
+
- next: Only compare against direct nearest neighbor according to observed time,
|
|
250
|
+
disregarding its censoring status. Scales linear in number of samples.
|
|
251
|
+
|
|
252
|
+
verbose : bool, default: False
|
|
253
|
+
Enable verbose output of solver.
|
|
254
|
+
|
|
255
|
+
timeit : False, int or None, default: None
|
|
256
|
+
If non-zero value is provided the time it takes for optimization is measured.
|
|
257
|
+
The given number of repetitions are performed. Results can be accessed from the
|
|
258
|
+
``timings_`` attribute.
|
|
259
|
+
|
|
260
|
+
max_iter : int or None, optional, default: None
|
|
261
|
+
Maximum number of iterations to perform. By default
|
|
262
|
+
use solver's default value.
|
|
263
|
+
|
|
264
|
+
Attributes
|
|
265
|
+
----------
|
|
266
|
+
X_fit_ : ndarray
|
|
267
|
+
Training data.
|
|
268
|
+
|
|
269
|
+
coef_ : ndarray, shape = (n_samples,)
|
|
270
|
+
Coefficients of the features in the decision function.
|
|
271
|
+
|
|
272
|
+
n_features_in_ : int
|
|
273
|
+
Number of features seen during ``fit``.
|
|
274
|
+
|
|
275
|
+
feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
|
276
|
+
Names of features seen during ``fit``. Defined only when `X`
|
|
277
|
+
has feature names that are all strings.
|
|
278
|
+
|
|
279
|
+
n_iter_ : int
|
|
280
|
+
Number of iterations run by the optimization routine to fit the model.
|
|
281
|
+
|
|
282
|
+
References
|
|
283
|
+
----------
|
|
284
|
+
.. [1] Van Belle, V., Pelckmans, K., Suykens, J. A. K., and Van Huffel, S.
|
|
285
|
+
Learning transformation models for ranking and survival analysis.
|
|
286
|
+
The Journal of Machine Learning Research, 12, 819-862. 2011
|
|
287
|
+
"""
|
|
288
|
+
|
|
289
|
+
    # Declarative parameter validation consumed by
    # BaseEstimator._validate_params() at the start of fit().
    _parameter_constraints = {
        "solver": [StrOptions({"ecos", "osqp"})],
        "alpha": [Interval(numbers.Real, 0, None, closed="neither")],
        "kernel": [
            StrOptions(set(PAIRWISE_KERNEL_FUNCTIONS.keys()) | {"precomputed"}),
            callable,
        ],
        "degree": [Interval(numbers.Integral, 0, None, closed="left")],
        "gamma": [Interval(numbers.Real, 0.0, None, closed="left"), None],
        "coef0": [Interval(numbers.Real, None, None, closed="neither")],
        "kernel_params": [dict, None],
        "pairs": [StrOptions({"all", "nearest", "next"})],
        "verbose": ["boolean"],
        "timeit": [Interval(numbers.Integral, 1, None, closed="left"), None],
        "max_iter": [Interval(numbers.Integral, 1, None, closed="left"), None],
    }
|
|
305
|
+
|
|
306
|
+
    def __init__(
        self,
        alpha=1.0,
        *,
        solver="ecos",
        kernel="linear",
        gamma=None,
        degree=3,
        coef0=1,
        kernel_params=None,
        pairs="nearest",
        verbose=False,
        timeit=None,
        max_iter=None,
    ):
        # Per scikit-learn convention, __init__ only stores the
        # parameters verbatim; validation is deferred to fit(),
        # which calls _validate_params().
        self.solver = solver
        self.alpha = alpha
        self.kernel = kernel
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.kernel_params = kernel_params
        self.pairs = pairs
        self.verbose = verbose
        self.timeit = timeit
        self.max_iter = max_iter
|
|
332
|
+
|
|
333
|
+
def _more_tags(self):
|
|
334
|
+
# tell sklearn.utils.metaestimators._safe_split function that we expect kernel matrix
|
|
335
|
+
return {"pairwise": self.kernel == "precomputed"}
|
|
336
|
+
|
|
337
|
+
def _get_kernel(self, X, Y=None):
|
|
338
|
+
if callable(self.kernel):
|
|
339
|
+
params = self.kernel_params or {}
|
|
340
|
+
else:
|
|
341
|
+
params = {"gamma": self.gamma, "degree": self.degree, "coef0": self.coef0}
|
|
342
|
+
return pairwise_kernels(X, Y, metric=self.kernel, filter_params=True, **params)
|
|
343
|
+
|
|
344
|
+
def _setup_qp(self, K, D, time):
|
|
345
|
+
n_pairs = D.shape[0]
|
|
346
|
+
P = D.dot(D.dot(K).T).T
|
|
347
|
+
q = -D.dot(time)
|
|
348
|
+
|
|
349
|
+
Dt = D.T.astype(P.dtype) # cast constraints to correct type
|
|
350
|
+
G = sparse.vstack(
|
|
351
|
+
(
|
|
352
|
+
Dt, # upper bound
|
|
353
|
+
-Dt, # lower bound
|
|
354
|
+
-sparse.eye(n_pairs, dtype=P.dtype), # lower bound >= 0
|
|
355
|
+
),
|
|
356
|
+
format="csc",
|
|
357
|
+
)
|
|
358
|
+
n_constraints = Dt.shape[0]
|
|
359
|
+
h = np.empty(G.shape[0], dtype=float)
|
|
360
|
+
h[: 2 * n_constraints] = self.alpha
|
|
361
|
+
h[-n_pairs:] = 0.0
|
|
362
|
+
|
|
363
|
+
return {"P": P, "q": q, "G": G, "h": h}
|
|
364
|
+
|
|
365
|
+
def _fit(self, x, event, time):
|
|
366
|
+
D = create_difference_matrix(event.astype(np.uint8), time, kind=self.pairs)
|
|
367
|
+
if D.shape[0] == 0:
|
|
368
|
+
raise NoComparablePairException("Data has no comparable pairs, cannot fit model.")
|
|
369
|
+
|
|
370
|
+
max_iter = self.max_iter
|
|
371
|
+
if self.solver == "ecos":
|
|
372
|
+
solver = EcosSolver(max_iter=max_iter, verbose=self.verbose)
|
|
373
|
+
elif self.solver == "osqp":
|
|
374
|
+
solver = OsqpSolver(max_iter=max_iter, verbose=self.verbose)
|
|
375
|
+
|
|
376
|
+
K = self._get_kernel(x)
|
|
377
|
+
problem_data = self._setup_qp(K, D, time)
|
|
378
|
+
|
|
379
|
+
if self.timeit is not None:
|
|
380
|
+
import timeit
|
|
381
|
+
|
|
382
|
+
def _inner():
|
|
383
|
+
return solver.solve(**problem_data)
|
|
384
|
+
|
|
385
|
+
timer = timeit.Timer(_inner)
|
|
386
|
+
self.timings_ = timer.repeat(self.timeit, number=1)
|
|
387
|
+
|
|
388
|
+
coef, n_iter = solver.solve(**problem_data)
|
|
389
|
+
self._update_coef(coef, D)
|
|
390
|
+
self.n_iter_ = n_iter
|
|
391
|
+
self.X_fit_ = x
|
|
392
|
+
|
|
393
|
+
    def _update_coef(self, coef, D):
        # Map per-pair coefficients back to per-sample coefficients:
        # coef has shape (1, n_pairs) and D is a sparse
        # (n_pairs, n_samples) matrix, so `*` is a matrix product
        # (scipy.sparse semantics), giving shape (1, n_samples).
        self.coef_ = coef * D
|
|
395
|
+
|
|
396
|
+
def fit(self, X, y):
|
|
397
|
+
"""Build a MINLIP survival model from training data.
|
|
398
|
+
|
|
399
|
+
Parameters
|
|
400
|
+
----------
|
|
401
|
+
X : array-like, shape = (n_samples, n_features)
|
|
402
|
+
Data matrix.
|
|
403
|
+
|
|
404
|
+
y : structured array, shape = (n_samples,)
|
|
405
|
+
A structured array containing the binary event indicator
|
|
406
|
+
as first field, and time of event or time of censoring as
|
|
407
|
+
second field.
|
|
408
|
+
|
|
409
|
+
Returns
|
|
410
|
+
-------
|
|
411
|
+
self
|
|
412
|
+
"""
|
|
413
|
+
self._validate_params()
|
|
414
|
+
X = self._validate_data(X, ensure_min_samples=2)
|
|
415
|
+
event, time = check_array_survival(X, y)
|
|
416
|
+
self._fit(X, event, time)
|
|
417
|
+
|
|
418
|
+
return self
|
|
419
|
+
|
|
420
|
+
def predict(self, X):
|
|
421
|
+
"""Predict risk score of experiencing an event.
|
|
422
|
+
|
|
423
|
+
Higher scores indicate shorter survival (high risk),
|
|
424
|
+
lower scores longer survival (low risk).
|
|
425
|
+
|
|
426
|
+
Parameters
|
|
427
|
+
----------
|
|
428
|
+
X : array-like, shape = (n_samples, n_features)
|
|
429
|
+
The input samples.
|
|
430
|
+
|
|
431
|
+
Returns
|
|
432
|
+
-------
|
|
433
|
+
y : ndarray, shape = (n_samples,)
|
|
434
|
+
Predicted risk.
|
|
435
|
+
"""
|
|
436
|
+
X = self._validate_data(X, reset=False)
|
|
437
|
+
K = self._get_kernel(X, self.X_fit_)
|
|
438
|
+
pred = -np.dot(self.coef_, K.T)
|
|
439
|
+
return pred.ravel()
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
class HingeLossSurvivalSVM(MinlipSurvivalAnalysis):
|
|
443
|
+
"""Naive implementation of kernel survival support vector machine.
|
|
444
|
+
|
|
445
|
+
A new set of samples is created by building the difference between any two feature
|
|
446
|
+
vectors in the original data, thus this version requires :math:`O(\\text{n_samples}^4)` space and
|
|
447
|
+
:math:`O(\\text{n_samples}^6 \\cdot \\text{n_features})` time.
|
|
448
|
+
|
|
449
|
+
See :class:`sksurv.svm.NaiveSurvivalSVM` for the linear naive survival SVM based on liblinear.
|
|
450
|
+
|
|
451
|
+
.. math::
|
|
452
|
+
|
|
453
|
+
\\min_{\\mathbf{w}}\\quad
|
|
454
|
+
\\frac{1}{2} \\lVert \\mathbf{w} \\rVert_2^2
|
|
455
|
+
+ \\gamma \\sum_{i = 1}^n \\xi_i \\\\
|
|
456
|
+
\\text{subject to}\\quad
|
|
457
|
+
\\mathbf{w}^\\top \\phi(\\mathbf{x})_i - \\mathbf{w}^\\top \\phi(\\mathbf{x})_j \\geq 1 - \\xi_{ij},\\quad
|
|
458
|
+
\\forall (i, j) \\in \\mathcal{P}, \\\\
|
|
459
|
+
\\xi_i \\geq 0,\\quad \\forall (i, j) \\in \\mathcal{P}.
|
|
460
|
+
|
|
461
|
+
\\mathcal{P} = \\{ (i, j) \\mid y_i > y_j \\land \\delta_j = 1 \\}_{i,j=1,\\dots,n}.
|
|
462
|
+
|
|
463
|
+
See [1]_, [2]_, [3]_ for further description.
|
|
464
|
+
|
|
465
|
+
Parameters
|
|
466
|
+
----------
|
|
467
|
+
alpha : float, positive, default: 1
|
|
468
|
+
Weight of penalizing the hinge loss in the objective function.
|
|
469
|
+
|
|
470
|
+
solver : {'ecos', 'osqp'}, optional, default: 'ecos'
|
|
471
|
+
Which quadratic program solver to use.
|
|
472
|
+
|
|
473
|
+
kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed'} or callable, default: 'linear'.
|
|
474
|
+
Kernel mapping used internally. This parameter is directly passed to
|
|
475
|
+
:func:`sklearn.metrics.pairwise.pairwise_kernels`.
|
|
476
|
+
If `kernel` is a string, it must be one of the metrics
|
|
477
|
+
in `sklearn.pairwise.PAIRWISE_KERNEL_FUNCTIONS` or "precomputed".
|
|
478
|
+
If `kernel` is "precomputed", X is assumed to be a kernel matrix.
|
|
479
|
+
Alternatively, if `kernel` is a callable function, it is called on
|
|
480
|
+
each pair of instances (rows) and the resulting value recorded. The
|
|
481
|
+
callable should take two rows from X as input and return the
|
|
482
|
+
corresponding kernel value as a single number. This means that
|
|
483
|
+
callables from :mod:`sklearn.metrics.pairwise` are not allowed, as
|
|
484
|
+
they operate on matrices, not single samples. Use the string
|
|
485
|
+
identifying the kernel instead.
|
|
486
|
+
|
|
487
|
+
gamma : float, optional, default: None
|
|
488
|
+
Gamma parameter for the RBF, laplacian, polynomial, exponential chi2
|
|
489
|
+
and sigmoid kernels. Interpretation of the default value is left to
|
|
490
|
+
the kernel; see the documentation for :mod:`sklearn.metrics.pairwise`.
|
|
491
|
+
Ignored by other kernels.
|
|
492
|
+
|
|
493
|
+
degree : int, default: 3
|
|
494
|
+
Degree of the polynomial kernel. Ignored by other kernels.
|
|
495
|
+
|
|
496
|
+
coef0 : float, optional
|
|
497
|
+
Zero coefficient for polynomial and sigmoid kernels.
|
|
498
|
+
Ignored by other kernels.
|
|
499
|
+
|
|
500
|
+
kernel_params : mapping of string to any, optional
|
|
501
|
+
Additional parameters (keyword arguments) for kernel function passed
|
|
502
|
+
as callable object.
|
|
503
|
+
|
|
504
|
+
pairs : {'all', 'nearest', 'next'}, optional, default: 'all'
|
|
505
|
+
Which constraints to use in the optimization problem.
|
|
506
|
+
|
|
507
|
+
- all: Use all comparable pairs. Scales quadratic in number of samples.
|
|
508
|
+
- nearest: Only considers comparable pairs :math:`(i, j)` where :math:`j` is the
|
|
509
|
+
uncensored sample with highest survival time smaller than :math:`y_i`.
|
|
510
|
+
Scales linear in number of samples (cf. :class:`sksurv.svm.MinlipSurvivalAnalysis`).
|
|
511
|
+
- next: Only compare against direct nearest neighbor according to observed time,
|
|
512
|
+
disregarding its censoring status. Scales linear in number of samples.
|
|
513
|
+
|
|
514
|
+
verbose : bool, default: False
|
|
515
|
+
Enable verbose output of solver.
|
|
516
|
+
|
|
517
|
+
timeit : False, int or None, default: None
|
|
518
|
+
If non-zero value is provided the time it takes for optimization is measured.
|
|
519
|
+
The given number of repetitions are performed. Results can be accessed from the
|
|
520
|
+
``timings_`` attribute.
|
|
521
|
+
|
|
522
|
+
max_iter : int or None, optional, default: None
|
|
523
|
+
Maximum number of iterations to perform. By default
|
|
524
|
+
use solver's default value.
|
|
525
|
+
|
|
526
|
+
Attributes
|
|
527
|
+
----------
|
|
528
|
+
X_fit_ : ndarray
|
|
529
|
+
Training data.
|
|
530
|
+
|
|
531
|
+
coef_ : ndarray, shape = (n_samples,)
|
|
532
|
+
Coefficients of the features in the decision function.
|
|
533
|
+
|
|
534
|
+
n_features_in_ : int
|
|
535
|
+
Number of features seen during ``fit``.
|
|
536
|
+
|
|
537
|
+
feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
|
538
|
+
Names of features seen during ``fit``. Defined only when `X`
|
|
539
|
+
has feature names that are all strings.
|
|
540
|
+
|
|
541
|
+
n_iter_ : int
|
|
542
|
+
Number of iterations run by the optimization routine to fit the model.
|
|
543
|
+
|
|
544
|
+
References
|
|
545
|
+
----------
|
|
546
|
+
.. [1] Van Belle, V., Pelckmans, K., Suykens, J. A., & Van Huffel, S.
|
|
547
|
+
Support Vector Machines for Survival Analysis. In Proc. of the 3rd Int. Conf.
|
|
548
|
+
on Computational Intelligence in Medicine and Healthcare (CIMED). 1-8. 2007
|
|
549
|
+
|
|
550
|
+
.. [2] Evers, L., Messow, C.M.,
|
|
551
|
+
"Sparse kernel methods for high-dimensional survival data",
|
|
552
|
+
Bioinformatics 24(14), 1632-8, 2008.
|
|
553
|
+
|
|
554
|
+
.. [3] Van Belle, V., Pelckmans, K., Suykens, J.A., Van Huffel, S.,
|
|
555
|
+
"Survival SVM: a practical scalable algorithm",
|
|
556
|
+
In: Proc. of 16th European Symposium on Artificial Neural Networks,
|
|
557
|
+
89-94, 2008.
|
|
558
|
+
"""
|
|
559
|
+
|
|
560
|
+
_parameter_constraints = MinlipSurvivalAnalysis._parameter_constraints
|
|
561
|
+
|
|
562
|
+
    def __init__(
        self,
        alpha=1.0,
        *,
        solver="ecos",
        kernel="linear",
        gamma=None,
        degree=3,
        coef0=1,
        kernel_params=None,
        pairs="all",
        verbose=False,
        timeit=None,
        max_iter=None,
    ):
        # Identical to the parent's signature except that `pairs`
        # defaults to "all" (all comparable pairs) instead of "nearest".
        super().__init__(
            solver=solver,
            alpha=alpha,
            kernel=kernel,
            gamma=gamma,
            degree=degree,
            coef0=coef0,
            kernel_params=kernel_params,
            pairs=pairs,
            verbose=verbose,
            timeit=timeit,
            max_iter=max_iter,
        )
|
|
590
|
+
|
|
591
|
+
def _setup_qp(self, K, D, time):
|
|
592
|
+
n_pairs = D.shape[0]
|
|
593
|
+
|
|
594
|
+
P = D.dot(D.dot(K).T).T
|
|
595
|
+
q = -np.ones(n_pairs)
|
|
596
|
+
|
|
597
|
+
G = sparse.vstack((-sparse.eye(n_pairs), sparse.eye(n_pairs)), format="csc")
|
|
598
|
+
h = np.empty(2 * n_pairs)
|
|
599
|
+
h[:n_pairs] = 0
|
|
600
|
+
h[n_pairs:] = self.alpha
|
|
601
|
+
|
|
602
|
+
return {"P": P, "q": q, "G": G, "h": h}
|
|
603
|
+
|
|
604
|
+
    def _update_coef(self, coef, D):
        # Keep only support vectors: pairs whose dual coefficient
        # exceeds a small threshold. coef has shape (1, n_pairs), so
        # flatnonzero yields column indices; `*` with sparse D[sv, :]
        # is a matrix product mapping back to per-sample coefficients.
        sv = np.flatnonzero(coef > 1e-5)
        self.coef_ = coef[:, sv] * D[sv, :]
|