scikit-survival 0.25.0__cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. scikit_survival-0.25.0.dist-info/METADATA +185 -0
  2. scikit_survival-0.25.0.dist-info/RECORD +58 -0
  3. scikit_survival-0.25.0.dist-info/WHEEL +6 -0
  4. scikit_survival-0.25.0.dist-info/licenses/COPYING +674 -0
  5. scikit_survival-0.25.0.dist-info/top_level.txt +1 -0
  6. sksurv/__init__.py +183 -0
  7. sksurv/base.py +115 -0
  8. sksurv/bintrees/__init__.py +15 -0
  9. sksurv/bintrees/_binarytrees.cpython-311-x86_64-linux-gnu.so +0 -0
  10. sksurv/column.py +205 -0
  11. sksurv/compare.py +123 -0
  12. sksurv/datasets/__init__.py +12 -0
  13. sksurv/datasets/base.py +614 -0
  14. sksurv/datasets/data/GBSG2.arff +700 -0
  15. sksurv/datasets/data/actg320.arff +1169 -0
  16. sksurv/datasets/data/bmt.arff +46 -0
  17. sksurv/datasets/data/breast_cancer_GSE7390-metastasis.arff +283 -0
  18. sksurv/datasets/data/cgvhd.arff +118 -0
  19. sksurv/datasets/data/flchain.arff +7887 -0
  20. sksurv/datasets/data/veteran.arff +148 -0
  21. sksurv/datasets/data/whas500.arff +520 -0
  22. sksurv/docstrings.py +99 -0
  23. sksurv/ensemble/__init__.py +2 -0
  24. sksurv/ensemble/_coxph_loss.cpython-311-x86_64-linux-gnu.so +0 -0
  25. sksurv/ensemble/boosting.py +1564 -0
  26. sksurv/ensemble/forest.py +902 -0
  27. sksurv/ensemble/survival_loss.py +151 -0
  28. sksurv/exceptions.py +18 -0
  29. sksurv/functions.py +114 -0
  30. sksurv/io/__init__.py +2 -0
  31. sksurv/io/arffread.py +89 -0
  32. sksurv/io/arffwrite.py +181 -0
  33. sksurv/kernels/__init__.py +1 -0
  34. sksurv/kernels/_clinical_kernel.cpython-311-x86_64-linux-gnu.so +0 -0
  35. sksurv/kernels/clinical.py +348 -0
  36. sksurv/linear_model/__init__.py +3 -0
  37. sksurv/linear_model/_coxnet.cpython-311-x86_64-linux-gnu.so +0 -0
  38. sksurv/linear_model/aft.py +208 -0
  39. sksurv/linear_model/coxnet.py +592 -0
  40. sksurv/linear_model/coxph.py +637 -0
  41. sksurv/meta/__init__.py +4 -0
  42. sksurv/meta/base.py +35 -0
  43. sksurv/meta/ensemble_selection.py +724 -0
  44. sksurv/meta/stacking.py +370 -0
  45. sksurv/metrics.py +1028 -0
  46. sksurv/nonparametric.py +911 -0
  47. sksurv/preprocessing.py +183 -0
  48. sksurv/svm/__init__.py +11 -0
  49. sksurv/svm/_minlip.cpython-311-x86_64-linux-gnu.so +0 -0
  50. sksurv/svm/_prsvm.cpython-311-x86_64-linux-gnu.so +0 -0
  51. sksurv/svm/minlip.py +690 -0
  52. sksurv/svm/naive_survival_svm.py +249 -0
  53. sksurv/svm/survival_svm.py +1236 -0
  54. sksurv/testing.py +108 -0
  55. sksurv/tree/__init__.py +1 -0
  56. sksurv/tree/_criterion.cpython-311-x86_64-linux-gnu.so +0 -0
  57. sksurv/tree/tree.py +790 -0
  58. sksurv/util.py +415 -0
sksurv/svm/minlip.py ADDED
@@ -0,0 +1,690 @@
1
+ from abc import ABCMeta, abstractmethod
2
+ import numbers
3
+ import warnings
4
+
5
+ import numpy as np
6
+ from scipy import linalg, sparse
7
+ from sklearn.base import BaseEstimator
8
+ from sklearn.exceptions import ConvergenceWarning
9
+ from sklearn.metrics.pairwise import PAIRWISE_KERNEL_FUNCTIONS, pairwise_kernels
10
+ from sklearn.utils._param_validation import Interval, StrOptions
11
+ from sklearn.utils.validation import validate_data
12
+
13
+ from ..base import SurvivalAnalysisMixin
14
+ from ..exceptions import NoComparablePairException
15
+ from ..util import check_array_survival
16
+ from ._minlip import create_difference_matrix
17
+
18
+ __all__ = ["MinlipSurvivalAnalysis", "HingeLossSurvivalSVM"]
19
+
20
+
21
class QPSolver(metaclass=ABCMeta):
    r"""Common interface for quadratic program solvers.

    Concrete subclasses minimize a quadratic objective function under
    linear inequality constraints:

    .. math::

        \min_{x} \quad (1/2)x^T P x + q^T x \\
        \text{subject to} \quad G x \preceq h

    Parameters
    ----------
    max_iter : int or None
        Maximum number of iterations to perform.
    verbose : bool
        Enable verbose output of the solver.
    """

    @abstractmethod
    def __init__(self, max_iter, verbose):
        self.verbose = verbose
        self.max_iter = max_iter

    @abstractmethod
    def solve(self, P, q, G, h):
        """Compute the minimizer of the QP.

        Parameters
        ----------
        P : array-like, shape=(n_variables, n_variables)
            Quadratic part of the objective function.
        q : array-like, shape=(n_variables,)
            Linear part of the objective function.
        G : array-like, shape=(n_constraints, n_variables)
            Matrix for inequality constraints.
        h : array-like, shape=(n_constraints,)
            Vector for inequality constraints.

        Returns
        -------
        x : ndarray, shape=(n_variables,)
            The optimal solution.
        n_iter : int
            Number of iterations performed by the solver.
        """
68
+
69
+
70
class OsqpSolver(QPSolver):
    """Quadratic program solver backed by the OSQP package.

    Parameters
    ----------
    max_iter : int or None
        Maximum number of iterations to perform.
        If ``None``, OSQP's limit is set to 4000 iterations.
    verbose : bool
        Enable verbose output of the solver.
    """

    def __init__(self, max_iter, verbose):
        super().__init__(
            max_iter=max_iter,
            verbose=verbose,
        )

    def solve(self, P, q, G, h):
        """Find solution to QP via OSQP.

        Parameters
        ----------
        P : array-like, shape=(n_variables, n_variables)
            Quadratic part of the objective function.
        q : array-like, shape=(n_variables,)
            Linear part of the objective function.
        G : sparse matrix, shape=(n_constraints, n_variables)
            Matrix for inequality constraints.
        h : array-like, shape=(n_constraints,)
            Vector for inequality constraints.

        Returns
        -------
        x : ndarray, shape=(1, n_variables)
            The optimal solution.
        n_iter : int
            Number of iterations performed by the solver.

        Raises
        ------
        RuntimeError
            If OSQP terminates with a status other than solved,
            solved inaccurately, or maximum iterations reached.
        """
        import osqp

        solver_opts = self._get_options()
        m = osqp.OSQP()
        # OSQP solves min 0.5 x'Px + q'x s.t. l <= Ax <= u. Only the upper
        # bound u is supplied; the lower bound defaults to -infinity, which
        # matches the one-sided constraint G x <= h.
        # (P is converted to CSC exactly once here; the previous standalone
        # conversion before setup was redundant.)
        m.setup(P=sparse.csc_matrix(P), q=q, A=G, u=h, **solver_opts)
        results = m.solve()

        if results.info.status_val == -2:  # max iter reached
            warnings.warn(
                (f"OSQP solver did not converge: {results.info.status}"),
                category=ConvergenceWarning,
                stacklevel=2,
            )
        elif results.info.status_val not in (1, 2):  # pragma: no cover
            # neither "solved" (1) nor "solved inaccurate" (2)
            raise RuntimeError(f"OSQP solver failed: {results.info.status}")

        n_iter = results.info.iter
        return results.x[np.newaxis], n_iter

    def _get_options(self):
        """Return a dictionary of OSQP solver options."""
        solver_opts = {
            "eps_abs": 1e-5,
            "eps_rel": 1e-5,
            "max_iter": self.max_iter or 4000,
            "polish": True,
            "verbose": self.verbose,
        }
        return solver_opts
110
+
111
+
112
class EcosSolver(QPSolver):
    r"""Solves QP by expressing it as second-order cone program:

    .. math::

        \min \quad c^T x \\
        \text{subject to} \quad G x \preceq_K h

    where the last inequality is generalized, i.e. :math:`h - G x`
    belongs to the cone :math:`K`.

    Parameters
    ----------
    max_iter : int or None
        Maximum number of iterations to perform.
        If ``None``, ECOS's limit is set to 1000 iterations.
    verbose : bool
        Enable verbose output of the solver.
    cond : float or None, default: None
        Cut-off for small eigenvalues when decomposing ``P``.
        Eigenvalues with absolute value at most ``cond`` are dropped.
        If ``None``, a cut-off based on machine precision is used.
    """

    EXIT_OPTIMAL = 0  # Optimal solution found
    EXIT_PINF = 1  # Certificate of primal infeasibility found
    EXIT_DINF = 2  # Certificate of dual infeasibility found
    EXIT_MAXIT = -1  # Maximum number of iterations reached
    EXIT_NUMERICS = -2  # Numerical problems (unreliable search direction)
    EXIT_OUTCONE = -3  # Numerical problems (slacks or multipliers outside cone)
    EXIT_INACC_OFFSET = 10  # offset added to exit codes if solved to reduced accuracy

    def __init__(self, max_iter, verbose, cond=None):
        super().__init__(
            max_iter=max_iter,
            verbose=verbose,
        )
        self.cond = cond

    def solve(self, P, q, G, h):
        """Find solution to QP by conversion to a second-order cone program.

        The quadratic term ``0.5 x'Px`` is bounded by an epigraph variable
        ``t``, turning the objective into a linear function over ``(t, x)``
        with a second-order cone constraint built from a square-root
        factor of ``P``.

        Parameters
        ----------
        P : array-like, shape=(n_variables, n_variables)
            Quadratic part of the objective function.
        q : array-like, shape=(n_variables,)
            Linear part of the objective function.
        G : sparse matrix, shape=(n_constraints, n_variables)
            Matrix for inequality constraints.
        h : array-like, shape=(n_constraints,)
            Vector for inequality constraints.

        Returns
        -------
        x : ndarray, shape=(1, n_variables)
            The optimal solution.
        n_iter : int
            Number of iterations performed by the solver.
        """
        import ecos

        n_pairs = P.shape[0]
        L, max_eigval = self._decompose(P)

        # minimize wrt t,x: c = (0.5 * lambda_max, q), since L'L = P / lambda_max
        c = np.empty(n_pairs + 1)
        c[1:] = q
        c[0] = 0.5 * max_eigval

        # encode x'L'Lx <= t as the second-order cone ||(1 - t, 2 L x)|| <= 1 + t
        zerorow = np.zeros((1, L.shape[1]))
        G_quad = np.block(
            [
                [-1, zerorow],
                [1, zerorow],
                [np.zeros((L.shape[0], 1)), -2 * L],
            ]
        )
        G_lin = sparse.hstack((sparse.csc_matrix((G.shape[0], 1)), G))
        G_all = sparse.vstack((G_lin, sparse.csc_matrix(G_quad)), format="csc")

        n_constraints = G.shape[0]
        h_all = np.empty(G_all.shape[0])
        h_all[:n_constraints] = h
        h_all[n_constraints : (n_constraints + 2)] = 1
        h_all[(n_constraints + 2) :] = 0

        dims = {
            "l": G.shape[0],  # scalar, dimension of positive orthant
            "q": [G_quad.shape[0]],  # vector with dimensions of second order cones
        }
        results = ecos.solve(c, G_all, h_all, dims, verbose=self.verbose, max_iters=self.max_iter or 1000)
        self._check_success(results)

        # drop solution for t
        x = results["x"][1:]
        n_iter = results["info"]["iter"]
        return x[np.newaxis], n_iter

    def _check_success(self, results):  # pylint: disable=no-self-use
        """Check whether the ECOS solver converged successfully.

        Parameters
        ----------
        results : dict
            The results dictionary returned by ``ecos.solve``.

        Raises
        ------
        RuntimeError
            If the solver failed for an unknown reason or found
            primal/dual infeasibility.
        """
        exit_flag = results["info"]["exitFlag"]
        # optimal, or optimal to reduced accuracy, both count as success
        if exit_flag in (EcosSolver.EXIT_OPTIMAL, EcosSolver.EXIT_OPTIMAL + EcosSolver.EXIT_INACC_OFFSET):
            return

        if exit_flag == EcosSolver.EXIT_MAXIT:
            warnings.warn(
                "ECOS solver did not converge: maximum iterations reached", category=ConvergenceWarning, stacklevel=3
            )
        elif exit_flag == EcosSolver.EXIT_PINF:  # pragma: no cover
            raise RuntimeError("Certificate of primal infeasibility found")
        elif exit_flag == EcosSolver.EXIT_DINF:  # pragma: no cover
            raise RuntimeError("Certificate of dual infeasibility found")
        else:  # pragma: no cover
            raise RuntimeError(f"Unknown problem in ECOS solver, exit status: {exit_flag}")

    def _decompose(self, P):
        """Compute a square-root factor of P via eigendecomposition.

        Returns ``L`` such that ``L.T @ L == P / largest_eigenvalue``,
        after dropping eigenvalues whose magnitude is below the cut-off.

        Parameters
        ----------
        P : array-like, shape=(n_variables, n_variables)
            Quadratic part of the objective function. Must be
            (numerically) positive semidefinite.

        Returns
        -------
        decomposed : ndarray
            Square-root factor of ``P`` scaled such that the maximum
            eigenvalue of ``decomposed.T @ decomposed`` is 1.
        largest_eigenvalue : float
            The largest eigenvalue of P.
        """
        # eigendecomposition of the symmetric matrix P, cf. scipy.linalg.pinvh
        s, u = linalg.eigh(P)
        largest_eigenvalue = np.max(np.abs(s))

        cond = self.cond
        if cond is None:
            t = u.dtype
            cond = largest_eigenvalue * max(P.shape) * np.finfo(t).eps

        # Sanity check that P has no eigenvalue below -cond. The previous
        # expression ``abs(s) > -cond`` was vacuously true (abs(s) >= 0 and
        # cond >= 0), so the assertion could never fire; compare the signed
        # eigenvalues so the check matches the error message.
        not_below_cutoff = s > -cond
        assert not_below_cutoff.all(), f"matrix has negative eigenvalues: {s.min()}"

        above_cutoff = abs(s) > cond
        u = u[:, above_cutoff]
        s = s[above_cutoff]

        # set maximum eigenvalue to 1
        decomposed = u * np.sqrt(s / largest_eigenvalue)
        return decomposed.T, largest_eigenvalue
250
+
251
+
252
class MinlipSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
    r"""Survival model based on a minimal Lipschitz smoothness strategy.

    This model is related to :class:`sksurv.svm.FastKernelSurvivalSVM` but
    minimizes a different objective function, focusing on Lipschitz
    smoothness rather than maximal margin. The optimization problem is
    formulated as:

    .. math::

        \min_{\mathbf{w}}\quad
        \frac{1}{2} \lVert \mathbf{w} \rVert_2^2
        + \gamma \sum_{i = 1}^n \xi_i \\
        \text{subject to}\quad
        \mathbf{w}^\top \mathbf{x}_i - \mathbf{w}^\top \mathbf{x}_j \geq y_i - y_j - \xi_i,\quad
        \forall (i, j) \in \mathcal{P}_\text{1-NN}, \\
        \xi_i \geq 0,\quad \forall i = 1,\dots,n.

        \mathcal{P}_\text{1-NN} = \{ (i, j) \mid y_i > y_j \land \delta_j = 1
        \land \nexists k : y_i > y_k > y_j \land \delta_k = 1 \}_{i,j=1}^n.

    See [1]_ for further description.

    Parameters
    ----------
    alpha : float, optional, default: 1
        Weight of penalizing the hinge loss in the objective function.
        Must be greater than 0.

    solver : {'ecos', 'osqp'}, optional, default: 'ecos'
        Which quadratic program solver to use.

    kernel : str or callable, optional, default: 'linear'
        Kernel mapping used internally. This parameter is directly passed to
        :func:`sklearn.metrics.pairwise.pairwise_kernels`.
        If `kernel` is a string, it must be one of the metrics
        in `sklearn.pairwise.PAIRWISE_KERNEL_FUNCTIONS` or "precomputed".
        If `kernel` is "precomputed", X is assumed to be a kernel matrix.
        Alternatively, if `kernel` is a callable function, it is called on
        each pair of instances (rows) and the resulting value recorded. The
        callable should take two rows from X as input and return the
        corresponding kernel value as a single number. This means that
        callables from :mod:`sklearn.metrics.pairwise` are not allowed, as
        they operate on matrices, not single samples. Use the string
        identifying the kernel instead.

    gamma : float, optional, default: None
        Gamma parameter for the RBF, laplacian, polynomial, exponential chi2
        and sigmoid kernels. Interpretation of the default value is left to
        the kernel; see the documentation for :mod:`sklearn.metrics.pairwise`.
        Ignored by other kernels.

    degree : int, optional, default: 3
        Degree of the polynomial kernel. Ignored by other kernels.

    coef0 : float, optional, default: 1
        Zero coefficient for polynomial and sigmoid kernels.
        Ignored by other kernels.

    kernel_params : dict, optional, default: None
        Additional parameters (keyword arguments) for kernel function passed
        as callable object.

    pairs : {'all', 'nearest', 'next'}, optional, default: 'nearest'
        Which constraints to use in the optimization problem.

        - all: Use all comparable pairs. Scales quadratically in number of samples
          (cf. :class:`sksurv.svm.HingeLossSurvivalSVM`).
        - nearest: Only considers comparable pairs :math:`(i, j)` where :math:`j` is the
          uncensored sample with highest survival time smaller than :math:`y_i`.
          Scales linearly in number of samples.
        - next: Only compare against direct nearest neighbor according to observed time,
          disregarding its censoring status. Scales linearly in number of samples.

    verbose : bool, optional, default: False
        Enable verbose output of solver.

    timeit : int or None, optional, default: None
        If an integer, the time taken to solve the optimization problem is
        measured by repeating the optimization that many times.
        Results can be accessed from the ``timings_`` attribute.

    max_iter : int or None, optional, default: None
        The maximum number of iterations taken for the solvers to converge.
        If ``None``, use solver's default value.

    Attributes
    ----------
    X_fit_ : ndarray, shape = (n_samples, `n_features_in_`)
        Training data.

    coef_ : ndarray, shape = (n_samples,), dtype = float
        Coefficients of the features in the decision function.

    n_features_in_ : int
        Number of features seen during ``fit``.

    feature_names_in_ : ndarray, shape = (`n_features_in_`,)
        Names of features seen during ``fit``. Defined only when `X`
        has feature names that are all strings.

    n_iter_ : int
        Number of iterations run by the optimization routine to fit the model.

    timings_ : list of float
        Wall-clock time (in seconds) of each repetition of the optimization.
        Only set when ``timeit`` is not None.

    References
    ----------
    .. [1] Van Belle, V., Pelckmans, K., Suykens, J. A. K., and Van Huffel, S.
           Learning transformation models for ranking and survival analysis.
           The Journal of Machine Learning Research, 12, 819-862. 2011
    """

    # validated by BaseEstimator._validate_params() during fit
    _parameter_constraints = {
        "solver": [StrOptions({"ecos", "osqp"})],
        "alpha": [Interval(numbers.Real, 0, None, closed="neither")],
        "kernel": [
            StrOptions(set(PAIRWISE_KERNEL_FUNCTIONS.keys()) | {"precomputed"}),
            callable,
        ],
        "degree": [Interval(numbers.Integral, 0, None, closed="left")],
        "gamma": [Interval(numbers.Real, 0.0, None, closed="left"), None],
        "coef0": [Interval(numbers.Real, None, None, closed="neither")],
        "kernel_params": [dict, None],
        "pairs": [StrOptions({"all", "nearest", "next"})],
        "verbose": ["boolean"],
        "timeit": [Interval(numbers.Integral, 1, None, closed="left"), None],
        "max_iter": [Interval(numbers.Integral, 1, None, closed="left"), None],
    }

    def __init__(
        self,
        alpha=1.0,
        *,
        solver="ecos",
        kernel="linear",
        gamma=None,
        degree=3,
        coef0=1,
        kernel_params=None,
        pairs="nearest",
        verbose=False,
        timeit=None,
        max_iter=None,
    ):
        self.solver = solver
        self.alpha = alpha
        self.kernel = kernel
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.kernel_params = kernel_params
        self.pairs = pairs
        self.verbose = verbose
        self.timeit = timeit
        self.max_iter = max_iter

    def __sklearn_tags__(self):
        # tell sklearn.utils.metaestimators._safe_split function that we expect kernel matrix
        tags = super().__sklearn_tags__()
        tags.input_tags.pairwise = self.kernel == "precomputed"
        return tags

    def _get_kernel(self, X, Y=None):
        """Compute the pairwise kernel matrix between X and Y (or X itself)."""
        # A callable kernel receives the user-supplied kwargs; string kernels
        # use gamma/degree/coef0 (irrelevant ones are filtered out).
        if callable(self.kernel):
            params = self.kernel_params or {}
        else:
            params = {"gamma": self.gamma, "degree": self.degree, "coef0": self.coef0}
        return pairwise_kernels(X, Y, metric=self.kernel, filter_params=True, **params)

    def _setup_qp(self, K, D, time):
        """Assemble the QP matrices of the MINLIP problem.

        Parameters
        ----------
        K : ndarray, shape = (n_samples, n_samples)
            Kernel matrix of the training data.
        D : sparse matrix, shape = (n_pairs, n_samples)
            Difference matrix encoding comparable pairs.
        time : ndarray, shape = (n_samples,)
            Observed times.

        Returns
        -------
        dict
            Keyword arguments (``P``, ``q``, ``G``, ``h``) for
            :meth:`QPSolver.solve`.
        """
        n_pairs = D.shape[0]
        # quadratic part over pairs: P = D K D^T
        P = D.dot(D.dot(K).T).T
        q = -D.dot(time)

        Dt = D.T.astype(P.dtype)  # cast constraints to correct type
        G = sparse.vstack(
            (
                Dt,  # upper bound
                -Dt,  # lower bound
                -sparse.eye(n_pairs, dtype=P.dtype),  # lower bound >= 0
            ),
            format="csc",
        )
        n_constraints = Dt.shape[0]
        h = np.empty(G.shape[0], dtype=float)
        # box constraints bounded by alpha, non-negativity bounded by 0
        h[: 2 * n_constraints] = self.alpha
        h[-n_pairs:] = 0.0

        return {"P": P, "q": q, "G": G, "h": h}

    def _fit(self, x, event, time):
        """Set up and solve the QP; stores ``coef_``, ``n_iter_``, ``X_fit_``."""
        D = create_difference_matrix(event.astype(np.uint8), time, kind=self.pairs)
        if D.shape[0] == 0:
            raise NoComparablePairException("Data has no comparable pairs, cannot fit model.")

        max_iter = self.max_iter
        # self.solver was validated against {"ecos", "osqp"} in fit()
        if self.solver == "ecos":
            solver = EcosSolver(max_iter=max_iter, verbose=self.verbose)
        elif self.solver == "osqp":
            solver = OsqpSolver(max_iter=max_iter, verbose=self.verbose)

        K = self._get_kernel(x)
        problem_data = self._setup_qp(K, D, time)

        if self.timeit is not None:
            import timeit

            def _inner():
                return solver.solve(**problem_data)

            # repeat the optimization self.timeit times, one run per repetition
            timer = timeit.Timer(_inner)
            self.timings_ = timer.repeat(self.timeit, number=1)

        coef, n_iter = solver.solve(**problem_data)
        self._update_coef(coef, D)
        self.n_iter_ = n_iter
        self.X_fit_ = x

    def _update_coef(self, coef, D):
        # map per-pair multipliers back to per-sample coefficients
        self.coef_ = coef * D

    def fit(self, X, y):
        """Build a MINLIP survival model from training data.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Data matrix.

        y : structured array, shape = (n_samples,)
            A structured array with two fields. The first field is a boolean
            where ``True`` indicates an event and ``False`` indicates right-censoring.
            The second field is a float with the time of event or time of censoring.

        Returns
        -------
        self
        """
        self._validate_params()
        X = validate_data(self, X, ensure_min_samples=2)
        event, time = check_array_survival(X, y)
        self._fit(X, event, time)

        return self

    def predict(self, X):
        """Predict risk score of experiencing an event.

        Higher values indicate an increased risk of experiencing an event,
        lower values a decreased risk of experiencing an event. The scores
        have no unit and are only meaningful to rank samples by their risk
        of experiencing an event.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            The input samples.

        Returns
        -------
        y : ndarray, shape = (n_samples,)
            Predicted risk.
        """
        X = validate_data(self, X, reset=False)
        K = self._get_kernel(X, self.X_fit_)
        # negate the estimated utility so that larger values mean higher risk,
        # matching the contract described above
        pred = -np.dot(self.coef_, K.T)
        return pred.ravel()
518
+
519
+
520
class HingeLossSurvivalSVM(MinlipSurvivalAnalysis):
    r"""Naive implementation of kernel survival support vector machine.

    This implementation creates a new set of samples by building the difference
    between any two feature vectors in the original data. This approach
    requires :math:`O(\text{n_samples}^4)` space and
    :math:`O(\text{n_samples}^6 \cdot \text{n_features})` time, making it
    computationally intensive for large datasets.

    The optimization problem is formulated as:

    .. math::

        \min_{\mathbf{w}}\quad
        \frac{1}{2} \lVert \mathbf{w} \rVert_2^2
        + \gamma \sum_{i = 1}^n \xi_i \\
        \text{subject to}\quad
        \mathbf{w}^\top \phi(\mathbf{x})_i - \mathbf{w}^\top \phi(\mathbf{x})_j \geq 1 - \xi_{ij},\quad
        \forall (i, j) \in \mathcal{P}, \\
        \xi_i \geq 0,\quad \forall (i, j) \in \mathcal{P}.

        \mathcal{P} = \{ (i, j) \mid y_i > y_j \land \delta_j = 1 \}_{i,j=1,\dots,n}.

    See [1]_, [2]_, [3]_ for further description.

    Parameters
    ----------
    alpha : float, optional, default: 1
        Weight of penalizing the hinge loss in the objective function. Must be greater than 0.

    solver : {'ecos', 'osqp'}, optional, default: 'ecos'
        Which quadratic program solver to use.

    kernel : str or callable, optional, default: 'linear'
        Kernel mapping used internally. This parameter is directly passed to
        :func:`sklearn.metrics.pairwise.pairwise_kernels`.
        If `kernel` is a string, it must be one of the metrics
        in `sklearn.pairwise.PAIRWISE_KERNEL_FUNCTIONS` or "precomputed".
        If `kernel` is "precomputed", X is assumed to be a kernel matrix.
        Alternatively, if `kernel` is a callable function, it is called on
        each pair of instances (rows) and the resulting value recorded. The
        callable should take two rows from X as input and return the
        corresponding kernel value as a single number. This means that
        callables from :mod:`sklearn.metrics.pairwise` are not allowed, as
        they operate on matrices, not single samples. Use the string
        identifying the kernel instead.

    gamma : float or None, optional, default: None
        Gamma parameter for the RBF, laplacian, polynomial, exponential chi2
        and sigmoid kernels. Interpretation of the default value is left to
        the kernel; see the documentation for :mod:`sklearn.metrics.pairwise`.
        Ignored by other kernels.

    degree : int, optional, default: 3
        Degree of the polynomial kernel. Ignored by other kernels.

    coef0 : float, optional, default: 1
        Zero coefficient for polynomial and sigmoid kernels.
        Ignored by other kernels.

    kernel_params : dict or None, optional, default: None
        Additional parameters (keyword arguments) for kernel function passed
        as callable object.

    pairs : {'all', 'nearest', 'next'}, optional, default: 'all'
        Which constraints to use in the optimization problem.

        - all: Use all comparable pairs. Scales quadratically in number of samples.
        - nearest: Only considers comparable pairs :math:`(i, j)` where :math:`j` is the
          uncensored sample with highest survival time smaller than :math:`y_i`.
          Scales linearly in number of samples (cf. :class:`sksurv.svm.MinlipSurvivalAnalysis`).
        - next: Only compare against direct nearest neighbor according to observed time,
          disregarding its censoring status. Scales linearly in number of samples.

    verbose : bool, optional, default: False
        If ``True``, enable verbose output of the solver.

    timeit : int or None, optional, default: None
        If an integer, the time taken to solve the optimization problem is
        measured by repeating the optimization that many times.
        Results can be accessed from the ``timings_`` attribute.

    max_iter : int or None, optional, default: None
        The maximum number of iterations taken for the solvers to converge.
        If ``None``, use solver's default value.

    Attributes
    ----------
    X_fit_ : ndarray, shape = (n_samples, `n_features_in_`)
        Training data.

    coef_ : ndarray, shape = (n_samples,), dtype = float
        Coefficients of the features in the decision function.

    n_features_in_ : int
        Number of features seen during ``fit``.

    feature_names_in_ : ndarray, shape = (`n_features_in_`,), dtype = object
        Names of features seen during ``fit``. Defined only when `X`
        has feature names that are all strings.

    n_iter_ : int
        Number of iterations run by the optimization routine to fit the model.

    timings_ : list of float
        Wall-clock time (in seconds) of each repetition of the optimization.
        Only set when ``timeit`` is not None.

    See also
    --------
    sksurv.svm.NaiveSurvivalSVM : The linear naive survival SVM based on liblinear.

    References
    ----------
    .. [1] Van Belle, V., Pelckmans, K., Suykens, J. A., & Van Huffel, S.
           Support Vector Machines for Survival Analysis. In Proc. of the 3rd Int. Conf.
           on Computational Intelligence in Medicine and Healthcare (CIMED). 1-8. 2007

    .. [2] Evers, L., Messow, C.M.,
           "Sparse kernel methods for high-dimensional survival data",
           Bioinformatics 24(14), 1632-8, 2008.

    .. [3] Van Belle, V., Pelckmans, K., Suykens, J.A., Van Huffel, S.,
           "Survival SVM: a practical scalable algorithm",
           In: Proc. of 16th European Symposium on Artificial Neural Networks,
           89-94, 2008.
    """

    # same hyper-parameters as the parent class, only defaults differ
    _parameter_constraints = MinlipSurvivalAnalysis._parameter_constraints

    def __init__(
        self,
        alpha=1.0,
        *,
        solver="ecos",
        kernel="linear",
        gamma=None,
        degree=3,
        coef0=1,
        kernel_params=None,
        pairs="all",
        verbose=False,
        timeit=None,
        max_iter=None,
    ):
        super().__init__(
            solver=solver,
            alpha=alpha,
            kernel=kernel,
            gamma=gamma,
            degree=degree,
            coef0=coef0,
            kernel_params=kernel_params,
            pairs=pairs,
            verbose=verbose,
            timeit=timeit,
            max_iter=max_iter,
        )

    def _setup_qp(self, K, D, time):
        """Assemble the QP matrices of the hinge-loss SVM dual.

        Unlike the parent class, the linear part is constant (-1 per pair)
        and the constraints are simple box constraints ``0 <= x <= alpha``
        on the per-pair multipliers; ``time`` is not used.
        """
        n_pairs = D.shape[0]

        # quadratic part over pairs: P = D K D^T
        P = D.dot(D.dot(K).T).T
        q = -np.ones(n_pairs)

        G = sparse.vstack((-sparse.eye(n_pairs), sparse.eye(n_pairs)), format="csc")
        h = np.empty(2 * n_pairs)
        h[:n_pairs] = 0
        h[n_pairs:] = self.alpha

        return {"P": P, "q": q, "G": G, "h": h}

    def _update_coef(self, coef, D):
        # keep only support vectors, i.e. pairs with non-negligible multipliers
        sv = np.flatnonzero(coef > 1e-5)
        self.coef_ = coef[:, sv] * D[sv, :]