scikit-survival 0.26.0__cp314-cp314-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. scikit_survival-0.26.0.dist-info/METADATA +185 -0
  2. scikit_survival-0.26.0.dist-info/RECORD +58 -0
  3. scikit_survival-0.26.0.dist-info/WHEEL +6 -0
  4. scikit_survival-0.26.0.dist-info/licenses/COPYING +674 -0
  5. scikit_survival-0.26.0.dist-info/top_level.txt +1 -0
  6. sksurv/__init__.py +183 -0
  7. sksurv/base.py +115 -0
  8. sksurv/bintrees/__init__.py +15 -0
  9. sksurv/bintrees/_binarytrees.cpython-314-darwin.so +0 -0
  10. sksurv/column.py +204 -0
  11. sksurv/compare.py +123 -0
  12. sksurv/datasets/__init__.py +12 -0
  13. sksurv/datasets/base.py +614 -0
  14. sksurv/datasets/data/GBSG2.arff +700 -0
  15. sksurv/datasets/data/actg320.arff +1169 -0
  16. sksurv/datasets/data/bmt.arff +46 -0
  17. sksurv/datasets/data/breast_cancer_GSE7390-metastasis.arff +283 -0
  18. sksurv/datasets/data/cgvhd.arff +118 -0
  19. sksurv/datasets/data/flchain.arff +7887 -0
  20. sksurv/datasets/data/veteran.arff +148 -0
  21. sksurv/datasets/data/whas500.arff +520 -0
  22. sksurv/docstrings.py +99 -0
  23. sksurv/ensemble/__init__.py +2 -0
  24. sksurv/ensemble/_coxph_loss.cpython-314-darwin.so +0 -0
  25. sksurv/ensemble/boosting.py +1564 -0
  26. sksurv/ensemble/forest.py +902 -0
  27. sksurv/ensemble/survival_loss.py +151 -0
  28. sksurv/exceptions.py +18 -0
  29. sksurv/functions.py +114 -0
  30. sksurv/io/__init__.py +2 -0
  31. sksurv/io/arffread.py +91 -0
  32. sksurv/io/arffwrite.py +181 -0
  33. sksurv/kernels/__init__.py +1 -0
  34. sksurv/kernels/_clinical_kernel.cpython-314-darwin.so +0 -0
  35. sksurv/kernels/clinical.py +348 -0
  36. sksurv/linear_model/__init__.py +3 -0
  37. sksurv/linear_model/_coxnet.cpython-314-darwin.so +0 -0
  38. sksurv/linear_model/aft.py +208 -0
  39. sksurv/linear_model/coxnet.py +592 -0
  40. sksurv/linear_model/coxph.py +637 -0
  41. sksurv/meta/__init__.py +4 -0
  42. sksurv/meta/base.py +35 -0
  43. sksurv/meta/ensemble_selection.py +724 -0
  44. sksurv/meta/stacking.py +370 -0
  45. sksurv/metrics.py +1028 -0
  46. sksurv/nonparametric.py +911 -0
  47. sksurv/preprocessing.py +195 -0
  48. sksurv/svm/__init__.py +11 -0
  49. sksurv/svm/_minlip.cpython-314-darwin.so +0 -0
  50. sksurv/svm/_prsvm.cpython-314-darwin.so +0 -0
  51. sksurv/svm/minlip.py +695 -0
  52. sksurv/svm/naive_survival_svm.py +249 -0
  53. sksurv/svm/survival_svm.py +1236 -0
  54. sksurv/testing.py +155 -0
  55. sksurv/tree/__init__.py +1 -0
  56. sksurv/tree/_criterion.cpython-314-darwin.so +0 -0
  57. sksurv/tree/tree.py +790 -0
  58. sksurv/util.py +416 -0
sksurv/svm/minlip.py ADDED
@@ -0,0 +1,695 @@
1
+ from abc import ABCMeta, abstractmethod
2
+ import numbers
3
+ import warnings
4
+
5
+ import numpy as np
6
+ from scipy import linalg, sparse
7
+ from sklearn.base import BaseEstimator
8
+ from sklearn.exceptions import ConvergenceWarning
9
+ from sklearn.metrics.pairwise import PAIRWISE_KERNEL_FUNCTIONS, pairwise_kernels
10
+ from sklearn.utils._param_validation import Interval, StrOptions
11
+ from sklearn.utils.validation import validate_data
12
+
13
+ from ..base import SurvivalAnalysisMixin
14
+ from ..exceptions import NoComparablePairException
15
+ from ..util import check_array_survival
16
+ from ._minlip import create_difference_matrix
17
+
18
+ __all__ = ["MinlipSurvivalAnalysis", "HingeLossSurvivalSVM"]
19
+
20
+
21
class QPSolver(metaclass=ABCMeta):
    r"""Interface for quadratic program (QP) solvers.

    A concrete solver minimizes a quadratic objective subject to
    linear inequality constraints:

    .. math::

        \min_{x} \quad (1/2)x^T P x + q^T x \\
        \text{subject to} \quad G x \preceq h

    Parameters
    ----------
    max_iter : int or None
        Maximum number of iterations the solver may perform.
    verbose : bool
        Whether the solver prints progress information.
    """

    @abstractmethod
    def __init__(self, max_iter, verbose):
        self.max_iter = max_iter
        self.verbose = verbose

    @abstractmethod
    def solve(self, P, q, G, h):
        """Compute the minimizer of the quadratic program.

        Parameters
        ----------
        P : array-like, shape=(n_variables, n_variables)
            Matrix of the quadratic term of the objective function.
        q : array-like, shape=(n_variables,)
            Vector of the linear term of the objective function.
        G : array-like, shape=(n_constraints, n_variables)
            Coefficient matrix of the inequality constraints.
        h : array-like, shape=(n_constraints,)
            Right-hand side of the inequality constraints.

        Returns
        -------
        x : ndarray, shape=(n_variables,)
            The optimal solution.
        n_iter : int
            Number of iterations the solver performed.
        """
68
+
69
+
70
class OsqpSolver(QPSolver):
    """Quadratic program solver backed by the OSQP package.

    Solves ``min 0.5 x^T P x + q^T x`` subject to ``G x <= h`` by
    handing the problem to :class:`osqp.OSQP` with no lower bound.

    Parameters
    ----------
    max_iter : int or None
        Maximum number of iterations; if ``None``, 4000 is used.
    verbose : bool
        Enable verbose output of the solver.
    """

    def __init__(self, max_iter, verbose):
        super().__init__(
            max_iter=max_iter,
            verbose=verbose,
        )

    def solve(self, P, q, G, h):
        import osqp

        # OSQP requires the quadratic term in CSC format; convert once here
        # (previously the already-converted matrix was wrapped a second time
        # in setup(), producing a redundant copy).
        P = sparse.csc_matrix(P)

        solver_opts = self._get_options()
        m = osqp.OSQP()
        # One-sided constraint G x <= h: the lower bound l is None (-inf).
        m.setup(P=P, q=q, A=G, l=None, u=h, **solver_opts)  # noqa: E741
        results = m.solve(raise_error=False)

        solved_codes = (
            osqp.SolverStatus.OSQP_SOLVED,
            osqp.SolverStatus.OSQP_SOLVED_INACCURATE,
        )

        if results.info.status_val == osqp.SolverStatus.OSQP_MAX_ITER_REACHED:  # max iter reached
            warnings.warn(
                (f"OSQP solver did not converge: {results.info.status}"),
                category=ConvergenceWarning,
                stacklevel=2,
            )
        elif results.info.status_val not in solved_codes:  # pragma: no cover
            # none of SOLVED, SOLVED_INACCURATE
            raise RuntimeError(f"OSQP solver failed: {results.info.status}")

        n_iter = results.info.iter
        # Return a row vector to match the interface of EcosSolver.solve.
        return results.x[np.newaxis], n_iter

    def _get_options(self):
        """Returns a dictionary of OSQP solver options."""
        solver_opts = {
            "eps_abs": 1e-5,
            "eps_rel": 1e-5,
            "max_iter": self.max_iter or 4000,
            "polishing": True,
            "verbose": self.verbose,
        }
        return solver_opts
115
+
116
+
117
class EcosSolver(QPSolver):
    r"""Solves QP by expressing it as second-order cone program:

    .. math::

        \min \quad c^T x \\
        \text{subject to} \quad G x \preceq_K h

    where the last inequality is generalized, i.e. :math:`h - G x`
    belongs to the cone :math:`K`.

    Parameters
    ----------
    max_iter : int or None
        Maximum number of iterations to perform.
    verbose : bool
        Enable verbose output of the solver.
    cond : float or None, default: None
        Condition number for eigenvalue decomposition.
        If ``None``, it is derived from the largest eigenvalue,
        the matrix size, and the floating-point precision.
    """

    # ECOS exit codes (see the ECOS solver documentation).
    EXIT_OPTIMAL = 0  # Optimal solution found
    EXIT_PINF = 1  # Certificate of primal infeasibility found
    EXIT_DINF = 2  # Certificate of dual infeasibility found
    EXIT_MAXIT = -1  # Maximum number of iterations reached
    EXIT_NUMERICS = -2  # Numerical problems (unreliable search direction)
    EXIT_OUTCONE = -3  # Numerical problems (slacks or multipliers outside cone)
    EXIT_INACC_OFFSET = 10  # offset added to exit codes for inaccurate solutions

    def __init__(self, max_iter, verbose, cond=None):
        super().__init__(
            max_iter=max_iter,
            verbose=verbose,
        )
        self.cond = cond

    def solve(self, P, q, G, h):
        import ecos

        n_pairs = P.shape[0]
        L, max_eigval = self._decompose(P)

        # minimize wrt t,x: the quadratic objective is replaced by the
        # linear term t plus a second-order cone constraint involving L.
        c = np.empty(n_pairs + 1)
        c[1:] = q
        c[0] = 0.5 * max_eigval

        zerorow = np.zeros((1, L.shape[1]))
        G_quad = np.block(
            [
                [-1, zerorow],
                [1, zerorow],
                [np.zeros((L.shape[0], 1)), -2 * L],
            ]
        )
        # Prepend a zero column for the auxiliary variable t to the linear constraints.
        G_lin = sparse.hstack((sparse.csc_matrix((G.shape[0], 1)), G))
        G_all = sparse.vstack((G_lin, sparse.csc_matrix(G_quad)), format="csc")

        n_constraints = G.shape[0]
        h_all = np.empty(G_all.shape[0])
        h_all[:n_constraints] = h
        h_all[n_constraints : (n_constraints + 2)] = 1
        h_all[(n_constraints + 2) :] = 0

        dims = {
            "l": G.shape[0],  # scalar, dimension of positive orthant
            "q": [G_quad.shape[0]],  # vector with dimensions of second order cones
        }
        results = ecos.solve(c, G_all, h_all, dims, verbose=self.verbose, max_iters=self.max_iter or 1000)
        self._check_success(results)

        # drop solution for t
        x = results["x"][1:]
        n_iter = results["info"]["iter"]
        return x[np.newaxis], n_iter

    def _check_success(self, results):  # pylint: disable=no-self-use
        """Checks if the ECOS solver converged successfully.

        Emits a :class:`ConvergenceWarning` if the iteration limit was
        reached; an inaccurate-but-optimal solution is accepted silently.

        Parameters
        ----------
        results : dict
            The results dictionary returned by ``ecos.solve``.

        Raises
        -------
        RuntimeError
            If the solver failed for an unknown reason or found primal/dual infeasibility.
        """
        exit_flag = results["info"]["exitFlag"]
        if exit_flag in (EcosSolver.EXIT_OPTIMAL, EcosSolver.EXIT_OPTIMAL + EcosSolver.EXIT_INACC_OFFSET):
            return

        if exit_flag == EcosSolver.EXIT_MAXIT:
            warnings.warn(
                "ECOS solver did not converge: maximum iterations reached", category=ConvergenceWarning, stacklevel=3
            )
        elif exit_flag == EcosSolver.EXIT_PINF:  # pragma: no cover
            raise RuntimeError("Certificate of primal infeasibility found")
        elif exit_flag == EcosSolver.EXIT_DINF:  # pragma: no cover
            raise RuntimeError("Certificate of dual infeasibility found")
        else:  # pragma: no cover
            raise RuntimeError(f"Unknown problem in ECOS solver, exit status: {exit_flag}")

    def _decompose(self, P):
        """Performs eigenvalue decomposition of P.

        Parameters
        ----------
        P : array-like, shape=(n_variables, n_variables)
            Quadratic part of the objective function.
            Must be (numerically) positive semidefinite.

        Returns
        -------
        decomposed : ndarray
            Decomposed matrix, scaled such that the maximum eigenvalue is 1.
        largest_eigenvalue : float
            The largest eigenvalue of P (by absolute value).
        """
        # from scipy.linalg.pinvh
        s, u = linalg.eigh(P)
        largest_eigenvalue = np.max(np.abs(s))

        cond = self.cond
        if cond is None:
            t = u.dtype
            cond = largest_eigenvalue * max(P.shape) * np.finfo(t).eps

        # Sanity check that P is positive semidefinite up to tolerance:
        # no eigenvalue may lie below -cond. Comparing abs(s) > -cond, as
        # done previously, is vacuously true and never catches an
        # indefinite matrix (which would yield NaNs in the sqrt below).
        not_below_cutoff = s > -cond
        assert not_below_cutoff.all(), f"matrix has negative eigenvalues: {s.min()}"

        # Discard numerically-zero eigenvalues and their eigenvectors.
        above_cutoff = abs(s) > cond
        u = u[:, above_cutoff]
        s = s[above_cutoff]

        # set maximum eigenvalue to 1
        decomposed = u * np.sqrt(s / largest_eigenvalue)
        return decomposed.T, largest_eigenvalue
255
+
256
+
257
class MinlipSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
    r"""Survival model based on a minimal Lipschitz smoothness strategy.

    This model is related to :class:`sksurv.svm.FastKernelSurvivalSVM` but
    minimizes a different objective function, focusing on Lipschitz
    smoothness rather than maximal margin. The optimization problem is
    formulated as:

    .. math::

        \min_{\mathbf{w}}\quad
        \frac{1}{2} \lVert \mathbf{w} \rVert_2^2
        + \gamma \sum_{i = 1}^n \xi_i \\
        \text{subject to}\quad
        \mathbf{w}^\top \mathbf{x}_i - \mathbf{w}^\top \mathbf{x}_j \geq y_i - y_j - \xi_i,\quad
        \forall (i, j) \in \mathcal{P}_\text{1-NN}, \\
        \xi_i \geq 0,\quad \forall i = 1,\dots,n.

        \mathcal{P}_\text{1-NN} = \{ (i, j) \mid y_i > y_j \land \delta_j = 1
        \land \nexists k : y_i > y_k > y_j \land \delta_k = 1 \}_{i,j=1}^n.

    See [1]_ for further description.

    Parameters
    ----------
    alpha : float, optional, default: 1
        Weight of penalizing the hinge loss in the objective function.
        Must be greater than 0.

    solver : {'ecos', 'osqp'}, optional, default: 'ecos'
        Which quadratic program solver to use.

    kernel : str or callable, optional, default: 'linear'.
        Kernel mapping used internally. This parameter is directly passed to
        :func:`sklearn.metrics.pairwise.pairwise_kernels`.
        If `kernel` is a string, it must be one of the metrics
        in `sklearn.pairwise.PAIRWISE_KERNEL_FUNCTIONS` or "precomputed".
        If `kernel` is "precomputed", X is assumed to be a kernel matrix.
        Alternatively, if `kernel` is a callable function, it is called on
        each pair of instances (rows) and the resulting value recorded. The
        callable should take two rows from X as input and return the
        corresponding kernel value as a single number. This means that
        callables from :mod:`sklearn.metrics.pairwise` are not allowed, as
        they operate on matrices, not single samples. Use the string
        identifying the kernel instead.

    gamma : float, optional, default: None
        Gamma parameter for the RBF, laplacian, polynomial, exponential chi2
        and sigmoid kernels. Interpretation of the default value is left to
        the kernel; see the documentation for :mod:`sklearn.metrics.pairwise`.
        Ignored by other kernels.

    degree : int, optional, default: 3
        Degree of the polynomial kernel. Ignored by other kernels.

    coef0 : float, optional, default: 1
        Zero coefficient for polynomial and sigmoid kernels.
        Ignored by other kernels.

    kernel_params : dict, optional, default: None
        Additional parameters (keyword arguments) for kernel function passed
        as callable object.

    pairs : {'all', 'nearest', 'next'}, optional, default: 'nearest'
        Which constraints to use in the optimization problem.

        - all: Use all comparable pairs. Scales quadratically in number of samples
          (cf. :class:`sksurv.svm.HingeLossSurvivalSVM`).
        - nearest: Only considers comparable pairs :math:`(i, j)` where :math:`j` is the
          uncensored sample with highest survival time smaller than :math:`y_i`.
          Scales linearly in number of samples.
        - next: Only compare against direct nearest neighbor according to observed time,
          disregarding its censoring status. Scales linearly in number of samples.

    verbose : bool, optional, default: False
        Enable verbose output of solver.

    timeit : int or None, optional, default: None
        If a positive integer is provided, the time taken for optimization
        is measured by repeating the optimization that many times.
        Results can be accessed from the ``timings_`` attribute.

    max_iter : int or None, optional, default: None
        The maximum number of iterations taken for the solvers to converge.
        If ``None``, use solver's default value.

    Attributes
    ----------
    X_fit_ : ndarray, shape = (n_samples, `n_features_in_`)
        Training data.

    coef_ : ndarray, shape = (n_samples,), dtype = float
        Coefficients of the features in the decision function.

    n_features_in_ : int
        Number of features seen during ``fit``.

    feature_names_in_ : ndarray, shape = (`n_features_in_`,)
        Names of features seen during ``fit``. Defined only when `X`
        has feature names that are all strings.

    n_iter_ : int
        Number of iterations run by the optimization routine to fit the model.

    References
    ----------
    .. [1] Van Belle, V., Pelckmans, K., Suykens, J. A. K., and Van Huffel, S.
           Learning transformation models for ranking and survival analysis.
           The Journal of Machine Learning Research, 12, 819-862. 2011
    """

    _parameter_constraints = {
        "solver": [StrOptions({"ecos", "osqp"})],
        "alpha": [Interval(numbers.Real, 0, None, closed="neither")],
        "kernel": [
            StrOptions(set(PAIRWISE_KERNEL_FUNCTIONS.keys()) | {"precomputed"}),
            callable,
        ],
        "degree": [Interval(numbers.Integral, 0, None, closed="left")],
        "gamma": [Interval(numbers.Real, 0.0, None, closed="left"), None],
        "coef0": [Interval(numbers.Real, None, None, closed="neither")],
        "kernel_params": [dict, None],
        "pairs": [StrOptions({"all", "nearest", "next"})],
        "verbose": ["boolean"],
        # note: only positive integers or None are accepted, not booleans
        "timeit": [Interval(numbers.Integral, 1, None, closed="left"), None],
        "max_iter": [Interval(numbers.Integral, 1, None, closed="left"), None],
    }

    def __init__(
        self,
        alpha=1.0,
        *,
        solver="ecos",
        kernel="linear",
        gamma=None,
        degree=3,
        coef0=1,
        kernel_params=None,
        pairs="nearest",
        verbose=False,
        timeit=None,
        max_iter=None,
    ):
        self.solver = solver
        self.alpha = alpha
        self.kernel = kernel
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.kernel_params = kernel_params
        self.pairs = pairs
        self.verbose = verbose
        self.timeit = timeit
        self.max_iter = max_iter

    def __sklearn_tags__(self):
        # tell sklearn.utils.metaestimators._safe_split function that we expect kernel matrix
        tags = super().__sklearn_tags__()
        tags.input_tags.pairwise = self.kernel == "precomputed"
        return tags

    def _get_kernel(self, X, Y=None):
        """Compute the kernel matrix between X and Y (or X and itself)."""
        if callable(self.kernel):
            params = self.kernel_params or {}
        else:
            # gamma/degree/coef0 are filtered by pairwise_kernels to those
            # applicable to the selected kernel
            params = {"gamma": self.gamma, "degree": self.degree, "coef0": self.coef0}
        return pairwise_kernels(X, Y, metric=self.kernel, filter_params=True, **params)

    def _setup_qp(self, K, D, time):
        """Assemble the quadratic program for the MINLIP problem.

        Parameters
        ----------
        K : ndarray, shape=(n_samples, n_samples)
            Kernel matrix of the training data.
        D : sparse matrix, shape=(n_pairs, n_samples)
            Difference matrix encoding comparable pairs.
        time : ndarray, shape=(n_samples,)
            Observed times.

        Returns
        -------
        dict
            Keyword arguments (``P``, ``q``, ``G``, ``h``) for
            :meth:`QPSolver.solve`.
        """
        n_pairs = D.shape[0]
        P = D.dot(D.dot(K).T).T
        q = -D.dot(time)

        Dt = D.T.astype(P.dtype)  # cast constraints to correct type
        G = sparse.vstack(
            (
                Dt,  # upper bound
                -Dt,  # lower bound
                -sparse.eye(n_pairs, dtype=P.dtype),  # lower bound >= 0
            ),
            format="csc",
        )
        n_constraints = Dt.shape[0]
        h = np.empty(G.shape[0], dtype=float)
        h[: 2 * n_constraints] = self.alpha
        h[-n_pairs:] = 0.0

        return {"P": P, "q": q, "G": G, "h": h}

    def _fit(self, x, event, time):
        """Solve the QP and store the fitted coefficients on self."""
        D = create_difference_matrix(event.astype(np.uint8), time, kind=self.pairs)
        if D.shape[0] == 0:
            raise NoComparablePairException("Data has no comparable pairs, cannot fit model.")

        max_iter = self.max_iter
        # _validate_params (called in fit) guarantees solver is one of these
        if self.solver == "ecos":
            solver = EcosSolver(max_iter=max_iter, verbose=self.verbose)
        elif self.solver == "osqp":
            solver = OsqpSolver(max_iter=max_iter, verbose=self.verbose)

        K = self._get_kernel(x)
        problem_data = self._setup_qp(K, D, time)

        if self.timeit is not None:
            import timeit

            def _inner():
                return solver.solve(**problem_data)

            # repeat the optimization self.timeit times and record wall-clock times
            timer = timeit.Timer(_inner)
            self.timings_ = timer.repeat(self.timeit, number=1)

        coef, n_iter = solver.solve(**problem_data)
        self._update_coef(coef, D)
        self.n_iter_ = n_iter
        self.X_fit_ = x

    def _update_coef(self, coef, D):
        """Map pairwise solution back to per-sample coefficients."""
        self.coef_ = coef * D

    def fit(self, X, y):
        """Build a MINLIP survival model from training data.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Data matrix.

        y : structured array, shape = (n_samples,)
            A structured array with two fields. The first field is a boolean
            where ``True`` indicates an event and ``False`` indicates right-censoring.
            The second field is a float with the time of event or time of censoring.

        Returns
        -------
        self

        Raises
        ------
        NoComparablePairException
            If the data contains no comparable pairs.
        """
        self._validate_params()
        X = validate_data(self, X, ensure_min_samples=2)
        event, time = check_array_survival(X, y)
        self._fit(X, event, time)

        return self

    def predict(self, X):
        """Predict risk score of experiencing an event.

        Higher values indicate an increased risk of experiencing an event,
        lower values a decreased risk of experiencing an event. The scores
        have no unit and are only meaningful to rank samples by their risk
        of experiencing an event.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            The input samples.

        Returns
        -------
        y : ndarray, shape = (n_samples,)
            Predicted risk.
        """
        X = validate_data(self, X, reset=False)
        K = self._get_kernel(X, self.X_fit_)
        # negate so that higher scores correspond to higher risk (shorter survival)
        pred = -np.dot(self.coef_, K.T)
        return pred.ravel()
523
+
524
+
525
class HingeLossSurvivalSVM(MinlipSurvivalAnalysis):
    r"""Naive implementation of kernel survival support vector machine.

    This implementation creates a new set of samples by building the difference
    between any two feature vectors in the original data. This approach
    requires :math:`O(\text{n_samples}^4)` space and
    :math:`O(\text{n_samples}^6 \cdot \text{n_features})` time, making it
    computationally intensive for large datasets.

    The optimization problem is formulated as:

    .. math::

        \min_{\mathbf{w}}\quad
        \frac{1}{2} \lVert \mathbf{w} \rVert_2^2
        + \gamma \sum_{i = 1}^n \xi_i \\
        \text{subject to}\quad
        \mathbf{w}^\top \phi(\mathbf{x})_i - \mathbf{w}^\top \phi(\mathbf{x})_j \geq 1 - \xi_{ij},\quad
        \forall (i, j) \in \mathcal{P}, \\
        \xi_i \geq 0,\quad \forall (i, j) \in \mathcal{P}.

        \mathcal{P} = \{ (i, j) \mid y_i > y_j \land \delta_j = 1 \}_{i,j=1,\dots,n}.

    See [1]_, [2]_, [3]_ for further description.

    Parameters
    ----------
    alpha : float, optional, default: 1
        Weight of penalizing the hinge loss in the objective function. Must be greater than 0.

    solver : {'ecos', 'osqp'}, optional, default: 'ecos'
        Which quadratic program solver to use.

    kernel : str or callable, optional, default: 'linear'
        Kernel mapping used internally. This parameter is directly passed to
        :func:`sklearn.metrics.pairwise.pairwise_kernels`.
        If `kernel` is a string, it must be one of the metrics
        in `sklearn.pairwise.PAIRWISE_KERNEL_FUNCTIONS` or "precomputed".
        If `kernel` is "precomputed", X is assumed to be a kernel matrix.
        Alternatively, if `kernel` is a callable function, it is called on
        each pair of instances (rows) and the resulting value recorded. The
        callable should take two rows from X as input and return the
        corresponding kernel value as a single number. This means that
        callables from :mod:`sklearn.metrics.pairwise` are not allowed, as
        they operate on matrices, not single samples. Use the string
        identifying the kernel instead.

    gamma : float or None, optional, default: None
        Gamma parameter for the RBF, laplacian, polynomial, exponential chi2
        and sigmoid kernels. Interpretation of the default value is left to
        the kernel; see the documentation for :mod:`sklearn.metrics.pairwise`.
        Ignored by other kernels.

    degree : int, optional, default: 3
        Degree of the polynomial kernel. Ignored by other kernels.

    coef0 : float, optional, default: 1
        Zero coefficient for polynomial and sigmoid kernels.
        Ignored by other kernels.

    kernel_params : dict or None, optional, default: None
        Additional parameters (keyword arguments) for kernel function passed
        as callable object.

    pairs : {'all', 'nearest', 'next'}, optional, default: 'all'
        Which constraints to use in the optimization problem.

        - all: Use all comparable pairs. Scales quadratically in number of samples.
        - nearest: Only considers comparable pairs :math:`(i, j)` where :math:`j` is the
          uncensored sample with highest survival time smaller than :math:`y_i`.
          Scales linearly in number of samples (cf. :class:`sksurv.svm.MinlipSurvivalAnalysis`).
        - next: Only compare against direct nearest neighbor according to observed time,
          disregarding its censoring status. Scales linearly in number of samples.

    verbose : bool, optional, default: False
        If ``True``, enable verbose output of the solver.

    timeit : int or None, optional, default: None
        If a positive integer is provided, the time taken for optimization
        is measured by repeating the optimization that many times.
        Results can be accessed from the ``timings_`` attribute.

    max_iter : int or None, optional, default: None
        The maximum number of iterations taken for the solvers to converge.
        If ``None``, use solver's default value.

    Attributes
    ----------
    X_fit_ : ndarray, shape = (n_samples, `n_features_in_`)
        Training data.

    coef_ : ndarray, shape = (n_samples,), dtype = float
        Coefficients of the features in the decision function.

    n_features_in_ : int
        Number of features seen during ``fit``.

    feature_names_in_ : ndarray, shape = (`n_features_in_`,), dtype = object
        Names of features seen during ``fit``. Defined only when `X`
        has feature names that are all strings.

    n_iter_ : int
        Number of iterations run by the optimization routine to fit the model.

    See also
    --------
    sksurv.svm.NaiveSurvivalSVM : The linear naive survival SVM based on liblinear.

    References
    ----------
    .. [1] Van Belle, V., Pelckmans, K., Suykens, J. A., & Van Huffel, S.
           Support Vector Machines for Survival Analysis. In Proc. of the 3rd Int. Conf.
           on Computational Intelligence in Medicine and Healthcare (CIMED). 1-8. 2007

    .. [2] Evers, L., Messow, C.M.,
           "Sparse kernel methods for high-dimensional survival data",
           Bioinformatics 24(14), 1632-8, 2008.

    .. [3] Van Belle, V., Pelckmans, K., Suykens, J.A., Van Huffel, S.,
           "Survival SVM: a practical scalable algorithm",
           In: Proc. of 16th European Symposium on Artificial Neural Networks,
           89-94, 2008.
    """

    # parameters are validated against the same constraints as the parent class
    _parameter_constraints = MinlipSurvivalAnalysis._parameter_constraints

    def __init__(
        self,
        alpha=1.0,
        *,
        solver="ecos",
        kernel="linear",
        gamma=None,
        degree=3,
        coef0=1,
        kernel_params=None,
        pairs="all",
        verbose=False,
        timeit=None,
        max_iter=None,
    ):
        super().__init__(
            solver=solver,
            alpha=alpha,
            kernel=kernel,
            gamma=gamma,
            degree=degree,
            coef0=coef0,
            kernel_params=kernel_params,
            pairs=pairs,
            verbose=verbose,
            timeit=timeit,
            max_iter=max_iter,
        )

    def _setup_qp(self, K, D, time):
        """Assemble the quadratic program for the hinge-loss SVM.

        Unlike the MINLIP formulation, the linear term is the constant
        vector -1 and the variables are box-constrained to [0, alpha];
        the ``time`` argument is unused here.
        """
        n_pairs = D.shape[0]

        P = D.dot(D.dot(K).T).T
        q = -np.ones(n_pairs)

        # box constraints: 0 <= x <= alpha
        G = sparse.vstack((-sparse.eye(n_pairs), sparse.eye(n_pairs)), format="csc")
        h = np.empty(2 * n_pairs)
        h[:n_pairs] = 0
        h[n_pairs:] = self.alpha

        return {"P": P, "q": q, "G": G, "h": h}

    def _update_coef(self, coef, D):
        """Keep only support vectors (coefficients above tolerance 1e-5)."""
        sv = np.flatnonzero(coef > 1e-5)
        self.coef_ = coef[:, sv] * D[sv, :]