scikit-survival 0.24.1__cp313-cp313-win_amd64.whl → 0.26.0__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scikit_survival-0.26.0.dist-info/METADATA +185 -0
- scikit_survival-0.26.0.dist-info/RECORD +58 -0
- {scikit_survival-0.24.1.dist-info → scikit_survival-0.26.0.dist-info}/WHEEL +1 -1
- sksurv/__init__.py +51 -6
- sksurv/base.py +12 -2
- sksurv/bintrees/_binarytrees.cp313-win_amd64.pyd +0 -0
- sksurv/column.py +38 -35
- sksurv/compare.py +23 -23
- sksurv/datasets/base.py +52 -27
- sksurv/docstrings.py +99 -0
- sksurv/ensemble/_coxph_loss.cp313-win_amd64.pyd +0 -0
- sksurv/ensemble/boosting.py +116 -168
- sksurv/ensemble/forest.py +94 -151
- sksurv/functions.py +29 -29
- sksurv/io/arffread.py +37 -4
- sksurv/io/arffwrite.py +41 -5
- sksurv/kernels/_clinical_kernel.cp313-win_amd64.pyd +0 -0
- sksurv/kernels/clinical.py +36 -16
- sksurv/linear_model/_coxnet.cp313-win_amd64.pyd +0 -0
- sksurv/linear_model/aft.py +14 -11
- sksurv/linear_model/coxnet.py +138 -89
- sksurv/linear_model/coxph.py +102 -83
- sksurv/meta/ensemble_selection.py +91 -9
- sksurv/meta/stacking.py +47 -26
- sksurv/metrics.py +257 -224
- sksurv/nonparametric.py +150 -81
- sksurv/preprocessing.py +74 -34
- sksurv/svm/_minlip.cp313-win_amd64.pyd +0 -0
- sksurv/svm/_prsvm.cp313-win_amd64.pyd +0 -0
- sksurv/svm/minlip.py +171 -85
- sksurv/svm/naive_survival_svm.py +63 -34
- sksurv/svm/survival_svm.py +103 -103
- sksurv/testing.py +47 -0
- sksurv/tree/_criterion.cp313-win_amd64.pyd +0 -0
- sksurv/tree/tree.py +170 -84
- sksurv/util.py +85 -30
- scikit_survival-0.24.1.dist-info/METADATA +0 -889
- scikit_survival-0.24.1.dist-info/RECORD +0 -57
- {scikit_survival-0.24.1.dist-info → scikit_survival-0.26.0.dist-info}/licenses/COPYING +0 -0
- {scikit_survival-0.24.1.dist-info → scikit_survival-0.26.0.dist-info}/top_level.txt +0 -0
sksurv/svm/minlip.py
CHANGED
|
@@ -19,11 +19,23 @@ __all__ = ["MinlipSurvivalAnalysis", "HingeLossSurvivalSVM"]
|
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class QPSolver(metaclass=ABCMeta):
|
|
22
|
-
"""
|
|
23
|
-
|
|
22
|
+
r"""Abstract base class for quadratic program solvers.
|
|
23
|
+
|
|
24
|
+
This class defines the interface for solvers that minimize a quadratic
|
|
25
|
+
objective function subject to linear inequality constraints,
|
|
26
|
+
formulated as:
|
|
27
|
+
|
|
28
|
+
.. math::
|
|
29
|
+
|
|
30
|
+
\min_{x} \quad (1/2)x^T P x + q^T x \\
|
|
31
|
+
\text{subject to} \quad G x \preceq h
|
|
24
32
|
|
|
25
|
-
|
|
26
|
-
|
|
33
|
+
Parameters
|
|
34
|
+
----------
|
|
35
|
+
max_iter : int or None
|
|
36
|
+
Maximum number of iterations to perform.
|
|
37
|
+
verbose : bool
|
|
38
|
+
Enable verbose output of the solver.
|
|
27
39
|
"""
|
|
28
40
|
|
|
29
41
|
@abstractmethod
|
|
@@ -33,7 +45,26 @@ class QPSolver(metaclass=ABCMeta):
|
|
|
33
45
|
|
|
34
46
|
@abstractmethod
|
|
35
47
|
def solve(self, P, q, G, h):
|
|
36
|
-
"""
|
|
48
|
+
"""Find solution to QP.
|
|
49
|
+
|
|
50
|
+
Parameters
|
|
51
|
+
----------
|
|
52
|
+
P : array-like, shape=(n_variables, n_variables)
|
|
53
|
+
Quadratic part of the objective function.
|
|
54
|
+
q : array-like, shape=(n_variables,)
|
|
55
|
+
Linear part of the objective function.
|
|
56
|
+
G : array-like, shape=(n_constraints, n_variables)
|
|
57
|
+
Matrix for inequality constraints.
|
|
58
|
+
h : array-like, shape=(n_constraints,)
|
|
59
|
+
Vector for inequality constraints.
|
|
60
|
+
|
|
61
|
+
Returns
|
|
62
|
+
-------
|
|
63
|
+
x : ndarray, shape=(n_variables,)
|
|
64
|
+
The optimal solution.
|
|
65
|
+
n_iter : int
|
|
66
|
+
Number of iterations performed by the solver.
|
|
67
|
+
"""
|
|
37
68
|
|
|
38
69
|
|
|
39
70
|
class OsqpSolver(QPSolver):
|
|
@@ -50,42 +81,58 @@ class OsqpSolver(QPSolver):
|
|
|
50
81
|
|
|
51
82
|
solver_opts = self._get_options()
|
|
52
83
|
m = osqp.OSQP()
|
|
53
|
-
m.setup(P=sparse.csc_matrix(P), q=q, A=G, u=h, **solver_opts) # noqa: E741
|
|
54
|
-
results = m.solve()
|
|
84
|
+
m.setup(P=sparse.csc_matrix(P), q=q, A=G, l=None, u=h, **solver_opts) # noqa: E741
|
|
85
|
+
results = m.solve(raise_error=False)
|
|
86
|
+
|
|
87
|
+
solved_codes = (
|
|
88
|
+
osqp.SolverStatus.OSQP_SOLVED,
|
|
89
|
+
osqp.SolverStatus.OSQP_SOLVED_INACCURATE,
|
|
90
|
+
)
|
|
55
91
|
|
|
56
|
-
if results.info.status_val ==
|
|
92
|
+
if results.info.status_val == osqp.SolverStatus.OSQP_MAX_ITER_REACHED: # max iter reached
|
|
57
93
|
warnings.warn(
|
|
58
94
|
(f"OSQP solver did not converge: {results.info.status}"),
|
|
59
95
|
category=ConvergenceWarning,
|
|
60
96
|
stacklevel=2,
|
|
61
97
|
)
|
|
62
|
-
elif results.info.status_val not in
|
|
63
|
-
#
|
|
98
|
+
elif results.info.status_val not in solved_codes: # pragma: no cover
|
|
99
|
+
# none of SOLVED, SOLVED_INACCURATE
|
|
64
100
|
raise RuntimeError(f"OSQP solver failed: {results.info.status}")
|
|
65
101
|
|
|
66
102
|
n_iter = results.info.iter
|
|
67
103
|
return results.x[np.newaxis], n_iter
|
|
68
104
|
|
|
69
105
|
def _get_options(self):
|
|
106
|
+
"""Returns a dictionary of OSQP solver options."""
|
|
70
107
|
solver_opts = {
|
|
71
108
|
"eps_abs": 1e-5,
|
|
72
109
|
"eps_rel": 1e-5,
|
|
73
110
|
"max_iter": self.max_iter or 4000,
|
|
74
|
-
"
|
|
111
|
+
"polishing": True,
|
|
75
112
|
"verbose": self.verbose,
|
|
76
113
|
}
|
|
77
114
|
return solver_opts
|
|
78
115
|
|
|
79
116
|
|
|
80
117
|
class EcosSolver(QPSolver):
|
|
81
|
-
"""Solves QP by expressing it as second-order cone program
|
|
118
|
+
r"""Solves QP by expressing it as second-order cone program:
|
|
119
|
+
|
|
120
|
+
.. math::
|
|
82
121
|
|
|
83
|
-
|
|
84
|
-
subject to
|
|
122
|
+
\min \quad c^T x \\
|
|
123
|
+
\text{subject to} \quad G x \preceq_K h
|
|
85
124
|
|
|
86
|
-
where the last inequality is generalized, i.e.
|
|
87
|
-
belongs to the cone
|
|
88
|
-
|
|
125
|
+
where the last inequality is generalized, i.e. :math:`h - G x`
|
|
126
|
+
belongs to the cone :math:`K`.
|
|
127
|
+
|
|
128
|
+
Parameters
|
|
129
|
+
----------
|
|
130
|
+
max_iter : int or None
|
|
131
|
+
Maximum number of iterations to perform.
|
|
132
|
+
verbose : bool
|
|
133
|
+
Enable verbose output of the solver.
|
|
134
|
+
cond : float or None, default: None
|
|
135
|
+
Condition number for eigenvalue decomposition.
|
|
89
136
|
"""
|
|
90
137
|
|
|
91
138
|
EXIT_OPTIMAL = 0 # Optimal solution found
|
|
@@ -144,6 +191,18 @@ class EcosSolver(QPSolver):
|
|
|
144
191
|
return x[np.newaxis], n_iter
|
|
145
192
|
|
|
146
193
|
def _check_success(self, results): # pylint: disable=no-self-use
|
|
194
|
+
"""Checks if the ECOS solver converged successfully.
|
|
195
|
+
|
|
196
|
+
Parameters
|
|
197
|
+
----------
|
|
198
|
+
results : dict
|
|
199
|
+
The results dictionary returned by ``ecos.solve``.
|
|
200
|
+
|
|
201
|
+
Raises
|
|
202
|
+
------
|
|
203
|
+
RuntimeError
|
|
204
|
+
If the solver failed for an unknown reason or found primal/dual infeasibility.
|
|
205
|
+
"""
|
|
147
206
|
exit_flag = results["info"]["exitFlag"]
|
|
148
207
|
if exit_flag in (EcosSolver.EXIT_OPTIMAL, EcosSolver.EXIT_OPTIMAL + EcosSolver.EXIT_INACC_OFFSET):
|
|
149
208
|
return
|
|
@@ -160,6 +219,20 @@ class EcosSolver(QPSolver):
|
|
|
160
219
|
raise RuntimeError(f"Unknown problem in ECOS solver, exit status: {exit_flag}")
|
|
161
220
|
|
|
162
221
|
def _decompose(self, P):
|
|
222
|
+
"""Performs eigenvalue decomposition of P.
|
|
223
|
+
|
|
224
|
+
Parameters
|
|
225
|
+
----------
|
|
226
|
+
P : array-like, shape=(n_variables, n_variables)
|
|
227
|
+
Quadratic part of the objective function.
|
|
228
|
+
|
|
229
|
+
Returns
|
|
230
|
+
-------
|
|
231
|
+
decomposed : ndarray
|
|
232
|
+
Decomposed matrix.
|
|
233
|
+
largest_eigenvalue : float
|
|
234
|
+
The largest eigenvalue of P.
|
|
235
|
+
"""
|
|
163
236
|
# from scipy.linalg.pinvh
|
|
164
237
|
s, u = linalg.eigh(P)
|
|
165
238
|
largest_eigenvalue = np.max(np.abs(s))
|
|
@@ -182,33 +255,38 @@ class EcosSolver(QPSolver):
|
|
|
182
255
|
|
|
183
256
|
|
|
184
257
|
class MinlipSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
|
|
185
|
-
"""Survival model
|
|
186
|
-
|
|
258
|
+
r"""Survival model based on a minimal Lipschitz smoothness strategy.
|
|
259
|
+
|
|
260
|
+
This model is related to :class:`sksurv.svm.FastKernelSurvivalSVM` but
|
|
261
|
+
minimizes a different objective function, focusing on Lipschitz
|
|
262
|
+
smoothness rather than maximal margin. The optimization problem is
|
|
263
|
+
formulated as:
|
|
187
264
|
|
|
188
265
|
.. math::
|
|
189
266
|
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
267
|
+
\min_{\mathbf{w}}\quad
|
|
268
|
+
\frac{1}{2} \lVert \mathbf{w} \rVert_2^2
|
|
269
|
+
+ \gamma \sum_{i = 1}^n \xi_i \\
|
|
270
|
+
\text{subject to}\quad
|
|
271
|
+
\mathbf{w}^\top \mathbf{x}_i - \mathbf{w}^\top \mathbf{x}_j \geq y_i - y_j - \xi_i,\quad
|
|
272
|
+
\forall (i, j) \in \mathcal{P}_\text{1-NN}, \\
|
|
273
|
+
\xi_i \geq 0,\quad \forall i = 1,\dots,n.
|
|
197
274
|
|
|
198
|
-
|
|
199
|
-
|
|
275
|
+
\mathcal{P}_\text{1-NN} = \{ (i, j) \mid y_i > y_j \land \delta_j = 1
|
|
276
|
+
\land \nexists k : y_i > y_k > y_j \land \delta_k = 1 \}_{i,j=1}^n.
|
|
200
277
|
|
|
201
278
|
See [1]_ for further description.
|
|
202
279
|
|
|
203
280
|
Parameters
|
|
204
281
|
----------
|
|
205
|
-
alpha : float,
|
|
282
|
+
alpha : float, optional, default: 1
|
|
206
283
|
Weight of penalizing the hinge loss in the objective function.
|
|
284
|
+
Must be greater than 0.
|
|
207
285
|
|
|
208
286
|
solver : {'ecos', 'osqp'}, optional, default: 'ecos'
|
|
209
287
|
Which quadratic program solver to use.
|
|
210
288
|
|
|
211
|
-
kernel : str or callable, default: 'linear'.
|
|
289
|
+
kernel : str or callable, optional, default: 'linear'.
|
|
212
290
|
Kernel mapping used internally. This parameter is directly passed to
|
|
213
291
|
:func:`sklearn.metrics.pairwise.pairwise_kernels`.
|
|
214
292
|
If `kernel` is a string, it must be one of the metrics
|
|
@@ -228,52 +306,52 @@ class MinlipSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
|
|
|
228
306
|
the kernel; see the documentation for :mod:`sklearn.metrics.pairwise`.
|
|
229
307
|
Ignored by other kernels.
|
|
230
308
|
|
|
231
|
-
degree : int, default: 3
|
|
309
|
+
degree : int, optional, default: 3
|
|
232
310
|
Degree of the polynomial kernel. Ignored by other kernels.
|
|
233
311
|
|
|
234
|
-
coef0 : float, optional
|
|
312
|
+
coef0 : float, optional, default: 1
|
|
235
313
|
Zero coefficient for polynomial and sigmoid kernels.
|
|
236
314
|
Ignored by other kernels.
|
|
237
315
|
|
|
238
|
-
kernel_params :
|
|
316
|
+
kernel_params : dict, optional, default: None
|
|
239
317
|
Additional parameters (keyword arguments) for kernel function passed
|
|
240
318
|
as callable object.
|
|
241
319
|
|
|
242
320
|
pairs : {'all', 'nearest', 'next'}, optional, default: 'nearest'
|
|
243
321
|
Which constraints to use in the optimization problem.
|
|
244
322
|
|
|
245
|
-
- all: Use all comparable pairs. Scales
|
|
323
|
+
- all: Use all comparable pairs. Scales quadratically in number of samples
|
|
246
324
|
(cf. :class:`sksurv.svm.HingeLossSurvivalSVM`).
|
|
247
325
|
- nearest: Only considers comparable pairs :math:`(i, j)` where :math:`j` is the
|
|
248
326
|
uncensored sample with highest survival time smaller than :math:`y_i`.
|
|
249
|
-
Scales
|
|
327
|
+
Scales linearly in number of samples.
|
|
250
328
|
- next: Only compare against direct nearest neighbor according to observed time,
|
|
251
|
-
disregarding its censoring status. Scales
|
|
329
|
+
disregarding its censoring status. Scales linearly in number of samples.
|
|
252
330
|
|
|
253
|
-
verbose : bool, default: False
|
|
331
|
+
verbose : bool, optional, default: False
|
|
254
332
|
Enable verbose output of solver.
|
|
255
333
|
|
|
256
|
-
timeit :
|
|
257
|
-
If non-zero
|
|
258
|
-
|
|
259
|
-
``timings_`` attribute.
|
|
334
|
+
timeit : bool, int, or None, optional, default: False
|
|
335
|
+
If ``True`` or a non-zero integer, the time taken for optimization is measured.
|
|
336
|
+
If an integer is provided, the optimization is repeated that many times.
|
|
337
|
+
Results can be accessed from the ``timings_`` attribute.
|
|
260
338
|
|
|
261
339
|
max_iter : int or None, optional, default: None
|
|
262
|
-
|
|
263
|
-
use solver's default value.
|
|
340
|
+
The maximum number of iterations taken for the solvers to converge.
|
|
341
|
+
If ``None``, use solver's default value.
|
|
264
342
|
|
|
265
343
|
Attributes
|
|
266
344
|
----------
|
|
267
|
-
X_fit_ : ndarray
|
|
345
|
+
X_fit_ : ndarray, shape = (n_samples, `n_features_in_`)
|
|
268
346
|
Training data.
|
|
269
347
|
|
|
270
|
-
coef_ : ndarray, shape = (n_samples,)
|
|
348
|
+
coef_ : ndarray, shape = (n_samples,), dtype = float
|
|
271
349
|
Coefficients of the features in the decision function.
|
|
272
350
|
|
|
273
351
|
n_features_in_ : int
|
|
274
352
|
Number of features seen during ``fit``.
|
|
275
353
|
|
|
276
|
-
feature_names_in_ : ndarray
|
|
354
|
+
feature_names_in_ : ndarray, shape = (`n_features_in_`,)
|
|
277
355
|
Names of features seen during ``fit``. Defined only when `X`
|
|
278
356
|
has feature names that are all strings.
|
|
279
357
|
|
|
@@ -405,9 +483,9 @@ class MinlipSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
|
|
|
405
483
|
Data matrix.
|
|
406
484
|
|
|
407
485
|
y : structured array, shape = (n_samples,)
|
|
408
|
-
A structured array
|
|
409
|
-
|
|
410
|
-
second field.
|
|
486
|
+
A structured array with two fields. The first field is a boolean
|
|
487
|
+
where ``True`` indicates an event and ``False`` indicates right-censoring.
|
|
488
|
+
The second field is a float with the time of event or time of censoring.
|
|
411
489
|
|
|
412
490
|
Returns
|
|
413
491
|
-------
|
|
@@ -423,8 +501,10 @@ class MinlipSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
|
|
|
423
501
|
def predict(self, X):
|
|
424
502
|
"""Predict risk score of experiencing an event.
|
|
425
503
|
|
|
426
|
-
Higher
|
|
427
|
-
lower
|
|
504
|
+
Higher values indicate an increased risk of experiencing an event,
|
|
505
|
+
lower values a decreased risk of experiencing an event. The scores
|
|
506
|
+
have no unit and are only meaningful to rank samples by their risk
|
|
507
|
+
of experiencing an event.
|
|
428
508
|
|
|
429
509
|
Parameters
|
|
430
510
|
----------
|
|
@@ -443,37 +523,39 @@ class MinlipSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
|
|
|
443
523
|
|
|
444
524
|
|
|
445
525
|
class HingeLossSurvivalSVM(MinlipSurvivalAnalysis):
|
|
446
|
-
"""Naive implementation of kernel survival support vector machine.
|
|
526
|
+
r"""Naive implementation of kernel survival support vector machine.
|
|
447
527
|
|
|
448
|
-
|
|
449
|
-
vectors in the original data
|
|
450
|
-
:math:`O(
|
|
528
|
+
This implementation creates a new set of samples by building the difference
|
|
529
|
+
between any two feature vectors in the original data. This approach
|
|
530
|
+
requires :math:`O(\text{n_samples}^4)` space and
|
|
531
|
+
:math:`O(\text{n_samples}^6 \cdot \text{n_features})` time, making it
|
|
532
|
+
computationally intensive for large datasets.
|
|
451
533
|
|
|
452
|
-
|
|
534
|
+
The optimization problem is formulated as:
|
|
453
535
|
|
|
454
536
|
.. math::
|
|
455
537
|
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
538
|
+
\min_{\mathbf{w}}\quad
|
|
539
|
+
\frac{1}{2} \lVert \mathbf{w} \rVert_2^2
|
|
540
|
+
+ \gamma \sum_{i = 1}^n \xi_i \\
|
|
541
|
+
\text{subject to}\quad
|
|
542
|
+
\mathbf{w}^\top \phi(\mathbf{x})_i - \mathbf{w}^\top \phi(\mathbf{x})_j \geq 1 - \xi_{ij},\quad
|
|
543
|
+
\forall (i, j) \in \mathcal{P}, \\
|
|
544
|
+
\xi_{ij} \geq 0,\quad \forall (i, j) \in \mathcal{P}.
|
|
463
545
|
|
|
464
|
-
|
|
546
|
+
\mathcal{P} = \{ (i, j) \mid y_i > y_j \land \delta_j = 1 \}_{i,j=1,\dots,n}.
|
|
465
547
|
|
|
466
548
|
See [1]_, [2]_, [3]_ for further description.
|
|
467
549
|
|
|
468
550
|
Parameters
|
|
469
551
|
----------
|
|
470
|
-
alpha : float,
|
|
471
|
-
Weight of penalizing the hinge loss in the objective function.
|
|
552
|
+
alpha : float, optional, default: 1
|
|
553
|
+
Weight of penalizing the hinge loss in the objective function. Must be greater than 0.
|
|
472
554
|
|
|
473
555
|
solver : {'ecos', 'osqp'}, optional, default: 'ecos'
|
|
474
556
|
Which quadratic program solver to use.
|
|
475
557
|
|
|
476
|
-
kernel :
|
|
558
|
+
kernel : str or callable, optional, default: 'linear'
|
|
477
559
|
Kernel mapping used internally. This parameter is directly passed to
|
|
478
560
|
:func:`sklearn.metrics.pairwise.pairwise_kernels`.
|
|
479
561
|
If `kernel` is a string, it must be one of the metrics
|
|
@@ -487,63 +569,67 @@ class HingeLossSurvivalSVM(MinlipSurvivalAnalysis):
|
|
|
487
569
|
they operate on matrices, not single samples. Use the string
|
|
488
570
|
identifying the kernel instead.
|
|
489
571
|
|
|
490
|
-
gamma : float, optional, default: None
|
|
572
|
+
gamma : float or None, optional, default: None
|
|
491
573
|
Gamma parameter for the RBF, laplacian, polynomial, exponential chi2
|
|
492
574
|
and sigmoid kernels. Interpretation of the default value is left to
|
|
493
575
|
the kernel; see the documentation for :mod:`sklearn.metrics.pairwise`.
|
|
494
576
|
Ignored by other kernels.
|
|
495
577
|
|
|
496
|
-
degree : int, default: 3
|
|
578
|
+
degree : int, optional, default: 3
|
|
497
579
|
Degree of the polynomial kernel. Ignored by other kernels.
|
|
498
580
|
|
|
499
|
-
coef0 : float, optional
|
|
581
|
+
coef0 : float, optional, default: 1
|
|
500
582
|
Zero coefficient for polynomial and sigmoid kernels.
|
|
501
583
|
Ignored by other kernels.
|
|
502
584
|
|
|
503
|
-
kernel_params :
|
|
585
|
+
kernel_params : dict or None, optional, default: None
|
|
504
586
|
Additional parameters (keyword arguments) for kernel function passed
|
|
505
587
|
as callable object.
|
|
506
588
|
|
|
507
589
|
pairs : {'all', 'nearest', 'next'}, optional, default: 'all'
|
|
508
590
|
Which constraints to use in the optimization problem.
|
|
509
591
|
|
|
510
|
-
- all: Use all comparable pairs. Scales
|
|
592
|
+
- all: Use all comparable pairs. Scales quadratically in number of samples.
|
|
511
593
|
- nearest: Only considers comparable pairs :math:`(i, j)` where :math:`j` is the
|
|
512
594
|
uncensored sample with highest survival time smaller than :math:`y_i`.
|
|
513
|
-
Scales
|
|
595
|
+
Scales linearly in number of samples (cf. :class:`sksurv.svm.MinlipSurvivalAnalysis`).
|
|
514
596
|
- next: Only compare against direct nearest neighbor according to observed time,
|
|
515
|
-
disregarding its censoring status. Scales
|
|
597
|
+
disregarding its censoring status. Scales linearly in number of samples.
|
|
516
598
|
|
|
517
|
-
verbose : bool, default: False
|
|
518
|
-
|
|
599
|
+
verbose : bool, optional, default: False
|
|
600
|
+
If ``True``, enable verbose output of the solver.
|
|
519
601
|
|
|
520
|
-
timeit :
|
|
521
|
-
If non-zero
|
|
522
|
-
|
|
523
|
-
``timings_`` attribute.
|
|
602
|
+
timeit : bool, int, or None, optional, default: False
|
|
603
|
+
If ``True`` or a non-zero integer, the time taken for optimization is measured.
|
|
604
|
+
If an integer is provided, the optimization is repeated that many times.
|
|
605
|
+
Results can be accessed from the ``timings_`` attribute.
|
|
524
606
|
|
|
525
607
|
max_iter : int or None, optional, default: None
|
|
526
|
-
|
|
527
|
-
use solver's default value.
|
|
608
|
+
The maximum number of iterations taken for the solvers to converge.
|
|
609
|
+
If ``None``, use solver's default value.
|
|
528
610
|
|
|
529
611
|
Attributes
|
|
530
612
|
----------
|
|
531
|
-
X_fit_ : ndarray
|
|
613
|
+
X_fit_ : ndarray, shape = (n_samples, `n_features_in_`)
|
|
532
614
|
Training data.
|
|
533
615
|
|
|
534
|
-
coef_ : ndarray, shape = (n_samples,)
|
|
616
|
+
coef_ : ndarray, shape = (n_samples,), dtype = float
|
|
535
617
|
Coefficients of the features in the decision function.
|
|
536
618
|
|
|
537
619
|
n_features_in_ : int
|
|
538
620
|
Number of features seen during ``fit``.
|
|
539
621
|
|
|
540
|
-
feature_names_in_ : ndarray
|
|
622
|
+
feature_names_in_ : ndarray, shape = (`n_features_in_`,), dtype = object
|
|
541
623
|
Names of features seen during ``fit``. Defined only when `X`
|
|
542
624
|
has feature names that are all strings.
|
|
543
625
|
|
|
544
626
|
n_iter_ : int
|
|
545
627
|
Number of iterations run by the optimization routine to fit the model.
|
|
546
628
|
|
|
629
|
+
See also
|
|
630
|
+
--------
|
|
631
|
+
sksurv.svm.NaiveSurvivalSVM : The linear naive survival SVM based on liblinear.
|
|
632
|
+
|
|
547
633
|
References
|
|
548
634
|
----------
|
|
549
635
|
.. [1] Van Belle, V., Pelckmans, K., Suykens, J. A., & Van Huffel, S.
|
sksurv/svm/naive_survival_svm.py
CHANGED
|
@@ -24,61 +24,63 @@ from ..util import check_array_survival
|
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
|
|
27
|
-
"""Naive
|
|
27
|
+
r"""Naive implementation of linear Survival Support Vector Machine.
|
|
28
28
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
29
|
+
This class uses a regular linear support vector classifier (liblinear)
|
|
30
|
+
to implement a survival SVM. It constructs a new dataset by computing
|
|
31
|
+
the difference between feature vectors of comparable pairs from the
|
|
32
|
+
original data. This approach results in a space complexity of
|
|
33
|
+
:math:`O(\text{n_samples}^2)`.
|
|
32
34
|
|
|
33
|
-
|
|
35
|
+
The optimization problem is formulated as:
|
|
34
36
|
|
|
35
37
|
.. math::
|
|
36
38
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
39
|
+
\min_{\mathbf{w}}\quad
|
|
40
|
+
\frac{1}{2} \lVert \mathbf{w} \rVert_2^2
|
|
41
|
+
+ \gamma \sum_{i = 1}^n \xi_i \\
|
|
42
|
+
\text{subject to}\quad
|
|
43
|
+
\mathbf{w}^\top \mathbf{x}_i - \mathbf{w}^\top \mathbf{x}_j \geq 1 - \xi_{ij},\quad
|
|
44
|
+
\forall (i, j) \in \mathcal{P}, \\
|
|
45
|
+
\xi_{ij} \geq 0,\quad \forall (i, j) \in \mathcal{P}.
|
|
44
46
|
|
|
45
|
-
|
|
47
|
+
\mathcal{P} = \{ (i, j) \mid y_i > y_j \land \delta_j = 1 \}_{i,j=1,\dots,n}.
|
|
46
48
|
|
|
47
49
|
See [1]_, [2]_ for further description.
|
|
48
50
|
|
|
49
51
|
Parameters
|
|
50
52
|
----------
|
|
51
|
-
alpha : float,
|
|
52
|
-
Weight of penalizing the squared hinge loss in the objective function.
|
|
53
|
+
alpha : float, optional, default: 1.0
|
|
54
|
+
Weight of penalizing the squared hinge loss in the objective function. Must be greater than 0.
|
|
53
55
|
|
|
54
|
-
loss : {'hinge', 'squared_hinge'}, default: 'squared_hinge'
|
|
56
|
+
loss : {'hinge', 'squared_hinge'}, optional, default: 'squared_hinge'
|
|
55
57
|
Specifies the loss function. 'hinge' is the standard SVM loss
|
|
56
58
|
(used e.g. by the SVC class) while 'squared_hinge' is the
|
|
57
59
|
square of the hinge loss.
|
|
58
60
|
|
|
59
|
-
penalty : {'l1', 'l2'}, default: 'l2'
|
|
61
|
+
penalty : {'l1', 'l2'}, optional, default: 'l2'
|
|
60
62
|
Specifies the norm used in the penalization. The 'l2'
|
|
61
63
|
penalty is the standard used in SVC. The 'l1' leads to `coef_`
|
|
62
64
|
vectors that are sparse.
|
|
63
65
|
|
|
64
|
-
dual : bool, default: True
|
|
66
|
+
dual : bool, optional, default: True
|
|
65
67
|
Select the algorithm to either solve the dual or primal
|
|
66
68
|
optimization problem. Prefer dual=False when n_samples > n_features.
|
|
67
69
|
|
|
68
70
|
tol : float, optional, default: 1e-4
|
|
69
71
|
Tolerance for stopping criteria.
|
|
70
72
|
|
|
71
|
-
verbose : int, default: 0
|
|
72
|
-
|
|
73
|
+
verbose : int, optional, default: 0
|
|
74
|
+
If nonzero, enable verbose output. Note that this setting takes advantage of a
|
|
73
75
|
per-process runtime setting in liblinear that, if enabled, may not work
|
|
74
76
|
properly in a multithreaded context.
|
|
75
77
|
|
|
76
|
-
random_state : int
|
|
77
|
-
|
|
78
|
-
|
|
78
|
+
random_state : int, :class:`numpy.random.RandomState` instance, or None, optional, default: None
|
|
79
|
+
Used to resolve ties in survival times. Pass an int for reproducible output across
|
|
80
|
+
multiple :meth:`fit` calls.
|
|
79
81
|
|
|
80
|
-
max_iter : int, default: 1000
|
|
81
|
-
The maximum number of iterations to
|
|
82
|
+
max_iter : int, optional, default: 1000
|
|
83
|
+
The maximum number of iterations taken for the solver to converge.
|
|
82
84
|
|
|
83
85
|
Attributes
|
|
84
86
|
----------
|
|
@@ -87,8 +89,8 @@ class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
|
|
|
87
89
|
|
|
88
90
|
See also
|
|
89
91
|
--------
|
|
90
|
-
sksurv.svm.FastSurvivalSVM
|
|
91
|
-
|
|
92
|
+
sksurv.svm.FastSurvivalSVM : Alternative implementation with reduced time complexity for training.
|
|
93
|
+
sksurv.svm.HingeLossSurvivalSVM : Non-linear version of the naive survival SVM based on kernel functions.
|
|
92
94
|
|
|
93
95
|
References
|
|
94
96
|
----------
|
|
@@ -138,6 +140,30 @@ class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
|
|
|
138
140
|
self.alpha = alpha
|
|
139
141
|
|
|
140
142
|
def _get_survival_pairs(self, X, y, random_state): # pylint: disable=no-self-use
|
|
143
|
+
"""Generates comparable pairs from survival data.
|
|
144
|
+
|
|
145
|
+
Parameters
|
|
146
|
+
----------
|
|
147
|
+
X : array-like, shape = (n_samples, n_features)
|
|
148
|
+
Data matrix.
|
|
149
|
+
y : structured array, shape = (n_samples,)
|
|
150
|
+
A structured array containing the binary event indicator
|
|
151
|
+
and time of event or time of censoring.
|
|
152
|
+
random_state : RandomState instance
|
|
153
|
+
Random number generator used for shuffling.
|
|
154
|
+
|
|
155
|
+
Returns
|
|
156
|
+
-------
|
|
157
|
+
x_pairs : ndarray, shape = (n_pairs, n_features)
|
|
158
|
+
Feature differences for comparable pairs.
|
|
159
|
+
y_pairs : ndarray, shape = (n_pairs,)
|
|
160
|
+
Labels for comparable pairs (1 or -1).
|
|
161
|
+
|
|
162
|
+
Raises
|
|
163
|
+
------
|
|
164
|
+
NoComparablePairException
|
|
165
|
+
If no comparable pairs can be formed from the input data.
|
|
166
|
+
"""
|
|
141
167
|
feature_names = _get_feature_names(X)
|
|
142
168
|
|
|
143
169
|
X = validate_data(self, X, ensure_min_samples=2)
|
|
@@ -180,9 +206,9 @@ class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
|
|
|
180
206
|
Data matrix.
|
|
181
207
|
|
|
182
208
|
y : structured array, shape = (n_samples,)
|
|
183
|
-
A structured array
|
|
184
|
-
|
|
185
|
-
second field.
|
|
209
|
+
A structured array with two fields. The first field is a boolean
|
|
210
|
+
where ``True`` indicates an event and ``False`` indicates right-censoring.
|
|
211
|
+
The second field is a float with the time of event or time of censoring.
|
|
186
212
|
|
|
187
213
|
sample_weight : array-like, shape = (n_samples,), optional
|
|
188
214
|
Array of weights that are assigned to individual
|
|
@@ -203,9 +229,12 @@ class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
|
|
|
203
229
|
return super().fit(x_pairs, y_pairs, sample_weight=sample_weight)
|
|
204
230
|
|
|
205
231
|
def predict(self, X):
|
|
206
|
-
"""
|
|
232
|
+
"""Predict risk scores.
|
|
207
233
|
|
|
208
|
-
|
|
234
|
+
Predictions are risk scores (i.e. higher values indicate an
|
|
235
|
+
increased risk of experiencing an event). The scores have no
|
|
236
|
+
unit and are only meaningful to rank samples by their risk
|
|
237
|
+
of experiencing an event.
|
|
209
238
|
|
|
210
239
|
Parameters
|
|
211
240
|
----------
|
|
@@ -214,7 +243,7 @@ class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
|
|
|
214
243
|
|
|
215
244
|
Returns
|
|
216
245
|
-------
|
|
217
|
-
y : ndarray, shape = (n_samples,)
|
|
218
|
-
Predicted
|
|
246
|
+
y : ndarray, shape = (n_samples,), dtype = float
|
|
247
|
+
Predicted risk scores.
|
|
219
248
|
"""
|
|
220
249
|
return -self.decision_function(X)
|