scikit-survival 0.24.0__cp313-cp313-macosx_11_0_arm64.whl → 0.25.0__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scikit_survival-0.25.0.dist-info/METADATA +185 -0
- scikit_survival-0.25.0.dist-info/RECORD +58 -0
- {scikit_survival-0.24.0.dist-info → scikit_survival-0.25.0.dist-info}/WHEEL +2 -1
- sksurv/__init__.py +51 -6
- sksurv/base.py +12 -2
- sksurv/bintrees/_binarytrees.cpython-313-darwin.so +0 -0
- sksurv/column.py +33 -29
- sksurv/compare.py +22 -22
- sksurv/datasets/base.py +45 -20
- sksurv/docstrings.py +99 -0
- sksurv/ensemble/_coxph_loss.cpython-313-darwin.so +0 -0
- sksurv/ensemble/boosting.py +116 -168
- sksurv/ensemble/forest.py +94 -151
- sksurv/functions.py +29 -29
- sksurv/io/arffread.py +34 -3
- sksurv/io/arffwrite.py +38 -2
- sksurv/kernels/_clinical_kernel.cpython-313-darwin.so +0 -0
- sksurv/kernels/clinical.py +33 -13
- sksurv/linear_model/_coxnet.cpython-313-darwin.so +0 -0
- sksurv/linear_model/aft.py +14 -11
- sksurv/linear_model/coxnet.py +138 -89
- sksurv/linear_model/coxph.py +102 -83
- sksurv/meta/ensemble_selection.py +91 -9
- sksurv/meta/stacking.py +47 -26
- sksurv/metrics.py +257 -224
- sksurv/nonparametric.py +150 -81
- sksurv/preprocessing.py +55 -27
- sksurv/svm/_minlip.cpython-313-darwin.so +0 -0
- sksurv/svm/_prsvm.cpython-313-darwin.so +0 -0
- sksurv/svm/minlip.py +160 -79
- sksurv/svm/naive_survival_svm.py +63 -34
- sksurv/svm/survival_svm.py +104 -104
- sksurv/tree/_criterion.cpython-313-darwin.so +0 -0
- sksurv/tree/tree.py +170 -84
- sksurv/util.py +80 -26
- scikit_survival-0.24.0.dist-info/METADATA +0 -888
- scikit_survival-0.24.0.dist-info/RECORD +0 -57
- {scikit_survival-0.24.0.dist-info → scikit_survival-0.25.0.dist-info/licenses}/COPYING +0 -0
- {scikit_survival-0.24.0.dist-info → scikit_survival-0.25.0.dist-info}/top_level.txt +0 -0
sksurv/svm/minlip.py
CHANGED
|
@@ -19,11 +19,23 @@ __all__ = ["MinlipSurvivalAnalysis", "HingeLossSurvivalSVM"]
|
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class QPSolver(metaclass=ABCMeta):
|
|
22
|
-
"""
|
|
23
|
-
|
|
22
|
+
r"""Abstract base class for quadratic program solvers.
|
|
23
|
+
|
|
24
|
+
This class defines the interface for solvers that minimize a quadratic
|
|
25
|
+
objective function subject to linear inequality constraints,
|
|
26
|
+
formulated as:
|
|
27
|
+
|
|
28
|
+
.. math::
|
|
29
|
+
|
|
30
|
+
\min_{x} \quad (1/2)x^T P x + q^T x \\
|
|
31
|
+
\text{subject to} \quad G x \preceq h
|
|
24
32
|
|
|
25
|
-
|
|
26
|
-
|
|
33
|
+
Parameters
|
|
34
|
+
----------
|
|
35
|
+
max_iter : int or None
|
|
36
|
+
Maximum number of iterations to perform.
|
|
37
|
+
verbose : bool
|
|
38
|
+
Enable verbose output of the solver.
|
|
27
39
|
"""
|
|
28
40
|
|
|
29
41
|
@abstractmethod
|
|
@@ -33,7 +45,26 @@ class QPSolver(metaclass=ABCMeta):
|
|
|
33
45
|
|
|
34
46
|
@abstractmethod
|
|
35
47
|
def solve(self, P, q, G, h):
|
|
36
|
-
"""
|
|
48
|
+
"""Find solution to QP.
|
|
49
|
+
|
|
50
|
+
Parameters
|
|
51
|
+
----------
|
|
52
|
+
P : array-like, shape=(n_variables, n_variables)
|
|
53
|
+
Quadratic part of the objective function.
|
|
54
|
+
q : array-like, shape=(n_variables,)
|
|
55
|
+
Linear part of the objective function.
|
|
56
|
+
G : array-like, shape=(n_constraints, n_variables)
|
|
57
|
+
Matrix for inequality constraints.
|
|
58
|
+
h : array-like, shape=(n_constraints,)
|
|
59
|
+
Vector for inequality constraints.
|
|
60
|
+
|
|
61
|
+
Returns
|
|
62
|
+
-------
|
|
63
|
+
x : ndarray, shape=(n_variables,)
|
|
64
|
+
The optimal solution.
|
|
65
|
+
n_iter : int
|
|
66
|
+
Number of iterations performed by the solver.
|
|
67
|
+
"""
|
|
37
68
|
|
|
38
69
|
|
|
39
70
|
class OsqpSolver(QPSolver):
|
|
@@ -67,6 +98,7 @@ class OsqpSolver(QPSolver):
|
|
|
67
98
|
return results.x[np.newaxis], n_iter
|
|
68
99
|
|
|
69
100
|
def _get_options(self):
|
|
101
|
+
"""Returns a dictionary of OSQP solver options."""
|
|
70
102
|
solver_opts = {
|
|
71
103
|
"eps_abs": 1e-5,
|
|
72
104
|
"eps_rel": 1e-5,
|
|
@@ -78,14 +110,24 @@ class OsqpSolver(QPSolver):
|
|
|
78
110
|
|
|
79
111
|
|
|
80
112
|
class EcosSolver(QPSolver):
|
|
81
|
-
"""Solves QP by expressing it as second-order cone program
|
|
113
|
+
r"""Solves QP by expressing it as a second-order cone program:
|
|
82
114
|
|
|
83
|
-
|
|
84
|
-
subject to G @ x <=_K h
|
|
115
|
+
.. math::
|
|
85
116
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
117
|
+
\min \quad c^T x \\
|
|
118
|
+
\text{subject to} \quad G x \preceq_K h
|
|
119
|
+
|
|
120
|
+
where the last inequality is generalized, i.e. :math:`h - G x`
|
|
121
|
+
belongs to the cone :math:`K`.
|
|
122
|
+
|
|
123
|
+
Parameters
|
|
124
|
+
----------
|
|
125
|
+
max_iter : int or None
|
|
126
|
+
Maximum number of iterations to perform.
|
|
127
|
+
verbose : bool
|
|
128
|
+
Enable verbose output of the solver.
|
|
129
|
+
cond : float or None, default: None
|
|
130
|
+
Condition number for eigenvalue decomposition.
|
|
89
131
|
"""
|
|
90
132
|
|
|
91
133
|
EXIT_OPTIMAL = 0 # Optimal solution found
|
|
@@ -144,6 +186,18 @@ class EcosSolver(QPSolver):
|
|
|
144
186
|
return x[np.newaxis], n_iter
|
|
145
187
|
|
|
146
188
|
def _check_success(self, results): # pylint: disable=no-self-use
|
|
189
|
+
"""Checks if the ECOS solver converged successfully.
|
|
190
|
+
|
|
191
|
+
Parameters
|
|
192
|
+
----------
|
|
193
|
+
results : dict
|
|
194
|
+
The results dictionary returned by ``ecos.solve``.
|
|
195
|
+
|
|
196
|
+
Raises
|
|
197
|
+
------
|
|
198
|
+
RuntimeError
|
|
199
|
+
If the solver failed for an unknown reason or found primal/dual infeasibility.
|
|
200
|
+
"""
|
|
147
201
|
exit_flag = results["info"]["exitFlag"]
|
|
148
202
|
if exit_flag in (EcosSolver.EXIT_OPTIMAL, EcosSolver.EXIT_OPTIMAL + EcosSolver.EXIT_INACC_OFFSET):
|
|
149
203
|
return
|
|
@@ -160,6 +214,20 @@ class EcosSolver(QPSolver):
|
|
|
160
214
|
raise RuntimeError(f"Unknown problem in ECOS solver, exit status: {exit_flag}")
|
|
161
215
|
|
|
162
216
|
def _decompose(self, P):
|
|
217
|
+
"""Performs eigenvalue decomposition of P.
|
|
218
|
+
|
|
219
|
+
Parameters
|
|
220
|
+
----------
|
|
221
|
+
P : array-like, shape=(n_variables, n_variables)
|
|
222
|
+
Quadratic part of the objective function.
|
|
223
|
+
|
|
224
|
+
Returns
|
|
225
|
+
-------
|
|
226
|
+
decomposed : ndarray
|
|
227
|
+
Decomposed matrix.
|
|
228
|
+
largest_eigenvalue : float
|
|
229
|
+
The largest eigenvalue of P.
|
|
230
|
+
"""
|
|
163
231
|
# from scipy.linalg.pinvh
|
|
164
232
|
s, u = linalg.eigh(P)
|
|
165
233
|
largest_eigenvalue = np.max(np.abs(s))
|
|
@@ -182,33 +250,38 @@ class EcosSolver(QPSolver):
|
|
|
182
250
|
|
|
183
251
|
|
|
184
252
|
class MinlipSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
|
|
185
|
-
"""Survival model
|
|
186
|
-
|
|
253
|
+
r"""Survival model based on a minimal Lipschitz smoothness strategy.
|
|
254
|
+
|
|
255
|
+
This model is related to :class:`sksurv.svm.FastKernelSurvivalSVM` but
|
|
256
|
+
minimizes a different objective function, focusing on Lipschitz
|
|
257
|
+
smoothness rather than maximal margin. The optimization problem is
|
|
258
|
+
formulated as:
|
|
187
259
|
|
|
188
260
|
.. math::
|
|
189
261
|
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
262
|
+
\min_{\mathbf{w}}\quad
|
|
263
|
+
\frac{1}{2} \lVert \mathbf{w} \rVert_2^2
|
|
264
|
+
+ \gamma \sum_{i = 1}^n \xi_i \\
|
|
265
|
+
\text{subject to}\quad
|
|
266
|
+
\mathbf{w}^\top \mathbf{x}_i - \mathbf{w}^\top \mathbf{x}_j \geq y_i - y_j - \xi_i,\quad
|
|
267
|
+
\forall (i, j) \in \mathcal{P}_\text{1-NN}, \\
|
|
268
|
+
\xi_i \geq 0,\quad \forall i = 1,\dots,n.
|
|
197
269
|
|
|
198
|
-
|
|
199
|
-
|
|
270
|
+
\mathcal{P}_\text{1-NN} = \{ (i, j) \mid y_i > y_j \land \delta_j = 1
|
|
271
|
+
\land \nexists k : y_i > y_k > y_j \land \delta_k = 1 \}_{i,j=1}^n.
|
|
200
272
|
|
|
201
273
|
See [1]_ for further description.
|
|
202
274
|
|
|
203
275
|
Parameters
|
|
204
276
|
----------
|
|
205
|
-
alpha : float,
|
|
277
|
+
alpha : float, optional, default: 1
|
|
206
278
|
Weight of penalizing the hinge loss in the objective function.
|
|
279
|
+
Must be greater than 0.
|
|
207
280
|
|
|
208
281
|
solver : {'ecos', 'osqp'}, optional, default: 'ecos'
|
|
209
282
|
Which quadratic program solver to use.
|
|
210
283
|
|
|
211
|
-
kernel : str or callable, default: 'linear'.
|
|
284
|
+
kernel : str or callable, optional, default: 'linear'
|
|
212
285
|
Kernel mapping used internally. This parameter is directly passed to
|
|
213
286
|
:func:`sklearn.metrics.pairwise.pairwise_kernels`.
|
|
214
287
|
If `kernel` is a string, it must be one of the metrics
|
|
@@ -228,52 +301,52 @@ class MinlipSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
|
|
|
228
301
|
the kernel; see the documentation for :mod:`sklearn.metrics.pairwise`.
|
|
229
302
|
Ignored by other kernels.
|
|
230
303
|
|
|
231
|
-
degree : int, default: 3
|
|
304
|
+
degree : int, optional, default: 3
|
|
232
305
|
Degree of the polynomial kernel. Ignored by other kernels.
|
|
233
306
|
|
|
234
|
-
coef0 : float, optional
|
|
307
|
+
coef0 : float, optional, default: 1
|
|
235
308
|
Zero coefficient for polynomial and sigmoid kernels.
|
|
236
309
|
Ignored by other kernels.
|
|
237
310
|
|
|
238
|
-
kernel_params :
|
|
311
|
+
kernel_params : dict or None, optional, default: None
|
|
239
312
|
Additional parameters (keyword arguments) for kernel function passed
|
|
240
313
|
as callable object.
|
|
241
314
|
|
|
242
315
|
pairs : {'all', 'nearest', 'next'}, optional, default: 'nearest'
|
|
243
316
|
Which constraints to use in the optimization problem.
|
|
244
317
|
|
|
245
|
-
- all: Use all comparable pairs. Scales
|
|
318
|
+
- all: Use all comparable pairs. Scales quadratically in number of samples
|
|
246
319
|
(cf. :class:`sksurv.svm.HingeLossSurvivalSVM`).
|
|
247
320
|
- nearest: Only considers comparable pairs :math:`(i, j)` where :math:`j` is the
|
|
248
321
|
uncensored sample with highest survival time smaller than :math:`y_i`.
|
|
249
|
-
Scales
|
|
322
|
+
Scales linearly in number of samples.
|
|
250
323
|
- next: Only compare against direct nearest neighbor according to observed time,
|
|
251
|
-
disregarding its censoring status. Scales
|
|
324
|
+
disregarding its censoring status. Scales linearly in number of samples.
|
|
252
325
|
|
|
253
|
-
verbose : bool, default: False
|
|
326
|
+
verbose : bool, optional, default: False
|
|
254
327
|
Enable verbose output of solver.
|
|
255
328
|
|
|
256
|
-
timeit :
|
|
257
|
-
If non-zero
|
|
258
|
-
|
|
259
|
-
``timings_`` attribute.
|
|
329
|
+
timeit : bool, int, or None, optional, default: False
|
|
330
|
+
If ``True`` or a non-zero integer, the time taken for optimization is measured.
|
|
331
|
+
If an integer is provided, the optimization is repeated that many times.
|
|
332
|
+
Results can be accessed from the ``timings_`` attribute.
|
|
260
333
|
|
|
261
334
|
max_iter : int or None, optional, default: None
|
|
262
|
-
|
|
263
|
-
use solver's default value.
|
|
335
|
+
The maximum number of iterations taken for the solvers to converge.
|
|
336
|
+
If ``None``, use solver's default value.
|
|
264
337
|
|
|
265
338
|
Attributes
|
|
266
339
|
----------
|
|
267
|
-
X_fit_ : ndarray
|
|
340
|
+
X_fit_ : ndarray, shape = (n_samples, `n_features_in_`)
|
|
268
341
|
Training data.
|
|
269
342
|
|
|
270
|
-
coef_ : ndarray, shape = (n_samples,)
|
|
343
|
+
coef_ : ndarray, shape = (n_samples,), dtype = float
|
|
271
344
|
Coefficients of the features in the decision function.
|
|
272
345
|
|
|
273
346
|
n_features_in_ : int
|
|
274
347
|
Number of features seen during ``fit``.
|
|
275
348
|
|
|
276
|
-
feature_names_in_ : ndarray
|
|
349
|
+
feature_names_in_ : ndarray, shape = (`n_features_in_`,)
|
|
277
350
|
Names of features seen during ``fit``. Defined only when `X`
|
|
278
351
|
has feature names that are all strings.
|
|
279
352
|
|
|
@@ -405,9 +478,9 @@ class MinlipSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
|
|
|
405
478
|
Data matrix.
|
|
406
479
|
|
|
407
480
|
y : structured array, shape = (n_samples,)
|
|
408
|
-
A structured array
|
|
409
|
-
|
|
410
|
-
second field.
|
|
481
|
+
A structured array with two fields. The first field is a boolean
|
|
482
|
+
where ``True`` indicates an event and ``False`` indicates right-censoring.
|
|
483
|
+
The second field is a float with the time of event or time of censoring.
|
|
411
484
|
|
|
412
485
|
Returns
|
|
413
486
|
-------
|
|
@@ -423,8 +496,10 @@ class MinlipSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
|
|
|
423
496
|
def predict(self, X):
|
|
424
497
|
"""Predict risk score of experiencing an event.
|
|
425
498
|
|
|
426
|
-
Higher
|
|
427
|
-
lower
|
|
499
|
+
Higher values indicate an increased risk of experiencing an event,
|
|
500
|
+
lower values a decreased risk of experiencing an event. The scores
|
|
501
|
+
have no unit and are only meaningful to rank samples by their risk
|
|
502
|
+
of experiencing an event.
|
|
428
503
|
|
|
429
504
|
Parameters
|
|
430
505
|
----------
|
|
@@ -443,37 +518,39 @@ class MinlipSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
|
|
|
443
518
|
|
|
444
519
|
|
|
445
520
|
class HingeLossSurvivalSVM(MinlipSurvivalAnalysis):
|
|
446
|
-
"""Naive implementation of kernel survival support vector machine.
|
|
521
|
+
r"""Naive implementation of kernel survival support vector machine.
|
|
447
522
|
|
|
448
|
-
|
|
449
|
-
vectors in the original data
|
|
450
|
-
:math:`O(
|
|
523
|
+
This implementation creates a new set of samples by building the difference
|
|
524
|
+
between any two feature vectors in the original data. This approach
|
|
525
|
+
requires :math:`O(\text{n_samples}^4)` space and
|
|
526
|
+
:math:`O(\text{n_samples}^6 \cdot \text{n_features})` time, making it
|
|
527
|
+
computationally intensive for large datasets.
|
|
451
528
|
|
|
452
|
-
|
|
529
|
+
The optimization problem is formulated as:
|
|
453
530
|
|
|
454
531
|
.. math::
|
|
455
532
|
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
533
|
+
\min_{\mathbf{w}}\quad
|
|
534
|
+
\frac{1}{2} \lVert \mathbf{w} \rVert_2^2
|
|
535
|
+
+ \gamma \sum_{(i, j) \in \mathcal{P}} \xi_{ij} \\
|
|
536
|
+
\text{subject to}\quad
|
|
537
|
+
\mathbf{w}^\top \phi(\mathbf{x}_i) - \mathbf{w}^\top \phi(\mathbf{x}_j) \geq 1 - \xi_{ij},\quad
|
|
538
|
+
\forall (i, j) \in \mathcal{P}, \\
|
|
539
|
+
\xi_{ij} \geq 0,\quad \forall (i, j) \in \mathcal{P}.
|
|
463
540
|
|
|
464
|
-
|
|
541
|
+
\mathcal{P} = \{ (i, j) \mid y_i > y_j \land \delta_j = 1 \}_{i,j=1,\dots,n}.
|
|
465
542
|
|
|
466
543
|
See [1]_, [2]_, [3]_ for further description.
|
|
467
544
|
|
|
468
545
|
Parameters
|
|
469
546
|
----------
|
|
470
|
-
alpha : float,
|
|
471
|
-
Weight of penalizing the hinge loss in the objective function.
|
|
547
|
+
alpha : float, optional, default: 1
|
|
548
|
+
Weight of penalizing the hinge loss in the objective function. Must be greater than 0.
|
|
472
549
|
|
|
473
550
|
solver : {'ecos', 'osqp'}, optional, default: 'ecos'
|
|
474
551
|
Which quadratic program solver to use.
|
|
475
552
|
|
|
476
|
-
kernel :
|
|
553
|
+
kernel : str or callable, optional, default: 'linear'
|
|
477
554
|
Kernel mapping used internally. This parameter is directly passed to
|
|
478
555
|
:func:`sklearn.metrics.pairwise.pairwise_kernels`.
|
|
479
556
|
If `kernel` is a string, it must be one of the metrics
|
|
@@ -487,63 +564,67 @@ class HingeLossSurvivalSVM(MinlipSurvivalAnalysis):
|
|
|
487
564
|
they operate on matrices, not single samples. Use the string
|
|
488
565
|
identifying the kernel instead.
|
|
489
566
|
|
|
490
|
-
gamma : float, optional, default: None
|
|
567
|
+
gamma : float or None, optional, default: None
|
|
491
568
|
Gamma parameter for the RBF, laplacian, polynomial, exponential chi2
|
|
492
569
|
and sigmoid kernels. Interpretation of the default value is left to
|
|
493
570
|
the kernel; see the documentation for :mod:`sklearn.metrics.pairwise`.
|
|
494
571
|
Ignored by other kernels.
|
|
495
572
|
|
|
496
|
-
degree : int, default: 3
|
|
573
|
+
degree : int, optional, default: 3
|
|
497
574
|
Degree of the polynomial kernel. Ignored by other kernels.
|
|
498
575
|
|
|
499
|
-
coef0 : float, optional
|
|
576
|
+
coef0 : float, optional, default: 1
|
|
500
577
|
Zero coefficient for polynomial and sigmoid kernels.
|
|
501
578
|
Ignored by other kernels.
|
|
502
579
|
|
|
503
|
-
kernel_params :
|
|
580
|
+
kernel_params : dict or None, optional, default: None
|
|
504
581
|
Additional parameters (keyword arguments) for kernel function passed
|
|
505
582
|
as callable object.
|
|
506
583
|
|
|
507
584
|
pairs : {'all', 'nearest', 'next'}, optional, default: 'all'
|
|
508
585
|
Which constraints to use in the optimization problem.
|
|
509
586
|
|
|
510
|
-
- all: Use all comparable pairs. Scales
|
|
587
|
+
- all: Use all comparable pairs. Scales quadratically in number of samples.
|
|
511
588
|
- nearest: Only considers comparable pairs :math:`(i, j)` where :math:`j` is the
|
|
512
589
|
uncensored sample with highest survival time smaller than :math:`y_i`.
|
|
513
|
-
Scales
|
|
590
|
+
Scales linearly in number of samples (cf. :class:`sksurv.svm.MinlipSurvivalAnalysis`).
|
|
514
591
|
- next: Only compare against direct nearest neighbor according to observed time,
|
|
515
|
-
disregarding its censoring status. Scales
|
|
592
|
+
disregarding its censoring status. Scales linearly in number of samples.
|
|
516
593
|
|
|
517
|
-
verbose : bool, default: False
|
|
518
|
-
|
|
594
|
+
verbose : bool, optional, default: False
|
|
595
|
+
If ``True``, enable verbose output of the solver.
|
|
519
596
|
|
|
520
|
-
timeit :
|
|
521
|
-
If non-zero
|
|
522
|
-
|
|
523
|
-
``timings_`` attribute.
|
|
597
|
+
timeit : bool, int, or None, optional, default: False
|
|
598
|
+
If ``True`` or a non-zero integer, the time taken for optimization is measured.
|
|
599
|
+
If an integer is provided, the optimization is repeated that many times.
|
|
600
|
+
Results can be accessed from the ``timings_`` attribute.
|
|
524
601
|
|
|
525
602
|
max_iter : int or None, optional, default: None
|
|
526
|
-
|
|
527
|
-
use solver's default value.
|
|
603
|
+
The maximum number of iterations taken for the solvers to converge.
|
|
604
|
+
If ``None``, use solver's default value.
|
|
528
605
|
|
|
529
606
|
Attributes
|
|
530
607
|
----------
|
|
531
|
-
X_fit_ : ndarray
|
|
608
|
+
X_fit_ : ndarray, shape = (n_samples, `n_features_in_`)
|
|
532
609
|
Training data.
|
|
533
610
|
|
|
534
|
-
coef_ : ndarray, shape = (n_samples,)
|
|
611
|
+
coef_ : ndarray, shape = (n_samples,), dtype = float
|
|
535
612
|
Coefficients of the features in the decision function.
|
|
536
613
|
|
|
537
614
|
n_features_in_ : int
|
|
538
615
|
Number of features seen during ``fit``.
|
|
539
616
|
|
|
540
|
-
feature_names_in_ : ndarray
|
|
617
|
+
feature_names_in_ : ndarray, shape = (`n_features_in_`,), dtype = object
|
|
541
618
|
Names of features seen during ``fit``. Defined only when `X`
|
|
542
619
|
has feature names that are all strings.
|
|
543
620
|
|
|
544
621
|
n_iter_ : int
|
|
545
622
|
Number of iterations run by the optimization routine to fit the model.
|
|
546
623
|
|
|
624
|
+
See also
|
|
625
|
+
--------
|
|
626
|
+
sksurv.svm.NaiveSurvivalSVM : The linear naive survival SVM based on liblinear.
|
|
627
|
+
|
|
547
628
|
References
|
|
548
629
|
----------
|
|
549
630
|
.. [1] Van Belle, V., Pelckmans, K., Suykens, J. A., & Van Huffel, S.
|
sksurv/svm/naive_survival_svm.py
CHANGED
|
@@ -24,61 +24,63 @@ from ..util import check_array_survival
|
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
|
|
27
|
-
"""Naive
|
|
27
|
+
r"""Naive implementation of linear Survival Support Vector Machine.
|
|
28
28
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
29
|
+
This class uses a regular linear support vector classifier (liblinear)
|
|
30
|
+
to implement a survival SVM. It constructs a new dataset by computing
|
|
31
|
+
the difference between feature vectors of comparable pairs from the
|
|
32
|
+
original data. This approach results in a space complexity of
|
|
33
|
+
:math:`O(\text{n_samples}^2)`.
|
|
32
34
|
|
|
33
|
-
|
|
35
|
+
The optimization problem is formulated as:
|
|
34
36
|
|
|
35
37
|
.. math::
|
|
36
38
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
39
|
+
\min_{\mathbf{w}}\quad
|
|
40
|
+
\frac{1}{2} \lVert \mathbf{w} \rVert_2^2
|
|
41
|
+
+ \gamma \sum_{(i, j) \in \mathcal{P}} \xi_{ij} \\
|
|
42
|
+
\text{subject to}\quad
|
|
43
|
+
\mathbf{w}^\top \mathbf{x}_i - \mathbf{w}^\top \mathbf{x}_j \geq 1 - \xi_{ij},\quad
|
|
44
|
+
\forall (i, j) \in \mathcal{P}, \\
|
|
45
|
+
\xi_{ij} \geq 0,\quad \forall (i, j) \in \mathcal{P}.
|
|
44
46
|
|
|
45
|
-
|
|
47
|
+
\mathcal{P} = \{ (i, j) \mid y_i > y_j \land \delta_j = 1 \}_{i,j=1,\dots,n}.
|
|
46
48
|
|
|
47
49
|
See [1]_, [2]_ for further description.
|
|
48
50
|
|
|
49
51
|
Parameters
|
|
50
52
|
----------
|
|
51
|
-
alpha : float,
|
|
52
|
-
Weight of penalizing the squared hinge loss in the objective function.
|
|
53
|
+
alpha : float, optional, default: 1.0
|
|
54
|
+
Weight of penalizing the squared hinge loss in the objective function. Must be greater than 0.
|
|
53
55
|
|
|
54
|
-
loss : {'hinge', 'squared_hinge'}, default: 'squared_hinge'
|
|
56
|
+
loss : {'hinge', 'squared_hinge'}, optional, default: 'squared_hinge'
|
|
55
57
|
Specifies the loss function. 'hinge' is the standard SVM loss
|
|
56
58
|
(used e.g. by the SVC class) while 'squared_hinge' is the
|
|
57
59
|
square of the hinge loss.
|
|
58
60
|
|
|
59
|
-
penalty : {'l1', 'l2'}, default: 'l2'
|
|
61
|
+
penalty : {'l1', 'l2'}, optional, default: 'l2'
|
|
60
62
|
Specifies the norm used in the penalization. The 'l2'
|
|
61
63
|
penalty is the standard used in SVC. The 'l1' leads to `coef_`
|
|
62
64
|
vectors that are sparse.
|
|
63
65
|
|
|
64
|
-
dual : bool, default: True
|
|
66
|
+
dual : bool, optional, default: True
|
|
65
67
|
Select the algorithm to either solve the dual or primal
|
|
66
68
|
optimization problem. Prefer dual=False when n_samples > n_features.
|
|
67
69
|
|
|
68
70
|
tol : float, optional, default: 1e-4
|
|
69
71
|
Tolerance for stopping criteria.
|
|
70
72
|
|
|
71
|
-
verbose : int, default: 0
|
|
72
|
-
|
|
73
|
+
verbose : int, optional, default: 0
|
|
74
|
+
If non-zero, enable verbose output. Note that this setting takes advantage of a
|
|
73
75
|
per-process runtime setting in liblinear that, if enabled, may not work
|
|
74
76
|
properly in a multithreaded context.
|
|
75
77
|
|
|
76
|
-
random_state : int
|
|
77
|
-
|
|
78
|
-
|
|
78
|
+
random_state : int, :class:`numpy.random.RandomState` instance, or None, optional, default: None
|
|
79
|
+
Used to resolve ties in survival times. Pass an int for reproducible output across
|
|
80
|
+
multiple :meth:`fit` calls.
|
|
79
81
|
|
|
80
|
-
max_iter : int, default: 1000
|
|
81
|
-
The maximum number of iterations to
|
|
82
|
+
max_iter : int, optional, default: 1000
|
|
83
|
+
The maximum number of iterations taken for the solver to converge.
|
|
82
84
|
|
|
83
85
|
Attributes
|
|
84
86
|
----------
|
|
@@ -87,8 +89,8 @@ class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
|
|
|
87
89
|
|
|
88
90
|
See also
|
|
89
91
|
--------
|
|
90
|
-
sksurv.svm.FastSurvivalSVM
|
|
91
|
-
|
|
92
|
+
sksurv.svm.FastSurvivalSVM : Alternative implementation with reduced time complexity for training.
|
|
93
|
+
sksurv.svm.HingeLossSurvivalSVM : Non-linear version of the naive survival SVM based on kernel functions.
|
|
92
94
|
|
|
93
95
|
References
|
|
94
96
|
----------
|
|
@@ -138,6 +140,30 @@ class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
|
|
|
138
140
|
self.alpha = alpha
|
|
139
141
|
|
|
140
142
|
def _get_survival_pairs(self, X, y, random_state): # pylint: disable=no-self-use
|
|
143
|
+
"""Generates comparable pairs from survival data.
|
|
144
|
+
|
|
145
|
+
Parameters
|
|
146
|
+
----------
|
|
147
|
+
X : array-like, shape = (n_samples, n_features)
|
|
148
|
+
Data matrix.
|
|
149
|
+
y : structured array, shape = (n_samples,)
|
|
150
|
+
A structured array containing the binary event indicator
|
|
151
|
+
and time of event or time of censoring.
|
|
152
|
+
random_state : RandomState instance
|
|
153
|
+
Random number generator used for shuffling.
|
|
154
|
+
|
|
155
|
+
Returns
|
|
156
|
+
-------
|
|
157
|
+
x_pairs : ndarray, shape = (n_pairs, n_features)
|
|
158
|
+
Feature differences for comparable pairs.
|
|
159
|
+
y_pairs : ndarray, shape = (n_pairs,)
|
|
160
|
+
Labels for comparable pairs (1 or -1).
|
|
161
|
+
|
|
162
|
+
Raises
|
|
163
|
+
------
|
|
164
|
+
NoComparablePairException
|
|
165
|
+
If no comparable pairs can be formed from the input data.
|
|
166
|
+
"""
|
|
141
167
|
feature_names = _get_feature_names(X)
|
|
142
168
|
|
|
143
169
|
X = validate_data(self, X, ensure_min_samples=2)
|
|
@@ -180,9 +206,9 @@ class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
|
|
|
180
206
|
Data matrix.
|
|
181
207
|
|
|
182
208
|
y : structured array, shape = (n_samples,)
|
|
183
|
-
A structured array
|
|
184
|
-
|
|
185
|
-
second field.
|
|
209
|
+
A structured array with two fields. The first field is a boolean
|
|
210
|
+
where ``True`` indicates an event and ``False`` indicates right-censoring.
|
|
211
|
+
The second field is a float with the time of event or time of censoring.
|
|
186
212
|
|
|
187
213
|
sample_weight : array-like, shape = (n_samples,), optional
|
|
188
214
|
Array of weights that are assigned to individual
|
|
@@ -203,9 +229,12 @@ class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
|
|
|
203
229
|
return super().fit(x_pairs, y_pairs, sample_weight=sample_weight)
|
|
204
230
|
|
|
205
231
|
def predict(self, X):
|
|
206
|
-
"""
|
|
232
|
+
"""Predict risk scores.
|
|
207
233
|
|
|
208
|
-
|
|
234
|
+
Predictions are risk scores (i.e. higher values indicate an
|
|
235
|
+
increased risk of experiencing an event). The scores have no
|
|
236
|
+
unit and are only meaningful to rank samples by their risk
|
|
237
|
+
of experiencing an event.
|
|
209
238
|
|
|
210
239
|
Parameters
|
|
211
240
|
----------
|
|
@@ -214,7 +243,7 @@ class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
|
|
|
214
243
|
|
|
215
244
|
Returns
|
|
216
245
|
-------
|
|
217
|
-
y : ndarray, shape = (n_samples,)
|
|
218
|
-
Predicted
|
|
246
|
+
y : ndarray, shape = (n_samples,), dtype = float
|
|
247
|
+
Predicted risk scores.
|
|
219
248
|
"""
|
|
220
249
|
return -self.decision_function(X)
|