scikit-survival 0.24.1__cp313-cp313-win_amd64.whl → 0.26.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. scikit_survival-0.26.0.dist-info/METADATA +185 -0
  2. scikit_survival-0.26.0.dist-info/RECORD +58 -0
  3. {scikit_survival-0.24.1.dist-info → scikit_survival-0.26.0.dist-info}/WHEEL +1 -1
  4. sksurv/__init__.py +51 -6
  5. sksurv/base.py +12 -2
  6. sksurv/bintrees/_binarytrees.cp313-win_amd64.pyd +0 -0
  7. sksurv/column.py +38 -35
  8. sksurv/compare.py +23 -23
  9. sksurv/datasets/base.py +52 -27
  10. sksurv/docstrings.py +99 -0
  11. sksurv/ensemble/_coxph_loss.cp313-win_amd64.pyd +0 -0
  12. sksurv/ensemble/boosting.py +116 -168
  13. sksurv/ensemble/forest.py +94 -151
  14. sksurv/functions.py +29 -29
  15. sksurv/io/arffread.py +37 -4
  16. sksurv/io/arffwrite.py +41 -5
  17. sksurv/kernels/_clinical_kernel.cp313-win_amd64.pyd +0 -0
  18. sksurv/kernels/clinical.py +36 -16
  19. sksurv/linear_model/_coxnet.cp313-win_amd64.pyd +0 -0
  20. sksurv/linear_model/aft.py +14 -11
  21. sksurv/linear_model/coxnet.py +138 -89
  22. sksurv/linear_model/coxph.py +102 -83
  23. sksurv/meta/ensemble_selection.py +91 -9
  24. sksurv/meta/stacking.py +47 -26
  25. sksurv/metrics.py +257 -224
  26. sksurv/nonparametric.py +150 -81
  27. sksurv/preprocessing.py +74 -34
  28. sksurv/svm/_minlip.cp313-win_amd64.pyd +0 -0
  29. sksurv/svm/_prsvm.cp313-win_amd64.pyd +0 -0
  30. sksurv/svm/minlip.py +171 -85
  31. sksurv/svm/naive_survival_svm.py +63 -34
  32. sksurv/svm/survival_svm.py +103 -103
  33. sksurv/testing.py +47 -0
  34. sksurv/tree/_criterion.cp313-win_amd64.pyd +0 -0
  35. sksurv/tree/tree.py +170 -84
  36. sksurv/util.py +85 -30
  37. scikit_survival-0.24.1.dist-info/METADATA +0 -889
  38. scikit_survival-0.24.1.dist-info/RECORD +0 -57
  39. {scikit_survival-0.24.1.dist-info → scikit_survival-0.26.0.dist-info}/licenses/COPYING +0 -0
  40. {scikit_survival-0.24.1.dist-info → scikit_survival-0.26.0.dist-info}/top_level.txt +0 -0
sksurv/svm/minlip.py CHANGED
@@ -19,11 +19,23 @@ __all__ = ["MinlipSurvivalAnalysis", "HingeLossSurvivalSVM"]
19
19
 
20
20
 
21
21
  class QPSolver(metaclass=ABCMeta):
22
- """
23
- Solves a quadratic program::
22
+ r"""Abstract base class for quadratic program solvers.
23
+
24
+ This class defines the interface for solvers that minimize a quadratic
25
+ objective function subject to linear inequality constraints,
26
+ formulated as:
27
+
28
+ .. math::
29
+
30
+ \min_{x} \quad \frac{1}{2} x^T P x + q^T x \\
31
+ \text{subject to} \quad G x \preceq h
24
32
 
25
- minimize (1/2)*x'*P*x + q'*x
26
- subject to G*x <= h
33
+ Parameters
34
+ ----------
35
+ max_iter : int or None
36
+ Maximum number of iterations to perform.
37
+ verbose : bool
38
+ Enable verbose output of the solver.
27
39
  """
28
40
 
29
41
  @abstractmethod
@@ -33,7 +45,26 @@ class QPSolver(metaclass=ABCMeta):
33
45
 
34
46
  @abstractmethod
35
47
  def solve(self, P, q, G, h):
36
- """Returns solution to QP."""
48
+ """Find solution to QP.
49
+
50
+ Parameters
51
+ ----------
52
+ P : array-like, shape=(n_variables, n_variables)
53
+ Quadratic part of the objective function.
54
+ q : array-like, shape=(n_variables,)
55
+ Linear part of the objective function.
56
+ G : array-like, shape=(n_constraints, n_variables)
57
+ Matrix for inequality constraints.
58
+ h : array-like, shape=(n_constraints,)
59
+ Vector for inequality constraints.
60
+
61
+ Returns
62
+ -------
63
+ x : ndarray, shape=(n_variables,)
64
+ The optimal solution.
65
+ n_iter : int
66
+ Number of iterations performed by the solver.
67
+ """
37
68
 
38
69
 
39
70
  class OsqpSolver(QPSolver):
@@ -50,42 +81,58 @@ class OsqpSolver(QPSolver):
50
81
 
51
82
  solver_opts = self._get_options()
52
83
  m = osqp.OSQP()
53
- m.setup(P=sparse.csc_matrix(P), q=q, A=G, u=h, **solver_opts) # noqa: E741
54
- results = m.solve()
84
+ m.setup(P=sparse.csc_matrix(P), q=q, A=G, l=None, u=h, **solver_opts) # noqa: E741
85
+ results = m.solve(raise_error=False)
86
+
87
+ solved_codes = (
88
+ osqp.SolverStatus.OSQP_SOLVED,
89
+ osqp.SolverStatus.OSQP_SOLVED_INACCURATE,
90
+ )
55
91
 
56
- if results.info.status_val == -2: # max iter reached
92
+ if results.info.status_val == osqp.SolverStatus.OSQP_MAX_ITER_REACHED: # max iter reached
57
93
  warnings.warn(
58
94
  (f"OSQP solver did not converge: {results.info.status}"),
59
95
  category=ConvergenceWarning,
60
96
  stacklevel=2,
61
97
  )
62
- elif results.info.status_val not in (1, 2): # pragma: no cover
63
- # non of solved, solved inaccurate
98
+ elif results.info.status_val not in solved_codes: # pragma: no cover
99
+ # none of SOLVED, SOLVED_INACCURATE
64
100
  raise RuntimeError(f"OSQP solver failed: {results.info.status}")
65
101
 
66
102
  n_iter = results.info.iter
67
103
  return results.x[np.newaxis], n_iter
68
104
 
69
105
  def _get_options(self):
106
+ """Returns a dictionary of OSQP solver options."""
70
107
  solver_opts = {
71
108
  "eps_abs": 1e-5,
72
109
  "eps_rel": 1e-5,
73
110
  "max_iter": self.max_iter or 4000,
74
- "polish": True,
111
+ "polishing": True,
75
112
  "verbose": self.verbose,
76
113
  }
77
114
  return solver_opts
78
115
 
79
116
 
80
117
  class EcosSolver(QPSolver):
81
- """Solves QP by expressing it as second-order cone program::
118
+ r"""Solves QP by expressing it as second-order cone program:
119
+
120
+ .. math::
82
121
 
83
- minimize c^T @ x
84
- subject to G @ x <=_K h
122
+ \min \quad c^T x \\
123
+ \text{subject to} \quad G x \preceq_K h
85
124
 
86
- where the last inequality is generalized, i.e. ``h - G*x``
87
- belongs to the cone ``K``. ECOS supports the positive orthant
88
- ``R_+`` and second-order cones ``Q_n``.
125
+ where the last inequality is generalized, i.e. :math:`h - G x`
126
+ belongs to the cone :math:`K`.
127
+
128
+ Parameters
129
+ ----------
130
+ max_iter : int or None
131
+ Maximum number of iterations to perform.
132
+ verbose : bool
133
+ Enable verbose output of the solver.
134
+ cond : float or None, default: None
135
+ Condition number for eigenvalue decomposition.
89
136
  """
90
137
 
91
138
  EXIT_OPTIMAL = 0 # Optimal solution found
@@ -144,6 +191,18 @@ class EcosSolver(QPSolver):
144
191
  return x[np.newaxis], n_iter
145
192
 
146
193
  def _check_success(self, results): # pylint: disable=no-self-use
194
+ """Checks if the ECOS solver converged successfully.
195
+
196
+ Parameters
197
+ ----------
198
+ results : dict
199
+ The results dictionary returned by ``ecos.solve``.
200
+
201
+ Raises
202
+ ------
203
+ RuntimeError
204
+ If the solver failed for an unknown reason or found primal/dual infeasibility.
205
+ """
147
206
  exit_flag = results["info"]["exitFlag"]
148
207
  if exit_flag in (EcosSolver.EXIT_OPTIMAL, EcosSolver.EXIT_OPTIMAL + EcosSolver.EXIT_INACC_OFFSET):
149
208
  return
@@ -160,6 +219,20 @@ class EcosSolver(QPSolver):
160
219
  raise RuntimeError(f"Unknown problem in ECOS solver, exit status: {exit_flag}")
161
220
 
162
221
  def _decompose(self, P):
222
+ """Performs eigenvalue decomposition of P.
223
+
224
+ Parameters
225
+ ----------
226
+ P : array-like, shape=(n_variables, n_variables)
227
+ Quadratic part of the objective function.
228
+
229
+ Returns
230
+ -------
231
+ decomposed : ndarray
232
+ Decomposed matrix.
233
+ largest_eigenvalue : float
234
+ The largest eigenvalue of P.
235
+ """
163
236
  # from scipy.linalg.pinvh
164
237
  s, u = linalg.eigh(P)
165
238
  largest_eigenvalue = np.max(np.abs(s))
@@ -182,33 +255,38 @@ class EcosSolver(QPSolver):
182
255
 
183
256
 
184
257
  class MinlipSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
185
- """Survival model related to survival SVM, using a minimal Lipschitz smoothness strategy
186
- instead of a maximal margin strategy.
258
+ r"""Survival model based on a minimal Lipschitz smoothness strategy.
259
+
260
+ This model is related to :class:`sksurv.svm.FastKernelSurvivalSVM` but
261
+ minimizes a different objective function, focusing on Lipschitz
262
+ smoothness rather than maximal margin. The optimization problem is
263
+ formulated as:
187
264
 
188
265
  .. math::
189
266
 
190
- \\min_{\\mathbf{w}}\\quad
191
- \\frac{1}{2} \\lVert \\mathbf{w} \\rVert_2^2
192
- + \\gamma \\sum_{i = 1}^n \\xi_i \\\\
193
- \\text{subject to}\\quad
194
- \\mathbf{w}^\\top \\mathbf{x}_i - \\mathbf{w}^\\top \\mathbf{x}_j \\geq y_i - y_j - \\xi_i,\\quad
195
- \\forall (i, j) \\in \\mathcal{P}_\\text{1-NN}, \\\\
196
- \\xi_i \\geq 0,\\quad \\forall i = 1,\\dots,n.
267
+ \min_{\mathbf{w}}\quad
268
+ \frac{1}{2} \lVert \mathbf{w} \rVert_2^2
269
+ + \gamma \sum_{i = 1}^n \xi_i \\
270
+ \text{subject to}\quad
271
+ \mathbf{w}^\top \mathbf{x}_i - \mathbf{w}^\top \mathbf{x}_j \geq y_i - y_j - \xi_i,\quad
272
+ \forall (i, j) \in \mathcal{P}_\text{1-NN}, \\
273
+ \xi_i \geq 0,\quad \forall i = 1,\dots,n.
197
274
 
198
- \\mathcal{P}_\\text{1-NN} = \\{ (i, j) \\mid y_i > y_j \\land \\delta_j = 1
199
- \\land \\nexists k : y_i > y_k > y_j \\land \\delta_k = 1 \\}_{i,j=1}^n.
275
+ \mathcal{P}_\text{1-NN} = \{ (i, j) \mid y_i > y_j \land \delta_j = 1
276
+ \land \nexists k : y_i > y_k > y_j \land \delta_k = 1 \}_{i,j=1}^n.
200
277
 
201
278
  See [1]_ for further description.
202
279
 
203
280
  Parameters
204
281
  ----------
205
- alpha : float, positive, default: 1
282
+ alpha : float, optional, default: 1
206
283
  Weight of penalizing the hinge loss in the objective function.
284
+ Must be greater than 0.
207
285
 
208
286
  solver : {'ecos', 'osqp'}, optional, default: 'ecos'
209
287
  Which quadratic program solver to use.
210
288
 
211
- kernel : str or callable, default: 'linear'.
289
+ kernel : str or callable, optional, default: 'linear'
212
290
  Kernel mapping used internally. This parameter is directly passed to
213
291
  :func:`sklearn.metrics.pairwise.pairwise_kernels`.
214
292
  If `kernel` is a string, it must be one of the metrics
@@ -228,52 +306,52 @@ class MinlipSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
228
306
  the kernel; see the documentation for :mod:`sklearn.metrics.pairwise`.
229
307
  Ignored by other kernels.
230
308
 
231
- degree : int, default: 3
309
+ degree : int, optional, default: 3
232
310
  Degree of the polynomial kernel. Ignored by other kernels.
233
311
 
234
- coef0 : float, optional
312
+ coef0 : float, optional, default: 1
235
313
  Zero coefficient for polynomial and sigmoid kernels.
236
314
  Ignored by other kernels.
237
315
 
238
- kernel_params : mapping of string to any, optional
316
+ kernel_params : dict, optional, default: None
239
317
  Additional parameters (keyword arguments) for kernel function passed
240
318
  as callable object.
241
319
 
242
320
  pairs : {'all', 'nearest', 'next'}, optional, default: 'nearest'
243
321
  Which constraints to use in the optimization problem.
244
322
 
245
- - all: Use all comparable pairs. Scales quadratic in number of samples
323
+ - all: Use all comparable pairs. Scales quadratically in number of samples
246
324
  (cf. :class:`sksurv.svm.HingeLossSurvivalSVM`).
247
325
  - nearest: Only considers comparable pairs :math:`(i, j)` where :math:`j` is the
248
326
  uncensored sample with highest survival time smaller than :math:`y_i`.
249
- Scales linear in number of samples.
327
+ Scales linearly in number of samples.
250
328
  - next: Only compare against direct nearest neighbor according to observed time,
251
- disregarding its censoring status. Scales linear in number of samples.
329
+ disregarding its censoring status. Scales linearly in number of samples.
252
330
 
253
- verbose : bool, default: False
331
+ verbose : bool, optional, default: False
254
332
  Enable verbose output of solver.
255
333
 
256
- timeit : False, int or None, default: None
257
- If non-zero value is provided the time it takes for optimization is measured.
258
- The given number of repetitions are performed. Results can be accessed from the
259
- ``timings_`` attribute.
334
+ timeit : bool, int, or None, optional, default: False
335
+ If ``True`` or a non-zero integer, the time taken for optimization is measured.
336
+ If an integer is provided, the optimization is repeated that many times.
337
+ Results can be accessed from the ``timings_`` attribute.
260
338
 
261
339
  max_iter : int or None, optional, default: None
262
- Maximum number of iterations to perform. By default
263
- use solver's default value.
340
+ The maximum number of iterations taken for the solvers to converge.
341
+ If ``None``, use solver's default value.
264
342
 
265
343
  Attributes
266
344
  ----------
267
- X_fit_ : ndarray
345
+ X_fit_ : ndarray, shape = (n_samples, `n_features_in_`)
268
346
  Training data.
269
347
 
270
- coef_ : ndarray, shape = (n_samples,)
348
+ coef_ : ndarray, shape = (n_samples,), dtype = float
271
349
  Coefficients of the features in the decision function.
272
350
 
273
351
  n_features_in_ : int
274
352
  Number of features seen during ``fit``.
275
353
 
276
- feature_names_in_ : ndarray of shape (`n_features_in_`,)
354
+ feature_names_in_ : ndarray, shape = (`n_features_in_`,)
277
355
  Names of features seen during ``fit``. Defined only when `X`
278
356
  has feature names that are all strings.
279
357
 
@@ -405,9 +483,9 @@ class MinlipSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
405
483
  Data matrix.
406
484
 
407
485
  y : structured array, shape = (n_samples,)
408
- A structured array containing the binary event indicator
409
- as first field, and time of event or time of censoring as
410
- second field.
486
+ A structured array with two fields. The first field is a boolean
487
+ where ``True`` indicates an event and ``False`` indicates right-censoring.
488
+ The second field is a float with the time of event or time of censoring.
411
489
 
412
490
  Returns
413
491
  -------
@@ -423,8 +501,10 @@ class MinlipSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
423
501
  def predict(self, X):
424
502
  """Predict risk score of experiencing an event.
425
503
 
426
- Higher scores indicate shorter survival (high risk),
427
- lower scores longer survival (low risk).
504
+ Higher values indicate an increased risk of experiencing an event,
505
+ lower values a decreased risk of experiencing an event. The scores
506
+ have no unit and are only meaningful to rank samples by their risk
507
+ of experiencing an event.
428
508
 
429
509
  Parameters
430
510
  ----------
@@ -443,37 +523,39 @@ class MinlipSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
443
523
 
444
524
 
445
525
  class HingeLossSurvivalSVM(MinlipSurvivalAnalysis):
446
- """Naive implementation of kernel survival support vector machine.
526
+ r"""Naive implementation of kernel survival support vector machine.
447
527
 
448
- A new set of samples is created by building the difference between any two feature
449
- vectors in the original data, thus this version requires :math:`O(\\text{n_samples}^4)` space and
450
- :math:`O(\\text{n_samples}^6 \\cdot \\text{n_features})` time.
528
+ This implementation creates a new set of samples by building the difference
529
+ between any two feature vectors in the original data. This approach
530
+ requires :math:`O(\text{n_samples}^4)` space and
531
+ :math:`O(\text{n_samples}^6 \cdot \text{n_features})` time, making it
532
+ computationally intensive for large datasets.
451
533
 
452
- See :class:`sksurv.svm.NaiveSurvivalSVM` for the linear naive survival SVM based on liblinear.
534
+ The optimization problem is formulated as:
453
535
 
454
536
  .. math::
455
537
 
456
- \\min_{\\mathbf{w}}\\quad
457
- \\frac{1}{2} \\lVert \\mathbf{w} \\rVert_2^2
458
- + \\gamma \\sum_{i = 1}^n \\xi_i \\\\
459
- \\text{subject to}\\quad
460
- \\mathbf{w}^\\top \\phi(\\mathbf{x})_i - \\mathbf{w}^\\top \\phi(\\mathbf{x})_j \\geq 1 - \\xi_{ij},\\quad
461
- \\forall (i, j) \\in \\mathcal{P}, \\\\
462
- \\xi_i \\geq 0,\\quad \\forall (i, j) \\in \\mathcal{P}.
538
+ \min_{\mathbf{w}}\quad
539
+ \frac{1}{2} \lVert \mathbf{w} \rVert_2^2
540
+ + \gamma \sum_{i = 1}^n \xi_i \\
541
+ \text{subject to}\quad
542
+ \mathbf{w}^\top \phi(\mathbf{x})_i - \mathbf{w}^\top \phi(\mathbf{x})_j \geq 1 - \xi_{ij},\quad
543
+ \forall (i, j) \in \mathcal{P}, \\
544
+ \xi_i \geq 0,\quad \forall (i, j) \in \mathcal{P}.
463
545
 
464
- \\mathcal{P} = \\{ (i, j) \\mid y_i > y_j \\land \\delta_j = 1 \\}_{i,j=1,\\dots,n}.
546
+ \mathcal{P} = \{ (i, j) \mid y_i > y_j \land \delta_j = 1 \}_{i,j=1,\dots,n}.
465
547
 
466
548
  See [1]_, [2]_, [3]_ for further description.
467
549
 
468
550
  Parameters
469
551
  ----------
470
- alpha : float, positive, default: 1
471
- Weight of penalizing the hinge loss in the objective function.
552
+ alpha : float, optional, default: 1
553
+ Weight of penalizing the hinge loss in the objective function. Must be greater than 0.
472
554
 
473
555
  solver : {'ecos', 'osqp'}, optional, default: 'ecos'
474
556
  Which quadratic program solver to use.
475
557
 
476
- kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed'} or callable, default: 'linear'.
558
+ kernel : str or callable, optional, default: 'linear'
477
559
  Kernel mapping used internally. This parameter is directly passed to
478
560
  :func:`sklearn.metrics.pairwise.pairwise_kernels`.
479
561
  If `kernel` is a string, it must be one of the metrics
@@ -487,63 +569,67 @@ class HingeLossSurvivalSVM(MinlipSurvivalAnalysis):
487
569
  they operate on matrices, not single samples. Use the string
488
570
  identifying the kernel instead.
489
571
 
490
- gamma : float, optional, default: None
572
+ gamma : float or None, optional, default: None
491
573
  Gamma parameter for the RBF, laplacian, polynomial, exponential chi2
492
574
  and sigmoid kernels. Interpretation of the default value is left to
493
575
  the kernel; see the documentation for :mod:`sklearn.metrics.pairwise`.
494
576
  Ignored by other kernels.
495
577
 
496
- degree : int, default: 3
578
+ degree : int, optional, default: 3
497
579
  Degree of the polynomial kernel. Ignored by other kernels.
498
580
 
499
- coef0 : float, optional
581
+ coef0 : float, optional, default: 1
500
582
  Zero coefficient for polynomial and sigmoid kernels.
501
583
  Ignored by other kernels.
502
584
 
503
- kernel_params : mapping of string to any, optional
585
+ kernel_params : dict or None, optional, default: None
504
586
  Additional parameters (keyword arguments) for kernel function passed
505
587
  as callable object.
506
588
 
507
589
  pairs : {'all', 'nearest', 'next'}, optional, default: 'all'
508
590
  Which constraints to use in the optimization problem.
509
591
 
510
- - all: Use all comparable pairs. Scales quadratic in number of samples.
592
+ - all: Use all comparable pairs. Scales quadratically in number of samples.
511
593
  - nearest: Only considers comparable pairs :math:`(i, j)` where :math:`j` is the
512
594
  uncensored sample with highest survival time smaller than :math:`y_i`.
513
- Scales linear in number of samples (cf. :class:`sksurv.svm.MinlipSurvivalAnalysis`).
595
+ Scales linearly in number of samples (cf. :class:`sksurv.svm.MinlipSurvivalAnalysis`).
514
596
  - next: Only compare against direct nearest neighbor according to observed time,
515
- disregarding its censoring status. Scales linear in number of samples.
597
+ disregarding its censoring status. Scales linearly in number of samples.
516
598
 
517
- verbose : bool, default: False
518
- Enable verbose output of solver.
599
+ verbose : bool, optional, default: False
600
+ If ``True``, enable verbose output of the solver.
519
601
 
520
- timeit : False, int or None, default: None
521
- If non-zero value is provided the time it takes for optimization is measured.
522
- The given number of repetitions are performed. Results can be accessed from the
523
- ``timings_`` attribute.
602
+ timeit : bool, int, or None, optional, default: False
603
+ If ``True`` or a non-zero integer, the time taken for optimization is measured.
604
+ If an integer is provided, the optimization is repeated that many times.
605
+ Results can be accessed from the ``timings_`` attribute.
524
606
 
525
607
  max_iter : int or None, optional, default: None
526
- Maximum number of iterations to perform. By default
527
- use solver's default value.
608
+ The maximum number of iterations taken for the solvers to converge.
609
+ If ``None``, use solver's default value.
528
610
 
529
611
  Attributes
530
612
  ----------
531
- X_fit_ : ndarray
613
+ X_fit_ : ndarray, shape = (n_samples, `n_features_in_`)
532
614
  Training data.
533
615
 
534
- coef_ : ndarray, shape = (n_samples,)
616
+ coef_ : ndarray, shape = (n_samples,), dtype = float
535
617
  Coefficients of the features in the decision function.
536
618
 
537
619
  n_features_in_ : int
538
620
  Number of features seen during ``fit``.
539
621
 
540
- feature_names_in_ : ndarray of shape (`n_features_in_`,)
622
+ feature_names_in_ : ndarray, shape = (`n_features_in_`,), dtype = object
541
623
  Names of features seen during ``fit``. Defined only when `X`
542
624
  has feature names that are all strings.
543
625
 
544
626
  n_iter_ : int
545
627
  Number of iterations run by the optimization routine to fit the model.
546
628
 
629
+ See also
630
+ --------
631
+ sksurv.svm.NaiveSurvivalSVM : The linear naive survival SVM based on liblinear.
632
+
547
633
  References
548
634
  ----------
549
635
  .. [1] Van Belle, V., Pelckmans, K., Suykens, J. A., & Van Huffel, S.
@@ -24,61 +24,63 @@ from ..util import check_array_survival
24
24
 
25
25
 
26
26
  class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
27
- """Naive version of linear Survival Support Vector Machine.
27
+ r"""Naive implementation of linear Survival Support Vector Machine.
28
28
 
29
- Uses regular linear support vector classifier (liblinear).
30
- A new set of samples is created by building the difference between any two feature
31
- vectors in the original data, thus this version requires :math:`O(\\text{n_samples}^2)` space.
29
+ This class uses a regular linear support vector classifier (liblinear)
30
+ to implement a survival SVM. It constructs a new dataset by computing
31
+ the difference between feature vectors of comparable pairs from the
32
+ original data. This approach results in a space complexity of
33
+ :math:`O(\text{n_samples}^2)`.
32
34
 
33
- See :class:`sksurv.svm.HingeLossSurvivalSVM` for the kernel naive survival SVM.
35
+ The optimization problem is formulated as:
34
36
 
35
37
  .. math::
36
38
 
37
- \\min_{\\mathbf{w}}\\quad
38
- \\frac{1}{2} \\lVert \\mathbf{w} \\rVert_2^2
39
- + \\gamma \\sum_{i = 1}^n \\xi_i \\\\
40
- \\text{subject to}\\quad
41
- \\mathbf{w}^\\top \\mathbf{x}_i - \\mathbf{w}^\\top \\mathbf{x}_j \\geq 1 - \\xi_{ij},\\quad
42
- \\forall (i, j) \\in \\mathcal{P}, \\\\
43
- \\xi_i \\geq 0,\\quad \\forall (i, j) \\in \\mathcal{P}.
39
+ \min_{\mathbf{w}}\quad
40
+ \frac{1}{2} \lVert \mathbf{w} \rVert_2^2
41
+ + \gamma \sum_{i = 1}^n \xi_i \\
42
+ \text{subject to}\quad
43
+ \mathbf{w}^\top \mathbf{x}_i - \mathbf{w}^\top \mathbf{x}_j \geq 1 - \xi_{ij},\quad
44
+ \forall (i, j) \in \mathcal{P}, \\
45
+ \xi_i \geq 0,\quad \forall (i, j) \in \mathcal{P}.
44
46
 
45
- \\mathcal{P} = \\{ (i, j) \\mid y_i > y_j \\land \\delta_j = 1 \\}_{i,j=1,\\dots,n}.
47
+ \mathcal{P} = \{ (i, j) \mid y_i > y_j \land \delta_j = 1 \}_{i,j=1,\dots,n}.
46
48
 
47
49
  See [1]_, [2]_ for further description.
48
50
 
49
51
  Parameters
50
52
  ----------
51
- alpha : float, positive, default: 1.0
52
- Weight of penalizing the squared hinge loss in the objective function.
53
+ alpha : float, optional, default: 1.0
54
+ Weight of penalizing the squared hinge loss in the objective function. Must be greater than 0.
53
55
 
54
- loss : {'hinge', 'squared_hinge'}, default: 'squared_hinge'
56
+ loss : {'hinge', 'squared_hinge'}, optional, default: 'squared_hinge'
55
57
  Specifies the loss function. 'hinge' is the standard SVM loss
56
58
  (used e.g. by the SVC class) while 'squared_hinge' is the
57
59
  square of the hinge loss.
58
60
 
59
- penalty : {'l1', 'l2'}, default: 'l2'
61
+ penalty : {'l1', 'l2'}, optional, default: 'l2'
60
62
  Specifies the norm used in the penalization. The 'l2'
61
63
  penalty is the standard used in SVC. The 'l1' leads to `coef_`
62
64
  vectors that are sparse.
63
65
 
64
- dual : bool, default: True
66
+ dual : bool, optional, default: True
65
67
  Select the algorithm to either solve the dual or primal
66
68
  optimization problem. Prefer dual=False when n_samples > n_features.
67
69
 
68
70
  tol : float, optional, default: 1e-4
69
71
  Tolerance for stopping criteria.
70
72
 
71
- verbose : int, default: 0
72
- Enable verbose output. Note that this setting takes advantage of a
73
+ verbose : int, optional, default: 0
74
+ If a non-zero value is provided, enable verbose output. Note that this setting takes advantage of a
73
75
  per-process runtime setting in liblinear that, if enabled, may not work
74
76
  properly in a multithreaded context.
75
77
 
76
- random_state : int seed, RandomState instance, or None, default: None
77
- The seed of the pseudo random number generator to use when
78
- shuffling the data.
78
+ random_state : int, :class:`numpy.random.RandomState` instance, or None, optional, default: None
79
+ Used to resolve ties in survival times. Pass an int for reproducible output across
80
+ multiple :meth:`fit` calls.
79
81
 
80
- max_iter : int, default: 1000
81
- The maximum number of iterations to be run.
82
+ max_iter : int, optional, default: 1000
83
+ The maximum number of iterations taken for the solver to converge.
82
84
 
83
85
  Attributes
84
86
  ----------
@@ -87,8 +89,8 @@ class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
87
89
 
88
90
  See also
89
91
  --------
90
- sksurv.svm.FastSurvivalSVM
91
- Alternative implementation with reduced time complexity for training.
92
+ sksurv.svm.FastSurvivalSVM : Alternative implementation with reduced time complexity for training.
93
+ sksurv.svm.HingeLossSurvivalSVM : Non-linear version of the naive survival SVM based on kernel functions.
92
94
 
93
95
  References
94
96
  ----------
@@ -138,6 +140,30 @@ class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
138
140
  self.alpha = alpha
139
141
 
140
142
  def _get_survival_pairs(self, X, y, random_state): # pylint: disable=no-self-use
143
+ """Generates comparable pairs from survival data.
144
+
145
+ Parameters
146
+ ----------
147
+ X : array-like, shape = (n_samples, n_features)
148
+ Data matrix.
149
+ y : structured array, shape = (n_samples,)
150
+ A structured array containing the binary event indicator
151
+ and time of event or time of censoring.
152
+ random_state : RandomState instance
153
+ Random number generator used for shuffling.
154
+
155
+ Returns
156
+ -------
157
+ x_pairs : ndarray, shape = (n_pairs, n_features)
158
+ Feature differences for comparable pairs.
159
+ y_pairs : ndarray, shape = (n_pairs,)
160
+ Labels for comparable pairs (1 or -1).
161
+
162
+ Raises
163
+ ------
164
+ NoComparablePairException
165
+ If no comparable pairs can be formed from the input data.
166
+ """
141
167
  feature_names = _get_feature_names(X)
142
168
 
143
169
  X = validate_data(self, X, ensure_min_samples=2)
@@ -180,9 +206,9 @@ class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
180
206
  Data matrix.
181
207
 
182
208
  y : structured array, shape = (n_samples,)
183
- A structured array containing the binary event indicator
184
- as first field, and time of event or time of censoring as
185
- second field.
209
+ A structured array with two fields. The first field is a boolean
210
+ where ``True`` indicates an event and ``False`` indicates right-censoring.
211
+ The second field is a float with the time of event or time of censoring.
186
212
 
187
213
  sample_weight : array-like, shape = (n_samples,), optional
188
214
  Array of weights that are assigned to individual
@@ -203,9 +229,12 @@ class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
203
229
  return super().fit(x_pairs, y_pairs, sample_weight=sample_weight)
204
230
 
205
231
  def predict(self, X):
206
- """Rank samples according to survival times
232
+ """Predict risk scores.
207
233
 
208
- Lower ranks indicate shorter survival, higher ranks longer survival.
234
+ Predictions are risk scores (i.e. higher values indicate an
235
+ increased risk of experiencing an event). The scores have no
236
+ unit and are only meaningful to rank samples by their risk
237
+ of experiencing an event.
209
238
 
210
239
  Parameters
211
240
  ----------
@@ -214,7 +243,7 @@ class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
214
243
 
215
244
  Returns
216
245
  -------
217
- y : ndarray, shape = (n_samples,)
218
- Predicted ranks.
246
+ y : ndarray, shape = (n_samples,), dtype = float
247
+ Predicted risk scores.
219
248
  """
220
249
  return -self.decision_function(X)