scikit-survival 0.24.0__cp310-cp310-macosx_11_0_arm64.whl → 0.25.0__cp310-cp310-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. scikit_survival-0.25.0.dist-info/METADATA +185 -0
  2. scikit_survival-0.25.0.dist-info/RECORD +58 -0
  3. {scikit_survival-0.24.0.dist-info → scikit_survival-0.25.0.dist-info}/WHEEL +2 -1
  4. sksurv/__init__.py +51 -6
  5. sksurv/base.py +12 -2
  6. sksurv/bintrees/_binarytrees.cpython-310-darwin.so +0 -0
  7. sksurv/column.py +33 -29
  8. sksurv/compare.py +22 -22
  9. sksurv/datasets/base.py +45 -20
  10. sksurv/docstrings.py +99 -0
  11. sksurv/ensemble/_coxph_loss.cpython-310-darwin.so +0 -0
  12. sksurv/ensemble/boosting.py +116 -168
  13. sksurv/ensemble/forest.py +94 -151
  14. sksurv/functions.py +29 -29
  15. sksurv/io/arffread.py +34 -3
  16. sksurv/io/arffwrite.py +38 -2
  17. sksurv/kernels/_clinical_kernel.cpython-310-darwin.so +0 -0
  18. sksurv/kernels/clinical.py +33 -13
  19. sksurv/linear_model/_coxnet.cpython-310-darwin.so +0 -0
  20. sksurv/linear_model/aft.py +14 -11
  21. sksurv/linear_model/coxnet.py +138 -89
  22. sksurv/linear_model/coxph.py +102 -83
  23. sksurv/meta/ensemble_selection.py +91 -9
  24. sksurv/meta/stacking.py +47 -26
  25. sksurv/metrics.py +257 -224
  26. sksurv/nonparametric.py +150 -81
  27. sksurv/preprocessing.py +55 -27
  28. sksurv/svm/_minlip.cpython-310-darwin.so +0 -0
  29. sksurv/svm/_prsvm.cpython-310-darwin.so +0 -0
  30. sksurv/svm/minlip.py +160 -79
  31. sksurv/svm/naive_survival_svm.py +63 -34
  32. sksurv/svm/survival_svm.py +104 -104
  33. sksurv/tree/_criterion.cpython-310-darwin.so +0 -0
  34. sksurv/tree/tree.py +170 -84
  35. sksurv/util.py +80 -26
  36. scikit_survival-0.24.0.dist-info/METADATA +0 -888
  37. scikit_survival-0.24.0.dist-info/RECORD +0 -57
  38. {scikit_survival-0.24.0.dist-info → scikit_survival-0.25.0.dist-info/licenses}/COPYING +0 -0
  39. {scikit_survival-0.24.0.dist-info → scikit_survival-0.25.0.dist-info}/top_level.txt +0 -0
sksurv/svm/minlip.py CHANGED
@@ -19,11 +19,23 @@ __all__ = ["MinlipSurvivalAnalysis", "HingeLossSurvivalSVM"]
19
19
 
20
20
 
21
21
  class QPSolver(metaclass=ABCMeta):
22
- """
23
- Solves a quadratic program::
22
+ r"""Abstract base class for quadratic program solvers.
23
+
24
+ This class defines the interface for solvers that minimize a quadratic
25
+ objective function subject to linear inequality constraints,
26
+ formulated as:
27
+
28
+ .. math::
29
+
30
+ \min_{x} \quad (1/2)x^T P x + q^T x \\
31
+ \text{subject to} \quad G x \preceq h
24
32
 
25
- minimize (1/2)*x'*P*x + q'*x
26
- subject to G*x <= h
33
+ Parameters
34
+ ----------
35
+ max_iter : int or None
36
+ Maximum number of iterations to perform.
37
+ verbose : bool
38
+ Enable verbose output of the solver.
27
39
  """
28
40
 
29
41
  @abstractmethod
@@ -33,7 +45,26 @@ class QPSolver(metaclass=ABCMeta):
33
45
 
34
46
  @abstractmethod
35
47
  def solve(self, P, q, G, h):
36
- """Returns solution to QP."""
48
+ """Find solution to QP.
49
+
50
+ Parameters
51
+ ----------
52
+ P : array-like, shape=(n_variables, n_variables)
53
+ Quadratic part of the objective function.
54
+ q : array-like, shape=(n_variables,)
55
+ Linear part of the objective function.
56
+ G : array-like, shape=(n_constraints, n_variables)
57
+ Matrix for inequality constraints.
58
+ h : array-like, shape=(n_constraints,)
59
+ Vector for inequality constraints.
60
+
61
+ Returns
62
+ -------
63
+ x : ndarray, shape=(n_variables,)
64
+ The optimal solution.
65
+ n_iter : int
66
+ Number of iterations performed by the solver.
67
+ """
37
68
 
38
69
 
39
70
  class OsqpSolver(QPSolver):
@@ -67,6 +98,7 @@ class OsqpSolver(QPSolver):
67
98
  return results.x[np.newaxis], n_iter
68
99
 
69
100
  def _get_options(self):
101
+ """Returns a dictionary of OSQP solver options."""
70
102
  solver_opts = {
71
103
  "eps_abs": 1e-5,
72
104
  "eps_rel": 1e-5,
@@ -78,14 +110,24 @@ class OsqpSolver(QPSolver):
78
110
 
79
111
 
80
112
  class EcosSolver(QPSolver):
81
- """Solves QP by expressing it as second-order cone program::
113
+ r"""Solves QP by expressing it as second-order cone program:
82
114
 
83
- minimize c^T @ x
84
- subject to G @ x <=_K h
115
+ .. math::
85
116
 
86
- where the last inequality is generalized, i.e. ``h - G*x``
87
- belongs to the cone ``K``. ECOS supports the positive orthant
88
- ``R_+`` and second-order cones ``Q_n``.
117
+ \min \quad c^T x \\
118
+ \text{subject to} \quad G x \preceq_K h
119
+
120
+ where the last inequality is generalized, i.e. :math:`h - G x`
121
+ belongs to the cone :math:`K`.
122
+
123
+ Parameters
124
+ ----------
125
+ max_iter : int or None
126
+ Maximum number of iterations to perform.
127
+ verbose : bool
128
+ Enable verbose output of the solver.
129
+ cond : float or None, default: None
130
+ Condition number for eigenvalue decomposition.
89
131
  """
90
132
 
91
133
  EXIT_OPTIMAL = 0 # Optimal solution found
@@ -144,6 +186,18 @@ class EcosSolver(QPSolver):
144
186
  return x[np.newaxis], n_iter
145
187
 
146
188
  def _check_success(self, results): # pylint: disable=no-self-use
189
+ """Checks if the ECOS solver converged successfully.
190
+
191
+ Parameters
192
+ ----------
193
+ results : dict
194
+ The results dictionary returned by ``ecos.solve``.
195
+
196
+ Raises
197
+ ------
198
+ RuntimeError
199
+ If the solver failed for an unknown reason or found primal/dual infeasibility.
200
+ """
147
201
  exit_flag = results["info"]["exitFlag"]
148
202
  if exit_flag in (EcosSolver.EXIT_OPTIMAL, EcosSolver.EXIT_OPTIMAL + EcosSolver.EXIT_INACC_OFFSET):
149
203
  return
@@ -160,6 +214,20 @@ class EcosSolver(QPSolver):
160
214
  raise RuntimeError(f"Unknown problem in ECOS solver, exit status: {exit_flag}")
161
215
 
162
216
  def _decompose(self, P):
217
+ """Performs eigenvalue decomposition of P.
218
+
219
+ Parameters
220
+ ----------
221
+ P : array-like, shape=(n_variables, n_variables)
222
+ Quadratic part of the objective function.
223
+
224
+ Returns
225
+ -------
226
+ decomposed : ndarray
227
+ Decomposed matrix.
228
+ largest_eigenvalue : float
229
+ The largest eigenvalue of P.
230
+ """
163
231
  # from scipy.linalg.pinvh
164
232
  s, u = linalg.eigh(P)
165
233
  largest_eigenvalue = np.max(np.abs(s))
@@ -182,33 +250,38 @@ class EcosSolver(QPSolver):
182
250
 
183
251
 
184
252
  class MinlipSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
185
- """Survival model related to survival SVM, using a minimal Lipschitz smoothness strategy
186
- instead of a maximal margin strategy.
253
+ r"""Survival model based on a minimal Lipschitz smoothness strategy.
254
+
255
+ This model is related to :class:`sksurv.svm.FastKernelSurvivalSVM` but
256
+ minimizes a different objective function, focusing on Lipschitz
257
+ smoothness rather than maximal margin. The optimization problem is
258
+ formulated as:
187
259
 
188
260
  .. math::
189
261
 
190
- \\min_{\\mathbf{w}}\\quad
191
- \\frac{1}{2} \\lVert \\mathbf{w} \\rVert_2^2
192
- + \\gamma \\sum_{i = 1}^n \\xi_i \\\\
193
- \\text{subject to}\\quad
194
- \\mathbf{w}^\\top \\mathbf{x}_i - \\mathbf{w}^\\top \\mathbf{x}_j \\geq y_i - y_j - \\xi_i,\\quad
195
- \\forall (i, j) \\in \\mathcal{P}_\\text{1-NN}, \\\\
196
- \\xi_i \\geq 0,\\quad \\forall i = 1,\\dots,n.
262
+ \min_{\mathbf{w}}\quad
263
+ \frac{1}{2} \lVert \mathbf{w} \rVert_2^2
264
+ + \gamma \sum_{i = 1}^n \xi_i \\
265
+ \text{subject to}\quad
266
+ \mathbf{w}^\top \mathbf{x}_i - \mathbf{w}^\top \mathbf{x}_j \geq y_i - y_j - \xi_i,\quad
267
+ \forall (i, j) \in \mathcal{P}_\text{1-NN}, \\
268
+ \xi_i \geq 0,\quad \forall i = 1,\dots,n.
197
269
 
198
- \\mathcal{P}_\\text{1-NN} = \\{ (i, j) \\mid y_i > y_j \\land \\delta_j = 1
199
- \\land \\nexists k : y_i > y_k > y_j \\land \\delta_k = 1 \\}_{i,j=1}^n.
270
+ \mathcal{P}_\text{1-NN} = \{ (i, j) \mid y_i > y_j \land \delta_j = 1
271
+ \land \nexists k : y_i > y_k > y_j \land \delta_k = 1 \}_{i,j=1}^n.
200
272
 
201
273
  See [1]_ for further description.
202
274
 
203
275
  Parameters
204
276
  ----------
205
- alpha : float, positive, default: 1
277
+ alpha : float, optional, default: 1
206
278
  Weight of penalizing the hinge loss in the objective function.
279
+ Must be greater than 0.
207
280
 
208
281
  solver : {'ecos', 'osqp'}, optional, default: 'ecos'
209
282
  Which quadratic program solver to use.
210
283
 
211
- kernel : str or callable, default: 'linear'.
284
+ kernel : str or callable, optional, default: 'linear'
212
285
  Kernel mapping used internally. This parameter is directly passed to
213
286
  :func:`sklearn.metrics.pairwise.pairwise_kernels`.
214
287
  If `kernel` is a string, it must be one of the metrics
@@ -228,52 +301,52 @@ class MinlipSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
228
301
  the kernel; see the documentation for :mod:`sklearn.metrics.pairwise`.
229
302
  Ignored by other kernels.
230
303
 
231
- degree : int, default: 3
304
+ degree : int, optional, default: 3
232
305
  Degree of the polynomial kernel. Ignored by other kernels.
233
306
 
234
- coef0 : float, optional
307
+ coef0 : float, optional, default: 1
235
308
  Zero coefficient for polynomial and sigmoid kernels.
236
309
  Ignored by other kernels.
237
310
 
238
- kernel_params : mapping of string to any, optional
311
+ kernel_params : dict, optional, default: None
239
312
  Additional parameters (keyword arguments) for kernel function passed
240
313
  as callable object.
241
314
 
242
315
  pairs : {'all', 'nearest', 'next'}, optional, default: 'nearest'
243
316
  Which constraints to use in the optimization problem.
244
317
 
245
- - all: Use all comparable pairs. Scales quadratic in number of samples
318
+ - all: Use all comparable pairs. Scales quadratically in number of samples
246
319
  (cf. :class:`sksurv.svm.HingeLossSurvivalSVM`).
247
320
  - nearest: Only considers comparable pairs :math:`(i, j)` where :math:`j` is the
248
321
  uncensored sample with highest survival time smaller than :math:`y_i`.
249
- Scales linear in number of samples.
322
+ Scales linearly in number of samples.
250
323
  - next: Only compare against direct nearest neighbor according to observed time,
251
- disregarding its censoring status. Scales linear in number of samples.
324
+ disregarding its censoring status. Scales linearly in number of samples.
252
325
 
253
- verbose : bool, default: False
326
+ verbose : bool, optional, default: False
254
327
  Enable verbose output of solver.
255
328
 
256
- timeit : False, int or None, default: None
257
- If non-zero value is provided the time it takes for optimization is measured.
258
- The given number of repetitions are performed. Results can be accessed from the
259
- ``timings_`` attribute.
329
+ timeit : bool, int, or None, optional, default: False
330
+ If ``True`` or a non-zero integer, the time taken for optimization is measured.
331
+ If an integer is provided, the optimization is repeated that many times.
332
+ Results can be accessed from the ``timings_`` attribute.
260
333
 
261
334
  max_iter : int or None, optional, default: None
262
- Maximum number of iterations to perform. By default
263
- use solver's default value.
335
+ The maximum number of iterations taken for the solvers to converge.
336
+ If ``None``, use solver's default value.
264
337
 
265
338
  Attributes
266
339
  ----------
267
- X_fit_ : ndarray
340
+ X_fit_ : ndarray, shape = (n_samples, `n_features_in_`)
268
341
  Training data.
269
342
 
270
- coef_ : ndarray, shape = (n_samples,)
343
+ coef_ : ndarray, shape = (n_samples,), dtype = float
271
344
  Coefficients of the features in the decision function.
272
345
 
273
346
  n_features_in_ : int
274
347
  Number of features seen during ``fit``.
275
348
 
276
- feature_names_in_ : ndarray of shape (`n_features_in_`,)
349
+ feature_names_in_ : ndarray, shape = (`n_features_in_`,)
277
350
  Names of features seen during ``fit``. Defined only when `X`
278
351
  has feature names that are all strings.
279
352
 
@@ -405,9 +478,9 @@ class MinlipSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
405
478
  Data matrix.
406
479
 
407
480
  y : structured array, shape = (n_samples,)
408
- A structured array containing the binary event indicator
409
- as first field, and time of event or time of censoring as
410
- second field.
481
+ A structured array with two fields. The first field is a boolean
482
+ where ``True`` indicates an event and ``False`` indicates right-censoring.
483
+ The second field is a float with the time of event or time of censoring.
411
484
 
412
485
  Returns
413
486
  -------
@@ -423,8 +496,10 @@ class MinlipSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
423
496
  def predict(self, X):
424
497
  """Predict risk score of experiencing an event.
425
498
 
426
- Higher scores indicate shorter survival (high risk),
427
- lower scores longer survival (low risk).
499
+ Higher values indicate an increased risk of experiencing an event,
500
+ lower values a decreased risk of experiencing an event. The scores
501
+ have no unit and are only meaningful to rank samples by their risk
502
+ of experiencing an event.
428
503
 
429
504
  Parameters
430
505
  ----------
@@ -443,37 +518,39 @@ class MinlipSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
443
518
 
444
519
 
445
520
  class HingeLossSurvivalSVM(MinlipSurvivalAnalysis):
446
- """Naive implementation of kernel survival support vector machine.
521
+ r"""Naive implementation of kernel survival support vector machine.
447
522
 
448
- A new set of samples is created by building the difference between any two feature
449
- vectors in the original data, thus this version requires :math:`O(\\text{n_samples}^4)` space and
450
- :math:`O(\\text{n_samples}^6 \\cdot \\text{n_features})` time.
523
+ This implementation creates a new set of samples by building the difference
524
+ between any two feature vectors in the original data. This approach
525
+ requires :math:`O(\text{n_samples}^4)` space and
526
+ :math:`O(\text{n_samples}^6 \cdot \text{n_features})` time, making it
527
+ computationally intensive for large datasets.
451
528
 
452
- See :class:`sksurv.svm.NaiveSurvivalSVM` for the linear naive survival SVM based on liblinear.
529
+ The optimization problem is formulated as:
453
530
 
454
531
  .. math::
455
532
 
456
- \\min_{\\mathbf{w}}\\quad
457
- \\frac{1}{2} \\lVert \\mathbf{w} \\rVert_2^2
458
- + \\gamma \\sum_{i = 1}^n \\xi_i \\\\
459
- \\text{subject to}\\quad
460
- \\mathbf{w}^\\top \\phi(\\mathbf{x})_i - \\mathbf{w}^\\top \\phi(\\mathbf{x})_j \\geq 1 - \\xi_{ij},\\quad
461
- \\forall (i, j) \\in \\mathcal{P}, \\\\
462
- \\xi_i \\geq 0,\\quad \\forall (i, j) \\in \\mathcal{P}.
533
+ \min_{\mathbf{w}}\quad
534
+ \frac{1}{2} \lVert \mathbf{w} \rVert_2^2
535
+ + \gamma \sum_{i = 1}^n \xi_i \\
536
+ \text{subject to}\quad
537
+ \mathbf{w}^\top \phi(\mathbf{x})_i - \mathbf{w}^\top \phi(\mathbf{x})_j \geq 1 - \xi_{ij},\quad
538
+ \forall (i, j) \in \mathcal{P}, \\
539
+ \xi_i \geq 0,\quad \forall (i, j) \in \mathcal{P}.
463
540
 
464
- \\mathcal{P} = \\{ (i, j) \\mid y_i > y_j \\land \\delta_j = 1 \\}_{i,j=1,\\dots,n}.
541
+ \mathcal{P} = \{ (i, j) \mid y_i > y_j \land \delta_j = 1 \}_{i,j=1,\dots,n}.
465
542
 
466
543
  See [1]_, [2]_, [3]_ for further description.
467
544
 
468
545
  Parameters
469
546
  ----------
470
- alpha : float, positive, default: 1
471
- Weight of penalizing the hinge loss in the objective function.
547
+ alpha : float, optional, default: 1
548
+ Weight of penalizing the hinge loss in the objective function. Must be greater than 0.
472
549
 
473
550
  solver : {'ecos', 'osqp'}, optional, default: 'ecos'
474
551
  Which quadratic program solver to use.
475
552
 
476
- kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed'} or callable, default: 'linear'.
553
+ kernel : str or callable, optional, default: 'linear'
477
554
  Kernel mapping used internally. This parameter is directly passed to
478
555
  :func:`sklearn.metrics.pairwise.pairwise_kernels`.
479
556
  If `kernel` is a string, it must be one of the metrics
@@ -487,63 +564,67 @@ class HingeLossSurvivalSVM(MinlipSurvivalAnalysis):
487
564
  they operate on matrices, not single samples. Use the string
488
565
  identifying the kernel instead.
489
566
 
490
- gamma : float, optional, default: None
567
+ gamma : float or None, optional, default: None
491
568
  Gamma parameter for the RBF, laplacian, polynomial, exponential chi2
492
569
  and sigmoid kernels. Interpretation of the default value is left to
493
570
  the kernel; see the documentation for :mod:`sklearn.metrics.pairwise`.
494
571
  Ignored by other kernels.
495
572
 
496
- degree : int, default: 3
573
+ degree : int, optional, default: 3
497
574
  Degree of the polynomial kernel. Ignored by other kernels.
498
575
 
499
- coef0 : float, optional
576
+ coef0 : float, optional, default: 1
500
577
  Zero coefficient for polynomial and sigmoid kernels.
501
578
  Ignored by other kernels.
502
579
 
503
- kernel_params : mapping of string to any, optional
580
+ kernel_params : dict or None, optional, default: None
504
581
  Additional parameters (keyword arguments) for kernel function passed
505
582
  as callable object.
506
583
 
507
584
  pairs : {'all', 'nearest', 'next'}, optional, default: 'all'
508
585
  Which constraints to use in the optimization problem.
509
586
 
510
- - all: Use all comparable pairs. Scales quadratic in number of samples.
587
+ - all: Use all comparable pairs. Scales quadratically in number of samples.
511
588
  - nearest: Only considers comparable pairs :math:`(i, j)` where :math:`j` is the
512
589
  uncensored sample with highest survival time smaller than :math:`y_i`.
513
- Scales linear in number of samples (cf. :class:`sksurv.svm.MinlipSurvivalAnalysis`).
590
+ Scales linearly in number of samples (cf. :class:`sksurv.svm.MinlipSurvivalAnalysis`).
514
591
  - next: Only compare against direct nearest neighbor according to observed time,
515
- disregarding its censoring status. Scales linear in number of samples.
592
+ disregarding its censoring status. Scales linearly in number of samples.
516
593
 
517
- verbose : bool, default: False
518
- Enable verbose output of solver.
594
+ verbose : bool, optional, default: False
595
+ If ``True``, enable verbose output of the solver.
519
596
 
520
- timeit : False, int or None, default: None
521
- If non-zero value is provided the time it takes for optimization is measured.
522
- The given number of repetitions are performed. Results can be accessed from the
523
- ``timings_`` attribute.
597
+ timeit : bool, int, or None, optional, default: False
598
+ If ``True`` or a non-zero integer, the time taken for optimization is measured.
599
+ If an integer is provided, the optimization is repeated that many times.
600
+ Results can be accessed from the ``timings_`` attribute.
524
601
 
525
602
  max_iter : int or None, optional, default: None
526
- Maximum number of iterations to perform. By default
527
- use solver's default value.
603
+ The maximum number of iterations taken for the solvers to converge.
604
+ If ``None``, use solver's default value.
528
605
 
529
606
  Attributes
530
607
  ----------
531
- X_fit_ : ndarray
608
+ X_fit_ : ndarray, shape = (n_samples, `n_features_in_`)
532
609
  Training data.
533
610
 
534
- coef_ : ndarray, shape = (n_samples,)
611
+ coef_ : ndarray, shape = (n_samples,), dtype = float
535
612
  Coefficients of the features in the decision function.
536
613
 
537
614
  n_features_in_ : int
538
615
  Number of features seen during ``fit``.
539
616
 
540
- feature_names_in_ : ndarray of shape (`n_features_in_`,)
617
+ feature_names_in_ : ndarray, shape = (`n_features_in_`,), dtype = object
541
618
  Names of features seen during ``fit``. Defined only when `X`
542
619
  has feature names that are all strings.
543
620
 
544
621
  n_iter_ : int
545
622
  Number of iterations run by the optimization routine to fit the model.
546
623
 
624
+ See also
625
+ --------
626
+ sksurv.svm.NaiveSurvivalSVM : The linear naive survival SVM based on liblinear.
627
+
547
628
  References
548
629
  ----------
549
630
  .. [1] Van Belle, V., Pelckmans, K., Suykens, J. A., & Van Huffel, S.
@@ -24,61 +24,63 @@ from ..util import check_array_survival
24
24
 
25
25
 
26
26
  class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
27
- """Naive version of linear Survival Support Vector Machine.
27
+ r"""Naive implementation of linear Survival Support Vector Machine.
28
28
 
29
- Uses regular linear support vector classifier (liblinear).
30
- A new set of samples is created by building the difference between any two feature
31
- vectors in the original data, thus this version requires :math:`O(\\text{n_samples}^2)` space.
29
+ This class uses a regular linear support vector classifier (liblinear)
30
+ to implement a survival SVM. It constructs a new dataset by computing
31
+ the difference between feature vectors of comparable pairs from the
32
+ original data. This approach results in a space complexity of
33
+ :math:`O(\text{n_samples}^2)`.
32
34
 
33
- See :class:`sksurv.svm.HingeLossSurvivalSVM` for the kernel naive survival SVM.
35
+ The optimization problem is formulated as:
34
36
 
35
37
  .. math::
36
38
 
37
- \\min_{\\mathbf{w}}\\quad
38
- \\frac{1}{2} \\lVert \\mathbf{w} \\rVert_2^2
39
- + \\gamma \\sum_{i = 1}^n \\xi_i \\\\
40
- \\text{subject to}\\quad
41
- \\mathbf{w}^\\top \\mathbf{x}_i - \\mathbf{w}^\\top \\mathbf{x}_j \\geq 1 - \\xi_{ij},\\quad
42
- \\forall (i, j) \\in \\mathcal{P}, \\\\
43
- \\xi_i \\geq 0,\\quad \\forall (i, j) \\in \\mathcal{P}.
39
+ \min_{\mathbf{w}}\quad
40
+ \frac{1}{2} \lVert \mathbf{w} \rVert_2^2
41
+ + \gamma \sum_{i = 1}^n \xi_i \\
42
+ \text{subject to}\quad
43
+ \mathbf{w}^\top \mathbf{x}_i - \mathbf{w}^\top \mathbf{x}_j \geq 1 - \xi_{ij},\quad
44
+ \forall (i, j) \in \mathcal{P}, \\
45
+ \xi_i \geq 0,\quad \forall (i, j) \in \mathcal{P}.
44
46
 
45
- \\mathcal{P} = \\{ (i, j) \\mid y_i > y_j \\land \\delta_j = 1 \\}_{i,j=1,\\dots,n}.
47
+ \mathcal{P} = \{ (i, j) \mid y_i > y_j \land \delta_j = 1 \}_{i,j=1,\dots,n}.
46
48
 
47
49
  See [1]_, [2]_ for further description.
48
50
 
49
51
  Parameters
50
52
  ----------
51
- alpha : float, positive, default: 1.0
52
- Weight of penalizing the squared hinge loss in the objective function.
53
+ alpha : float, optional, default: 1.0
54
+ Weight of penalizing the squared hinge loss in the objective function. Must be greater than 0.
53
55
 
54
- loss : {'hinge', 'squared_hinge'}, default: 'squared_hinge'
56
+ loss : {'hinge', 'squared_hinge'}, optional, default: 'squared_hinge'
55
57
  Specifies the loss function. 'hinge' is the standard SVM loss
56
58
  (used e.g. by the SVC class) while 'squared_hinge' is the
57
59
  square of the hinge loss.
58
60
 
59
- penalty : {'l1', 'l2'}, default: 'l2'
61
+ penalty : {'l1', 'l2'}, optional, default: 'l2'
60
62
  Specifies the norm used in the penalization. The 'l2'
61
63
  penalty is the standard used in SVC. The 'l1' leads to `coef_`
62
64
  vectors that are sparse.
63
65
 
64
- dual : bool, default: True
66
+ dual : bool, optional, default: True
65
67
  Select the algorithm to either solve the dual or primal
66
68
  optimization problem. Prefer dual=False when n_samples > n_features.
67
69
 
68
70
  tol : float, optional, default: 1e-4
69
71
  Tolerance for stopping criteria.
70
72
 
71
- verbose : int, default: 0
72
- Enable verbose output. Note that this setting takes advantage of a
73
+ verbose : int, optional, default: 0
74
+ If non-zero, enable verbose output. Note that this setting takes advantage of a
73
75
  per-process runtime setting in liblinear that, if enabled, may not work
74
76
  properly in a multithreaded context.
75
77
 
76
- random_state : int seed, RandomState instance, or None, default: None
77
- The seed of the pseudo random number generator to use when
78
- shuffling the data.
78
+ random_state : int, :class:`numpy.random.RandomState` instance, or None, optional, default: None
79
+ Used to resolve ties in survival times. Pass an int for reproducible output across
80
+ multiple :meth:`fit` calls.
79
81
 
80
- max_iter : int, default: 1000
81
- The maximum number of iterations to be run.
82
+ max_iter : int, optional, default: 1000
83
+ The maximum number of iterations taken for the solver to converge.
82
84
 
83
85
  Attributes
84
86
  ----------
@@ -87,8 +89,8 @@ class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
87
89
 
88
90
  See also
89
91
  --------
90
- sksurv.svm.FastSurvivalSVM
91
- Alternative implementation with reduced time complexity for training.
92
+ sksurv.svm.FastSurvivalSVM : Alternative implementation with reduced time complexity for training.
93
+ sksurv.svm.HingeLossSurvivalSVM : Non-linear version of the naive survival SVM based on kernel functions.
92
94
 
93
95
  References
94
96
  ----------
@@ -138,6 +140,30 @@ class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
138
140
  self.alpha = alpha
139
141
 
140
142
  def _get_survival_pairs(self, X, y, random_state): # pylint: disable=no-self-use
143
+ """Generates comparable pairs from survival data.
144
+
145
+ Parameters
146
+ ----------
147
+ X : array-like, shape = (n_samples, n_features)
148
+ Data matrix.
149
+ y : structured array, shape = (n_samples,)
150
+ A structured array containing the binary event indicator
151
+ and time of event or time of censoring.
152
+ random_state : RandomState instance
153
+ Random number generator used for shuffling.
154
+
155
+ Returns
156
+ -------
157
+ x_pairs : ndarray, shape = (n_pairs, n_features)
158
+ Feature differences for comparable pairs.
159
+ y_pairs : ndarray, shape = (n_pairs,)
160
+ Labels for comparable pairs (1 or -1).
161
+
162
+ Raises
163
+ ------
164
+ NoComparablePairException
165
+ If no comparable pairs can be formed from the input data.
166
+ """
141
167
  feature_names = _get_feature_names(X)
142
168
 
143
169
  X = validate_data(self, X, ensure_min_samples=2)
@@ -180,9 +206,9 @@ class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
180
206
  Data matrix.
181
207
 
182
208
  y : structured array, shape = (n_samples,)
183
- A structured array containing the binary event indicator
184
- as first field, and time of event or time of censoring as
185
- second field.
209
+ A structured array with two fields. The first field is a boolean
210
+ where ``True`` indicates an event and ``False`` indicates right-censoring.
211
+ The second field is a float with the time of event or time of censoring.
186
212
 
187
213
  sample_weight : array-like, shape = (n_samples,), optional
188
214
  Array of weights that are assigned to individual
@@ -203,9 +229,12 @@ class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
203
229
  return super().fit(x_pairs, y_pairs, sample_weight=sample_weight)
204
230
 
205
231
  def predict(self, X):
206
- """Rank samples according to survival times
232
+ """Predict risk scores.
207
233
 
208
- Lower ranks indicate shorter survival, higher ranks longer survival.
234
+ Predictions are risk scores (i.e. higher values indicate an
235
+ increased risk of experiencing an event). The scores have no
236
+ unit and are only meaningful to rank samples by their risk
237
+ of experiencing an event.
209
238
 
210
239
  Parameters
211
240
  ----------
@@ -214,7 +243,7 @@ class NaiveSurvivalSVM(SurvivalAnalysisMixin, LinearSVC):
214
243
 
215
244
  Returns
216
245
  -------
217
- y : ndarray, shape = (n_samples,)
218
- Predicted ranks.
246
+ y : ndarray, shape = (n_samples,), dtype = float
247
+ Predicted risk scores.
219
248
  """
220
249
  return -self.decision_function(X)