scikit_survival-0.26.0-cp314-cp314-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scikit_survival-0.26.0.dist-info/METADATA +185 -0
- scikit_survival-0.26.0.dist-info/RECORD +58 -0
- scikit_survival-0.26.0.dist-info/WHEEL +5 -0
- scikit_survival-0.26.0.dist-info/licenses/COPYING +674 -0
- scikit_survival-0.26.0.dist-info/top_level.txt +1 -0
- sksurv/__init__.py +183 -0
- sksurv/base.py +115 -0
- sksurv/bintrees/__init__.py +15 -0
- sksurv/bintrees/_binarytrees.cp314-win_amd64.pyd +0 -0
- sksurv/column.py +204 -0
- sksurv/compare.py +123 -0
- sksurv/datasets/__init__.py +12 -0
- sksurv/datasets/base.py +614 -0
- sksurv/datasets/data/GBSG2.arff +700 -0
- sksurv/datasets/data/actg320.arff +1169 -0
- sksurv/datasets/data/bmt.arff +46 -0
- sksurv/datasets/data/breast_cancer_GSE7390-metastasis.arff +283 -0
- sksurv/datasets/data/cgvhd.arff +118 -0
- sksurv/datasets/data/flchain.arff +7887 -0
- sksurv/datasets/data/veteran.arff +148 -0
- sksurv/datasets/data/whas500.arff +520 -0
- sksurv/docstrings.py +99 -0
- sksurv/ensemble/__init__.py +2 -0
- sksurv/ensemble/_coxph_loss.cp314-win_amd64.pyd +0 -0
- sksurv/ensemble/boosting.py +1564 -0
- sksurv/ensemble/forest.py +902 -0
- sksurv/ensemble/survival_loss.py +151 -0
- sksurv/exceptions.py +18 -0
- sksurv/functions.py +114 -0
- sksurv/io/__init__.py +2 -0
- sksurv/io/arffread.py +91 -0
- sksurv/io/arffwrite.py +181 -0
- sksurv/kernels/__init__.py +1 -0
- sksurv/kernels/_clinical_kernel.cp314-win_amd64.pyd +0 -0
- sksurv/kernels/clinical.py +348 -0
- sksurv/linear_model/__init__.py +3 -0
- sksurv/linear_model/_coxnet.cp314-win_amd64.pyd +0 -0
- sksurv/linear_model/aft.py +208 -0
- sksurv/linear_model/coxnet.py +592 -0
- sksurv/linear_model/coxph.py +637 -0
- sksurv/meta/__init__.py +4 -0
- sksurv/meta/base.py +35 -0
- sksurv/meta/ensemble_selection.py +724 -0
- sksurv/meta/stacking.py +370 -0
- sksurv/metrics.py +1028 -0
- sksurv/nonparametric.py +911 -0
- sksurv/preprocessing.py +195 -0
- sksurv/svm/__init__.py +11 -0
- sksurv/svm/_minlip.cp314-win_amd64.pyd +0 -0
- sksurv/svm/_prsvm.cp314-win_amd64.pyd +0 -0
- sksurv/svm/minlip.py +695 -0
- sksurv/svm/naive_survival_svm.py +249 -0
- sksurv/svm/survival_svm.py +1236 -0
- sksurv/testing.py +155 -0
- sksurv/tree/__init__.py +1 -0
- sksurv/tree/_criterion.cp314-win_amd64.pyd +0 -0
- sksurv/tree/tree.py +790 -0
- sksurv/util.py +416 -0
sksurv/svm/survival_svm.py

@@ -0,0 +1,1236 @@
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from abc import ABCMeta, abstractmethod
from numbers import Integral, Real
import warnings

import numexpr
import numpy as np
from scipy.optimize import minimize
from sklearn.base import BaseEstimator
from sklearn.exceptions import ConvergenceWarning
from sklearn.metrics.pairwise import PAIRWISE_KERNEL_FUNCTIONS, pairwise_kernels
from sklearn.utils._param_validation import Interval, StrOptions
from sklearn.utils.extmath import safe_sparse_dot, squared_norm
from sklearn.utils.validation import (
    check_array,
    check_consistent_length,
    check_is_fitted,
    check_random_state,
    check_X_y,
    validate_data,
)

from ..base import SurvivalAnalysisMixin
from ..bintrees import AVLTree, RBTree
from ..exceptions import NoComparablePairException
from ..util import check_array_survival
from ._prsvm import survival_constraints_simple, survival_constraints_with_support_vectors

class Counter(metaclass=ABCMeta):
    @abstractmethod
    def __init__(self, x, y, status, time=None):
        self.x, self.y = check_X_y(x, y)

        assert np.issubdtype(y.dtype, np.integer), f"y vector must have integer type, but was {y.dtype}"
        assert y.min() == 0, "minimum element of y vector must be 0"

        if time is None:
            self.status = check_array(status, dtype=bool, ensure_2d=False)
            check_consistent_length(self.x, self.status)
        else:
            self.status = check_array(status, dtype=bool, ensure_2d=False)
            self.time = check_array(time, ensure_2d=False)
            check_consistent_length(self.x, self.status, self.time)

        self.eps = np.finfo(self.x.dtype).eps

    def update_sort_order(self, w):
        xw = np.dot(self.x, w)
        order = xw.argsort(kind="mergesort")
        self.xw = xw[order]
        self.order = order
        return xw

    @abstractmethod
    def calculate(self, v):
        """Return l_plus, xv_plus, l_minus, xv_minus"""


class OrderStatisticTreeSurvivalCounter(Counter):
    """Counting method used by :class:`LargeScaleOptimizer` for survival analysis.

    Parameters
    ----------
    x : array, shape = (n_samples, n_features)
        Feature matrix

    y : array of int, shape = (n_samples,)
        Unique ranks of samples, starting with 0.

    status : array of bool, shape = (n_samples,)
        Event indicator of samples.

    tree_class : type
        The class to use as an order statistic tree, either
        :class:`sksurv.bintrees.AVLTree` or :class:`sksurv.bintrees.RBTree`.

    time : array, shape = (n_samples,)
        Survival times.
    """

    def __init__(self, x, y, status, tree_class, time=None):
        super().__init__(x, y, status, time)
        self._tree_class = tree_class

    def calculate(self, v):
        # only self.xw is sorted, for everything else use self.order
        # the order of return values is with respect to original order of samples, NOT self.order
        xv = np.dot(self.x, v)

        od = self.order

        n_samples = self.x.shape[0]
        l_plus = np.zeros(n_samples, dtype=int)
        l_minus = np.zeros(n_samples, dtype=int)
        xv_plus = np.zeros(n_samples, dtype=float)
        xv_minus = np.zeros(n_samples, dtype=float)

        j = 0
        tree = self._tree_class(n_samples)
        for i in range(n_samples):
            while j < n_samples and 1 - self.xw[j] + self.xw[i] > 0:
                tree.insert(self.y[od[j]], xv[od[j]])
                j += 1

            # larger (root of t, y[od[i]])
            count, vec_sum = tree.count_larger_with_event(self.y[od[i]], self.status[od[i]])
            l_plus[od[i]] = count
            xv_plus[od[i]] = vec_sum

        tree = self._tree_class(n_samples)
        j = n_samples - 1
        for i in range(j, -1, -1):
            while j >= 0 and 1 - self.xw[i] + self.xw[j] > 0:
                if self.status[od[j]]:
                    tree.insert(self.y[od[j]], xv[od[j]])
                j -= 1

            # smaller (root of T, y[od[i]])
            count, vec_sum = tree.count_smaller(self.y[od[i]])
            l_minus[od[i]] = count
            xv_minus[od[i]] = vec_sum

        return l_plus, xv_plus, l_minus, xv_minus


class SurvivalCounter(Counter):
    def __init__(self, x, y, status, n_relevance_levels, time=None):
        super().__init__(x, y, status, time)
        self.n_relevance_levels = n_relevance_levels

    def _count_values(self):
        """Return dict mapping relevance level to sample index"""
        indices = {yi: [i] for i, yi in enumerate(self.y) if self.status[i]}

        return indices

    def calculate(self, v):
        n_samples = self.x.shape[0]
        l_plus = np.zeros(n_samples, dtype=int)
        l_minus = np.zeros(n_samples, dtype=int)
        xv_plus = np.zeros(n_samples, dtype=float)
        xv_minus = np.zeros(n_samples, dtype=float)
        indices = self._count_values()

        od = self.order

        for relevance in range(self.n_relevance_levels):
            j = 0
            count_plus = 0
            # relevance levels are unique, therefore count can only be 1 or 0
            count_minus = 1 if relevance in indices else 0
            xv_count_plus = 0
            xv_count_minus = np.dot(self.x.take(indices.get(relevance, []), axis=0), v).sum()

            for i in range(n_samples):
                if self.y[od[i]] != relevance or not self.status[od[i]]:
                    continue

                while j < n_samples and 1 - self.xw[j] + self.xw[i] > 0:
                    if self.y[od[j]] > relevance:
                        count_plus += 1
                        xv_count_plus += np.dot(self.x[od[j], :], v)
                        l_minus[od[j]] += count_minus
                        xv_minus[od[j]] += xv_count_minus

                    j += 1

                l_plus[od[i]] = count_plus
                xv_plus[od[i]] += xv_count_plus
                count_minus -= 1
                xv_count_minus -= np.dot(self.x.take(od[i], axis=0), v)

        return l_plus, xv_plus, l_minus, xv_minus

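
# [Editorial sketch, not part of survival_svm.py] The counters above exist to
# avoid enumerating comparable pairs explicitly. A hypothetical naive O(n^2)
# equivalent of that enumeration, using the definition of the pair set P from
# the FastSurvivalSVM docstring below ((i, j) is comparable if sample j had an
# event and sample i survived longer):
def _naive_comparable_pairs(time, event):
    return [
        (i, j)
        for i in range(len(time))
        for j in range(len(time))
        if event[j] and time[i] > time[j]
    ]
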
class RankSVMOptimizer(metaclass=ABCMeta):
    """Abstract base class for all optimizers"""

    def __init__(self, alpha, rank_ratio, timeit=False):
        self.alpha = alpha
        self.rank_ratio = rank_ratio
        self.timeit = timeit

        self._last_w = None
        # cache gradient computations
        self._last_gradient_w = None
        self._last_gradient = None

    @abstractmethod
    def _objective_func(self, w):
        """Evaluate objective function at w"""

    @abstractmethod
    def _update_constraints(self, w):
        """Update constraints"""

    @abstractmethod
    def _gradient_func(self, w):
        """Evaluate gradient at w"""

    @abstractmethod
    def _hessian_func(self, w, s):
        """Evaluate Hessian at w"""

    @property
    @abstractmethod
    def n_coefficients(self):
        """Return number of coefficients (includes intercept)"""

    def _update_constraints_if_necessary(self, w):
        needs_update = (w != self._last_w).any()
        if needs_update:
            self._update_constraints(w)
            self._last_w = w.copy()
        return needs_update

    def _do_objective_func(self, w):
        self._update_constraints_if_necessary(w)
        return self._objective_func(w)

    def _do_gradient_func(self, w):
        if self._last_gradient_w is not None and (w == self._last_gradient_w).all():
            return self._last_gradient

        self._update_constraints_if_necessary(w)
        self._last_gradient_w = w.copy()
        self._last_gradient = self._gradient_func(w)
        return self._last_gradient

    def _init_coefficients(self):
        w = np.zeros(self.n_coefficients)
        self._update_constraints(w)
        self._last_w = w.copy()
        return w

    def run(self, **kwargs):
        w = self._init_coefficients()

        timings = None
        if self.timeit:
            import timeit

            def _inner():
                return minimize(
                    self._do_objective_func,
                    w,
                    method="newton-cg",
                    jac=self._do_gradient_func,
                    hessp=self._hessian_func,
                    **kwargs,
                )

            timer = timeit.Timer(_inner)
            timings = timer.repeat(self.timeit, number=1)

        opt_result = minimize(
            self._do_objective_func,
            w,
            method="newton-cg",
            jac=self._do_gradient_func,
            hessp=self._hessian_func,
            **kwargs,
        )
        opt_result["timings"] = timings

        return opt_result

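
# [Editorial sketch, not part of survival_svm.py] The Newton-CG pattern that
# RankSVMOptimizer.run relies on, in isolation: scipy only needs the objective,
# its gradient, and a Hessian-vector product (hessp); the full Hessian is never
# formed. A quadratic toy problem, using np and minimize from the imports at
# the top of this module:
def _demo_newton_cg():
    A = np.diag([1.0, 10.0])

    def fun(w):
        return 0.5 * w @ A @ w

    def jac(w):
        return A @ w

    def hessp(w, s):
        # Hessian-vector product, mirroring the signature of _hessian_func(w, s)
        return A @ s

    return minimize(fun, np.ones(2), method="newton-cg", jac=jac, hessp=hessp)
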
class SimpleOptimizer(RankSVMOptimizer):
    """Simple optimizer, which explicitly constructs matrix of all pairs of samples"""

    def __init__(self, x, y, alpha, rank_ratio, timeit=False):
        super().__init__(alpha, rank_ratio, timeit)
        self.data_x = x
        self.constraints = survival_constraints_simple(np.asarray(y, dtype=np.uint8))

        if self.constraints.shape[0] == 0:
            raise NoComparablePairException("Data has no comparable pairs, cannot fit model.")

        self.L = np.ones(self.constraints.shape[0])

    @property
    def n_coefficients(self):
        return self.data_x.shape[1]

    def _objective_func(self, w):
        val = 0.5 * squared_norm(w) + 0.5 * self.alpha * squared_norm(self.L)
        return val

    def _update_constraints(self, w):
        self.xw = np.dot(self.data_x, w)
        self.L = 1 - self.constraints.dot(self.xw)
        np.maximum(0, self.L, out=self.L)
        support_vectors = np.nonzero(self.L > 0)[0]
        self.Asv = self.constraints[support_vectors, :]

    def _gradient_func(self, w):
        # sum over columns without running into overflow problems
        col_sum = self.Asv.sum(axis=0, dtype=int)
        v = col_sum.A.squeeze()

        z = np.dot(self.data_x.T, (self.Asv.T.dot(self.Asv.dot(self.xw)) - v))
        return w + self.alpha * z

    def _hessian_func(self, w, s):
        z = self.alpha * self.Asv.dot(np.dot(self.data_x, s))
        return s + np.dot(safe_sparse_dot(z.T, self.Asv), self.data_x).T


class PRSVMOptimizer(RankSVMOptimizer):
    """PRSVM optimizer that after each iteration of Newton's method
    constructs matrix of support vector pairs"""

    def __init__(self, x, y, alpha, rank_ratio, timeit=False):
        super().__init__(alpha, rank_ratio, timeit)
        self.data_x = x
        self.data_y = np.asarray(y, dtype=np.uint8)
        self._constraints = lambda w: survival_constraints_with_support_vectors(self.data_y, w)

        Aw = self._constraints(np.zeros(x.shape[1]))
        if Aw.shape[0] == 0:
            raise NoComparablePairException("Data has no comparable pairs, cannot fit model.")

    @property
    def n_coefficients(self):
        return self.data_x.shape[1]

    def _objective_func(self, w):
        z = self.Aw.shape[0] + squared_norm(self.AXw) - 2.0 * self.AXw.sum()
        val = 0.5 * squared_norm(w) + 0.5 * self.alpha * z
        return val

    def _update_constraints(self, w):
        xw = np.dot(self.data_x, w)
        self.Aw = self._constraints(xw)
        self.AXw = self.Aw.dot(xw)

    def _gradient_func(self, w):
        # sum over columns without running into overflow problems
        col_sum = self.Aw.sum(axis=0, dtype=int)
        v = col_sum.A.squeeze()
        z = np.dot(self.data_x.T, self.Aw.T.dot(self.AXw) - v)
        return w + self.alpha * z

    def _hessian_func(self, w, s):
        v = self.Aw.dot(np.dot(self.data_x, s))
        z = self.alpha * np.dot(self.data_x.T, self.Aw.T.dot(v))
        return s + z

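
# [Editorial sketch, not part of survival_svm.py] survival_constraints_simple is
# compiled code returning a sparse matrix with one row per comparable pair:
# +1 in the column of the longer-surviving sample and -1 in the column of the
# sample with the event, so that constraints.dot(X @ w) yields the pairwise
# margins entering self.L above. A hypothetical dense NumPy version, written
# against raw times for illustration (the real code sorts samples by time first):
def _naive_constraint_matrix(time, event):
    n = len(time)
    rows = []
    for i in range(n):
        for j in range(n):
            if event[j] and time[i] > time[j]:
                row = np.zeros(n)
                row[i], row[j] = 1.0, -1.0
                rows.append(row)
    return np.asarray(rows)
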
class LargeScaleOptimizer(RankSVMOptimizer):
    """Optimizer that does not explicitly create matrix of constraints

    Parameters
    ----------
    alpha : float
        Regularization parameter.

    rank_ratio : float
        Trade-off between regression and ranking objectives.

    fit_intercept : bool
        Whether to fit an intercept. Only used if regression objective
        is optimized (rank_ratio < 1.0).

    counter : :class:`OrderStatisticTreeSurvivalCounter` or :class:`SurvivalCounter`
        An instance of a :class:`Counter` subclass used for counting comparable pairs.

    References
    ----------
    Lee, C.-P., & Lin, C.-J. (2014). Supplement Materials for "Large-scale linear RankSVM". Neural Computation, 26(4),
    781–817. doi:10.1162/NECO_a_00571
    """

    def __init__(self, alpha, rank_ratio, fit_intercept, counter, timeit=False):
        super().__init__(alpha, rank_ratio, timeit)

        self._counter = counter
        self._regr_penalty = (1.0 - rank_ratio) * alpha
        self._rank_penalty = rank_ratio * alpha
        self._has_time = hasattr(self._counter, "time") and self._regr_penalty > 0
        self._fit_intercept = fit_intercept if self._has_time else False

    @property
    def n_coefficients(self):
        n = self._counter.x.shape[1]
        if self._fit_intercept:
            n += 1
        return n

    def _init_coefficients(self):
        w = super()._init_coefficients()
        n = w.shape[0]
        if self._fit_intercept:
            w[0] = self._counter.time.mean()
            n -= 1

        l_plus, _, l_minus, _ = self._counter.calculate(np.zeros(n))
        if np.all(l_plus == 0) and np.all(l_minus == 0):
            raise NoComparablePairException("Data has no comparable pairs, cannot fit model.")

        return w

    def _split_coefficents(self, w):
        """Split into intercept/bias and feature-specific coefficients"""
        if self._fit_intercept:
            bias = w[0]
            wf = w[1:]
        else:
            bias = 0.0
            wf = w
        return bias, wf

    def _objective_func(self, w):
        bias, wf = self._split_coefficents(w)

        l_plus, xv_plus, l_minus, xv_minus = self._counter.calculate(wf)  # pylint: disable=unused-variable

        xw = self._xw
        val = 0.5 * squared_norm(wf)
        if self._has_time:
            val += (
                0.5 * self._regr_penalty * squared_norm(self.y_compressed - bias - xw.compress(self.regr_mask, axis=0))
            )

        val += (
            0.5
            * self._rank_penalty
            * numexpr.evaluate(
                "sum(xw * ((l_plus + l_minus) * xw - xv_plus - xv_minus - 2 * (l_minus - l_plus)) + l_minus)"
            )
        )

        return val

    def _update_constraints(self, w):
        bias, wf = self._split_coefficents(w)

        self._xw = self._counter.update_sort_order(wf)

        if self._has_time:
            pred_time = self._counter.time - self._xw - bias
            self.regr_mask = (pred_time > 0) | self._counter.status
            self.y_compressed = self._counter.time.compress(self.regr_mask, axis=0)

    def _gradient_func(self, w):
        bias, wf = self._split_coefficents(w)

        l_plus, xv_plus, l_minus, xv_minus = self._counter.calculate(wf)  # pylint: disable=unused-variable
        x = self._counter.x

        xw = self._xw  # pylint: disable=unused-variable; # noqa: F841
        z = numexpr.evaluate("(l_plus + l_minus) * xw - xv_plus - xv_minus - l_minus + l_plus")

        grad = wf + self._rank_penalty * np.dot(x.T, z)
        if self._has_time:
            xc = x.compress(self.regr_mask, axis=0)
            xcs = np.dot(xc, wf)
            grad += self._regr_penalty * (np.dot(xc.T, xcs) + xc.sum(axis=0) * bias - np.dot(xc.T, self.y_compressed))

            # intercept
            if self._fit_intercept:
                grad_intercept = self._regr_penalty * (xcs.sum() + xc.shape[0] * bias - self.y_compressed.sum())
                grad = np.r_[grad_intercept, grad]

        return grad

    def _hessian_func(self, w, s):
        s_bias, s_feat = self._split_coefficents(s)

        l_plus, xv_plus, l_minus, xv_minus = self._counter.calculate(s_feat)  # pylint: disable=unused-variable
        x = self._counter.x

        xs = np.dot(x, s_feat)  # pylint: disable=unused-variable
        xs = numexpr.evaluate("(l_plus + l_minus) * xs - xv_plus - xv_minus")

        hessp = s_feat + self._rank_penalty * np.dot(x.T, xs)
        if self._has_time:
            xc = x.compress(self.regr_mask, axis=0)
            hessp += self._regr_penalty * np.dot(xc.T, np.dot(xc, s_feat))

            # intercept
            if self._fit_intercept:
                xsum = xc.sum(axis=0)
                hessp += self._regr_penalty * xsum * s_bias
                hessp_intercept = self._regr_penalty * xc.shape[0] * s_bias + self._regr_penalty * np.dot(xsum, s_feat)
                hessp = np.r_[hessp_intercept, hessp]

        return hessp

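# [Editorial note] The numexpr.evaluate calls above fuse the element-wise
# arithmetic into a single pass over the arrays. The rank-objective term in
# _objective_func is equivalent to the plain NumPy expression
#
#     np.sum(xw * ((l_plus + l_minus) * xw - xv_plus - xv_minus
#                  - 2 * (l_minus - l_plus)) + l_minus)
#
# which would allocate several temporary arrays that numexpr avoids.
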
class NonlinearLargeScaleOptimizer(RankSVMOptimizer):
    """Optimizer that does not explicitly create matrix of constraints

    Parameters
    ----------
    alpha : float
        Regularization parameter.

    rank_ratio : float
        Trade-off between regression and ranking objectives.

    counter : :class:`OrderStatisticTreeSurvivalCounter` or :class:`SurvivalCounter`
        An instance of a :class:`Counter` subclass used for counting comparable pairs.

    References
    ----------
    Lee, C.-P., & Lin, C.-J. (2014). Supplement Materials for "Large-scale linear RankSVM". Neural Computation, 26(4),
    781–817. doi:10.1162/NECO_a_00571
    """

    def __init__(self, alpha, rank_ratio, fit_intercept, counter, timeit=False):
        super().__init__(alpha, rank_ratio, timeit)

        self._counter = counter
        self._fit_intercept = fit_intercept
        self._rank_penalty = rank_ratio * alpha
        self._regr_penalty = (1.0 - rank_ratio) * alpha
        self._has_time = hasattr(self._counter, "time") and self._regr_penalty > 0
        self._fit_intercept = fit_intercept if self._has_time else False

    @property
    def n_coefficients(self):
        n = self._counter.x.shape[0]
        if self._fit_intercept:
            n += 1
        return n

    def _init_coefficients(self):
        w = super()._init_coefficients()
        n = w.shape[0]
        if self._fit_intercept:
            w[0] = self._counter.time.mean()
            n -= 1

        l_plus, _, l_minus, _ = self._counter.calculate(np.zeros(n))
        if np.all(l_plus == 0) and np.all(l_minus == 0):
            raise NoComparablePairException("Data has no comparable pairs, cannot fit model.")

        return w

    def _split_coefficents(self, w):
        """Split into intercept/bias and feature-specific coefficients"""
        if self._fit_intercept:
            bias = w[0]
            wf = w[1:]
        else:
            bias = 0.0
            wf = w
        return bias, wf

    def _update_constraints(self, beta_bias):
        bias, beta = self._split_coefficents(beta_bias)

        self._Kw = self._counter.update_sort_order(beta)

        if self._has_time:
            pred_time = self._counter.time - self._Kw - bias
            self.regr_mask = (pred_time > 0) | self._counter.status
            self.y_compressed = self._counter.time.compress(self.regr_mask, axis=0)

    def _objective_func(self, beta_bias):
        bias, beta = self._split_coefficents(beta_bias)

        Kw = self._Kw

        val = 0.5 * np.dot(beta, Kw)
        if self._has_time:
            val += (
                0.5 * self._regr_penalty * squared_norm(self.y_compressed - bias - Kw.compress(self.regr_mask, axis=0))
            )

        l_plus, xv_plus, l_minus, xv_minus = self._counter.calculate(beta)  # pylint: disable=unused-variable
        val += (
            0.5
            * self._rank_penalty
            * numexpr.evaluate(
                "sum(Kw * ((l_plus + l_minus) * Kw - xv_plus - xv_minus - 2 * (l_minus - l_plus)) + l_minus)"
            )
        )

        return val

    def _gradient_func(self, beta_bias):
        bias, beta = self._split_coefficents(beta_bias)

        K = self._counter.x
        Kw = self._Kw

        l_plus, xv_plus, l_minus, xv_minus = self._counter.calculate(beta)  # pylint: disable=unused-variable
        z = numexpr.evaluate("(l_plus + l_minus) * Kw - xv_plus - xv_minus - l_minus + l_plus")

        gradient = Kw + self._rank_penalty * np.dot(K, z)
        if self._has_time:
            K_comp = K.compress(self.regr_mask, axis=0)
            K_comp_beta = np.dot(K_comp, beta)
            gradient += self._regr_penalty * (
                np.dot(K_comp.T, K_comp_beta) + K_comp.sum(axis=0) * bias - np.dot(K_comp.T, self.y_compressed)
            )

            # intercept
            if self._fit_intercept:
                grad_intercept = self._regr_penalty * (
                    K_comp_beta.sum() + K_comp.shape[0] * bias - self.y_compressed.sum()
                )
                gradient = np.r_[grad_intercept, gradient]

        return gradient

    def _hessian_func(self, _beta, s):
        s_bias, s_feat = self._split_coefficents(s)

        K = self._counter.x
        Ks = np.dot(K, s_feat)

        l_plus, xv_plus, l_minus, xv_minus = self._counter.calculate(s_feat)  # pylint: disable=unused-variable
        xs = numexpr.evaluate("(l_plus + l_minus) * Ks - xv_plus - xv_minus")

        hessian = Ks + self._rank_penalty * np.dot(K, xs)
        if self._has_time:
            K_comp = K.compress(self.regr_mask, axis=0)
            hessian += self._regr_penalty * np.dot(K_comp.T, np.dot(K_comp, s_feat))

            # intercept
            if self._fit_intercept:
                xsum = K_comp.sum(axis=0)
                hessian += self._regr_penalty * xsum * s_bias
                hessian_intercept = self._regr_penalty * K_comp.shape[0] * s_bias + self._regr_penalty * np.dot(
                    xsum, s_feat
                )
                hessian = np.r_[hessian_intercept, hessian]

        return hessian

class BaseSurvivalSVM(BaseEstimator, metaclass=ABCMeta):
    _parameter_constraints = {
        "alpha": [Interval(Real, 0.0, None, closed="neither")],
        "rank_ratio": [Interval(Real, 0.0, 1.0, closed="both")],
        "fit_intercept": ["boolean"],
        "max_iter": [Interval(Integral, 1, None, closed="left")],
        "verbose": ["verbose"],
        "tol": [Interval(Real, 0.0, None, closed="neither"), None],
        "random_state": ["random_state"],
        "timeit": [Interval(Integral, 1, None, closed="left"), "boolean"],
    }

    @abstractmethod
    def __init__(
        self,
        alpha=1,
        rank_ratio=1.0,
        fit_intercept=False,
        max_iter=20,
        verbose=False,
        tol=None,
        optimizer=None,
        random_state=None,
        timeit=False,
    ):
        self.alpha = alpha
        self.rank_ratio = rank_ratio
        self.fit_intercept = fit_intercept
        self.max_iter = max_iter
        self.verbose = verbose
        self.tol = tol
        self.optimizer = optimizer
        self.random_state = random_state
        self.timeit = timeit

        self.coef_ = None
        self.optimizer_result_ = None

    def _create_optimizer(self, X, y, status):
        """Samples are ordered by relevance"""
        if self.optimizer is None:
            self.optimizer = "avltree"

        times, ranks = y

        if self.optimizer == "simple":
            optimizer = SimpleOptimizer(X, status, self.alpha, self.rank_ratio, timeit=self.timeit)
        elif self.optimizer == "PRSVM":
            optimizer = PRSVMOptimizer(X, status, self.alpha, self.rank_ratio, timeit=self.timeit)
        elif self.optimizer == "direct-count":
            optimizer = LargeScaleOptimizer(
                self.alpha,
                self.rank_ratio,
                self.fit_intercept,
                SurvivalCounter(X, ranks, status, len(ranks), times),
                timeit=self.timeit,
            )
        elif self.optimizer == "rbtree":
            optimizer = LargeScaleOptimizer(
                self.alpha,
                self.rank_ratio,
                self.fit_intercept,
                OrderStatisticTreeSurvivalCounter(X, ranks, status, RBTree, times),
                timeit=self.timeit,
            )
        elif self.optimizer == "avltree":
            optimizer = LargeScaleOptimizer(
                self.alpha,
                self.rank_ratio,
                self.fit_intercept,
                OrderStatisticTreeSurvivalCounter(X, ranks, status, AVLTree, times),
                timeit=self.timeit,
            )

        return optimizer

    @property
    def _predict_risk_score(self):
        return self.rank_ratio == 1

    @abstractmethod
    def _fit(self, X, time, event, samples_order):
        """Create and run optimizer"""

    @abstractmethod
    def predict(self, X):
        """Predict risk scores or transformed survival times.

        If the model has been fit only considering the ranking objective
        (``rank_ratio = 1``), predictions are risk scores (i.e. higher values
        indicate an increased risk of experiencing an event). The scores
        have no unit and are only meaningful to rank samples by their risk
        of experiencing an event.

        If the regression objective has been used (``rank_ratio < 1``),
        predictions are transformed survival times.
        Lower scores indicate shorter survival, higher scores longer survival.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            The input samples.

        Returns
        -------
        y : ndarray, shape = (n_samples,), dtype=float
            Risk scores (if ``rank_ratio = 1``), or transformed survival times
            (if ``rank_ratio < 1``).
        """

    def _validate_for_fit(self, X):
        return validate_data(self, X, ensure_min_samples=2)

    def fit(self, X, y):
        """Build a survival support vector machine model from training data.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Data matrix.

        y : structured array, shape = (n_samples,)
            A structured array with two fields. The first field is a boolean
            where ``True`` indicates an event and ``False`` indicates right-censoring.
            The second field is a float with the time of event or time of censoring.

        Returns
        -------
        self
        """
        X = self._validate_for_fit(X)
        event, time = check_array_survival(X, y, allow_time_zero=False)

        self._validate_params()

        if self.fit_intercept and self.rank_ratio == 1.0:
            raise ValueError("fit_intercept=True is only meaningful if rank_ratio < 1.0")

        if self.rank_ratio < 1.0:
            if self.optimizer in {"simple", "PRSVM"}:
                raise ValueError(f"optimizer {self.optimizer!r} does not implement regression objective")

            # log-transform time
            time = np.log(time)
            assert np.isfinite(time).all()

        random_state = check_random_state(self.random_state)
        samples_order = BaseSurvivalSVM._argsort_and_resolve_ties(time, random_state)

        opt_result = self._fit(X, time, event, samples_order)
        coef = opt_result.x
        if self.fit_intercept:
            self.coef_ = coef[1:]
            self.intercept_ = coef[0]
        else:
            self.coef_ = coef

        if not opt_result.success:
            warnings.warn(
                ("Optimization did not converge: " + opt_result.message), category=ConvergenceWarning, stacklevel=2
            )
        self.optimizer_result_ = opt_result

        return self

    @property
    def n_iter_(self):
        return self.optimizer_result_.nit

    @staticmethod
    def _argsort_and_resolve_ties(time, random_state):
        """Like np.argsort, but resolves ties uniformly at random"""
        n_samples = len(time)
        order = np.argsort(time, kind="mergesort")

        i = 0
        while i < n_samples - 1:
            inext = i + 1
            while inext < n_samples and time[order[i]] == time[order[inext]]:
                inext += 1

            if i + 1 != inext:
                # resolve ties randomly
                random_state.shuffle(order[i:inext])
            i = inext
        return order

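# [Editorial note] Illustration of _argsort_and_resolve_ties above: mergesort is
# stable, so tied survival times would always be ordered by sample index; the
# shuffle randomizes each tied block instead. For example:
#
#     rs = np.random.RandomState(0)
#     order = BaseSurvivalSVM._argsort_and_resolve_ties(np.array([3.0, 1.0, 3.0, 2.0]), rs)
#     # order starts with [1, 3, ...]; the two samples with time 3.0 follow in
#     # random order, i.e. [1, 3, 0, 2] or [1, 3, 2, 0].
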
class FastSurvivalSVM(BaseSurvivalSVM, SurvivalAnalysisMixin):
    r"""Implements an efficient linear Support Vector Machine for survival analysis,
    capable of optimizing both ranking and regression objectives.

    Training data consists of *n* triplets :math:`(\mathbf{x}_i, y_i, \delta_i)`,
    where :math:`\mathbf{x}_i` is a *d*-dimensional feature vector, :math:`y_i > 0`
    the survival time or time of censoring, and :math:`\delta_i \in \{0,1\}`
    the binary event indicator. Using the training data, the objective is to
    minimize the following function:

    .. math::

        \arg \min_{\mathbf{w}, b} \frac{1}{2} \mathbf{w}^\top \mathbf{w}
        + \frac{\alpha}{2} \left[ r \sum_{i,j \in \mathcal{P}}
        \max(0, 1 - (\mathbf{w}^\top \mathbf{x}_i - \mathbf{w}^\top \mathbf{x}_j))^2
        + (1 - r) \sum_{i=0}^n \left( \zeta_{\mathbf{w}, b} (y_i, x_i, \delta_i)
        \right)^2 \right]

        \zeta_{\mathbf{w},b} (y_i, \mathbf{x}_i, \delta_i) =
        \begin{cases}
        \max(0, y_i - \mathbf{w}^\top \mathbf{x}_i - b) \quad \text{if $\delta_i = 0$,} \\
        y_i - \mathbf{w}^\top \mathbf{x}_i - b \quad \text{if $\delta_i = 1$,} \\
        \end{cases}

        \mathcal{P} = \{ (i, j) \mid y_i > y_j \land \delta_j = 1 \}_{i,j=1,\dots,n}

    The hyper-parameter :math:`\alpha > 0` determines the amount of regularization
    to apply: a smaller value increases the amount of regularization and a
    higher value reduces the amount of regularization. The hyper-parameter
    :math:`r \in [0; 1]` determines the trade-off between the ranking objective
    and the regression objective. If :math:`r = 1` it reduces to the ranking
    objective, and if :math:`r = 0` to the regression objective. If the regression
    objective is used, survival/censoring times are log-transformed and thus cannot be
    zero or negative.

    See the :ref:`User Guide </user_guide/survival-svm.ipynb>` and [1]_ for further description.

    Parameters
    ----------
    alpha : float, default: 1
        Weight of penalizing the squared hinge loss in the objective function. Must be greater than 0.

    rank_ratio : float, optional, default: 1.0
        Mixing parameter between regression and ranking objectives, with ``0 <= rank_ratio <= 1``.
        If ``rank_ratio = 1``, only ranking is performed. If ``rank_ratio = 0``, only regression
        is performed. A ``rank_ratio`` less than 1.0 (i.e., including a regression objective) is
        only supported if the ``optimizer`` is 'avltree', 'rbtree', or 'direct-count'.

    fit_intercept : bool, optional, default: False
        Whether to calculate an intercept for the regression model. If set to ``False``, no intercept
        will be calculated. This parameter has no effect if ``rank_ratio = 1``, i.e., only ranking is performed.

    max_iter : int, optional, default: 20
        Maximum number of iterations to perform in Newton optimization.

    verbose : bool, optional, default: False
        If ``True``, print messages during optimization.

    tol : float or None, optional, default: None
        Tolerance for termination. If ``None``, the solver's default tolerance is used.
        See :func:`scipy.optimize.minimize`.

    optimizer : {'avltree', 'direct-count', 'PRSVM', 'rbtree', 'simple'}, optional, default: 'avltree'
        Specifies which optimizer to use.

    random_state : int, :class:`numpy.random.RandomState` instance, or None, optional, default: None
        Used to resolve ties in survival times. Pass an int for reproducible output across
        multiple :meth:`fit` calls.

    timeit : bool, int, or None, optional, default: False
        If ``True`` or a non-zero integer, the time taken for optimization is measured.
        If an integer is provided, the optimization is repeated that many times.
        Results can be accessed from the ``optimizer_result_`` attribute.

    Attributes
    ----------
    coef_ : ndarray, shape = (n_features,), dtype = float
        Coefficients of the features in the decision function.

    optimizer_result_ : :class:`scipy.optimize.OptimizeResult`
        Stats returned by the optimizer. See :class:`scipy.optimize.OptimizeResult`.

    n_features_in_ : int
        Number of features seen during ``fit``.

    feature_names_in_ : ndarray, shape = (`n_features_in_`,), dtype = object
        Names of features seen during ``fit``. Defined only when `X`
        has feature names that are all strings.

    n_iter_ : int
        Number of iterations run by the optimization routine to fit the model.

    See also
    --------
    FastKernelSurvivalSVM
        Fast implementation for arbitrary kernel functions.

    References
    ----------
    .. [1] Pölsterl, S., Navab, N., and Katouzian, A.,
           "Fast Training of Support Vector Machines for Survival Analysis",
           Machine Learning and Knowledge Discovery in Databases: European Conference,
           ECML PKDD 2015, Porto, Portugal,
           Lecture Notes in Computer Science, vol. 9285, pp. 243-259 (2015)
    """

    _parameter_constraints = {
        **BaseSurvivalSVM._parameter_constraints,
        "optimizer": [StrOptions({"simple", "PRSVM", "direct-count", "rbtree", "avltree"}), None],
    }

    def __init__(
        self,
        alpha=1,
        *,
        rank_ratio=1.0,
        fit_intercept=False,
        max_iter=20,
        verbose=False,
        tol=None,
        optimizer=None,
        random_state=None,
        timeit=False,
    ):
        super().__init__(
            alpha=alpha,
            rank_ratio=rank_ratio,
            fit_intercept=fit_intercept,
            max_iter=max_iter,
            verbose=verbose,
            tol=tol,
            optimizer=optimizer,
            random_state=random_state,
            timeit=timeit,
        )

    def _fit(self, X, time, event, samples_order):
        data_y = (time[samples_order], np.arange(len(samples_order)))
        status = event[samples_order]

        optimizer = self._create_optimizer(X[samples_order], data_y, status)
        opt_result = optimizer.run(tol=self.tol, options={"maxiter": self.max_iter, "disp": self.verbose})
        return opt_result

    def predict(self, X):
        check_is_fitted(self, "coef_")
        X = validate_data(self, X, reset=False)

        val = np.dot(X, self.coef_)
        if hasattr(self, "intercept_"):
            val += self.intercept_

        # Order by increasing survival time if objective is pure ranking
        if self.rank_ratio == 1:
            val *= -1
        else:
            # model was fitted on log(time), transform to original scale
            val = np.exp(val)

        return val

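# [Editorial sketch, not part of survival_svm.py] Minimal usage of the class
# above, assuming only public scikit-survival APIs; synthetic data stands in
# for a real dataset:
#
#     from sksurv.svm import FastSurvivalSVM
#     from sksurv.util import Surv
#
#     rng = np.random.RandomState(0)
#     X = rng.randn(100, 5)
#     y = Surv.from_arrays(
#         event=rng.binomial(1, 0.7, 100).astype(bool),
#         time=np.exp(rng.randn(100)),  # times must be positive
#     )
#     est = FastSurvivalSVM(alpha=1.0, max_iter=40, random_state=0).fit(X, y)
#     risk = est.predict(X)  # higher score = higher risk, since rank_ratio = 1
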
class FastKernelSurvivalSVM(BaseSurvivalSVM, SurvivalAnalysisMixin):
    """Implements an efficient kernel Support Vector Machine for survival analysis.

    The model extends :class:`FastSurvivalSVM` to non-linear relationships through kernel functions.

    See the :ref:`User Guide </user_guide/survival-svm.ipynb>` and [1]_ for further description.

    Parameters
    ----------
    alpha : float, default: 1
        Weight of penalizing the squared hinge loss in the objective function. Must be greater than 0.

    rank_ratio : float, optional, default: 1.0
        Mixing parameter between regression and ranking objectives, with ``0 <= rank_ratio <= 1``.
        If ``rank_ratio = 1``, only ranking is performed. If ``rank_ratio = 0``, only regression
        is performed. A ``rank_ratio`` less than 1.0 (i.e., including a regression objective) is
        only supported if the ``optimizer`` is 'avltree' or 'rbtree'.

    fit_intercept : bool, optional, default: False
        Whether to calculate an intercept for the regression model. If set to ``False``, no intercept
        will be calculated. This parameter has no effect if ``rank_ratio = 1``, i.e., only ranking is performed.

    kernel : str or callable, default: 'rbf'
        Kernel mapping used internally. This parameter is directly passed to
        :func:`sklearn.metrics.pairwise.pairwise_kernels`.
        If `kernel` is a string, it must be one of the metrics
        in `pairwise.PAIRWISE_KERNEL_FUNCTIONS` or "precomputed".
        If `kernel` is "precomputed", X is assumed to be a kernel matrix.
        Alternatively, if `kernel` is a callable function, it is called on
        each pair of instances (rows) and the resulting value recorded. The
        callable should take two rows from X as input and return the
        corresponding kernel value as a single number. This means that
        callables from :mod:`sklearn.metrics.pairwise` are not allowed, as
        they operate on matrices, not single samples. Use the string
        identifying the kernel instead.

    gamma : float, optional, default: None
        Gamma parameter for the RBF, laplacian, polynomial, exponential chi2
        and sigmoid kernels. Interpretation of the default value is left to
        the kernel; see the documentation for :mod:`sklearn.metrics.pairwise`.
        Ignored by other kernels.

    degree : int, optional, default: 3
        Degree of the polynomial kernel. Ignored by other kernels.

    coef0 : float, optional, default: 1
        Zero coefficient for polynomial and sigmoid kernels.
        Ignored by other kernels.

    kernel_params : dict or None, optional, default: None
        Additional parameters (keyword arguments) for kernel function passed
        as callable object.

    max_iter : int, optional, default: 20
        Maximum number of iterations to perform in Newton optimization.

    verbose : bool, optional, default: False
        If ``True``, print messages during optimization.

    tol : float or None, optional, default: None
        Tolerance for termination. If ``None``, the solver's default tolerance is used.
        See :func:`scipy.optimize.minimize`.

    optimizer : {'avltree', 'rbtree'}, optional, default: 'rbtree'
        Specifies which optimizer to use.

    random_state : int, :class:`numpy.random.RandomState` instance, or None, optional, default: None
        Used to resolve ties in survival times. Pass an int for reproducible output across
        multiple :meth:`fit` calls.

    timeit : bool, int, or None, optional, default: False
        If ``True`` or a non-zero integer, the time taken for optimization is measured.
        If an integer is provided, the optimization is repeated that many times.
        Results can be accessed from the ``optimizer_result_`` attribute.

    Attributes
    ----------
    coef_ : ndarray, shape = (n_samples,), dtype = float
        Weights assigned to the samples in training data to represent
        the decision function in kernel space.

    fit_X_ : ndarray, shape = (n_samples, `n_features_in_`), dtype = float
        Training data used for fitting. Used to compute the kernel matrix for prediction.

    optimizer_result_ : :class:`scipy.optimize.OptimizeResult`
        Stats returned by the optimizer. See :class:`scipy.optimize.OptimizeResult`.

    n_features_in_ : int
        Number of features seen during ``fit``.

    feature_names_in_ : ndarray, shape = (`n_features_in_`,), dtype = object
        Names of features seen during ``fit``. Defined only when `X`
        has feature names that are all strings.

    n_iter_ : int
        Number of iterations run by the optimization routine to fit the model.

    See also
    --------
    FastSurvivalSVM
        Fast implementation for linear kernel.

    References
    ----------
    .. [1] Pölsterl, S., Navab, N., and Katouzian, A.,
           *An Efficient Training Algorithm for Kernel Survival Support Vector Machines*,
           4th Workshop on Machine Learning in Life Sciences,
           23 September 2016, Riva del Garda, Italy. arXiv:1611.07054
    """

    _parameter_constraints = {
        **FastSurvivalSVM._parameter_constraints,
        "kernel": [
            StrOptions(set(PAIRWISE_KERNEL_FUNCTIONS.keys()) | {"precomputed"}),
            callable,
        ],
        "gamma": [Interval(Real, 0.0, None, closed="left"), None],
        "degree": [Interval(Integral, 0, None, closed="left")],
        "coef0": [Interval(Real, None, None, closed="neither")],
        "kernel_params": [dict, None],
        "optimizer": [StrOptions({"rbtree", "avltree"}), None],
    }

    def __init__(
        self,
        alpha=1,
        *,
        rank_ratio=1.0,
        fit_intercept=False,
        kernel="rbf",
        gamma=None,
        degree=3,
        coef0=1,
        kernel_params=None,
        max_iter=20,
        verbose=False,
        tol=None,
        optimizer=None,
        random_state=None,
        timeit=False,
    ):
        super().__init__(
            alpha=alpha,
            rank_ratio=rank_ratio,
            fit_intercept=fit_intercept,
            max_iter=max_iter,
            verbose=verbose,
            tol=tol,
            optimizer=optimizer,
            random_state=random_state,
            timeit=timeit,
        )
        self.kernel = kernel
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.kernel_params = kernel_params

    def __sklearn_tags__(self):
        # tell sklearn.utils.metaestimators._safe_split function that we expect kernel matrix
        tags = super().__sklearn_tags__()
        tags.input_tags.pairwise = self.kernel == "precomputed"
        return tags

    def _get_kernel(self, X, Y=None):
        if callable(self.kernel):
            params = self.kernel_params or {}
        else:
            params = {"gamma": self.gamma, "degree": self.degree, "coef0": self.coef0}
        return pairwise_kernels(X, Y, metric=self.kernel, filter_params=True, **params)

    def _create_optimizer(self, kernel_mat, y, status):
        if self.optimizer is None:
            self.optimizer = "rbtree"

        times, ranks = y

        if self.optimizer == "rbtree":
            optimizer = NonlinearLargeScaleOptimizer(
                self.alpha,
                self.rank_ratio,
                self.fit_intercept,
                OrderStatisticTreeSurvivalCounter(kernel_mat, ranks, status, RBTree, times),
                timeit=self.timeit,
            )
        elif self.optimizer == "avltree":
            optimizer = NonlinearLargeScaleOptimizer(
                self.alpha,
                self.rank_ratio,
                self.fit_intercept,
                OrderStatisticTreeSurvivalCounter(kernel_mat, ranks, status, AVLTree, times),
                timeit=self.timeit,
            )

        return optimizer

    def _validate_for_fit(self, X):
        if self.kernel != "precomputed":
            return super()._validate_for_fit(X)
        return X

    def _fit(self, X, time, event, samples_order):
        # don't reorder X here, because it might be a precomputed kernel matrix
        kernel_mat = self._get_kernel(X)
        if (np.abs(kernel_mat.T - kernel_mat) > 1e-12).any():
            raise ValueError("kernel matrix is not symmetric")

        data_y = (time[samples_order], np.arange(len(samples_order)))
        status = event[samples_order]

        optimizer = self._create_optimizer(kernel_mat[np.ix_(samples_order, samples_order)], data_y, status)
        opt_result = optimizer.run(tol=self.tol, options={"maxiter": self.max_iter, "disp": self.verbose})

        # reorder coefficients according to order in original training data,
        # i.e., reverse ordering according to samples_order
        self.fit_X_ = X
        if self.fit_intercept:
            opt_result.x[samples_order + 1] = opt_result.x[1:].copy()
        else:
            opt_result.x[samples_order] = opt_result.x.copy()

        return opt_result

    def predict(self, X):
        X = validate_data(self, X, reset=False)
        kernel_mat = self._get_kernel(X, self.fit_X_)

        val = np.dot(kernel_mat, self.coef_)
        if hasattr(self, "intercept_"):
            val += self.intercept_

        # Order by increasing survival time if objective is pure ranking
        if self.rank_ratio == 1:
            val *= -1
        else:
            # model was fitted on log(time), transform to original scale
            val = np.exp(val)

        return val
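
A minimal usage sketch for FastKernelSurvivalSVM (editorial addition; it relies only on
public scikit-survival APIs, with synthetic data standing in for a real dataset):

    import numpy as np
    from sksurv.metrics import concordance_index_censored
    from sksurv.svm import FastKernelSurvivalSVM
    from sksurv.util import Surv

    rng = np.random.RandomState(0)
    X = rng.randn(120, 5)
    y = Surv.from_arrays(
        event=rng.binomial(1, 0.7, 120).astype(bool),
        time=np.exp(rng.randn(120)),
    )

    est = FastKernelSurvivalSVM(kernel="rbf", alpha=1.0, random_state=0).fit(X, y)
    risk = est.predict(X)  # risk scores, since rank_ratio defaults to 1
    cindex = concordance_index_censored(y["event"], y["time"], risk)[0]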