scikit-survival 0.23.1__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scikit_survival-0.23.1.dist-info/COPYING +674 -0
- scikit_survival-0.23.1.dist-info/METADATA +888 -0
- scikit_survival-0.23.1.dist-info/RECORD +55 -0
- scikit_survival-0.23.1.dist-info/WHEEL +5 -0
- scikit_survival-0.23.1.dist-info/top_level.txt +1 -0
- sksurv/__init__.py +138 -0
- sksurv/base.py +103 -0
- sksurv/bintrees/__init__.py +15 -0
- sksurv/bintrees/_binarytrees.cp313-win_amd64.pyd +0 -0
- sksurv/column.py +201 -0
- sksurv/compare.py +123 -0
- sksurv/datasets/__init__.py +10 -0
- sksurv/datasets/base.py +436 -0
- sksurv/datasets/data/GBSG2.arff +700 -0
- sksurv/datasets/data/actg320.arff +1169 -0
- sksurv/datasets/data/breast_cancer_GSE7390-metastasis.arff +283 -0
- sksurv/datasets/data/flchain.arff +7887 -0
- sksurv/datasets/data/veteran.arff +148 -0
- sksurv/datasets/data/whas500.arff +520 -0
- sksurv/ensemble/__init__.py +2 -0
- sksurv/ensemble/_coxph_loss.cp313-win_amd64.pyd +0 -0
- sksurv/ensemble/boosting.py +1610 -0
- sksurv/ensemble/forest.py +947 -0
- sksurv/ensemble/survival_loss.py +151 -0
- sksurv/exceptions.py +18 -0
- sksurv/functions.py +114 -0
- sksurv/io/__init__.py +2 -0
- sksurv/io/arffread.py +58 -0
- sksurv/io/arffwrite.py +145 -0
- sksurv/kernels/__init__.py +1 -0
- sksurv/kernels/_clinical_kernel.cp313-win_amd64.pyd +0 -0
- sksurv/kernels/clinical.py +328 -0
- sksurv/linear_model/__init__.py +3 -0
- sksurv/linear_model/_coxnet.cp313-win_amd64.pyd +0 -0
- sksurv/linear_model/aft.py +205 -0
- sksurv/linear_model/coxnet.py +543 -0
- sksurv/linear_model/coxph.py +618 -0
- sksurv/meta/__init__.py +4 -0
- sksurv/meta/base.py +35 -0
- sksurv/meta/ensemble_selection.py +642 -0
- sksurv/meta/stacking.py +349 -0
- sksurv/metrics.py +996 -0
- sksurv/nonparametric.py +588 -0
- sksurv/preprocessing.py +155 -0
- sksurv/svm/__init__.py +11 -0
- sksurv/svm/_minlip.cp313-win_amd64.pyd +0 -0
- sksurv/svm/_prsvm.cp313-win_amd64.pyd +0 -0
- sksurv/svm/minlip.py +606 -0
- sksurv/svm/naive_survival_svm.py +221 -0
- sksurv/svm/survival_svm.py +1228 -0
- sksurv/testing.py +108 -0
- sksurv/tree/__init__.py +1 -0
- sksurv/tree/_criterion.cp313-win_amd64.pyd +0 -0
- sksurv/tree/tree.py +703 -0
- sksurv/util.py +333 -0
|
@@ -0,0 +1,618 @@
|
|
|
1
|
+
# This program is free software: you can redistribute it and/or modify
|
|
2
|
+
# it under the terms of the GNU General Public License as published by
|
|
3
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
4
|
+
# (at your option) any later version.
|
|
5
|
+
#
|
|
6
|
+
# This program is distributed in the hope that it will be useful,
|
|
7
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
8
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
9
|
+
# GNU General Public License for more details.
|
|
10
|
+
#
|
|
11
|
+
# You should have received a copy of the GNU General Public License
|
|
12
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
13
|
+
import numbers
|
|
14
|
+
import warnings
|
|
15
|
+
|
|
16
|
+
import numpy as np
|
|
17
|
+
from scipy.linalg import solve
|
|
18
|
+
from sklearn.base import BaseEstimator
|
|
19
|
+
from sklearn.exceptions import ConvergenceWarning
|
|
20
|
+
from sklearn.utils._param_validation import Interval, StrOptions
|
|
21
|
+
from sklearn.utils.validation import check_array, check_is_fitted
|
|
22
|
+
|
|
23
|
+
from ..base import SurvivalAnalysisMixin
|
|
24
|
+
from ..functions import StepFunction
|
|
25
|
+
from ..nonparametric import _compute_counts
|
|
26
|
+
from ..util import check_array_survival
|
|
27
|
+
|
|
28
|
+
__all__ = ["CoxPHSurvivalAnalysis"]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class BreslowEstimator:
    """Breslow's non-parametric estimator of the cumulative baseline hazard.

    Given the linear predictors of a fitted proportional hazards model,
    :meth:`fit` estimates the baseline cumulative hazard and baseline
    survival function, from which per-sample functions can be derived.

    Attributes
    ----------
    cum_baseline_hazard_ : :class:`sksurv.functions.StepFunction`
        Cumulative baseline hazard function.

    baseline_survival_ : :class:`sksurv.functions.StepFunction`
        Baseline survival function.

    unique_times_ : ndarray
        Unique event times.
    """

    def fit(self, linear_predictor, event, time):
        """Compute baseline cumulative hazard function.

        Parameters
        ----------
        linear_predictor : array-like, shape = (n_samples,)
            Linear predictor of risk: `X @ coef`.

        event : array-like, shape = (n_samples,)
            Contains binary event indicators.

        time : array-like, shape = (n_samples,)
            Contains event/censoring times.

        Returns
        -------
        self
        """
        risk_score = np.exp(linear_predictor)
        order = np.argsort(time, kind="mergesort")
        risk_score = risk_score[order]
        uniq_times, n_events, n_at_risk, _ = _compute_counts(event, time, order)

        # Denominator of the Breslow estimator at each unique time: the sum of
        # risk scores of all samples still at risk. Walking times in ascending
        # order, samples only ever leave the risk set, so a running total can
        # be decremented by the scores of the samples that just dropped out.
        denominator = np.empty(n_at_risk.shape, dtype=float)
        running_total = np.sum(risk_score)
        denominator[0] = running_total
        n_removed = 0
        for idx in range(1, len(n_at_risk)):
            n_out = n_at_risk[idx - 1] - n_at_risk[idx]
            running_total -= risk_score[n_removed : (n_removed + n_out)].sum()
            n_removed += n_out
            denominator[idx] = running_total

        # sanity check: all removals are accounted for
        assert n_removed == n_at_risk[0] - n_at_risk[-1]

        cum_hazard = np.cumsum(n_events / denominator)
        self.cum_baseline_hazard_ = StepFunction(uniq_times, cum_hazard)
        self.baseline_survival_ = StepFunction(uniq_times, np.exp(-cum_hazard))
        self.unique_times_ = uniq_times
        return self

    def get_cumulative_hazard_function(self, linear_predictor):
        """Predict cumulative hazard function.

        Parameters
        ----------
        linear_predictor : array-like, shape = (n_samples,)
            Linear predictor of risk: `X @ coef`.

        Returns
        -------
        cum_hazard : ndarray, shape = (n_samples,)
            Predicted cumulative hazard functions.
        """
        # each sample's hazard is the baseline hazard scaled by its risk score
        risk_score = np.exp(linear_predictor)
        baseline = self.cum_baseline_hazard_
        funcs = np.empty(risk_score.shape[0], dtype=object)
        for idx, score in enumerate(risk_score):
            funcs[idx] = StepFunction(x=baseline.x, y=baseline.y, a=score)
        return funcs

    def get_survival_function(self, linear_predictor):
        """Predict survival function.

        Parameters
        ----------
        linear_predictor : array-like, shape = (n_samples,)
            Linear predictor of risk: `X @ coef`.

        Returns
        -------
        survival : ndarray, shape = (n_samples,)
            Predicted survival functions.
        """
        # each sample's survival curve is the baseline curve raised to its risk score
        risk_score = np.exp(linear_predictor)
        baseline = self.baseline_survival_
        funcs = np.empty(risk_score.shape[0], dtype=object)
        for idx, score in enumerate(risk_score):
            funcs[idx] = StepFunction(x=baseline.x, y=np.power(baseline.y, score))
        return funcs
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class CoxPHOptimizer:
    """Negative partial log-likelihood of Cox proportional hazards model.

    Holds the data sorted by descending time and evaluates the objective
    (:meth:`nlog_likelihood`) as well as its gradient and Hessian
    (:meth:`update`), supporting Breslow's and Efron's handling of ties.
    """

    def __init__(self, X, event, time, alpha, ties):
        """Store data sorted by descending time.

        Parameters
        ----------
        X : ndarray, shape = (n_samples, n_features)
            Data matrix.
        event : ndarray, shape = (n_samples,)
            Binary event indicator.
        time : ndarray, shape = (n_samples,)
            Event/censoring times.
        alpha : ndarray, shape = (n_features,)
            Ridge penalty, one entry per feature.
        ties : str
            Method for handling tied event times: "breslow" or "efron".
        """
        # sort descending, so iterating the samples grows the risk set incrementally
        o = np.argsort(-time, kind="mergesort")
        self.x = X[o, :]
        self.event = event[o]
        self.time = time[o]
        self.alpha = alpha
        # True if every penalty is numerically zero; skips the ridge terms in update()
        self.no_alpha = np.all(self.alpha < np.finfo(self.alpha.dtype).eps)
        self._is_breslow = ties == "breslow"

    def nlog_likelihood(self, w):
        """Compute negative partial log-likelihood

        Parameters
        ----------
        w : array, shape = (n_features,)
            Estimate of coefficients

        Returns
        -------
        loss : float
            Average negative partial log-likelihood
        """
        time = self.time
        n_samples = self.x.shape[0]
        breslow = self._is_breslow
        # linear predictor for every sample
        xw = np.dot(self.x, w)

        loss = 0
        risk_set = 0
        k = 0
        while k < n_samples:
            ti = time[k]
            numerator = 0
            n_events = 0
            risk_set2 = 0
            # consume all samples sharing the (possibly tied) time ti
            while k < n_samples and ti == time[k]:
                if self.event[k]:
                    # events at ti: collect linear predictors and risk scores separately,
                    # since tie handling decides how they enter the denominator
                    numerator += xw[k]
                    risk_set2 += np.exp(xw[k])
                    n_events += 1
                else:
                    # censored samples enter the risk set immediately
                    risk_set += np.exp(xw[k])
                k += 1

            if n_events > 0:
                if breslow:
                    # Breslow: all tied events share the full risk set
                    risk_set += risk_set2
                    loss -= (numerator - n_events * np.log(risk_set)) / n_samples
                else:
                    # Efron: each tied event sees the risk set with an increasing
                    # fraction of the tied risk scores included
                    numerator /= n_events
                    for _ in range(n_events):
                        risk_set += risk_set2 / n_events
                        loss -= (numerator - np.log(risk_set)) / n_samples

        # add regularization term to log-likelihood
        return loss + np.sum(self.alpha * np.square(w)) / (2.0 * n_samples)

    def update(self, w, offset=0):
        """Compute gradient and Hessian matrix with respect to `w`.

        Results are stored in ``self.gradient`` (shape (n_features,)) and
        ``self.hessian`` (shape (n_features, n_features)).
        """
        time = self.time
        x = self.x
        breslow = self._is_breslow
        exp_xw = np.exp(offset + np.dot(x, w))
        n_samples, n_features = x.shape

        gradient = np.zeros((1, n_features), dtype=w.dtype)
        hessian = np.zeros((n_features, n_features), dtype=w.dtype)

        inv_n_samples = 1.0 / n_samples
        # running sums over the current risk set: scalar, weighted features,
        # and weighted feature outer products
        risk_set = 0
        risk_set_x = np.zeros((1, n_features), dtype=w.dtype)
        risk_set_xx = np.zeros((n_features, n_features), dtype=w.dtype)
        k = 0
        # iterate time in descending order
        while k < n_samples:
            ti = time[k]
            n_events = 0
            numerator = 0
            risk_set2 = 0
            risk_set_x2 = np.zeros_like(risk_set_x)
            risk_set_xx2 = np.zeros_like(risk_set_xx)
            # consume all samples sharing the (possibly tied) time ti
            while k < n_samples and ti == time[k]:
                # preserve 2D shape of row vector
                xk = x[k : k + 1]

                # outer product
                xx = np.dot(xk.T, xk)

                if self.event[k]:
                    # events at ti are accumulated separately; tie handling below
                    # decides how they are merged into the risk-set sums
                    numerator += xk
                    risk_set2 += exp_xw[k]
                    risk_set_x2 += exp_xw[k] * xk
                    risk_set_xx2 += exp_xw[k] * xx
                    n_events += 1
                else:
                    risk_set += exp_xw[k]
                    risk_set_x += exp_xw[k] * xk
                    risk_set_xx += exp_xw[k] * xx
                k += 1

            if n_events > 0:
                if breslow:
                    # Breslow: all tied events share the full risk set
                    risk_set += risk_set2
                    risk_set_x += risk_set_x2
                    risk_set_xx += risk_set_xx2

                    # z is the risk-set-weighted mean of the features
                    z = risk_set_x / risk_set
                    gradient -= (numerator - n_events * z) * inv_n_samples

                    a = risk_set_xx / risk_set
                    # outer product
                    b = np.dot(z.T, z)

                    hessian += n_events * (a - b) * inv_n_samples
                else:
                    # Efron: tied risk scores enter the denominator in equal fractions
                    numerator /= n_events
                    for _ in range(n_events):
                        risk_set += risk_set2 / n_events
                        risk_set_x += risk_set_x2 / n_events
                        risk_set_xx += risk_set_xx2 / n_events

                        z = risk_set_x / risk_set
                        gradient -= (numerator - z) * inv_n_samples

                        a = risk_set_xx / risk_set
                        # outer product
                        b = np.dot(z.T, z)

                        hessian += (a - b) * inv_n_samples

        if not self.no_alpha:
            # ridge penalty: gradient gets alpha * w, Hessian gets alpha on the diagonal
            gradient += self.alpha * inv_n_samples * w

            diag_idx = np.diag_indices(n_features)
            hessian[diag_idx] += self.alpha * inv_n_samples

        self.gradient = gradient.ravel()
        self.hessian = hessian
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
class VerboseReporter:
    """Print optimizer progress messages, gated by a verbosity level.

    Level 0 is silent; level 1 additionally reports termination, level 2
    per-iteration loss and step-halving, level 3 the full update vector.
    """

    def __init__(self, verbose):
        # verbosity threshold checked by every reporting method
        self.verbose = verbose

    def end_max_iter(self, i):
        """Announce that the iteration budget was exhausted."""
        if self.verbose <= 0:
            return
        print(f"iter {i + 1:>6d}: reached maximum number of iterations. Stopping.")

    def end_converged(self, i):
        """Announce successful convergence."""
        if self.verbose <= 0:
            return
        print(f"iter {i + 1:>6d}: optimization converged")

    def update(self, i, delta, loss_new):
        """Report the Newton update and the resulting loss for one iteration."""
        if self.verbose > 2:
            print(f"iter {i + 1:>6d}: update = {delta}")
        if self.verbose > 1:
            print(f"iter {i + 1:>6d}: loss = {loss_new:.10f}")

    def step_halving(self, i, loss):
        """Report that step-halving was triggered by an increasing loss."""
        if self.verbose <= 1:
            return
        print(f"iter {i:>6d}: loss increased, performing step-halving. loss = {loss:.10f}")
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
class CoxPHSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
    """Cox proportional hazards model.

    There are two possible choices for handling tied event times.
    The default is Breslow's method, which considers each of the
    events at a given time as distinct. Efron's method is more
    accurate if there are a large number of ties. When the number
    of ties is small, the estimated coefficients by Breslow's and
    Efron's method are quite close. Uses Newton-Raphson optimization.

    See [1]_, [2]_, [3]_ for further description.

    Parameters
    ----------
    alpha : float, ndarray of shape (n_features,), optional, default: 0
        Regularization parameter for ridge regression penalty.
        If a single float, the same penalty is used for all features.
        If an array, there must be one penalty for each feature.
        If you want to include a subset of features without penalization,
        set the corresponding entries to 0.

    ties : {'breslow', 'efron'}, optional, default: 'breslow'
        The method to handle tied event times. If there are
        no tied event times all the methods are equivalent.

    n_iter : int, optional, default: 100
        Maximum number of iterations.

    tol : float, optional, default: 1e-9
        Convergence criteria. Convergence is based on the negative log-likelihood::

            |1 - (new neg. log-likelihood / old neg. log-likelihood) | < tol

    verbose : int, optional, default: 0
        Specifies the amount of additional debug information
        during optimization.

    Attributes
    ----------
    coef_ : ndarray, shape = (n_features,)
        Coefficients of the model

    cum_baseline_hazard_ : :class:`sksurv.functions.StepFunction`
        Estimated baseline cumulative hazard function.

    baseline_survival_ : :class:`sksurv.functions.StepFunction`
        Estimated baseline survival function.

    n_features_in_ : int
        Number of features seen during ``fit``.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during ``fit``. Defined only when `X`
        has feature names that are all strings.

    unique_times_ : array of shape = (n_unique_times,)
        Unique time points.

    See also
    --------
    sksurv.linear_model.CoxnetSurvivalAnalysis
        Cox proportional hazards model with l1 (LASSO) and l2 (ridge) penalty.

    References
    ----------
    .. [1] Cox, D. R. Regression models and life tables (with discussion).
           Journal of the Royal Statistical Society. Series B, 34, 187-220, 1972.
    .. [2] Breslow, N. E. Covariance Analysis of Censored Survival Data.
           Biometrics 30 (1974): 89–99.
    .. [3] Efron, B. The Efficiency of Cox’s Likelihood Function for Censored Data.
           Journal of the American Statistical Association 72 (1977): 557–565.
    """

    # Declarative parameter constraints consumed by self._validate_params()
    # (scikit-learn's parameter-validation framework).
    _parameter_constraints: dict = {
        "alpha": [Interval(numbers.Real, 0, None, closed="left"), np.ndarray],
        "ties": [StrOptions({"breslow", "efron"})],
        "n_iter": [Interval(numbers.Integral, 1, None, closed="left")],
        "tol": [Interval(numbers.Real, 0, None, closed="left")],
        "verbose": ["verbose"],
    }

    def __init__(self, alpha=0, *, ties="breslow", n_iter=100, tol=1e-9, verbose=0):
        self.alpha = alpha
        self.ties = ties
        self.n_iter = n_iter
        self.tol = tol
        self.verbose = verbose

        # Breslow estimator fitted at the end of fit(); backs the
        # cum_baseline_hazard_, baseline_survival_ and unique_times_ properties.
        self._baseline_model = BreslowEstimator()

    @property
    def cum_baseline_hazard_(self):
        """Baseline cumulative hazard estimated by the Breslow estimator during fit."""
        return self._baseline_model.cum_baseline_hazard_

    @property
    def baseline_survival_(self):
        """Baseline survival function estimated by the Breslow estimator during fit."""
        return self._baseline_model.baseline_survival_

    @property
    def unique_times_(self):
        """Unique event times seen by the Breslow estimator during fit."""
        return self._baseline_model.unique_times_

    def fit(self, X, y):
        """Minimize negative partial log-likelihood for provided data.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Data matrix

        y : structured array, shape = (n_samples,)
            A structured array containing the binary event indicator
            as first field, and time of event or time of censoring as
            second field.

        Returns
        -------
        self
        """
        self._validate_params()

        X = self._validate_data(X, ensure_min_samples=2, dtype=np.float64)
        event, time = check_array_survival(X, y)

        # broadcast a scalar penalty to one entry per feature
        if isinstance(self.alpha, (numbers.Real, numbers.Integral)):
            alphas = np.empty(X.shape[1], dtype=float)
            alphas[:] = self.alpha
        else:
            alphas = self.alpha

        alphas = check_array(alphas, ensure_2d=False, ensure_min_samples=0, estimator=self, input_name="alpha")
        if np.any(alphas < 0):
            raise ValueError(f"alpha must be positive, but was {self.alpha!r}")
        if alphas.shape[0] != X.shape[1]:
            raise ValueError(f"Length alphas ({alphas.shape[0]}) must match number of features ({X.shape[1]}).")

        optimizer = CoxPHOptimizer(X, event, time, alphas, self.ties)

        verbose_reporter = VerboseReporter(self.verbose)
        w = np.zeros(X.shape[1])
        w_prev = w
        i = 0
        # initial loss is infinity, so the first Newton step is always accepted
        loss = float("inf")
        while True:
            if i >= self.n_iter:
                verbose_reporter.end_max_iter(i)
                warnings.warn(
                    ("Optimization did not converge: Maximum number of iterations has been exceeded."),
                    stacklevel=2,
                    category=ConvergenceWarning,
                )
                break

            # compute gradient and Hessian at the current estimate,
            # then take a Newton-Raphson step: solve H @ delta = g
            optimizer.update(w)
            delta = solve(
                optimizer.hessian, optimizer.gradient, overwrite_a=False, overwrite_b=False, check_finite=False
            )

            if not np.all(np.isfinite(delta)):
                raise ValueError("search direction contains NaN or infinite values")

            w_new = w - delta
            loss_new = optimizer.nlog_likelihood(w_new)
            verbose_reporter.update(i, delta, loss_new)
            if loss_new > loss:
                # perform step-halving if negative log-likelihood does not decrease
                w = (w_prev + w) / 2
                loss = optimizer.nlog_likelihood(w)
                verbose_reporter.step_halving(i, loss)
                i += 1
                continue

            w_prev = w
            w = w_new

            # convergence test: relative change of the negative log-likelihood
            res = np.abs(1 - (loss_new / loss))
            if res < self.tol:
                verbose_reporter.end_converged(i)
                break

            loss = loss_new
            i += 1

        self.coef_ = w
        # fit the Breslow estimator on the training data so that survival and
        # cumulative hazard functions can be predicted later
        self._baseline_model.fit(np.dot(X, self.coef_), event, time)
        return self

    def predict(self, X):
        """Predict risk scores.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Data matrix.

        Returns
        -------
        risk_score : array, shape = (n_samples,)
            Predicted risk scores.
        """
        check_is_fitted(self, "coef_")

        X = self._validate_data(X, reset=False)

        # risk score is the linear predictor X @ beta
        return np.dot(X, self.coef_)

    def predict_cumulative_hazard_function(self, X, return_array=False):
        """Predict cumulative hazard function.

        The cumulative hazard function for an individual
        with feature vector :math:`x` is defined as

        .. math::

            H(t \\mid x) = \\exp(x^\\top \\beta) H_0(t) ,

        where :math:`H_0(t)` is the baseline hazard function,
        estimated by Breslow's estimator.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Data matrix.

        return_array : boolean
            If set, return an array with the cumulative hazard rate
            for each `self.unique_times_`, otherwise an array of
            :class:`sksurv.functions.StepFunction`.

        Returns
        -------
        cum_hazard : ndarray
            If `return_array` is set, an array with the cumulative hazard rate
            for each `self.unique_times_`, otherwise an array of length `n_samples`
            of :class:`sksurv.functions.StepFunction` instances will be returned.

        Examples
        --------
        >>> import matplotlib.pyplot as plt
        >>> from sksurv.datasets import load_whas500
        >>> from sksurv.linear_model import CoxPHSurvivalAnalysis

        Load the data.

        >>> X, y = load_whas500()
        >>> X = X.astype(float)

        Fit the model.

        >>> estimator = CoxPHSurvivalAnalysis().fit(X, y)

        Estimate the cumulative hazard function for the first 10 samples.

        >>> chf_funcs = estimator.predict_cumulative_hazard_function(X.iloc[:10])

        Plot the estimated cumulative hazard functions.

        >>> for fn in chf_funcs:
        ...     plt.step(fn.x, fn(fn.x), where="post")
        ...
        >>> plt.ylim(0, 1)
        >>> plt.show()
        """
        return self._predict_cumulative_hazard_function(self._baseline_model, self.predict(X), return_array)

    def predict_survival_function(self, X, return_array=False):
        """Predict survival function.

        The survival function for an individual
        with feature vector :math:`x` is defined as

        .. math::

            S(t \\mid x) = S_0(t)^{\\exp(x^\\top \\beta)} ,

        where :math:`S_0(t)` is the baseline survival function,
        estimated by Breslow's estimator.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Data matrix.

        return_array : boolean, default: False
            If set, return an array with the probability
            of survival for each `self.unique_times_`,
            otherwise an array of :class:`sksurv.functions.StepFunction`.

        Returns
        -------
        survival : ndarray
            If `return_array` is set, an array with the probability of
            survival for each `self.unique_times_`, otherwise an array of
            length `n_samples` of :class:`sksurv.functions.StepFunction`
            instances will be returned.

        Examples
        --------
        >>> import matplotlib.pyplot as plt
        >>> from sksurv.datasets import load_whas500
        >>> from sksurv.linear_model import CoxPHSurvivalAnalysis

        Load the data.

        >>> X, y = load_whas500()
        >>> X = X.astype(float)

        Fit the model.

        >>> estimator = CoxPHSurvivalAnalysis().fit(X, y)

        Estimate the survival function for the first 10 samples.

        >>> surv_funcs = estimator.predict_survival_function(X.iloc[:10])

        Plot the estimated survival functions.

        >>> for fn in surv_funcs:
        ...     plt.step(fn.x, fn(fn.x), where="post")
        ...
        >>> plt.ylim(0, 1)
        >>> plt.show()
        """
        return self._predict_survival_function(self._baseline_model, self.predict(X), return_array)
|
sksurv/meta/__init__.py
ADDED
sksurv/meta/base.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# This program is free software: you can redistribute it and/or modify
|
|
2
|
+
# it under the terms of the GNU General Public License as published by
|
|
3
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
4
|
+
# (at your option) any later version.
|
|
5
|
+
#
|
|
6
|
+
# This program is distributed in the hope that it will be useful,
|
|
7
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
8
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
9
|
+
# GNU General Public License for more details.
|
|
10
|
+
#
|
|
11
|
+
# You should have received a copy of the GNU General Public License
|
|
12
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
13
|
+
import numbers
|
|
14
|
+
|
|
15
|
+
from sklearn.utils.metaestimators import _safe_split
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _fit_and_score(est, x, y, scorer, train_index, test_index, parameters, fit_params, predict_params):
    """Train survival model on given data and return its score on test data.

    Fits ``est`` (configured with ``parameters``) on the training split of
    ``x``/``y`` and evaluates it with ``scorer`` on the test split.
    Raises ``ValueError`` if the scorer does not return a number.
    """
    # fit on the training fold with the requested hyper-parameters
    features_train, target_train = _safe_split(est, x, y, train_index)
    est.set_params(**parameters)
    est.fit(features_train, target_train, **fit_params.copy())

    # evaluate on the held-out fold
    features_test, target_test = _safe_split(est, x, y, test_index, train_index)
    score = scorer(est, features_test, target_test, **predict_params.copy())

    if isinstance(score, numbers.Number):
        return score
    raise ValueError(f"scoring must return a number, got {score!s} ({type(score)}) instead.")
|