lifelines 0.27.7__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lifelines/datasets/__init__.py +2 -2
- lifelines/exceptions.py +4 -0
- lifelines/fitters/__init__.py +33 -20
- lifelines/fitters/aalen_johansen_fitter.py +44 -0
- lifelines/fitters/breslow_fleming_harrington_fitter.py +9 -1
- lifelines/fitters/cox_time_varying_fitter.py +15 -10
- lifelines/fitters/coxph_fitter.py +17 -13
- lifelines/fitters/generalized_gamma_fitter.py +6 -5
- lifelines/fitters/kaplan_meier_fitter.py +9 -3
- lifelines/fitters/mixins.py +8 -3
- lifelines/fitters/nelson_aalen_fitter.py +2 -2
- lifelines/plotting.py +163 -91
- lifelines/utils/__init__.py +5 -7
- lifelines/version.py +1 -1
- {lifelines-0.27.7.dist-info → lifelines-0.28.0.dist-info}/METADATA +9 -11
- {lifelines-0.27.7.dist-info → lifelines-0.28.0.dist-info}/RECORD +19 -22
- {lifelines-0.27.7.dist-info → lifelines-0.28.0.dist-info}/WHEEL +1 -1
- lifelines/datasets/ACTG175.csv +0 -2140
- lifelines/metrics.py +0 -60
- lifelines/utils/sklearn_adapter.py +0 -135
- {lifelines-0.27.7.dist-info → lifelines-0.28.0.dist-info}/LICENSE +0 -0
- {lifelines-0.27.7.dist-info → lifelines-0.28.0.dist-info}/top_level.txt +0 -0
lifelines/datasets/__init__.py
CHANGED
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 import pandas as pd
-from …
+from importlib import resources


 def _load_dataset(filename, **kwargs):
@@ -18,7 +18,7 @@ def _load_dataset(filename, **kwargs):
    -------
    output: DataFrame
    """
-    return pd.read_csv(…
+    return pd.read_csv(resources.files("lifelines") / "datasets" / filename, engine="python", **kwargs)


def load_recur(**kwargs):
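The loader now resolves bundled CSVs through `importlib.resources` instead of a file-path helper. A minimal sketch of the (unchanged) user-facing behaviour; `load_waltons` is a standard lifelines helper used here only as an example and is not part of this diff:

```python
# Sketch: dataset loading after the switch to importlib.resources.
# load_waltons() is assumed only as a convenient example loader; the diff above
# changes how _load_dataset locates the CSV inside the installed package
# (resources.files("lifelines") / "datasets" / filename).
from lifelines.datasets import load_waltons

df = load_waltons()
print(df.head())
print(df.shape)
```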
lifelines/exceptions.py
CHANGED
lifelines/fitters/__init__.py
CHANGED
@@ -70,6 +70,10 @@ class BaseFitter:
         s = """<lifelines.%s>""" % classname
         return s

+    @property
+    def label(self):
+        return self._label
+
     @utils.CensoringType.right_censoring
     def fit(*args, **kwargs):
         raise NotImplementedError()
@@ -135,6 +139,10 @@ class UnivariateFitter(BaseFitter):
             "The `plot` function is deprecated, and will be removed in future versions. Use `plot_%s`" % self._estimate_name,
             DeprecationWarning,
         )
+        # Fix the confidence interval plot bug from Aalen-Johansen
+        # when calculate_variance is False.
+        if getattr(self, "_calc_var", None) is False:
+            kwargs["ci_show"] = False
         return _plot_estimate(self, estimate=self._estimate_name, **kwargs)

     def subtract(self, other) -> pd.DataFrame:
@@ -213,10 +221,10 @@ class UnivariateFitter(BaseFitter):
        might be 9 years.
        """
        age = self.survival_function_.index.values[:, None]
-        columns = ["%s - Conditional median duration remaining to event" % self.…
+        columns = ["%s - Conditional median duration remaining to event" % self.label]
        return (
            pd.DataFrame(
-                utils.qth_survival_times(self.survival_function_[self.…
+                utils.qth_survival_times(self.survival_function_[self.label] * 0.5, self.survival_function_)
                .sort_index(ascending=False)
                .values,
                index=self.survival_function_.index,
@@ -454,7 +462,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
         return -ll / weights.sum()

     def _compute_confidence_bounds_of_cumulative_hazard(self, alpha, ci_labels) -> pd.DataFrame:
-        return self._compute_confidence_bounds_of_transform(self._cumulative_hazard, alpha, ci_labels)
+        return self._compute_confidence_bounds_of_transform(self._cumulative_hazard, alpha, ci_labels, self.timeline)

     def _compute_variance_of_transform(self, transform, timeline=None):
         """
@@ -487,7 +495,9 @@ class ParametricUnivariateFitter(UnivariateFitter):
             np.einsum("nj,jk,nk->n", gradient_at_times.T, self.variance_matrix_, gradient_at_times.T), index=timeline
         )

-    def _compute_confidence_bounds_of_transform(…
+    def _compute_confidence_bounds_of_transform(
+        self, transform, alpha: float, ci_labels: tuple[str, str], timeline
+    ) -> pd.DataFrame:
         """
         This computes the confidence intervals of a transform of the parameters. Ex: take
         the fitted parameters, a function/transform and the variance matrix and give me
@@ -503,20 +513,21 @@ class ParametricUnivariateFitter(UnivariateFitter):
        alpha: float
            confidence level
        ci_labels: tuple
+        timeline: iterable

        """
        alpha2 = 1 - alpha / 2.0
        z = utils.inv_normal_cdf(alpha2)
-        df = pd.DataFrame(index=…
+        df = pd.DataFrame(index=timeline)

        std_of_transform = np.sqrt(self._compute_variance_of_transform(transform))

        if ci_labels is None:
-            ci_labels = ["%s_lower_%g" % (self.…
+            ci_labels = ["%s_lower_%g" % (self.label, 1 - alpha), "%s_upper_%g" % (self.label, 1 - alpha)]
        assert len(ci_labels) == 2, "ci_labels should be a length 2 array."

-        df[ci_labels[0]] = transform(self._fitted_parameters_, …
-        df[ci_labels[1]] = transform(self._fitted_parameters_, …
+        df[ci_labels[0]] = transform(self._fitted_parameters_, timeline) - z * std_of_transform
+        df[ci_labels[1]] = transform(self._fitted_parameters_, timeline) + z * std_of_transform
        return df

    def _create_initial_point(self, *args) -> np.ndarray:
@@ -539,7 +550,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
        minimizing_results, previous_results, minimizing_ll = None, None, np.inf
        for method, option in zip(
            ["Nelder-Mead", self._scipy_fit_method],
-            [{"maxiter": …
+            [{"maxiter": 400}, {**{"disp": show_progress}, **self._scipy_fit_options, **fit_options}],
        ):

            initial_value = self._initial_values if previous_results is None else utils._to_1d_array(previous_results.x)
@@ -1054,7 +1065,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
            Rename the series returned. Useful for plotting.

        """
-        label = utils.coalesce(label, self.…
+        label = utils.coalesce(label, self.label)
        return pd.Series(self._survival_function(self._fitted_parameters_, times), index=utils._to_1d_array(times), name=label)

    def cumulative_density_at_times(self, times, label: t.Optional[str] = None) -> pd.Series:
@@ -1069,7 +1080,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
            Rename the series returned. Useful for plotting.

        """
-        label = utils.coalesce(label, self.…
+        label = utils.coalesce(label, self.label)
        return pd.Series(self._cumulative_density(self._fitted_parameters_, times), index=utils._to_1d_array(times), name=label)

    def density_at_times(self, times, label=None) -> pd.Series:
@@ -1084,7 +1095,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
            Rename the series returned. Useful for plotting.

        """
-        label = utils.coalesce(label, self.…
+        label = utils.coalesce(label, self.label)
        return pd.Series(self._density(self._fitted_parameters_, times), index=utils._to_1d_array(times), name=label)

    def cumulative_hazard_at_times(self, times, label: t.Optional[str] = None) -> pd.Series:
@@ -1098,7 +1109,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
        label: string, optional
            Rename the series returned. Useful for plotting.
        """
-        label = utils.coalesce(label, self.…
+        label = utils.coalesce(label, self.label)
        return pd.Series(self._cumulative_hazard(self._fitted_parameters_, times), index=utils._to_1d_array(times), name=label)

    def hazard_at_times(self, times, label: t.Optional[str] = None) -> pd.Series:
@@ -1113,7 +1124,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
            Rename the series returned. Useful for plotting.

        """
-        label = utils.coalesce(label, self.…
+        label = utils.coalesce(label, self.label)
        return pd.Series(self._hazard(self._fitted_parameters_, times), index=utils._to_1d_array(times), name=label)

    @property
@@ -1135,28 +1146,28 @@ class ParametricUnivariateFitter(UnivariateFitter):
        """
        The confidence interval of the hazard.
        """
-        return self._compute_confidence_bounds_of_transform(self._hazard, self.alpha, self._ci_labels)
+        return self._compute_confidence_bounds_of_transform(self._hazard, self.alpha, self._ci_labels, self.timeline)

    @property
    def confidence_interval_density_(self) -> pd.DataFrame:
        """
        The confidence interval of the hazard.
        """
-        return self._compute_confidence_bounds_of_transform(self._density, self.alpha, self._ci_labels)
+        return self._compute_confidence_bounds_of_transform(self._density, self.alpha, self._ci_labels, self.timeline)

    @property
    def confidence_interval_survival_function_(self) -> pd.DataFrame:
        """
        The lower and upper confidence intervals for the survival function
        """
-        return self._compute_confidence_bounds_of_transform(self._survival_function, self.alpha, self._ci_labels)
+        return self._compute_confidence_bounds_of_transform(self._survival_function, self.alpha, self._ci_labels, self.timeline)

    @property
    def confidence_interval_cumulative_density_(self) -> pd.DataFrame:
        """
        The lower and upper confidence intervals for the cumulative density
        """
-        return self._compute_confidence_bounds_of_transform(self._cumulative_density, self.alpha, self._ci_labels)
+        return self._compute_confidence_bounds_of_transform(self._cumulative_density, self.alpha, self._ci_labels, self.timeline)

    def plot(self, **kwargs):
        """
@@ -1203,7 +1214,7 @@ class ParametricUnivariateFitter(UnivariateFitter):

        """
        age = self.timeline
-        columns = ["%s - Conditional median duration remaining to event" % self.…
+        columns = ["%s - Conditional median duration remaining to event" % self.label]

        return pd.DataFrame(self.percentile(0.5 * self.survival_function_.values) - age[:, None], index=age, columns=columns)

@@ -1398,7 +1409,7 @@ class ParametricRegressionFitter(RegressionFitter):
    def _survival_function(self, params, T, Xs):
        return anp.clip(anp.exp(-self._cumulative_hazard(params, T, Xs)), 1e-12, 1 - 1e-12)

-    def _log_likelihood_right_censoring(self, params, Ts, E, W, entries, Xs) -> float:
+    def _log_likelihood_right_censoring(self, params, Ts: tuple, E, W, entries, Xs) -> float:

        T = Ts[0]
        non_zero_entries = entries > 0
@@ -3354,6 +3365,8 @@ class ParametericAFTRegressionFitter(ParametricRegressionFitter):
            also display the baseline survival, defined as the survival at the mean of the original dataset.
        times: iterable
            pass in a times to plot
+        y: str
+            one of "survival_function", "hazard", "cumulative_hazard". Default "survival_function"
        kwargs:
            pass in additional plotting commands
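Several changes above replace direct use of the private `self._label` with the new read-only `label` property on `BaseFitter`, and thread `timeline` through `_compute_confidence_bounds_of_transform`. A minimal sketch with a standard univariate fitter; `WeibullFitter` and `load_waltons` are ordinary lifelines objects, not part of this diff:

```python
# Sketch: the new public `label` property and label-derived column names.
from lifelines import WeibullFitter
from lifelines.datasets import load_waltons

df = load_waltons()
wf = WeibullFitter().fit(df["T"], df["E"], label="waltons")

print(wf.label)                              # "waltons"; previously only reachable as wf._label
print(wf.conditional_time_to_event_.columns) # column name is built from wf.label
print(wf.confidence_interval_survival_function_.head())  # bounds evaluated on wf.timeline
```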
lifelines/fitters/aalen_johansen_fitter.py
CHANGED
@@ -7,6 +7,7 @@ import warnings
 from lifelines.fitters import NonParametricUnivariateFitter
 from lifelines.utils import _preprocess_inputs, inv_normal_cdf, CensoringType, coalesce
 from lifelines import KaplanMeierFitter
+from lifelines.plotting import _plot_estimate


 class AalenJohansenFitter(NonParametricUnivariateFitter):
@@ -261,3 +262,46 @@ class AalenJohansenFitter(NonParametricUnivariateFitter):

        # Detect duplicated times with different event types
        return (dup_times & (~dup_events)).any()
+
+    def plot_cumulative_density(self, **kwargs):
+        """Plots a pretty figure of the model
+
+        Matplotlib plot arguments can be passed in inside the kwargs.
+
+        Parameters
+        -----------
+        show_censors: bool
+            place markers at censorship events. Default: False
+        censor_styles: dict
+            If show_censors, this dictionary will be passed into the plot call.
+        ci_alpha: float
+            the transparency level of the confidence interval. Default: 0.3
+        ci_force_lines: bool
+            force the confidence intervals to be line plots (versus default shaded areas). Default: False
+        ci_show: bool
+            show confidence intervals. Default: True
+        ci_legend: bool
+            if ci_force_lines is True, this is a boolean flag to add the lines' labels to the legend. Default: False
+        at_risk_counts: bool
+            show group sizes at time points. See function ``add_at_risk_counts`` for details. Default: False
+        loc: slice
+            specify a time-based subsection of the curves to plot, ex:
+
+            >>> model.plot(loc=slice(0.,10.))
+
+            will plot the time values between t=0. and t=10.
+        iloc: slice
+            specify a location-based subsection of the curves to plot, ex:
+
+            >>> model.plot(iloc=slice(0,10))
+
+            will plot the first 10 time points.
+
+        Returns
+        -------
+        ax:
+            a pyplot axis object
+        """
+        if not self._calc_var:
+            kwargs["ci_show"] = False
+        _plot_estimate(self, estimate=self._estimate_name, **kwargs)
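A minimal sketch of the new `plot_cumulative_density` method on synthetic competing-risks data (0 = censored, 1 = event of interest, 2 = competing event); when the fitter was constructed with `calculate_variance=False`, the method suppresses `ci_show` itself instead of failing:

```python
# Sketch: plotting the Aalen-Johansen cumulative density without variance estimates.
import numpy as np
from lifelines import AalenJohansenFitter

rng = np.random.default_rng(0)
T = rng.exponential(10, size=200)   # synthetic durations
E = rng.integers(0, 3, size=200)    # 0 censored, 1 event of interest, 2 competing event

ajf = AalenJohansenFitter(calculate_variance=False)
ajf.fit(T, E, event_of_interest=1)
ajf.plot_cumulative_density()       # ci_show is forced off because _calc_var is False
```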
lifelines/fitters/breslow_fleming_harrington_fitter.py
CHANGED
@@ -72,7 +72,14 @@ class BreslowFlemingHarringtonFitter(NonParametricUnivariateFitter):
        alpha = coalesce(alpha, self.alpha)

        naf = NelsonAalenFitter(alpha=alpha)
-        naf.fit(…
+        naf.fit(
+            durations,
+            event_observed=event_observed,
+            timeline=timeline,
+            label=self._label,
+            entry=entry,
+            ci_labels=ci_labels,
+        )
        self.durations, self.event_observed, self.timeline, self.entry, self.event_table, self.weights = (
            naf.durations,
            naf.event_observed,
@@ -87,6 +94,7 @@ class BreslowFlemingHarringtonFitter(NonParametricUnivariateFitter):
        self.confidence_interval_ = np.exp(-naf.confidence_interval_)
        self.confidence_interval_survival_function_ = self.confidence_interval_
        self.confidence_interval_cumulative_density = 1 - self.confidence_interval_
+        self.confidence_interval_cumulative_density[:] = np.fliplr(self.confidence_interval_cumulative_density.values)

        # estimation methods
        self._estimation_method = "survival_function_"
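A rough sketch of the effect of the expanded `naf.fit(...)` call, assuming `BreslowFlemingHarringtonFitter.fit` keeps its usual Kaplan-Meier-like signature (`timeline`, `entry`, `ci_labels`); those arguments are now forwarded to the internal `NelsonAalenFitter`, and the cumulative-density interval columns are flipped into lower/upper order:

```python
# Sketch only; load_waltons and the exact fit signature are assumptions, not part of this diff.
import numpy as np
from lifelines import BreslowFlemingHarringtonFitter
from lifelines.datasets import load_waltons

df = load_waltons()
bfh = BreslowFlemingHarringtonFitter()
bfh.fit(df["T"], df["E"], timeline=np.arange(0, 80), label="BFH")

print(bfh.survival_function_.head())
print(bfh.confidence_interval_cumulative_density.head())  # columns now in lower/upper order
```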
lifelines/fitters/cox_time_varying_fitter.py
CHANGED
@@ -153,6 +153,7 @@ class CoxTimeVaryingFitter(SemiParametricRegressionFitter, ProportionalHazardMix
            Override the default values in NR algorithm:
                step_size: 0.95,
                precision: 1e-07,
+                r_precision=1e-9,
                max_steps: 500,

        Returns
@@ -328,12 +329,13 @@ class CoxTimeVaryingFitter(SemiParametricRegressionFitter, ProportionalHazardMix
        weights,
        show_progress=False,
        step_size=0.95,
-        precision=…
+        precision=1e-8,
+        r_precision=1e-9,
        max_steps=50,
        initial_point=None,
    ):  # pylint: disable=too-many-arguments,too-many-locals,too-many-branches,too-many-statements
        """
-        Newton…
+        Newton Raphson algorithm for fitting CPH model.

        Parameters
        ----------
@@ -345,8 +347,11 @@ class CoxTimeVaryingFitter(SemiParametricRegressionFitter, ProportionalHazardMix
        step_size: float
            > 0 to determine a starting step size in NR algorithm.
        precision: float
-            the…
-            …
+            the algorithm stops if the norm of delta between
+            successive positions is less than ``precision``.
+        r_precision: float, optional
+            the algorithms stops if the relative decrease in log-likelihood
+            between successive iterations goes below ``r_precision``.

        Returns
        --------
@@ -443,17 +448,17 @@ https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergen

            if show_progress:
                print(
-                    "\rIteration %d: norm_delta = %.…
+                    "\rIteration %d: norm_delta = %.2e, step_size = %.4f, log_lik = %.5f, newton_decrement = %.2e, seconds_since_start = %.1f"
                    % (i, norm_delta, step_size, ll, newton_decrement, time.time() - start_time)
                )

            # convergence criteria
            if norm_delta < precision:
                converging, completed = False, True
-            elif previous_ll > 0 and abs(ll - previous_ll) / (-previous_ll) < …
-                # this is what R uses by default
+            elif previous_ll > 0 and abs(ll - previous_ll) / (-previous_ll) < r_precision:
+                # this is what R uses by default with r_precision=1e-9
                converging, completed = False, True
-            elif newton_decrement < …
+            elif newton_decrement < precision:
                converging, completed = False, True
            elif i >= max_steps:
                # 50 iterations steps with N-R is a lot.
@@ -481,12 +486,12 @@ See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-sep
        # report to the user problems that we detect.
        if completed and norm_delta > 0.1:
            warnings.warn(
-                "Newton-…
+                "Newton-Raphson convergence completed but norm(delta) is still high, %.3f. This may imply non-unique solutions to the maximum likelihood. Perhaps there is colinearity or complete separation in the dataset?"
                % norm_delta,
                ConvergenceWarning,
            )
        elif not completed:
-            warnings.warn("Newton-…
+            warnings.warn("Newton-Raphson failed to converge sufficiently in %d steps." % max_steps, ConvergenceWarning)

        return beta, ll, hessian
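A minimal sketch of a time-varying Cox fit with the updated convergence diagnostics; besides the norm-of-delta test, the loop now also stops once the relative log-likelihood improvement falls below the new `r_precision` (default 1e-9). `load_stanford_heart_transplants` is a standard lifelines dataset, not part of this diff:

```python
# Sketch: show_progress=True prints the reformatted per-iteration diagnostics shown above.
from lifelines import CoxTimeVaryingFitter
from lifelines.datasets import load_stanford_heart_transplants

df = load_stanford_heart_transplants()
ctv = CoxTimeVaryingFitter()
ctv.fit(df, id_col="id", event_col="event", start_col="start", stop_col="stop", show_progress=True)
ctv.print_summary()
```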
lifelines/fitters/coxph_fitter.py
CHANGED
@@ -80,7 +80,7 @@ class CoxPHFitter(RegressionFitter, ProportionalHazardMixin):
        When ``baseline_estimation_method="spline"``, this allows customizing the points in the time axis for the baseline hazard curve.
        To use evenly-spaced points in time, the ``n_baseline_knots`` parameter can be employed instead.

-    breakpoints: …
+    breakpoints: list, optional
        Used when ``baseline_estimation_method="piecewise"``. Set the positions of the baseline hazard breakpoints.

    Examples
@@ -242,7 +242,7 @@ class CoxPHFitter(RegressionFitter, ProportionalHazardMixin):
            algorithm. Default is the zero vector.

        fit_options: dict, optional
-            pass kwargs for the fitting algorithm. For semi-parametric models, this is the Newton-…
+            pass kwargs for the fitting algorithm. For semi-parametric models, this is the Newton-Raphson method (see method _newton_raphson_for_efron_model for kwargs)

        Returns
        -------
@@ -1430,10 +1430,11 @@ estimate the variances. See paper "Variance estimation when using inverse probab
        show_progress: bool = True,
        step_size: float = 0.95,
        precision: float = 1e-07,
+        r_precision: float = 1e-9,
        max_steps: int = 500,
    ):  # pylint: disable=too-many-statements,too-many-branches
        """
-        Newton…
+        Newton Raphson algorithm for fitting CPH model.

        Note
        ----
@@ -1450,13 +1451,15 @@ estimate the variances. See paper "Variance estimation when using inverse probab
        step_size: float, optional
            > 0.001 to determine a starting step size in NR algorithm.
        precision: float, optional
-            the…
-            successive positions is less than…
+            the algorithm stops if the norm of delta between
+            successive positions is less than ``precision``.
+        r_precision: float, optional
+            the algorithms stops if the relative decrease in log-likelihood
+            between successive iterations goes below ``r_precision``.
        show_progress: bool, optional
-            since the fitter is iterative, show convergence
-            diagnostics.
+            since the fitter is iterative, show convergence diagnostics.
        max_steps: int, optional
-            the maximum number of iterations of the Newton-…
+            the maximum number of iterations of the Newton-Raphson algorithm.

        Returns
        -------
@@ -1564,15 +1567,15 @@ estimate the variances. See paper "Variance estimation when using inverse probab

            if show_progress:
                print(
-                    "\rIteration %d: norm_delta = %.…
+                    "\rIteration %d: norm_delta = %.2e, step_size = %.4f, log_lik = %.5f, newton_decrement = %.2e, seconds_since_start = %.1f"
                    % (i, norm_delta, step_size, ll_, newton_decrement, time.time() - start)
                )

            # convergence criteria
            if norm_delta < precision:
                converging, success = False, True
-            elif previous_ll_ != 0 and abs(ll_ - previous_ll_) / (-previous_ll_) < …
-                # this is what R uses by default
+            elif previous_ll_ != 0 and abs(ll_ - previous_ll_) / (-previous_ll_) < r_precision:
+                # this is what R uses by default, with r_precision = 1e-9
                converging, success = False, True
            elif newton_decrement < precision:
                converging, success = False, True
@@ -1602,14 +1605,14 @@ See https://stats.stackexchange.com/q/11109/11867 for more.\n",
        if success and norm_delta > 0.1:
            self._check_values_post_fitting(X, T, E, weights)
            warnings.warn(
-                "Newton-…
+                "Newton-Raphson convergence completed successfully but norm(delta) is still high, %.3f. This may imply non-unique solutions to the maximum likelihood. Perhaps there is collinearity or complete separation in the dataset?\n"
                % norm_delta,
                exceptions.ConvergenceWarning,
            )
        elif not success:
            self._check_values_post_fitting(X, T, E, weights)
            warnings.warn(
-                "Newton-…
+                "Newton-Raphson failed to converge sufficiently. {0}".format(CONVERGENCE_DOCS), exceptions.ConvergenceWarning
            )

        return beta, ll_, hessian
@@ -2855,6 +2858,7 @@ class ParametricCoxModelFitter(ParametricRegressionFitter, ProportionalHazardMix
            df = df.to_frame().T.infer_objects()

        df = df.copy()
+        df.index.name = None

        if self.strata is not None:
            df = df.reset_index().set_index(self.strata)
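A hedged sketch of overriding the Newton-Raphson stopping rules through `fit_options`, assuming the dictionary is forwarded to `_newton_raphson_for_efron_model` as the updated docstring above indicates; `load_rossi` is a standard lifelines dataset, not part of this diff:

```python
# Sketch: the keys below mirror the NR keyword arguments documented above
# (precision, r_precision, max_steps); treat the pass-through as an assumption, not a spec.
from lifelines import CoxPHFitter
from lifelines.datasets import load_rossi

rossi = load_rossi()
cph = CoxPHFitter()
cph.fit(
    rossi,
    duration_col="week",
    event_col="arrest",
    fit_options={"precision": 1e-07, "r_precision": 1e-9, "max_steps": 500},
    show_progress=True,
)
cph.print_summary()
```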
lifelines/fitters/generalized_gamma_fitter.py
CHANGED
@@ -105,6 +105,7 @@ class GeneralizedGammaFitter(KnownModelParametricUnivariateFitter):
    """

    _scipy_fit_method = "SLSQP"
+    _scipy_fit_options = {"maxiter": 10_000, "maxfev": 10_000}
    _fitted_parameter_names = ["mu_", "ln_sigma_", "lambda_"]
    _bounds = [(None, None), (None, None), (None, None)]
    _compare_to_values = np.array([0.0, 0.0, 1.0])
@@ -117,14 +118,14 @@ class GeneralizedGammaFitter(KnownModelParametricUnivariateFitter):
        elif CensoringType.is_interval_censoring(self):
            # this fails if Ts[1] == Ts[0], so we add a some fudge factors.
            log_data = log(Ts[1] - Ts[0] + 0.1)
-            return np.array([log_data.mean(), log(log_data.std() + 0.…
+            return np.array([log_data.mean() * 1.5, log(log_data.std() + 0.1), 1.0])

    def _cumulative_hazard(self, params, times):
        mu_, ln_sigma_, lambda_ = params

        sigma_ = safe_exp(ln_sigma_)
        Z = (log(times) - mu_) / sigma_
-        ilambda_2 = 1 / lambda_…
+        ilambda_2 = 1 / lambda_**2
        clipped_exp = np.clip(safe_exp(lambda_ * Z) * ilambda_2, 1e-300, 1e20)

        if lambda_ > 0:
@@ -137,7 +138,7 @@ class GeneralizedGammaFitter(KnownModelParametricUnivariateFitter):

    def _log_hazard(self, params, times):
        mu_, ln_sigma_, lambda_ = params
-        ilambda_2 = 1 / lambda_…
+        ilambda_2 = 1 / lambda_**2
        Z = (log(times) - mu_) / safe_exp(ln_sigma_)
        clipped_exp = np.clip(safe_exp(lambda_ * Z) * ilambda_2, 1e-300, 1e20)
        if lambda_ > 0:
@@ -171,5 +172,5 @@ class GeneralizedGammaFitter(KnownModelParametricUnivariateFitter):
        sigma_ = exp(self.ln_sigma_)

        if lambda_ > 0:
-            return exp(sigma_ * log(gammainccinv(1 / lambda_…
-        return exp(sigma_ * log(gammaincinv(1 / lambda_…
+            return exp(sigma_ * log(gammainccinv(1 / lambda_**2, p) * lambda_**2) / lambda_) * exp(self.mu_)
+        return exp(sigma_ * log(gammaincinv(1 / lambda_**2, p) * lambda_**2) / lambda_) * exp(self.mu_)
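A minimal sketch of a right-censored fit; the cumulative hazard, log-hazard, and percentile expressions above are all written in terms of `1 / lambda_**2`, and SLSQP now gets larger `maxiter`/`maxfev` budgets by default. `load_waltons` is not part of this diff:

```python
# Sketch: percentile() goes through the gammaincinv/gammainccinv expression shown above.
from lifelines import GeneralizedGammaFitter
from lifelines.datasets import load_waltons

df = load_waltons()
ggf = GeneralizedGammaFitter().fit(df["T"], df["E"])

print(ggf.summary)
print(ggf.percentile(0.5))        # median survival time
print(ggf.median_survival_time_)
```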
lifelines/fitters/kaplan_meier_fitter.py
CHANGED
@@ -351,9 +351,14 @@ class KaplanMeierFitter(NonParametricUnivariateFitter):
        primary_estimate_name = "survival_function_"
        secondary_estimate_name = "cumulative_density_"

-        (
-            durations,
-            …
+        (
+            self.durations,
+            self.event_observed,
+            self.timeline,
+            self.entry,
+            self.event_table,
+            self.weights,
+        ) = _preprocess_inputs(durations, event_observed, timeline, entry, weights)

        alpha = alpha if alpha else self.alpha
        log_estimate, cumulative_sq_ = _additive_estimate(
@@ -386,6 +391,7 @@ class KaplanMeierFitter(NonParametricUnivariateFitter):

        self.confidence_interval_survival_function_ = self.confidence_interval_
        self.confidence_interval_cumulative_density_ = 1 - self.confidence_interval_
+        self.confidence_interval_cumulative_density_[:] = np.fliplr(self.confidence_interval_cumulative_density_.values)
        self._median = median_survival_times(self.survival_function_)
        self._cumulative_sq_ = cumulative_sq_
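After this change the cumulative-density interval is still `1 - (survival interval)`, but its lower/upper columns are swapped into the conventional order by the `np.fliplr` line above. A minimal sketch; `load_waltons` is not part of this diff:

```python
# Sketch: comparing the two confidence-interval frames after fit().
from lifelines import KaplanMeierFitter
from lifelines.datasets import load_waltons

df = load_waltons()
kmf = KaplanMeierFitter().fit(df["T"], df["E"], label="waltons")

ci_sf = kmf.confidence_interval_survival_function_
ci_cd = kmf.confidence_interval_cumulative_density_
# the first (lower) column of ci_cd equals 1 - the upper column of ci_sf
print(ci_sf.head())
print(ci_cd.head())
```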
lifelines/fitters/mixins.py
CHANGED
@@ -4,6 +4,7 @@ from textwrap import dedent, fill
 from autograd import numpy as anp
 import numpy as np
 from pandas import DataFrame, Series
+from lifelines.exceptions import ProportionalHazardAssumptionError
 from lifelines.statistics import proportional_hazard_test, TimeTransformers
 from lifelines.utils import format_p_value
 from lifelines.utils.lowess import lowess
@@ -28,6 +29,7 @@ class ProportionalHazardMixin:
        p_value_threshold: float = 0.01,
        plot_n_bootstraps: int = 15,
        columns: Optional[List[str]] = None,
+        raise_on_fail: bool = False,
    ) -> None:
        """
        Use this function to test the proportional hazards assumption. See usage example at
@@ -51,6 +53,8 @@ class ProportionalHazardMixin:
            the function significantly.
        columns: list, optional
            specify a subset of columns to test.
+        raise_on_fail: bool, optional
+            throw a ``ProportionalHazardAssumptionError`` if the test fails. Default: False.

        Returns
        --------
@@ -107,7 +111,7 @@ class ProportionalHazardMixin:

        for variable in self.params_.index.intersection(columns or self.params_.index):
            minumum_observed_p_value = test_results.summary.loc[variable, "p"].min()
-
+
            # plot is done (regardless of test result) whenever `show_plots = True`
            if show_plots:
                axes.append([])
@@ -224,9 +228,8 @@ class ProportionalHazardMixin:
                ),
                end="\n\n",
            )
-            #################
+        #################

-
        if advice and counter > 0:
            print(
                dedent(
@@ -243,6 +246,8 @@ class ProportionalHazardMixin:

        if counter == 0:
            print("Proportional hazard assumption looks okay.")
+        elif raise_on_fail:
+            raise ProportionalHazardAssumptionError()
        return axes

    @property
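A minimal sketch of the new `raise_on_fail` flag on `check_assumptions`; `CoxPHFitter` and `load_rossi` are standard lifelines objects, not part of this diff:

```python
# Sketch: turn a failed proportional-hazards check into an exception.
from lifelines import CoxPHFitter
from lifelines.datasets import load_rossi
from lifelines.exceptions import ProportionalHazardAssumptionError

rossi = load_rossi()
cph = CoxPHFitter().fit(rossi, duration_col="week", event_col="arrest")

try:
    cph.check_assumptions(rossi, p_value_threshold=0.05, raise_on_fail=True)
except ProportionalHazardAssumptionError:
    print("At least one covariate appears to violate the proportional hazards assumption.")
```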
lifelines/fitters/nelson_aalen_fitter.py
CHANGED
@@ -183,7 +183,7 @@ class NelsonAalenFitter(UnivariateFitter):
        )

    def _variance_f_discrete(self, population, deaths):
-        return (…
+        return (1 - deaths / population) * (deaths / population) * (1.0 / population)

    def _additive_f_smooth(self, population, deaths):
        cum_ = np.cumsum(1.0 / np.arange(1, np.max(population) + 1))
@@ -239,7 +239,7 @@ class NelsonAalenFitter(UnivariateFitter):
        C = var_hazard_.values != 0.0  # only consider the points with jumps
        std_hazard_ = np.sqrt(
            1.0
-            / (bandwidth…
+            / (bandwidth**2)
            * np.dot(epanechnikov_kernel(timeline[:, None], timeline[C][None, :], bandwidth) ** 2, var_hazard_.values[C])
        )
        values = {
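The two lines above touch the discrete variance and the kernel-smoothed hazard standard error (now written with `bandwidth**2`). A minimal sketch exercising that code path through the public smoothing methods, which are standard `NelsonAalenFitter` API and not part of this diff; `load_waltons` is likewise an assumption:

```python
# Sketch: smoothed hazard and its confidence intervals for a chosen bandwidth.
from lifelines import NelsonAalenFitter
from lifelines.datasets import load_waltons

df = load_waltons()
naf = NelsonAalenFitter().fit(df["T"], df["E"], label="waltons")

bandwidth = 3.0
print(naf.smoothed_hazard_(bandwidth).head())
print(naf.smoothed_hazard_confidence_intervals_(bandwidth).head())
```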
|