lifelines 0.27.8__py3-none-any.whl → 0.29.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lifelines/datasets/__init__.py +1 -1
- lifelines/exceptions.py +4 -0
- lifelines/fitters/__init__.py +10 -8
- lifelines/fitters/aalen_additive_fitter.py +2 -2
- lifelines/fitters/breslow_fleming_harrington_fitter.py +9 -1
- lifelines/fitters/cox_time_varying_fitter.py +1 -1
- lifelines/fitters/coxph_fitter.py +7 -5
- lifelines/fitters/generalized_gamma_fitter.py +6 -5
- lifelines/fitters/kaplan_meier_fitter.py +9 -3
- lifelines/fitters/mixins.py +11 -6
- lifelines/fitters/nelson_aalen_fitter.py +3 -3
- lifelines/fitters/npmle.py +1 -1
- lifelines/fitters/piecewise_exponential_regression_fitter.py +1 -1
- lifelines/generate_datasets.py +6 -6
- lifelines/utils/__init__.py +11 -13
- lifelines/version.py +1 -1
- {lifelines-0.27.8.dist-info → lifelines-0.29.0.dist-info}/METADATA +6 -8
- {lifelines-0.27.8.dist-info → lifelines-0.29.0.dist-info}/RECORD +21 -22
- {lifelines-0.27.8.dist-info → lifelines-0.29.0.dist-info}/WHEEL +1 -1
- lifelines/utils/sklearn_adapter.py +0 -135
- {lifelines-0.27.8.dist-info → lifelines-0.29.0.dist-info}/LICENSE +0 -0
- {lifelines-0.27.8.dist-info → lifelines-0.29.0.dist-info}/top_level.txt +0 -0
lifelines/datasets/__init__.py
CHANGED
lifelines/exceptions.py
CHANGED
lifelines/fitters/__init__.py
CHANGED
|
@@ -18,7 +18,7 @@ from autograd.misc import flatten
|
|
|
18
18
|
import autograd.numpy as anp
|
|
19
19
|
|
|
20
20
|
from scipy.optimize import minimize, root_scalar
|
|
21
|
-
from scipy.integrate import
|
|
21
|
+
from scipy.integrate import trapezoid
|
|
22
22
|
from scipy import stats
|
|
23
23
|
|
|
24
24
|
import pandas as pd
|
|
@@ -550,7 +550,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
|
|
|
550
550
|
minimizing_results, previous_results, minimizing_ll = None, None, np.inf
|
|
551
551
|
for method, option in zip(
|
|
552
552
|
["Nelder-Mead", self._scipy_fit_method],
|
|
553
|
-
[{"maxiter":
|
|
553
|
+
[{"maxiter": 400}, {**{"disp": show_progress}, **self._scipy_fit_options, **fit_options}],
|
|
554
554
|
):
|
|
555
555
|
|
|
556
556
|
initial_value = self._initial_values if previous_results is None else utils._to_1d_array(previous_results.x)
|
|
@@ -573,7 +573,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
|
|
|
573
573
|
|
|
574
574
|
# convergence successful.
|
|
575
575
|
# I still need to check for ~np.isnan(minimizing_results.x).any() since minimize will happily
|
|
576
|
-
# return nans even when criteria is
|
|
576
|
+
# return nans even when criteria is satisfied.
|
|
577
577
|
if minimizing_results and minimizing_results.success and ~np.isnan(minimizing_results.x).any():
|
|
578
578
|
sol = utils._to_1d_array(minimizing_results.x)
|
|
579
579
|
# pylint: disable=no-value-for-parameter
|
|
@@ -876,7 +876,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
|
|
|
876
876
|
length n, the end of the period the subject experienced the event in. If the value is equal to the corresponding value in lower_bound, then
|
|
877
877
|
the individual's event was observed (not censored).
|
|
878
878
|
event_observed: numpy array or pd.Series, optional
|
|
879
|
-
length n, if left optional, infer from ``lower_bound`` and ``
|
|
879
|
+
length n, if left optional, infer from ``lower_bound`` and ``upper_bound`` (if lower_bound==upper_bound then event observed, if lower_bound < upper_bound, then event censored)
|
|
880
880
|
timeline: list, optional
|
|
881
881
|
return the estimate at the values in timeline (positively increasing)
|
|
882
882
|
label: string, optional
|
|
@@ -1409,7 +1409,7 @@ class ParametricRegressionFitter(RegressionFitter):
|
|
|
1409
1409
|
def _survival_function(self, params, T, Xs):
|
|
1410
1410
|
return anp.clip(anp.exp(-self._cumulative_hazard(params, T, Xs)), 1e-12, 1 - 1e-12)
|
|
1411
1411
|
|
|
1412
|
-
def _log_likelihood_right_censoring(self, params, Ts, E, W, entries, Xs) -> float:
|
|
1412
|
+
def _log_likelihood_right_censoring(self, params, Ts: tuple, E, W, entries, Xs) -> float:
|
|
1413
1413
|
|
|
1414
1414
|
T = Ts[0]
|
|
1415
1415
|
non_zero_entries = entries > 0
|
|
@@ -1925,7 +1925,7 @@ class ParametricRegressionFitter(RegressionFitter):
|
|
|
1925
1925
|
def _fit_model(
|
|
1926
1926
|
self, likelihood, Ts, Xs, E, weights, entries, fit_options, show_progress=False, user_supplied_initial_point=None
|
|
1927
1927
|
):
|
|
1928
|
-
|
|
1928
|
+
initial_points_as_arrays, unflatten_array_to_dict = self._prepare_initial_points(
|
|
1929
1929
|
user_supplied_initial_point, Ts, E, entries, weights, Xs
|
|
1930
1930
|
)
|
|
1931
1931
|
|
|
@@ -1939,7 +1939,7 @@ class ParametricRegressionFitter(RegressionFitter):
|
|
|
1939
1939
|
|
|
1940
1940
|
minimum_ll = np.inf
|
|
1941
1941
|
minimum_results = None
|
|
1942
|
-
for _initial_point in
|
|
1942
|
+
for _initial_point in initial_points_as_arrays:
|
|
1943
1943
|
|
|
1944
1944
|
if _initial_point.shape[0] != Xs.columns.size:
|
|
1945
1945
|
raise ValueError("initial_point is not the correct shape.")
|
|
@@ -2507,7 +2507,7 @@ class ParametricRegressionFitter(RegressionFitter):
|
|
|
2507
2507
|
warnings.warn("""Approximating the expected value using trapezoid rule.\n""", exceptions.ApproximationWarning)
|
|
2508
2508
|
subjects = utils._get_index(X)
|
|
2509
2509
|
v = self.predict_survival_function(X, conditional_after=conditional_after)[subjects]
|
|
2510
|
-
return pd.Series(
|
|
2510
|
+
return pd.Series(trapezoid(v.values.T, v.index), index=subjects).squeeze()
|
|
2511
2511
|
|
|
2512
2512
|
@property
|
|
2513
2513
|
def median_survival_time_(self):
|
|
@@ -3365,6 +3365,8 @@ class ParametericAFTRegressionFitter(ParametricRegressionFitter):
|
|
|
3365
3365
|
also display the baseline survival, defined as the survival at the mean of the original dataset.
|
|
3366
3366
|
times: iterable
|
|
3367
3367
|
pass in a times to plot
|
|
3368
|
+
y: str
|
|
3369
|
+
one of "survival_function", "hazard", "cumulative_hazard". Default "survival_function"
|
|
3368
3370
|
kwargs:
|
|
3369
3371
|
pass in additional plotting commands
|
|
3370
3372
|
|
|
@@ -6,7 +6,7 @@ import time
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import pandas as pd
|
|
8
8
|
from numpy.linalg import LinAlgError
|
|
9
|
-
from scipy.integrate import
|
|
9
|
+
from scipy.integrate import trapezoid
|
|
10
10
|
|
|
11
11
|
from lifelines.fitters import RegressionFitter
|
|
12
12
|
from lifelines.utils.printer import Printer
|
|
@@ -396,7 +396,7 @@ It's important to know that the naive variance estimates of the coefficients are
|
|
|
396
396
|
"""
|
|
397
397
|
index = _get_index(X)
|
|
398
398
|
t = self._index
|
|
399
|
-
return pd.Series(
|
|
399
|
+
return pd.Series(trapezoid(self.predict_survival_function(X)[index].values.T, t), index=index)
|
|
400
400
|
|
|
401
401
|
def _compute_confidence_intervals(self):
|
|
402
402
|
ci = 100 * (1 - self.alpha)
|
|
@@ -72,7 +72,14 @@ class BreslowFlemingHarringtonFitter(NonParametricUnivariateFitter):
|
|
|
72
72
|
alpha = coalesce(alpha, self.alpha)
|
|
73
73
|
|
|
74
74
|
naf = NelsonAalenFitter(alpha=alpha)
|
|
75
|
-
naf.fit(
|
|
75
|
+
naf.fit(
|
|
76
|
+
durations,
|
|
77
|
+
event_observed=event_observed,
|
|
78
|
+
timeline=timeline,
|
|
79
|
+
label=self._label,
|
|
80
|
+
entry=entry,
|
|
81
|
+
ci_labels=ci_labels,
|
|
82
|
+
)
|
|
76
83
|
self.durations, self.event_observed, self.timeline, self.entry, self.event_table, self.weights = (
|
|
77
84
|
naf.durations,
|
|
78
85
|
naf.event_observed,
|
|
@@ -87,6 +94,7 @@ class BreslowFlemingHarringtonFitter(NonParametricUnivariateFitter):
|
|
|
87
94
|
self.confidence_interval_ = np.exp(-naf.confidence_interval_)
|
|
88
95
|
self.confidence_interval_survival_function_ = self.confidence_interval_
|
|
89
96
|
self.confidence_interval_cumulative_density = 1 - self.confidence_interval_
|
|
97
|
+
self.confidence_interval_cumulative_density[:] = np.fliplr(self.confidence_interval_cumulative_density.values)
|
|
90
98
|
|
|
91
99
|
# estimation methods
|
|
92
100
|
self._estimation_method = "survival_function_"
|
|
@@ -801,7 +801,7 @@ See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-sep
|
|
|
801
801
|
hazards = self.predict_partial_hazard(tv_data).values
|
|
802
802
|
|
|
803
803
|
unique_death_times = np.unique(stop[events.values])
|
|
804
|
-
baseline_hazard_ = pd.DataFrame(np.zeros_like(unique_death_times), index=unique_death_times, columns=["baseline hazard"])
|
|
804
|
+
baseline_hazard_ = pd.DataFrame(np.zeros_like(unique_death_times).astype(float), index=unique_death_times, columns=["baseline hazard"])
|
|
805
805
|
|
|
806
806
|
for t in unique_death_times:
|
|
807
807
|
ix = (start.values < t) & (t <= stop.values)
|
|
@@ -9,7 +9,7 @@ import time
|
|
|
9
9
|
from numpy import dot, einsum, log, exp, zeros, arange, multiply, ndarray
|
|
10
10
|
import numpy as np
|
|
11
11
|
from scipy.linalg import solve as spsolve, LinAlgError, norm, inv
|
|
12
|
-
from scipy.integrate import
|
|
12
|
+
from scipy.integrate import trapezoid
|
|
13
13
|
from scipy import stats
|
|
14
14
|
from pandas import DataFrame, Series, Index
|
|
15
15
|
import pandas as pd
|
|
@@ -80,7 +80,7 @@ class CoxPHFitter(RegressionFitter, ProportionalHazardMixin):
|
|
|
80
80
|
When ``baseline_estimation_method="spline"``, this allows customizing the points in the time axis for the baseline hazard curve.
|
|
81
81
|
To use evenly-spaced points in time, the ``n_baseline_knots`` parameter can be employed instead.
|
|
82
82
|
|
|
83
|
-
breakpoints:
|
|
83
|
+
breakpoints: list, optional
|
|
84
84
|
Used when ``baseline_estimation_method="piecewise"``. Set the positions of the baseline hazard breakpoints.
|
|
85
85
|
|
|
86
86
|
Examples
|
|
@@ -2514,7 +2514,7 @@ See https://stats.stackexchange.com/q/11109/11867 for more.\n",
|
|
|
2514
2514
|
"""
|
|
2515
2515
|
subjects = utils._get_index(X)
|
|
2516
2516
|
v = self.predict_survival_function(X, conditional_after=conditional_after)[subjects]
|
|
2517
|
-
return pd.Series(
|
|
2517
|
+
return pd.Series(trapezoid(v.values.T, v.index), index=subjects)
|
|
2518
2518
|
|
|
2519
2519
|
def _compute_baseline_hazard(self, partial_hazards: DataFrame, name: Any) -> pd.DataFrame:
|
|
2520
2520
|
# https://stats.stackexchange.com/questions/46532/cox-baseline-hazard
|
|
@@ -3223,7 +3223,7 @@ class ParametricPiecewiseBaselinePHFitter(ParametricCoxModelFitter, Proportional
|
|
|
3223
3223
|
|
|
3224
3224
|
for stratum, stratified_X in df.groupby(self.strata):
|
|
3225
3225
|
log_lambdas_ = anp.array(
|
|
3226
|
-
[0] + [self.params_[self._strata_labeler(stratum, i)][0] for i in range(2, self.n_breakpoints + 2)]
|
|
3226
|
+
[0] + [self.params_.loc[self._strata_labeler(stratum, i)].iloc[0] for i in range(2, self.n_breakpoints + 2)]
|
|
3227
3227
|
)
|
|
3228
3228
|
lambdas_ = np.exp(log_lambdas_)
|
|
3229
3229
|
|
|
@@ -3237,7 +3237,9 @@ class ParametricPiecewiseBaselinePHFitter(ParametricCoxModelFitter, Proportional
|
|
|
3237
3237
|
return cumulative_hazard
|
|
3238
3238
|
|
|
3239
3239
|
else:
|
|
3240
|
-
log_lambdas_ = np.array(
|
|
3240
|
+
log_lambdas_ = np.array(
|
|
3241
|
+
[0] + [self.params_.loc[param].iloc[0] for param in self._fitted_parameter_names if param != "beta_"]
|
|
3242
|
+
)
|
|
3241
3243
|
lambdas_ = np.exp(log_lambdas_)
|
|
3242
3244
|
|
|
3243
3245
|
Xs = self.regressors.transform_df(df)
|
|
@@ -105,6 +105,7 @@ class GeneralizedGammaFitter(KnownModelParametricUnivariateFitter):
|
|
|
105
105
|
"""
|
|
106
106
|
|
|
107
107
|
_scipy_fit_method = "SLSQP"
|
|
108
|
+
_scipy_fit_options = {"maxiter": 10_000, "maxfev": 10_000}
|
|
108
109
|
_fitted_parameter_names = ["mu_", "ln_sigma_", "lambda_"]
|
|
109
110
|
_bounds = [(None, None), (None, None), (None, None)]
|
|
110
111
|
_compare_to_values = np.array([0.0, 0.0, 1.0])
|
|
@@ -117,14 +118,14 @@ class GeneralizedGammaFitter(KnownModelParametricUnivariateFitter):
|
|
|
117
118
|
elif CensoringType.is_interval_censoring(self):
|
|
118
119
|
# this fails if Ts[1] == Ts[0], so we add a some fudge factors.
|
|
119
120
|
log_data = log(Ts[1] - Ts[0] + 0.1)
|
|
120
|
-
return np.array([log_data.mean(), log(log_data.std() + 0.
|
|
121
|
+
return np.array([log_data.mean() * 1.5, log(log_data.std() + 0.1), 1.0])
|
|
121
122
|
|
|
122
123
|
def _cumulative_hazard(self, params, times):
|
|
123
124
|
mu_, ln_sigma_, lambda_ = params
|
|
124
125
|
|
|
125
126
|
sigma_ = safe_exp(ln_sigma_)
|
|
126
127
|
Z = (log(times) - mu_) / sigma_
|
|
127
|
-
ilambda_2 = 1 / lambda_
|
|
128
|
+
ilambda_2 = 1 / lambda_**2
|
|
128
129
|
clipped_exp = np.clip(safe_exp(lambda_ * Z) * ilambda_2, 1e-300, 1e20)
|
|
129
130
|
|
|
130
131
|
if lambda_ > 0:
|
|
@@ -137,7 +138,7 @@ class GeneralizedGammaFitter(KnownModelParametricUnivariateFitter):
|
|
|
137
138
|
|
|
138
139
|
def _log_hazard(self, params, times):
|
|
139
140
|
mu_, ln_sigma_, lambda_ = params
|
|
140
|
-
ilambda_2 = 1 / lambda_
|
|
141
|
+
ilambda_2 = 1 / lambda_**2
|
|
141
142
|
Z = (log(times) - mu_) / safe_exp(ln_sigma_)
|
|
142
143
|
clipped_exp = np.clip(safe_exp(lambda_ * Z) * ilambda_2, 1e-300, 1e20)
|
|
143
144
|
if lambda_ > 0:
|
|
@@ -171,5 +172,5 @@ class GeneralizedGammaFitter(KnownModelParametricUnivariateFitter):
|
|
|
171
172
|
sigma_ = exp(self.ln_sigma_)
|
|
172
173
|
|
|
173
174
|
if lambda_ > 0:
|
|
174
|
-
return exp(sigma_ * log(gammainccinv(1 / lambda_
|
|
175
|
-
return exp(sigma_ * log(gammaincinv(1 / lambda_
|
|
175
|
+
return exp(sigma_ * log(gammainccinv(1 / lambda_**2, p) * lambda_**2) / lambda_) * exp(self.mu_)
|
|
176
|
+
return exp(sigma_ * log(gammaincinv(1 / lambda_**2, p) * lambda_**2) / lambda_) * exp(self.mu_)
|
|
@@ -351,9 +351,14 @@ class KaplanMeierFitter(NonParametricUnivariateFitter):
|
|
|
351
351
|
primary_estimate_name = "survival_function_"
|
|
352
352
|
secondary_estimate_name = "cumulative_density_"
|
|
353
353
|
|
|
354
|
-
(
|
|
355
|
-
durations,
|
|
356
|
-
|
|
354
|
+
(
|
|
355
|
+
self.durations,
|
|
356
|
+
self.event_observed,
|
|
357
|
+
self.timeline,
|
|
358
|
+
self.entry,
|
|
359
|
+
self.event_table,
|
|
360
|
+
self.weights,
|
|
361
|
+
) = _preprocess_inputs(durations, event_observed, timeline, entry, weights)
|
|
357
362
|
|
|
358
363
|
alpha = alpha if alpha else self.alpha
|
|
359
364
|
log_estimate, cumulative_sq_ = _additive_estimate(
|
|
@@ -386,6 +391,7 @@ class KaplanMeierFitter(NonParametricUnivariateFitter):
|
|
|
386
391
|
|
|
387
392
|
self.confidence_interval_survival_function_ = self.confidence_interval_
|
|
388
393
|
self.confidence_interval_cumulative_density_ = 1 - self.confidence_interval_
|
|
394
|
+
self.confidence_interval_cumulative_density_[:] = np.fliplr(self.confidence_interval_cumulative_density_.values)
|
|
389
395
|
self._median = median_survival_times(self.survival_function_)
|
|
390
396
|
self._cumulative_sq_ = cumulative_sq_
|
|
391
397
|
|
lifelines/fitters/mixins.py
CHANGED
|
@@ -4,6 +4,7 @@ from textwrap import dedent, fill
|
|
|
4
4
|
from autograd import numpy as anp
|
|
5
5
|
import numpy as np
|
|
6
6
|
from pandas import DataFrame, Series
|
|
7
|
+
from lifelines.exceptions import ProportionalHazardAssumptionError
|
|
7
8
|
from lifelines.statistics import proportional_hazard_test, TimeTransformers
|
|
8
9
|
from lifelines.utils import format_p_value
|
|
9
10
|
from lifelines.utils.lowess import lowess
|
|
@@ -28,6 +29,7 @@ class ProportionalHazardMixin:
|
|
|
28
29
|
p_value_threshold: float = 0.01,
|
|
29
30
|
plot_n_bootstraps: int = 15,
|
|
30
31
|
columns: Optional[List[str]] = None,
|
|
32
|
+
raise_on_fail: bool = False,
|
|
31
33
|
) -> None:
|
|
32
34
|
"""
|
|
33
35
|
Use this function to test the proportional hazards assumption. See usage example at
|
|
@@ -51,6 +53,8 @@ class ProportionalHazardMixin:
|
|
|
51
53
|
the function significantly.
|
|
52
54
|
columns: list, optional
|
|
53
55
|
specify a subset of columns to test.
|
|
56
|
+
raise_on_fail: bool, optional
|
|
57
|
+
throw a ``ProportionalHazardAssumptionError`` if the test fails. Default: False.
|
|
54
58
|
|
|
55
59
|
Returns
|
|
56
60
|
--------
|
|
@@ -106,8 +110,8 @@ class ProportionalHazardMixin:
|
|
|
106
110
|
axes = []
|
|
107
111
|
|
|
108
112
|
for variable in self.params_.index.intersection(columns or self.params_.index):
|
|
109
|
-
|
|
110
|
-
|
|
113
|
+
minimum_observed_p_value = test_results.summary.loc[variable, "p"].min()
|
|
114
|
+
|
|
111
115
|
# plot is done (regardless of test result) whenever `show_plots = True`
|
|
112
116
|
if show_plots:
|
|
113
117
|
axes.append([])
|
|
@@ -150,7 +154,7 @@ class ProportionalHazardMixin:
|
|
|
150
154
|
plt.tight_layout()
|
|
151
155
|
plt.subplots_adjust(top=0.90)
|
|
152
156
|
|
|
153
|
-
if np.round(
|
|
157
|
+
if np.round(minimum_observed_p_value, 2) > p_value_threshold:
|
|
154
158
|
continue
|
|
155
159
|
|
|
156
160
|
counter += 1
|
|
@@ -178,7 +182,7 @@ class ProportionalHazardMixin:
|
|
|
178
182
|
print()
|
|
179
183
|
print(
|
|
180
184
|
"%d. Variable '%s' failed the non-proportional test: p-value is %s."
|
|
181
|
-
% (counter, variable, format_p_value(4)(
|
|
185
|
+
% (counter, variable, format_p_value(4)(minimum_observed_p_value)),
|
|
182
186
|
end="\n\n",
|
|
183
187
|
)
|
|
184
188
|
|
|
@@ -224,9 +228,8 @@ class ProportionalHazardMixin:
|
|
|
224
228
|
),
|
|
225
229
|
end="\n\n",
|
|
226
230
|
)
|
|
227
|
-
#################
|
|
231
|
+
#################
|
|
228
232
|
|
|
229
|
-
|
|
230
233
|
if advice and counter > 0:
|
|
231
234
|
print(
|
|
232
235
|
dedent(
|
|
@@ -243,6 +246,8 @@ class ProportionalHazardMixin:
|
|
|
243
246
|
|
|
244
247
|
if counter == 0:
|
|
245
248
|
print("Proportional hazard assumption looks okay.")
|
|
249
|
+
elif raise_on_fail:
|
|
250
|
+
raise ProportionalHazardAssumptionError()
|
|
246
251
|
return axes
|
|
247
252
|
|
|
248
253
|
@property
|
|
@@ -183,7 +183,7 @@ class NelsonAalenFitter(UnivariateFitter):
|
|
|
183
183
|
)
|
|
184
184
|
|
|
185
185
|
def _variance_f_discrete(self, population, deaths):
|
|
186
|
-
return (
|
|
186
|
+
return (1 - deaths / population) * (deaths / population) * (1.0 / population)
|
|
187
187
|
|
|
188
188
|
def _additive_f_smooth(self, population, deaths):
|
|
189
189
|
cum_ = np.cumsum(1.0 / np.arange(1, np.max(population) + 1))
|
|
@@ -200,7 +200,7 @@ class NelsonAalenFitter(UnivariateFitter):
|
|
|
200
200
|
Parameters
|
|
201
201
|
-----------
|
|
202
202
|
bandwidth: float
|
|
203
|
-
the
|
|
203
|
+
the bandwidth used in the Epanechnikov kernel.
|
|
204
204
|
|
|
205
205
|
Returns
|
|
206
206
|
-------
|
|
@@ -239,7 +239,7 @@ class NelsonAalenFitter(UnivariateFitter):
|
|
|
239
239
|
C = var_hazard_.values != 0.0 # only consider the points with jumps
|
|
240
240
|
std_hazard_ = np.sqrt(
|
|
241
241
|
1.0
|
|
242
|
-
/ (bandwidth
|
|
242
|
+
/ (bandwidth**2)
|
|
243
243
|
* np.dot(epanechnikov_kernel(timeline[:, None], timeline[C][None, :], bandwidth) ** 2, var_hazard_.values[C])
|
|
244
244
|
)
|
|
245
245
|
values = {
|
lifelines/fitters/npmle.py
CHANGED
|
@@ -291,7 +291,7 @@ def reconstruct_survival_function(
|
|
|
291
291
|
|
|
292
292
|
# First backfill at events between known observations
|
|
293
293
|
# Second fill all events _outside_ known obs with running_sum
|
|
294
|
-
return full_dataframe.combine_first(df).bfill().fillna(running_sum).clip(lower=0.0)
|
|
294
|
+
return full_dataframe.combine_first(df).astype(float).bfill().fillna(running_sum).clip(lower=0.0)
|
|
295
295
|
|
|
296
296
|
|
|
297
297
|
def npmle_compute_confidence_intervals(left, right, mle_, alpha=0.05, samples=1000):
|
|
@@ -66,7 +66,7 @@ class PiecewiseExponentialRegressionFitter(ParametricRegressionFitter):
|
|
|
66
66
|
coef_penalty = 0
|
|
67
67
|
if self.penalizer > 0:
|
|
68
68
|
for i in range(params_stacked.shape[1]):
|
|
69
|
-
if not self._cols_to_not_penalize[i]:
|
|
69
|
+
if not self._cols_to_not_penalize.iloc[i]:
|
|
70
70
|
coef_penalty = coef_penalty + (params_stacked[:, i]).var()
|
|
71
71
|
|
|
72
72
|
return neg_ll + self.penalizer * coef_penalty
|
lifelines/generate_datasets.py
CHANGED
|
@@ -5,7 +5,7 @@ import pandas as pd
|
|
|
5
5
|
|
|
6
6
|
from scipy import stats
|
|
7
7
|
from scipy.optimize import newton
|
|
8
|
-
from scipy.integrate import
|
|
8
|
+
from scipy.integrate import cumulative_trapezoid
|
|
9
9
|
|
|
10
10
|
random = np.random
|
|
11
11
|
|
|
@@ -172,7 +172,7 @@ def constant_coefficients(d, timelines, constant=True, independent=0):
|
|
|
172
172
|
timelines: the observational times
|
|
173
173
|
constant: True for constant coefficients
|
|
174
174
|
independent: the number of coffients to set to 0 (covariate is ind of survival), or
|
|
175
|
-
a list of covariates to make
|
|
175
|
+
a list of covariates to make independent.
|
|
176
176
|
|
|
177
177
|
returns a matrix (t,d+1) of coefficients
|
|
178
178
|
"""
|
|
@@ -187,7 +187,7 @@ def time_varying_coefficients(d, timelines, constant=False, independent=0, randg
|
|
|
187
187
|
timelines: the observational times
|
|
188
188
|
constant: True for constant coefficients
|
|
189
189
|
independent: the number of coffients to set to 0 (covariate is ind of survival), or
|
|
190
|
-
a list of covariates to make
|
|
190
|
+
a list of covariates to make independent.
|
|
191
191
|
randgen: how scalar coefficients (betas) are sampled.
|
|
192
192
|
|
|
193
193
|
returns a matrix (t,d+1) of coefficients
|
|
@@ -221,7 +221,7 @@ def generate_hazard_rates(n, d, timelines, constant=False, independent=0, n_bina
|
|
|
221
221
|
n: the number of instances
|
|
222
222
|
d: the number of covariates
|
|
223
223
|
lifelines: the observational times
|
|
224
|
-
constant: make the
|
|
224
|
+
constant: make the coefficients constant (not time dependent)
|
|
225
225
|
n_binary: the number of binary covariates
|
|
226
226
|
model: from ["aalen", "cox"]
|
|
227
227
|
|
|
@@ -253,7 +253,7 @@ def generate_random_lifetimes(hazard_rates, timelines, size=1, censor=None):
|
|
|
253
253
|
timelines: (t,) the observation times
|
|
254
254
|
size: the number to return, per hardard rate
|
|
255
255
|
censor: If True, adds uniform censoring between timelines.max() and 0
|
|
256
|
-
If a
|
|
256
|
+
If a positive number, censors all events above that value.
|
|
257
257
|
If (n,) np.array >=0 , censor elementwise.
|
|
258
258
|
|
|
259
259
|
|
|
@@ -308,7 +308,7 @@ def cumulative_integral(fx, x):
|
|
|
308
308
|
fx: (n,d) numpy array, what you want to integral of
|
|
309
309
|
x: (n,) numpy array, location to integrate over.
|
|
310
310
|
"""
|
|
311
|
-
return
|
|
311
|
+
return cumulative_trapezoid(fx.T, x, initial=0).T
|
|
312
312
|
|
|
313
313
|
|
|
314
314
|
def construct_survival_curves(hazard_rates, timelines):
|
lifelines/utils/__init__.py
CHANGED
|
@@ -11,7 +11,7 @@ import warnings
|
|
|
11
11
|
from numpy import ndarray
|
|
12
12
|
import numpy as np
|
|
13
13
|
|
|
14
|
-
from scipy.integrate import quad,
|
|
14
|
+
from scipy.integrate import quad, trapezoid
|
|
15
15
|
from scipy.linalg import solve
|
|
16
16
|
from scipy import stats
|
|
17
17
|
|
|
@@ -266,7 +266,7 @@ def _expected_value_of_survival_up_to_t(model_or_survival_function, t: float = n
|
|
|
266
266
|
)
|
|
267
267
|
sf = model_or_survival_function.loc[:t]
|
|
268
268
|
sf = pd.concat((sf, pd.DataFrame([1], index=[0], columns=sf.columns))).sort_index()
|
|
269
|
-
return
|
|
269
|
+
return trapezoid(y=sf.values[:, 0], x=sf.index)
|
|
270
270
|
elif isinstance(model_or_survival_function, lifelines.fitters.UnivariateFitter):
|
|
271
271
|
# lifelines model
|
|
272
272
|
model = model_or_survival_function
|
|
@@ -311,9 +311,9 @@ def _expected_value_of_survival_squared_up_to_t(
|
|
|
311
311
|
|
|
312
312
|
if isinstance(model_or_survival_function, pd.DataFrame):
|
|
313
313
|
sf = model_or_survival_function.loc[:t]
|
|
314
|
-
sf =
|
|
314
|
+
sf = pd.concat((sf, pd.DataFrame([1], index=[0], columns=sf.columns))).sort_index()
|
|
315
315
|
sf_tau = sf * sf.index.values[:, None]
|
|
316
|
-
return 2 *
|
|
316
|
+
return 2 * trapezoid(y=sf_tau.values[:, 0], x=sf_tau.index)
|
|
317
317
|
elif isinstance(model_or_survival_function, lifelines.fitters.UnivariateFitter):
|
|
318
318
|
# lifelines model
|
|
319
319
|
model = model_or_survival_function
|
|
@@ -556,12 +556,12 @@ def _group_event_table_by_intervals(event_table, intervals) -> pd.DataFrame:
|
|
|
556
556
|
|
|
557
557
|
intervals = np.arange(0, event_max + bin_width, bin_width)
|
|
558
558
|
|
|
559
|
-
event_table = event_table.groupby(pd.cut(event_table["event_at"], intervals, include_lowest=True)).agg(
|
|
559
|
+
event_table = event_table.groupby(pd.cut(event_table["event_at"], intervals, include_lowest=True), observed=False).agg(
|
|
560
560
|
{"removed": ["sum"], "observed": ["sum"], "censored": ["sum"], "at_risk": ["max"]}
|
|
561
561
|
)
|
|
562
562
|
# convert columns from multiindex
|
|
563
563
|
event_table.columns = event_table.columns.droplevel(1)
|
|
564
|
-
return event_table.bfill()
|
|
564
|
+
return event_table.bfill().fillna(0)
|
|
565
565
|
|
|
566
566
|
|
|
567
567
|
def survival_events_from_table(survival_table, observed_deaths_col="observed", censored_col="censored"):
|
|
@@ -648,7 +648,7 @@ def datetimes_to_durations(
|
|
|
648
648
|
the units of time to use. See Pandas 'freq'. Default 'D' for days.
|
|
649
649
|
dayfirst: bool, optional (default=False)
|
|
650
650
|
see Pandas `to_datetime`
|
|
651
|
-
na_values : list, optional
|
|
651
|
+
na_values : list[str], optional
|
|
652
652
|
list of values to recognize as NA/NaN. Ex: ['', 'NaT']
|
|
653
653
|
format:
|
|
654
654
|
see Pandas `to_datetime`
|
|
@@ -679,7 +679,7 @@ def datetimes_to_durations(
|
|
|
679
679
|
start_times = pd.Series(start_times).copy()
|
|
680
680
|
end_times = pd.Series(end_times).copy()
|
|
681
681
|
|
|
682
|
-
C = ~(pd.isnull(end_times).values | end_times.isin(na_values or [""]))
|
|
682
|
+
C = ~(pd.isnull(end_times).values | end_times.astype(str).isin(na_values or [""]))
|
|
683
683
|
end_times[~C] = fill_date_
|
|
684
684
|
start_times_ = pd.to_datetime(start_times, dayfirst=dayfirst, format=format)
|
|
685
685
|
end_times_ = pd.to_datetime(end_times, dayfirst=dayfirst, errors="coerce", format=format)
|
|
@@ -744,9 +744,6 @@ def k_fold_cross_validation(
|
|
|
744
744
|
results: list
|
|
745
745
|
(k,1) list of scores for each fold. The scores can be anything.
|
|
746
746
|
|
|
747
|
-
See Also
|
|
748
|
-
---------
|
|
749
|
-
lifelines.utils.sklearn_adapter.sklearn_adapter
|
|
750
747
|
|
|
751
748
|
"""
|
|
752
749
|
# Make sure fitters is a list
|
|
@@ -884,6 +881,7 @@ def _additive_estimate(events, timeline, _additive_f, _additive_var, reverse):
|
|
|
884
881
|
population = events["at_risk"] - entrances
|
|
885
882
|
|
|
886
883
|
estimate_ = np.cumsum(_additive_f(population, deaths))
|
|
884
|
+
|
|
887
885
|
var_ = np.cumsum(_additive_var(population, deaths))
|
|
888
886
|
|
|
889
887
|
timeline = sorted(timeline)
|
|
@@ -1466,7 +1464,7 @@ def add_covariate_to_timeline(
|
|
|
1466
1464
|
cv = cv.sort_values([id_col, duration_col])
|
|
1467
1465
|
cvs = cv.pipe(remove_redundant_rows).pipe(transform_cv_to_long_format).groupby(id_col, sort=True)
|
|
1468
1466
|
|
|
1469
|
-
long_form_df = long_form_df.groupby(id_col, group_keys=False, sort=True).apply(expand, cvs=cvs)
|
|
1467
|
+
long_form_df = long_form_df.groupby(id_col, group_keys=False, sort=True)[long_form_df.columns].apply(expand, cvs=cvs)
|
|
1470
1468
|
return long_form_df.reset_index(drop=True)
|
|
1471
1469
|
|
|
1472
1470
|
|
|
@@ -1508,7 +1506,7 @@ def covariates_from_event_matrix(df, id_col) -> pd.DataFrame:
|
|
|
1508
1506
|
"""
|
|
1509
1507
|
df = df.set_index(id_col)
|
|
1510
1508
|
df = df.fillna(np.inf)
|
|
1511
|
-
df = df.stack(
|
|
1509
|
+
df = df.stack(future_stack=True).reset_index()
|
|
1512
1510
|
df.columns = [id_col, "event", "duration"]
|
|
1513
1511
|
df["_counter"] = 1
|
|
1514
1512
|
return (
|
lifelines/version.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: lifelines
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.29.0
|
|
4
4
|
Summary: Survival analysis in Python, including Kaplan Meier, Nelson Aalen and regression
|
|
5
5
|
Home-page: https://github.com/CamDavidsonPilon/lifelines
|
|
6
6
|
Author: Cameron Davidson-Pilon
|
|
@@ -9,18 +9,16 @@ License: MIT
|
|
|
9
9
|
Classifier: Development Status :: 4 - Beta
|
|
10
10
|
Classifier: License :: OSI Approved :: MIT License
|
|
11
11
|
Classifier: Programming Language :: Python
|
|
12
|
-
Classifier: Programming Language :: Python :: 3.7
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
14
12
|
Classifier: Programming Language :: Python :: 3.9
|
|
15
13
|
Classifier: Programming Language :: Python :: 3.10
|
|
16
14
|
Classifier: Programming Language :: Python :: 3.11
|
|
17
15
|
Classifier: Topic :: Scientific/Engineering
|
|
18
|
-
Requires-Python: >=3.
|
|
16
|
+
Requires-Python: >=3.9
|
|
19
17
|
Description-Content-Type: text/markdown
|
|
20
18
|
License-File: LICENSE
|
|
21
19
|
Requires-Dist: numpy <2.0,>=1.14.0
|
|
22
|
-
Requires-Dist: scipy >=1.
|
|
23
|
-
Requires-Dist: pandas >=1
|
|
20
|
+
Requires-Dist: scipy >=1.7.0
|
|
21
|
+
Requires-Dist: pandas >=2.1
|
|
24
22
|
Requires-Dist: matplotlib >=3.0
|
|
25
23
|
Requires-Dist: autograd >=1.5
|
|
26
24
|
Requires-Dist: autograd-gamma >=0.3
|
|
@@ -52,8 +50,8 @@ If you are new to survival analysis, wondering why it is useful, or are interest
|
|
|
52
50
|
|
|
53
51
|
## Contact
|
|
54
52
|
- Start a conversation in our [Discussions room](https://github.com/CamDavidsonPilon/lifelines/discussions).
|
|
55
|
-
- Some users have posted common questions at [stats.stackexchange.com](https://stats.stackexchange.com/search?tab=votes&q=%22lifelines%22%20is%3aquestion)
|
|
56
|
-
-
|
|
53
|
+
- Some users have posted common questions at [stats.stackexchange.com](https://stats.stackexchange.com/search?tab=votes&q=%22lifelines%22%20is%3aquestion).
|
|
54
|
+
- Creating an issue in the [Github repository](https://github.com/camdavidsonpilon/lifelines).
|
|
57
55
|
|
|
58
56
|
## Development
|
|
59
57
|
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
lifelines/__init__.py,sha256=F_sKrawq6L4GwTPgOu4FjoGUKQ2gfelAVIQOW1Ee8Ao,2241
|
|
2
2
|
lifelines/calibration.py,sha256=Luii7bkJ2YB0jpuOYYhI22aUyEc1gLsS10Pno6Sqo98,4113
|
|
3
|
-
lifelines/exceptions.py,sha256=
|
|
4
|
-
lifelines/generate_datasets.py,sha256=
|
|
3
|
+
lifelines/exceptions.py,sha256=8T1vQuI6Fnf_5OfiJahksn5Soe-SmU9Y2IA7HYen460,577
|
|
4
|
+
lifelines/generate_datasets.py,sha256=nwGVpkcVRKH70N8z0Z-y7GgetX8suZZ1FFmdjOB8tBs,10188
|
|
5
5
|
lifelines/plotting.py,sha256=sQmwpSziHzVQoWoe_gll4LInrjg-E4FpeWMp07wurNo,35069
|
|
6
6
|
lifelines/statistics.py,sha256=cOszUYz87elnbTAt6V3fTrHwPjB9HFI1hxjLvKypS6k,35129
|
|
7
|
-
lifelines/version.py,sha256=
|
|
7
|
+
lifelines/version.py,sha256=pYOd-rP1moJefUJJ6DbkYBuzORUtOVDOBiCBT13-usg,88
|
|
8
8
|
lifelines/datasets/CuZn-LeftCensoredDataset.csv,sha256=PxTdZcJPPbhtaadpHjhMFVcUxmSn84BuDarujZIJpm4,1996
|
|
9
|
-
lifelines/datasets/__init__.py,sha256=
|
|
9
|
+
lifelines/datasets/__init__.py,sha256=wiKbbNj-SSrgk_jysTdeQo1ceCmHXKje8WIzwBJAH_E,19977
|
|
10
10
|
lifelines/datasets/anderson.csv,sha256=nTAtTK8mf0ymU88nKvO2Fj0WL9SE9o4S0GVujmX8Cl4,580
|
|
11
11
|
lifelines/datasets/c_botulinum_lag_phase.csv,sha256=K3vda-75OqH-UxMAJIpYf0LldUZE5tiwch5ohP2v9Yw,386
|
|
12
12
|
lifelines/datasets/canadian_senators.csv,sha256=nbpDx6e_fLmalLaS9aeWC-2EIsE850XsOZTpN_OWqn0,163046
|
|
@@ -35,39 +35,38 @@ lifelines/datasets/rossi.csv,sha256=AhRAAXDgfzAVooXtyiAUysDa6KrBJfy6rWQkkOBfiSw,
|
|
|
35
35
|
lifelines/datasets/stanford_heart.csv,sha256=HWS9SqJjQ6gDmvxxKCJLR1cOIJ8XKuwTNu4bW8tKWVM,8859
|
|
36
36
|
lifelines/datasets/static_test.csv,sha256=w2PtSkXknCZfciwqcOZGlA8znBO7jTcq_AJ5e6NStAk,101
|
|
37
37
|
lifelines/datasets/waltons_dataset.csv,sha256=Fd4UX6tGYxgGhXtH3T-S81wIGIbVohv5yom4aw0kXL8,2449
|
|
38
|
-
lifelines/fitters/__init__.py,sha256=
|
|
39
|
-
lifelines/fitters/aalen_additive_fitter.py,sha256=
|
|
38
|
+
lifelines/fitters/__init__.py,sha256=a3ACmN8KANdg7uyZ36lSIMvUx0rZKB3HhvHdTgbQfP0,151648
|
|
39
|
+
lifelines/fitters/aalen_additive_fitter.py,sha256=xca1uoNbuPS2YoGQ73GYa5JLZTLCt9otJPhhi2AJm4A,21526
|
|
40
40
|
lifelines/fitters/aalen_johansen_fitter.py,sha256=w_2MV7Bbtr0swJ0VdySqirhlGsjbYyqduRx9iLKd6XA,14172
|
|
41
|
-
lifelines/fitters/breslow_fleming_harrington_fitter.py,sha256=
|
|
42
|
-
lifelines/fitters/cox_time_varying_fitter.py,sha256=
|
|
43
|
-
lifelines/fitters/coxph_fitter.py,sha256=
|
|
41
|
+
lifelines/fitters/breslow_fleming_harrington_fitter.py,sha256=_86qU3wMHEyuCKLjhHLERP_ymNnlSvi7chWgi8Kygxg,4293
|
|
42
|
+
lifelines/fitters/cox_time_varying_fitter.py,sha256=cZo9opn4OdFajrj6aBxJDhgWvFIUHdsq7jpgMQ0HchU,34670
|
|
43
|
+
lifelines/fitters/coxph_fitter.py,sha256=zbmwmO1EyHy-19ijuMkF_RCb7dG_VB7DBH7qArHar6w,136920
|
|
44
44
|
lifelines/fitters/crc_spline_fitter.py,sha256=FUaiz4O-Hdke7T5dV8RCl-27oWxrMJLBSXxnRN4QkGQ,3126
|
|
45
45
|
lifelines/fitters/exponential_fitter.py,sha256=Fbb1rtBOrHb_YxFYidzqXcFw7aWsqet_2vqi7s8WJ4U,2857
|
|
46
|
-
lifelines/fitters/generalized_gamma_fitter.py,sha256=
|
|
46
|
+
lifelines/fitters/generalized_gamma_fitter.py,sha256=OiXO9onvYtI2gNvUoxF4mjEjbj7IRZl5R4UZ_RzrSjo,6482
|
|
47
47
|
lifelines/fitters/generalized_gamma_regression_fitter.py,sha256=UzG3dVau0UNdQtM6yW63wabDf7j--rxrdE9AlaVB8Vk,7955
|
|
48
|
-
lifelines/fitters/kaplan_meier_fitter.py,sha256=
|
|
48
|
+
lifelines/fitters/kaplan_meier_fitter.py,sha256=UYPJi4BYcn54F26fc_lkkYzcZV-yUomsBB59ufdLRF8,24209
|
|
49
49
|
lifelines/fitters/log_logistic_aft_fitter.py,sha256=cw179z0_IqvuWgOORHSZ1lBiidHcYkiO4hDi4YDEqRo,7074
|
|
50
50
|
lifelines/fitters/log_logistic_fitter.py,sha256=iTH97i9TrLp5IVBIZHC8nx5rvSn2-KM-wfv1wR_YSPU,4004
|
|
51
51
|
lifelines/fitters/log_normal_aft_fitter.py,sha256=aOcdMR8T4vhy2BKGebrpEJD_lTZIQQ5VsrnuuKkU0RA,7890
|
|
52
52
|
lifelines/fitters/log_normal_fitter.py,sha256=NLn1DCxJ9WJrVaairJPcOu_lShko_-vwoXw6goRR42w,3557
|
|
53
|
-
lifelines/fitters/mixins.py,sha256=
|
|
53
|
+
lifelines/fitters/mixins.py,sha256=5s9FdxHUU0RxvFvmM77QmFiPmO7iyU7upzkF7BmWOec,12827
|
|
54
54
|
lifelines/fitters/mixture_cure_fitter.py,sha256=UetFlv9EfFYMDt95M2iR354lna5RKeWtO_lkoaMmoZE,5416
|
|
55
|
-
lifelines/fitters/nelson_aalen_fitter.py,sha256=
|
|
56
|
-
lifelines/fitters/npmle.py,sha256=
|
|
55
|
+
lifelines/fitters/nelson_aalen_fitter.py,sha256=C_hEuBwZfrYLbd6KNI34jEs-2B7Y3x2SIvp-HQGJW_Y,10687
|
|
56
|
+
lifelines/fitters/npmle.py,sha256=K2PX1YWuygzwUa5H6I2w6CF8uqELJqb1KJEiN5dZbRI,10157
|
|
57
57
|
lifelines/fitters/piecewise_exponential_fitter.py,sha256=j48sXaEODClFmfFP3THb0qJ3_Q7ctJz19j50Uo1QJME,3357
|
|
58
|
-
lifelines/fitters/piecewise_exponential_regression_fitter.py,sha256=
|
|
58
|
+
lifelines/fitters/piecewise_exponential_regression_fitter.py,sha256=kdnsm2oE1i_Sarxiw8lDcGEk8vachmNE8qCJdm3g_6U,4983
|
|
59
59
|
lifelines/fitters/spline_fitter.py,sha256=TnkXPBabgZVqtI90T1-gm6C8k73WhQMrhbEAZw1OX0c,4214
|
|
60
60
|
lifelines/fitters/weibull_aft_fitter.py,sha256=6wtU499AvXxZAE9PdnNQnbzh_NpPcdAEL6zd3xRV8hU,7772
|
|
61
61
|
lifelines/fitters/weibull_fitter.py,sha256=CcII_V5ns00jP5sqv0dn8Yo0T3kdyc4Rkpb2bBuTvjU,3771
|
|
62
|
-
lifelines/utils/__init__.py,sha256=
|
|
62
|
+
lifelines/utils/__init__.py,sha256=IIn6YTAh98n8Jb7y1MZcHlAcrmO5XiVcu2nMrfJVMbE,70500
|
|
63
63
|
lifelines/utils/btree.py,sha256=yevaIsGw_tQsGauXmwBHTMgCBjuuMZQgdHa-nCB-q2I,4369
|
|
64
64
|
lifelines/utils/concordance.py,sha256=hWXrmg1BiK2Hqu9CRzlvkPlnlmZqZcAxH7L1PjaqdC8,12245
|
|
65
65
|
lifelines/utils/lowess.py,sha256=MMydVcnbxqIgsiNcIgVUFtlFycD7v3ezwEGpituvBHs,2541
|
|
66
66
|
lifelines/utils/printer.py,sha256=-nXxu02gs0kaKfoQQ65sH-I45tGmgoFeOOIUSEc53iE,5861
|
|
67
67
|
lifelines/utils/safe_exp.py,sha256=HCCAkwQTx6G2qRC03v9Q_GWqVj8at1Eac1JVrMgS9hg,4350
|
|
68
|
-
lifelines/
|
|
69
|
-
lifelines-0.
|
|
70
|
-
lifelines-0.
|
|
71
|
-
lifelines-0.
|
|
72
|
-
lifelines-0.
|
|
73
|
-
lifelines-0.27.8.dist-info/RECORD,,
|
|
68
|
+
lifelines-0.29.0.dist-info/LICENSE,sha256=AasDeD139SnTdfXbKgN4BMyMgBlRy9YFs60tNrB4wf0,1079
|
|
69
|
+
lifelines-0.29.0.dist-info/METADATA,sha256=JOhlJ11AqPSdC2P1z8KNg_Sh4mhCj9f7ymMBz85cZi8,3187
|
|
70
|
+
lifelines-0.29.0.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
|
|
71
|
+
lifelines-0.29.0.dist-info/top_level.txt,sha256=3i57Z4mtpc6jWrsW0n-_o9Y7CpzytMTeLMPJBHYAo0o,10
|
|
72
|
+
lifelines-0.29.0.dist-info/RECORD,,
|
|
@@ -1,135 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
import inspect
|
|
3
|
-
import pandas as pd
|
|
4
|
-
|
|
5
|
-
try:
|
|
6
|
-
from sklearn.base import BaseEstimator, RegressorMixin, MetaEstimatorMixin
|
|
7
|
-
except ImportError:
|
|
8
|
-
raise ImportError("scikit-learn must be installed on the local system to use this utility class.")
|
|
9
|
-
from . import concordance_index
|
|
10
|
-
|
|
11
|
-
__all__ = ["sklearn_adapter"]
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def filter_kwargs(f, kwargs):
|
|
15
|
-
s = inspect.signature(f)
|
|
16
|
-
res = {k: kwargs[k] for k in s.parameters if k in kwargs}
|
|
17
|
-
return res
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
class _SklearnModel(BaseEstimator, MetaEstimatorMixin, RegressorMixin):
|
|
21
|
-
def __init__(self, **kwargs):
|
|
22
|
-
self._params = kwargs
|
|
23
|
-
self.lifelines_model = self.lifelines_model(**filter_kwargs(self.lifelines_model.__init__, self._params))
|
|
24
|
-
self._params["duration_col"] = "duration_col"
|
|
25
|
-
self._params["event_col"] = self._event_col
|
|
26
|
-
|
|
27
|
-
@property
|
|
28
|
-
def _yColumn(self):
|
|
29
|
-
return self._params["duration_col"]
|
|
30
|
-
|
|
31
|
-
@property
|
|
32
|
-
def _eventColumn(self):
|
|
33
|
-
return self._params["event_col"]
|
|
34
|
-
|
|
35
|
-
def fit(self, X, y=None):
|
|
36
|
-
"""
|
|
37
|
-
|
|
38
|
-
Parameters
|
|
39
|
-
-----------
|
|
40
|
-
|
|
41
|
-
X: DataFrame
|
|
42
|
-
must be a pandas DataFrame (with event_col included, if applicable)
|
|
43
|
-
|
|
44
|
-
"""
|
|
45
|
-
if not isinstance(X, pd.DataFrame):
|
|
46
|
-
raise ValueError("X must be a DataFrame. Got type: {}".format(type(X)))
|
|
47
|
-
|
|
48
|
-
X = X.copy()
|
|
49
|
-
|
|
50
|
-
if y is not None:
|
|
51
|
-
X.insert(len(X.columns), self._yColumn, y, allow_duplicates=False)
|
|
52
|
-
|
|
53
|
-
fit = getattr(self.lifelines_model, self._fit_method)
|
|
54
|
-
self.lifelines_model = fit(df=X, **filter_kwargs(fit, self._params))
|
|
55
|
-
return self
|
|
56
|
-
|
|
57
|
-
def set_params(self, **params):
|
|
58
|
-
for key, value in params.items():
|
|
59
|
-
setattr(self.lifelines_model, key, value)
|
|
60
|
-
return self
|
|
61
|
-
|
|
62
|
-
def get_params(self, deep=True):
|
|
63
|
-
out = {}
|
|
64
|
-
for name, p in inspect.signature(self.lifelines_model.__init__).parameters.items():
|
|
65
|
-
if p.kind < 4: # ignore kwargs
|
|
66
|
-
out[name] = getattr(self.lifelines_model, name)
|
|
67
|
-
return out
|
|
68
|
-
|
|
69
|
-
def predict(self, X, **kwargs):
|
|
70
|
-
"""
|
|
71
|
-
Parameters
|
|
72
|
-
------------
|
|
73
|
-
X: DataFrame or numpy array
|
|
74
|
-
|
|
75
|
-
"""
|
|
76
|
-
predictions = getattr(self.lifelines_model, self._predict_method)(X, **kwargs).squeeze().values
|
|
77
|
-
return predictions
|
|
78
|
-
|
|
79
|
-
def score(self, X, y, **kwargs):
|
|
80
|
-
"""
|
|
81
|
-
|
|
82
|
-
Parameters
|
|
83
|
-
-----------
|
|
84
|
-
|
|
85
|
-
X: DataFrame
|
|
86
|
-
must be a pandas DataFrame (with event_col included, if applicable)
|
|
87
|
-
|
|
88
|
-
"""
|
|
89
|
-
rest_columns = list(set(X.columns) - {self._yColumn, self._eventColumn})
|
|
90
|
-
|
|
91
|
-
x = X.loc[:, rest_columns]
|
|
92
|
-
e = X.loc[:, self._eventColumn] if self._eventColumn else None
|
|
93
|
-
|
|
94
|
-
if y is None:
|
|
95
|
-
y = X.loc[:, self._yColumn]
|
|
96
|
-
|
|
97
|
-
if callable(self._scoring_method):
|
|
98
|
-
res = self._scoring_method(y, self.predict(x, **kwargs), event_observed=e)
|
|
99
|
-
else:
|
|
100
|
-
raise ValueError()
|
|
101
|
-
return res
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
def sklearn_adapter(fitter, event_col=None, predict_method="predict_expectation", scoring_method=concordance_index):
|
|
105
|
-
"""
|
|
106
|
-
This function wraps lifelines models into a scikit-learn compatible API. The function returns a
|
|
107
|
-
class that can be instantiated with parameters (similar to a scikit-learn class).
|
|
108
|
-
|
|
109
|
-
Parameters
|
|
110
|
-
----------
|
|
111
|
-
|
|
112
|
-
fitter: class
|
|
113
|
-
The class (not an instance) to be wrapper. Example: ``CoxPHFitter`` or ``WeibullAFTFitter``
|
|
114
|
-
event_col: string
|
|
115
|
-
The column in your DataFrame that represents (if applicable) the event column
|
|
116
|
-
predict_method: string
|
|
117
|
-
Can be the string ``"predict_median", "predict_expectation"``
|
|
118
|
-
scoring_method: function
|
|
119
|
-
Provide a way to produce a ``score`` on the scikit-learn model. Signature should look like (durations, predictions, event_observed=None)
|
|
120
|
-
|
|
121
|
-
"""
|
|
122
|
-
name = "SkLearn" + fitter.__name__
|
|
123
|
-
klass = type(
|
|
124
|
-
name,
|
|
125
|
-
(_SklearnModel,),
|
|
126
|
-
{
|
|
127
|
-
"lifelines_model": fitter,
|
|
128
|
-
"_event_col": event_col,
|
|
129
|
-
"_predict_method": predict_method,
|
|
130
|
-
"_fit_method": "fit",
|
|
131
|
-
"_scoring_method": staticmethod(scoring_method),
|
|
132
|
-
},
|
|
133
|
-
)
|
|
134
|
-
globals()[klass.__name__] = klass
|
|
135
|
-
return klass
|
|
File without changes
|
|
File without changes
|