lifelines 0.27.7__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lifelines/datasets/__init__.py +2 -2
- lifelines/exceptions.py +4 -0
- lifelines/fitters/__init__.py +33 -20
- lifelines/fitters/aalen_johansen_fitter.py +44 -0
- lifelines/fitters/breslow_fleming_harrington_fitter.py +9 -1
- lifelines/fitters/cox_time_varying_fitter.py +15 -10
- lifelines/fitters/coxph_fitter.py +17 -13
- lifelines/fitters/generalized_gamma_fitter.py +6 -5
- lifelines/fitters/kaplan_meier_fitter.py +9 -3
- lifelines/fitters/mixins.py +8 -3
- lifelines/fitters/nelson_aalen_fitter.py +2 -2
- lifelines/plotting.py +163 -91
- lifelines/utils/__init__.py +5 -7
- lifelines/version.py +1 -1
- {lifelines-0.27.7.dist-info → lifelines-0.28.0.dist-info}/METADATA +9 -11
- {lifelines-0.27.7.dist-info → lifelines-0.28.0.dist-info}/RECORD +19 -22
- {lifelines-0.27.7.dist-info → lifelines-0.28.0.dist-info}/WHEEL +1 -1
- lifelines/datasets/ACTG175.csv +0 -2140
- lifelines/metrics.py +0 -60
- lifelines/utils/sklearn_adapter.py +0 -135
- {lifelines-0.27.7.dist-info → lifelines-0.28.0.dist-info}/LICENSE +0 -0
- {lifelines-0.27.7.dist-info → lifelines-0.28.0.dist-info}/top_level.txt +0 -0
lifelines/datasets/__init__.py
CHANGED
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 import pandas as pd
-from …
+from importlib import resources


 def _load_dataset(filename, **kwargs):
@@ -18,7 +18,7 @@ def _load_dataset(filename, **kwargs):
    -------
    output: DataFrame
    """
-    return pd.read_csv(…
+    return pd.read_csv(resources.files("lifelines") / "datasets" / filename, engine="python", **kwargs)


def load_recur(**kwargs):
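The loader now resolves bundled CSVs through `importlib.resources` instead of a file-path helper. A minimal sketch of the (unchanged) user-facing behaviour; `load_waltons` is a standard lifelines helper used here only as an example and is not part of this diff:

```python
# Sketch: dataset loading after the switch to importlib.resources.
# load_waltons() is assumed only as a convenient example loader; the diff above
# changes how _load_dataset locates the CSV inside the installed package
# (resources.files("lifelines") / "datasets" / filename).
from lifelines.datasets import load_waltons

df = load_waltons()
print(df.head())
print(df.shape)
```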
lifelines/exceptions.py
CHANGED
lifelines/fitters/__init__.py
CHANGED
@@ -70,6 +70,10 @@ class BaseFitter:
         s = """<lifelines.%s>""" % classname
         return s

+    @property
+    def label(self):
+        return self._label
+
     @utils.CensoringType.right_censoring
     def fit(*args, **kwargs):
         raise NotImplementedError()
@@ -135,6 +139,10 @@ class UnivariateFitter(BaseFitter):
             "The `plot` function is deprecated, and will be removed in future versions. Use `plot_%s`" % self._estimate_name,
             DeprecationWarning,
         )
+        # Fix the confidence interval plot bug from Aalen-Johansen
+        # when calculate_variance is False.
+        if getattr(self, "_calc_var", None) is False:
+            kwargs["ci_show"] = False
         return _plot_estimate(self, estimate=self._estimate_name, **kwargs)

     def subtract(self, other) -> pd.DataFrame:
@@ -213,10 +221,10 @@ class UnivariateFitter(BaseFitter):
        might be 9 years.
        """
        age = self.survival_function_.index.values[:, None]
-        columns = ["%s - Conditional median duration remaining to event" % self.…
+        columns = ["%s - Conditional median duration remaining to event" % self.label]
        return (
            pd.DataFrame(
-                utils.qth_survival_times(self.survival_function_[self.…
+                utils.qth_survival_times(self.survival_function_[self.label] * 0.5, self.survival_function_)
                .sort_index(ascending=False)
                .values,
                index=self.survival_function_.index,
@@ -454,7 +462,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
         return -ll / weights.sum()

     def _compute_confidence_bounds_of_cumulative_hazard(self, alpha, ci_labels) -> pd.DataFrame:
-        return self._compute_confidence_bounds_of_transform(self._cumulative_hazard, alpha, ci_labels)
+        return self._compute_confidence_bounds_of_transform(self._cumulative_hazard, alpha, ci_labels, self.timeline)

     def _compute_variance_of_transform(self, transform, timeline=None):
         """
@@ -487,7 +495,9 @@ class ParametricUnivariateFitter(UnivariateFitter):
             np.einsum("nj,jk,nk->n", gradient_at_times.T, self.variance_matrix_, gradient_at_times.T), index=timeline
         )

-    def _compute_confidence_bounds_of_transform(…
+    def _compute_confidence_bounds_of_transform(
+        self, transform, alpha: float, ci_labels: tuple[str, str], timeline
+    ) -> pd.DataFrame:
         """
         This computes the confidence intervals of a transform of the parameters. Ex: take
         the fitted parameters, a function/transform and the variance matrix and give me
@@ -503,20 +513,21 @@ class ParametricUnivariateFitter(UnivariateFitter):
        alpha: float
            confidence level
        ci_labels: tuple
+        timeline: iterable

        """
        alpha2 = 1 - alpha / 2.0
        z = utils.inv_normal_cdf(alpha2)
-        df = pd.DataFrame(index=…
+        df = pd.DataFrame(index=timeline)

        std_of_transform = np.sqrt(self._compute_variance_of_transform(transform))

        if ci_labels is None:
-            ci_labels = ["%s_lower_%g" % (self.…
+            ci_labels = ["%s_lower_%g" % (self.label, 1 - alpha), "%s_upper_%g" % (self.label, 1 - alpha)]
        assert len(ci_labels) == 2, "ci_labels should be a length 2 array."

-        df[ci_labels[0]] = transform(self._fitted_parameters_, …
-        df[ci_labels[1]] = transform(self._fitted_parameters_, …
+        df[ci_labels[0]] = transform(self._fitted_parameters_, timeline) - z * std_of_transform
+        df[ci_labels[1]] = transform(self._fitted_parameters_, timeline) + z * std_of_transform
        return df

    def _create_initial_point(self, *args) -> np.ndarray:
@@ -539,7 +550,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
        minimizing_results, previous_results, minimizing_ll = None, None, np.inf
        for method, option in zip(
            ["Nelder-Mead", self._scipy_fit_method],
-            [{"maxiter": …
+            [{"maxiter": 400}, {**{"disp": show_progress}, **self._scipy_fit_options, **fit_options}],
        ):

            initial_value = self._initial_values if previous_results is None else utils._to_1d_array(previous_results.x)
@@ -1054,7 +1065,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
            Rename the series returned. Useful for plotting.

        """
-        label = utils.coalesce(label, self.…
+        label = utils.coalesce(label, self.label)
        return pd.Series(self._survival_function(self._fitted_parameters_, times), index=utils._to_1d_array(times), name=label)

    def cumulative_density_at_times(self, times, label: t.Optional[str] = None) -> pd.Series:
@@ -1069,7 +1080,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
            Rename the series returned. Useful for plotting.

        """
-        label = utils.coalesce(label, self.…
+        label = utils.coalesce(label, self.label)
        return pd.Series(self._cumulative_density(self._fitted_parameters_, times), index=utils._to_1d_array(times), name=label)

    def density_at_times(self, times, label=None) -> pd.Series:
@@ -1084,7 +1095,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
            Rename the series returned. Useful for plotting.

        """
-        label = utils.coalesce(label, self.…
+        label = utils.coalesce(label, self.label)
        return pd.Series(self._density(self._fitted_parameters_, times), index=utils._to_1d_array(times), name=label)

    def cumulative_hazard_at_times(self, times, label: t.Optional[str] = None) -> pd.Series:
@@ -1098,7 +1109,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
        label: string, optional
            Rename the series returned. Useful for plotting.
        """
-        label = utils.coalesce(label, self.…
+        label = utils.coalesce(label, self.label)
        return pd.Series(self._cumulative_hazard(self._fitted_parameters_, times), index=utils._to_1d_array(times), name=label)

    def hazard_at_times(self, times, label: t.Optional[str] = None) -> pd.Series:
@@ -1113,7 +1124,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
            Rename the series returned. Useful for plotting.

        """
-        label = utils.coalesce(label, self.…
+        label = utils.coalesce(label, self.label)
        return pd.Series(self._hazard(self._fitted_parameters_, times), index=utils._to_1d_array(times), name=label)

    @property
@@ -1135,28 +1146,28 @@ class ParametricUnivariateFitter(UnivariateFitter):
        """
        The confidence interval of the hazard.
        """
-        return self._compute_confidence_bounds_of_transform(self._hazard, self.alpha, self._ci_labels)
+        return self._compute_confidence_bounds_of_transform(self._hazard, self.alpha, self._ci_labels, self.timeline)

    @property
    def confidence_interval_density_(self) -> pd.DataFrame:
        """
        The confidence interval of the hazard.
        """
-        return self._compute_confidence_bounds_of_transform(self._density, self.alpha, self._ci_labels)
+        return self._compute_confidence_bounds_of_transform(self._density, self.alpha, self._ci_labels, self.timeline)

    @property
    def confidence_interval_survival_function_(self) -> pd.DataFrame:
        """
        The lower and upper confidence intervals for the survival function
        """
-        return self._compute_confidence_bounds_of_transform(self._survival_function, self.alpha, self._ci_labels)
+        return self._compute_confidence_bounds_of_transform(self._survival_function, self.alpha, self._ci_labels, self.timeline)

    @property
    def confidence_interval_cumulative_density_(self) -> pd.DataFrame:
        """
        The lower and upper confidence intervals for the cumulative density
        """
-        return self._compute_confidence_bounds_of_transform(self._cumulative_density, self.alpha, self._ci_labels)
+        return self._compute_confidence_bounds_of_transform(self._cumulative_density, self.alpha, self._ci_labels, self.timeline)

    def plot(self, **kwargs):
        """
@@ -1203,7 +1214,7 @@ class ParametricUnivariateFitter(UnivariateFitter):

        """
        age = self.timeline
-        columns = ["%s - Conditional median duration remaining to event" % self.…
+        columns = ["%s - Conditional median duration remaining to event" % self.label]

        return pd.DataFrame(self.percentile(0.5 * self.survival_function_.values) - age[:, None], index=age, columns=columns)

@@ -1398,7 +1409,7 @@ class ParametricRegressionFitter(RegressionFitter):
    def _survival_function(self, params, T, Xs):
        return anp.clip(anp.exp(-self._cumulative_hazard(params, T, Xs)), 1e-12, 1 - 1e-12)

-    def _log_likelihood_right_censoring(self, params, Ts, E, W, entries, Xs) -> float:
+    def _log_likelihood_right_censoring(self, params, Ts: tuple, E, W, entries, Xs) -> float:

        T = Ts[0]
        non_zero_entries = entries > 0
@@ -3354,6 +3365,8 @@ class ParametericAFTRegressionFitter(ParametricRegressionFitter):
            also display the baseline survival, defined as the survival at the mean of the original dataset.
        times: iterable
            pass in a times to plot
+        y: str
+            one of "survival_function", "hazard", "cumulative_hazard". Default "survival_function"
        kwargs:
            pass in additional plotting commands
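Several changes above replace direct use of the private `self._label` with the new read-only `label` property on `BaseFitter`, and thread `timeline` through `_compute_confidence_bounds_of_transform`. A minimal sketch with a standard univariate fitter; `WeibullFitter` and `load_waltons` are ordinary lifelines objects, not part of this diff:

```python
# Sketch: the new public `label` property and label-derived column names.
from lifelines import WeibullFitter
from lifelines.datasets import load_waltons

df = load_waltons()
wf = WeibullFitter().fit(df["T"], df["E"], label="waltons")

print(wf.label)                              # "waltons"; previously only reachable as wf._label
print(wf.conditional_time_to_event_.columns) # column name is built from wf.label
print(wf.confidence_interval_survival_function_.head())  # bounds evaluated on wf.timeline
```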
lifelines/fitters/aalen_johansen_fitter.py
CHANGED
@@ -7,6 +7,7 @@ import warnings
 from lifelines.fitters import NonParametricUnivariateFitter
 from lifelines.utils import _preprocess_inputs, inv_normal_cdf, CensoringType, coalesce
 from lifelines import KaplanMeierFitter
+from lifelines.plotting import _plot_estimate


 class AalenJohansenFitter(NonParametricUnivariateFitter):
@@ -261,3 +262,46 @@ class AalenJohansenFitter(NonParametricUnivariateFitter):

        # Detect duplicated times with different event types
        return (dup_times & (~dup_events)).any()
+
+    def plot_cumulative_density(self, **kwargs):
+        """Plots a pretty figure of the model
+
+        Matplotlib plot arguments can be passed in inside the kwargs.
+
+        Parameters
+        -----------
+        show_censors: bool
+            place markers at censorship events. Default: False
+        censor_styles: dict
+            If show_censors, this dictionary will be passed into the plot call.
+        ci_alpha: float
+            the transparency level of the confidence interval. Default: 0.3
+        ci_force_lines: bool
+            force the confidence intervals to be line plots (versus default shaded areas). Default: False
+        ci_show: bool
+            show confidence intervals. Default: True
+        ci_legend: bool
+            if ci_force_lines is True, this is a boolean flag to add the lines' labels to the legend. Default: False
+        at_risk_counts: bool
+            show group sizes at time points. See function ``add_at_risk_counts`` for details. Default: False
+        loc: slice
+            specify a time-based subsection of the curves to plot, ex:
+
+            >>> model.plot(loc=slice(0.,10.))
+
+            will plot the time values between t=0. and t=10.
+        iloc: slice
+            specify a location-based subsection of the curves to plot, ex:
+
+            >>> model.plot(iloc=slice(0,10))
+
+            will plot the first 10 time points.
+
+        Returns
+        -------
+        ax:
+            a pyplot axis object
+        """
+        if not self._calc_var:
+            kwargs["ci_show"] = False
+        _plot_estimate(self, estimate=self._estimate_name, **kwargs)
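A minimal sketch of the new `plot_cumulative_density` method on synthetic competing-risks data (0 = censored, 1 = event of interest, 2 = competing event); when the fitter was constructed with `calculate_variance=False`, the method suppresses `ci_show` itself instead of failing:

```python
# Sketch: plotting the Aalen-Johansen cumulative density without variance estimates.
import numpy as np
from lifelines import AalenJohansenFitter

rng = np.random.default_rng(0)
T = rng.exponential(10, size=200)   # synthetic durations
E = rng.integers(0, 3, size=200)    # 0 censored, 1 event of interest, 2 competing event

ajf = AalenJohansenFitter(calculate_variance=False)
ajf.fit(T, E, event_of_interest=1)
ajf.plot_cumulative_density()       # ci_show is forced off because _calc_var is False
```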
lifelines/fitters/breslow_fleming_harrington_fitter.py
CHANGED
@@ -72,7 +72,14 @@ class BreslowFlemingHarringtonFitter(NonParametricUnivariateFitter):
        alpha = coalesce(alpha, self.alpha)

        naf = NelsonAalenFitter(alpha=alpha)
-        naf.fit(…
+        naf.fit(
+            durations,
+            event_observed=event_observed,
+            timeline=timeline,
+            label=self._label,
+            entry=entry,
+            ci_labels=ci_labels,
+        )
        self.durations, self.event_observed, self.timeline, self.entry, self.event_table, self.weights = (
            naf.durations,
            naf.event_observed,
@@ -87,6 +94,7 @@ class BreslowFlemingHarringtonFitter(NonParametricUnivariateFitter):
        self.confidence_interval_ = np.exp(-naf.confidence_interval_)
        self.confidence_interval_survival_function_ = self.confidence_interval_
        self.confidence_interval_cumulative_density = 1 - self.confidence_interval_
+        self.confidence_interval_cumulative_density[:] = np.fliplr(self.confidence_interval_cumulative_density.values)

        # estimation methods
        self._estimation_method = "survival_function_"
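A rough sketch of the effect of the expanded `naf.fit(...)` call, assuming `BreslowFlemingHarringtonFitter.fit` keeps its usual Kaplan-Meier-like signature (`timeline`, `entry`, `ci_labels`); those arguments are now forwarded to the internal `NelsonAalenFitter`, and the cumulative-density interval columns are flipped into lower/upper order:

```python
# Sketch only; load_waltons and the exact fit signature are assumptions, not part of this diff.
import numpy as np
from lifelines import BreslowFlemingHarringtonFitter
from lifelines.datasets import load_waltons

df = load_waltons()
bfh = BreslowFlemingHarringtonFitter()
bfh.fit(df["T"], df["E"], timeline=np.arange(0, 80), label="BFH")

print(bfh.survival_function_.head())
print(bfh.confidence_interval_cumulative_density.head())  # columns now in lower/upper order
```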
lifelines/fitters/cox_time_varying_fitter.py
CHANGED
@@ -153,6 +153,7 @@ class CoxTimeVaryingFitter(SemiParametricRegressionFitter, ProportionalHazardMix
            Override the default values in NR algorithm:
                step_size: 0.95,
                precision: 1e-07,
+                r_precision=1e-9,
                max_steps: 500,

        Returns
@@ -328,12 +329,13 @@ class CoxTimeVaryingFitter(SemiParametricRegressionFitter, ProportionalHazardMix
        weights,
        show_progress=False,
        step_size=0.95,
-        precision=…
+        precision=1e-8,
+        r_precision=1e-9,
        max_steps=50,
        initial_point=None,
    ):  # pylint: disable=too-many-arguments,too-many-locals,too-many-branches,too-many-statements
        """
-        Newton…
+        Newton Raphson algorithm for fitting CPH model.

        Parameters
        ----------
@@ -345,8 +347,11 @@ class CoxTimeVaryingFitter(SemiParametricRegressionFitter, ProportionalHazardMix
        step_size: float
            > 0 to determine a starting step size in NR algorithm.
        precision: float
-            the…
-            …
+            the algorithm stops if the norm of delta between
+            successive positions is less than ``precision``.
+        r_precision: float, optional
+            the algorithms stops if the relative decrease in log-likelihood
+            between successive iterations goes below ``r_precision``.

        Returns
        --------
@@ -443,17 +448,17 @@ https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergen

            if show_progress:
                print(
-                    "\rIteration %d: norm_delta = %.…
+                    "\rIteration %d: norm_delta = %.2e, step_size = %.4f, log_lik = %.5f, newton_decrement = %.2e, seconds_since_start = %.1f"
                    % (i, norm_delta, step_size, ll, newton_decrement, time.time() - start_time)
                )

            # convergence criteria
            if norm_delta < precision:
                converging, completed = False, True
-            elif previous_ll > 0 and abs(ll - previous_ll) / (-previous_ll) < …
-                # this is what R uses by default
+            elif previous_ll > 0 and abs(ll - previous_ll) / (-previous_ll) < r_precision:
+                # this is what R uses by default with r_precision=1e-9
                converging, completed = False, True
-            elif newton_decrement < …
+            elif newton_decrement < precision:
                converging, completed = False, True
            elif i >= max_steps:
                # 50 iterations steps with N-R is a lot.
@@ -481,12 +486,12 @@ See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-sep
        # report to the user problems that we detect.
        if completed and norm_delta > 0.1:
            warnings.warn(
-                "Newton-…
+                "Newton-Raphson convergence completed but norm(delta) is still high, %.3f. This may imply non-unique solutions to the maximum likelihood. Perhaps there is colinearity or complete separation in the dataset?"
                % norm_delta,
                ConvergenceWarning,
            )
        elif not completed:
-            warnings.warn("Newton-…
+            warnings.warn("Newton-Raphson failed to converge sufficiently in %d steps." % max_steps, ConvergenceWarning)

        return beta, ll, hessian
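A minimal sketch of a time-varying Cox fit with the updated convergence diagnostics; besides the norm-of-delta test, the loop now also stops once the relative log-likelihood improvement falls below the new `r_precision` (default 1e-9). `load_stanford_heart_transplants` is a standard lifelines dataset, not part of this diff:

```python
# Sketch: show_progress=True prints the reformatted per-iteration diagnostics shown above.
from lifelines import CoxTimeVaryingFitter
from lifelines.datasets import load_stanford_heart_transplants

df = load_stanford_heart_transplants()
ctv = CoxTimeVaryingFitter()
ctv.fit(df, id_col="id", event_col="event", start_col="start", stop_col="stop", show_progress=True)
ctv.print_summary()
```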
lifelines/fitters/coxph_fitter.py
CHANGED
@@ -80,7 +80,7 @@ class CoxPHFitter(RegressionFitter, ProportionalHazardMixin):
        When ``baseline_estimation_method="spline"``, this allows customizing the points in the time axis for the baseline hazard curve.
        To use evenly-spaced points in time, the ``n_baseline_knots`` parameter can be employed instead.

-    breakpoints: …
+    breakpoints: list, optional
        Used when ``baseline_estimation_method="piecewise"``. Set the positions of the baseline hazard breakpoints.

    Examples
@@ -242,7 +242,7 @@ class CoxPHFitter(RegressionFitter, ProportionalHazardMixin):
            algorithm. Default is the zero vector.

        fit_options: dict, optional
-            pass kwargs for the fitting algorithm. For semi-parametric models, this is the Newton-…
+            pass kwargs for the fitting algorithm. For semi-parametric models, this is the Newton-Raphson method (see method _newton_raphson_for_efron_model for kwargs)

        Returns
        -------
@@ -1430,10 +1430,11 @@ estimate the variances. See paper "Variance estimation when using inverse probab
        show_progress: bool = True,
        step_size: float = 0.95,
        precision: float = 1e-07,
+        r_precision: float = 1e-9,
        max_steps: int = 500,
    ):  # pylint: disable=too-many-statements,too-many-branches
        """
-        Newton…
+        Newton Raphson algorithm for fitting CPH model.

        Note
        ----
@@ -1450,13 +1451,15 @@ estimate the variances. See paper "Variance estimation when using inverse probab
        step_size: float, optional
            > 0.001 to determine a starting step size in NR algorithm.
        precision: float, optional
-            the…
-            successive positions is less than…
+            the algorithm stops if the norm of delta between
+            successive positions is less than ``precision``.
+        r_precision: float, optional
+            the algorithms stops if the relative decrease in log-likelihood
+            between successive iterations goes below ``r_precision``.
        show_progress: bool, optional
-            since the fitter is iterative, show convergence
-            diagnostics.
+            since the fitter is iterative, show convergence diagnostics.
        max_steps: int, optional
-            the maximum number of iterations of the Newton-…
+            the maximum number of iterations of the Newton-Raphson algorithm.

        Returns
        -------
@@ -1564,15 +1567,15 @@ estimate the variances. See paper "Variance estimation when using inverse probab

            if show_progress:
                print(
-                    "\rIteration %d: norm_delta = %.…
+                    "\rIteration %d: norm_delta = %.2e, step_size = %.4f, log_lik = %.5f, newton_decrement = %.2e, seconds_since_start = %.1f"
                    % (i, norm_delta, step_size, ll_, newton_decrement, time.time() - start)
                )

            # convergence criteria
            if norm_delta < precision:
                converging, success = False, True
-            elif previous_ll_ != 0 and abs(ll_ - previous_ll_) / (-previous_ll_) < …
-                # this is what R uses by default
+            elif previous_ll_ != 0 and abs(ll_ - previous_ll_) / (-previous_ll_) < r_precision:
+                # this is what R uses by default, with r_precision = 1e-9
                converging, success = False, True
            elif newton_decrement < precision:
                converging, success = False, True
@@ -1602,14 +1605,14 @@ See https://stats.stackexchange.com/q/11109/11867 for more.\n",
        if success and norm_delta > 0.1:
            self._check_values_post_fitting(X, T, E, weights)
            warnings.warn(
-                "Newton-…
+                "Newton-Raphson convergence completed successfully but norm(delta) is still high, %.3f. This may imply non-unique solutions to the maximum likelihood. Perhaps there is collinearity or complete separation in the dataset?\n"
                % norm_delta,
                exceptions.ConvergenceWarning,
            )
        elif not success:
            self._check_values_post_fitting(X, T, E, weights)
            warnings.warn(
-                "Newton-…
+                "Newton-Raphson failed to converge sufficiently. {0}".format(CONVERGENCE_DOCS), exceptions.ConvergenceWarning
            )

        return beta, ll_, hessian
@@ -2855,6 +2858,7 @@ class ParametricCoxModelFitter(ParametricRegressionFitter, ProportionalHazardMix
            df = df.to_frame().T.infer_objects()

        df = df.copy()
+        df.index.name = None

        if self.strata is not None:
            df = df.reset_index().set_index(self.strata)
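A hedged sketch of overriding the Newton-Raphson stopping rules through `fit_options`, assuming the dictionary is forwarded to `_newton_raphson_for_efron_model` as the updated docstring above indicates; `load_rossi` is a standard lifelines dataset, not part of this diff:

```python
# Sketch: the keys below mirror the NR keyword arguments documented above
# (precision, r_precision, max_steps); treat the pass-through as an assumption, not a spec.
from lifelines import CoxPHFitter
from lifelines.datasets import load_rossi

rossi = load_rossi()
cph = CoxPHFitter()
cph.fit(
    rossi,
    duration_col="week",
    event_col="arrest",
    fit_options={"precision": 1e-07, "r_precision": 1e-9, "max_steps": 500},
    show_progress=True,
)
cph.print_summary()
```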
lifelines/fitters/generalized_gamma_fitter.py
CHANGED
@@ -105,6 +105,7 @@ class GeneralizedGammaFitter(KnownModelParametricUnivariateFitter):
    """

    _scipy_fit_method = "SLSQP"
+    _scipy_fit_options = {"maxiter": 10_000, "maxfev": 10_000}
    _fitted_parameter_names = ["mu_", "ln_sigma_", "lambda_"]
    _bounds = [(None, None), (None, None), (None, None)]
    _compare_to_values = np.array([0.0, 0.0, 1.0])
@@ -117,14 +118,14 @@ class GeneralizedGammaFitter(KnownModelParametricUnivariateFitter):
        elif CensoringType.is_interval_censoring(self):
            # this fails if Ts[1] == Ts[0], so we add a some fudge factors.
            log_data = log(Ts[1] - Ts[0] + 0.1)
-            return np.array([log_data.mean(), log(log_data.std() + 0.…
+            return np.array([log_data.mean() * 1.5, log(log_data.std() + 0.1), 1.0])

    def _cumulative_hazard(self, params, times):
        mu_, ln_sigma_, lambda_ = params

        sigma_ = safe_exp(ln_sigma_)
        Z = (log(times) - mu_) / sigma_
-        ilambda_2 = 1 / lambda_…
+        ilambda_2 = 1 / lambda_**2
        clipped_exp = np.clip(safe_exp(lambda_ * Z) * ilambda_2, 1e-300, 1e20)

        if lambda_ > 0:
@@ -137,7 +138,7 @@ class GeneralizedGammaFitter(KnownModelParametricUnivariateFitter):

    def _log_hazard(self, params, times):
        mu_, ln_sigma_, lambda_ = params
-        ilambda_2 = 1 / lambda_…
+        ilambda_2 = 1 / lambda_**2
        Z = (log(times) - mu_) / safe_exp(ln_sigma_)
        clipped_exp = np.clip(safe_exp(lambda_ * Z) * ilambda_2, 1e-300, 1e20)
        if lambda_ > 0:
@@ -171,5 +172,5 @@ class GeneralizedGammaFitter(KnownModelParametricUnivariateFitter):
        sigma_ = exp(self.ln_sigma_)

        if lambda_ > 0:
-            return exp(sigma_ * log(gammainccinv(1 / lambda_…
-        return exp(sigma_ * log(gammaincinv(1 / lambda_…
+            return exp(sigma_ * log(gammainccinv(1 / lambda_**2, p) * lambda_**2) / lambda_) * exp(self.mu_)
+        return exp(sigma_ * log(gammaincinv(1 / lambda_**2, p) * lambda_**2) / lambda_) * exp(self.mu_)
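A minimal sketch of a right-censored fit; the cumulative hazard, log-hazard, and percentile expressions above are all written in terms of `1 / lambda_**2`, and SLSQP now gets larger `maxiter`/`maxfev` budgets by default. `load_waltons` is not part of this diff:

```python
# Sketch: percentile() goes through the gammaincinv/gammainccinv expression shown above.
from lifelines import GeneralizedGammaFitter
from lifelines.datasets import load_waltons

df = load_waltons()
ggf = GeneralizedGammaFitter().fit(df["T"], df["E"])

print(ggf.summary)
print(ggf.percentile(0.5))        # median survival time
print(ggf.median_survival_time_)
```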
lifelines/fitters/kaplan_meier_fitter.py
CHANGED
@@ -351,9 +351,14 @@ class KaplanMeierFitter(NonParametricUnivariateFitter):
        primary_estimate_name = "survival_function_"
        secondary_estimate_name = "cumulative_density_"

-        (
-            durations,
-            …
+        (
+            self.durations,
+            self.event_observed,
+            self.timeline,
+            self.entry,
+            self.event_table,
+            self.weights,
+        ) = _preprocess_inputs(durations, event_observed, timeline, entry, weights)

        alpha = alpha if alpha else self.alpha
        log_estimate, cumulative_sq_ = _additive_estimate(
@@ -386,6 +391,7 @@ class KaplanMeierFitter(NonParametricUnivariateFitter):

        self.confidence_interval_survival_function_ = self.confidence_interval_
        self.confidence_interval_cumulative_density_ = 1 - self.confidence_interval_
+        self.confidence_interval_cumulative_density_[:] = np.fliplr(self.confidence_interval_cumulative_density_.values)
        self._median = median_survival_times(self.survival_function_)
        self._cumulative_sq_ = cumulative_sq_
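After this change the cumulative-density interval is still `1 - (survival interval)`, but its lower/upper columns are swapped into the conventional order by the `np.fliplr` line above. A minimal sketch; `load_waltons` is not part of this diff:

```python
# Sketch: comparing the two confidence-interval frames after fit().
from lifelines import KaplanMeierFitter
from lifelines.datasets import load_waltons

df = load_waltons()
kmf = KaplanMeierFitter().fit(df["T"], df["E"], label="waltons")

ci_sf = kmf.confidence_interval_survival_function_
ci_cd = kmf.confidence_interval_cumulative_density_
# the first (lower) column of ci_cd equals 1 - the upper column of ci_sf
print(ci_sf.head())
print(ci_cd.head())
```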
lifelines/fitters/mixins.py
CHANGED
@@ -4,6 +4,7 @@ from textwrap import dedent, fill
 from autograd import numpy as anp
 import numpy as np
 from pandas import DataFrame, Series
+from lifelines.exceptions import ProportionalHazardAssumptionError
 from lifelines.statistics import proportional_hazard_test, TimeTransformers
 from lifelines.utils import format_p_value
 from lifelines.utils.lowess import lowess
@@ -28,6 +29,7 @@ class ProportionalHazardMixin:
        p_value_threshold: float = 0.01,
        plot_n_bootstraps: int = 15,
        columns: Optional[List[str]] = None,
+        raise_on_fail: bool = False,
    ) -> None:
        """
        Use this function to test the proportional hazards assumption. See usage example at
@@ -51,6 +53,8 @@ class ProportionalHazardMixin:
            the function significantly.
        columns: list, optional
            specify a subset of columns to test.
+        raise_on_fail: bool, optional
+            throw a ``ProportionalHazardAssumptionError`` if the test fails. Default: False.

        Returns
        --------
@@ -107,7 +111,7 @@ class ProportionalHazardMixin:

        for variable in self.params_.index.intersection(columns or self.params_.index):
            minumum_observed_p_value = test_results.summary.loc[variable, "p"].min()
-
+
            # plot is done (regardless of test result) whenever `show_plots = True`
            if show_plots:
                axes.append([])
@@ -224,9 +228,8 @@ class ProportionalHazardMixin:
                ),
                end="\n\n",
            )
-            #################
+        #################

-
        if advice and counter > 0:
            print(
                dedent(
@@ -243,6 +246,8 @@ class ProportionalHazardMixin:

        if counter == 0:
            print("Proportional hazard assumption looks okay.")
+        elif raise_on_fail:
+            raise ProportionalHazardAssumptionError()
        return axes

    @property
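A minimal sketch of the new `raise_on_fail` flag on `check_assumptions`; `CoxPHFitter` and `load_rossi` are standard lifelines objects, not part of this diff:

```python
# Sketch: turn a failed proportional-hazards check into an exception.
from lifelines import CoxPHFitter
from lifelines.datasets import load_rossi
from lifelines.exceptions import ProportionalHazardAssumptionError

rossi = load_rossi()
cph = CoxPHFitter().fit(rossi, duration_col="week", event_col="arrest")

try:
    cph.check_assumptions(rossi, p_value_threshold=0.05, raise_on_fail=True)
except ProportionalHazardAssumptionError:
    print("At least one covariate appears to violate the proportional hazards assumption.")
```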
lifelines/fitters/nelson_aalen_fitter.py
CHANGED
@@ -183,7 +183,7 @@ class NelsonAalenFitter(UnivariateFitter):
        )

    def _variance_f_discrete(self, population, deaths):
-        return (…
+        return (1 - deaths / population) * (deaths / population) * (1.0 / population)

    def _additive_f_smooth(self, population, deaths):
        cum_ = np.cumsum(1.0 / np.arange(1, np.max(population) + 1))
@@ -239,7 +239,7 @@ class NelsonAalenFitter(UnivariateFitter):
        C = var_hazard_.values != 0.0  # only consider the points with jumps
        std_hazard_ = np.sqrt(
            1.0
-            / (bandwidth…
+            / (bandwidth**2)
            * np.dot(epanechnikov_kernel(timeline[:, None], timeline[C][None, :], bandwidth) ** 2, var_hazard_.values[C])
        )
        values = {
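The two lines above touch the discrete variance and the kernel-smoothed hazard standard error (now written with `bandwidth**2`). A minimal sketch exercising that code path through the public smoothing methods, which are standard `NelsonAalenFitter` API and not part of this diff; `load_waltons` is likewise an assumption:

```python
# Sketch: smoothed hazard and its confidence intervals for a chosen bandwidth.
from lifelines import NelsonAalenFitter
from lifelines.datasets import load_waltons

df = load_waltons()
naf = NelsonAalenFitter().fit(df["T"], df["E"], label="waltons")

bandwidth = 3.0
print(naf.smoothed_hazard_(bandwidth).head())
print(naf.smoothed_hazard_confidence_intervals_(bandwidth).head())
```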
|