lifelines 0.27.7__py3-none-any.whl → 0.28.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
lifelines/datasets/__init__.py CHANGED
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 import pandas as pd
-from pkg_resources import resource_filename
+from importlib import resources
 
 
 def _load_dataset(filename, **kwargs):
@@ -18,7 +18,7 @@ def _load_dataset(filename, **kwargs):
     -------
     output: DataFrame
     """
-    return pd.read_csv(resource_filename("lifelines", "datasets/" + filename), engine="python", **kwargs)
+    return pd.read_csv(resources.files("lifelines") / "datasets" / filename, engine="python", **kwargs)
 
 
 def load_recur(**kwargs):
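Context for the change above: `pkg_resources` is deprecated (and absent from recent setuptools), while `importlib.resources.files()` returns a traversable object that `pandas.read_csv` can read directly. A minimal sketch of the new pattern (the file name here is illustrative, not necessarily a real lifelines dataset):

    from importlib import resources
    import pandas as pd

    # resources.files() avoids the deprecated pkg_resources extraction machinery
    data = resources.files("lifelines") / "datasets" / "some_dataset.csv"
    df = pd.read_csv(data, engine="python")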
lifelines/exceptions.py CHANGED
@@ -5,6 +5,10 @@ class StatError(Exception):
     pass
 
 
+class ProportionalHazardAssumptionError(Exception):
+    pass
+
+
 class ConvergenceError(ValueError):
     # inherits from ValueError for backwards compatibility reasons
     def __init__(self, msg, original_exception=""):
lifelines/fitters/__init__.py CHANGED
@@ -70,6 +70,10 @@ class BaseFitter:
         s = """<lifelines.%s>""" % classname
         return s
 
+    @property
+    def label(self):
+        return self._label
+
     @utils.CensoringType.right_censoring
     def fit(*args, **kwargs):
         raise NotImplementedError()
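The new read-only `label` property simply exposes the private `_label` attribute set during `fit`; the call sites below switch from `self._label` to `self.label`. A usage sketch:

    from lifelines import KaplanMeierFitter

    kmf = KaplanMeierFitter().fit([2, 4, 6, 8], label="control")
    print(kmf.label)  # "control", now readable without touching a private attribute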
@@ -135,6 +139,10 @@ class UnivariateFitter(BaseFitter):
             "The `plot` function is deprecated, and will be removed in future versions. Use `plot_%s`" % self._estimate_name,
             DeprecationWarning,
         )
+        # Fix the confidence interval plot bug from Aalen-Johansen
+        # when calculate_variance is False.
+        if getattr(self, "_calc_var", None) is False:
+            kwargs["ci_show"] = False
         return _plot_estimate(self, estimate=self._estimate_name, **kwargs)
 
     def subtract(self, other) -> pd.DataFrame:
@@ -213,10 +221,10 @@ class UnivariateFitter(BaseFitter):
         might be 9 years.
         """
         age = self.survival_function_.index.values[:, None]
-        columns = ["%s - Conditional median duration remaining to event" % self._label]
+        columns = ["%s - Conditional median duration remaining to event" % self.label]
         return (
             pd.DataFrame(
-                utils.qth_survival_times(self.survival_function_[self._label] * 0.5, self.survival_function_)
+                utils.qth_survival_times(self.survival_function_[self.label] * 0.5, self.survival_function_)
                 .sort_index(ascending=False)
                 .values,
                 index=self.survival_function_.index,
@@ -454,7 +462,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
         return -ll / weights.sum()
 
     def _compute_confidence_bounds_of_cumulative_hazard(self, alpha, ci_labels) -> pd.DataFrame:
-        return self._compute_confidence_bounds_of_transform(self._cumulative_hazard, alpha, ci_labels)
+        return self._compute_confidence_bounds_of_transform(self._cumulative_hazard, alpha, ci_labels, self.timeline)
 
     def _compute_variance_of_transform(self, transform, timeline=None):
         """
@@ -487,7 +495,9 @@ class ParametricUnivariateFitter(UnivariateFitter):
             np.einsum("nj,jk,nk->n", gradient_at_times.T, self.variance_matrix_, gradient_at_times.T), index=timeline
         )
 
-    def _compute_confidence_bounds_of_transform(self, transform, alpha, ci_labels) -> pd.DataFrame:
+    def _compute_confidence_bounds_of_transform(
+        self, transform, alpha: float, ci_labels: tuple[str, str], timeline
+    ) -> pd.DataFrame:
         """
         This computes the confidence intervals of a transform of the parameters. Ex: take
         the fitted parameters, a function/transform and the variance matrix and give me
@@ -503,20 +513,21 @@
         alpha: float
             confidence level
         ci_labels: tuple
+        timeline: iterable
 
         """
         alpha2 = 1 - alpha / 2.0
         z = utils.inv_normal_cdf(alpha2)
-        df = pd.DataFrame(index=self.timeline)
+        df = pd.DataFrame(index=timeline)
 
         std_of_transform = np.sqrt(self._compute_variance_of_transform(transform))
 
         if ci_labels is None:
-            ci_labels = ["%s_lower_%g" % (self._label, 1 - alpha), "%s_upper_%g" % (self._label, 1 - alpha)]
+            ci_labels = ["%s_lower_%g" % (self.label, 1 - alpha), "%s_upper_%g" % (self.label, 1 - alpha)]
         assert len(ci_labels) == 2, "ci_labels should be a length 2 array."
 
-        df[ci_labels[0]] = transform(self._fitted_parameters_, self.timeline) - z * std_of_transform
-        df[ci_labels[1]] = transform(self._fitted_parameters_, self.timeline) + z * std_of_transform
+        df[ci_labels[0]] = transform(self._fitted_parameters_, timeline) - z * std_of_transform
+        df[ci_labels[1]] = transform(self._fitted_parameters_, timeline) + z * std_of_transform
         return df
 
     def _create_initial_point(self, *args) -> np.ndarray:
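For reference, these bounds are a delta-method approximation: for a transform f(θ̂, t), Var[f] ≈ ∇f(θ̂, t) Σ ∇f(θ̂, t)ᵀ (the `np.einsum` above), and the interval is f(θ̂, t) ± z(1−α/2)·SE. A self-contained sketch with a numerical gradient (illustrative names, not the lifelines API):

    import numpy as np
    from scipy import stats

    def delta_method_ci(f, theta_hat, cov, timeline, alpha=0.05, eps=1e-6):
        # f(theta, t) -> array over timeline; cov is the parameter covariance matrix
        z = stats.norm.ppf(1 - alpha / 2.0)
        center = f(theta_hat, timeline)
        grads = np.stack(
            [(f(theta_hat + eps * e, timeline) - center) / eps for e in np.eye(theta_hat.size)],
            axis=1,
        )  # shape: (len(timeline), n_params)
        se = np.sqrt(np.einsum("nj,jk,nk->n", grads, cov, grads))
        return center - z * se, center + z * se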
@@ -539,7 +550,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
         minimizing_results, previous_results, minimizing_ll = None, None, np.inf
         for method, option in zip(
             ["Nelder-Mead", self._scipy_fit_method],
-            [{"maxiter": 100}, {**{"disp": show_progress}, **self._scipy_fit_options, **fit_options}],
+            [{"maxiter": 400}, {**{"disp": show_progress}, **self._scipy_fit_options, **fit_options}],
         ):
 
             initial_value = self._initial_values if previous_results is None else utils._to_1d_array(previous_results.x)
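The Nelder-Mead pre-pass budget rises here from 100 to 400 iterations; the main solver's options remain a merge of defaults, the class-level `_scipy_fit_options`, and the caller's `fit_options`, so per-call overrides win. A sketch, assuming a standard parametric fitter:

    from lifelines import WeibullFitter

    wf = WeibullFitter()
    # fit_options is forwarded into scipy.optimize.minimize's options dict
    wf.fit([5, 6, 7, 8, 12, 15], fit_options={"maxiter": 1000})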
@@ -1054,7 +1065,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
             Rename the series returned. Useful for plotting.
 
         """
-        label = utils.coalesce(label, self._label)
+        label = utils.coalesce(label, self.label)
         return pd.Series(self._survival_function(self._fitted_parameters_, times), index=utils._to_1d_array(times), name=label)
 
     def cumulative_density_at_times(self, times, label: t.Optional[str] = None) -> pd.Series:
@@ -1069,7 +1080,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
             Rename the series returned. Useful for plotting.
 
         """
-        label = utils.coalesce(label, self._label)
+        label = utils.coalesce(label, self.label)
         return pd.Series(self._cumulative_density(self._fitted_parameters_, times), index=utils._to_1d_array(times), name=label)
 
     def density_at_times(self, times, label=None) -> pd.Series:
@@ -1084,7 +1095,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
             Rename the series returned. Useful for plotting.
 
         """
-        label = utils.coalesce(label, self._label)
+        label = utils.coalesce(label, self.label)
         return pd.Series(self._density(self._fitted_parameters_, times), index=utils._to_1d_array(times), name=label)
 
     def cumulative_hazard_at_times(self, times, label: t.Optional[str] = None) -> pd.Series:
@@ -1098,7 +1109,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
         label: string, optional
             Rename the series returned. Useful for plotting.
         """
-        label = utils.coalesce(label, self._label)
+        label = utils.coalesce(label, self.label)
         return pd.Series(self._cumulative_hazard(self._fitted_parameters_, times), index=utils._to_1d_array(times), name=label)
 
     def hazard_at_times(self, times, label: t.Optional[str] = None) -> pd.Series:
@@ -1113,7 +1124,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
             Rename the series returned. Useful for plotting.
 
         """
-        label = utils.coalesce(label, self._label)
+        label = utils.coalesce(label, self.label)
        return pd.Series(self._hazard(self._fitted_parameters_, times), index=utils._to_1d_array(times), name=label)
 
     @property
@@ -1135,28 +1146,28 @@ class ParametricUnivariateFitter(UnivariateFitter):
         """
         The confidence interval of the hazard.
         """
-        return self._compute_confidence_bounds_of_transform(self._hazard, self.alpha, self._ci_labels)
+        return self._compute_confidence_bounds_of_transform(self._hazard, self.alpha, self._ci_labels, self.timeline)
 
     @property
     def confidence_interval_density_(self) -> pd.DataFrame:
         """
         The confidence interval of the density.
         """
-        return self._compute_confidence_bounds_of_transform(self._density, self.alpha, self._ci_labels)
+        return self._compute_confidence_bounds_of_transform(self._density, self.alpha, self._ci_labels, self.timeline)
 
     @property
     def confidence_interval_survival_function_(self) -> pd.DataFrame:
         """
         The lower and upper confidence intervals for the survival function
         """
-        return self._compute_confidence_bounds_of_transform(self._survival_function, self.alpha, self._ci_labels)
+        return self._compute_confidence_bounds_of_transform(self._survival_function, self.alpha, self._ci_labels, self.timeline)
 
     @property
     def confidence_interval_cumulative_density_(self) -> pd.DataFrame:
         """
         The lower and upper confidence intervals for the cumulative density
         """
-        return self._compute_confidence_bounds_of_transform(self._cumulative_density, self.alpha, self._ci_labels)
+        return self._compute_confidence_bounds_of_transform(self._cumulative_density, self.alpha, self._ci_labels, self.timeline)
 
     def plot(self, **kwargs):
         """
@@ -1203,7 +1214,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
 
         """
         age = self.timeline
-        columns = ["%s - Conditional median duration remaining to event" % self._label]
+        columns = ["%s - Conditional median duration remaining to event" % self.label]
 
         return pd.DataFrame(self.percentile(0.5 * self.survival_function_.values) - age[:, None], index=age, columns=columns)
 
@@ -1398,7 +1409,7 @@ class ParametricRegressionFitter(RegressionFitter):
     def _survival_function(self, params, T, Xs):
         return anp.clip(anp.exp(-self._cumulative_hazard(params, T, Xs)), 1e-12, 1 - 1e-12)
 
-    def _log_likelihood_right_censoring(self, params, Ts, E, W, entries, Xs) -> float:
+    def _log_likelihood_right_censoring(self, params, Ts: tuple, E, W, entries, Xs) -> float:
 
         T = Ts[0]
         non_zero_entries = entries > 0
@@ -3354,6 +3365,8 @@ class ParametericAFTRegressionFitter(ParametricRegressionFitter):
             also display the baseline survival, defined as the survival at the mean of the original dataset.
         times: iterable
             pass in the times to plot
+        y: str
+            one of "survival_function", "hazard", "cumulative_hazard". Default "survival_function"
         kwargs:
             pass in additional plotting commands
 
lifelines/fitters/aalen_johansen_fitter.py CHANGED
@@ -7,6 +7,7 @@ import warnings
 from lifelines.fitters import NonParametricUnivariateFitter
 from lifelines.utils import _preprocess_inputs, inv_normal_cdf, CensoringType, coalesce
 from lifelines import KaplanMeierFitter
+from lifelines.plotting import _plot_estimate
 
 
 class AalenJohansenFitter(NonParametricUnivariateFitter):
@@ -261,3 +262,46 @@ class AalenJohansenFitter(NonParametricUnivariateFitter):
 
         # Detect duplicated times with different event types
         return (dup_times & (~dup_events)).any()
+
+    def plot_cumulative_density(self, **kwargs):
+        """Plots a pretty figure of the model
+
+        Matplotlib plot arguments can be passed in inside the kwargs.
+
+        Parameters
+        -----------
+        show_censors: bool
+            place markers at censorship events. Default: False
+        censor_styles: dict
+            If show_censors, this dictionary will be passed into the plot call.
+        ci_alpha: float
+            the transparency level of the confidence interval. Default: 0.3
+        ci_force_lines: bool
+            force the confidence intervals to be line plots (versus default shaded areas). Default: False
+        ci_show: bool
+            show confidence intervals. Default: True
+        ci_legend: bool
+            if ci_force_lines is True, this is a boolean flag to add the lines' labels to the legend. Default: False
+        at_risk_counts: bool
+            show group sizes at time points. See function ``add_at_risk_counts`` for details. Default: False
+        loc: slice
+            specify a time-based subsection of the curves to plot, ex:
+
+            >>> model.plot(loc=slice(0.,10.))
+
+            will plot the time values between t=0. and t=10.
+        iloc: slice
+            specify a location-based subsection of the curves to plot, ex:
+
+            >>> model.plot(iloc=slice(0,10))
+
+            will plot the first 10 time points.
+
+        Returns
+        -------
+        ax:
+            a pyplot axis object
+        """
+        if not self._calc_var:
+            kwargs["ci_show"] = False
+        return _plot_estimate(self, estimate=self._estimate_name, **kwargs)
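When the fitter was constructed with `calculate_variance=False` there are no variance estimates to draw, so both this method and the `UnivariateFitter.plot` guard earlier force `ci_show=False` instead of raising. A usage sketch:

    from lifelines import AalenJohansenFitter

    ajf = AalenJohansenFitter(calculate_variance=False)
    ajf.fit(durations=[3, 5, 5, 8], event_observed=[1, 2, 1, 0], event_of_interest=1)
    ajf.plot_cumulative_density()  # confidence bands suppressed automatically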
lifelines/fitters/breslow_fleming_harrington_fitter.py CHANGED
@@ -72,7 +72,14 @@ class BreslowFlemingHarringtonFitter(NonParametricUnivariateFitter):
         alpha = coalesce(alpha, self.alpha)
 
         naf = NelsonAalenFitter(alpha=alpha)
-        naf.fit(durations, event_observed=event_observed, timeline=timeline, label=self._label, entry=entry, ci_labels=ci_labels)
+        naf.fit(
+            durations,
+            event_observed=event_observed,
+            timeline=timeline,
+            label=self._label,
+            entry=entry,
+            ci_labels=ci_labels,
+        )
         self.durations, self.event_observed, self.timeline, self.entry, self.event_table, self.weights = (
             naf.durations,
             naf.event_observed,
@@ -87,6 +94,7 @@ class BreslowFlemingHarringtonFitter(NonParametricUnivariateFitter):
         self.confidence_interval_ = np.exp(-naf.confidence_interval_)
         self.confidence_interval_survival_function_ = self.confidence_interval_
         self.confidence_interval_cumulative_density = 1 - self.confidence_interval_
+        self.confidence_interval_cumulative_density[:] = np.fliplr(self.confidence_interval_cumulative_density.values)
 
         # estimation methods
         self._estimation_method = "survival_function_"
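Why the `np.fliplr`: with F(t) = 1 − S(t), subtracting the survival bounds from 1 turns the lower survival bound into the upper cumulative-density bound and vice versa, leaving the two columns swapped; flipping them restores the (lower, upper) order. The same correction is applied in KaplanMeierFitter further down. A tiny numeric illustration:

    import numpy as np

    surv_ci = np.array([[0.70, 0.90]])          # [lower, upper] bounds on S(t)
    cum_density_ci = 1 - surv_ci                # [[0.30, 0.10]] -- columns now reversed
    cum_density_ci = np.fliplr(cum_density_ci)  # [[0.10, 0.30]] -- order restored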
lifelines/fitters/cox_time_varying_fitter.py CHANGED
@@ -153,6 +153,7 @@ class CoxTimeVaryingFitter(SemiParametricRegressionFitter, ProportionalHazardMixin):
             Override the default values in NR algorithm:
                 step_size: 0.95,
                 precision: 1e-07,
+                r_precision: 1e-09,
                 max_steps: 500,
 
         Returns
@@ -328,12 +329,13 @@ class CoxTimeVaryingFitter(SemiParametricRegressionFitter, ProportionalHazardMixin):
         weights,
         show_progress=False,
         step_size=0.95,
-        precision=10e-6,
+        precision=1e-8,
+        r_precision=1e-9,
         max_steps=50,
         initial_point=None,
     ):  # pylint: disable=too-many-arguments,too-many-locals,too-many-branches,too-many-statements
         """
-        Newton Rhaphson algorithm for fitting CPH model.
+        Newton Raphson algorithm for fitting CPH model.
 
         Parameters
         ----------
@@ -345,8 +347,11 @@ class CoxTimeVaryingFitter(SemiParametricRegressionFitter, ProportionalHazardMixin):
         step_size: float
             > 0 to determine a starting step size in NR algorithm.
         precision: float
-            the convergence halts if the norm of delta between
-            successive positions is less than epsilon.
+            the algorithm stops if the norm of delta between
+            successive positions is less than ``precision``.
+        r_precision: float, optional
+            the algorithm stops if the relative decrease in log-likelihood
+            between successive iterations goes below ``r_precision``.
 
         Returns
         --------
@@ -443,17 +448,17 @@ https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergen
 
            if show_progress:
                print(
-                    "\rIteration %d: norm_delta = %.5f, step_size = %.5f, ll = %.5f, newton_decrement = %.5f, seconds_since_start = %.1f"
+                    "\rIteration %d: norm_delta = %.2e, step_size = %.4f, log_lik = %.5f, newton_decrement = %.2e, seconds_since_start = %.1f"
                    % (i, norm_delta, step_size, ll, newton_decrement, time.time() - start_time)
                )
 
            # convergence criteria
            if norm_delta < precision:
                converging, completed = False, True
-            elif previous_ll > 0 and abs(ll - previous_ll) / (-previous_ll) < 1e-09:
-                # this is what R uses by default
+            elif previous_ll > 0 and abs(ll - previous_ll) / (-previous_ll) < r_precision:
+                # this is what R uses by default with r_precision=1e-9
                converging, completed = False, True
-            elif newton_decrement < 10e-8:
+            elif newton_decrement < precision:
                converging, completed = False, True
            elif i >= max_steps:
                # 50 iteration steps with N-R is a lot.
@@ -481,12 +486,12 @@ See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-sep
        # report to the user problems that we detect.
        if completed and norm_delta > 0.1:
            warnings.warn(
-                "Newton-Rhapson convergence completed but norm(delta) is still high, %.3f. This may imply non-unique solutions to the maximum likelihood. Perhaps there is collinearity or complete separation in the dataset?"
+                "Newton-Raphson convergence completed but norm(delta) is still high, %.3f. This may imply non-unique solutions to the maximum likelihood. Perhaps there is collinearity or complete separation in the dataset?"
                % norm_delta,
                ConvergenceWarning,
            )
        elif not completed:
-            warnings.warn("Newton-Rhapson failed to converge sufficiently in %d steps." % max_steps, ConvergenceWarning)
+            warnings.warn("Newton-Raphson failed to converge sufficiently in %d steps." % max_steps, ConvergenceWarning)
 
        return beta, ll, hessian
 
lifelines/fitters/coxph_fitter.py CHANGED
@@ -80,7 +80,7 @@ class CoxPHFitter(RegressionFitter, ProportionalHazardMixin):
         When ``baseline_estimation_method="spline"``, this allows customizing the points in the time axis for the baseline hazard curve.
         To use evenly-spaced points in time, the ``n_baseline_knots`` parameter can be employed instead.
 
-    breakpoints: int
+    breakpoints: list, optional
         Used when ``baseline_estimation_method="piecewise"``. Set the positions of the baseline hazard breakpoints.
 
     Examples
@@ -242,7 +242,7 @@ class CoxPHFitter(RegressionFitter, ProportionalHazardMixin):
             algorithm. Default is the zero vector.
 
         fit_options: dict, optional
-            pass kwargs for the fitting algorithm. For semi-parametric models, this is the Newton-Rhapson method (see method _newton_raphson_for_efron_model for kwargs)
+            pass kwargs for the fitting algorithm. For semi-parametric models, this is the Newton-Raphson method (see method _newton_raphson_for_efron_model for kwargs)
 
         Returns
         -------
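Together with the new `r_precision` argument on the Newton-Raphson routine below, this means the R-style relative-likelihood tolerance is now tunable per fit. A sketch:

    from lifelines import CoxPHFitter
    from lifelines.datasets import load_rossi

    cph = CoxPHFitter()
    cph.fit(
        load_rossi(), duration_col="week", event_col="arrest",
        fit_options={"r_precision": 1e-7},  # loosen the relative log-likelihood criterion
    )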
@@ -1430,10 +1430,11 @@ estimate the variances. See paper "Variance estimation when using inverse probab
        show_progress: bool = True,
        step_size: float = 0.95,
        precision: float = 1e-07,
+        r_precision: float = 1e-9,
        max_steps: int = 500,
    ):  # pylint: disable=too-many-statements,too-many-branches
        """
-        Newton Rhaphson algorithm for fitting CPH model.
+        Newton Raphson algorithm for fitting CPH model.
 
        Note
        ----
@@ -1450,13 +1451,15 @@ estimate the variances. See paper "Variance estimation when using inverse probab
        step_size: float, optional
            > 0.001 to determine a starting step size in NR algorithm.
        precision: float, optional
-            the convergence halts if the norm of delta between
-            successive positions is less than epsilon.
+            the algorithm stops if the norm of delta between
+            successive positions is less than ``precision``.
+        r_precision: float, optional
+            the algorithm stops if the relative decrease in log-likelihood
+            between successive iterations goes below ``r_precision``.
        show_progress: bool, optional
-            since the fitter is iterative, show convergence
-            diagnostics.
+            since the fitter is iterative, show convergence diagnostics.
        max_steps: int, optional
-            the maximum number of iterations of the Newton-Rhaphson algorithm.
+            the maximum number of iterations of the Newton-Raphson algorithm.
 
        Returns
        -------
@@ -1564,15 +1567,15 @@ estimate the variances. See paper "Variance estimation when using inverse probab
 
            if show_progress:
                print(
-                    "\rIteration %d: norm_delta = %.5f, step_size = %.4f, log_lik = %.5f, newton_decrement = %.5f, seconds_since_start = %.1f"
+                    "\rIteration %d: norm_delta = %.2e, step_size = %.4f, log_lik = %.5f, newton_decrement = %.2e, seconds_since_start = %.1f"
                    % (i, norm_delta, step_size, ll_, newton_decrement, time.time() - start)
                )
 
            # convergence criteria
            if norm_delta < precision:
                converging, success = False, True
-            elif previous_ll_ != 0 and abs(ll_ - previous_ll_) / (-previous_ll_) < 1e-09:
-                # this is what R uses by default
+            elif previous_ll_ != 0 and abs(ll_ - previous_ll_) / (-previous_ll_) < r_precision:
+                # this is what R uses by default, with r_precision = 1e-9
                converging, success = False, True
            elif newton_decrement < precision:
                converging, success = False, True
@@ -1602,14 +1605,14 @@ See https://stats.stackexchange.com/q/11109/11867 for more.\n",
        if success and norm_delta > 0.1:
            self._check_values_post_fitting(X, T, E, weights)
            warnings.warn(
-                "Newton-Rhaphson convergence completed successfully but norm(delta) is still high, %.3f. This may imply non-unique solutions to the maximum likelihood. Perhaps there is collinearity or complete separation in the dataset?\n"
+                "Newton-Raphson convergence completed successfully but norm(delta) is still high, %.3f. This may imply non-unique solutions to the maximum likelihood. Perhaps there is collinearity or complete separation in the dataset?\n"
                % norm_delta,
                exceptions.ConvergenceWarning,
            )
        elif not success:
            self._check_values_post_fitting(X, T, E, weights)
            warnings.warn(
-                "Newton-Rhaphson failed to converge sufficiently. {0}".format(CONVERGENCE_DOCS), exceptions.ConvergenceWarning
+                "Newton-Raphson failed to converge sufficiently. {0}".format(CONVERGENCE_DOCS), exceptions.ConvergenceWarning
            )
 
        return beta, ll_, hessian
@@ -2855,6 +2858,7 @@ class ParametricCoxModelFitter(ParametricRegressionFitter, ProportionalHazardMixin):
            df = df.to_frame().T.infer_objects()
 
        df = df.copy()
+        df.index.name = None
 
        if self.strata is not None:
            df = df.reset_index().set_index(self.strata)
lifelines/fitters/generalized_gamma_fitter.py CHANGED
@@ -105,6 +105,7 @@ class GeneralizedGammaFitter(KnownModelParametricUnivariateFitter):
     """
 
     _scipy_fit_method = "SLSQP"
+    _scipy_fit_options = {"maxiter": 10_000, "maxfev": 10_000}
     _fitted_parameter_names = ["mu_", "ln_sigma_", "lambda_"]
     _bounds = [(None, None), (None, None), (None, None)]
     _compare_to_values = np.array([0.0, 0.0, 1.0])
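The new class-level `_scipy_fit_options` flows into the option merge shown earlier in the parametric `_fit` loop (`{**{"disp": show_progress}, **self._scipy_fit_options, **fit_options}`). Since later keys win in a dict merge, per-call `fit_options` still override these class defaults:

    defaults = {"disp": False}
    class_opts = {"maxiter": 10_000, "maxfev": 10_000}  # GeneralizedGammaFitter._scipy_fit_options
    user_opts = {"maxiter": 500}                        # passed via fit(..., fit_options=...)
    merged = {**defaults, **class_opts, **user_opts}
    assert merged == {"disp": False, "maxiter": 500, "maxfev": 10_000}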
@@ -117,14 +118,14 @@ class GeneralizedGammaFitter(KnownModelParametricUnivariateFitter):
        elif CensoringType.is_interval_censoring(self):
            # this fails if Ts[1] == Ts[0], so we add some fudge factors.
            log_data = log(Ts[1] - Ts[0] + 0.1)
-        return np.array([log_data.mean(), log(log_data.std() + 0.01), 0.1])
+        return np.array([log_data.mean() * 1.5, log(log_data.std() + 0.1), 1.0])
 
    def _cumulative_hazard(self, params, times):
        mu_, ln_sigma_, lambda_ = params
 
        sigma_ = safe_exp(ln_sigma_)
        Z = (log(times) - mu_) / sigma_
-        ilambda_2 = 1 / lambda_ ** 2
+        ilambda_2 = 1 / lambda_**2
        clipped_exp = np.clip(safe_exp(lambda_ * Z) * ilambda_2, 1e-300, 1e20)
 
        if lambda_ > 0:
@@ -137,7 +138,7 @@ class GeneralizedGammaFitter(KnownModelParametricUnivariateFitter):
 
    def _log_hazard(self, params, times):
        mu_, ln_sigma_, lambda_ = params
-        ilambda_2 = 1 / lambda_ ** 2
+        ilambda_2 = 1 / lambda_**2
        Z = (log(times) - mu_) / safe_exp(ln_sigma_)
        clipped_exp = np.clip(safe_exp(lambda_ * Z) * ilambda_2, 1e-300, 1e20)
        if lambda_ > 0:
@@ -171,5 +172,5 @@ class GeneralizedGammaFitter(KnownModelParametricUnivariateFitter):
        sigma_ = exp(self.ln_sigma_)
 
        if lambda_ > 0:
-            return exp(sigma_ * log(gammainccinv(1 / lambda_ ** 2, p) * lambda_ ** 2) / lambda_) * exp(self.mu_)
-        return exp(sigma_ * log(gammaincinv(1 / lambda_ ** 2, p) * lambda_ ** 2) / lambda_) * exp(self.mu_)
+            return exp(sigma_ * log(gammainccinv(1 / lambda_**2, p) * lambda_**2) / lambda_) * exp(self.mu_)
+        return exp(sigma_ * log(gammaincinv(1 / lambda_**2, p) * lambda_**2) / lambda_) * exp(self.mu_)
lifelines/fitters/kaplan_meier_fitter.py CHANGED
@@ -351,9 +351,14 @@ class KaplanMeierFitter(NonParametricUnivariateFitter):
        primary_estimate_name = "survival_function_"
        secondary_estimate_name = "cumulative_density_"
 
-        (self.durations, self.event_observed, self.timeline, self.entry, self.event_table, self.weights) = _preprocess_inputs(
-            durations, event_observed, timeline, entry, weights
-        )
+        (
+            self.durations,
+            self.event_observed,
+            self.timeline,
+            self.entry,
+            self.event_table,
+            self.weights,
+        ) = _preprocess_inputs(durations, event_observed, timeline, entry, weights)
 
        alpha = alpha if alpha else self.alpha
        log_estimate, cumulative_sq_ = _additive_estimate(
@@ -386,6 +391,7 @@ class KaplanMeierFitter(NonParametricUnivariateFitter):
 
        self.confidence_interval_survival_function_ = self.confidence_interval_
        self.confidence_interval_cumulative_density_ = 1 - self.confidence_interval_
+        self.confidence_interval_cumulative_density_[:] = np.fliplr(self.confidence_interval_cumulative_density_.values)
        self._median = median_survival_times(self.survival_function_)
        self._cumulative_sq_ = cumulative_sq_
 
lifelines/fitters/mixins.py CHANGED
@@ -4,6 +4,7 @@ from textwrap import dedent, fill
 from autograd import numpy as anp
 import numpy as np
 from pandas import DataFrame, Series
+from lifelines.exceptions import ProportionalHazardAssumptionError
 from lifelines.statistics import proportional_hazard_test, TimeTransformers
 from lifelines.utils import format_p_value
 from lifelines.utils.lowess import lowess
@@ -28,6 +29,7 @@ class ProportionalHazardMixin:
        p_value_threshold: float = 0.01,
        plot_n_bootstraps: int = 15,
        columns: Optional[List[str]] = None,
+        raise_on_fail: bool = False,
    ) -> None:
        """
        Use this function to test the proportional hazards assumption. See usage example at
@@ -51,6 +53,8 @@ class ProportionalHazardMixin:
            the function significantly.
        columns: list, optional
            specify a subset of columns to test.
+        raise_on_fail: bool, optional
+            raise a ``ProportionalHazardAssumptionError`` if the test fails. Default: False.
 
        Returns
        --------
@@ -107,7 +111,7 @@ class ProportionalHazardMixin:
 
        for variable in self.params_.index.intersection(columns or self.params_.index):
            minumum_observed_p_value = test_results.summary.loc[variable, "p"].min()
-
+            
            # plot is done (regardless of test result) whenever `show_plots = True`
            if show_plots:
                axes.append([])
@@ -224,9 +228,8 @@ class ProportionalHazardMixin:
                    ),
                    end="\n\n",
                )
-            #################
+                #################
 
-
        if advice and counter > 0:
            print(
                dedent(
@@ -243,6 +246,8 @@ class ProportionalHazardMixin:
 
        if counter == 0:
            print("Proportional hazard assumption looks okay.")
+        elif raise_on_fail:
+            raise ProportionalHazardAssumptionError()
        return axes
 
    @property
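`raise_on_fail` turns the printed advice into a hard failure, which is handy in automated pipelines or tests. A usage sketch:

    from lifelines import CoxPHFitter
    from lifelines.datasets import load_rossi
    from lifelines.exceptions import ProportionalHazardAssumptionError

    rossi = load_rossi()
    cph = CoxPHFitter().fit(rossi, duration_col="week", event_col="arrest")
    try:
        cph.check_assumptions(rossi, p_value_threshold=0.05, raise_on_fail=True)
    except ProportionalHazardAssumptionError:
        pass  # e.g. stratify the offending covariate or add time-varying terms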
lifelines/fitters/nelson_aalen_fitter.py CHANGED
@@ -183,7 +183,7 @@ class NelsonAalenFitter(UnivariateFitter):
        )
 
    def _variance_f_discrete(self, population, deaths):
-        return (population - deaths) * deaths / population ** 3
+        return (1 - deaths / population) * (deaths / population) * (1.0 / population)
 
    def _additive_f_smooth(self, population, deaths):
        cum_ = np.cumsum(1.0 / np.arange(1, np.max(population) + 1))
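The two variance expressions are algebraically identical, (n − d)·d / n³ = (1 − d/n)·(d/n)·(1/n), but the refactored form divides before cubing, so the computation is promoted to floating point early; the old form can overflow once `population ** 3` exceeds the int64 range (presumably the motivation for the change):

    import numpy as np

    population = np.array([3_000_000], dtype=np.int64)  # large risk set
    deaths = np.array([2], dtype=np.int64)
    # population**3 ~ 2.7e19 > int64 max (~9.2e18): the old expression overflows here
    safe = (1 - deaths / population) * (deaths / population) * (1.0 / population)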
@@ -239,7 +239,7 @@ class NelsonAalenFitter(UnivariateFitter):
        C = var_hazard_.values != 0.0  # only consider the points with jumps
        std_hazard_ = np.sqrt(
            1.0
-            / (bandwidth ** 2)
+            / (bandwidth**2)
            * np.dot(epanechnikov_kernel(timeline[:, None], timeline[C][None, :], bandwidth) ** 2, var_hazard_.values[C])
        )
        values = {