pytmle 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pytmle-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,24 @@
1
+ Metadata-Version: 2.4
2
+ Name: pytmle
3
+ Version: 0.1.0
4
+ Summary: A Flexible Python Implementation of Targeted Estimation for Survival and Competing Risks Analysis
5
+ Requires-Python: >=3.9
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: matplotlib>=3.5.0
8
+ Requires-Dist: numpy>=1.22.3
9
+ Requires-Dist: pandas>=1.3.4
10
+ Requires-Dist: pycox
11
+ Requires-Dist: scikit-learn>=1.2.2
12
+ Requires-Dist: scikit-survival>=0.21.0
13
+ Requires-Dist: seaborn>=0.11.2
14
+ Requires-Dist: tqdm>=4.67.1
15
+ Provides-Extra: dev
16
+ Requires-Dist: ipykernel>=6.29.5; extra == "dev"
17
+ Requires-Dist: pytest>=8.3.5; extra == "dev"
18
+ Requires-Dist: torch>=2.6.0; extra == "dev"
19
+
20
+ # PyTMLE
21
+
22
+ A flexible Python implementation of the Targeted Maximum Likelihood Estimator (TMLE) for the cause-specific absolute risk of time-to-event outcomes measured in continuous time.
23
+
24
+ Additional information and documentation will be added in the next minor update.
pytmle-0.1.0/README.md ADDED
@@ -0,0 +1,5 @@
1
+ # PyTMLE
2
+
3
+ A flexible Python implementation of the Targeted Maximum Likelihood Estimator (TMLE) for the cause-specific absolute risk of time-to-event outcomes measured in continuous time.
4
+
5
+ Additional information and documentation will be added in the next minor update.
@@ -0,0 +1,35 @@
1
+ [project]
2
+ name = "pytmle"
3
+ version = "0.1.0"
4
+ description = "A Flexible Python Implementation of Targeted Estimation for Survival and Competing Risks Analysis"
5
+ readme = "README.md"
6
+ requires-python = ">=3.9"
7
+ dependencies = [
8
+ "matplotlib>=3.5.0",
9
+ "numpy>=1.22.3",
10
+ "pandas>=1.3.4",
11
+ "pycox",
12
+ "scikit-learn>=1.2.2",
13
+ "scikit-survival>=0.21.0",
14
+ "seaborn>=0.11.2",
15
+ "tqdm>=4.67.1",
16
+ ]
17
+
18
+ [project.optional-dependencies]
19
+ dev = [
20
+ "ipykernel>=6.29.5",
21
+ "pytest>=8.3.5",
22
+ "torch>=2.6.0",
23
+ ]
24
+
25
+ [tool.uv.sources]
26
+ pycox = { git = "https://github.com/pooya-mohammadi/pycox" }
27
+ torch = { index = "pytorch-cpu" }
28
+
29
+ [tool.setuptools]
30
+ packages = ["pytmle"]
31
+
32
+ [[tool.uv.index]]
33
+ name = "pytorch-cpu"
34
+ url = "https://download.pytorch.org/whl/cpu"
35
+ explicit = true
@@ -0,0 +1,2 @@
1
+ from .pytmle import PyTMLE
2
+ from .estimates import InitialEstimates
@@ -0,0 +1,187 @@
1
+ from typing import Dict, List, Optional
2
+ import warnings
3
+ import numpy as np
4
+ import pandas as pd
5
+ from tqdm import tqdm
6
+ from concurrent.futures import ProcessPoolExecutor, as_completed
7
+
8
+ from pytmle.tmle_update import tmle_update
9
+ from pytmle.predict_ate import get_counterfactual_risks, ate_ratio, ate_diff
10
+ from pytmle.estimates import InitialEstimates
11
+
12
+
13
def standard_bootstrap(event_indicator):
    """Draw a plain (unstratified) bootstrap sample of row indices.

    Args:
        event_indicator: 1-D array-like; only its length is used here.

    Returns:
        np.ndarray of `len(event_indicator)` indices drawn with replacement.
    """
    n_obs = len(event_indicator)
    return np.random.choice(n_obs, size=n_obs, replace=True)
17
+
18
def stratified_bootstrap(event_indicator):
    """
    Generate bootstrap samples stratified by event indicator.

    Each unique event type is resampled with replacement within its own
    stratum, so the event-type frequencies of the original sample are
    preserved exactly.

    Args:
        event_indicator: 1-D array of event labels.

    Returns:
        np.ndarray of resampled indices, concatenated stratum by stratum in
        the sorted order of the unique event labels.
    """
    strata = [np.where(event_indicator == ev)[0] for ev in np.unique(event_indicator)]
    resampled = [np.random.choice(idx, size=len(idx), replace=True) for idx in strata]
    return np.concatenate(resampled)
28
+
29
def single_boot(initial_estimates,
                event_times,
                event_indicator,
                target_times,
                target_events,
                key_1,
                key_0,
                stratify_by_event,
                **kwargs):
    """
    Run the TMLE update on one bootstrap resample and collect point estimates.

    As pointed out by Coyle & van der Laan (2018; https://link.springer.com/chapter/10.1007/978-3-319-65304-4_28)
    and Tran et al. (2023; https://www.degruyter.com/document/doi/10.1515/jci-2021-0067/html?srsltid=AfmBOopT0k3YNof6ON7IWkEv49nuaK_bqgd_bCL8GSyYvmUNBDoGavDG),
    only the second stage of TMLE should be bootstrapped, not the first stage.

    Returns:
        pd.DataFrame with columns "Event", "Time", "Group", "Pt Est", "type"
        (type is "risks", "rr", or "rd"; Group is -1 for the effect rows),
        or None when the TMLE update did not converge on this resample.
    """
    # Draw the bootstrap indices, optionally stratified by event type
    sampler = stratified_bootstrap if stratify_by_event else standard_bootstrap
    sample_indices = sampler(event_indicator)

    # Resample the initial estimates of every group along with times/indicator
    boot_initial_estimates = {
        group: est[sample_indices] for group, est in initial_estimates.items()
    }
    # Second-stage TMLE on the resampled data (verbose output suppressed)
    updated_estimates, _, converged, _ = tmle_update(
        initial_estimates=boot_initial_estimates,
        event_times=event_times[sample_indices],
        event_indicator=event_indicator[sample_indices],
        target_times=target_times,
        target_events=target_events,
        verbose=0,
        **kwargs,
    )
    if not converged:
        # non-converged replicates are dropped by the caller
        return None

    frames = []
    counterfactual = get_counterfactual_risks(updated_estimates,
                                              key_1=key_1,
                                              key_0=key_0)[["Event", "Time", "Group", "Pt Est"]]
    counterfactual["type"] = "risks"
    frames.append(counterfactual)

    risk_ratio = ate_ratio(updated_estimates,
                           key_1=key_1,
                           key_0=key_0)[["Event", "Time", "Pt Est"]]
    risk_ratio["type"] = "rr"
    risk_ratio["Group"] = -1
    frames.append(risk_ratio)

    risk_diff = ate_diff(updated_estimates,
                         key_1=key_1,
                         key_0=key_0)[["Event", "Time", "Pt Est"]]
    risk_diff["type"] = "rd"
    risk_diff["Group"] = -1
    frames.append(risk_diff)

    return pd.concat(frames)
86
+
87
+
88
def bootstrap_tmle_loop(
    initial_estimates: Dict[int, InitialEstimates],
    event_times: np.ndarray,
    event_indicator: np.ndarray,
    target_times: List[float],
    target_events: List[int],
    n_bootstrap: int = 100,
    n_jobs: int = -1,
    alpha: float = 0.05,
    key_1: int = 1,
    key_0: int = 0,
    stratify_by_event: bool = False,
    verbose: int = 2,
    **kwargs,
) -> Optional[pd.DataFrame]:
    """
    Perform parallel bootstrapping and call tmle_update on each sample.

    Parameters
    ----------
    initial_estimates: Dict[int, InitialEstimates]
        Initial estimates for each group.
    event_times: np.ndarray
        Array of event times.
    event_indicator: np.ndarray
        Array of event indicators.
    target_times: List[float]
        List of target times.
    target_events: List[int]
        List of target events.
    n_bootstrap: int
        Number of bootstrap samples.
    n_jobs: int
        Number of parallel jobs for bootstrapping; non-positive values use
        the executor's default (number of processors).
    alpha: float
        Significance level for confidence intervals.
    key_1: int
        Key for group 1.
    key_0: int
        Key for group 0.
    stratify_by_event: bool
        Stratify bootstrapping by event indicator.
    verbose: int
        Verbosity level (>=1 warns on total non-convergence, >=2 shows a
        progress bar and a convergence summary).
    kwargs
        Additional arguments to pass to tmle_update.

    Returns
    -------
    Optional[pd.DataFrame]
        DataFrame with the bootstrap mean and percentile confidence bounds
        per (type, Event, Time, Group), or None if no replicate converged.
    """
    # Fan the replicates out over a process pool; each worker resamples and
    # re-runs only the second (targeting) stage of TMLE.
    with ProcessPoolExecutor(max_workers=n_jobs if n_jobs > 0 else None) as executor:
        futures = [
            executor.submit(
                single_boot,
                initial_estimates,
                event_times,
                event_indicator,
                target_times,
                target_events,
                key_1,
                key_0,
                stratify_by_event,
                **kwargs,
            )
            for _ in range(n_bootstrap)
        ]
        results = []
        if verbose >= 2:
            futures_iter = tqdm(
                as_completed(futures), total=n_bootstrap, desc="Bootstrapping"
            )
        else:
            futures_iter = as_completed(futures)
        # single_boot returns None for replicates where TMLE did not converge
        for f in futures_iter:
            result = f.result()
            if result is not None:
                results.append(result)
    if len(results) == 0:
        if verbose >= 1:
            warnings.warn(
                "Not a single bootstrap sample converged. Bootstrapped CIs will not be available.",
                RuntimeWarning,
            )
        return None
    if verbose >= 2:
        print(
            f"TMLE converged for {len(results)} out of {n_bootstrap} bootstrap samples."
        )
    results_df = pd.concat(results)
    # Percentile bootstrap: mean plus the alpha/2 and 1-alpha/2 quantiles
    summary_df = (
        results_df.groupby(["type", "Event", "Time", "Group"])["Pt Est"]
        .agg(
            mean_bootstrap="mean",
            CI_lower=lambda x: x.quantile(alpha / 2),
            CI_upper=lambda x: x.quantile(1 - alpha / 2),
        )
    ).reset_index()
    return summary_df
@@ -0,0 +1,254 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from dataclasses import dataclass, field
4
+ from typing import Optional, List, Union
5
+
6
+ from pytmle.g_computation import get_g_comp
7
+
8
@dataclass
class InitialEstimates:
    """Initial (pre-TMLE-update) nuisance estimates for one intervention group.

    Shape consistency is enforced incrementally: every time an estimate
    attribute is (re)assigned, ``__setattr__`` validates it against the
    dimensions recorded so far (first dimension = number of subjects, second
    dimension = number of time points where applicable).
    """

    # these fields must be filled on instantiation
    times: np.ndarray  # grid of evaluation time points
    g_star_obs: np.ndarray  # per-subject array (first dim = n subjects); presumably the g* intervention values — TODO confirm
    # these fields are optional and can be filled later
    propensity_scores: Optional[np.ndarray] = field(default=None)  # per-subject; width not checked
    hazards: Optional[np.ndarray] = field(default=None)  # second dim must equal len(times)
    event_free_survival_function: Optional[np.ndarray] = field(default=None)  # second dim must equal len(times)
    censoring_survival_function: Optional[np.ndarray] = field(default=None)  # second dim must equal len(times)
    # number of subjects; set lazily by the first per-subject array checked
    _length: Optional[int] = field(default=None, init=False)
    # read through the class-level default (True) while the dataclass
    # __init__ assigns the fields above, before the instance attribute exists
    _run_checks: bool = field(default=True, init=False)

    def __setattr__(self, name, value):
        # Validate shapes on every assignment of a non-None estimate field.
        if value is not None and self._run_checks:
            if name in ["propensity_scores",
                        "g_star_obs"]:
                # per-subject arrays: only the first dimension is checked
                self._check_compatibility(value, check_width=False)
            elif name in ["hazards",
                          "event_free_survival_function",
                          "censoring_survival_function"]:
                # time-indexed arrays: second dimension must match len(times)
                self._check_compatibility(value, check_width=True)
        super().__setattr__(name, value)

    def _check_compatibility(self, new_element, check_width):
        """Validate `new_element` against the dimensions seen so far.

        Raises:
            ValueError: if the first dimension differs from previously checked
                estimates, or (when `check_width`) the second dimension does
                not equal ``len(self.times)``.
        """
        # check that all given estimates have the same length (first dimension size)
        if self._length is None:
            self._length = len(new_element)
        elif self._length != len(new_element):
            raise ValueError(
                f"All initial estimates must have the same first dimension, got elements with sizes {self._length} and {len(new_element)}."
            )
        if check_width and ((len(new_element.shape) < 2) or (new_element.shape[1] != len(self.times))):
            raise ValueError(
                f"The second dimension of all initial estimates must be in line with the given times, got {len(self.times)} times and element of shape {new_element.shape}."
            )

    def __getitem__(self, key: Union[np.ndarray, List[int]]) -> "InitialEstimates":
        """
        Enable subsetting of an InitialEstimates object (needed for bootstrapping)

        Args:
            key (Union[np.ndarray, List[int]]): The indices of the subset.

        Returns:
            InitialEstimates: A new InitialEstimates object containing the subset.
        """
        # `times` is shared unchanged; only per-subject arrays are indexed.
        return InitialEstimates(
            times=self.times,
            g_star_obs=self.g_star_obs[key],
            propensity_scores=(
                self.propensity_scores[key]
                if self.propensity_scores is not None
                else None
            ),
            hazards=self.hazards[key] if self.hazards is not None else None,
            event_free_survival_function=(
                self.event_free_survival_function[key]
                if self.event_free_survival_function is not None
                else None
            ),
            censoring_survival_function=(
                self.censoring_survival_function[key]
                if self.censoring_survival_function is not None
                else None
            ),
        )

    def __len__(self):
        # Number of subjects.
        # NOTE(review): this is None (and len() raises TypeError) until at
        # least one per-subject estimate has been checked — confirm callers
        # never hit that state.
        return self._length
78
+
79
+
80
@dataclass
class UpdatedEstimates(InitialEstimates):
    """Nuisance estimates ready for / produced by the TMLE targeting step.

    Unlike ``InitialEstimates``, all four nuisance components are mandatory.
    On initialization the time grid is augmented with the target times (and
    time 0), and the truncated inverse-probability factor ``nuisance_weight``
    is precomputed.
    """

    # all have to be given
    propensity_scores: np.ndarray  # type: ignore
    hazards: np.ndarray  # type: ignore
    event_free_survival_function: np.ndarray  # type: ignore
    censoring_survival_function: np.ndarray  # type: ignore

    # is set on initialization (see _set_nuisance_weight)
    nuisance_weight: Optional[np.ndarray] = field(default=None, init=False)

    # truncation level for the weight denominator; defaults to
    # 5 / (sqrt(n) * log(n)) in __post_init__
    min_nuisance: Optional[float] = field(default=None)
    target_events: Optional[List[int]] = field(default=None)
    target_times: Optional[List[float]] = field(default=None)
    # G-computation estimate from BEFORE the TMLE update loop, if stored
    g_comp_est: Optional[pd.DataFrame] = field(default=None)
    # influence-curve values and their summary (used for standard errors)
    ic: Optional[pd.DataFrame] = field(default=None)
    summ_eic: Optional[pd.DataFrame] = field(default=None)

    def __post_init__(self):
        """Fill defaults, align the grid with target times, set the weights."""
        if self.min_nuisance is None:
            self.min_nuisance = (
                5
                / (len(self.propensity_scores) ** 0.5)
                / (np.log(len(self.propensity_scores)))
            )
        if self.target_times is None:
            # default if no target_times are given: only target the last time point
            self.target_times = [self.times[-1]]
        else:
            self._update_for_target_times()
        self._set_nuisance_weight()

    def _set_nuisance_weight(self):
        """Set nuisance_weight = 1 / max(g * S_c(t-), min_nuisance)."""
        # censoring survival lagged one step: value 1 before the first time
        lagged_censoring_survival_function = np.column_stack(
            [
                np.ones((self.censoring_survival_function.shape[0], 1)),
                self.censoring_survival_function[:, :-1],
            ],
        )
        nuisance_denominator = (
            self.propensity_scores[:, np.newaxis] * lagged_censoring_survival_function
        )
        # TODO: Add positivity check as in https://github.com/imbroglio-dc/concrete/blob/main/R/getInitialEstimate.R#L64?
        # truncate the denominator at min_nuisance to keep weights bounded
        self.nuisance_weight = 1 / np.maximum(nuisance_denominator, self.min_nuisance)  # type: ignore
        self._check_compatibility(self.nuisance_weight, check_width=True)

    @classmethod
    def from_initial_estimates(
        cls,
        initial_estimates: InitialEstimates,
        target_events: Optional[List[int]] = None,
        target_times: Optional[List[float]] = None,
        min_nuisance: Optional[float] = None,
    ) -> "UpdatedEstimates":
        """Alternate constructor promoting a fully populated InitialEstimates.

        Raises:
            AssertionError: if any of the four nuisance components is None.
                NOTE(review): assert is stripped under ``python -O``;
                consider raising ValueError instead.
        """
        assert (initial_estimates.propensity_scores is not None and
                initial_estimates.hazards is not None and
                initial_estimates.event_free_survival_function is not None and
                initial_estimates.censoring_survival_function is not None), "All initial estimates have to be provided prior to an instatiation of UpdatedEstimates."
        return cls(
            propensity_scores=initial_estimates.propensity_scores,
            hazards=initial_estimates.hazards,
            event_free_survival_function=initial_estimates.event_free_survival_function,
            censoring_survival_function=initial_estimates.censoring_survival_function,
            min_nuisance=min_nuisance,
            target_events=target_events,
            target_times=target_times,
            g_star_obs=initial_estimates.g_star_obs,
            times=initial_estimates.times,
        )

    def _update_for_target_times(self):
        """
        Updates the time-related attributes of the object to include target times (plus 0).
        This method performs the following steps:
        1. Combines and sorts the existing times and target times.
        2. Finds the indices where the target times should be inserted.
        3. Updates the `hazards`, `event_free_survival_function`, and `censoring_survival_function`
           attributes to account for the new target times by inserting appropriate values.
        4. Trims the `hazards`, `event_free_survival_function`, and `censoring_survival_function`
           attributes to only include times up to the maximum target time.
        5. Updates the `times` attribute to include the target times up to the maximum target time.
        Attributes:
            times (np.ndarray): Array of existing times.
            target_times (np.ndarray): Array of target times to be included.
            hazards (np.ndarray): Array of hazard values.
            event_free_survival_function (np.ndarray): Array of event-free survival function values.
            censoring_survival_function (np.ndarray): Array of censoring survival function values.
        """

        # Combine and sort the times
        all_times = np.sort(np.unique(np.concatenate((self.times, [0] + self.target_times))))  # type: ignore

        if len(all_times) > len(self.times):

            # Update hazards, event_free_survival_function, and censoring_survival_function
            if 0 not in self.times:
                # prepend time 0 with hazard 0 and survival 1
                self.times = np.insert(self.times, 0, 0)
                self.hazards = np.insert(self.hazards, 0, 0, axis=1)
                self.event_free_survival_function = np.insert(self.event_free_survival_function, 0, 1, axis=1)
                self.censoring_survival_function = np.insert(self.censoring_survival_function, 0, 1, axis=1)

            # Find the indices where the new times should be inserted
            insert_times = [t for t in self.target_times if t not in self.times]
            insert_indices = np.searchsorted(all_times, insert_times)

            self.times = all_times

            # Insert columns one by one; insert_indices are positions in the
            # final grid and ascend, so each insertion lands at its final slot
            # and copies the value of the preceding (earlier-time) column
            # (step functions are carried forward; inserted hazard is 0).
            hazards_new = self.hazards
            event_free_survival_function_new = self.event_free_survival_function
            censoring_survival_function_new = self.censoring_survival_function
            for idx in insert_indices:
                hazards_new = np.insert(hazards_new, idx, 0, axis=1)
                event_free_survival_function_new = np.insert(
                    event_free_survival_function_new,
                    idx,
                    event_free_survival_function_new[:, idx - 1],
                    axis=1,
                )
                censoring_survival_function_new = np.insert(
                    censoring_survival_function_new,
                    idx,
                    censoring_survival_function_new[:, idx - 1],
                    axis=1,
                )
            self.hazards = hazards_new
            self.event_free_survival_function = event_free_survival_function_new
            self.censoring_survival_function = censoring_survival_function_new

        # Find the index of the maximum target time
        max_target_time = max(self.target_times)  # type: ignore
        max_index = np.searchsorted(all_times, max_target_time)
        # Keep only times up to the maximum index
        self.times = all_times[: max_index + 1]
        self.hazards = self.hazards[:, : max_index + 1, :]
        self.event_free_survival_function = self.event_free_survival_function[
            :, : max_index + 1
        ]
        self.censoring_survival_function = self.censoring_survival_function[
            :, : max_index + 1
        ]

    def predict_mean_risks(self, g_comp: bool = False) -> pd.DataFrame:
        """
        Predict the mean risks for the target events and times.
        Args:
            g_comp (bool): Flag to return the G-computation estimate instead of the TMLE estimate.
        Returns:
            pd.DataFrame: DataFrame with columns 'Event', 'Time', 'Pt Est', and 'SE' containing the mean counterfactual risks.
        Raises:
            ValueError: if the required stored estimates (g_comp_est, or
                ic/summ_eic) are not available.
        """
        if g_comp:
            if self.g_comp_est is None:
                raise ValueError(
                    "g_comp_est is not available."
                )
            # return g_comp_estimate from BEFORE the TMLE update loop (standard error not available)
            # NOTE(review): pred_risk aliases self.g_comp_est, so the next
            # line adds an "SE" column to the stored DataFrame in place —
            # confirm this side effect is intended.
            pred_risk = self.g_comp_est
            pred_risk["SE"] = np.nan
        else:
            # return g_comp_estimate from AFTER the TMLE update loop
            if self.summ_eic is None or self.ic is None:
                raise ValueError(
                    "ic or summ_eic is not available."
                )
            pred_risk = get_g_comp(
                eval_times=self.times,
                hazards=self.hazards,
                total_surv=self.event_free_survival_function,
                target_time=self.target_times,  # type: ignore
                target_events=self.target_events,  # type: ignore
            )
            # SE = seEIC / sqrt(n), with n the number of subjects
            pred_risk = pred_risk.merge(self.summ_eic, on=["Event", "Time"])
            pred_risk["SE"] = pred_risk["seEIC"] / len(self)**0.5
        pred_risk = pred_risk[["Event", "Time", "Risk", "SE"]]
        pred_risk.rename(columns={"Risk": "Pt Est"}, inplace=True)
        return pred_risk