PyPI - diff-diff - Versions diffs - 3.0.1__cp314-cp314-win_amd64.whl - Mend

diff-diff 3.0.1__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

diff_diff/__init__.py +382 -0
diff_diff/_backend.py +134 -0
diff_diff/_rust_backend.cp314-win_amd64.pyd +0 -0
diff_diff/bacon.py +1140 -0
diff_diff/bootstrap_utils.py +730 -0
diff_diff/continuous_did.py +1626 -0
diff_diff/continuous_did_bspline.py +190 -0
diff_diff/continuous_did_results.py +374 -0
diff_diff/datasets.py +815 -0
diff_diff/diagnostics.py +882 -0
diff_diff/efficient_did.py +1770 -0
diff_diff/efficient_did_bootstrap.py +359 -0
diff_diff/efficient_did_covariates.py +899 -0
diff_diff/efficient_did_results.py +368 -0
diff_diff/efficient_did_weights.py +617 -0
diff_diff/estimators.py +1501 -0
diff_diff/honest_did.py +2585 -0
diff_diff/imputation.py +2458 -0
diff_diff/imputation_bootstrap.py +418 -0
diff_diff/imputation_results.py +448 -0
diff_diff/linalg.py +2538 -0
diff_diff/power.py +2588 -0
diff_diff/practitioner.py +869 -0
diff_diff/prep.py +1738 -0
diff_diff/prep_dgp.py +1718 -0
diff_diff/pretrends.py +1105 -0
diff_diff/results.py +918 -0
diff_diff/stacked_did.py +1049 -0
diff_diff/stacked_did_results.py +339 -0
diff_diff/staggered.py +3895 -0
diff_diff/staggered_aggregation.py +864 -0
diff_diff/staggered_bootstrap.py +752 -0
diff_diff/staggered_results.py +416 -0
diff_diff/staggered_triple_diff.py +1545 -0
diff_diff/staggered_triple_diff_results.py +416 -0
diff_diff/sun_abraham.py +1685 -0
diff_diff/survey.py +1981 -0
diff_diff/synthetic_did.py +1136 -0
diff_diff/triple_diff.py +2047 -0
diff_diff/trop.py +952 -0
diff_diff/trop_global.py +1270 -0
diff_diff/trop_local.py +1307 -0
diff_diff/trop_results.py +356 -0
diff_diff/twfe.py +542 -0
diff_diff/two_stage.py +1952 -0
diff_diff/two_stage_bootstrap.py +520 -0
diff_diff/two_stage_results.py +400 -0
diff_diff/utils.py +1902 -0
diff_diff/visualization/__init__.py +61 -0
diff_diff/visualization/_common.py +328 -0
diff_diff/visualization/_continuous.py +274 -0
diff_diff/visualization/_diagnostic.py +817 -0
diff_diff/visualization/_event_study.py +1086 -0
diff_diff/visualization/_power.py +661 -0
diff_diff/visualization/_staggered.py +833 -0
diff_diff/visualization/_synthetic.py +197 -0
diff_diff/wooldridge.py +1285 -0
diff_diff/wooldridge_results.py +349 -0
diff_diff-3.0.1.dist-info/METADATA +2997 -0
diff_diff-3.0.1.dist-info/RECORD +62 -0
diff_diff-3.0.1.dist-info/WHEEL +4 -0
diff_diff-3.0.1.dist-info/sboms/diff_diff_rust.cyclonedx.json +5843 -0

diff_diff/imputation_bootstrap.py ADDED Viewed

@@ -0,0 +1,418 @@
+"""
+Bootstrap inference methods for the Imputation DiD estimator.
+This module contains ImputationDiDBootstrapMixin, which provides multiplier
+bootstrap inference. Extracted from imputation.py for module size management.
+"""
+import warnings
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple
+import numpy as np
+import pandas as pd
+from diff_diff.bootstrap_utils import (
+    compute_effect_bootstrap_stats as _compute_effect_bootstrap_stats,
+)
+from diff_diff.bootstrap_utils import (
+    generate_bootstrap_weights_batch as _generate_bootstrap_weights_batch,
+)
+from diff_diff.bootstrap_utils import (
+    generate_survey_multiplier_weights_batch as _generate_survey_multiplier_weights_batch,
+)
+from diff_diff.imputation_results import ImputationBootstrapResults
+__all__ = [
+    "ImputationDiDBootstrapMixin",
+]
+def _compute_target_weights(
+    tau_hat: np.ndarray,
+    target_mask: np.ndarray,
+) -> "tuple[np.ndarray, int]":
+    """
+    Equal weights for finite tau_hat observations within target_mask.
+    Used by both aggregation and bootstrap paths to avoid weight logic
+    duplication.
+    Parameters
+    ----------
+    tau_hat : np.ndarray
+        Per-observation treatment effects (may contain NaN).
+    target_mask : np.ndarray
+        Boolean mask selecting the target subset within tau_hat.
+    Returns
+    -------
+    weights : np.ndarray
+        Weight array (same length as tau_hat). 1/n_valid for finite
+        observations in target_mask, 0 elsewhere.
+    n_valid : int
+        Number of finite observations in the target subset.
+    """
+    finite_target = np.isfinite(tau_hat) & target_mask
+    n_valid = int(finite_target.sum())
+    weights = np.zeros(len(tau_hat))
+    if n_valid > 0:
+        weights[np.where(finite_target)[0]] = 1.0 / n_valid
+    return weights, n_valid
+class ImputationDiDBootstrapMixin:
+    """Mixin providing bootstrap inference methods for ImputationDiD."""
+    # Type hints for attributes accessed from the main class
+    n_bootstrap: int
+    bootstrap_weights: str
+    alpha: float
+    seed: Optional[int]
+    anticipation: int
+    horizon_max: Optional[int]
+    if TYPE_CHECKING:
+        def _compute_cluster_psi_sums(
+            self,
+            df: pd.DataFrame,
+            outcome: str,
+            unit: str,
+            time: str,
+            first_treat: str,
+            covariates: Optional[List[str]],
+            omega_0_mask: pd.Series,
+            omega_1_mask: pd.Series,
+            unit_fe: Dict[Any, float],
+            time_fe: Dict[Any, float],
+            grand_mean: float,
+            delta_hat: Optional[np.ndarray],
+            weights: np.ndarray,
+            cluster_var: str,
+            kept_cov_mask: Optional[np.ndarray] = None,
+            survey_weights_0: Optional[np.ndarray] = None,
+        ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: ...
+        @staticmethod
+        def _build_cohort_rel_times(
+            df: pd.DataFrame,
+            first_treat: str,
+        ) -> Dict[Any, Set[int]]: ...
+        @staticmethod
+        def _compute_balanced_cohort_mask(
+            df_treated: pd.DataFrame,
+            first_treat: str,
+            all_horizons: List[int],
+            balance_e: int,
+            cohort_rel_times: Dict[Any, Set[int]],
+        ) -> np.ndarray: ...
+    def _precompute_bootstrap_psi(
+        self,
+        df: pd.DataFrame,
+        outcome: str,
+        unit: str,
+        time: str,
+        first_treat: str,
+        covariates: Optional[List[str]],
+        omega_0_mask: pd.Series,
+        omega_1_mask: pd.Series,
+        unit_fe: Dict[Any, float],
+        time_fe: Dict[Any, float],
+        grand_mean: float,
+        delta_hat: Optional[np.ndarray],
+        cluster_var: str,
+        kept_cov_mask: Optional[np.ndarray],
+        overall_weights: np.ndarray,
+        event_study_effects: Optional[Dict[int, Dict[str, Any]]],
+        group_effects: Optional[Dict[Any, Dict[str, Any]]],
+        treatment_groups: List[Any],
+        tau_hat: np.ndarray,
+        balance_e: Optional[int],
+        survey_weights_0: Optional[np.ndarray] = None,
+        survey_weights_1: Optional[np.ndarray] = None,
+    ) -> Dict[str, Any]:
+        """
+        Pre-compute cluster-level influence function sums for each bootstrap target.
+        For each aggregation target (overall, per-horizon, per-group), computes
+        psi_i = sum_t v_it * epsilon_tilde_it for each cluster. The multiplier
+        bootstrap then perturbs these psi sums with multiplier weights
+        (rademacher/mammen/webb; configurable via ``bootstrap_weights``).
+        Computational cost scales with the number of aggregation targets, since
+        each target requires its own v_untreated computation (weight-dependent).
+        """
+        result: Dict[str, Any] = {}
+        common = dict(
+            df=df,
+            outcome=outcome,
+            unit=unit,
+            time=time,
+            first_treat=first_treat,
+            covariates=covariates,
+            omega_0_mask=omega_0_mask,
+            omega_1_mask=omega_1_mask,
+            unit_fe=unit_fe,
+            time_fe=time_fe,
+            grand_mean=grand_mean,
+            delta_hat=delta_hat,
+            cluster_var=cluster_var,
+            kept_cov_mask=kept_cov_mask,
+            survey_weights_0=survey_weights_0,
+        )
+        # Overall ATT
+        overall_psi, cluster_ids, _ = self._compute_cluster_psi_sums(
+            **common, weights=overall_weights
+        )
+        result["overall"] = (overall_psi, cluster_ids)
+        # Event study: per-horizon weights
+        if event_study_effects:
+            result["event_study"] = {}
+            df_1 = df.loc[omega_1_mask]
+            rel_times = df_1["_rel_time"].values
+            all_horizons = sorted(set(int(h) for h in rel_times if np.isfinite(h)))
+            if self.horizon_max is not None:
+                all_horizons = [h for h in all_horizons if abs(h) <= self.horizon_max]
+            # Balanced cohort mask (same logic as _aggregate_event_study)
+            balanced_mask = None
+            if balance_e is not None:
+                cohort_rel_times = self._build_cohort_rel_times(df, first_treat)
+                balanced_mask = self._compute_balanced_cohort_mask(
+                    df_1, first_treat, all_horizons, balance_e, cohort_rel_times
+                )
+            ref_period = -1 - self.anticipation
+            for h in event_study_effects:
+                if event_study_effects[h].get("n_obs", 0) == 0:
+                    continue
+                if h == ref_period:
+                    continue
+                if not np.isfinite(event_study_effects[h].get("effect", np.nan)):
+                    continue
+                # Skip pre-period horizons — their SEs come from Test 1
+                # lead regression, not bootstrap
+                if h < -self.anticipation:
+                    continue
+                h_mask = rel_times == h
+                if balanced_mask is not None:
+                    h_mask = h_mask & balanced_mask
+                # When survey weights are provided, build weights proportional
+                # to treated-observation survey weights (matching the analytical
+                # path in _aggregate_event_study).  Otherwise use equal weights.
+                if survey_weights_1 is not None:
+                    finite_target = np.isfinite(tau_hat) & h_mask
+                    n_valid_h = int(finite_target.sum())
+                    if n_valid_h == 0:
+                        continue
+                    treated_sw = survey_weights_1
+                    sw_h = treated_sw[h_mask]
+                    finite_in_h = np.isfinite(tau_hat[h_mask])
+                    sw_finite = sw_h[finite_in_h]
+                    weights_h = np.zeros(len(tau_hat))
+                    if sw_finite.sum() > 0:
+                        h_indices = np.where(h_mask)[0]
+                        finite_indices = h_indices[finite_in_h]
+                        weights_h[finite_indices] = sw_finite / sw_finite.sum()
+                else:
+                    weights_h, n_valid_h = _compute_target_weights(tau_hat, h_mask)
+                    if n_valid_h == 0:
+                        continue
+                psi_h, _, _ = self._compute_cluster_psi_sums(**common, weights=weights_h)
+                result["event_study"][h] = psi_h
+        # Group effects: per-group weights
+        if group_effects:
+            result["group"] = {}
+            df_1 = df.loc[omega_1_mask]
+            cohorts = df_1[first_treat].values
+            for g in group_effects:
+                if group_effects[g].get("n_obs", 0) == 0:
+                    continue
+                if not np.isfinite(group_effects[g].get("effect", np.nan)):
+                    continue
+                g_mask = cohorts == g
+                # When survey weights are provided, build weights proportional
+                # to treated-observation survey weights (matching the analytical
+                # path in _aggregate_group).  Otherwise use equal weights.
+                if survey_weights_1 is not None:
+                    finite_target = np.isfinite(tau_hat) & g_mask
+                    n_valid_g = int(finite_target.sum())
+                    if n_valid_g == 0:
+                        continue
+                    treated_sw = survey_weights_1
+                    sw_g = treated_sw[g_mask]
+                    finite_in_g = np.isfinite(tau_hat[g_mask])
+                    sw_finite = sw_g[finite_in_g]
+                    weights_g = np.zeros(len(tau_hat))
+                    if sw_finite.sum() > 0:
+                        g_indices = np.where(g_mask)[0]
+                        finite_indices = g_indices[finite_in_g]
+                        weights_g[finite_indices] = sw_finite / sw_finite.sum()
+                else:
+                    weights_g, n_valid_g = _compute_target_weights(tau_hat, g_mask)
+                    if n_valid_g == 0:
+                        continue
+                psi_g, _, _ = self._compute_cluster_psi_sums(**common, weights=weights_g)
+                result["group"][g] = psi_g
+        return result
+    def _run_bootstrap(
+        self,
+        original_att: float,
+        original_event_study: Optional[Dict[int, Dict[str, Any]]],
+        original_group: Optional[Dict[Any, Dict[str, Any]]],
+        psi_data: Dict[str, Any],
+        resolved_survey: Optional[Any] = None,
+    ) -> ImputationBootstrapResults:
+        """
+        Run multiplier bootstrap on pre-computed influence function sums.
+        Uses T_b = sum_i w_b_i * psi_i where w_b_i are multiplier weights
+        (rademacher/mammen/webb; configurable via ``bootstrap_weights``)
+        and psi_i are cluster-level influence function sums from Theorem 3.
+        SE = std(T_b, ddof=1).
+        When ``resolved_survey`` carries PSU/strata/FPC structure, weights are
+        generated via ``generate_survey_multiplier_weights_batch`` so the
+        bootstrap variance respects the survey design (stratification and FPC
+        scaling).
+        """
+        if self.n_bootstrap < 50:
+            warnings.warn(
+                f"n_bootstrap={self.n_bootstrap} is low. Consider n_bootstrap >= 199 "
+                "for reliable inference.",
+                UserWarning,
+                stacklevel=3,
+            )
+        rng = np.random.default_rng(self.seed)
+        overall_psi, cluster_ids = psi_data["overall"]
+        n_clusters = len(cluster_ids)
+        # Determine whether to use survey-aware bootstrap weights
+        _use_survey_bootstrap = resolved_survey is not None and (
+            resolved_survey.strata is not None
+            or resolved_survey.psu is not None
+            or resolved_survey.fpc is not None
+        )
+        # Generate ALL weights upfront: shape (n_bootstrap, n_clusters)
+        if _use_survey_bootstrap:
+            psu_weights, psu_ids = _generate_survey_multiplier_weights_batch(
+                self.n_bootstrap, resolved_survey, self.bootstrap_weights, rng
+            )
+            # Reindex PSU weights to match cluster_ids ordering.
+            # cluster_ids are unique PSU values from _compute_cluster_psi_sums;
+            # psu_ids are unique PSU values from the survey weight generator.
+            # Build a map from psu_id -> column index in psu_weights.
+            psu_id_to_col = {int(p): c for c, p in enumerate(psu_ids)}
+            cluster_to_psu_col = np.array([psu_id_to_col[int(cid)] for cid in cluster_ids])
+            all_weights = psu_weights[:, cluster_to_psu_col]
+        else:
+            all_weights = _generate_bootstrap_weights_batch(
+                self.n_bootstrap, n_clusters, self.bootstrap_weights, rng
+            )
+        # Overall ATT bootstrap draws
+        boot_overall = np.dot(all_weights, overall_psi)  # (n_bootstrap,)
+        # Event study: loop over horizons
+        boot_event_study: Optional[Dict[int, np.ndarray]] = None
+        if original_event_study and "event_study" in psi_data:
+            boot_event_study = {}
+            for h, psi_h in psi_data["event_study"].items():
+                boot_event_study[h] = np.dot(all_weights, psi_h)
+        # Group effects: loop over groups
+        boot_group: Optional[Dict[Any, np.ndarray]] = None
+        if original_group and "group" in psi_data:
+            boot_group = {}
+            for g, psi_g in psi_data["group"].items():
+                boot_group[g] = np.dot(all_weights, psi_g)
+        # --- Inference (percentile bootstrap, matching CS/SA convention) ---
+        # Shift perturbation-centered draws to effect-centered draws.
+        # The multiplier bootstrap produces T_b = sum w_b_i * psi_i centered at 0.
+        # CS adds the original effect back (L411 of staggered_bootstrap.py).
+        # We do the same here so percentile CIs and empirical p-values work correctly.
+        boot_overall_shifted = boot_overall + original_att
+        overall_se, overall_ci, overall_p = _compute_effect_bootstrap_stats(
+            original_att,
+            boot_overall_shifted,
+            alpha=self.alpha,
+            context="ImputationDiD overall ATT",
+        )
+        event_study_ses = None
+        event_study_cis = None
+        event_study_p_values = None
+        if boot_event_study and original_event_study:
+            event_study_ses = {}
+            event_study_cis = {}
+            event_study_p_values = {}
+            for h in boot_event_study:
+                orig_eff = original_event_study[h]["effect"]
+                shifted_h = boot_event_study[h] + orig_eff
+                se_h, ci_h, p_h = _compute_effect_bootstrap_stats(
+                    orig_eff,
+                    shifted_h,
+                    alpha=self.alpha,
+                    context=f"ImputationDiD event study (h={h})",
+                )
+                event_study_ses[h] = se_h
+                event_study_cis[h] = ci_h
+                event_study_p_values[h] = p_h
+        group_ses = None
+        group_cis = None
+        group_p_values = None
+        if boot_group and original_group:
+            group_ses = {}
+            group_cis = {}
+            group_p_values = {}
+            for g in boot_group:
+                orig_eff = original_group[g]["effect"]
+                shifted_g = boot_group[g] + orig_eff
+                se_g, ci_g, p_g = _compute_effect_bootstrap_stats(
+                    orig_eff,
+                    shifted_g,
+                    alpha=self.alpha,
+                    context=f"ImputationDiD group effect (g={g})",
+                )
+                group_ses[g] = se_g
+                group_cis[g] = ci_g
+                group_p_values[g] = p_g
+        return ImputationBootstrapResults(
+            n_bootstrap=self.n_bootstrap,
+            weight_type=self.bootstrap_weights,
+            alpha=self.alpha,
+            overall_att_se=overall_se,
+            overall_att_ci=overall_ci,
+            overall_att_p_value=overall_p,
+            event_study_ses=event_study_ses,
+            event_study_cis=event_study_cis,
+            event_study_p_values=event_study_p_values,
+            group_ses=group_ses,
+            group_cis=group_cis,
+            group_p_values=group_p_values,
+            bootstrap_distribution=boot_overall_shifted,
+        )