PyPI - diff-diff - Versions diffs - 3.0.1__cp314-cp314-win_amd64.whl - Mend

diff-diff 3.0.1__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

diff_diff/__init__.py +382 -0
diff_diff/_backend.py +134 -0
diff_diff/_rust_backend.cp314-win_amd64.pyd +0 -0
diff_diff/bacon.py +1140 -0
diff_diff/bootstrap_utils.py +730 -0
diff_diff/continuous_did.py +1626 -0
diff_diff/continuous_did_bspline.py +190 -0
diff_diff/continuous_did_results.py +374 -0
diff_diff/datasets.py +815 -0
diff_diff/diagnostics.py +882 -0
diff_diff/efficient_did.py +1770 -0
diff_diff/efficient_did_bootstrap.py +359 -0
diff_diff/efficient_did_covariates.py +899 -0
diff_diff/efficient_did_results.py +368 -0
diff_diff/efficient_did_weights.py +617 -0
diff_diff/estimators.py +1501 -0
diff_diff/honest_did.py +2585 -0
diff_diff/imputation.py +2458 -0
diff_diff/imputation_bootstrap.py +418 -0
diff_diff/imputation_results.py +448 -0
diff_diff/linalg.py +2538 -0
diff_diff/power.py +2588 -0
diff_diff/practitioner.py +869 -0
diff_diff/prep.py +1738 -0
diff_diff/prep_dgp.py +1718 -0
diff_diff/pretrends.py +1105 -0
diff_diff/results.py +918 -0
diff_diff/stacked_did.py +1049 -0
diff_diff/stacked_did_results.py +339 -0
diff_diff/staggered.py +3895 -0
diff_diff/staggered_aggregation.py +864 -0
diff_diff/staggered_bootstrap.py +752 -0
diff_diff/staggered_results.py +416 -0
diff_diff/staggered_triple_diff.py +1545 -0
diff_diff/staggered_triple_diff_results.py +416 -0
diff_diff/sun_abraham.py +1685 -0
diff_diff/survey.py +1981 -0
diff_diff/synthetic_did.py +1136 -0
diff_diff/triple_diff.py +2047 -0
diff_diff/trop.py +952 -0
diff_diff/trop_global.py +1270 -0
diff_diff/trop_local.py +1307 -0
diff_diff/trop_results.py +356 -0
diff_diff/twfe.py +542 -0
diff_diff/two_stage.py +1952 -0
diff_diff/two_stage_bootstrap.py +520 -0
diff_diff/two_stage_results.py +400 -0
diff_diff/utils.py +1902 -0
diff_diff/visualization/__init__.py +61 -0
diff_diff/visualization/_common.py +328 -0
diff_diff/visualization/_continuous.py +274 -0
diff_diff/visualization/_diagnostic.py +817 -0
diff_diff/visualization/_event_study.py +1086 -0
diff_diff/visualization/_power.py +661 -0
diff_diff/visualization/_staggered.py +833 -0
diff_diff/visualization/_synthetic.py +197 -0
diff_diff/wooldridge.py +1285 -0
diff_diff/wooldridge_results.py +349 -0
diff_diff-3.0.1.dist-info/METADATA +2997 -0
diff_diff-3.0.1.dist-info/RECORD +62 -0
diff_diff-3.0.1.dist-info/WHEEL +4 -0
diff_diff-3.0.1.dist-info/sboms/diff_diff_rust.cyclonedx.json +5843 -0

diff_diff/stacked_did.py ADDED Viewed

@@ -0,0 +1,1049 @@
+"""
+Wing, Freedman & Hollingsworth (2024) Stacked Difference-in-Differences Estimator.
+Implements the stacked DiD estimator from Wing, Freedman & Hollingsworth (2024),
+NBER Working Paper 32054. The key contribution: naive stacked DiD regressions are
+biased because they implicitly weight treatment and control group trends differently
+across sub-experiments. The authors derive corrective Q-weights that make a weighted
+stacked regression identify the "trimmed aggregate ATT" — a well-defined convex
+combination of group-time ATTs with stable composition across event time.
+The implementation follows the R reference code at
+https://github.com/hollina/stacked-did-weights.
+References
+----------
+Wing, C., Freedman, S. M., & Hollingsworth, A. (2024). Stacked
+    Difference-in-Differences. NBER Working Paper 32054.
+"""
+import copy
+import warnings
+from typing import Any, Dict, List, Optional, Tuple
+import numpy as np
+import pandas as pd
+from diff_diff.linalg import solve_ols
+from diff_diff.stacked_did_results import StackedDiDResults  # noqa: F401 (re-export)
+from diff_diff.utils import safe_inference
+__all__ = [
+    "StackedDiD",
+    "StackedDiDResults",
+    "stacked_did",
+]
+class StackedDiD:
+    """
+    Stacked Difference-in-Differences estimator.
+    Implements Wing, Freedman & Hollingsworth (2024). Builds a stacked
+    dataset of sub-experiments (one per adoption cohort), applies
+    corrective Q-weights to address implicit weighting bias in naive
+    stacked regressions, and runs a weighted event-study regression.
+    Parameters
+    ----------
+    kappa_pre : int, default=1
+        Number of pre-treatment event-time periods in the event window.
+        The event window spans [-kappa_pre, ..., kappa_post].
+    kappa_post : int, default=1
+        Number of post-treatment event-time periods.
+    weighting : str, default="aggregate"
+        Target estimand weighting scheme per Table 1 of the paper:
+        - "aggregate": Equal weight per adoption event (trimmed aggregate ATT)
+        - "population": Weight by population size of treated cohort
+        - "sample_share": Weight by sample share of each sub-experiment
+    clean_control : str, default="not_yet_treated"
+        How to define clean controls per Appendix A of the paper:
+        - "not_yet_treated": Units with A_s > a + kappa_post
+        - "strict": Units with A_s > a + kappa_post + kappa_pre
+        - "never_treated": Only units with A_s = infinity
+    cluster : str, default="unit"
+        Clustering level for standard errors:
+        - "unit": Cluster on original unit identifier
+        - "unit_subexp": Cluster on (unit, sub_experiment) pairs
+    alpha : float, default=0.05
+        Significance level for confidence intervals.
+    anticipation : int, default=0
+        Number of anticipation periods. When anticipation > 0:
+        - Reference period shifts from e=-1 to e=-1-anticipation
+        - Post-treatment includes anticipation periods (e >= -anticipation)
+        - Event window expands by anticipation pre-periods
+        Consistent with ImputationDiD, TwoStageDiD, SunAbraham.
+    rank_deficient_action : str, default="warn"
+        Action when design matrix is rank-deficient:
+        - "warn": Issue warning and drop linearly dependent columns
+        - "error": Raise ValueError
+        - "silent": Drop columns silently
+    Attributes
+    ----------
+    results_ : StackedDiDResults
+        Estimation results after calling fit().
+    is_fitted_ : bool
+        Whether the model has been fitted.
+    Examples
+    --------
+    Basic usage:
+    >>> from diff_diff import StackedDiD, generate_staggered_data
+    >>> data = generate_staggered_data(n_units=200, seed=42)
+    >>> est = StackedDiD(kappa_pre=2, kappa_post=2)
+    >>> results = est.fit(data, outcome='outcome', unit='unit',
+    ...                   time='period', first_treat='first_treat')
+    >>> results.print_summary()
+    With event study:
+    >>> results = est.fit(data, outcome='outcome', unit='unit',
+    ...                   time='period', first_treat='first_treat',
+    ...                   aggregate='event_study')
+    >>> from diff_diff import plot_event_study
+    >>> plot_event_study(results)
+    Notes
+    -----
+    The stacked estimator addresses TWFE bias by:
+    1. Creating one sub-experiment per adoption cohort with clean controls
+    2. Applying Q-weights to reweight the stacked regression
+    3. Running a single event-study WLS regression on the weighted stack
+    References
+    ----------
+    Wing, C., Freedman, S. M., & Hollingsworth, A. (2024). Stacked
+        Difference-in-Differences. NBER Working Paper 32054.
+    """
+    def __init__(
+        self,
+        kappa_pre: int = 1,
+        kappa_post: int = 1,
+        weighting: str = "aggregate",
+        clean_control: str = "not_yet_treated",
+        cluster: str = "unit",
+        alpha: float = 0.05,
+        anticipation: int = 0,
+        rank_deficient_action: str = "warn",
+    ):
+        """
+        Store the estimator configuration after validating categorical options.
+        Only the four string-valued options are checked here; the numeric
+        arguments (kappa_pre, kappa_post, alpha, anticipation) are stored
+        as given.
+        Raises
+        ------
+        ValueError
+            If weighting, clean_control, cluster or rank_deficient_action
+            is not one of its accepted values. Options are checked in that
+            order, so the first invalid one determines the message.
+        """
+        # (option name, supplied value, accepted values, text used in the error)
+        option_table = (
+            (
+                "weighting",
+                weighting,
+                ("aggregate", "population", "sample_share"),
+                "'aggregate', 'population', or 'sample_share'",
+            ),
+            (
+                "clean_control",
+                clean_control,
+                ("not_yet_treated", "strict", "never_treated"),
+                "'not_yet_treated', 'strict', or 'never_treated'",
+            ),
+            (
+                "cluster",
+                cluster,
+                ("unit", "unit_subexp"),
+                "'unit' or 'unit_subexp'",
+            ),
+            (
+                "rank_deficient_action",
+                rank_deficient_action,
+                ("warn", "error", "silent"),
+                "'warn', 'error', or 'silent'",
+            ),
+        )
+        for opt_name, supplied, accepted, accepted_text in option_table:
+            if supplied not in accepted:
+                raise ValueError(f"{opt_name} must be {accepted_text}, got '{supplied}'")
+        # Event-window and inference settings
+        self.kappa_pre = kappa_pre
+        self.kappa_post = kappa_post
+        self.anticipation = anticipation
+        self.alpha = alpha
+        # Estimand / design choices
+        self.weighting = weighting
+        self.clean_control = clean_control
+        self.cluster = cluster
+        self.rank_deficient_action = rank_deficient_action
+        # Fitted state, populated by fit()
+        self.results_: Optional[StackedDiDResults] = None
+        self.is_fitted_ = False
+    def fit(
+        self,
+        data: pd.DataFrame,
+        outcome: str,
+        unit: str,
+        time: str,
+        first_treat: str,
+        aggregate: Optional[str] = None,
+        population: Optional[str] = None,
+        survey_design=None,
+    ) -> StackedDiDResults:
+        """
+        Fit the stacked DiD estimator.
+        The method validates its arguments, trims adoption events, builds one
+        sub-experiment per retained event, computes Q-weights on the stacked
+        data, runs a single weighted event-study regression, and summarises
+        the post-treatment interaction coefficients into an overall ATT.
+        The result is stored on ``self.results_`` and ``self.is_fitted_`` is
+        set to True.
+        Parameters
+        ----------
+        data : pd.DataFrame
+            Panel data with unit and time identifiers. The frame is copied
+            before any column is modified.
+        outcome : str
+            Name of outcome variable column.
+        unit : str
+            Name of unit identifier column.
+        time : str
+            Name of time period column. Coerced with ``pd.to_numeric``.
+        first_treat : str
+            Name of column indicating when unit was first treated.
+            Use 0 or np.inf for never-treated units (0 is recoded to inf).
+            Coerced with ``pd.to_numeric``.
+        aggregate : str, optional
+            Aggregation mode: None/"simple" (overall ATT only) or
+            "event_study". Group aggregation is not supported because
+            the pooled stacked regression cannot produce cohort-specific
+            effects. Use CallawaySantAnna or ImputationDiD for
+            cohort-level estimates. The event-study regression is always
+            run; this argument only controls whether per-event-time
+            effects are included in the results.
+        population : str, optional
+            Column name for population weights. Required only when
+            weighting="population".
+        survey_design : SurveyDesign, optional
+            Survey design specification for design-based inference. When
+            provided, uses Taylor Series Linearization for variance
+            estimation and applies sampling weights to the regression.
+            If the resolved design reports ``uses_replicate_variance``, the
+            replicate-refit variance path is used instead. Designs whose
+            ``weight_type`` is "fweight" or "aweight" are rejected.
+        Returns
+        -------
+        StackedDiDResults
+            Object containing all estimation results.
+        Raises
+        ------
+        ValueError
+            If ``aggregate`` is unsupported, required columns are missing,
+            ``population`` is omitted with weighting="population", the survey
+            weight type is unsupported, no treated units are found, or every
+            sub-experiment is empty after filtering.
+        """
+        # ---- Validate inputs ----
+        # "group"/"all" get a dedicated message pointing at alternative estimators.
+        if aggregate in ("group", "all"):
+            raise ValueError(
+                f"aggregate='{aggregate}' is not supported by StackedDiD. "
+                "The pooled stacked regression cannot produce cohort-specific "
+                "effects. Use CallawaySantAnna or ImputationDiD for "
+                "cohort-level estimates."
+            )
+        if aggregate not in (None, "simple", "event_study"):
+            raise ValueError(
+                f"aggregate must be None, 'simple', or 'event_study', " f"got '{aggregate}'"
+            )
+        required_cols = [outcome, unit, time, first_treat]
+        if population is not None:
+            required_cols.append(population)
+        missing = [c for c in required_cols if c not in data.columns]
+        if missing:
+            raise ValueError(f"Missing columns: {missing}")
+        if self.weighting == "population" and population is None:
+            raise ValueError("population column must be specified when weighting='population'")
+        # ---- Resolve survey design ----
+        # Imported inside the method rather than at module level.
+        from diff_diff.survey import (
+            SurveyDesign,
+            _resolve_survey_for_fit,
+        )
+        resolved_survey, survey_weights, survey_weight_type, survey_metadata = (
+            _resolve_survey_for_fit(survey_design, data, "analytical")
+        )
+        # True only when a survey design was resolved and it requests replicate variance.
+        _uses_replicate_sd = (
+            resolved_survey is not None and resolved_survey.uses_replicate_variance
+        )
+        # Reject fweight and aweight — Q-weight composition is ratio-valued
+        # and breaks both frequency-weight (integer) and analytic-weight
+        # (inverse-variance) semantics after multiplicative composition
+        if (
+            survey_design is not None
+            and hasattr(survey_design, "weight_type")
+            and survey_design.weight_type in ("fweight", "aweight")
+        ):
+            raise ValueError(
+                f"StackedDiD does not support weight_type='{survey_design.weight_type}' "
+                "because Q-weight composition changes the weight semantics. "
+                "Use weight_type='pweight' (default) instead."
+            )
+        # Collect survey design column names for propagation through sub-experiments
+        survey_cols: List[str] = []
+        if survey_design is not None and isinstance(survey_design, SurveyDesign):
+            for attr in ("weights", "strata", "psu", "fpc"):
+                col_name = getattr(survey_design, attr, None)
+                if col_name is not None:
+                    survey_cols.append(col_name)
+            # Propagate replicate weight columns through stacked dataset
+            if survey_design.replicate_weights is not None:
+                survey_cols.extend(survey_design.replicate_weights)
+        # Work on a copy so the coercions below do not modify the caller's frame.
+        df = data.copy()
+        df[time] = pd.to_numeric(df[time])
+        df[first_treat] = pd.to_numeric(df[first_treat])
+        # ---- Data setup ----
+        # Handle never-treated encoding: both 0 and inf -> inf
+        df[first_treat] = df[first_treat].replace(0, np.inf)
+        # Build unit_info: one row per unit
+        # Takes the first first_treat value seen for each unit; whether the
+        # value is constant within a unit is not checked here.
+        unit_info = (
+            df.groupby(unit)
+            .agg({first_treat: "first"})
+            .reset_index()
+            .rename(columns={first_treat: "_first_treat"})
+        )
+        T_min = int(df[time].min())
+        T_max = int(df[time].max())
+        time_periods = sorted(df[time].unique())
+        # Extract unique adoption events (finite first_treat values)
+        omega_A = sorted([a for a in unit_info["_first_treat"].unique() if np.isfinite(a)])
+        if len(omega_A) == 0:
+            raise ValueError(
+                "No treated units found. Check 'first_treat' column "
+                "(use 0 or np.inf for never-treated units)."
+            )
+        # ---- Trim adoption events (IC1 + IC2) ----
+        # Raises ValueError itself if every adoption event is trimmed.
+        omega_kappa, trimmed = self._trim_adoption_events(omega_A, T_min, T_max, unit_info)
+        # ---- Build stacked dataset ----
+        sub_experiments = []
+        skipped_events = []
+        for a in omega_kappa:
+            sub_exp = self._build_sub_experiment(
+                df,
+                unit_info,
+                a,
+                unit,
+                time,
+                first_treat,
+                outcome,
+                extra_cols=survey_cols,
+            )
+            # A None or zero-row result means the event contributes nothing to the stack.
+            if sub_exp is not None and len(sub_exp) > 0:
+                sub_experiments.append(sub_exp)
+            else:
+                skipped_events.append(a)
+        if skipped_events:
+            warnings.warn(
+                f"Sub-experiments for events {skipped_events} were empty " f"after filtering.",
+                UserWarning,
+                stacklevel=2,
+            )
+        if len(sub_experiments) == 0:
+            raise ValueError(
+                "All sub-experiments are empty after filtering. "
+                "Check your data or reduce kappa values."
+            )
+        # Rows are re-indexed 0..n-1 in the stacked frame.
+        stacked_df = pd.concat(sub_experiments, ignore_index=True)
+        # ---- Compute Q-weights ----
+        stacked_df = self._compute_q_weights(stacked_df, unit, population)
+        # ---- Count units ----
+        # Counts are distinct unit ids per role across the whole stack.
+        treated_units = stacked_df.loc[stacked_df["_D_sa"] == 1, unit].unique()
+        control_units = stacked_df.loc[stacked_df["_D_sa"] == 0, unit].unique()
+        n_treated_units = len(treated_units)
+        n_control_units = len(control_units)
+        # ---- Build design matrix and run WLS ----
+        # Always run event study regression (Equation 3 in paper)
+        # Reference period: e = -1 - anticipation (shifts when anticipation > 0)
+        ref_period = -1 - self.anticipation
+        # All event times in the window except the omitted reference period.
+        event_times = sorted(
+            [
+                h
+                for h in range(-self.kappa_pre - self.anticipation, self.kappa_post + 1)
+                if h != ref_period
+            ]
+        )
+        n = len(stacked_df)
+        n_event_dummies = len(event_times)
+        # Track column indices for VCV extraction
+        # [0] intercept, [1] D_sa, [2..K+1] event-time dummies,
+        # [K+2..2K+1] D_sa * event-time interactions
+        interaction_indices: Dict[int, int] = {}
+        # Build design matrix
+        X = np.zeros((n, 2 + 2 * n_event_dummies))
+        X[:, 0] = 1.0  # intercept
+        X[:, 1] = stacked_df["_D_sa"].values  # treatment indicator
+        et_vals = stacked_df["_event_time"].values
+        d_vals = stacked_df["_D_sa"].values
+        for j, h in enumerate(event_times):
+            col_lambda = 2 + j  # event-time dummy
+            col_delta = 2 + n_event_dummies + j  # interaction
+            mask = et_vals == h
+            X[mask, col_lambda] = 1.0
+            X[mask, col_delta] = d_vals[mask]
+            interaction_indices[h] = col_delta
+        # WLS via sqrt(w) transformation
+        Q_weights = stacked_df["_Q_weight"].values
+        # Same value as `n` above; kept under a separate name for the weight code.
+        n_stacked = len(stacked_df)
+        # Compose Q-weights with survey weights if survey design is present
+        if resolved_survey is not None and survey_weights is not None:
+            # Survey weights were resolved on the original data; the stacked
+            # dataset carries the survey weight column through _build_sub_experiment.
+            # Re-extract from the stacked data so lengths match.
+            survey_weights_stacked = (
+                stacked_df[survey_design.weights].values.astype(np.float64)
+                if survey_design.weights is not None
+                else np.ones(n_stacked, dtype=np.float64)
+            )
+            composed_weights = Q_weights * survey_weights_stacked
+            # Normalize composed weights to sum = n_stacked
+            composed_weights = composed_weights * (n_stacked / np.sum(composed_weights))
+        else:
+            composed_weights = Q_weights
+        # Scale rows of Y and X by sqrt(weight) so plain OLS yields the WLS fit.
+        sqrt_w = np.sqrt(composed_weights)
+        Y = stacked_df[outcome].values
+        Y_t = Y * sqrt_w
+        X_t = X * sqrt_w[:, np.newaxis]
+        # Cluster IDs
+        if self.cluster == "unit":
+            cluster_ids = stacked_df[unit].values
+        else:  # unit_subexp
+            # String key "<unit>_<sub_exp>" so the same unit in different
+            # sub-experiments falls in different clusters.
+            cluster_ids = (
+                stacked_df[unit].astype(str) + "_" + stacked_df["_sub_exp"].astype(str)
+            ).values
+        # Run OLS on transformed data (= WLS)
+        coef, residuals, vcov = solve_ols(
+            X_t,
+            Y_t,
+            cluster_ids=cluster_ids,
+            return_vcov=True,
+            rank_deficient_action=self.rank_deficient_action,
+        )
+        # return_vcov=True was requested above, so a VCV is expected here.
+        assert vcov is not None
+        # ---- Survey VCV override ----
+        # Both branches below replace `vcov` and `survey_metadata`; without a
+        # survey design the cluster-robust VCV from solve_ols is kept.
+        _n_valid_rep_sd = None
+        resolved_stacked = None
+        if resolved_survey is not None and _uses_replicate_sd:
+            # Replicate variance: re-run WLS per replicate with composed weights
+            from diff_diff.survey import compute_replicate_refit_variance, compute_survey_metadata
+            resolved_stacked = survey_design.resolve(stacked_df)
+            # Refit closure: compose Q-weights with replicate survey weights
+            def _refit_stacked(w_r):
+                composed_r = Q_weights * w_r
+                w_sum = np.sum(composed_r)
+                # Rescale to sum n_stacked only when the total is positive
+                # (avoids dividing by zero for an all-zero replicate).
+                if w_sum > 0:
+                    composed_r = composed_r * (n_stacked / w_sum)
+                sqrt_w_r = np.sqrt(composed_r)
+                coef_r, _, _ = solve_ols(
+                    X * sqrt_w_r[:, np.newaxis], Y * sqrt_w_r,
+                    cluster_ids=cluster_ids,
+                    rank_deficient_action="silent", return_vcov=False,
+                )
+                return coef_r
+            # Replicate-refit VCV; the full-sample coefficient vector `coef`
+            # is passed alongside the refit closure. The second return value
+            # is compared against n_replicates further down.
+            vcov, _n_valid_rep_sd = compute_replicate_refit_variance(
+                _refit_stacked, coef, resolved_stacked
+            )
+            # Compute survey metadata
+            raw_w_stacked = (
+                stacked_df[survey_design.weights].values.astype(np.float64)
+                if survey_design.weights is not None
+                else np.ones(n_stacked, dtype=np.float64)
+            )
+            survey_metadata = compute_survey_metadata(resolved_stacked, raw_w_stacked)
+        elif resolved_survey is not None:
+            from diff_diff.survey import (
+                _inject_cluster_as_psu,
+                _resolve_effective_cluster,
+                compute_survey_metadata,
+                compute_survey_vcov,
+            )
+            # Re-resolve survey design on the stacked data so that strata/PSU
+            # arrays have the correct length for TSL variance estimation.
+            resolved_stacked = survey_design.resolve(stacked_df)
+            # Create a copy with composed weights (normalized to sum=n_stacked)
+            # Shallow copy: rebinding .weights leaves resolved_stacked untouched.
+            resolved_composed = copy.copy(resolved_stacked)
+            resolved_composed.weights = composed_weights
+            # Original-scale residuals for TSL variance
+            resid_orig = Y - X @ coef
+            # Inject cluster as PSU when survey design has no explicit PSU
+            resolved_composed = _inject_cluster_as_psu(resolved_composed, cluster_ids)
+            # Resolve effective cluster (PSU overrides user-specified cluster)
+            # NOTE(review): the return value is discarded here — presumably the
+            # call is made for its validation/warning side effects; confirm.
+            _resolve_effective_cluster(resolved_composed, cluster_ids, self.cluster)
+            # Compute TSL variance
+            vcov = compute_survey_vcov(X, resid_orig, resolved_composed)
+            # Recompute survey metadata on the stacked resolved design
+            raw_w_stacked = (
+                stacked_df[survey_design.weights].values.astype(np.float64)
+                if survey_design.weights is not None
+                else np.ones(n_stacked, dtype=np.float64)
+            )
+            survey_metadata = compute_survey_metadata(resolved_composed, raw_w_stacked)
+        # ---- Extract event study effects ----
+        # Left as None unless aggregate == "event_study".
+        event_study_effects: Optional[Dict[int, Dict[str, Any]]] = None
+        if aggregate == "event_study":
+            event_study_effects = {}
+            # Reference period (e = -1 - anticipation)
+            # Omitted category: effect fixed at zero, no inference reported.
+            event_study_effects[ref_period] = {
+                "effect": 0.0,
+                "se": 0.0,
+                "t_stat": np.nan,
+                "p_value": np.nan,
+                "conf_int": (np.nan, np.nan),
+                "n_obs": 0,
+            }
+            for h in event_times:
+                idx = interaction_indices[h]
+                effect = float(coef[idx])
+                # Clamp the variance at zero before the square root.
+                se = float(np.sqrt(max(vcov[idx, idx], 0.0)))
+                # Degrees of freedom passed to safe_inference: the survey df
+                # floored at 1 when available; otherwise 0 for replicate
+                # designs and None when no survey df applies.
+                _survey_df = (
+                    max(survey_metadata.df_survey, 1)
+                    if survey_metadata is not None and survey_metadata.df_survey is not None
+                    else (0 if _uses_replicate_sd else None)
+                )
+                # Override df when fewer replicates were valid than the design
+                # specifies: df = n_valid - 1 (0 if at most one was valid), and
+                # the stored metadata is updated to match (None when df is 0).
+                if _n_valid_rep_sd is not None and resolved_stacked is not None:
+                    if _n_valid_rep_sd < resolved_stacked.n_replicates:
+                        _survey_df = _n_valid_rep_sd - 1 if _n_valid_rep_sd > 1 else 0
+                        if survey_metadata is not None:
+                            survey_metadata.df_survey = _survey_df if _survey_df > 0 else None
+                t_stat, p_value, conf_int = safe_inference(
+                    effect, se, alpha=self.alpha, df=_survey_df
+                )
+                # Number of treated stacked rows at this event time.
+                n_obs_h = int(np.sum((et_vals == h) & (d_vals == 1)))
+                event_study_effects[h] = {
+                    "effect": effect,
+                    "se": se,
+                    "t_stat": t_stat,
+                    "p_value": p_value,
+                    "conf_int": conf_int,
+                    "n_obs": n_obs_h,
+                }
+        # ---- Compute overall ATT ----
+        # Average of post-treatment delta_h coefficients with delta-method SE
+        # Post-treatment includes anticipation periods (h >= -anticipation)
+        post_event_times = [
+            h for h in event_times if h >= -self.anticipation and h in interaction_indices
+        ]
+        post_indices = [interaction_indices[h] for h in post_event_times]
+        K = len(post_indices)
+        if K > 0:
+            overall_att = sum(float(coef[i]) for i in post_indices) / K
+            # Delta method: gradient = 1/K for each post-period coefficient
+            # so SE = sqrt(1' V 1) / K, with the quadratic form clamped at zero.
+            sub_vcv = vcov[np.ix_(post_indices, post_indices)]
+            ones = np.ones(K)
+            overall_se = float(np.sqrt(max(ones @ sub_vcv @ ones, 0.0))) / K
+        else:
+            overall_att = np.nan
+            overall_se = np.nan
+        # Same degrees-of-freedom rule as in the event-study loop above.
+        _survey_df_overall = (
+            max(survey_metadata.df_survey, 1)
+            if survey_metadata is not None and survey_metadata.df_survey is not None
+            else (0 if _uses_replicate_sd else None)
+        )
+        if _n_valid_rep_sd is not None and resolved_stacked is not None:
+            if _n_valid_rep_sd < resolved_stacked.n_replicates:
+                _survey_df_overall = _n_valid_rep_sd - 1 if _n_valid_rep_sd > 1 else 0
+                if survey_metadata is not None:
+                    survey_metadata.df_survey = _survey_df_overall if _survey_df_overall > 0 else None
+        overall_t, overall_p, overall_ci = safe_inference(
+            overall_att, overall_se, alpha=self.alpha, df=_survey_df_overall
+        )
+        # ---- Construct results ----
+        # n_obs is the row count of the caller's original frame; n_stacked_obs
+        # is the row count of the stacked frame.
+        self.results_ = StackedDiDResults(
+            overall_att=overall_att,
+            overall_se=overall_se,
+            overall_t_stat=overall_t,
+            overall_p_value=overall_p,
+            overall_conf_int=overall_ci,
+            event_study_effects=event_study_effects,
+            group_effects=None,
+            stacked_data=stacked_df,
+            groups=list(omega_kappa),
+            trimmed_groups=list(trimmed),
+            time_periods=time_periods,
+            n_obs=len(data),
+            n_stacked_obs=n,
+            n_sub_experiments=len(sub_experiments),
+            n_treated_units=n_treated_units,
+            n_control_units=n_control_units,
+            kappa_pre=self.kappa_pre,
+            kappa_post=self.kappa_post,
+            weighting=self.weighting,
+            clean_control=self.clean_control,
+            alpha=self.alpha,
+            survey_metadata=survey_metadata,
+        )
+        self.is_fitted_ = True
+        return self.results_
+    # =========================================================================
+    # Trimming (IC1 + IC2)
+    # =========================================================================
+    def _trim_adoption_events(
+        self,
+        adoption_events: List[Any],
+        T_min: int,
+        T_max: int,
+        unit_info: pd.DataFrame,
+    ) -> Tuple[List[Any], List[Any]]:
+        """
+        Trim adoption events based on IC1 (window) and IC2 (controls).
+        IC1: a - kappa_pre - anticipation >= T_min AND a + kappa_post <= T_max
+        (with anticipation == 0 this matches the R reference:
+        focalAdoptionTime - kappa_pre >= minTime
+        AND focalAdoptionTime + kappa_post <= maxTime)
+        IC2: Clean controls exist for this adoption event.
+        Parameters
+        ----------
+        adoption_events : list
+            Unique finite adoption event times.
+        T_min, T_max : int
+            Min and max time periods in the data.
+        unit_info : pd.DataFrame
+            One row per unit with _first_treat column.
+        Returns
+        -------
+        omega_kappa : list
+            Included adoption events.
+        trimmed : list
+            Excluded adoption events.
+        Raises
+        ------
+        ValueError
+            If no adoption event satisfies both inclusion criteria.
+        Warns
+        -----
+        UserWarning
+            If at least one adoption event is trimmed. The reported event
+            window includes the anticipation shift, so it matches the bound
+            actually checked by IC1.
+        """
+        # Lower edge of the event window in event time; anticipation widens
+        # the window on the pre-treatment side only.
+        window_lo = -self.kappa_pre - self.anticipation
+        omega_kappa = []
+        trimmed = []
+        for a in adoption_events:
+            a_int = int(a)
+            # IC1: Event window fits in data
+            ic1 = (a_int + window_lo) >= T_min and (a_int + self.kappa_post) <= T_max
+            # IC2: Clean controls exist
+            ic2 = self._check_clean_controls_exist(a_int, unit_info)
+            if ic1 and ic2:
+                omega_kappa.append(a)
+            else:
+                trimmed.append(a)
+        if trimmed:
+            # Report the window that IC1 really tested (previously the message
+            # omitted the anticipation shift and understated the window).
+            warnings.warn(
+                f"Trimmed {len(trimmed)} adoption event(s) that don't satisfy "
+                f"inclusion criteria: {trimmed}. "
+                f"IC1 requires event window [{window_lo}, {self.kappa_post}] "
+                f"to fit within data range [{T_min}, {T_max}]. "
+                f"IC2 requires clean controls to exist.",
+                UserWarning,
+                stacklevel=3,
+            )
+        if not omega_kappa:
+            raise ValueError(
+                f"All {len(adoption_events)} adoption events were trimmed. "
+                f"No valid sub-experiments can be constructed. "
+                f"Consider reducing kappa_pre (currently {self.kappa_pre}) "
+                f"or kappa_post (currently {self.kappa_post}), or check that "
+                f"clean control units exist."
+            )
+        return omega_kappa, trimmed
+    def _check_clean_controls_exist(self, a: int, unit_info: pd.DataFrame) -> bool:
+        """
+        Check IC2: report whether any unit qualifies as a clean control for event a.
+        For "not_yet_treated" a unit qualifies when its first-treatment time
+        exceeds a + kappa_post; for "strict" the cutoff is additionally
+        raised by kappa_pre; for any other setting (i.e. "never_treated")
+        only units with an infinite first-treatment time qualify.
+        """
+        first_treat_vals = unit_info["_first_treat"].values
+        mode = self.clean_control
+        if mode in ("not_yet_treated", "strict"):
+            cutoff = a + self.kappa_post
+            if mode == "strict":
+                cutoff = cutoff + self.kappa_pre
+            eligible = first_treat_vals > cutoff
+        else:  # never_treated
+            eligible = np.isinf(first_treat_vals)
+        return bool(np.any(eligible))
+    # =========================================================================
+    # Sub-experiment construction
+    # =========================================================================
+    def _build_sub_experiment(
+        self,
+        df: pd.DataFrame,
+        unit_info: pd.DataFrame,
+        a: Any,
+        unit: str,
+        time: str,
+        first_treat: str,
+        outcome: str,
+        extra_cols: Optional[List[str]] = None,
+    ) -> Optional[pd.DataFrame]:
+        """
+        Assemble the sub-experiment belonging to adoption event a.
+        The sub-experiment contains the units first treated at a plus the
+        clean controls selected by ``self.clean_control``, restricted to the
+        periods [a - kappa_pre - anticipation, a + kappa_post].
+        Parameters
+        ----------
+        df : pd.DataFrame
+            Full panel data.
+        unit_info : pd.DataFrame
+            One row per unit with _first_treat.
+        a : int/float
+            Adoption event time.
+        unit, time, first_treat, outcome : str
+            Column names. Only unit and time are read by this method.
+        extra_cols : list of str, optional
+            Additional columns to propagate (e.g., survey design columns:
+            weights, strata, psu, fpc). Not consulted here: the selected
+            rows are copied with every column of df.
+        Returns
+        -------
+        pd.DataFrame or None
+            Copy of the selected rows with _sub_exp, _event_time and _D_sa
+            columns added, or None when there are no treated units, no
+            control units, or no rows inside the window.
+        """
+        event_period = int(a)
+        first_treat_vals = unit_info["_first_treat"].values
+        ids = unit_info[unit].values
+        # Treated units: A_s = a
+        treated_ids = set(ids[first_treat_vals == a])
+        # Clean control units
+        mode = self.clean_control
+        if mode in ("not_yet_treated", "strict"):
+            cutoff = event_period + self.kappa_post
+            if mode == "strict":
+                cutoff = cutoff + self.kappa_pre
+            is_control = first_treat_vals > cutoff
+        else:  # never_treated
+            is_control = np.isinf(first_treat_vals)
+        control_ids = set(ids[is_control])
+        if not treated_ids or not control_ids:
+            return None
+        # Inclusive calendar-time window around the adoption period
+        window_lo = event_period - self.kappa_pre - self.anticipation
+        window_hi = event_period + self.kappa_post
+        keep_rows = df[unit].isin(treated_ids | control_ids) & df[time].between(
+            window_lo, window_hi
+        )
+        selected = df.loc[keep_rows]
+        if selected.shape[0] == 0:
+            return None
+        # Tag rows with the sub-experiment id, event time and treated flag
+        return selected.assign(
+            _sub_exp=a,
+            _event_time=lambda frame: frame[time] - event_period,
+            _D_sa=lambda frame: frame[unit].isin(treated_ids).astype(int),
+        )
+    # =========================================================================
+    # Q-weight computation
+    # =========================================================================
+    def _compute_q_weights(
+        self,
+        stacked_df: pd.DataFrame,
+        unit_col: str,
+        population_col: Optional[str],
+    ) -> pd.DataFrame:
+        """
+        Compute Q-weights per Table 1 of Wing et al. (2024).
+        Treated observations always get Q = 1.
+        Control observations get Q based on the weighting scheme.
+        For aggregate weighting, Q-weights are computed using observation
+        counts per (event_time, sub_exp), matching the R reference
+        ``compute_weights()``. For balanced panels this is equivalent to
+        unit counts per sub-experiment. For unbalanced panels the weights
+        adjust for varying observation density per event time.
+        Population and sample_share weighting use unit counts per
+        sub-experiment, following the paper's notation (N_a^D, N_a^C).
+        For those two schemes the control weight of sub-experiment a is
+        ``share_a / (N_a^C / N_Omega^C)``, where ``share_a`` is
+        ``Pop_a^D / Pop^D`` for population weighting and
+        ``(N_a^D + N_a^C) / (N_Omega^D + N_Omega^C)`` for sample_share.
+        A sub-experiment with no control units, or a stack with no control
+        units at all, keeps Q = 1.
+        Parameters
+        ----------
+        stacked_df : pd.DataFrame
+            Stacked dataset with _sub_exp, _event_time, and _D_sa columns.
+        unit_col : str
+            Unit column name.
+        population_col : str, optional
+            Population column name (for weighting="population"). Only read
+            when ``self.weighting == "population"``.
+        Returns
+        -------
+        pd.DataFrame
+            stacked_df with _Q_weight column added. The input frame is
+            modified in place and returned.
+        """
+        if self.weighting == "aggregate":
+            return self._compute_q_weights_aggregate(stacked_df)
+        # --- Population and sample_share: unit-count-based formulas ---
+        # Distinct units per sub-experiment, one column per _D_sa value
+        sub_exp_stats = (
+            stacked_df.groupby(["_sub_exp", "_D_sa"])[unit_col].nunique().unstack(fill_value=0)
+        )
+        # N_a^D and N_a^C per sub-experiment
+        N_D = sub_exp_stats.get(1, pd.Series(dtype=float)).to_dict()
+        N_C = sub_exp_stats.get(0, pd.Series(dtype=float)).to_dict()
+        N_Omega_C = sum(N_C.values())
+        # Numerator of the target share for each sub-experiment, and its total
+        if self.weighting == "population":
+            # Pop_a^D: population summed over treated units, one row per unit
+            treated_pop = (
+                stacked_df[stacked_df["_D_sa"] == 1]
+                .drop_duplicates(subset=[unit_col, "_sub_exp"])
+                .groupby("_sub_exp")[population_col]
+                .sum()
+                .to_dict()
+            )
+            share_numer = {a: treated_pop.get(a, 0) for a in N_D}
+            share_total = sum(treated_pop.values())
+        else:  # sample_share
+            share_numer = {a: N_D.get(a, 0) + N_C.get(a, 0) for a in N_D}
+            share_total = sum(N_D.values()) + N_Omega_C
+        q_control: Dict[Any, float] = {}
+        for a in N_D:
+            n_c = N_C.get(a, 0)
+            if n_c == 0 or N_Omega_C == 0:
+                # No controls to reweight: leave the neutral weight
+                q_control[a] = 1.0
+                continue
+            # n_c > 0 and N_Omega_C > 0 here, so control_share is strictly positive
+            control_share = n_c / N_Omega_C
+            target_share = share_numer[a] / share_total if share_total > 0 else 0.0
+            q_control[a] = target_share / control_share
+        # Assign weights: treated=1, control=q_control[sub_exp].
+        # One boolean mask per sub-experiment instead of a per-row Python loop;
+        # rows whose sub-experiment has no entry in q_control keep weight 1.
+        sub_exp_vals = stacked_df["_sub_exp"].values
+        is_control = stacked_df["_D_sa"].values == 0
+        weights = np.ones(len(stacked_df))
+        for a, q in q_control.items():
+            weights[is_control & (sub_exp_vals == a)] = q
+        stacked_df["_Q_weight"] = weights
+        return stacked_df
+    def _compute_q_weights_aggregate(self, stacked_df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Compute aggregate Q-weights using observation counts per (event_time, sub_exp).
+        Matches the R reference ``compute_weights()`` which computes shares at the
+        (event_time, sub_exp) level, not the sub_exp level. For balanced panels the
+        two approaches are equivalent. For unbalanced panels this adjusts for varying
+        observation density per event time.
+        R reference pattern::
+            stack_treat_n  = count(D==1) BY event_time
+            stack_control_n = count(D==0) BY event_time
+            sub_treat_n    = count(D==1) BY (sub_exp, event_time)
+            sub_control_n  = count(D==0) BY (sub_exp, event_time)
+            sub_treat_share = sub_treat_n / stack_treat_n
+            sub_control_share = sub_control_n / stack_control_n
+            Q = sub_treat_share / sub_control_share  (for controls)
+            Q = 1  (for treated)
+        Control rows at an event time with no treated observations keep Q = 1.
+        Parameters
+        ----------
+        stacked_df : pd.DataFrame
+            Stacked dataset with _event_time, _sub_exp and _D_sa columns.
+        Returns
+        -------
+        pd.DataFrame
+            stacked_df with _Q_weight column added. The input frame is
+            modified in place and returned.
+        """
+        d_vals = stacked_df["_D_sa"].values
+        is_control = d_vals == 0
+        weights = np.ones(len(stacked_df))
+        if len(stacked_df) > 0:
+            # Scratch frame on a fresh index: 0/1 flags whose group sums are the counts
+            counts = pd.DataFrame(
+                {
+                    "et": stacked_df["_event_time"].values,
+                    "sub": stacked_df["_sub_exp"].values,
+                    "treated": (d_vals == 1).astype(np.int64),
+                    "control": is_control.astype(np.int64),
+                }
+            )
+            flag_cols = ["treated", "control"]
+            # Step 1: stack-level totals by event_time, broadcast back to every row
+            stack_n = counts.groupby("et", sort=False)[flag_cols].transform("sum")
+            # Step 2: sub-experiment-level counts by (event_time, sub_exp), per row
+            sub_n = counts.groupby(["et", "sub"], sort=False)[flag_cols].transform("sum")
+            stack_treat_n = stack_n["treated"].to_numpy()
+            stack_control_n = stack_n["control"].to_numpy()
+            sub_treat_n = sub_n["treated"].to_numpy()
+            sub_control_n = sub_n["control"].to_numpy()
+            # Step 3: Q = (sub_treat_n / stack_treat_n) / (sub_control_n / stack_control_n)
+            # Only control rows with all denominators positive are reweighted;
+            # every other row keeps the neutral weight of 1.
+            reweight = (
+                is_control & (sub_control_n > 0) & (stack_treat_n > 0) & (stack_control_n > 0)
+            )
+            treat_share = sub_treat_n[reweight] / stack_treat_n[reweight]
+            control_share = sub_control_n[reweight] / stack_control_n[reweight]
+            weights[reweight] = treat_share / control_share
+        # Step 4: attach the weights to the stacked frame
+        stacked_df["_Q_weight"] = weights
+        return stacked_df
+    # =========================================================================
+    # sklearn-compatible interface
+    # =========================================================================
+    def get_params(self) -> Dict[str, Any]:
+        """
+        Get estimator parameters (sklearn-compatible).
+        Returns
+        -------
+        dict
+            Parameter name mapped to the current value of the attribute of
+            the same name, in the order listed below.
+        """
+        param_names = (
+            "kappa_pre",
+            "kappa_post",
+            "weighting",
+            "clean_control",
+            "cluster",
+            "alpha",
+            "anticipation",
+            "rank_deficient_action",
+        )
+        return {name: getattr(self, name) for name in param_names}
+    def set_params(self, **params: Any) -> "StackedDiD":
+        """
+        Set estimator parameters (sklearn-compatible).
+        Every name is validated before any value is assigned, so a call that
+        contains an unknown name leaves the estimator unchanged.
+        Parameters
+        ----------
+        **params
+            Names mapped to their new values. A name is accepted when the
+            estimator already has an attribute of that name.
+        Returns
+        -------
+        StackedDiD
+            ``self``, so calls can be chained.
+        Raises
+        ------
+        ValueError
+            If a name is not an existing attribute of the estimator.
+        """
+        # Validate first: raising halfway through would leave a partial update
+        for key in params:
+            if not hasattr(self, key):
+                raise ValueError(f"Unknown parameter: {key}")
+        for key, value in params.items():
+            setattr(self, key, value)
+        return self
+    def summary(self) -> str:
+        """
+        Get summary of estimation results.
+        Returns
+        -------
+        str
+            The value returned by ``self.results_.summary()``.
+        Raises
+        ------
+        RuntimeError
+            If the model is not marked as fitted or holds no results object.
+        """
+        # Explicit check instead of ``assert``: assertions are removed under -O
+        results = self.results_ if self.is_fitted_ else None
+        if results is None:
+            raise RuntimeError("Model must be fitted before calling summary()")
+        return results.summary()
+    def print_summary(self) -> None:
+        """Write the text returned by ``summary()`` to stdout."""
+        report = self.summary()
+        print(report)
+# =============================================================================
+# Convenience function
+# =============================================================================
+def stacked_did(
+    data: pd.DataFrame,
+    outcome: str,
+    unit: str,
+    time: str,
+    first_treat: str,
+    kappa_pre: int = 1,
+    kappa_post: int = 1,
+    aggregate: Optional[str] = None,
+    population: Optional[str] = None,
+    survey_design=None,
+    **kwargs: Any,
+) -> StackedDiDResults:
+    """
+    Run stacked DiD estimation in a single call.
+    Builds a StackedDiD estimator from ``kappa_pre``, ``kappa_post`` and
+    ``**kwargs``, then returns the result of its ``fit()`` on ``data``.
+    Parameters
+    ----------
+    data : pd.DataFrame
+        Panel data.
+    outcome : str
+        Outcome variable column name.
+    unit : str
+        Unit identifier column name.
+    time : str
+        Time period column name.
+    first_treat : str
+        Column indicating first treatment period (0 or inf for never-treated).
+    kappa_pre : int, default=1
+        Pre-treatment event-time periods.
+    kappa_post : int, default=1
+        Post-treatment event-time periods.
+    aggregate : str, optional
+        Aggregation mode: None, "simple", or "event_study".
+    population : str, optional
+        Population column for weighting="population".
+    survey_design : SurveyDesign, optional
+        Survey design specification for design-based inference.
+    **kwargs
+        Additional keyword arguments passed to StackedDiD constructor.
+    Returns
+    -------
+    StackedDiDResults
+        Estimation results.
+    Examples
+    --------
+    >>> from diff_diff import stacked_did, generate_staggered_data
+    >>> data = generate_staggered_data(seed=42)
+    >>> results = stacked_did(data, 'outcome', 'unit', 'period',
+    ...                       'first_treat', kappa_pre=2, kappa_post=2,
+    ...                       aggregate='event_study')
+    >>> results.print_summary()
+    """
+    estimator = StackedDiD(kappa_pre=kappa_pre, kappa_post=kappa_post, **kwargs)
+    # Everything except the window sizes and constructor kwargs goes to fit()
+    fit_options = {
+        "outcome": outcome,
+        "unit": unit,
+        "time": time,
+        "first_treat": first_treat,
+        "aggregate": aggregate,
+        "population": population,
+        "survey_design": survey_design,
+    }
+    return estimator.fit(data, **fit_options)