diff-diff 2.3.2__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,858 @@
1
+ """
2
+ Synthetic Difference-in-Differences estimator.
3
+ """
4
+
5
+ import warnings
6
+ from typing import Any, Dict, List, Optional, Tuple
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+ from numpy.linalg import LinAlgError
11
+
12
+ from diff_diff.estimators import DifferenceInDifferences
13
+ from diff_diff.linalg import solve_ols
14
+ from diff_diff.results import SyntheticDiDResults
15
+ from diff_diff.utils import (
16
+ _compute_regularization,
17
+ _sum_normalize,
18
+ compute_confidence_interval,
19
+ compute_p_value,
20
+ compute_sdid_estimator,
21
+ compute_sdid_unit_weights,
22
+ compute_time_weights,
23
+ validate_binary,
24
+ )
25
+
26
+
27
class SyntheticDiD(DifferenceInDifferences):
    """
    Synthetic Difference-in-Differences (SDID) estimator.

    Combines the strengths of Difference-in-Differences and Synthetic Control
    methods by re-weighting control units to better match treated units'
    pre-treatment trends.

    This method is particularly useful when:
    - You have few treated units (possibly just one)
    - Parallel trends assumption may be questionable
    - Control units are heterogeneous and need reweighting
    - You want robustness to pre-treatment differences

    Parameters
    ----------
    zeta_omega : float, optional
        Regularization for unit weights. If None (default), auto-computed
        from data as ``(N1 * T1)^(1/4) * noise_level`` matching R's synthdid.
    zeta_lambda : float, optional
        Regularization for time weights. If None (default), auto-computed
        from data as ``1e-6 * noise_level`` matching R's synthdid.
    alpha : float, default=0.05
        Significance level for confidence intervals.
    variance_method : str, default="placebo"
        Method for variance estimation:

        - "placebo": Placebo-based variance matching R's
          synthdid::vcov(method="placebo"). Implements Algorithm 4 from
          Arkhangelsky et al. (2021). This is R's default.
        - "bootstrap": Bootstrap at unit level with fixed weights matching
          R's synthdid::vcov(method="bootstrap").
    n_bootstrap : int, default=200
        Number of replications for variance estimation. Used for both:

        - Bootstrap: Number of bootstrap samples
        - Placebo: Number of random permutations (matches R's
          `replications` argument)
    seed : int, optional
        Random seed for reproducibility. If None (default), results
        will vary between runs.

    Attributes
    ----------
    results_ : SyntheticDiDResults
        Estimation results after calling fit().
    is_fitted_ : bool
        Whether the model has been fitted.

    Examples
    --------
    Basic usage with panel data:

    >>> import pandas as pd
    >>> from diff_diff import SyntheticDiD
    >>>
    >>> # Panel data with units observed over multiple time periods
    >>> # Treatment occurs at period 5 for treated units
    >>> data = pd.DataFrame({
    ...     'unit': [...],     # Unit identifier
    ...     'period': [...],   # Time period
    ...     'outcome': [...],  # Outcome variable
    ...     'treated': [...]   # 1 if unit is ever treated, 0 otherwise
    ... })
    >>>
    >>> # Fit SDID model
    >>> sdid = SyntheticDiD()
    >>> results = sdid.fit(
    ...     data,
    ...     outcome='outcome',
    ...     treatment='treated',
    ...     unit='unit',
    ...     time='period',
    ...     post_periods=[5, 6, 7, 8]
    ... )
    >>>
    >>> # View results
    >>> results.print_summary()
    >>> print(f"ATT: {results.att:.3f} (SE: {results.se:.3f})")
    >>>
    >>> # Examine unit weights
    >>> weights_df = results.get_unit_weights_df()
    >>> print(weights_df.head(10))

    Notes
    -----
    The SDID estimator (Arkhangelsky et al., 2021) computes:

        τ̂ = (Ȳ_treated,post - Σ_t λ_t * Y_treated,t)
             - Σ_j ω_j * (Ȳ_j,post - Σ_t λ_t * Y_j,t)

    Where:
    - ω_j are unit weights (sum to 1, non-negative)
    - λ_t are time weights (sum to 1, non-negative)

    Unit weights ω are chosen to match pre-treatment outcomes:

        min ||Σ_j ω_j * Y_j,pre - Y_treated,pre||²

    This interpolates between:
    - Standard DiD (uniform weights): ω_j = 1/N_control
    - Synthetic Control (exact matching): concentrated weights

    References
    ----------
    Arkhangelsky, D., Athey, S., Hirshberg, D. A., Imbens, G. W., & Wager, S.
    (2021). Synthetic Difference-in-Differences. American Economic Review,
    111(12), 4088-4118.
    """
131
+
132
+ def __init__(
133
+ self,
134
+ zeta_omega: Optional[float] = None,
135
+ zeta_lambda: Optional[float] = None,
136
+ alpha: float = 0.05,
137
+ variance_method: str = "placebo",
138
+ n_bootstrap: int = 200,
139
+ seed: Optional[int] = None,
140
+ # Deprecated — accepted for backward compat, ignored with warning
141
+ lambda_reg: Optional[float] = None,
142
+ zeta: Optional[float] = None,
143
+ ):
144
+ if lambda_reg is not None:
145
+ warnings.warn(
146
+ "lambda_reg is deprecated and ignored. Regularization is now "
147
+ "auto-computed from data. Use zeta_omega to override unit weight "
148
+ "regularization.",
149
+ DeprecationWarning,
150
+ stacklevel=2,
151
+ )
152
+ if zeta is not None:
153
+ warnings.warn(
154
+ "zeta is deprecated and ignored. Use zeta_lambda to override "
155
+ "time weight regularization.",
156
+ DeprecationWarning,
157
+ stacklevel=2,
158
+ )
159
+
160
+ super().__init__(robust=True, cluster=None, alpha=alpha)
161
+ self.zeta_omega = zeta_omega
162
+ self.zeta_lambda = zeta_lambda
163
+ self.variance_method = variance_method
164
+ self.n_bootstrap = n_bootstrap
165
+ self.seed = seed
166
+
167
+ # Validate n_bootstrap
168
+ if n_bootstrap < 2:
169
+ raise ValueError(
170
+ f"n_bootstrap must be >= 2 (got {n_bootstrap}). At least 2 "
171
+ f"iterations are needed to estimate standard errors."
172
+ )
173
+
174
+ # Validate variance_method
175
+ valid_methods = ("bootstrap", "placebo")
176
+ if variance_method not in valid_methods:
177
+ raise ValueError(
178
+ f"variance_method must be one of {valid_methods}, "
179
+ f"got '{variance_method}'"
180
+ )
181
+
182
+ self._unit_weights = None
183
+ self._time_weights = None
184
+
185
    def fit(  # type: ignore[override]
        self,
        data: pd.DataFrame,
        outcome: str,
        treatment: str,
        unit: str,
        time: str,
        post_periods: Optional[List[Any]] = None,
        covariates: Optional[List[str]] = None
    ) -> SyntheticDiDResults:
        """
        Fit the Synthetic Difference-in-Differences model.

        Parameters
        ----------
        data : pd.DataFrame
            Panel data with observations for multiple units over multiple
            time periods.
        outcome : str
            Name of the outcome variable column.
        treatment : str
            Name of the treatment group indicator column (0/1).
            Should be 1 for all observations of treated units
            (both pre and post treatment).
        unit : str
            Name of the unit identifier column.
        time : str
            Name of the time period column.
        post_periods : list, optional
            List of time period values that are post-treatment.
            If None, uses the last half of periods.
        covariates : list, optional
            List of covariate column names. Covariates are residualized
            out before computing the SDID estimator.

        Returns
        -------
        SyntheticDiDResults
            Object containing the ATT estimate, standard error,
            unit weights, and time weights.

        Raises
        ------
        ValueError
            If required parameters are missing or data validation fails.
        """
        # Validate inputs
        if outcome is None or treatment is None or unit is None or time is None:
            raise ValueError(
                "Must provide 'outcome', 'treatment', 'unit', and 'time'"
            )

        # Check columns exist
        required_cols = [outcome, treatment, unit, time]
        if covariates:
            required_cols.extend(covariates)

        missing = [c for c in required_cols if c not in data.columns]
        if missing:
            raise ValueError(f"Missing columns: {missing}")

        # Validate treatment is binary
        validate_binary(data[treatment].values, "treatment")

        # Get all unique time periods
        all_periods = sorted(data[time].unique())

        if len(all_periods) < 2:
            raise ValueError("Need at least 2 time periods")

        # Determine pre and post periods.
        # Default split: first half pre, second half post (midpoint goes to post).
        if post_periods is None:
            mid = len(all_periods) // 2
            post_periods = list(all_periods[mid:])
            pre_periods = list(all_periods[:mid])
        else:
            post_periods = list(post_periods)
            pre_periods = [p for p in all_periods if p not in post_periods]

        if len(post_periods) == 0:
            raise ValueError("Must have at least one post-treatment period")
        if len(pre_periods) == 0:
            raise ValueError("Must have at least one pre-treatment period")

        # Validate post_periods are in data
        for p in post_periods:
            if p not in all_periods:
                raise ValueError(f"Post-period '{p}' not found in time column")

        # Identify treated and control units
        # Treatment indicator should be constant within unit
        unit_treatment = data.groupby(unit)[treatment].first()

        # Validate treatment is constant within unit (SDID requires block treatment)
        treatment_nunique = data.groupby(unit)[treatment].nunique()
        varying_units = treatment_nunique[treatment_nunique > 1]
        if len(varying_units) > 0:
            example_unit = varying_units.index[0]
            example_vals = sorted(
                data.loc[data[unit] == example_unit, treatment].unique()
            )
            raise ValueError(
                f"Treatment indicator varies within {len(varying_units)} unit(s) "
                f"(e.g., unit '{example_unit}' has values {example_vals}). "
                f"SyntheticDiD requires 'block' treatment where treatment is "
                f"constant within each unit across all time periods. "
                f"For staggered adoption designs, use CallawaySantAnna or "
                f"ImputationDiD instead."
            )

        treated_units = unit_treatment[unit_treatment == 1].index.tolist()
        control_units = unit_treatment[unit_treatment == 0].index.tolist()

        if len(treated_units) == 0:
            raise ValueError("No treated units found")
        if len(control_units) == 0:
            raise ValueError("No control units found")

        # Validate balanced panel (SDID requires all units observed in all periods)
        periods_per_unit = data.groupby(unit)[time].nunique()
        expected_n_periods = len(all_periods)
        unbalanced_units = periods_per_unit[periods_per_unit != expected_n_periods]
        if len(unbalanced_units) > 0:
            example_unit = unbalanced_units.index[0]
            actual_count = unbalanced_units.iloc[0]
            raise ValueError(
                f"Panel is not balanced: {len(unbalanced_units)} unit(s) do not "
                f"have observations in all {expected_n_periods} periods "
                f"(e.g., unit '{example_unit}' has {actual_count} periods). "
                f"SyntheticDiD requires a balanced panel. Use "
                f"diff_diff.prep.balance_panel() to balance the panel first."
            )

        # Residualize covariates if provided (works on a copy; original
        # `data` is left untouched).
        working_data = data.copy()
        if covariates:
            working_data = self._residualize_covariates(
                working_data, outcome, covariates, unit, time
            )

        # Create outcome matrices
        # Shape: (n_periods, n_units)
        Y_pre_control, Y_post_control, Y_pre_treated, Y_post_treated = \
            self._create_outcome_matrices(
                working_data, outcome, unit, time,
                pre_periods, post_periods, treated_units, control_units
            )

        # Compute auto-regularization (or use user overrides)
        auto_zeta_omega, auto_zeta_lambda = _compute_regularization(
            Y_pre_control, len(treated_units), len(post_periods)
        )
        zeta_omega = self.zeta_omega if self.zeta_omega is not None else auto_zeta_omega
        zeta_lambda = self.zeta_lambda if self.zeta_lambda is not None else auto_zeta_lambda

        # Store noise level for diagnostics
        # NOTE(review): local import of a private helper — presumably kept
        # function-scoped to avoid widening the module's public import surface.
        from diff_diff.utils import _compute_noise_level
        noise_level = _compute_noise_level(Y_pre_control)

        # Data-dependent convergence threshold (matches R's 1e-5 * noise.level).
        # Floor of 1e-5 when noise_level == 0: R would use 0.0, causing FW to
        # run all max_iter iterations. The result is equivalent (zero-noise
        # data has no variation to optimize), but the floor enables early stop.
        min_decrease = 1e-5 * noise_level if noise_level > 0 else 1e-5

        # Compute unit weights (Frank-Wolfe with sparsification).
        # Treated units are collapsed to their cross-sectional mean profile.
        Y_pre_treated_mean = np.mean(Y_pre_treated, axis=1)

        unit_weights = compute_sdid_unit_weights(
            Y_pre_control,
            Y_pre_treated_mean,
            zeta_omega=zeta_omega,
            min_decrease=min_decrease,
        )

        # Compute time weights (Frank-Wolfe on collapsed form)
        time_weights = compute_time_weights(
            Y_pre_control,
            Y_post_control,
            zeta_lambda=zeta_lambda,
            min_decrease=min_decrease,
        )

        # Compute SDID estimate
        Y_post_treated_mean = np.mean(Y_post_treated, axis=1)

        att = compute_sdid_estimator(
            Y_pre_control,
            Y_post_control,
            Y_pre_treated_mean,
            Y_post_treated_mean,
            unit_weights,
            time_weights
        )

        # Compute pre-treatment fit (RMSE) of the synthetic control against
        # the mean treated pre-treatment trajectory.
        synthetic_pre = Y_pre_control @ unit_weights
        pre_fit_rmse = np.sqrt(np.mean((Y_pre_treated_mean - synthetic_pre) ** 2))

        # Warn if pre-treatment fit is poor (Registry requirement).
        # Threshold: 1× SD of treated pre-treatment outcomes — a natural baseline
        # since RMSE exceeding natural variation indicates the synthetic control
        # fails to reproduce the treated series' level or trend.
        pre_treatment_sd = np.std(Y_pre_treated_mean, ddof=1) if len(Y_pre_treated_mean) > 1 else 0.0
        if pre_treatment_sd > 0 and pre_fit_rmse > pre_treatment_sd:
            warnings.warn(
                f"Pre-treatment fit is poor: RMSE ({pre_fit_rmse:.4f}) exceeds "
                f"the standard deviation of treated pre-treatment outcomes "
                f"({pre_treatment_sd:.4f}). The synthetic control may not "
                f"adequately reproduce treated unit trends. Consider adding "
                f"more control units or adjusting regularization.",
                UserWarning,
                stacklevel=2,
            )

        # Compute standard errors based on variance_method
        if self.variance_method == "bootstrap":
            se, bootstrap_estimates = self._bootstrap_se(
                Y_pre_control, Y_post_control,
                Y_pre_treated, Y_post_treated,
                unit_weights, time_weights,
            )
            # Bootstrap draws double as the reference distribution for the
            # p-value computation below.
            placebo_effects = bootstrap_estimates
            inference_method = "bootstrap"
        else:
            # Use placebo-based variance (R's synthdid Algorithm 4)
            se, placebo_effects = self._placebo_variance_se(
                Y_pre_control,
                Y_post_control,
                Y_pre_treated_mean,
                Y_post_treated_mean,
                n_treated=len(treated_units),
                zeta_omega=zeta_omega,
                zeta_lambda=zeta_lambda,
                min_decrease=min_decrease,
                replications=self.n_bootstrap  # Reuse n_bootstrap for replications
            )
            inference_method = "placebo"

        # Compute test statistics
        if np.isfinite(se) and se > 0:
            t_stat = att / se
            # Use placebo distribution for p-value if available
            if len(placebo_effects) > 0:
                # Two-sided p-value from placebo distribution, floored at
                # 1/(n+1) so it is never exactly zero.
                p_value = np.mean(np.abs(placebo_effects) >= np.abs(att))
                p_value = max(p_value, 1.0 / (len(placebo_effects) + 1))
            else:
                p_value = compute_p_value(t_stat)
        else:
            t_stat = np.nan
            p_value = np.nan

        # Confidence interval (normal-approximation; NaN when SE unavailable)
        if np.isfinite(se) and se > 0:
            conf_int = compute_confidence_interval(att, se, self.alpha)
        else:
            conf_int = (np.nan, np.nan)

        # Create weight dictionaries keyed by original unit ids / period labels
        unit_weights_dict = {
            unit_id: w for unit_id, w in zip(control_units, unit_weights)
        }
        time_weights_dict = {
            period: w for period, w in zip(pre_periods, time_weights)
        }

        # Store results
        self.results_ = SyntheticDiDResults(
            att=att,
            se=se,
            t_stat=t_stat,
            p_value=p_value,
            conf_int=conf_int,
            n_obs=len(data),
            n_treated=len(treated_units),
            n_control=len(control_units),
            unit_weights=unit_weights_dict,
            time_weights=time_weights_dict,
            pre_periods=pre_periods,
            post_periods=post_periods,
            alpha=self.alpha,
            variance_method=inference_method,
            noise_level=noise_level,
            zeta_omega=zeta_omega,
            zeta_lambda=zeta_lambda,
            pre_treatment_fit=pre_fit_rmse,
            placebo_effects=placebo_effects if len(placebo_effects) > 0 else None,
            n_bootstrap=self.n_bootstrap if inference_method == "bootstrap" else None
        )

        self._unit_weights = unit_weights
        self._time_weights = time_weights
        self.is_fitted_ = True

        return self.results_
481
+
482
+ def _create_outcome_matrices(
483
+ self,
484
+ data: pd.DataFrame,
485
+ outcome: str,
486
+ unit: str,
487
+ time: str,
488
+ pre_periods: List[Any],
489
+ post_periods: List[Any],
490
+ treated_units: List[Any],
491
+ control_units: List[Any]
492
+ ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
493
+ """
494
+ Create outcome matrices for SDID estimation.
495
+
496
+ Returns
497
+ -------
498
+ tuple
499
+ (Y_pre_control, Y_post_control, Y_pre_treated, Y_post_treated)
500
+ Each is a 2D array with shape (n_periods, n_units)
501
+ """
502
+ # Pivot data to wide format
503
+ pivot = data.pivot(index=time, columns=unit, values=outcome)
504
+
505
+ # Extract submatrices
506
+ Y_pre_control = pivot.loc[pre_periods, control_units].values
507
+ Y_post_control = pivot.loc[post_periods, control_units].values
508
+ Y_pre_treated = pivot.loc[pre_periods, treated_units].values
509
+ Y_post_treated = pivot.loc[post_periods, treated_units].values
510
+
511
+ return (
512
+ Y_pre_control.astype(float),
513
+ Y_post_control.astype(float),
514
+ Y_pre_treated.astype(float),
515
+ Y_post_treated.astype(float)
516
+ )
517
+
518
+ def _residualize_covariates(
519
+ self,
520
+ data: pd.DataFrame,
521
+ outcome: str,
522
+ covariates: List[str],
523
+ unit: str,
524
+ time: str
525
+ ) -> pd.DataFrame:
526
+ """
527
+ Residualize outcome by regressing out covariates.
528
+
529
+ Uses two-way fixed effects to partial out covariates.
530
+ """
531
+ data = data.copy()
532
+
533
+ # Create design matrix with covariates
534
+ X = data[covariates].values.astype(float)
535
+
536
+ # Add unit and time dummies
537
+ unit_dummies = pd.get_dummies(data[unit], prefix='u', drop_first=True)
538
+ time_dummies = pd.get_dummies(data[time], prefix='t', drop_first=True)
539
+
540
+ X_full = np.column_stack([
541
+ np.ones(len(data)),
542
+ X,
543
+ unit_dummies.values,
544
+ time_dummies.values
545
+ ])
546
+
547
+ y = data[outcome].values.astype(float)
548
+
549
+ # Fit and get residuals using unified backend
550
+ coeffs, residuals, _ = solve_ols(X_full, y, return_vcov=False)
551
+
552
+ # Add back the mean for interpretability
553
+ data[outcome] = residuals + np.mean(y)
554
+
555
+ return data
556
+
557
+ def _bootstrap_se(
558
+ self,
559
+ Y_pre_control: np.ndarray,
560
+ Y_post_control: np.ndarray,
561
+ Y_pre_treated: np.ndarray,
562
+ Y_post_treated: np.ndarray,
563
+ unit_weights: np.ndarray,
564
+ time_weights: np.ndarray,
565
+ ) -> Tuple[float, np.ndarray]:
566
+ """Compute bootstrap standard error matching R's synthdid bootstrap_sample.
567
+
568
+ Resamples all units (control + treated) with replacement, renormalizes
569
+ original unit weights for the resampled controls, and computes the
570
+ SDID estimator with **fixed** weights (no re-estimation).
571
+
572
+ This matches R's ``synthdid::vcov(method="bootstrap")``.
573
+ """
574
+ rng = np.random.default_rng(self.seed)
575
+ n_control = Y_pre_control.shape[1]
576
+ n_treated = Y_pre_treated.shape[1]
577
+ n_total = n_control + n_treated
578
+
579
+ # Build full panel matrix: (n_pre+n_post, n_control+n_treated)
580
+ Y_full = np.block([
581
+ [Y_pre_control, Y_pre_treated],
582
+ [Y_post_control, Y_post_treated]
583
+ ])
584
+ n_pre = Y_pre_control.shape[0]
585
+
586
+ bootstrap_estimates = []
587
+
588
+ for _ in range(self.n_bootstrap):
589
+ # Resample ALL units with replacement
590
+ boot_idx = rng.choice(n_total, size=n_total, replace=True)
591
+
592
+ # Identify which resampled units are control vs treated
593
+ boot_is_control = boot_idx < n_control
594
+ boot_control_idx = boot_idx[boot_is_control]
595
+ boot_treated_idx = boot_idx[~boot_is_control]
596
+
597
+ # Skip if no control or no treated units in bootstrap sample
598
+ if len(boot_control_idx) == 0 or len(boot_treated_idx) == 0:
599
+ continue
600
+
601
+ try:
602
+ # Renormalize original unit weights for the resampled controls
603
+ boot_omega = _sum_normalize(unit_weights[boot_control_idx])
604
+
605
+ # Extract resampled outcome matrices
606
+ Y_boot = Y_full[:, boot_idx]
607
+ Y_boot_pre_c = Y_boot[:n_pre, boot_is_control]
608
+ Y_boot_post_c = Y_boot[n_pre:, boot_is_control]
609
+ Y_boot_pre_t = Y_boot[:n_pre, ~boot_is_control]
610
+ Y_boot_post_t = Y_boot[n_pre:, ~boot_is_control]
611
+
612
+ # Compute ATT with FIXED weights (do NOT re-estimate)
613
+ Y_boot_pre_t_mean = np.mean(Y_boot_pre_t, axis=1)
614
+ Y_boot_post_t_mean = np.mean(Y_boot_post_t, axis=1)
615
+
616
+ tau = compute_sdid_estimator(
617
+ Y_boot_pre_c, Y_boot_post_c,
618
+ Y_boot_pre_t_mean, Y_boot_post_t_mean,
619
+ boot_omega, time_weights # time_weights = original lambda
620
+ )
621
+ if np.isfinite(tau):
622
+ bootstrap_estimates.append(tau)
623
+
624
+ except (ValueError, LinAlgError):
625
+ continue
626
+
627
+ bootstrap_estimates = np.array(bootstrap_estimates)
628
+
629
+ # Check bootstrap success rate and handle failures
630
+ n_successful = len(bootstrap_estimates)
631
+ failure_rate = 1 - (n_successful / self.n_bootstrap)
632
+
633
+ if n_successful == 0:
634
+ raise ValueError(
635
+ f"All {self.n_bootstrap} bootstrap iterations failed. "
636
+ f"This typically occurs when:\n"
637
+ f" - Sample size is too small for reliable resampling\n"
638
+ f" - Weight matrices are singular or near-singular\n"
639
+ f" - Insufficient pre-treatment periods for weight estimation\n"
640
+ f" - Too few control units relative to treated units\n"
641
+ f"Consider using variance_method='placebo' or increasing "
642
+ f"the regularization parameters (zeta_omega, zeta_lambda)."
643
+ )
644
+ elif n_successful == 1:
645
+ warnings.warn(
646
+ f"Only 1/{self.n_bootstrap} bootstrap iteration succeeded. "
647
+ f"Standard error cannot be computed reliably (requires at least 2). "
648
+ f"Returning SE=0.0. Consider using variance_method='placebo' or "
649
+ f"increasing the regularization (zeta_omega, zeta_lambda).",
650
+ UserWarning,
651
+ stacklevel=2,
652
+ )
653
+ se = 0.0
654
+ elif failure_rate > 0.05:
655
+ warnings.warn(
656
+ f"Only {n_successful}/{self.n_bootstrap} bootstrap iterations succeeded "
657
+ f"({failure_rate:.1%} failure rate). Standard errors may be unreliable. "
658
+ f"This can occur with small samples or insufficient pre-treatment periods.",
659
+ UserWarning,
660
+ stacklevel=2,
661
+ )
662
+ se = float(np.std(bootstrap_estimates, ddof=1))
663
+ else:
664
+ se = float(np.std(bootstrap_estimates, ddof=1))
665
+
666
+ return se, bootstrap_estimates
667
+
668
    def _placebo_variance_se(
        self,
        Y_pre_control: np.ndarray,
        Y_post_control: np.ndarray,
        Y_pre_treated_mean: np.ndarray,
        Y_post_treated_mean: np.ndarray,
        n_treated: int,
        zeta_omega: float = 0.0,
        zeta_lambda: float = 0.0,
        min_decrease: float = 1e-5,
        replications: int = 200
    ) -> Tuple[float, np.ndarray]:
        """
        Compute placebo-based variance matching R's synthdid methodology.

        This implements Algorithm 4 from Arkhangelsky et al. (2021),
        matching R's synthdid::vcov(method = "placebo"):

        1. Randomly sample N₀ control indices (permutation)
        2. Designate last N₁ as pseudo-treated, first (N₀-N₁) as pseudo-controls
        3. Re-estimate both omega and lambda on the permuted data (from
           uniform initialization, fresh start), matching R's behavior where
           ``update.omega=TRUE, update.lambda=TRUE`` are passed via ``opts``
        4. Compute SDID estimate with re-estimated weights
        5. Repeat `replications` times
        6. SE = sqrt((r-1)/r) * sd(estimates)

        Parameters
        ----------
        Y_pre_control : np.ndarray
            Control outcomes in pre-treatment periods, shape (n_pre, n_control).
        Y_post_control : np.ndarray
            Control outcomes in post-treatment periods, shape (n_post, n_control).
        Y_pre_treated_mean : np.ndarray
            Mean treated outcomes in pre-treatment periods, shape (n_pre,).
        Y_post_treated_mean : np.ndarray
            Mean treated outcomes in post-treatment periods, shape (n_post,).
        n_treated : int
            Number of treated units in the original estimation.
        zeta_omega : float
            Regularization parameter for unit weights (for re-estimation).
        zeta_lambda : float
            Regularization parameter for time weights (for re-estimation).
        min_decrease : float
            Convergence threshold for Frank-Wolfe (for re-estimation).
        replications : int, default=200
            Number of placebo replications.

        Returns
        -------
        tuple
            (se, placebo_effects) where se is the standard error and
            placebo_effects is the array of placebo treatment effects.

        References
        ----------
        Arkhangelsky, D., Athey, S., Hirshberg, D. A., Imbens, G. W., & Wager, S.
        (2021). Synthetic Difference-in-Differences. American Economic Review,
        111(12), 4088-4118. Algorithm 4.
        """
        rng = np.random.default_rng(self.seed)
        # n_pre is not referenced below; the unpack documents the expected
        # (n_pre, n_control) orientation of Y_pre_control.
        n_pre, n_control = Y_pre_control.shape

        # Ensure we have enough controls for the split
        n_pseudo_control = n_control - n_treated
        if n_pseudo_control < 1:
            warnings.warn(
                f"Not enough control units ({n_control}) for placebo variance "
                f"estimation with {n_treated} treated units. "
                f"Consider using variance_method='bootstrap'.",
                UserWarning,
                stacklevel=3,
            )
            return 0.0, np.array([])

        placebo_estimates = []

        for _ in range(replications):
            try:
                # Random permutation of control indices (Algorithm 4, step 1)
                perm = rng.permutation(n_control)

                # Split into pseudo-controls and pseudo-treated (step 2);
                # the last n_treated permuted controls play the treated role.
                pseudo_control_idx = perm[:n_pseudo_control]
                pseudo_treated_idx = perm[n_pseudo_control:]

                # Get pseudo-control and pseudo-treated outcomes
                Y_pre_pseudo_control = Y_pre_control[:, pseudo_control_idx]
                Y_post_pseudo_control = Y_post_control[:, pseudo_control_idx]
                Y_pre_pseudo_treated_mean = np.mean(
                    Y_pre_control[:, pseudo_treated_idx], axis=1
                )
                Y_post_pseudo_treated_mean = np.mean(
                    Y_post_control[:, pseudo_treated_idx], axis=1
                )

                # Re-estimate weights on permuted data (matching R's behavior)
                # R passes update.omega=TRUE, update.lambda=TRUE via opts,
                # re-estimating weights from uniform initialization (fresh start).
                # Unit weights: re-estimate on pseudo-control/pseudo-treated data
                pseudo_omega = compute_sdid_unit_weights(
                    Y_pre_pseudo_control,
                    Y_pre_pseudo_treated_mean,
                    zeta_omega=zeta_omega,
                    min_decrease=min_decrease,
                )

                # Time weights: re-estimate on pseudo-control data
                pseudo_lambda = compute_time_weights(
                    Y_pre_pseudo_control,
                    Y_post_pseudo_control,
                    zeta_lambda=zeta_lambda,
                    min_decrease=min_decrease,
                )

                # Compute placebo SDID estimate (step 4)
                tau = compute_sdid_estimator(
                    Y_pre_pseudo_control,
                    Y_post_pseudo_control,
                    Y_pre_pseudo_treated_mean,
                    Y_post_pseudo_treated_mean,
                    pseudo_omega,
                    pseudo_lambda
                )
                if np.isfinite(tau):
                    placebo_estimates.append(tau)

            except (ValueError, LinAlgError, ZeroDivisionError):
                # Skip failed iterations
                continue

        placebo_estimates = np.array(placebo_estimates)
        n_successful = len(placebo_estimates)

        if n_successful < 2:
            warnings.warn(
                f"Only {n_successful} placebo replications completed successfully. "
                f"Standard error cannot be estimated reliably. "
                f"Consider using variance_method='bootstrap' or increasing "
                f"the number of control units.",
                UserWarning,
                stacklevel=3,
            )
            return 0.0, placebo_estimates

        # Warn if many replications failed
        failure_rate = 1 - (n_successful / replications)
        if failure_rate > 0.05:
            warnings.warn(
                f"Only {n_successful}/{replications} placebo replications succeeded "
                f"({failure_rate:.1%} failure rate). Standard errors may be unreliable.",
                UserWarning,
                stacklevel=3,
            )

        # Compute SE using R's formula: sqrt((r-1)/r) * sd(estimates)
        # This matches synthdid::vcov.R exactly
        se = np.sqrt((n_successful - 1) / n_successful) * np.std(
            placebo_estimates, ddof=1
        )

        return se, placebo_estimates
830
+
831
+ def get_params(self) -> Dict[str, Any]:
832
+ """Get estimator parameters."""
833
+ return {
834
+ "zeta_omega": self.zeta_omega,
835
+ "zeta_lambda": self.zeta_lambda,
836
+ "alpha": self.alpha,
837
+ "variance_method": self.variance_method,
838
+ "n_bootstrap": self.n_bootstrap,
839
+ "seed": self.seed,
840
+ }
841
+
842
+ def set_params(self, **params) -> "SyntheticDiD":
843
+ """Set estimator parameters."""
844
+ # Deprecated parameter names — emit warning and ignore
845
+ _deprecated = {"lambda_reg", "zeta"}
846
+ for key, value in params.items():
847
+ if key in _deprecated:
848
+ warnings.warn(
849
+ f"{key} is deprecated and ignored. Use zeta_omega/zeta_lambda "
850
+ f"instead.",
851
+ DeprecationWarning,
852
+ stacklevel=2,
853
+ )
854
+ elif hasattr(self, key):
855
+ setattr(self, key, value)
856
+ else:
857
+ raise ValueError(f"Unknown parameter: {key}")
858
+ return self