PyPI - diff-diff - Versions diffs - 2.4.1__tar.gz → 2.4.2__tar.gz - Mend

diff-diff 2.4.1.tar.gz → 2.4.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)

{diff_diff-2.4.1 → diff_diff-2.4.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: diff-diff
-Version: 2.4.1
+Version: 2.4.2
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Science/Research
 Classifier: Operating System :: OS Independent
@@ -2021,7 +2021,7 @@ TROP(
     max_iter=100,              # Max iterations for factor estimation
     tol=1e-6,                  # Convergence tolerance
     alpha=0.05,                # Significance level for CIs
-    n_bootstrap=200,           # Bootstrap replications
+    n_bootstrap=200,           # Bootstrap replications (minimum 2; TROP requires bootstrap for SEs)
     seed=None                  # Random seed
 )
 ```
@@ -2102,8 +2102,6 @@ SunAbraham(
 | `time` | str | Time period column |
 | `first_treat` | str | Column with first treatment period (0 for never-treated) |
 | `covariates` | list | Covariate column names |
-| `min_pre_periods` | int | Minimum pre-treatment periods to include |
-| `min_post_periods` | int | Minimum post-treatment periods to include |
 ### SunAbrahamResults
@@ -2143,6 +2141,7 @@ ImputationDiD(
     alpha=0.05,                       # Significance level for CIs
     cluster=None,                     # Column for cluster-robust SEs
     n_bootstrap=0,                    # Bootstrap iterations (0 = analytical)
+    bootstrap_weights='rademacher',   # 'rademacher', 'mammen', or 'webb'
     seed=None,                        # Random seed
     rank_deficient_action='warn',     # 'warn', 'error', or 'silent'
     horizon_max=None,                 # Max event-study horizon
@@ -2197,6 +2196,7 @@ TwoStageDiD(
     alpha=0.05,                       # Significance level for CIs
     cluster=None,                     # Column for cluster-robust SEs (defaults to unit)
     n_bootstrap=0,                    # Bootstrap iterations (0 = analytical GMM SEs)
+    bootstrap_weights='rademacher',   # 'rademacher', 'mammen', or 'webb'
     seed=None,                        # Random seed
     rank_deficient_action='warn',     # 'warn', 'error', or 'silent'
     horizon_max=None,                 # Max event-study horizon

{diff_diff-2.4.1 → diff_diff-2.4.2}/README.md RENAMED Viewed

@@ -1983,7 +1983,7 @@ TROP(
     max_iter=100,              # Max iterations for factor estimation
     tol=1e-6,                  # Convergence tolerance
     alpha=0.05,                # Significance level for CIs
-    n_bootstrap=200,           # Bootstrap replications
+    n_bootstrap=200,           # Bootstrap replications (minimum 2; TROP requires bootstrap for SEs)
     seed=None                  # Random seed
 )
 ```
@@ -2064,8 +2064,6 @@ SunAbraham(
 | `time` | str | Time period column |
 | `first_treat` | str | Column with first treatment period (0 for never-treated) |
 | `covariates` | list | Covariate column names |
-| `min_pre_periods` | int | Minimum pre-treatment periods to include |
-| `min_post_periods` | int | Minimum post-treatment periods to include |
 ### SunAbrahamResults
@@ -2105,6 +2103,7 @@ ImputationDiD(
     alpha=0.05,                       # Significance level for CIs
     cluster=None,                     # Column for cluster-robust SEs
     n_bootstrap=0,                    # Bootstrap iterations (0 = analytical)
+    bootstrap_weights='rademacher',   # 'rademacher', 'mammen', or 'webb'
     seed=None,                        # Random seed
     rank_deficient_action='warn',     # 'warn', 'error', or 'silent'
     horizon_max=None,                 # Max event-study horizon
@@ -2159,6 +2158,7 @@ TwoStageDiD(
     alpha=0.05,                       # Significance level for CIs
     cluster=None,                     # Column for cluster-robust SEs (defaults to unit)
     n_bootstrap=0,                    # Bootstrap iterations (0 = analytical GMM SEs)
+    bootstrap_weights='rademacher',   # 'rademacher', 'mammen', or 'webb'
     seed=None,                        # Random seed
     rank_deficient_action='warn',     # 'warn', 'error', or 'silent'
     horizon_max=None,                 # Max event-study horizon

{diff_diff-2.4.1 → diff_diff-2.4.2}/diff_diff/__init__.py RENAMED Viewed

@@ -148,7 +148,7 @@ from diff_diff.datasets import (
     load_mpdta,
 )
-__version__ = "2.4.1"
+__version__ = "2.4.2"
 __all__ = [
     # Estimators
     "DifferenceInDifferences",

{diff_diff-2.4.1 → diff_diff-2.4.2}/diff_diff/_backend.py RENAMED Viewed

@@ -35,6 +35,8 @@ try:
         compute_time_weights as _rust_compute_time_weights,
         compute_noise_level as _rust_compute_noise_level,
         sc_weight_fw as _rust_sc_weight_fw,
+        # Diagnostics
+        rust_backend_info as _rust_backend_info,
     )
     _rust_available = True
 except ImportError:
@@ -56,6 +58,7 @@ except ImportError:
     _rust_compute_time_weights = None
     _rust_compute_noise_level = None
     _rust_sc_weight_fw = None
+    _rust_backend_info = None
 # Determine final backend based on environment variable and availability
 if _backend_env == 'python':
@@ -78,6 +81,7 @@ if _backend_env == 'python':
     _rust_compute_time_weights = None
     _rust_compute_noise_level = None
     _rust_sc_weight_fw = None
+    _rust_backend_info = None
 elif _backend_env == 'rust':
     # Force Rust mode - fail if not available
     if not _rust_available:
@@ -90,8 +94,25 @@ else:
     # Auto mode - use Rust if available
     HAS_RUST_BACKEND = _rust_available
+def rust_backend_info():
+    """Return compile-time BLAS feature information for the Rust backend.
+    Returns a dict with keys:
+    - 'blas': True if any BLAS backend is linked
+    - 'accelerate': True if Apple Accelerate is linked (macOS)
+    - 'openblas': True if OpenBLAS is linked (Linux)
+    If the Rust backend is not available, all values are False.
+    """
+    if _rust_backend_info is not None:
+        return _rust_backend_info()
+    return {"blas": False, "accelerate": False, "openblas": False}
 __all__ = [
     'HAS_RUST_BACKEND',
+    'rust_backend_info',
     '_rust_bootstrap_weights',
     '_rust_synthetic_weights',
     '_rust_project_simplex',

{diff_diff-2.4.1 → diff_diff-2.4.2}/diff_diff/imputation.py RENAMED Viewed

@@ -22,7 +22,7 @@ import pandas as pd
 from scipy import sparse, stats
 from scipy.sparse.linalg import spsolve
-from diff_diff.imputation_bootstrap import ImputationDiDBootstrapMixin
+from diff_diff.imputation_bootstrap import ImputationDiDBootstrapMixin, _compute_target_weights
 from diff_diff.imputation_results import ImputationBootstrapResults, ImputationDiDResults  # noqa: F401 (re-export)
 from diff_diff.linalg import solve_ols
 from diff_diff.utils import safe_inference
@@ -63,6 +63,8 @@ class ImputationDiD(ImputationDiDBootstrapMixin):
     n_bootstrap : int, default=0
         Number of bootstrap iterations. If 0, uses analytical inference
         (conservative variance from Theorem 3).
+    bootstrap_weights : str, default="rademacher"
+        Type of bootstrap weights: "rademacher", "mammen", or "webb".
     seed : int, optional
         Random seed for reproducibility.
     rank_deficient_action : str, default="warn"
@@ -126,6 +128,7 @@ class ImputationDiD(ImputationDiDBootstrapMixin):
         alpha: float = 0.05,
         cluster: Optional[str] = None,
         n_bootstrap: int = 0,
+        bootstrap_weights: str = "rademacher",
         seed: Optional[int] = None,
         rank_deficient_action: str = "warn",
         horizon_max: Optional[int] = None,
@@ -136,6 +139,11 @@ class ImputationDiD(ImputationDiDBootstrapMixin):
                 f"rank_deficient_action must be 'warn', 'error', or 'silent', "
                 f"got '{rank_deficient_action}'"
             )
+        if bootstrap_weights not in ("rademacher", "mammen", "webb"):
+            raise ValueError(
+                f"bootstrap_weights must be 'rademacher', 'mammen', or 'webb', "
+                f"got '{bootstrap_weights}'"
+            )
         if aux_partition not in ("cohort_horizon", "cohort", "horizon"):
             raise ValueError(
                 f"aux_partition must be 'cohort_horizon', 'cohort', or 'horizon', "
@@ -146,6 +154,7 @@ class ImputationDiD(ImputationDiDBootstrapMixin):
         self.alpha = alpha
         self.cluster = cluster
         self.n_bootstrap = n_bootstrap
+        self.bootstrap_weights = bootstrap_weights
         self.seed = seed
         self.rank_deficient_action = rank_deficient_action
         self.horizon_max = horizon_max
@@ -1359,15 +1368,7 @@ class ImputationDiD(ImputationDiDBootstrapMixin):
             effect = float(np.mean(valid_tau))
             # Compute SE via conservative variance with horizon-specific weights
-            weights_h = np.zeros(int(omega_1_mask.sum()))
-            # Map h_mask (relative to df_1) to weights array
-            h_indices_in_omega1 = np.where(h_mask)[0]
-            n_valid = len(valid_tau)
-            # Only weight valid (finite) observations
-            finite_mask = np.isfinite(tau_hat[h_mask])
-            valid_h_indices = h_indices_in_omega1[finite_mask]
-            for idx in valid_h_indices:
-                weights_h[idx] = 1.0 / n_valid
+            weights_h, n_valid = _compute_target_weights(tau_hat, h_mask)
             se = self._compute_conservative_variance(
                 df=df,
@@ -1477,12 +1478,7 @@ class ImputationDiD(ImputationDiDBootstrapMixin):
             effect = float(np.mean(valid_tau))
             # Compute SE with group-specific weights
-            weights_g = np.zeros(int(omega_1_mask.sum()))
-            finite_mask = np.isfinite(tau_hat) & g_mask
-            g_indices = np.where(finite_mask)[0]
-            n_valid = len(valid_tau)
-            for idx in g_indices:
-                weights_g[idx] = 1.0 / n_valid
+            weights_g, _ = _compute_target_weights(tau_hat, g_mask)
             se = self._compute_conservative_variance(
                 df=df,
@@ -1664,6 +1660,7 @@ class ImputationDiD(ImputationDiDBootstrapMixin):
             "alpha": self.alpha,
             "cluster": self.cluster,
             "n_bootstrap": self.n_bootstrap,
+            "bootstrap_weights": self.bootstrap_weights,
             "seed": self.seed,
             "rank_deficient_action": self.rank_deficient_action,
             "horizon_max": self.horizon_max,

{diff_diff-2.4.1 → diff_diff-2.4.2}/diff_diff/imputation_bootstrap.py RENAMED Viewed

@@ -19,6 +19,39 @@ __all__ = [
 ]
+def _compute_target_weights(
+    tau_hat: np.ndarray,
+    target_mask: np.ndarray,
+) -> "tuple[np.ndarray, int]":
+    """
+    Equal weights for finite tau_hat observations within target_mask.
+    Used by both aggregation and bootstrap paths to avoid weight logic
+    duplication.
+    Parameters
+    ----------
+    tau_hat : np.ndarray
+        Per-observation treatment effects (may contain NaN).
+    target_mask : np.ndarray
+        Boolean mask selecting the target subset within tau_hat.
+    Returns
+    -------
+    weights : np.ndarray
+        Weight array (same length as tau_hat). 1/n_valid for finite
+        observations in target_mask, 0 elsewhere.
+    n_valid : int
+        Number of finite observations in the target subset.
+    """
+    finite_target = np.isfinite(tau_hat) & target_mask
+    n_valid = int(finite_target.sum())
+    weights = np.zeros(len(tau_hat))
+    if n_valid > 0:
+        weights[np.where(finite_target)[0]] = 1.0 / n_valid
+    return weights, n_valid
 class ImputationDiDBootstrapMixin:
     """Mixin providing bootstrap inference methods for ImputationDiD."""
@@ -91,7 +124,8 @@ class ImputationDiDBootstrapMixin:
         For each aggregation target (overall, per-horizon, per-group), computes
         psi_i = sum_t v_it * epsilon_tilde_it for each cluster. The multiplier
-        bootstrap then perturbs these psi sums with Rademacher weights.
+        bootstrap then perturbs these psi sums with multiplier weights
+        (rademacher/mammen/webb; configurable via ``bootstrap_weights``).
         Computational cost scales with the number of aggregation targets, since
         each target requires its own v_untreated computation (weight-dependent).
@@ -120,13 +154,10 @@ class ImputationDiDBootstrapMixin:
         result["overall"] = (overall_psi, cluster_ids)
         # Event study: per-horizon weights
-        # NOTE: weight logic duplicated from _aggregate_event_study.
-        # If weight scheme changes there, update here too.
         if event_study_effects:
             result["event_study"] = {}
             df_1 = df.loc[omega_1_mask]
             rel_times = df_1["_rel_time"].values
-            n_omega_1 = int(omega_1_mask.sum())
             # Balanced cohort mask (same logic as _aggregate_event_study)
             balanced_mask = None
@@ -150,24 +181,18 @@ class ImputationDiDBootstrapMixin:
                 h_mask = rel_times == h
                 if balanced_mask is not None:
                     h_mask = h_mask & balanced_mask
-                weights_h = np.zeros(n_omega_1)
-                finite_h = np.isfinite(tau_hat) & h_mask
-                n_valid_h = int(finite_h.sum())
+                weights_h, n_valid_h = _compute_target_weights(tau_hat, h_mask)
                 if n_valid_h == 0:
                     continue
-                weights_h[np.where(finite_h)[0]] = 1.0 / n_valid_h
                 psi_h, _ = self._compute_cluster_psi_sums(**common, weights=weights_h)
                 result["event_study"][h] = psi_h
         # Group effects: per-group weights
-        # NOTE: weight logic duplicated from _aggregate_group.
-        # If weight scheme changes there, update here too.
         if group_effects:
             result["group"] = {}
             df_1 = df.loc[omega_1_mask]
             cohorts = df_1[first_treat].values
-            n_omega_1 = int(omega_1_mask.sum())
             for g in group_effects:
                 if group_effects[g].get("n_obs", 0) == 0:
@@ -175,12 +200,9 @@ class ImputationDiDBootstrapMixin:
                 if not np.isfinite(group_effects[g].get("effect", np.nan)):
                     continue
                 g_mask = cohorts == g
-                weights_g = np.zeros(n_omega_1)
-                finite_g = np.isfinite(tau_hat) & g_mask
-                n_valid_g = int(finite_g.sum())
+                weights_g, n_valid_g = _compute_target_weights(tau_hat, g_mask)
                 if n_valid_g == 0:
                     continue
-                weights_g[np.where(finite_g)[0]] = 1.0 / n_valid_g
                 psi_g, _ = self._compute_cluster_psi_sums(**common, weights=weights_g)
                 result["group"][g] = psi_g
@@ -197,7 +219,8 @@ class ImputationDiDBootstrapMixin:
         """
         Run multiplier bootstrap on pre-computed influence function sums.
-        Uses T_b = sum_i w_b_i * psi_i where w_b_i are Rademacher weights
+        Uses T_b = sum_i w_b_i * psi_i where w_b_i are multiplier weights
+        (rademacher/mammen/webb; configurable via ``bootstrap_weights``)
         and psi_i are cluster-level influence function sums from Theorem 3.
         SE = std(T_b, ddof=1).
         """
@@ -216,7 +239,7 @@ class ImputationDiDBootstrapMixin:
         # Generate ALL weights upfront: shape (n_bootstrap, n_clusters)
         all_weights = _generate_bootstrap_weights_batch(
-            self.n_bootstrap, n_clusters, "rademacher", rng
+            self.n_bootstrap, n_clusters, self.bootstrap_weights, rng
         )
         # Overall ATT bootstrap draws
@@ -295,7 +318,7 @@ class ImputationDiDBootstrapMixin:
         return ImputationBootstrapResults(
             n_bootstrap=self.n_bootstrap,
-            weight_type="rademacher",
+            weight_type=self.bootstrap_weights,
             alpha=self.alpha,
             overall_att_se=overall_se,
             overall_att_ci=overall_ci,

{diff_diff-2.4.1 → diff_diff-2.4.2}/diff_diff/imputation_results.py RENAMED Viewed

@@ -33,7 +33,7 @@ class ImputationBootstrapResults:
     n_bootstrap : int
         Number of bootstrap iterations.
     weight_type : str
-        Type of bootstrap weights (currently "rademacher" only).
+        Type of bootstrap weights: "rademacher", "mammen", or "webb".
     alpha : float
         Significance level used for confidence intervals.
     overall_att_se : float

{diff_diff-2.4.1 → diff_diff-2.4.2}/diff_diff/sun_abraham.py RENAMED Viewed

@@ -433,8 +433,6 @@ class SunAbraham:
         time: str,
         first_treat: str,
         covariates: Optional[List[str]] = None,
-        min_pre_periods: int = 1,
-        min_post_periods: int = 1,
     ) -> SunAbrahamResults:
         """
         Fit the Sun-Abraham estimator using saturated regression.
@@ -454,10 +452,6 @@ class SunAbraham:
             Use 0 (or np.inf) for never-treated units.
         covariates : list, optional
             List of covariate column names to include in regression.
-        min_pre_periods : int, default=1
-            **Deprecated**: Accepted but ignored. Will be removed in a future version.
-        min_post_periods : int, default=1
-            **Deprecated**: Accepted but ignored. Will be removed in a future version.
         Returns
         -------
@@ -469,22 +463,6 @@ class SunAbraham:
         ValueError
             If required columns are missing or data validation fails.
         """
-        # Deprecation warnings for unimplemented parameters
-        if min_pre_periods != 1:
-            warnings.warn(
-                "min_pre_periods is not yet implemented and will be ignored. "
-                "This parameter will be removed in a future version.",
-                FutureWarning,
-                stacklevel=2,
-            )
-        if min_post_periods != 1:
-            warnings.warn(
-                "min_post_periods is not yet implemented and will be ignored. "
-                "This parameter will be removed in a future version.",
-                FutureWarning,
-                stacklevel=2,
-            )
         # Validate inputs
         required_cols = [outcome, unit, time, first_treat]
         if covariates:

{diff_diff-2.4.1 → diff_diff-2.4.2}/diff_diff/trop.py RENAMED Viewed

@@ -93,7 +93,7 @@ class TROP:
     alpha : float, default=0.05
         Significance level for confidence intervals.
     n_bootstrap : int, default=200
-        Number of bootstrap replications for variance estimation.
+        Number of bootstrap replications for variance estimation. Must be >= 2.
     seed : int, optional
         Random seed for reproducibility.
@@ -156,6 +156,12 @@ class TROP:
         self.lambda_unit_grid = lambda_unit_grid or [0.0, 0.1, 0.5, 1.0, 2.0, 5.0]
         self.lambda_nn_grid = lambda_nn_grid or [0.0, 0.01, 0.1, 1.0, 10.0]
+        if n_bootstrap < 2:
+            raise ValueError(
+                "n_bootstrap must be >= 2 for TROP (bootstrap variance "
+                "estimation is always used)"
+            )
         self.max_iter = max_iter
         self.tol = tol
         self.alpha = alpha

{diff_diff-2.4.1 → diff_diff-2.4.2}/diff_diff/two_stage.py RENAMED Viewed

@@ -29,6 +29,11 @@ import pandas as pd
 from scipy import sparse
 from scipy.sparse.linalg import factorized as sparse_factorized
+# Maximum number of elements before falling back to per-column sparse aggregation.
+# 10M float64 elements ≈ 80 MB peak allocation. Above this, per-column .getcol()
+# trades throughput for bounded memory. Keep in sync with two_stage_bootstrap.py.
+_SPARSE_DENSE_THRESHOLD = 10_000_000
 from diff_diff.linalg import solve_ols
 from diff_diff.two_stage_bootstrap import TwoStageDiDBootstrapMixin
 from diff_diff.two_stage_results import TwoStageBootstrapResults, TwoStageDiDResults  # noqa: F401 (re-export)
@@ -67,6 +72,8 @@ class TwoStageDiD(TwoStageDiDBootstrapMixin):
     n_bootstrap : int, default=0
         Number of bootstrap iterations. If 0, uses analytical GMM
         sandwich inference.
+    bootstrap_weights : str, default="rademacher"
+        Type of bootstrap weights: "rademacher", "mammen", or "webb".
     seed : int, optional
         Random seed for reproducibility.
     rank_deficient_action : str, default="warn"
@@ -125,6 +132,7 @@ class TwoStageDiD(TwoStageDiDBootstrapMixin):
         alpha: float = 0.05,
         cluster: Optional[str] = None,
         n_bootstrap: int = 0,
+        bootstrap_weights: str = "rademacher",
         seed: Optional[int] = None,
         rank_deficient_action: str = "warn",
         horizon_max: Optional[int] = None,
@@ -134,11 +142,17 @@ class TwoStageDiD(TwoStageDiDBootstrapMixin):
                 f"rank_deficient_action must be 'warn', 'error', or 'silent', "
                 f"got '{rank_deficient_action}'"
             )
+        if bootstrap_weights not in ("rademacher", "mammen", "webb"):
+            raise ValueError(
+                f"bootstrap_weights must be 'rademacher', 'mammen', or 'webb', "
+                f"got '{bootstrap_weights}'"
+            )
         self.anticipation = anticipation
         self.alpha = alpha
         self.cluster = cluster
         self.n_bootstrap = n_bootstrap
+        self.bootstrap_weights = bootstrap_weights
         self.seed = seed
         self.rank_deficient_action = rank_deficient_action
         self.horizon_max = horizon_max
@@ -1065,6 +1079,41 @@ class TwoStageDiD(TwoStageDiDBootstrapMixin):
         return group_effects
+    # =========================================================================
+    # GMM score computation
+    # =========================================================================
+    @staticmethod
+    def _compute_gmm_scores(
+        c_by_cluster: np.ndarray,
+        gamma_hat: np.ndarray,
+        s2_by_cluster: np.ndarray,
+    ) -> np.ndarray:
+        """
+        Compute per-cluster GMM scores S_g = gamma_hat' c_g - X'_{2g} eps_{2g}.
+        Handles NaN/overflow from rank-deficient FE by wrapping in errstate
+        and replacing non-finite values with 0.
+        Parameters
+        ----------
+        c_by_cluster : np.ndarray, shape (G, p)
+            Per-cluster Stage 1 scores.
+        gamma_hat : np.ndarray, shape (p, k)
+            Cross-moment correction matrix.
+        s2_by_cluster : np.ndarray, shape (G, k)
+            Per-cluster Stage 2 scores.
+        Returns
+        -------
+        np.ndarray, shape (G, k)
+            Per-cluster influence scores.
+        """
+        with np.errstate(invalid="ignore", divide="ignore", over="ignore"):
+            correction = np.dot(c_by_cluster, gamma_hat)
+        np.nan_to_num(correction, copy=False, nan=0.0, posinf=0.0, neginf=0.0)
+        return correction - s2_by_cluster
     # =========================================================================
     # GMM Sandwich Variance (Butts & Gardner 2022)
     # =========================================================================
@@ -1178,12 +1227,19 @@ class TwoStageDiD(TwoStageDiDBootstrapMixin):
         unique_clusters, cluster_indices = np.unique(cluster_ids, return_inverse=True)
         G = len(unique_clusters)
-        # Aggregate sparse rows by cluster using column-wise np.add.at
-        weighted_X10_csc = weighted_X10.tocsc()
+        n_elements = weighted_X10.shape[0] * weighted_X10.shape[1]
         c_by_cluster = np.zeros((G, p))
-        for j_col in range(p):
-            col_data = weighted_X10_csc.getcol(j_col).toarray().ravel()
-            np.add.at(c_by_cluster[:, j_col], cluster_indices, col_data)
+        if n_elements > _SPARSE_DENSE_THRESHOLD:
+            # Per-column path: limits peak memory for large FE matrices
+            weighted_X10_csc = weighted_X10.tocsc()
+            for j_col in range(p):
+                col_data = weighted_X10_csc.getcol(j_col).toarray().ravel()
+                np.add.at(c_by_cluster[:, j_col], cluster_indices, col_data)
+        else:
+            # Dense path: faster for moderate-size matrices
+            weighted_X10_dense = weighted_X10.toarray()
+            for j_col in range(p):
+                np.add.at(c_by_cluster[:, j_col], cluster_indices, weighted_X10_dense[:, j_col])
         # 3. Per-cluster Stage 2 scores: X'_{2g} eps_{2g}
         weighted_X2 = X_2 * eps_2[:, None]  # (n x k) dense
@@ -1192,11 +1248,7 @@ class TwoStageDiD(TwoStageDiDBootstrapMixin):
             np.add.at(s2_by_cluster[:, j_col], cluster_indices, weighted_X2[:, j_col])
         # 4. S_g = gamma_hat' c_g - X'_{2g} eps_{2g}
-        with np.errstate(invalid="ignore", divide="ignore", over="ignore"):
-            correction = np.dot(c_by_cluster, gamma_hat)  # (G x p) @ (p x k) = (G x k)
-        # Replace NaN/inf from overflow (rank-deficient FE) with 0
-        np.nan_to_num(correction, copy=False, nan=0.0, posinf=0.0, neginf=0.0)
-        S = correction - s2_by_cluster  # (G x k)
+        S = self._compute_gmm_scores(c_by_cluster, gamma_hat, s2_by_cluster)
         # 5. Meat: sum_g S_g S'_g = S' S
         with np.errstate(invalid="ignore", over="ignore"):
@@ -1304,6 +1356,7 @@ class TwoStageDiD(TwoStageDiDBootstrapMixin):
             "alpha": self.alpha,
             "cluster": self.cluster,
             "n_bootstrap": self.n_bootstrap,
+            "bootstrap_weights": self.bootstrap_weights,
             "seed": self.seed,
             "rank_deficient_action": self.rank_deficient_action,
             "horizon_max": self.horizon_max,

{diff_diff-2.4.1 → diff_diff-2.4.2}/diff_diff/two_stage_bootstrap.py RENAMED Viewed

@@ -15,6 +15,9 @@ from scipy.sparse.linalg import factorized as sparse_factorized
 from diff_diff.linalg import solve_ols
 from diff_diff.staggered_bootstrap import _generate_bootstrap_weights_batch
+# Maximum number of elements before falling back to per-column sparse aggregation.
+# Keep in sync with two_stage.py.
+_SPARSE_DENSE_THRESHOLD = 10_000_000
 from diff_diff.two_stage_results import TwoStageBootstrapResults
 __all__ = [
@@ -106,19 +109,26 @@ class TwoStageDiDBootstrapMixin:
         unique_clusters, cluster_indices = np.unique(cluster_ids, return_inverse=True)
         G = len(unique_clusters)
-        weighted_X10_csc = weighted_X10.tocsc()
+        n_elements = weighted_X10.shape[0] * weighted_X10.shape[1]
         c_by_cluster = np.zeros((G, p))
-        for j_col in range(p):
-            col_data = weighted_X10_csc.getcol(j_col).toarray().ravel()
-            np.add.at(c_by_cluster[:, j_col], cluster_indices, col_data)
+        if n_elements > _SPARSE_DENSE_THRESHOLD:
+            # Per-column path: limits peak memory for large FE matrices
+            weighted_X10_csc = weighted_X10.tocsc()
+            for j_col in range(p):
+                col_data = weighted_X10_csc.getcol(j_col).toarray().ravel()
+                np.add.at(c_by_cluster[:, j_col], cluster_indices, col_data)
+        else:
+            # Dense path: faster for moderate-size matrices
+            weighted_X10_dense = weighted_X10.toarray()
+            for j_col in range(p):
+                np.add.at(c_by_cluster[:, j_col], cluster_indices, weighted_X10_dense[:, j_col])
         weighted_X2 = X_2 * eps_2[:, None]
         s2_by_cluster = np.zeros((G, k))
         for j_col in range(k):
             np.add.at(s2_by_cluster[:, j_col], cluster_indices, weighted_X2[:, j_col])
-        correction = np.dot(c_by_cluster, gamma_hat)
-        S = correction - s2_by_cluster
+        S = self._compute_gmm_scores(c_by_cluster, gamma_hat, s2_by_cluster)
         # Bread
         XtX_2 = np.dot(X_2.T, X_2)
@@ -201,7 +211,7 @@ class TwoStageDiDBootstrapMixin:
         n_clusters = len(unique_clusters)
         all_weights = _generate_bootstrap_weights_batch(
-            self.n_bootstrap, n_clusters, "rademacher", rng
+            self.n_bootstrap, n_clusters, self.bootstrap_weights, rng
         )
         # T_b = bread @ (sum_g w_bg * S_g) = bread @ (W @ S)'  per boot
@@ -385,7 +395,7 @@ class TwoStageDiDBootstrapMixin:
         return TwoStageBootstrapResults(
             n_bootstrap=self.n_bootstrap,
-            weight_type="rademacher",
+            weight_type=self.bootstrap_weights,
             alpha=self.alpha,
             overall_att_se=overall_se,
             overall_att_ci=overall_ci,

{diff_diff-2.4.1 → diff_diff-2.4.2}/diff_diff/two_stage_results.py RENAMED Viewed

@@ -34,7 +34,7 @@ class TwoStageBootstrapResults:
     n_bootstrap : int
         Number of bootstrap iterations.
     weight_type : str
-        Type of bootstrap weights (currently "rademacher" only).
+        Type of bootstrap weights: "rademacher", "mammen", or "webb".
     alpha : float
         Significance level used for confidence intervals.
     overall_att_se : float

diff-diff 2.4.1__tar.gz → 2.4.2__tar.gz

diff-diff 2.4.1.tar.gz → 2.4.2.tar.gz