PyPI - diff-diff - Versions diffs - 2.3.0__tar.gz → 2.3.2__tar.gz - Mend

diff-diff 2.3.0__tar.gz → 2.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

{diff_diff-2.3.0 → diff_diff-2.3.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: diff-diff
-Version: 2.3.0
+Version: 2.3.2
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Science/Research
 Classifier: Operating System :: OS Independent
@@ -9,6 +9,7 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Scientific/Engineering :: Mathematics
 Requires-Dist: numpy>=1.20.0
 Requires-Dist: pandas>=1.3.0
@@ -28,7 +29,7 @@ Summary: A library for Difference-in-Differences causal inference analysis
 Keywords: causal-inference,difference-in-differences,econometrics,statistics,treatment-effects
 Author: diff-diff contributors
 License-Expression: MIT
-Requires-Python: >=3.9
+Requires-Python: >=3.9, <3.14
 Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
 Project-URL: Documentation, https://diff-diff.readthedocs.io
 Project-URL: Homepage, https://github.com/igerber/diff-diff
@@ -1200,11 +1201,12 @@ Use Synthetic DiD instead of standard DiD when:
 ```python
 SyntheticDiD(
-    lambda_reg=0.0,     # Regularization toward uniform weights (0 = no reg)
-    zeta=1.0,           # Time weight regularization (higher = more uniform)
-    alpha=0.05,         # Significance level
-    n_bootstrap=200,    # Bootstrap iterations for SE (0 = placebo-based)
-    seed=None           # Random seed for reproducibility
+    zeta_omega=None,        # Unit weight regularization (None = auto-computed from data)
+    zeta_lambda=None,       # Time weight regularization (None = auto-computed from data)
+    alpha=0.05,             # Significance level
+    variance_method="placebo",  # "placebo" (default, matches R) or "bootstrap"
+    n_bootstrap=200,        # Replications for SE estimation
+    seed=None               # Random seed for reproducibility
 )
 ```
@@ -1909,11 +1911,12 @@ MultiPeriodDiD(
 ```python
 SyntheticDiD(
-    lambda_reg=0.0,     # L2 regularization for unit weights
-    zeta=1.0,           # Regularization for time weights
-    alpha=0.05,         # Significance level for CIs
-    n_bootstrap=200,    # Bootstrap iterations for SE
-    seed=None           # Random seed for reproducibility
+    zeta_omega=None,        # Unit weight regularization (None = auto from data)
+    zeta_lambda=None,       # Time weight regularization (None = auto from data)
+    alpha=0.05,             # Significance level for CIs
+    variance_method="placebo",  # "placebo" (R default) or "bootstrap"
+    n_bootstrap=200,        # Replications for SE estimation
+    seed=None               # Random seed for reproducibility
 )
 ```
@@ -2487,7 +2490,7 @@ Returns DataFrame with columns: `unit`, `quality_score`, `outcome_trend_score`,
 ## Requirements
-- Python >= 3.9
+- Python 3.9 - 3.13
 - numpy >= 1.20
 - pandas >= 1.3
 - scipy >= 1.7

{diff_diff-2.3.0 → diff_diff-2.3.2}/README.md RENAMED Viewed

@@ -1163,11 +1163,12 @@ Use Synthetic DiD instead of standard DiD when:
 ```python
 SyntheticDiD(
-    lambda_reg=0.0,     # Regularization toward uniform weights (0 = no reg)
-    zeta=1.0,           # Time weight regularization (higher = more uniform)
-    alpha=0.05,         # Significance level
-    n_bootstrap=200,    # Bootstrap iterations for SE (0 = placebo-based)
-    seed=None           # Random seed for reproducibility
+    zeta_omega=None,        # Unit weight regularization (None = auto-computed from data)
+    zeta_lambda=None,       # Time weight regularization (None = auto-computed from data)
+    alpha=0.05,             # Significance level
+    variance_method="placebo",  # "placebo" (default, matches R) or "bootstrap"
+    n_bootstrap=200,        # Replications for SE estimation
+    seed=None               # Random seed for reproducibility
 )
 ```
@@ -1872,11 +1873,12 @@ MultiPeriodDiD(
 ```python
 SyntheticDiD(
-    lambda_reg=0.0,     # L2 regularization for unit weights
-    zeta=1.0,           # Regularization for time weights
-    alpha=0.05,         # Significance level for CIs
-    n_bootstrap=200,    # Bootstrap iterations for SE
-    seed=None           # Random seed for reproducibility
+    zeta_omega=None,        # Unit weight regularization (None = auto from data)
+    zeta_lambda=None,       # Time weight regularization (None = auto from data)
+    alpha=0.05,             # Significance level for CIs
+    variance_method="placebo",  # "placebo" (R default) or "bootstrap"
+    n_bootstrap=200,        # Replications for SE estimation
+    seed=None               # Random seed for reproducibility
 )
 ```
@@ -2450,7 +2452,7 @@ Returns DataFrame with columns: `unit`, `quality_score`, `outcome_trend_score`,
 ## Requirements
-- Python >= 3.9
+- Python 3.9 - 3.13
 - numpy >= 1.20
 - pandas >= 1.3
 - scipy >= 1.7

{diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/__init__.py RENAMED Viewed

@@ -142,7 +142,7 @@ from diff_diff.datasets import (
     load_mpdta,
 )
-__version__ = "2.3.0"
+__version__ = "2.3.2"
 __all__ = [
     # Estimators
     "DifferenceInDifferences",

{diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/_backend.py RENAMED Viewed

@@ -30,6 +30,11 @@ try:
         # TROP estimator acceleration (joint method)
         loocv_grid_search_joint as _rust_loocv_grid_search_joint,
         bootstrap_trop_variance_joint as _rust_bootstrap_trop_variance_joint,
+        # SDID weights (Frank-Wolfe matching R's synthdid)
+        compute_sdid_unit_weights as _rust_sdid_unit_weights,
+        compute_time_weights as _rust_compute_time_weights,
+        compute_noise_level as _rust_compute_noise_level,
+        sc_weight_fw as _rust_sc_weight_fw,
     )
     _rust_available = True
 except ImportError:
@@ -46,6 +51,11 @@ except ImportError:
     # TROP estimator acceleration (joint method)
     _rust_loocv_grid_search_joint = None
     _rust_bootstrap_trop_variance_joint = None
+    # SDID weights (Frank-Wolfe matching R's synthdid)
+    _rust_sdid_unit_weights = None
+    _rust_compute_time_weights = None
+    _rust_compute_noise_level = None
+    _rust_sc_weight_fw = None
 # Determine final backend based on environment variable and availability
 if _backend_env == 'python':
@@ -63,6 +73,11 @@ if _backend_env == 'python':
     # TROP estimator acceleration (joint method)
     _rust_loocv_grid_search_joint = None
     _rust_bootstrap_trop_variance_joint = None
+    # SDID weights (Frank-Wolfe matching R's synthdid)
+    _rust_sdid_unit_weights = None
+    _rust_compute_time_weights = None
+    _rust_compute_noise_level = None
+    _rust_sc_weight_fw = None
 elif _backend_env == 'rust':
     # Force Rust mode - fail if not available
     if not _rust_available:
@@ -89,4 +104,9 @@ __all__ = [
     # TROP estimator acceleration (joint method)
     '_rust_loocv_grid_search_joint',
     '_rust_bootstrap_trop_variance_joint',
+    # SDID weights (Frank-Wolfe matching R's synthdid)
+    '_rust_sdid_unit_weights',
+    '_rust_compute_time_weights',
+    '_rust_compute_noise_level',
+    '_rust_sc_weight_fw',
 ]

{diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/results.py RENAMED Viewed

@@ -605,8 +605,10 @@ class SyntheticDiDResults:
     pre_periods: List[Any]
     post_periods: List[Any]
     alpha: float = 0.05
-    variance_method: str = field(default="bootstrap")
-    lambda_reg: Optional[float] = field(default=None)
+    variance_method: str = field(default="placebo")
+    noise_level: Optional[float] = field(default=None)
+    zeta_omega: Optional[float] = field(default=None)
+    zeta_lambda: Optional[float] = field(default=None)
     pre_treatment_fit: Optional[float] = field(default=None)
     placebo_effects: Optional[np.ndarray] = field(default=None)
     n_bootstrap: Optional[int] = field(default=None)
@@ -650,8 +652,12 @@ class SyntheticDiDResults:
             f"{'Post-treatment periods:':<25} {len(self.post_periods):>10}",
         ]
-        if self.lambda_reg is not None:
-            lines.append(f"{'Regularization (lambda):':<25} {self.lambda_reg:>10.4f}")
+        if self.zeta_omega is not None:
+            lines.append(f"{'Zeta (unit weights):':<25} {self.zeta_omega:>10.4f}")
+        if self.zeta_lambda is not None:
+            lines.append(f"{'Zeta (time weights):':<25} {self.zeta_lambda:>10.6f}")
+        if self.noise_level is not None:
+            lines.append(f"{'Noise level:':<25} {self.noise_level:>10.4f}")
         if self.pre_treatment_fit is not None:
             lines.append(f"{'Pre-treatment fit (RMSE):':<25} {self.pre_treatment_fit:>10.4f}")
@@ -731,7 +737,9 @@ class SyntheticDiDResults:
             "n_pre_periods": len(self.pre_periods),
             "n_post_periods": len(self.post_periods),
             "variance_method": self.variance_method,
-            "lambda_reg": self.lambda_reg,
+            "noise_level": self.noise_level,
+            "zeta_omega": self.zeta_omega,
+            "zeta_lambda": self.zeta_lambda,
             "pre_treatment_fit": self.pre_treatment_fit,
         }
         if self.n_bootstrap is not None:

{diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/staggered.py RENAMED Viewed

@@ -415,6 +415,7 @@ class CallawaySantAnna(
             cohort_masks[g] = (unit_cohorts == g)
         # Never-treated mask
+        # np.inf was normalized to 0 in fit(), so the np.inf check is defensive only
         never_treated_mask = (unit_cohorts == 0) | (unit_cohorts == np.inf)
         # Pre-compute covariate matrices by time period if needed
@@ -639,13 +640,15 @@ class CallawaySantAnna(
         # This avoids hardcoding column names in internal methods
         df['first_treat'] = df[first_treat]
+        # Never-treated indicator (must precede treatment_groups to exclude np.inf)
+        df['_never_treated'] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
+        # Normalize np.inf → 0 so all downstream `> 0` checks exclude never-treated
+        df.loc[df[first_treat] == np.inf, first_treat] = 0
         # Identify groups and time periods
         time_periods = sorted(df[time].unique())
         treatment_groups = sorted([g for g in df[first_treat].unique() if g > 0])
-        # Never-treated indicator (first_treat = 0 or inf)
-        df['_never_treated'] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
         # Get unique units
         unit_info = df.groupby(unit).agg({
             first_treat: 'first',

{diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/sun_abraham.py RENAMED Viewed

@@ -456,9 +456,9 @@ class SunAbraham:
         covariates : list, optional
             List of covariate column names to include in regression.
         min_pre_periods : int, default=1
-            Minimum number of pre-treatment periods to include in event study.
+            **Deprecated**: Accepted but ignored. Will be removed in a future version.
         min_post_periods : int, default=1
-            Minimum number of post-treatment periods to include in event study.
+            **Deprecated**: Accepted but ignored. Will be removed in a future version.
         Returns
         -------
@@ -470,6 +470,22 @@ class SunAbraham:
         ValueError
             If required columns are missing or data validation fails.
         """
+        # Deprecation warnings for unimplemented parameters
+        if min_pre_periods != 1:
+            warnings.warn(
+                "min_pre_periods is not yet implemented and will be ignored. "
+                "This parameter will be removed in a future version.",
+                FutureWarning,
+                stacklevel=2,
+            )
+        if min_post_periods != 1:
+            warnings.warn(
+                "min_post_periods is not yet implemented and will be ignored. "
+                "This parameter will be removed in a future version.",
+                FutureWarning,
+                stacklevel=2,
+            )
         # Validate inputs
         required_cols = [outcome, unit, time, first_treat]
         if covariates:
@@ -486,13 +502,15 @@ class SunAbraham:
         df[time] = pd.to_numeric(df[time])
         df[first_treat] = pd.to_numeric(df[first_treat])
+        # Never-treated indicator (must precede treatment_groups to exclude np.inf)
+        df["_never_treated"] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
+        # Normalize np.inf → 0 so all downstream `> 0` checks exclude never-treated
+        df.loc[df[first_treat] == np.inf, first_treat] = 0
         # Identify groups and time periods
         time_periods = sorted(df[time].unique())
         treatment_groups = sorted([g for g in df[first_treat].unique() if g > 0])
-        # Never-treated indicator
-        df["_never_treated"] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
         # Get unique units
         unit_info = (
             df.groupby(unit)
@@ -533,9 +551,9 @@ class SunAbraham:
         all_rel_times_sorted = sorted(all_rel_times)
-        # Filter to reasonable range
-        min_rel = max(min(all_rel_times_sorted), -20)  # cap at -20
-        max_rel = min(max(all_rel_times_sorted), 20)   # cap at +20
+        # Use full range of relative times (no artificial truncation, matches R's fixest::sunab())
+        min_rel = min(all_rel_times_sorted)
+        max_rel = max(all_rel_times_sorted)
         # Reference period: last pre-treatment period (typically -1)
         self._reference_period = -1 - self.anticipation
@@ -765,12 +783,18 @@ class SunAbraham:
         # Fit OLS using LinearRegression helper (more stable than manual X'X inverse)
         cluster_ids = df_demeaned[cluster_var].values
+        # Degrees of freedom adjustment for absorbed unit and time fixed effects
+        n_units_fe = df[unit].nunique()
+        n_times_fe = df[time].nunique()
+        df_adj = n_units_fe + n_times_fe - 1
         reg = LinearRegression(
             include_intercept=False,  # Already demeaned, no intercept needed
             robust=True,
             cluster_ids=cluster_ids,
             rank_deficient_action=self.rank_deficient_action,
-        ).fit(X, y)
+        ).fit(X, y, df_adjustment=df_adj)
         coefficients = reg.coefficients_
         vcov = reg.vcov_
@@ -821,7 +845,8 @@ class SunAbraham:
         β_e = Σ_g w_{g,e} × δ_{g,e}
-        where w_{g,e} is the share of cohort g among treated units at relative time e.
+        where w_{g,e} = n_{g,e} / Σ_g n_{g,e} is the share of observations from cohort g
+        at event-time e among all treated observations at that event-time.
         Returns
         -------
@@ -833,9 +858,8 @@ class SunAbraham:
         event_study_effects: Dict[int, Dict[str, Any]] = {}
         cohort_weights: Dict[int, Dict[Any, float]] = {}
-        # Get cohort sizes
-        unit_cohorts = df.groupby(unit)[first_treat].first()
-        cohort_sizes = unit_cohorts[unit_cohorts > 0].value_counts().to_dict()
+        # Pre-compute per-event-time observation counts: n_{g,e}
+        event_time_counts = df[df[first_treat] > 0].groupby([first_treat, "_rel_time"]).size()
         for e in rel_periods:
             # Get cohorts that have observations at this relative time
@@ -847,13 +871,13 @@ class SunAbraham:
             if not cohorts_at_e:
                 continue
-            # Compute IW weights: share of each cohort among those observed at e
+            # Compute IW weights: n_{g,e} / Σ_g n_{g,e}
             weights = {}
             total_size = 0
             for g in cohorts_at_e:
-                n_g = cohort_sizes.get(g, 0)
-                weights[g] = n_g
-                total_size += n_g
+                n_g_e = event_time_counts.get((g, e), 0)
+                weights[g] = n_g_e
+                total_size += n_g_e
             if total_size == 0:
                 continue
@@ -915,7 +939,7 @@ class SunAbraham:
         ]
         if not post_effects:
-            return 0.0, 0.0
+            return np.nan, np.nan
         # Weight by number of treated observations at each relative time
         post_weights = []
@@ -948,7 +972,13 @@ class SunAbraham:
                         overall_weights_by_coef[key] += period_weight * cw
         if not overall_weights_by_coef:
-            # Fallback to simple variance calculation
+            # Fallback to simplified variance that ignores covariances between periods
+            warnings.warn(
+                "Could not construct full weight vector for overall ATT SE. "
+                "Using simplified variance that ignores covariances between periods.",
+                UserWarning,
+                stacklevel=2,
+            )
             overall_var = float(
                 np.sum((post_weights ** 2) * np.array([eff["se"] ** 2 for _, eff in post_effects]))
             )
@@ -1029,6 +1059,7 @@ class SunAbraham:
                 df_b[time] - df_b[first_treat],
                 np.nan
             )
+            # np.inf was normalized to 0 in fit(), so the np.inf check is defensive only
             df_b["_never_treated"] = (
                 (df_b[first_treat] == 0) | (df_b[first_treat] == np.inf)
             )
@@ -1113,11 +1144,16 @@ class SunAbraham:
             event_study_p_values[e] = p_value
         # Overall ATT statistics
-        overall_se = float(np.std(bootstrap_overall, ddof=1))
-        overall_ci = self._compute_percentile_ci(bootstrap_overall, self.alpha)
-        overall_p = self._compute_bootstrap_pvalue(
-            original_overall_att, bootstrap_overall
-        )
+        if not np.isfinite(original_overall_att):
+            overall_se = np.nan
+            overall_ci = (np.nan, np.nan)
+            overall_p = np.nan
+        else:
+            overall_se = float(np.std(bootstrap_overall, ddof=1))
+            overall_ci = self._compute_percentile_ci(bootstrap_overall, self.alpha)
+            overall_p = self._compute_bootstrap_pvalue(
+                original_overall_att, bootstrap_overall
+            )
         return SABootstrapResults(
             n_bootstrap=self.n_bootstrap,

diff-diff 2.3.0__tar.gz → 2.3.2__tar.gz

diff-diff 2.3.0__tar.gz → 2.3.2__tar.gz