PyPI - diff-diff - Versions diffs - 2.6.1__tar.gz → 2.7.0__tar.gz - Mend

diff-diff 2.6.1__tar.gz → 2.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

{diff_diff-2.6.1 → diff_diff-2.7.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: diff-diff
-Version: 2.6.1
+Version: 2.7.0
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Science/Research
 Classifier: Operating System :: OS Independent
@@ -58,7 +58,7 @@ pip install -e .
 ```python
 import pandas as pd
-from diff_diff import DifferenceInDifferences
+from diff_diff import DifferenceInDifferences  # or: DiD
 # Create sample data
 data = pd.DataFrame({
@@ -122,6 +122,28 @@ Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1
 - **Data prep utilities**: Helper functions for common data preparation tasks
 - **Validated against R**: Benchmarked against `did`, `synthdid`, and `fixest` packages (see [benchmarks](docs/benchmarks.rst))
+## Estimator Aliases
+All estimators have short aliases for convenience:
+| Alias | Full Name | Method |
+|-------|-----------|--------|
+| `DiD` | `DifferenceInDifferences` | Basic 2x2 DiD |
+| `TWFE` | `TwoWayFixedEffects` | Two-way fixed effects |
+| `EventStudy` | `MultiPeriodDiD` | Event study / multi-period |
+| `CS` | `CallawaySantAnna` | Callaway & Sant'Anna (2021) |
+| `SA` | `SunAbraham` | Sun & Abraham (2021) |
+| `BJS` | `ImputationDiD` | Borusyak, Jaravel & Spiess (2024) |
+| `Gardner` | `TwoStageDiD` | Gardner (2022) two-stage |
+| `SDiD` | `SyntheticDiD` | Synthetic DiD |
+| `DDD` | `TripleDifference` | Triple difference |
+| `CDiD` | `ContinuousDiD` | Continuous treatment DiD |
+| `Stacked` | `StackedDiD` | Stacked DiD |
+| `Bacon` | `BaconDecomposition` | Goodman-Bacon decomposition |
+| `EDiD` | `EfficientDiD` | Efficient DiD |
+`TROP` already uses its short canonical name and needs no alias.
 ## Tutorials
 We provide Jupyter notebook tutorials in `docs/tutorials/`:

{diff_diff-2.6.1 → diff_diff-2.7.0}/README.md RENAMED Viewed

@@ -20,7 +20,7 @@ pip install -e .
 ```python
 import pandas as pd
-from diff_diff import DifferenceInDifferences
+from diff_diff import DifferenceInDifferences  # or: DiD
 # Create sample data
 data = pd.DataFrame({
@@ -84,6 +84,28 @@ Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1
 - **Data prep utilities**: Helper functions for common data preparation tasks
 - **Validated against R**: Benchmarked against `did`, `synthdid`, and `fixest` packages (see [benchmarks](docs/benchmarks.rst))
+## Estimator Aliases
+All estimators have short aliases for convenience:
+| Alias | Full Name | Method |
+|-------|-----------|--------|
+| `DiD` | `DifferenceInDifferences` | Basic 2x2 DiD |
+| `TWFE` | `TwoWayFixedEffects` | Two-way fixed effects |
+| `EventStudy` | `MultiPeriodDiD` | Event study / multi-period |
+| `CS` | `CallawaySantAnna` | Callaway & Sant'Anna (2021) |
+| `SA` | `SunAbraham` | Sun & Abraham (2021) |
+| `BJS` | `ImputationDiD` | Borusyak, Jaravel & Spiess (2024) |
+| `Gardner` | `TwoStageDiD` | Gardner (2022) two-stage |
+| `SDiD` | `SyntheticDiD` | Synthetic DiD |
+| `DDD` | `TripleDifference` | Triple difference |
+| `CDiD` | `ContinuousDiD` | Continuous treatment DiD |
+| `Stacked` | `StackedDiD` | Stacked DiD |
+| `Bacon` | `BaconDecomposition` | Goodman-Bacon decomposition |
+| `EDiD` | `EfficientDiD` | Efficient DiD |
+`TROP` already uses its short canonical name and needs no alias.
 ## Tutorials
 We provide Jupyter notebook tutorials in `docs/tutorials/`:

{diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/__init__.py RENAMED Viewed

@@ -128,6 +128,11 @@ from diff_diff.continuous_did import (
     ContinuousDiDResults,
     DoseResponseCurve,
 )
+from diff_diff.efficient_did import (
+    EfficientDiD,
+    EfficientDiDResults,
+    EDiDBootstrapResults,
+)
 from diff_diff.trop import (
     TROP,
     TROPResults,
@@ -172,8 +177,9 @@ Gardner = TwoStageDiD
 DDD = TripleDifference
 Stacked = StackedDiD
 Bacon = BaconDecomposition
+EDiD = EfficientDiD
-__version__ = "2.6.1"
+__version__ = "2.7.0"
 __all__ = [
     # Estimators
     "DifferenceInDifferences",
@@ -231,6 +237,11 @@ __all__ = [
     "trop",
     "StackedDiDResults",
     "stacked_did",
+    # EfficientDiD
+    "EfficientDiD",
+    "EfficientDiDResults",
+    "EDiDBootstrapResults",
+    "EDiD",
     # Visualization
     "plot_event_study",
     "plot_group_effects",

{diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/bootstrap_utils.py RENAMED Viewed

@@ -19,6 +19,7 @@ __all__ = [
     "compute_percentile_ci",
     "compute_bootstrap_pvalue",
     "compute_effect_bootstrap_stats",
+    "compute_effect_bootstrap_stats_batch",
 ]
@@ -277,3 +278,126 @@ def compute_effect_bootstrap_stats(
         original_effect, valid_dist, n_valid=len(valid_dist)
     )
     return se, ci, p_value
+def compute_effect_bootstrap_stats_batch(
+    original_effects: np.ndarray,
+    bootstrap_matrix: np.ndarray,
+    alpha: float = 0.05,
+) -> tuple:
+    """
+    Batch-compute bootstrap statistics for multiple effects at once.
+    Parameters
+    ----------
+    original_effects : np.ndarray
+        Array of original point estimates, shape (n_effects,).
+    bootstrap_matrix : np.ndarray
+        Bootstrap distributions, shape (n_bootstrap, n_effects).
+    alpha : float, default=0.05
+        Significance level.
+    Returns
+    -------
+    ses : np.ndarray
+        Bootstrap SEs for each effect.
+    ci_lowers : np.ndarray
+        Lower CI bounds for each effect.
+    ci_uppers : np.ndarray
+        Upper CI bounds for each effect.
+    p_values : np.ndarray
+        Bootstrap p-values for each effect.
+    """
+    n_bootstrap, n_effects = bootstrap_matrix.shape
+    ses = np.full(n_effects, np.nan)
+    ci_lowers = np.full(n_effects, np.nan)
+    ci_uppers = np.full(n_effects, np.nan)
+    p_values = np.full(n_effects, np.nan)
+    # Check for non-finite original effects
+    valid_effects = np.isfinite(original_effects)
+    if not np.any(valid_effects):
+        return ses, ci_lowers, ci_uppers, p_values
+    # Count valid bootstrap samples per effect
+    finite_mask = np.isfinite(bootstrap_matrix)  # (n_bootstrap, n_effects)
+    n_valid = finite_mask.sum(axis=0)  # (n_effects,)
+    # Determine which effects have enough valid samples
+    enough_valid = (n_valid >= n_bootstrap * 0.5) & valid_effects
+    if not np.any(enough_valid):
+        n_insufficient = int(np.sum(valid_effects))
+        if n_insufficient > 0:
+            warnings.warn(
+                f"{n_insufficient} effect(s) had too few valid bootstrap samples (<50%). "
+                "Returning NaN for SE/CI/p-value.",
+                RuntimeWarning,
+                stacklevel=2,
+            )
+        return ses, ci_lowers, ci_uppers, p_values
+    # Warn about subset with insufficient samples
+    n_insufficient = int(np.sum(valid_effects & ~enough_valid))
+    if n_insufficient > 0:
+        warnings.warn(
+            f"{n_insufficient} effect(s) had too few valid bootstrap samples (<50%). "
+            "Returning NaN for SE/CI/p-value.",
+            RuntimeWarning,
+            stacklevel=2,
+        )
+    # For effects with all-finite bootstraps (common case), use vectorized ops
+    all_finite = (n_valid == n_bootstrap) & enough_valid
+    if np.any(all_finite):
+        idx = np.where(all_finite)[0]
+        sub = bootstrap_matrix[:, idx]
+        # Vectorized SE: std across bootstrap dimension
+        batch_ses = np.std(sub, axis=0, ddof=1)
+        # Vectorized percentile CI
+        lower_pct = alpha / 2 * 100
+        upper_pct = (1 - alpha / 2) * 100
+        batch_ci = np.percentile(sub, [lower_pct, upper_pct], axis=0)
+        # Vectorized p-values
+        batch_p = np.empty(len(idx))
+        for j, eff_idx in enumerate(idx):
+            eff = original_effects[eff_idx]
+            if eff >= 0:
+                batch_p[j] = np.mean(sub[:, j] <= 0)
+            else:
+                batch_p[j] = np.mean(sub[:, j] >= 0)
+        batch_p = np.minimum(2 * batch_p, 1.0)
+        batch_p = np.maximum(batch_p, 1 / (n_bootstrap + 1))
+        # Guard: SE must be positive and finite
+        se_valid = np.isfinite(batch_ses) & (batch_ses > 0)
+        n_bad_se = int(np.sum(~se_valid))
+        if n_bad_se > 0:
+            warnings.warn(
+                f"{n_bad_se} effect(s) had non-finite or zero bootstrap SE. "
+                "Returning NaN for SE/CI/p-value.",
+                RuntimeWarning,
+                stacklevel=2,
+            )
+        ses[idx[se_valid]] = batch_ses[se_valid]
+        ci_lowers[idx[se_valid]] = batch_ci[0][se_valid]
+        ci_uppers[idx[se_valid]] = batch_ci[1][se_valid]
+        p_values[idx[se_valid]] = batch_p[se_valid]
+    # Handle effects with some non-finite bootstraps (rare) via scalar fallback
+    partial_valid = enough_valid & ~all_finite
+    if np.any(partial_valid):
+        for j in np.where(partial_valid)[0]:
+            se, ci, pv = compute_effect_bootstrap_stats(
+                original_effects[j], bootstrap_matrix[:, j], alpha=alpha,
+                context=f"effect {j}"
+            )
+            ses[j] = se
+            ci_lowers[j] = ci[0]
+            ci_uppers[j] = ci[1]
+            p_values[j] = pv
+    return ses, ci_lowers, ci_uppers, p_values

diff-diff 2.6.1__tar.gz → 2.7.0__tar.gz

diff-diff 2.6.1__tar.gz → 2.7.0__tar.gz