diff-diff 2.2.0__tar.gz → 2.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. {diff_diff-2.2.0 → diff_diff-2.3.0}/PKG-INFO +145 -23
  2. {diff_diff-2.2.0 → diff_diff-2.3.0}/README.md +142 -22
  3. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/__init__.py +11 -1
  4. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/diagnostics.py +3 -3
  5. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/estimators.py +156 -42
  6. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/honest_did.py +158 -147
  7. diff_diff-2.3.0/diff_diff/imputation.py +2480 -0
  8. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/pretrends.py +89 -151
  9. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/results.py +164 -88
  10. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/sun_abraham.py +6 -6
  11. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/triple_diff.py +2 -2
  12. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/trop.py +80 -325
  13. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/twfe.py +39 -8
  14. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/visualization.py +209 -214
  15. {diff_diff-2.2.0 → diff_diff-2.3.0}/pyproject.toml +3 -1
  16. {diff_diff-2.2.0 → diff_diff-2.3.0}/rust/Cargo.lock +31 -31
  17. {diff_diff-2.2.0 → diff_diff-2.3.0}/rust/Cargo.toml +1 -1
  18. {diff_diff-2.2.0 → diff_diff-2.3.0}/rust/src/trop.rs +149 -136
  19. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/_backend.py +0 -0
  20. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/bacon.py +0 -0
  21. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/datasets.py +0 -0
  22. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/linalg.py +0 -0
  23. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/power.py +0 -0
  24. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/prep.py +0 -0
  25. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/prep_dgp.py +0 -0
  26. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/staggered.py +0 -0
  27. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/staggered_aggregation.py +0 -0
  28. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/staggered_bootstrap.py +0 -0
  29. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/staggered_results.py +0 -0
  30. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/synthetic_did.py +0 -0
  31. {diff_diff-2.2.0 → diff_diff-2.3.0}/diff_diff/utils.py +0 -0
  32. {diff_diff-2.2.0 → diff_diff-2.3.0}/rust/src/bootstrap.rs +0 -0
  33. {diff_diff-2.2.0 → diff_diff-2.3.0}/rust/src/lib.rs +0 -0
  34. {diff_diff-2.2.0 → diff_diff-2.3.0}/rust/src/linalg.rs +0 -0
  35. {diff_diff-2.2.0 → diff_diff-2.3.0}/rust/src/weights.rs +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diff-diff
3
- Version: 2.2.0
3
+ Version: 2.3.0
4
4
  Classifier: Development Status :: 5 - Production/Stable
5
5
  Classifier: Intended Audience :: Science/Research
6
6
  Classifier: Operating System :: OS Independent
@@ -14,10 +14,12 @@ Requires-Dist: numpy>=1.20.0
14
14
  Requires-Dist: pandas>=1.3.0
15
15
  Requires-Dist: scipy>=1.7.0
16
16
  Requires-Dist: pytest>=7.0 ; extra == 'dev'
17
+ Requires-Dist: pytest-xdist>=3.0 ; extra == 'dev'
17
18
  Requires-Dist: pytest-cov>=4.0 ; extra == 'dev'
18
19
  Requires-Dist: black>=23.0 ; extra == 'dev'
19
20
  Requires-Dist: ruff>=0.1.0 ; extra == 'dev'
20
21
  Requires-Dist: mypy>=1.0 ; extra == 'dev'
22
+ Requires-Dist: maturin>=1.4,<2.0 ; extra == 'dev'
21
23
  Requires-Dist: sphinx>=6.0 ; extra == 'docs'
22
24
  Requires-Dist: sphinx-rtd-theme>=1.0 ; extra == 'docs'
23
25
  Provides-Extra: dev
@@ -105,7 +107,7 @@ Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1
105
107
  - **Wild cluster bootstrap**: Valid inference with few clusters (<50) using Rademacher, Webb, or Mammen weights
106
108
  - **Panel data support**: Two-way fixed effects estimator for panel designs
107
109
  - **Multi-period analysis**: Event-study style DiD with period-specific treatment effects
108
- - **Staggered adoption**: Callaway-Sant'Anna (2021) and Sun-Abraham (2021) estimators for heterogeneous treatment timing
110
+ - **Staggered adoption**: Callaway-Sant'Anna (2021), Sun-Abraham (2021), and Borusyak-Jaravel-Spiess (2024) imputation estimators for heterogeneous treatment timing
109
111
  - **Triple Difference (DDD)**: Ortiz-Villavicencio & Sant'Anna (2025) estimators with proper covariate handling
110
112
  - **Synthetic DiD**: Combined DiD with synthetic control for improved robustness
111
113
  - **Triply Robust Panel (TROP)**: Factor-adjusted DiD with synthetic weights (Athey et al. 2025)
@@ -596,12 +598,13 @@ results = twfe.fit(
596
598
 
597
599
  ### Multi-Period DiD (Event Study)
598
600
 
599
- For settings with multiple pre- and post-treatment periods:
601
+ For settings with multiple pre- and post-treatment periods. Estimates treatment × period
602
+ interactions for ALL periods (pre and post), enabling parallel trends assessment:
600
603
 
601
604
  ```python
602
605
  from diff_diff import MultiPeriodDiD
603
606
 
604
- # Fit with multiple time periods
607
+ # Fit full event study with pre and post period effects
605
608
  did = MultiPeriodDiD()
606
609
  results = did.fit(
607
610
  panel_data,
@@ -609,18 +612,23 @@ results = did.fit(
609
612
  treatment='treated',
610
613
  time='period',
611
614
  post_periods=[3, 4, 5], # Periods 3-5 are post-treatment
612
- reference_period=0 # Reference period for comparison
615
+ reference_period=2, # Last pre-period (e=-1 convention)
616
+ unit='unit_id', # Optional: warns if staggered adoption detected
613
617
  )
614
618
 
615
- # View period-specific treatment effects
616
- for period, effect in results.period_effects.items():
617
- print(f"Period {period}: {effect.effect:.3f} (SE: {effect.se:.3f})")
619
+ # Pre-period effects test parallel trends (should be ≈ 0)
620
+ for period, effect in results.pre_period_effects.items():
621
+ print(f"Pre {period}: {effect.effect:.3f} (SE: {effect.se:.3f})")
622
+
623
+ # Post-period effects estimate dynamic treatment effects
624
+ for period, effect in results.post_period_effects.items():
625
+ print(f"Post {period}: {effect.effect:.3f} (SE: {effect.se:.3f})")
618
626
 
619
627
  # View average treatment effect across post-periods
620
628
  print(f"Average ATT: {results.avg_att:.3f}")
621
629
  print(f"Average SE: {results.avg_se:.3f}")
622
630
 
623
- # Full summary with all period effects
631
+ # Full summary with pre and post period effects
624
632
  results.print_summary()
625
633
  ```
626
634
 
@@ -908,6 +916,54 @@ print(f"Sun-Abraham ATT: {sa_results.overall_att:.3f}")
908
916
  # If results differ substantially, investigate heterogeneity
909
917
  ```
910
918
 
919
+ ### Borusyak-Jaravel-Spiess Imputation Estimator
920
+
921
+ The Borusyak et al. (2024) imputation estimator is the **efficient** estimator for staggered DiD under parallel trends, producing ~50% shorter confidence intervals than Callaway-Sant'Anna and 2-3.5x shorter than Sun-Abraham under homogeneous treatment effects.
922
+
923
+ ```python
924
+ from diff_diff import ImputationDiD, imputation_did
925
+
926
+ # Basic usage
927
+ est = ImputationDiD()
928
+ results = est.fit(data, outcome='outcome', unit='unit',
929
+ time='period', first_treat='first_treat')
930
+ results.print_summary()
931
+
932
+ # Event study
933
+ results = est.fit(data, outcome='outcome', unit='unit',
934
+ time='period', first_treat='first_treat',
935
+ aggregate='event_study')
936
+
937
+ # Pre-trend test (Equation 9)
938
+ pt = results.pretrend_test(n_leads=3)
939
+ print(f"F-stat: {pt['f_stat']:.3f}, p-value: {pt['p_value']:.4f}")
940
+
941
+ # Convenience function
942
+ results = imputation_did(data, 'outcome', 'unit', 'period', 'first_treat',
943
+ aggregate='all')
944
+ ```
945
+
946
+ ```python
947
+ ImputationDiD(
948
+ anticipation=0, # Number of anticipation periods
949
+ alpha=0.05, # Significance level
950
+ cluster=None, # Cluster variable (defaults to unit)
951
+ n_bootstrap=0, # Bootstrap iterations (0=analytical inference)
952
+ seed=None, # Random seed
953
+ horizon_max=None, # Max event-study horizon
954
+ aux_partition="cohort_horizon", # Variance partition: "cohort_horizon", "cohort", "horizon"
955
+ )
956
+ ```
957
+
958
+ **When to use Imputation DiD vs Callaway-Sant'Anna:**
959
+
960
+ | Aspect | Imputation DiD | Callaway-Sant'Anna |
961
+ |--------|---------------|-------------------|
962
+ | Efficiency | Most efficient under homogeneous effects | Less efficient but more robust to heterogeneity |
963
+ | Control group | Always uses all untreated obs | Choice of never-treated or not-yet-treated |
964
+ | Inference | Conservative variance (Theorem 3) | Multiplier bootstrap |
965
+ | Pre-trends | Built-in F-test (Equation 9) | Separate testing |
966
+
911
967
  ### Triple Difference (DDD)
912
968
 
913
969
  Triple Difference (DDD) is used when treatment requires satisfying two criteria: belonging to a treated **group** AND being in an eligible **partition**. The `TripleDifference` class implements the methodology from Ortiz-Villavicencio & Sant'Anna (2025), which correctly handles covariate adjustment (unlike naive implementations).
@@ -986,10 +1042,10 @@ Create publication-ready event study plots:
986
1042
  ```python
987
1043
  from diff_diff import plot_event_study, MultiPeriodDiD, CallawaySantAnna, SunAbraham
988
1044
 
989
- # From MultiPeriodDiD
1045
+ # From MultiPeriodDiD (full event study with pre and post period effects)
990
1046
  did = MultiPeriodDiD()
991
1047
  results = did.fit(data, outcome='y', treatment='treated',
992
- time='period', post_periods=[3, 4, 5])
1048
+ time='period', post_periods=[3, 4, 5], reference_period=2)
993
1049
  plot_event_study(results, title="Treatment Effects Over Time")
994
1050
 
995
1051
  # From CallawaySantAnna (with event study aggregation)
@@ -1309,7 +1365,6 @@ TROP(
1309
1365
  max_iter=100, # Max iterations for factor estimation
1310
1366
  tol=1e-6, # Convergence tolerance
1311
1367
  alpha=0.05, # Significance level
1312
- variance_method='bootstrap', # 'bootstrap' or 'jackknife'
1313
1368
  n_bootstrap=200, # Bootstrap replications
1314
1369
  seed=None # Random seed
1315
1370
  )
@@ -1449,14 +1504,15 @@ Pre-trends tests have low power and can exacerbate bias. **Honest DiD** (Rambach
1449
1504
  ```python
1450
1505
  from diff_diff import HonestDiD, MultiPeriodDiD
1451
1506
 
1452
- # First, fit a standard event study
1507
+ # First, fit a full event study (pre + post period effects)
1453
1508
  did = MultiPeriodDiD()
1454
1509
  event_results = did.fit(
1455
1510
  data,
1456
1511
  outcome='outcome',
1457
1512
  treatment='treated',
1458
1513
  time='period',
1459
- post_periods=[5, 6, 7, 8, 9]
1514
+ post_periods=[5, 6, 7, 8, 9],
1515
+ reference_period=4, # Last pre-period (e=-1 convention)
1460
1516
  )
1461
1517
 
1462
1518
  # Compute honest bounds with relative magnitudes restriction
@@ -1524,14 +1580,15 @@ A passing pre-trends test doesn't mean parallel trends holds—it may just mean
1524
1580
  ```python
1525
1581
  from diff_diff import PreTrendsPower, MultiPeriodDiD
1526
1582
 
1527
- # First, fit an event study
1583
+ # First, fit a full event study
1528
1584
  did = MultiPeriodDiD()
1529
1585
  event_results = did.fit(
1530
1586
  data,
1531
1587
  outcome='outcome',
1532
1588
  treatment='treated',
1533
1589
  time='period',
1534
- post_periods=[5, 6, 7, 8, 9]
1590
+ post_periods=[5, 6, 7, 8, 9],
1591
+ reference_period=4,
1535
1592
  )
1536
1593
 
1537
1594
  # Analyze pre-trends test power
@@ -1800,7 +1857,8 @@ MultiPeriodDiD(
1800
1857
  | `covariates` | list | Linear control variables |
1801
1858
  | `fixed_effects` | list | Categorical FE columns (creates dummies) |
1802
1859
  | `absorb` | list | High-dimensional FE (within-transformation) |
1803
- | `reference_period` | any | Omitted period for time dummies |
1860
+ | `reference_period` | any | Omitted period (default: last pre-period, e=-1 convention) |
1861
+ | `unit` | str | Unit identifier column (for staggered adoption warning) |
1804
1862
 
1805
1863
  ### MultiPeriodDiDResults
1806
1864
 
@@ -1808,8 +1866,8 @@ MultiPeriodDiD(
1808
1866
 
1809
1867
  | Attribute | Description |
1810
1868
  |-----------|-------------|
1811
- | `period_effects` | Dict mapping periods to PeriodEffect objects |
1812
- | `avg_att` | Average ATT across post-treatment periods |
1869
+ | `period_effects` | Dict mapping periods to PeriodEffect objects (pre and post, excluding reference) |
1870
+ | `avg_att` | Average ATT across post-treatment periods only |
1813
1871
  | `avg_se` | Standard error of average ATT |
1814
1872
  | `avg_t_stat` | T-statistic for average ATT |
1815
1873
  | `avg_p_value` | P-value for average ATT |
@@ -1817,6 +1875,10 @@ MultiPeriodDiD(
1817
1875
  | `n_obs` | Number of observations |
1818
1876
  | `pre_periods` | List of pre-treatment periods |
1819
1877
  | `post_periods` | List of post-treatment periods |
1878
+ | `reference_period` | The omitted reference period (coefficient = 0 by construction) |
1879
+ | `interaction_indices` | Dict mapping period → column index in VCV (for sub-VCV extraction) |
1880
+ | `pre_period_effects` | Property: pre-period effects only (for parallel trends assessment) |
1881
+ | `post_period_effects` | Property: post-period effects only |
1820
1882
 
1821
1883
  **Methods:**
1822
1884
 
@@ -1909,8 +1971,7 @@ TROP(
1909
1971
  max_iter=100, # Max iterations for factor estimation
1910
1972
  tol=1e-6, # Convergence tolerance
1911
1973
  alpha=0.05, # Significance level for CIs
1912
- variance_method='bootstrap', # 'bootstrap' or 'jackknife'
1913
- n_bootstrap=200, # Bootstrap/jackknife iterations
1974
+ n_bootstrap=200, # Bootstrap replications
1914
1975
  seed=None # Random seed
1915
1976
  )
1916
1977
  ```
@@ -1934,7 +1995,7 @@ Note: TROP infers treatment periods from the treatment indicator column. The tre
1934
1995
  | Attribute | Description |
1935
1996
  |-----------|-------------|
1936
1997
  | `att` | Average treatment effect on the treated |
1937
- | `se` | Standard error (bootstrap or jackknife) |
1998
+ | `se` | Standard error (bootstrap) |
1938
1999
  | `t_stat` | T-statistic |
1939
2000
  | `p_value` | P-value |
1940
2001
  | `conf_int` | Confidence interval |
@@ -1953,7 +2014,6 @@ Note: TROP infers treatment periods from the treatment indicator column. The tre
1953
2014
  | `loocv_score` | LOOCV score for selected parameters |
1954
2015
  | `n_pre_periods` | Number of pre-treatment periods |
1955
2016
  | `n_post_periods` | Number of post-treatment periods |
1956
- | `variance_method` | Variance estimation method |
1957
2017
  | `bootstrap_distribution` | Bootstrap distribution (if bootstrap) |
1958
2018
 
1959
2019
  **Methods:**
@@ -2025,6 +2085,60 @@ SunAbraham(
2025
2085
  | `print_summary(alpha)` | Print summary to stdout |
2026
2086
  | `to_dataframe(level)` | Convert to DataFrame ('event_study' or 'cohort') |
2027
2087
 
2088
+ ### ImputationDiD
2089
+
2090
+ ```python
2091
+ ImputationDiD(
2092
+ anticipation=0, # Periods of anticipation effects
2093
+ alpha=0.05, # Significance level for CIs
2094
+ cluster=None, # Column for cluster-robust SEs
2095
+ n_bootstrap=0, # Bootstrap iterations (0 = analytical)
2096
+ seed=None, # Random seed
2097
+ rank_deficient_action='warn', # 'warn', 'error', or 'silent'
2098
+ horizon_max=None, # Max event-study horizon
2099
+ aux_partition='cohort_horizon', # Variance partition
2100
+ )
2101
+ ```
2102
+
2103
+ **fit() Parameters:**
2104
+
2105
+ | Parameter | Type | Description |
2106
+ |-----------|------|-------------|
2107
+ | `data` | DataFrame | Panel data |
2108
+ | `outcome` | str | Outcome variable column name |
2109
+ | `unit` | str | Unit identifier column |
2110
+ | `time` | str | Time period column |
2111
+ | `first_treat` | str | First treatment period column (0 for never-treated) |
2112
+ | `covariates` | list | Covariate column names |
2113
+ | `aggregate` | str | Aggregation: None, "event_study", "group", "all" |
2114
+ | `balance_e` | int | Balance event study to this many pre-treatment periods |
2115
+
2116
+ ### ImputationDiDResults
2117
+
2118
+ **Attributes:**
2119
+
2120
+ | Attribute | Description |
2121
+ |-----------|-------------|
2122
+ | `overall_att` | Overall average treatment effect on the treated |
2123
+ | `overall_se` | Standard error (conservative, Theorem 3) |
2124
+ | `overall_t_stat` | T-statistic |
2125
+ | `overall_p_value` | P-value for H0: ATT = 0 |
2126
+ | `overall_conf_int` | Confidence interval |
2127
+ | `event_study_effects` | Dict of relative time -> effect dict (if `aggregate='event_study'` or `'all'`) |
2128
+ | `group_effects` | Dict of cohort -> effect dict (if `aggregate='group'` or `'all'`) |
2129
+ | `treatment_effects` | DataFrame of unit-level imputed treatment effects |
2130
+ | `n_treated_obs` | Number of treated observations |
2131
+ | `n_untreated_obs` | Number of untreated observations |
2132
+
2133
+ **Methods:**
2134
+
2135
+ | Method | Description |
2136
+ |--------|-------------|
2137
+ | `summary(alpha)` | Get formatted summary string |
2138
+ | `print_summary(alpha)` | Print summary to stdout |
2139
+ | `to_dataframe(level)` | Convert to DataFrame ('observation', 'event_study', 'group') |
2140
+ | `pretrend_test(n_leads)` | Run pre-trend F-test (Equation 9) |
2141
+
2028
2142
  ### TripleDifference
2029
2143
 
2030
2144
  ```python
@@ -2489,6 +2603,14 @@ The `HonestDiD` module implements sensitivity analysis methods for relaxing the
2489
2603
 
2490
2604
  ### Multi-Period and Staggered Adoption
2491
2605
 
2606
+ - **Borusyak, K., Jaravel, X., & Spiess, J. (2024).** "Revisiting Event-Study Designs: Robust and Efficient Estimation." *Review of Economic Studies*, 91(6), 3253-3285. [https://doi.org/10.1093/restud/rdae007](https://doi.org/10.1093/restud/rdae007)
2607
+
2608
+ This paper introduces the imputation estimator implemented in our `ImputationDiD` class:
2609
+ - **Efficient imputation**: OLS on untreated observations → impute counterfactuals → aggregate
2610
+ - **Conservative variance**: Theorem 3 clustered variance estimator with auxiliary model
2611
+ - **Pre-trend test**: Independent of treatment effect estimation (Equation 9)
2612
+ - **Efficiency gains**: ~50% shorter CIs than Callaway-Sant'Anna under homogeneous effects
2613
+
2492
2614
  - **Callaway, B., & Sant'Anna, P. H. C. (2021).** "Difference-in-Differences with Multiple Time Periods." *Journal of Econometrics*, 225(2), 200-230. [https://doi.org/10.1016/j.jeconom.2020.12.001](https://doi.org/10.1016/j.jeconom.2020.12.001)
2493
2615
 
2494
2616
  - **Sant'Anna, P. H. C., & Zhao, J. (2020).** "Doubly Robust Difference-in-Differences Estimators." *Journal of Econometrics*, 219(1), 101-122. [https://doi.org/10.1016/j.jeconom.2020.06.003](https://doi.org/10.1016/j.jeconom.2020.06.003)
@@ -70,7 +70,7 @@ Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1
70
70
  - **Wild cluster bootstrap**: Valid inference with few clusters (<50) using Rademacher, Webb, or Mammen weights
71
71
  - **Panel data support**: Two-way fixed effects estimator for panel designs
72
72
  - **Multi-period analysis**: Event-study style DiD with period-specific treatment effects
73
- - **Staggered adoption**: Callaway-Sant'Anna (2021) and Sun-Abraham (2021) estimators for heterogeneous treatment timing
73
+ - **Staggered adoption**: Callaway-Sant'Anna (2021), Sun-Abraham (2021), and Borusyak-Jaravel-Spiess (2024) imputation estimators for heterogeneous treatment timing
74
74
  - **Triple Difference (DDD)**: Ortiz-Villavicencio & Sant'Anna (2025) estimators with proper covariate handling
75
75
  - **Synthetic DiD**: Combined DiD with synthetic control for improved robustness
76
76
  - **Triply Robust Panel (TROP)**: Factor-adjusted DiD with synthetic weights (Athey et al. 2025)
@@ -561,12 +561,13 @@ results = twfe.fit(
561
561
 
562
562
  ### Multi-Period DiD (Event Study)
563
563
 
564
- For settings with multiple pre- and post-treatment periods:
564
+ For settings with multiple pre- and post-treatment periods. Estimates treatment × period
565
+ interactions for ALL periods (pre and post), enabling parallel trends assessment:
565
566
 
566
567
  ```python
567
568
  from diff_diff import MultiPeriodDiD
568
569
 
569
- # Fit with multiple time periods
570
+ # Fit full event study with pre and post period effects
570
571
  did = MultiPeriodDiD()
571
572
  results = did.fit(
572
573
  panel_data,
@@ -574,18 +575,23 @@ results = did.fit(
574
575
  treatment='treated',
575
576
  time='period',
576
577
  post_periods=[3, 4, 5], # Periods 3-5 are post-treatment
577
- reference_period=0 # Reference period for comparison
578
+ reference_period=2, # Last pre-period (e=-1 convention)
579
+ unit='unit_id', # Optional: warns if staggered adoption detected
578
580
  )
579
581
 
580
- # View period-specific treatment effects
581
- for period, effect in results.period_effects.items():
582
- print(f"Period {period}: {effect.effect:.3f} (SE: {effect.se:.3f})")
582
+ # Pre-period effects test parallel trends (should be ≈ 0)
583
+ for period, effect in results.pre_period_effects.items():
584
+ print(f"Pre {period}: {effect.effect:.3f} (SE: {effect.se:.3f})")
585
+
586
+ # Post-period effects estimate dynamic treatment effects
587
+ for period, effect in results.post_period_effects.items():
588
+ print(f"Post {period}: {effect.effect:.3f} (SE: {effect.se:.3f})")
583
589
 
584
590
  # View average treatment effect across post-periods
585
591
  print(f"Average ATT: {results.avg_att:.3f}")
586
592
  print(f"Average SE: {results.avg_se:.3f}")
587
593
 
588
- # Full summary with all period effects
594
+ # Full summary with pre and post period effects
589
595
  results.print_summary()
590
596
  ```
591
597
 
@@ -873,6 +879,54 @@ print(f"Sun-Abraham ATT: {sa_results.overall_att:.3f}")
873
879
  # If results differ substantially, investigate heterogeneity
874
880
  ```
875
881
 
882
+ ### Borusyak-Jaravel-Spiess Imputation Estimator
883
+
884
+ The Borusyak et al. (2024) imputation estimator is the **efficient** estimator for staggered DiD under parallel trends, producing ~50% shorter confidence intervals than Callaway-Sant'Anna and 2-3.5x shorter than Sun-Abraham under homogeneous treatment effects.
885
+
886
+ ```python
887
+ from diff_diff import ImputationDiD, imputation_did
888
+
889
+ # Basic usage
890
+ est = ImputationDiD()
891
+ results = est.fit(data, outcome='outcome', unit='unit',
892
+ time='period', first_treat='first_treat')
893
+ results.print_summary()
894
+
895
+ # Event study
896
+ results = est.fit(data, outcome='outcome', unit='unit',
897
+ time='period', first_treat='first_treat',
898
+ aggregate='event_study')
899
+
900
+ # Pre-trend test (Equation 9)
901
+ pt = results.pretrend_test(n_leads=3)
902
+ print(f"F-stat: {pt['f_stat']:.3f}, p-value: {pt['p_value']:.4f}")
903
+
904
+ # Convenience function
905
+ results = imputation_did(data, 'outcome', 'unit', 'period', 'first_treat',
906
+ aggregate='all')
907
+ ```
908
+
909
+ ```python
910
+ ImputationDiD(
911
+ anticipation=0, # Number of anticipation periods
912
+ alpha=0.05, # Significance level
913
+ cluster=None, # Cluster variable (defaults to unit)
914
+ n_bootstrap=0, # Bootstrap iterations (0=analytical inference)
915
+ seed=None, # Random seed
916
+ horizon_max=None, # Max event-study horizon
917
+ aux_partition="cohort_horizon", # Variance partition: "cohort_horizon", "cohort", "horizon"
918
+ )
919
+ ```
920
+
921
+ **When to use Imputation DiD vs Callaway-Sant'Anna:**
922
+
923
+ | Aspect | Imputation DiD | Callaway-Sant'Anna |
924
+ |--------|---------------|-------------------|
925
+ | Efficiency | Most efficient under homogeneous effects | Less efficient but more robust to heterogeneity |
926
+ | Control group | Always uses all untreated obs | Choice of never-treated or not-yet-treated |
927
+ | Inference | Conservative variance (Theorem 3) | Multiplier bootstrap |
928
+ | Pre-trends | Built-in F-test (Equation 9) | Separate testing |
929
+
876
930
  ### Triple Difference (DDD)
877
931
 
878
932
  Triple Difference (DDD) is used when treatment requires satisfying two criteria: belonging to a treated **group** AND being in an eligible **partition**. The `TripleDifference` class implements the methodology from Ortiz-Villavicencio & Sant'Anna (2025), which correctly handles covariate adjustment (unlike naive implementations).
@@ -951,10 +1005,10 @@ Create publication-ready event study plots:
951
1005
  ```python
952
1006
  from diff_diff import plot_event_study, MultiPeriodDiD, CallawaySantAnna, SunAbraham
953
1007
 
954
- # From MultiPeriodDiD
1008
+ # From MultiPeriodDiD (full event study with pre and post period effects)
955
1009
  did = MultiPeriodDiD()
956
1010
  results = did.fit(data, outcome='y', treatment='treated',
957
- time='period', post_periods=[3, 4, 5])
1011
+ time='period', post_periods=[3, 4, 5], reference_period=2)
958
1012
  plot_event_study(results, title="Treatment Effects Over Time")
959
1013
 
960
1014
  # From CallawaySantAnna (with event study aggregation)
@@ -1274,7 +1328,6 @@ TROP(
1274
1328
  max_iter=100, # Max iterations for factor estimation
1275
1329
  tol=1e-6, # Convergence tolerance
1276
1330
  alpha=0.05, # Significance level
1277
- variance_method='bootstrap', # 'bootstrap' or 'jackknife'
1278
1331
  n_bootstrap=200, # Bootstrap replications
1279
1332
  seed=None # Random seed
1280
1333
  )
@@ -1414,14 +1467,15 @@ Pre-trends tests have low power and can exacerbate bias. **Honest DiD** (Rambach
1414
1467
  ```python
1415
1468
  from diff_diff import HonestDiD, MultiPeriodDiD
1416
1469
 
1417
- # First, fit a standard event study
1470
+ # First, fit a full event study (pre + post period effects)
1418
1471
  did = MultiPeriodDiD()
1419
1472
  event_results = did.fit(
1420
1473
  data,
1421
1474
  outcome='outcome',
1422
1475
  treatment='treated',
1423
1476
  time='period',
1424
- post_periods=[5, 6, 7, 8, 9]
1477
+ post_periods=[5, 6, 7, 8, 9],
1478
+ reference_period=4, # Last pre-period (e=-1 convention)
1425
1479
  )
1426
1480
 
1427
1481
  # Compute honest bounds with relative magnitudes restriction
@@ -1489,14 +1543,15 @@ A passing pre-trends test doesn't mean parallel trends holds—it may just mean
1489
1543
  ```python
1490
1544
  from diff_diff import PreTrendsPower, MultiPeriodDiD
1491
1545
 
1492
- # First, fit an event study
1546
+ # First, fit a full event study
1493
1547
  did = MultiPeriodDiD()
1494
1548
  event_results = did.fit(
1495
1549
  data,
1496
1550
  outcome='outcome',
1497
1551
  treatment='treated',
1498
1552
  time='period',
1499
- post_periods=[5, 6, 7, 8, 9]
1553
+ post_periods=[5, 6, 7, 8, 9],
1554
+ reference_period=4,
1500
1555
  )
1501
1556
 
1502
1557
  # Analyze pre-trends test power
@@ -1765,7 +1820,8 @@ MultiPeriodDiD(
1765
1820
  | `covariates` | list | Linear control variables |
1766
1821
  | `fixed_effects` | list | Categorical FE columns (creates dummies) |
1767
1822
  | `absorb` | list | High-dimensional FE (within-transformation) |
1768
- | `reference_period` | any | Omitted period for time dummies |
1823
+ | `reference_period` | any | Omitted period (default: last pre-period, e=-1 convention) |
1824
+ | `unit` | str | Unit identifier column (for staggered adoption warning) |
1769
1825
 
1770
1826
  ### MultiPeriodDiDResults
1771
1827
 
@@ -1773,8 +1829,8 @@ MultiPeriodDiD(
1773
1829
 
1774
1830
  | Attribute | Description |
1775
1831
  |-----------|-------------|
1776
- | `period_effects` | Dict mapping periods to PeriodEffect objects |
1777
- | `avg_att` | Average ATT across post-treatment periods |
1832
+ | `period_effects` | Dict mapping periods to PeriodEffect objects (pre and post, excluding reference) |
1833
+ | `avg_att` | Average ATT across post-treatment periods only |
1778
1834
  | `avg_se` | Standard error of average ATT |
1779
1835
  | `avg_t_stat` | T-statistic for average ATT |
1780
1836
  | `avg_p_value` | P-value for average ATT |
@@ -1782,6 +1838,10 @@ MultiPeriodDiD(
1782
1838
  | `n_obs` | Number of observations |
1783
1839
  | `pre_periods` | List of pre-treatment periods |
1784
1840
  | `post_periods` | List of post-treatment periods |
1841
+ | `reference_period` | The omitted reference period (coefficient = 0 by construction) |
1842
+ | `interaction_indices` | Dict mapping period → column index in VCV (for sub-VCV extraction) |
1843
+ | `pre_period_effects` | Property: pre-period effects only (for parallel trends assessment) |
1844
+ | `post_period_effects` | Property: post-period effects only |
1785
1845
 
1786
1846
  **Methods:**
1787
1847
 
@@ -1874,8 +1934,7 @@ TROP(
1874
1934
  max_iter=100, # Max iterations for factor estimation
1875
1935
  tol=1e-6, # Convergence tolerance
1876
1936
  alpha=0.05, # Significance level for CIs
1877
- variance_method='bootstrap', # 'bootstrap' or 'jackknife'
1878
- n_bootstrap=200, # Bootstrap/jackknife iterations
1937
+ n_bootstrap=200, # Bootstrap replications
1879
1938
  seed=None # Random seed
1880
1939
  )
1881
1940
  ```
@@ -1899,7 +1958,7 @@ Note: TROP infers treatment periods from the treatment indicator column. The tre
1899
1958
  | Attribute | Description |
1900
1959
  |-----------|-------------|
1901
1960
  | `att` | Average treatment effect on the treated |
1902
- | `se` | Standard error (bootstrap or jackknife) |
1961
+ | `se` | Standard error (bootstrap) |
1903
1962
  | `t_stat` | T-statistic |
1904
1963
  | `p_value` | P-value |
1905
1964
  | `conf_int` | Confidence interval |
@@ -1918,7 +1977,6 @@ Note: TROP infers treatment periods from the treatment indicator column. The tre
1918
1977
  | `loocv_score` | LOOCV score for selected parameters |
1919
1978
  | `n_pre_periods` | Number of pre-treatment periods |
1920
1979
  | `n_post_periods` | Number of post-treatment periods |
1921
- | `variance_method` | Variance estimation method |
1922
1980
  | `bootstrap_distribution` | Bootstrap distribution (if bootstrap) |
1923
1981
 
1924
1982
  **Methods:**
@@ -1990,6 +2048,60 @@ SunAbraham(
1990
2048
  | `print_summary(alpha)` | Print summary to stdout |
1991
2049
  | `to_dataframe(level)` | Convert to DataFrame ('event_study' or 'cohort') |
1992
2050
 
2051
+ ### ImputationDiD
2052
+
2053
+ ```python
2054
+ ImputationDiD(
2055
+ anticipation=0, # Periods of anticipation effects
2056
+ alpha=0.05, # Significance level for CIs
2057
+ cluster=None, # Column for cluster-robust SEs
2058
+ n_bootstrap=0, # Bootstrap iterations (0 = analytical)
2059
+ seed=None, # Random seed
2060
+ rank_deficient_action='warn', # 'warn', 'error', or 'silent'
2061
+ horizon_max=None, # Max event-study horizon
2062
+ aux_partition='cohort_horizon', # Variance partition
2063
+ )
2064
+ ```
2065
+
2066
+ **fit() Parameters:**
2067
+
2068
+ | Parameter | Type | Description |
2069
+ |-----------|------|-------------|
2070
+ | `data` | DataFrame | Panel data |
2071
+ | `outcome` | str | Outcome variable column name |
2072
+ | `unit` | str | Unit identifier column |
2073
+ | `time` | str | Time period column |
2074
+ | `first_treat` | str | First treatment period column (0 for never-treated) |
2075
+ | `covariates` | list | Covariate column names |
2076
+ | `aggregate` | str | Aggregation: None, "event_study", "group", "all" |
2077
+ | `balance_e` | int | Balance event study to this many pre-treatment periods |
2078
+
2079
+ ### ImputationDiDResults
2080
+
2081
+ **Attributes:**
2082
+
2083
+ | Attribute | Description |
2084
+ |-----------|-------------|
2085
+ | `overall_att` | Overall average treatment effect on the treated |
2086
+ | `overall_se` | Standard error (conservative, Theorem 3) |
2087
+ | `overall_t_stat` | T-statistic |
2088
+ | `overall_p_value` | P-value for H0: ATT = 0 |
2089
+ | `overall_conf_int` | Confidence interval |
2090
+ | `event_study_effects` | Dict of relative time -> effect dict (if `aggregate='event_study'` or `'all'`) |
2091
+ | `group_effects` | Dict of cohort -> effect dict (if `aggregate='group'` or `'all'`) |
2092
+ | `treatment_effects` | DataFrame of unit-level imputed treatment effects |
2093
+ | `n_treated_obs` | Number of treated observations |
2094
+ | `n_untreated_obs` | Number of untreated observations |
2095
+
2096
+ **Methods:**
2097
+
2098
+ | Method | Description |
2099
+ |--------|-------------|
2100
+ | `summary(alpha)` | Get formatted summary string |
2101
+ | `print_summary(alpha)` | Print summary to stdout |
2102
+ | `to_dataframe(level)` | Convert to DataFrame ('observation', 'event_study', 'group') |
2103
+ | `pretrend_test(n_leads)` | Run pre-trend F-test (Equation 9) |
2104
+
1993
2105
  ### TripleDifference
1994
2106
 
1995
2107
  ```python
@@ -2454,6 +2566,14 @@ The `HonestDiD` module implements sensitivity analysis methods for relaxing the
2454
2566
 
2455
2567
  ### Multi-Period and Staggered Adoption
2456
2568
 
2569
+ - **Borusyak, K., Jaravel, X., & Spiess, J. (2024).** "Revisiting Event-Study Designs: Robust and Efficient Estimation." *Review of Economic Studies*, 91(6), 3253-3285. [https://doi.org/10.1093/restud/rdae007](https://doi.org/10.1093/restud/rdae007)
2570
+
2571
+ This paper introduces the imputation estimator implemented in our `ImputationDiD` class:
2572
+ - **Efficient imputation**: fit OLS on untreated observations, impute the counterfactual outcome for each treated observation, then aggregate the implied unit-level effects
2573
+ - **Conservative variance**: Theorem 3 clustered variance estimator with auxiliary model
2574
+ - **Pre-trend test**: Independent of treatment effect estimation (Proposition 9)
2575
+ - **Efficiency gains**: confidence intervals roughly 50% shorter than Callaway-Sant'Anna under homogeneous treatment effects
2576
+
2457
2577
  - **Callaway, B., & Sant'Anna, P. H. C. (2021).** "Difference-in-Differences with Multiple Time Periods." *Journal of Econometrics*, 225(2), 200-230. [https://doi.org/10.1016/j.jeconom.2020.12.001](https://doi.org/10.1016/j.jeconom.2020.12.001)
2458
2578
 
2459
2579
  - **Sant'Anna, P. H. C., & Zhao, J. (2020).** "Doubly Robust Difference-in-Differences Estimators." *Journal of Econometrics*, 219(1), 101-122. [https://doi.org/10.1016/j.jeconom.2020.06.003](https://doi.org/10.1016/j.jeconom.2020.06.003)
@@ -95,6 +95,12 @@ from diff_diff.staggered import (
95
95
  CSBootstrapResults,
96
96
  GroupTimeEffect,
97
97
  )
98
+ from diff_diff.imputation import (
99
+ ImputationBootstrapResults,
100
+ ImputationDiD,
101
+ ImputationDiDResults,
102
+ imputation_did,
103
+ )
98
104
  from diff_diff.sun_abraham import (
99
105
  SABootstrapResults,
100
106
  SunAbraham,
@@ -136,7 +142,7 @@ from diff_diff.datasets import (
136
142
  load_mpdta,
137
143
  )
138
144
 
139
- __version__ = "2.2.0"
145
+ __version__ = "2.3.0"
140
146
  __all__ = [
141
147
  # Estimators
142
148
  "DifferenceInDifferences",
@@ -145,6 +151,7 @@ __all__ = [
145
151
  "SyntheticDiD",
146
152
  "CallawaySantAnna",
147
153
  "SunAbraham",
154
+ "ImputationDiD",
148
155
  "TripleDifference",
149
156
  "TROP",
150
157
  # Bacon Decomposition
@@ -163,6 +170,9 @@ __all__ = [
163
170
  "GroupTimeEffect",
164
171
  "SunAbrahamResults",
165
172
  "SABootstrapResults",
173
+ "ImputationDiDResults",
174
+ "ImputationBootstrapResults",
175
+ "imputation_did",
166
176
  "TripleDifferenceResults",
167
177
  "triple_difference",
168
178
  "TROPResults",
@@ -662,7 +662,7 @@ def permutation_test(
662
662
  ci_upper = np.percentile(valid_effects, (1 - alpha / 2) * 100)
663
663
 
664
664
  # T-stat from original estimate
665
- t_stat = original_att / se if se > 0 else 0.0
665
+ t_stat = original_att / se if np.isfinite(se) and se > 0 else np.nan
666
666
 
667
667
  return PlaceboTestResults(
668
668
  test_type="permutation",
@@ -783,14 +783,14 @@ def leave_one_out_test(
783
783
  # Statistics of LOO distribution
784
784
  mean_effect = np.mean(valid_effects)
785
785
  se = np.std(valid_effects, ddof=1) if len(valid_effects) > 1 else 0.0
786
- t_stat = mean_effect / se if se > 0 else 0.0
786
+ t_stat = mean_effect / se if np.isfinite(se) and se > 0 else np.nan
787
787
 
788
788
  # Use t-distribution for p-value
789
789
  df = len(valid_effects) - 1 if len(valid_effects) > 1 else 1
790
790
  p_value = compute_p_value(t_stat, df=df)
791
791
 
792
792
  # CI
793
- conf_int = compute_confidence_interval(mean_effect, se, alpha, df=df)
793
+ conf_int = compute_confidence_interval(mean_effect, se, alpha, df=df) if np.isfinite(se) and se > 0 else (np.nan, np.nan)
794
794
 
795
795
  return PlaceboTestResults(
796
796
  test_type="leave_one_out",