diff-diff 2.3.1__tar.gz → 2.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. diff_diff-2.3.1/README.md → diff_diff-2.4.0/PKG-INFO +144 -2
  2. diff_diff-2.3.1/PKG-INFO → diff_diff-2.4.0/README.md +105 -40
  3. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/__init__.py +11 -1
  4. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/staggered.py +6 -3
  5. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/sun_abraham.py +60 -24
  6. diff_diff-2.4.0/diff_diff/two_stage.py +2209 -0
  7. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/utils.py +34 -0
  8. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/visualization.py +2 -0
  9. {diff_diff-2.3.1 → diff_diff-2.4.0}/pyproject.toml +3 -2
  10. {diff_diff-2.3.1 → diff_diff-2.4.0}/rust/Cargo.lock +3 -3
  11. {diff_diff-2.3.1 → diff_diff-2.4.0}/rust/Cargo.toml +1 -1
  12. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/_backend.py +0 -0
  13. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/bacon.py +0 -0
  14. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/datasets.py +0 -0
  15. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/diagnostics.py +0 -0
  16. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/estimators.py +0 -0
  17. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/honest_did.py +0 -0
  18. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/imputation.py +0 -0
  19. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/linalg.py +0 -0
  20. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/power.py +0 -0
  21. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/prep.py +0 -0
  22. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/prep_dgp.py +0 -0
  23. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/pretrends.py +0 -0
  24. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/results.py +0 -0
  25. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/staggered_aggregation.py +0 -0
  26. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/staggered_bootstrap.py +0 -0
  27. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/staggered_results.py +0 -0
  28. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/synthetic_did.py +0 -0
  29. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/triple_diff.py +0 -0
  30. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/trop.py +0 -0
  31. {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/twfe.py +0 -0
  32. {diff_diff-2.3.1 → diff_diff-2.4.0}/rust/src/bootstrap.rs +0 -0
  33. {diff_diff-2.3.1 → diff_diff-2.4.0}/rust/src/lib.rs +0 -0
  34. {diff_diff-2.3.1 → diff_diff-2.4.0}/rust/src/linalg.rs +0 -0
  35. {diff_diff-2.3.1 → diff_diff-2.4.0}/rust/src/trop.rs +0 -0
  36. {diff_diff-2.3.1 → diff_diff-2.4.0}/rust/src/weights.rs +0 -0
@@ -1,3 +1,41 @@
1
+ Metadata-Version: 2.4
2
+ Name: diff-diff
3
+ Version: 2.4.0
4
+ Classifier: Development Status :: 5 - Production/Stable
5
+ Classifier: Intended Audience :: Science/Research
6
+ Classifier: Operating System :: OS Independent
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Programming Language :: Python :: 3.9
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Classifier: Topic :: Scientific/Engineering :: Mathematics
14
+ Requires-Dist: numpy>=1.20.0
15
+ Requires-Dist: pandas>=1.3.0
16
+ Requires-Dist: scipy>=1.7.0
17
+ Requires-Dist: pytest>=7.0 ; extra == 'dev'
18
+ Requires-Dist: pytest-xdist>=3.0 ; extra == 'dev'
19
+ Requires-Dist: pytest-cov>=4.0 ; extra == 'dev'
20
+ Requires-Dist: black>=23.0 ; extra == 'dev'
21
+ Requires-Dist: ruff>=0.1.0 ; extra == 'dev'
22
+ Requires-Dist: mypy>=1.0 ; extra == 'dev'
23
+ Requires-Dist: maturin>=1.4,<2.0 ; extra == 'dev'
24
+ Requires-Dist: sphinx>=6.0 ; extra == 'docs'
25
+ Requires-Dist: sphinx-rtd-theme>=1.0 ; extra == 'docs'
26
+ Provides-Extra: dev
27
+ Provides-Extra: docs
28
+ Summary: A library for Difference-in-Differences causal inference analysis
29
+ Keywords: causal-inference,difference-in-differences,econometrics,statistics,treatment-effects
30
+ Author: diff-diff contributors
31
+ License-Expression: MIT
32
+ Requires-Python: >=3.9, <3.14
33
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
34
+ Project-URL: Documentation, https://diff-diff.readthedocs.io
35
+ Project-URL: Homepage, https://github.com/igerber/diff-diff
36
+ Project-URL: Issues, https://github.com/igerber/diff-diff/issues
37
+ Project-URL: Repository, https://github.com/igerber/diff-diff
38
+
1
39
  # diff-diff
2
40
 
3
41
  A Python library for Difference-in-Differences (DiD) causal inference analysis with an sklearn-like API and statsmodels-style outputs.
@@ -70,7 +108,7 @@ Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1
70
108
  - **Wild cluster bootstrap**: Valid inference with few clusters (<50) using Rademacher, Webb, or Mammen weights
71
109
  - **Panel data support**: Two-way fixed effects estimator for panel designs
72
110
  - **Multi-period analysis**: Event-study style DiD with period-specific treatment effects
73
- - **Staggered adoption**: Callaway-Sant'Anna (2021), Sun-Abraham (2021), and Borusyak-Jaravel-Spiess (2024) imputation estimators for heterogeneous treatment timing
111
+ - **Staggered adoption**: Callaway-Sant'Anna (2021), Sun-Abraham (2021), Borusyak-Jaravel-Spiess (2024) imputation, and Two-Stage DiD (Gardner 2022) estimators for heterogeneous treatment timing
74
112
  - **Triple Difference (DDD)**: Ortiz-Villavicencio & Sant'Anna (2025) estimators with proper covariate handling
75
113
  - **Synthetic DiD**: Combined DiD with synthetic control for improved robustness
76
114
  - **Triply Robust Panel (TROP)**: Factor-adjusted DiD with synthetic weights (Athey et al. 2025)
@@ -927,6 +965,53 @@ ImputationDiD(
927
965
  | Inference | Conservative variance (Theorem 3) | Multiplier bootstrap |
928
966
  | Pre-trends | Built-in F-test (Equation 9) | Separate testing |
929
967
 
968
+ ### Two-Stage DiD (Gardner 2022)
969
+
970
+ Two-Stage DiD addresses TWFE bias in staggered adoption designs by estimating unit and time fixed effects on untreated observations only, then regressing the residualized outcomes on treatment indicators. Point estimates match the Imputation DiD estimator (Borusyak et al. 2024); the key difference is that Two-Stage DiD uses a GMM sandwich variance estimator that accounts for first-stage estimation error, while Imputation DiD uses a conservative variance (Theorem 3).
971
+
972
+ ```python
973
+ from diff_diff import TwoStageDiD
974
+
975
+ # Basic usage
976
+ est = TwoStageDiD()
977
+ results = est.fit(data, outcome='outcome', unit='unit', time='period', first_treat='first_treat')
978
+ results.print_summary()
979
+ ```
980
+
981
+ **Event study:**
982
+
983
+ ```python
984
+ # Event study aggregation with visualization
985
+ results = est.fit(data, outcome='outcome', unit='unit', time='period',
986
+ first_treat='first_treat', aggregate='event_study')
987
+ plot_event_study(results)
988
+ ```
989
+
990
+ **Parameters:**
991
+
992
+ ```python
993
+ TwoStageDiD(
994
+ anticipation=0, # Periods of anticipation effects
995
+ alpha=0.05, # Significance level for CIs
996
+ cluster=None, # Column for cluster-robust SEs (defaults to unit)
997
+ n_bootstrap=0, # Bootstrap iterations (0 = analytical GMM SEs)
998
+ seed=None, # Random seed
999
+ rank_deficient_action='warn', # 'warn', 'error', or 'silent'
1000
+ horizon_max=None, # Max event-study horizon
1001
+ )
1002
+ ```
1003
+
1004
+ **When to use Two-Stage DiD vs Imputation DiD:**
1005
+
1006
+ | Aspect | Two-Stage DiD | Imputation DiD |
1007
+ |--------|--------------|---------------|
1008
+ | Point estimates | Identical | Identical |
1009
+ | Variance | GMM sandwich (accounts for first-stage error) | Conservative (Theorem 3, may overcover) |
1010
+ | Intuition | Residualize then regress | Impute counterfactuals then aggregate |
1011
+ | Reference impl. | R `did2s` package | R `didimputation` package |
1012
+
1013
+ Both estimators are efficient under homogeneous treatment effects, producing shorter confidence intervals than Callaway-Sant'Anna or Sun-Abraham.
1014
+
930
1015
  ### Triple Difference (DDD)
931
1016
 
932
1017
  Triple Difference (DDD) is used when treatment requires satisfying two criteria: belonging to a treated **group** AND being in an eligible **partition**. The `TripleDifference` class implements the methodology from Ortiz-Villavicencio & Sant'Anna (2025), which correctly handles covariate adjustment (unlike naive implementations).
@@ -2104,6 +2189,58 @@ ImputationDiD(
2104
2189
  | `to_dataframe(level)` | Convert to DataFrame ('observation', 'event_study', 'group') |
2105
2190
  | `pretrend_test(n_leads)` | Run pre-trend F-test (Equation 9) |
2106
2191
 
2192
+ ### TwoStageDiD
2193
+
2194
+ ```python
2195
+ TwoStageDiD(
2196
+ anticipation=0, # Periods of anticipation effects
2197
+ alpha=0.05, # Significance level for CIs
2198
+ cluster=None, # Column for cluster-robust SEs (defaults to unit)
2199
+ n_bootstrap=0, # Bootstrap iterations (0 = analytical GMM SEs)
2200
+ seed=None, # Random seed
2201
+ rank_deficient_action='warn', # 'warn', 'error', or 'silent'
2202
+ horizon_max=None, # Max event-study horizon
2203
+ )
2204
+ ```
2205
+
2206
+ **fit() Parameters:**
2207
+
2208
+ | Parameter | Type | Description |
2209
+ |-----------|------|-------------|
2210
+ | `data` | DataFrame | Panel data |
2211
+ | `outcome` | str | Outcome variable column name |
2212
+ | `unit` | str | Unit identifier column |
2213
+ | `time` | str | Time period column |
2214
+ | `first_treat` | str | First treatment period column (0 for never-treated) |
2215
+ | `covariates` | list | Covariate column names |
2216
+ | `aggregate` | str | Aggregation: None, "event_study", "group", "all" |
2217
+ | `balance_e` | int | Balance event study to this many pre-treatment periods |
2218
+
2219
+ ### TwoStageDiDResults
2220
+
2221
+ **Attributes:**
2222
+
2223
+ | Attribute | Description |
2224
+ |-----------|-------------|
2225
+ | `overall_att` | Overall average treatment effect on the treated |
2226
+ | `overall_se` | Standard error (GMM sandwich variance) |
2227
+ | `overall_t_stat` | T-statistic |
2228
+ | `overall_p_value` | P-value for H0: ATT = 0 |
2229
+ | `overall_conf_int` | Confidence interval |
2230
+ | `event_study_effects` | Dict of relative time -> effect dict (if `aggregate='event_study'` or `'all'`) |
2231
+ | `group_effects` | Dict of cohort -> effect dict (if `aggregate='group'` or `'all'`) |
2232
+ | `treatment_effects` | DataFrame of unit-level treatment effects |
2233
+ | `n_treated_obs` | Number of treated observations |
2234
+ | `n_untreated_obs` | Number of untreated observations |
2235
+
2236
+ **Methods:**
2237
+
2238
+ | Method | Description |
2239
+ |--------|-------------|
2240
+ | `summary(alpha)` | Get formatted summary string |
2241
+ | `print_summary(alpha)` | Print summary to stdout |
2242
+ | `to_dataframe(level)` | Convert to DataFrame ('observation', 'event_study', 'group') |
2243
+
2107
2244
  ### TripleDifference
2108
2245
 
2109
2246
  ```python
@@ -2452,7 +2589,7 @@ Returns DataFrame with columns: `unit`, `quality_score`, `outcome_trend_score`,
2452
2589
 
2453
2590
  ## Requirements
2454
2591
 
2455
- - Python >= 3.9
2592
+ - Python 3.9 - 3.13
2456
2593
  - numpy >= 1.20
2457
2594
  - pandas >= 1.3
2458
2595
  - scipy >= 1.7
@@ -2582,6 +2719,10 @@ The `HonestDiD` module implements sensitivity analysis methods for relaxing the
2582
2719
 
2583
2720
  - **Sun, L., & Abraham, S. (2021).** "Estimating Dynamic Treatment Effects in Event Studies with Heterogeneous Treatment Effects." *Journal of Econometrics*, 225(2), 175-199. [https://doi.org/10.1016/j.jeconom.2020.09.006](https://doi.org/10.1016/j.jeconom.2020.09.006)
2584
2721
 
2722
+ - **Gardner, J. (2022).** "Two-stage differences in differences." *arXiv preprint arXiv:2207.05943*. [https://arxiv.org/abs/2207.05943](https://arxiv.org/abs/2207.05943)
2723
+
2724
+ - **Butts, K., & Gardner, J. (2022).** "did2s: Two-Stage Difference-in-Differences." *The R Journal*, 14(3), 162-173. [https://doi.org/10.32614/RJ-2022-048](https://doi.org/10.32614/RJ-2022-048)
2725
+
2585
2726
  - **de Chaisemartin, C., & D'Haultfœuille, X. (2020).** "Two-Way Fixed Effects Estimators with Heterogeneous Treatment Effects." *American Economic Review*, 110(9), 2964-2996. [https://doi.org/10.1257/aer.20181169](https://doi.org/10.1257/aer.20181169)
2586
2727
 
2587
2728
  - **Goodman-Bacon, A. (2021).** "Difference-in-Differences with Variation in Treatment Timing." *Journal of Econometrics*, 225(2), 254-277. [https://doi.org/10.1016/j.jeconom.2021.03.014](https://doi.org/10.1016/j.jeconom.2021.03.014)
@@ -2605,3 +2746,4 @@ The `HonestDiD` module implements sensitivity analysis methods for relaxing the
2605
2746
  ## License
2606
2747
 
2607
2748
  MIT License
2749
+
@@ -1,40 +1,3 @@
1
- Metadata-Version: 2.4
2
- Name: diff-diff
3
- Version: 2.3.1
4
- Classifier: Development Status :: 5 - Production/Stable
5
- Classifier: Intended Audience :: Science/Research
6
- Classifier: Operating System :: OS Independent
7
- Classifier: Programming Language :: Python :: 3
8
- Classifier: Programming Language :: Python :: 3.9
9
- Classifier: Programming Language :: Python :: 3.10
10
- Classifier: Programming Language :: Python :: 3.11
11
- Classifier: Programming Language :: Python :: 3.12
12
- Classifier: Topic :: Scientific/Engineering :: Mathematics
13
- Requires-Dist: numpy>=1.20.0
14
- Requires-Dist: pandas>=1.3.0
15
- Requires-Dist: scipy>=1.7.0
16
- Requires-Dist: pytest>=7.0 ; extra == 'dev'
17
- Requires-Dist: pytest-xdist>=3.0 ; extra == 'dev'
18
- Requires-Dist: pytest-cov>=4.0 ; extra == 'dev'
19
- Requires-Dist: black>=23.0 ; extra == 'dev'
20
- Requires-Dist: ruff>=0.1.0 ; extra == 'dev'
21
- Requires-Dist: mypy>=1.0 ; extra == 'dev'
22
- Requires-Dist: maturin>=1.4,<2.0 ; extra == 'dev'
23
- Requires-Dist: sphinx>=6.0 ; extra == 'docs'
24
- Requires-Dist: sphinx-rtd-theme>=1.0 ; extra == 'docs'
25
- Provides-Extra: dev
26
- Provides-Extra: docs
27
- Summary: A library for Difference-in-Differences causal inference analysis
28
- Keywords: causal-inference,difference-in-differences,econometrics,statistics,treatment-effects
29
- Author: diff-diff contributors
30
- License-Expression: MIT
31
- Requires-Python: >=3.9
32
- Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
33
- Project-URL: Documentation, https://diff-diff.readthedocs.io
34
- Project-URL: Homepage, https://github.com/igerber/diff-diff
35
- Project-URL: Issues, https://github.com/igerber/diff-diff/issues
36
- Project-URL: Repository, https://github.com/igerber/diff-diff
37
-
38
1
  # diff-diff
39
2
 
40
3
  A Python library for Difference-in-Differences (DiD) causal inference analysis with an sklearn-like API and statsmodels-style outputs.
@@ -107,7 +70,7 @@ Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1
107
70
  - **Wild cluster bootstrap**: Valid inference with few clusters (<50) using Rademacher, Webb, or Mammen weights
108
71
  - **Panel data support**: Two-way fixed effects estimator for panel designs
109
72
  - **Multi-period analysis**: Event-study style DiD with period-specific treatment effects
110
- - **Staggered adoption**: Callaway-Sant'Anna (2021), Sun-Abraham (2021), and Borusyak-Jaravel-Spiess (2024) imputation estimators for heterogeneous treatment timing
73
+ - **Staggered adoption**: Callaway-Sant'Anna (2021), Sun-Abraham (2021), Borusyak-Jaravel-Spiess (2024) imputation, and Two-Stage DiD (Gardner 2022) estimators for heterogeneous treatment timing
111
74
  - **Triple Difference (DDD)**: Ortiz-Villavicencio & Sant'Anna (2025) estimators with proper covariate handling
112
75
  - **Synthetic DiD**: Combined DiD with synthetic control for improved robustness
113
76
  - **Triply Robust Panel (TROP)**: Factor-adjusted DiD with synthetic weights (Athey et al. 2025)
@@ -964,6 +927,53 @@ ImputationDiD(
964
927
  | Inference | Conservative variance (Theorem 3) | Multiplier bootstrap |
965
928
  | Pre-trends | Built-in F-test (Equation 9) | Separate testing |
966
929
 
930
+ ### Two-Stage DiD (Gardner 2022)
931
+
932
+ Two-Stage DiD addresses TWFE bias in staggered adoption designs by estimating unit and time fixed effects on untreated observations only, then regressing the residualized outcomes on treatment indicators. Point estimates match the Imputation DiD estimator (Borusyak et al. 2024); the key difference is that Two-Stage DiD uses a GMM sandwich variance estimator that accounts for first-stage estimation error, while Imputation DiD uses a conservative variance (Theorem 3).
933
+
934
+ ```python
935
+ from diff_diff import TwoStageDiD
936
+
937
+ # Basic usage
938
+ est = TwoStageDiD()
939
+ results = est.fit(data, outcome='outcome', unit='unit', time='period', first_treat='first_treat')
940
+ results.print_summary()
941
+ ```
942
+
943
+ **Event study:**
944
+
945
+ ```python
946
+ # Event study aggregation with visualization
947
+ results = est.fit(data, outcome='outcome', unit='unit', time='period',
948
+ first_treat='first_treat', aggregate='event_study')
949
+ plot_event_study(results)
950
+ ```
951
+
952
+ **Parameters:**
953
+
954
+ ```python
955
+ TwoStageDiD(
956
+ anticipation=0, # Periods of anticipation effects
957
+ alpha=0.05, # Significance level for CIs
958
+ cluster=None, # Column for cluster-robust SEs (defaults to unit)
959
+ n_bootstrap=0, # Bootstrap iterations (0 = analytical GMM SEs)
960
+ seed=None, # Random seed
961
+ rank_deficient_action='warn', # 'warn', 'error', or 'silent'
962
+ horizon_max=None, # Max event-study horizon
963
+ )
964
+ ```
965
+
966
+ **When to use Two-Stage DiD vs Imputation DiD:**
967
+
968
+ | Aspect | Two-Stage DiD | Imputation DiD |
969
+ |--------|--------------|---------------|
970
+ | Point estimates | Identical | Identical |
971
+ | Variance | GMM sandwich (accounts for first-stage error) | Conservative (Theorem 3, may overcover) |
972
+ | Intuition | Residualize then regress | Impute counterfactuals then aggregate |
973
+ | Reference impl. | R `did2s` package | R `didimputation` package |
974
+
975
+ Both estimators are efficient under homogeneous treatment effects, producing shorter confidence intervals than Callaway-Sant'Anna or Sun-Abraham.
976
+
967
977
  ### Triple Difference (DDD)
968
978
 
969
979
  Triple Difference (DDD) is used when treatment requires satisfying two criteria: belonging to a treated **group** AND being in an eligible **partition**. The `TripleDifference` class implements the methodology from Ortiz-Villavicencio & Sant'Anna (2025), which correctly handles covariate adjustment (unlike naive implementations).
@@ -2141,6 +2151,58 @@ ImputationDiD(
2141
2151
  | `to_dataframe(level)` | Convert to DataFrame ('observation', 'event_study', 'group') |
2142
2152
  | `pretrend_test(n_leads)` | Run pre-trend F-test (Equation 9) |
2143
2153
 
2154
+ ### TwoStageDiD
2155
+
2156
+ ```python
2157
+ TwoStageDiD(
2158
+ anticipation=0, # Periods of anticipation effects
2159
+ alpha=0.05, # Significance level for CIs
2160
+ cluster=None, # Column for cluster-robust SEs (defaults to unit)
2161
+ n_bootstrap=0, # Bootstrap iterations (0 = analytical GMM SEs)
2162
+ seed=None, # Random seed
2163
+ rank_deficient_action='warn', # 'warn', 'error', or 'silent'
2164
+ horizon_max=None, # Max event-study horizon
2165
+ )
2166
+ ```
2167
+
2168
+ **fit() Parameters:**
2169
+
2170
+ | Parameter | Type | Description |
2171
+ |-----------|------|-------------|
2172
+ | `data` | DataFrame | Panel data |
2173
+ | `outcome` | str | Outcome variable column name |
2174
+ | `unit` | str | Unit identifier column |
2175
+ | `time` | str | Time period column |
2176
+ | `first_treat` | str | First treatment period column (0 for never-treated) |
2177
+ | `covariates` | list | Covariate column names |
2178
+ | `aggregate` | str | Aggregation: None, "event_study", "group", "all" |
2179
+ | `balance_e` | int | Balance event study to this many pre-treatment periods |
2180
+
2181
+ ### TwoStageDiDResults
2182
+
2183
+ **Attributes:**
2184
+
2185
+ | Attribute | Description |
2186
+ |-----------|-------------|
2187
+ | `overall_att` | Overall average treatment effect on the treated |
2188
+ | `overall_se` | Standard error (GMM sandwich variance) |
2189
+ | `overall_t_stat` | T-statistic |
2190
+ | `overall_p_value` | P-value for H0: ATT = 0 |
2191
+ | `overall_conf_int` | Confidence interval |
2192
+ | `event_study_effects` | Dict of relative time -> effect dict (if `aggregate='event_study'` or `'all'`) |
2193
+ | `group_effects` | Dict of cohort -> effect dict (if `aggregate='group'` or `'all'`) |
2194
+ | `treatment_effects` | DataFrame of unit-level treatment effects |
2195
+ | `n_treated_obs` | Number of treated observations |
2196
+ | `n_untreated_obs` | Number of untreated observations |
2197
+
2198
+ **Methods:**
2199
+
2200
+ | Method | Description |
2201
+ |--------|-------------|
2202
+ | `summary(alpha)` | Get formatted summary string |
2203
+ | `print_summary(alpha)` | Print summary to stdout |
2204
+ | `to_dataframe(level)` | Convert to DataFrame ('observation', 'event_study', 'group') |
2205
+
2144
2206
  ### TripleDifference
2145
2207
 
2146
2208
  ```python
@@ -2489,7 +2551,7 @@ Returns DataFrame with columns: `unit`, `quality_score`, `outcome_trend_score`,
2489
2551
 
2490
2552
  ## Requirements
2491
2553
 
2492
- - Python >= 3.9
2554
+ - Python 3.9 - 3.13
2493
2555
  - numpy >= 1.20
2494
2556
  - pandas >= 1.3
2495
2557
  - scipy >= 1.7
@@ -2619,6 +2681,10 @@ The `HonestDiD` module implements sensitivity analysis methods for relaxing the
2619
2681
 
2620
2682
  - **Sun, L., & Abraham, S. (2021).** "Estimating Dynamic Treatment Effects in Event Studies with Heterogeneous Treatment Effects." *Journal of Econometrics*, 225(2), 175-199. [https://doi.org/10.1016/j.jeconom.2020.09.006](https://doi.org/10.1016/j.jeconom.2020.09.006)
2621
2683
 
2684
+ - **Gardner, J. (2022).** "Two-stage differences in differences." *arXiv preprint arXiv:2207.05943*. [https://arxiv.org/abs/2207.05943](https://arxiv.org/abs/2207.05943)
2685
+
2686
+ - **Butts, K., & Gardner, J. (2022).** "did2s: Two-Stage Difference-in-Differences." *The R Journal*, 14(3), 162-173. [https://doi.org/10.32614/RJ-2022-048](https://doi.org/10.32614/RJ-2022-048)
2687
+
2622
2688
  - **de Chaisemartin, C., & D'Haultfœuille, X. (2020).** "Two-Way Fixed Effects Estimators with Heterogeneous Treatment Effects." *American Economic Review*, 110(9), 2964-2996. [https://doi.org/10.1257/aer.20181169](https://doi.org/10.1257/aer.20181169)
2623
2689
 
2624
2690
  - **Goodman-Bacon, A. (2021).** "Difference-in-Differences with Variation in Treatment Timing." *Journal of Econometrics*, 225(2), 254-277. [https://doi.org/10.1016/j.jeconom.2021.03.014](https://doi.org/10.1016/j.jeconom.2021.03.014)
@@ -2642,4 +2708,3 @@ The `HonestDiD` module implements sensitivity analysis methods for relaxing the
2642
2708
  ## License
2643
2709
 
2644
2710
  MIT License
2645
-
@@ -101,6 +101,12 @@ from diff_diff.imputation import (
101
101
  ImputationDiDResults,
102
102
  imputation_did,
103
103
  )
104
+ from diff_diff.two_stage import (
105
+ TwoStageBootstrapResults,
106
+ TwoStageDiD,
107
+ TwoStageDiDResults,
108
+ two_stage_did,
109
+ )
104
110
  from diff_diff.sun_abraham import (
105
111
  SABootstrapResults,
106
112
  SunAbraham,
@@ -142,7 +148,7 @@ from diff_diff.datasets import (
142
148
  load_mpdta,
143
149
  )
144
150
 
145
- __version__ = "2.3.1"
151
+ __version__ = "2.4.0"
146
152
  __all__ = [
147
153
  # Estimators
148
154
  "DifferenceInDifferences",
@@ -152,6 +158,7 @@ __all__ = [
152
158
  "CallawaySantAnna",
153
159
  "SunAbraham",
154
160
  "ImputationDiD",
161
+ "TwoStageDiD",
155
162
  "TripleDifference",
156
163
  "TROP",
157
164
  # Bacon Decomposition
@@ -173,6 +180,9 @@ __all__ = [
173
180
  "ImputationDiDResults",
174
181
  "ImputationBootstrapResults",
175
182
  "imputation_did",
183
+ "TwoStageDiDResults",
184
+ "TwoStageBootstrapResults",
185
+ "two_stage_did",
176
186
  "TripleDifferenceResults",
177
187
  "triple_difference",
178
188
  "TROPResults",
@@ -415,6 +415,7 @@ class CallawaySantAnna(
415
415
  cohort_masks[g] = (unit_cohorts == g)
416
416
 
417
417
  # Never-treated mask
418
+ # np.inf was normalized to 0 in fit(), so the np.inf check is defensive only
418
419
  never_treated_mask = (unit_cohorts == 0) | (unit_cohorts == np.inf)
419
420
 
420
421
  # Pre-compute covariate matrices by time period if needed
@@ -639,13 +640,15 @@ class CallawaySantAnna(
639
640
  # This avoids hardcoding column names in internal methods
640
641
  df['first_treat'] = df[first_treat]
641
642
 
643
+ # Never-treated indicator (must precede treatment_groups to exclude np.inf)
644
+ df['_never_treated'] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
645
+ # Normalize np.inf → 0 so all downstream `> 0` checks exclude never-treated
646
+ df.loc[df[first_treat] == np.inf, first_treat] = 0
647
+
642
648
  # Identify groups and time periods
643
649
  time_periods = sorted(df[time].unique())
644
650
  treatment_groups = sorted([g for g in df[first_treat].unique() if g > 0])
645
651
 
646
- # Never-treated indicator (first_treat = 0 or inf)
647
- df['_never_treated'] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
648
-
649
652
  # Get unique units
650
653
  unit_info = df.groupby(unit).agg({
651
654
  first_treat: 'first',
@@ -456,9 +456,9 @@ class SunAbraham:
456
456
  covariates : list, optional
457
457
  List of covariate column names to include in regression.
458
458
  min_pre_periods : int, default=1
459
- Minimum number of pre-treatment periods to include in event study.
459
+ **Deprecated**: Accepted but ignored. Will be removed in a future version.
460
460
  min_post_periods : int, default=1
461
- Minimum number of post-treatment periods to include in event study.
461
+ **Deprecated**: Accepted but ignored. Will be removed in a future version.
462
462
 
463
463
  Returns
464
464
  -------
@@ -470,6 +470,22 @@ class SunAbraham:
470
470
  ValueError
471
471
  If required columns are missing or data validation fails.
472
472
  """
473
+ # Deprecation warnings for unimplemented parameters
474
+ if min_pre_periods != 1:
475
+ warnings.warn(
476
+ "min_pre_periods is not yet implemented and will be ignored. "
477
+ "This parameter will be removed in a future version.",
478
+ FutureWarning,
479
+ stacklevel=2,
480
+ )
481
+ if min_post_periods != 1:
482
+ warnings.warn(
483
+ "min_post_periods is not yet implemented and will be ignored. "
484
+ "This parameter will be removed in a future version.",
485
+ FutureWarning,
486
+ stacklevel=2,
487
+ )
488
+
473
489
  # Validate inputs
474
490
  required_cols = [outcome, unit, time, first_treat]
475
491
  if covariates:
@@ -486,13 +502,15 @@ class SunAbraham:
486
502
  df[time] = pd.to_numeric(df[time])
487
503
  df[first_treat] = pd.to_numeric(df[first_treat])
488
504
 
505
+ # Never-treated indicator (must precede treatment_groups to exclude np.inf)
506
+ df["_never_treated"] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
507
+ # Normalize np.inf → 0 so all downstream `> 0` checks exclude never-treated
508
+ df.loc[df[first_treat] == np.inf, first_treat] = 0
509
+
489
510
  # Identify groups and time periods
490
511
  time_periods = sorted(df[time].unique())
491
512
  treatment_groups = sorted([g for g in df[first_treat].unique() if g > 0])
492
513
 
493
- # Never-treated indicator
494
- df["_never_treated"] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
495
-
496
514
  # Get unique units
497
515
  unit_info = (
498
516
  df.groupby(unit)
@@ -533,9 +551,9 @@ class SunAbraham:
533
551
 
534
552
  all_rel_times_sorted = sorted(all_rel_times)
535
553
 
536
- # Filter to reasonable range
537
- min_rel = max(min(all_rel_times_sorted), -20) # cap at -20
538
- max_rel = min(max(all_rel_times_sorted), 20) # cap at +20
554
+ # Use full range of relative times (no artificial truncation, matches R's fixest::sunab())
555
+ min_rel = min(all_rel_times_sorted)
556
+ max_rel = max(all_rel_times_sorted)
539
557
 
540
558
  # Reference period: last pre-treatment period (typically -1)
541
559
  self._reference_period = -1 - self.anticipation
@@ -765,12 +783,18 @@ class SunAbraham:
765
783
 
766
784
  # Fit OLS using LinearRegression helper (more stable than manual X'X inverse)
767
785
  cluster_ids = df_demeaned[cluster_var].values
786
+
787
+ # Degrees of freedom adjustment for absorbed unit and time fixed effects
788
+ n_units_fe = df[unit].nunique()
789
+ n_times_fe = df[time].nunique()
790
+ df_adj = n_units_fe + n_times_fe - 1
791
+
768
792
  reg = LinearRegression(
769
793
  include_intercept=False, # Already demeaned, no intercept needed
770
794
  robust=True,
771
795
  cluster_ids=cluster_ids,
772
796
  rank_deficient_action=self.rank_deficient_action,
773
- ).fit(X, y)
797
+ ).fit(X, y, df_adjustment=df_adj)
774
798
 
775
799
  coefficients = reg.coefficients_
776
800
  vcov = reg.vcov_
@@ -821,7 +845,8 @@ class SunAbraham:
821
845
 
822
846
  β_e = Σ_g w_{g,e} × δ_{g,e}
823
847
 
824
- where w_{g,e} is the share of cohort g among treated units at relative time e.
848
+ where w_{g,e} = n_{g,e} / Σ_g n_{g,e} is the share of observations from cohort g
849
+ at event-time e among all treated observations at that event-time.
825
850
 
826
851
  Returns
827
852
  -------
@@ -833,9 +858,8 @@ class SunAbraham:
833
858
  event_study_effects: Dict[int, Dict[str, Any]] = {}
834
859
  cohort_weights: Dict[int, Dict[Any, float]] = {}
835
860
 
836
- # Get cohort sizes
837
- unit_cohorts = df.groupby(unit)[first_treat].first()
838
- cohort_sizes = unit_cohorts[unit_cohorts > 0].value_counts().to_dict()
861
+ # Pre-compute per-event-time observation counts: n_{g,e}
862
+ event_time_counts = df[df[first_treat] > 0].groupby([first_treat, "_rel_time"]).size()
839
863
 
840
864
  for e in rel_periods:
841
865
  # Get cohorts that have observations at this relative time
@@ -847,13 +871,13 @@ class SunAbraham:
847
871
  if not cohorts_at_e:
848
872
  continue
849
873
 
850
- # Compute IW weights: share of each cohort among those observed at e
874
+ # Compute IW weights: n_{g,e} / Σ_g n_{g,e}
851
875
  weights = {}
852
876
  total_size = 0
853
877
  for g in cohorts_at_e:
854
- n_g = cohort_sizes.get(g, 0)
855
- weights[g] = n_g
856
- total_size += n_g
878
+ n_g_e = event_time_counts.get((g, e), 0)
879
+ weights[g] = n_g_e
880
+ total_size += n_g_e
857
881
 
858
882
  if total_size == 0:
859
883
  continue
@@ -915,7 +939,7 @@ class SunAbraham:
915
939
  ]
916
940
 
917
941
  if not post_effects:
918
- return 0.0, 0.0
942
+ return np.nan, np.nan
919
943
 
920
944
  # Weight by number of treated observations at each relative time
921
945
  post_weights = []
@@ -948,7 +972,13 @@ class SunAbraham:
948
972
  overall_weights_by_coef[key] += period_weight * cw
949
973
 
950
974
  if not overall_weights_by_coef:
951
- # Fallback to simple variance calculation
975
+ # Fallback to simplified variance that ignores covariances between periods
976
+ warnings.warn(
977
+ "Could not construct full weight vector for overall ATT SE. "
978
+ "Using simplified variance that ignores covariances between periods.",
979
+ UserWarning,
980
+ stacklevel=2,
981
+ )
952
982
  overall_var = float(
953
983
  np.sum((post_weights ** 2) * np.array([eff["se"] ** 2 for _, eff in post_effects]))
954
984
  )
@@ -1029,6 +1059,7 @@ class SunAbraham:
1029
1059
  df_b[time] - df_b[first_treat],
1030
1060
  np.nan
1031
1061
  )
1062
+ # np.inf was normalized to 0 in fit(), so the np.inf check is defensive only
1032
1063
  df_b["_never_treated"] = (
1033
1064
  (df_b[first_treat] == 0) | (df_b[first_treat] == np.inf)
1034
1065
  )
@@ -1113,11 +1144,16 @@ class SunAbraham:
1113
1144
  event_study_p_values[e] = p_value
1114
1145
 
1115
1146
  # Overall ATT statistics
1116
- overall_se = float(np.std(bootstrap_overall, ddof=1))
1117
- overall_ci = self._compute_percentile_ci(bootstrap_overall, self.alpha)
1118
- overall_p = self._compute_bootstrap_pvalue(
1119
- original_overall_att, bootstrap_overall
1120
- )
1147
+ if not np.isfinite(original_overall_att):
1148
+ overall_se = np.nan
1149
+ overall_ci = (np.nan, np.nan)
1150
+ overall_p = np.nan
1151
+ else:
1152
+ overall_se = float(np.std(bootstrap_overall, ddof=1))
1153
+ overall_ci = self._compute_percentile_ci(bootstrap_overall, self.alpha)
1154
+ overall_p = self._compute_bootstrap_pvalue(
1155
+ original_overall_att, bootstrap_overall
1156
+ )
1121
1157
 
1122
1158
  return SABootstrapResults(
1123
1159
  n_bootstrap=self.n_bootstrap,