diff-diff 2.3.1__tar.gz → 2.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diff_diff-2.3.1/README.md → diff_diff-2.4.0/PKG-INFO +144 -2
- diff_diff-2.3.1/PKG-INFO → diff_diff-2.4.0/README.md +105 -40
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/__init__.py +11 -1
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/staggered.py +6 -3
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/sun_abraham.py +60 -24
- diff_diff-2.4.0/diff_diff/two_stage.py +2209 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/utils.py +34 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/visualization.py +2 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/pyproject.toml +3 -2
- {diff_diff-2.3.1 → diff_diff-2.4.0}/rust/Cargo.lock +3 -3
- {diff_diff-2.3.1 → diff_diff-2.4.0}/rust/Cargo.toml +1 -1
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/_backend.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/bacon.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/datasets.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/diagnostics.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/estimators.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/honest_did.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/imputation.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/linalg.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/power.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/prep.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/prep_dgp.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/pretrends.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/results.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/staggered_aggregation.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/staggered_bootstrap.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/staggered_results.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/synthetic_did.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/triple_diff.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/trop.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/diff_diff/twfe.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/rust/src/bootstrap.rs +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/rust/src/lib.rs +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/rust/src/linalg.rs +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/rust/src/trop.rs +0 -0
- {diff_diff-2.3.1 → diff_diff-2.4.0}/rust/src/weights.rs +0 -0
|
@@ -1,3 +1,41 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: diff-diff
|
|
3
|
+
Version: 2.4.0
|
|
4
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
5
|
+
Classifier: Intended Audience :: Science/Research
|
|
6
|
+
Classifier: Operating System :: OS Independent
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
14
|
+
Requires-Dist: numpy>=1.20.0
|
|
15
|
+
Requires-Dist: pandas>=1.3.0
|
|
16
|
+
Requires-Dist: scipy>=1.7.0
|
|
17
|
+
Requires-Dist: pytest>=7.0 ; extra == 'dev'
|
|
18
|
+
Requires-Dist: pytest-xdist>=3.0 ; extra == 'dev'
|
|
19
|
+
Requires-Dist: pytest-cov>=4.0 ; extra == 'dev'
|
|
20
|
+
Requires-Dist: black>=23.0 ; extra == 'dev'
|
|
21
|
+
Requires-Dist: ruff>=0.1.0 ; extra == 'dev'
|
|
22
|
+
Requires-Dist: mypy>=1.0 ; extra == 'dev'
|
|
23
|
+
Requires-Dist: maturin>=1.4,<2.0 ; extra == 'dev'
|
|
24
|
+
Requires-Dist: sphinx>=6.0 ; extra == 'docs'
|
|
25
|
+
Requires-Dist: sphinx-rtd-theme>=1.0 ; extra == 'docs'
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Provides-Extra: docs
|
|
28
|
+
Summary: A library for Difference-in-Differences causal inference analysis
|
|
29
|
+
Keywords: causal-inference,difference-in-differences,econometrics,statistics,treatment-effects
|
|
30
|
+
Author: diff-diff contributors
|
|
31
|
+
License-Expression: MIT
|
|
32
|
+
Requires-Python: >=3.9, <3.14
|
|
33
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
34
|
+
Project-URL: Documentation, https://diff-diff.readthedocs.io
|
|
35
|
+
Project-URL: Homepage, https://github.com/igerber/diff-diff
|
|
36
|
+
Project-URL: Issues, https://github.com/igerber/diff-diff/issues
|
|
37
|
+
Project-URL: Repository, https://github.com/igerber/diff-diff
|
|
38
|
+
|
|
1
39
|
# diff-diff
|
|
2
40
|
|
|
3
41
|
A Python library for Difference-in-Differences (DiD) causal inference analysis with an sklearn-like API and statsmodels-style outputs.
|
|
@@ -70,7 +108,7 @@ Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1
|
|
|
70
108
|
- **Wild cluster bootstrap**: Valid inference with few clusters (<50) using Rademacher, Webb, or Mammen weights
|
|
71
109
|
- **Panel data support**: Two-way fixed effects estimator for panel designs
|
|
72
110
|
- **Multi-period analysis**: Event-study style DiD with period-specific treatment effects
|
|
73
|
-
- **Staggered adoption**: Callaway-Sant'Anna (2021), Sun-Abraham (2021),
|
|
111
|
+
- **Staggered adoption**: Callaway-Sant'Anna (2021), Sun-Abraham (2021), Borusyak-Jaravel-Spiess (2024) imputation, and Two-Stage DiD (Gardner 2022) estimators for heterogeneous treatment timing
|
|
74
112
|
- **Triple Difference (DDD)**: Ortiz-Villavicencio & Sant'Anna (2025) estimators with proper covariate handling
|
|
75
113
|
- **Synthetic DiD**: Combined DiD with synthetic control for improved robustness
|
|
76
114
|
- **Triply Robust Panel (TROP)**: Factor-adjusted DiD with synthetic weights (Athey et al. 2025)
|
|
@@ -927,6 +965,53 @@ ImputationDiD(
|
|
|
927
965
|
| Inference | Conservative variance (Theorem 3) | Multiplier bootstrap |
|
|
928
966
|
| Pre-trends | Built-in F-test (Equation 9) | Separate testing |
|
|
929
967
|
|
|
968
|
+
### Two-Stage DiD (Gardner 2022)
|
|
969
|
+
|
|
970
|
+
Two-Stage DiD addresses TWFE bias in staggered adoption designs by estimating unit and time fixed effects on untreated observations only, then regressing the residualized outcomes on treatment indicators. Point estimates match the Imputation DiD estimator (Borusyak et al. 2024); the key difference is that Two-Stage DiD uses a GMM sandwich variance estimator that accounts for first-stage estimation error, while Imputation DiD uses a conservative variance (Theorem 3).
|
|
971
|
+
|
|
972
|
+
```python
|
|
973
|
+
from diff_diff import TwoStageDiD
|
|
974
|
+
|
|
975
|
+
# Basic usage
|
|
976
|
+
est = TwoStageDiD()
|
|
977
|
+
results = est.fit(data, outcome='outcome', unit='unit', time='period', first_treat='first_treat')
|
|
978
|
+
results.print_summary()
|
|
979
|
+
```
|
|
980
|
+
|
|
981
|
+
**Event study:**
|
|
982
|
+
|
|
983
|
+
```python
|
|
984
|
+
# Event study aggregation with visualization
|
|
985
|
+
results = est.fit(data, outcome='outcome', unit='unit', time='period',
|
|
986
|
+
first_treat='first_treat', aggregate='event_study')
|
|
987
|
+
plot_event_study(results)
|
|
988
|
+
```
|
|
989
|
+
|
|
990
|
+
**Parameters:**
|
|
991
|
+
|
|
992
|
+
```python
|
|
993
|
+
TwoStageDiD(
|
|
994
|
+
anticipation=0, # Periods of anticipation effects
|
|
995
|
+
alpha=0.05, # Significance level for CIs
|
|
996
|
+
cluster=None, # Column for cluster-robust SEs (defaults to unit)
|
|
997
|
+
n_bootstrap=0, # Bootstrap iterations (0 = analytical GMM SEs)
|
|
998
|
+
seed=None, # Random seed
|
|
999
|
+
rank_deficient_action='warn', # 'warn', 'error', or 'silent'
|
|
1000
|
+
horizon_max=None, # Max event-study horizon
|
|
1001
|
+
)
|
|
1002
|
+
```
|
|
1003
|
+
|
|
1004
|
+
**When to use Two-Stage DiD vs Imputation DiD:**
|
|
1005
|
+
|
|
1006
|
+
| Aspect | Two-Stage DiD | Imputation DiD |
|
|
1007
|
+
|--------|--------------|---------------|
|
|
1008
|
+
| Point estimates | Identical | Identical |
|
|
1009
|
+
| Variance | GMM sandwich (accounts for first-stage error) | Conservative (Theorem 3, may overcover) |
|
|
1010
|
+
| Intuition | Residualize then regress | Impute counterfactuals then aggregate |
|
|
1011
|
+
| Reference impl. | R `did2s` package | R `didimputation` package |
|
|
1012
|
+
|
|
1013
|
+
Both estimators are efficient under homogeneous treatment effects, producing shorter confidence intervals than Callaway-Sant'Anna or Sun-Abraham.
|
|
1014
|
+
|
|
930
1015
|
### Triple Difference (DDD)
|
|
931
1016
|
|
|
932
1017
|
Triple Difference (DDD) is used when treatment requires satisfying two criteria: belonging to a treated **group** AND being in an eligible **partition**. The `TripleDifference` class implements the methodology from Ortiz-Villavicencio & Sant'Anna (2025), which correctly handles covariate adjustment (unlike naive implementations).
|
|
@@ -2104,6 +2189,58 @@ ImputationDiD(
|
|
|
2104
2189
|
| `to_dataframe(level)` | Convert to DataFrame ('observation', 'event_study', 'group') |
|
|
2105
2190
|
| `pretrend_test(n_leads)` | Run pre-trend F-test (Equation 9) |
|
|
2106
2191
|
|
|
2192
|
+
### TwoStageDiD
|
|
2193
|
+
|
|
2194
|
+
```python
|
|
2195
|
+
TwoStageDiD(
|
|
2196
|
+
anticipation=0, # Periods of anticipation effects
|
|
2197
|
+
alpha=0.05, # Significance level for CIs
|
|
2198
|
+
cluster=None, # Column for cluster-robust SEs (defaults to unit)
|
|
2199
|
+
n_bootstrap=0, # Bootstrap iterations (0 = analytical GMM SEs)
|
|
2200
|
+
seed=None, # Random seed
|
|
2201
|
+
rank_deficient_action='warn', # 'warn', 'error', or 'silent'
|
|
2202
|
+
horizon_max=None, # Max event-study horizon
|
|
2203
|
+
)
|
|
2204
|
+
```
|
|
2205
|
+
|
|
2206
|
+
**fit() Parameters:**
|
|
2207
|
+
|
|
2208
|
+
| Parameter | Type | Description |
|
|
2209
|
+
|-----------|------|-------------|
|
|
2210
|
+
| `data` | DataFrame | Panel data |
|
|
2211
|
+
| `outcome` | str | Outcome variable column name |
|
|
2212
|
+
| `unit` | str | Unit identifier column |
|
|
2213
|
+
| `time` | str | Time period column |
|
|
2214
|
+
| `first_treat` | str | First treatment period column (0 for never-treated) |
|
|
2215
|
+
| `covariates` | list | Covariate column names |
|
|
2216
|
+
| `aggregate` | str | Aggregation: None, "event_study", "group", "all" |
|
|
2217
|
+
| `balance_e` | int | Balance event study to this many pre-treatment periods |
|
|
2218
|
+
|
|
2219
|
+
### TwoStageDiDResults
|
|
2220
|
+
|
|
2221
|
+
**Attributes:**
|
|
2222
|
+
|
|
2223
|
+
| Attribute | Description |
|
|
2224
|
+
|-----------|-------------|
|
|
2225
|
+
| `overall_att` | Overall average treatment effect on the treated |
|
|
2226
|
+
| `overall_se` | Standard error (GMM sandwich variance) |
|
|
2227
|
+
| `overall_t_stat` | T-statistic |
|
|
2228
|
+
| `overall_p_value` | P-value for H0: ATT = 0 |
|
|
2229
|
+
| `overall_conf_int` | Confidence interval |
|
|
2230
|
+
| `event_study_effects` | Dict of relative time -> effect dict (if `aggregate='event_study'` or `'all'`) |
|
|
2231
|
+
| `group_effects` | Dict of cohort -> effect dict (if `aggregate='group'` or `'all'`) |
|
|
2232
|
+
| `treatment_effects` | DataFrame of unit-level treatment effects |
|
|
2233
|
+
| `n_treated_obs` | Number of treated observations |
|
|
2234
|
+
| `n_untreated_obs` | Number of untreated observations |
|
|
2235
|
+
|
|
2236
|
+
**Methods:**
|
|
2237
|
+
|
|
2238
|
+
| Method | Description |
|
|
2239
|
+
|--------|-------------|
|
|
2240
|
+
| `summary(alpha)` | Get formatted summary string |
|
|
2241
|
+
| `print_summary(alpha)` | Print summary to stdout |
|
|
2242
|
+
| `to_dataframe(level)` | Convert to DataFrame ('observation', 'event_study', 'group') |
|
|
2243
|
+
|
|
2107
2244
|
### TripleDifference
|
|
2108
2245
|
|
|
2109
2246
|
```python
|
|
@@ -2452,7 +2589,7 @@ Returns DataFrame with columns: `unit`, `quality_score`, `outcome_trend_score`,
|
|
|
2452
2589
|
|
|
2453
2590
|
## Requirements
|
|
2454
2591
|
|
|
2455
|
-
- Python >= 3.9
|
|
2592
|
+
- Python 3.9 - 3.13
|
|
2456
2593
|
- numpy >= 1.20
|
|
2457
2594
|
- pandas >= 1.3
|
|
2458
2595
|
- scipy >= 1.7
|
|
@@ -2582,6 +2719,10 @@ The `HonestDiD` module implements sensitivity analysis methods for relaxing the
|
|
|
2582
2719
|
|
|
2583
2720
|
- **Sun, L., & Abraham, S. (2021).** "Estimating Dynamic Treatment Effects in Event Studies with Heterogeneous Treatment Effects." *Journal of Econometrics*, 225(2), 175-199. [https://doi.org/10.1016/j.jeconom.2020.09.006](https://doi.org/10.1016/j.jeconom.2020.09.006)
|
|
2584
2721
|
|
|
2722
|
+
- **Gardner, J. (2022).** "Two-stage differences in differences." *arXiv preprint arXiv:2207.05943*. [https://arxiv.org/abs/2207.05943](https://arxiv.org/abs/2207.05943)
|
|
2723
|
+
|
|
2724
|
+
- **Butts, K., & Gardner, J. (2022).** "did2s: Two-Stage Difference-in-Differences." *The R Journal*, 14(1), 162-173. [https://doi.org/10.32614/RJ-2022-048](https://doi.org/10.32614/RJ-2022-048)
|
|
2725
|
+
|
|
2585
2726
|
- **de Chaisemartin, C., & D'Haultfœuille, X. (2020).** "Two-Way Fixed Effects Estimators with Heterogeneous Treatment Effects." *American Economic Review*, 110(9), 2964-2996. [https://doi.org/10.1257/aer.20181169](https://doi.org/10.1257/aer.20181169)
|
|
2586
2727
|
|
|
2587
2728
|
- **Goodman-Bacon, A. (2021).** "Difference-in-Differences with Variation in Treatment Timing." *Journal of Econometrics*, 225(2), 254-277. [https://doi.org/10.1016/j.jeconom.2021.03.014](https://doi.org/10.1016/j.jeconom.2021.03.014)
|
|
@@ -2605,3 +2746,4 @@ The `HonestDiD` module implements sensitivity analysis methods for relaxing the
|
|
|
2605
2746
|
## License
|
|
2606
2747
|
|
|
2607
2748
|
MIT License
|
|
2749
|
+
|
|
@@ -1,40 +1,3 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: diff-diff
|
|
3
|
-
Version: 2.3.1
|
|
4
|
-
Classifier: Development Status :: 5 - Production/Stable
|
|
5
|
-
Classifier: Intended Audience :: Science/Research
|
|
6
|
-
Classifier: Operating System :: OS Independent
|
|
7
|
-
Classifier: Programming Language :: Python :: 3
|
|
8
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
9
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
-
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
13
|
-
Requires-Dist: numpy>=1.20.0
|
|
14
|
-
Requires-Dist: pandas>=1.3.0
|
|
15
|
-
Requires-Dist: scipy>=1.7.0
|
|
16
|
-
Requires-Dist: pytest>=7.0 ; extra == 'dev'
|
|
17
|
-
Requires-Dist: pytest-xdist>=3.0 ; extra == 'dev'
|
|
18
|
-
Requires-Dist: pytest-cov>=4.0 ; extra == 'dev'
|
|
19
|
-
Requires-Dist: black>=23.0 ; extra == 'dev'
|
|
20
|
-
Requires-Dist: ruff>=0.1.0 ; extra == 'dev'
|
|
21
|
-
Requires-Dist: mypy>=1.0 ; extra == 'dev'
|
|
22
|
-
Requires-Dist: maturin>=1.4,<2.0 ; extra == 'dev'
|
|
23
|
-
Requires-Dist: sphinx>=6.0 ; extra == 'docs'
|
|
24
|
-
Requires-Dist: sphinx-rtd-theme>=1.0 ; extra == 'docs'
|
|
25
|
-
Provides-Extra: dev
|
|
26
|
-
Provides-Extra: docs
|
|
27
|
-
Summary: A library for Difference-in-Differences causal inference analysis
|
|
28
|
-
Keywords: causal-inference,difference-in-differences,econometrics,statistics,treatment-effects
|
|
29
|
-
Author: diff-diff contributors
|
|
30
|
-
License-Expression: MIT
|
|
31
|
-
Requires-Python: >=3.9
|
|
32
|
-
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
33
|
-
Project-URL: Documentation, https://diff-diff.readthedocs.io
|
|
34
|
-
Project-URL: Homepage, https://github.com/igerber/diff-diff
|
|
35
|
-
Project-URL: Issues, https://github.com/igerber/diff-diff/issues
|
|
36
|
-
Project-URL: Repository, https://github.com/igerber/diff-diff
|
|
37
|
-
|
|
38
1
|
# diff-diff
|
|
39
2
|
|
|
40
3
|
A Python library for Difference-in-Differences (DiD) causal inference analysis with an sklearn-like API and statsmodels-style outputs.
|
|
@@ -107,7 +70,7 @@ Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1
|
|
|
107
70
|
- **Wild cluster bootstrap**: Valid inference with few clusters (<50) using Rademacher, Webb, or Mammen weights
|
|
108
71
|
- **Panel data support**: Two-way fixed effects estimator for panel designs
|
|
109
72
|
- **Multi-period analysis**: Event-study style DiD with period-specific treatment effects
|
|
110
|
-
- **Staggered adoption**: Callaway-Sant'Anna (2021), Sun-Abraham (2021),
|
|
73
|
+
- **Staggered adoption**: Callaway-Sant'Anna (2021), Sun-Abraham (2021), Borusyak-Jaravel-Spiess (2024) imputation, and Two-Stage DiD (Gardner 2022) estimators for heterogeneous treatment timing
|
|
111
74
|
- **Triple Difference (DDD)**: Ortiz-Villavicencio & Sant'Anna (2025) estimators with proper covariate handling
|
|
112
75
|
- **Synthetic DiD**: Combined DiD with synthetic control for improved robustness
|
|
113
76
|
- **Triply Robust Panel (TROP)**: Factor-adjusted DiD with synthetic weights (Athey et al. 2025)
|
|
@@ -964,6 +927,53 @@ ImputationDiD(
|
|
|
964
927
|
| Inference | Conservative variance (Theorem 3) | Multiplier bootstrap |
|
|
965
928
|
| Pre-trends | Built-in F-test (Equation 9) | Separate testing |
|
|
966
929
|
|
|
930
|
+
### Two-Stage DiD (Gardner 2022)
|
|
931
|
+
|
|
932
|
+
Two-Stage DiD addresses TWFE bias in staggered adoption designs by estimating unit and time fixed effects on untreated observations only, then regressing the residualized outcomes on treatment indicators. Point estimates match the Imputation DiD estimator (Borusyak et al. 2024); the key difference is that Two-Stage DiD uses a GMM sandwich variance estimator that accounts for first-stage estimation error, while Imputation DiD uses a conservative variance (Theorem 3).
|
|
933
|
+
|
|
934
|
+
```python
|
|
935
|
+
from diff_diff import TwoStageDiD
|
|
936
|
+
|
|
937
|
+
# Basic usage
|
|
938
|
+
est = TwoStageDiD()
|
|
939
|
+
results = est.fit(data, outcome='outcome', unit='unit', time='period', first_treat='first_treat')
|
|
940
|
+
results.print_summary()
|
|
941
|
+
```
|
|
942
|
+
|
|
943
|
+
**Event study:**
|
|
944
|
+
|
|
945
|
+
```python
|
|
946
|
+
# Event study aggregation with visualization
|
|
947
|
+
results = est.fit(data, outcome='outcome', unit='unit', time='period',
|
|
948
|
+
first_treat='first_treat', aggregate='event_study')
|
|
949
|
+
plot_event_study(results)
|
|
950
|
+
```
|
|
951
|
+
|
|
952
|
+
**Parameters:**
|
|
953
|
+
|
|
954
|
+
```python
|
|
955
|
+
TwoStageDiD(
|
|
956
|
+
anticipation=0, # Periods of anticipation effects
|
|
957
|
+
alpha=0.05, # Significance level for CIs
|
|
958
|
+
cluster=None, # Column for cluster-robust SEs (defaults to unit)
|
|
959
|
+
n_bootstrap=0, # Bootstrap iterations (0 = analytical GMM SEs)
|
|
960
|
+
seed=None, # Random seed
|
|
961
|
+
rank_deficient_action='warn', # 'warn', 'error', or 'silent'
|
|
962
|
+
horizon_max=None, # Max event-study horizon
|
|
963
|
+
)
|
|
964
|
+
```
|
|
965
|
+
|
|
966
|
+
**When to use Two-Stage DiD vs Imputation DiD:**
|
|
967
|
+
|
|
968
|
+
| Aspect | Two-Stage DiD | Imputation DiD |
|
|
969
|
+
|--------|--------------|---------------|
|
|
970
|
+
| Point estimates | Identical | Identical |
|
|
971
|
+
| Variance | GMM sandwich (accounts for first-stage error) | Conservative (Theorem 3, may overcover) |
|
|
972
|
+
| Intuition | Residualize then regress | Impute counterfactuals then aggregate |
|
|
973
|
+
| Reference impl. | R `did2s` package | R `didimputation` package |
|
|
974
|
+
|
|
975
|
+
Both estimators are efficient under homogeneous treatment effects, producing shorter confidence intervals than Callaway-Sant'Anna or Sun-Abraham.
|
|
976
|
+
|
|
967
977
|
### Triple Difference (DDD)
|
|
968
978
|
|
|
969
979
|
Triple Difference (DDD) is used when treatment requires satisfying two criteria: belonging to a treated **group** AND being in an eligible **partition**. The `TripleDifference` class implements the methodology from Ortiz-Villavicencio & Sant'Anna (2025), which correctly handles covariate adjustment (unlike naive implementations).
|
|
@@ -2141,6 +2151,58 @@ ImputationDiD(
|
|
|
2141
2151
|
| `to_dataframe(level)` | Convert to DataFrame ('observation', 'event_study', 'group') |
|
|
2142
2152
|
| `pretrend_test(n_leads)` | Run pre-trend F-test (Equation 9) |
|
|
2143
2153
|
|
|
2154
|
+
### TwoStageDiD
|
|
2155
|
+
|
|
2156
|
+
```python
|
|
2157
|
+
TwoStageDiD(
|
|
2158
|
+
anticipation=0, # Periods of anticipation effects
|
|
2159
|
+
alpha=0.05, # Significance level for CIs
|
|
2160
|
+
cluster=None, # Column for cluster-robust SEs (defaults to unit)
|
|
2161
|
+
n_bootstrap=0, # Bootstrap iterations (0 = analytical GMM SEs)
|
|
2162
|
+
seed=None, # Random seed
|
|
2163
|
+
rank_deficient_action='warn', # 'warn', 'error', or 'silent'
|
|
2164
|
+
horizon_max=None, # Max event-study horizon
|
|
2165
|
+
)
|
|
2166
|
+
```
|
|
2167
|
+
|
|
2168
|
+
**fit() Parameters:**
|
|
2169
|
+
|
|
2170
|
+
| Parameter | Type | Description |
|
|
2171
|
+
|-----------|------|-------------|
|
|
2172
|
+
| `data` | DataFrame | Panel data |
|
|
2173
|
+
| `outcome` | str | Outcome variable column name |
|
|
2174
|
+
| `unit` | str | Unit identifier column |
|
|
2175
|
+
| `time` | str | Time period column |
|
|
2176
|
+
| `first_treat` | str | First treatment period column (0 for never-treated) |
|
|
2177
|
+
| `covariates` | list | Covariate column names |
|
|
2178
|
+
| `aggregate` | str | Aggregation: None, "event_study", "group", "all" |
|
|
2179
|
+
| `balance_e` | int | Balance event study to this many pre-treatment periods |
|
|
2180
|
+
|
|
2181
|
+
### TwoStageDiDResults
|
|
2182
|
+
|
|
2183
|
+
**Attributes:**
|
|
2184
|
+
|
|
2185
|
+
| Attribute | Description |
|
|
2186
|
+
|-----------|-------------|
|
|
2187
|
+
| `overall_att` | Overall average treatment effect on the treated |
|
|
2188
|
+
| `overall_se` | Standard error (GMM sandwich variance) |
|
|
2189
|
+
| `overall_t_stat` | T-statistic |
|
|
2190
|
+
| `overall_p_value` | P-value for H0: ATT = 0 |
|
|
2191
|
+
| `overall_conf_int` | Confidence interval |
|
|
2192
|
+
| `event_study_effects` | Dict of relative time -> effect dict (if `aggregate='event_study'` or `'all'`) |
|
|
2193
|
+
| `group_effects` | Dict of cohort -> effect dict (if `aggregate='group'` or `'all'`) |
|
|
2194
|
+
| `treatment_effects` | DataFrame of unit-level treatment effects |
|
|
2195
|
+
| `n_treated_obs` | Number of treated observations |
|
|
2196
|
+
| `n_untreated_obs` | Number of untreated observations |
|
|
2197
|
+
|
|
2198
|
+
**Methods:**
|
|
2199
|
+
|
|
2200
|
+
| Method | Description |
|
|
2201
|
+
|--------|-------------|
|
|
2202
|
+
| `summary(alpha)` | Get formatted summary string |
|
|
2203
|
+
| `print_summary(alpha)` | Print summary to stdout |
|
|
2204
|
+
| `to_dataframe(level)` | Convert to DataFrame ('observation', 'event_study', 'group') |
|
|
2205
|
+
|
|
2144
2206
|
### TripleDifference
|
|
2145
2207
|
|
|
2146
2208
|
```python
|
|
@@ -2489,7 +2551,7 @@ Returns DataFrame with columns: `unit`, `quality_score`, `outcome_trend_score`,
|
|
|
2489
2551
|
|
|
2490
2552
|
## Requirements
|
|
2491
2553
|
|
|
2492
|
-
- Python >= 3.9
|
|
2554
|
+
- Python 3.9 - 3.13
|
|
2493
2555
|
- numpy >= 1.20
|
|
2494
2556
|
- pandas >= 1.3
|
|
2495
2557
|
- scipy >= 1.7
|
|
@@ -2619,6 +2681,10 @@ The `HonestDiD` module implements sensitivity analysis methods for relaxing the
|
|
|
2619
2681
|
|
|
2620
2682
|
- **Sun, L., & Abraham, S. (2021).** "Estimating Dynamic Treatment Effects in Event Studies with Heterogeneous Treatment Effects." *Journal of Econometrics*, 225(2), 175-199. [https://doi.org/10.1016/j.jeconom.2020.09.006](https://doi.org/10.1016/j.jeconom.2020.09.006)
|
|
2621
2683
|
|
|
2684
|
+
- **Gardner, J. (2022).** "Two-stage differences in differences." *arXiv preprint arXiv:2207.05943*. [https://arxiv.org/abs/2207.05943](https://arxiv.org/abs/2207.05943)
|
|
2685
|
+
|
|
2686
|
+
- **Butts, K., & Gardner, J. (2022).** "did2s: Two-Stage Difference-in-Differences." *The R Journal*, 14(1), 162-173. [https://doi.org/10.32614/RJ-2022-048](https://doi.org/10.32614/RJ-2022-048)
|
|
2687
|
+
|
|
2622
2688
|
- **de Chaisemartin, C., & D'Haultfœuille, X. (2020).** "Two-Way Fixed Effects Estimators with Heterogeneous Treatment Effects." *American Economic Review*, 110(9), 2964-2996. [https://doi.org/10.1257/aer.20181169](https://doi.org/10.1257/aer.20181169)
|
|
2623
2689
|
|
|
2624
2690
|
- **Goodman-Bacon, A. (2021).** "Difference-in-Differences with Variation in Treatment Timing." *Journal of Econometrics*, 225(2), 254-277. [https://doi.org/10.1016/j.jeconom.2021.03.014](https://doi.org/10.1016/j.jeconom.2021.03.014)
|
|
@@ -2642,4 +2708,3 @@ The `HonestDiD` module implements sensitivity analysis methods for relaxing the
|
|
|
2642
2708
|
## License
|
|
2643
2709
|
|
|
2644
2710
|
MIT License
|
|
2645
|
-
|
|
@@ -101,6 +101,12 @@ from diff_diff.imputation import (
|
|
|
101
101
|
ImputationDiDResults,
|
|
102
102
|
imputation_did,
|
|
103
103
|
)
|
|
104
|
+
from diff_diff.two_stage import (
|
|
105
|
+
TwoStageBootstrapResults,
|
|
106
|
+
TwoStageDiD,
|
|
107
|
+
TwoStageDiDResults,
|
|
108
|
+
two_stage_did,
|
|
109
|
+
)
|
|
104
110
|
from diff_diff.sun_abraham import (
|
|
105
111
|
SABootstrapResults,
|
|
106
112
|
SunAbraham,
|
|
@@ -142,7 +148,7 @@ from diff_diff.datasets import (
|
|
|
142
148
|
load_mpdta,
|
|
143
149
|
)
|
|
144
150
|
|
|
145
|
-
__version__ = "2.3.1"
|
|
151
|
+
__version__ = "2.4.0"
|
|
146
152
|
__all__ = [
|
|
147
153
|
# Estimators
|
|
148
154
|
"DifferenceInDifferences",
|
|
@@ -152,6 +158,7 @@ __all__ = [
|
|
|
152
158
|
"CallawaySantAnna",
|
|
153
159
|
"SunAbraham",
|
|
154
160
|
"ImputationDiD",
|
|
161
|
+
"TwoStageDiD",
|
|
155
162
|
"TripleDifference",
|
|
156
163
|
"TROP",
|
|
157
164
|
# Bacon Decomposition
|
|
@@ -173,6 +180,9 @@ __all__ = [
|
|
|
173
180
|
"ImputationDiDResults",
|
|
174
181
|
"ImputationBootstrapResults",
|
|
175
182
|
"imputation_did",
|
|
183
|
+
"TwoStageDiDResults",
|
|
184
|
+
"TwoStageBootstrapResults",
|
|
185
|
+
"two_stage_did",
|
|
176
186
|
"TripleDifferenceResults",
|
|
177
187
|
"triple_difference",
|
|
178
188
|
"TROPResults",
|
|
@@ -415,6 +415,7 @@ class CallawaySantAnna(
|
|
|
415
415
|
cohort_masks[g] = (unit_cohorts == g)
|
|
416
416
|
|
|
417
417
|
# Never-treated mask
|
|
418
|
+
# np.inf was normalized to 0 in fit(), so the np.inf check is defensive only
|
|
418
419
|
never_treated_mask = (unit_cohorts == 0) | (unit_cohorts == np.inf)
|
|
419
420
|
|
|
420
421
|
# Pre-compute covariate matrices by time period if needed
|
|
@@ -639,13 +640,15 @@ class CallawaySantAnna(
|
|
|
639
640
|
# This avoids hardcoding column names in internal methods
|
|
640
641
|
df['first_treat'] = df[first_treat]
|
|
641
642
|
|
|
643
|
+
# Never-treated indicator (must precede treatment_groups to exclude np.inf)
|
|
644
|
+
df['_never_treated'] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
|
|
645
|
+
# Normalize np.inf → 0 so all downstream `> 0` checks exclude never-treated
|
|
646
|
+
df.loc[df[first_treat] == np.inf, first_treat] = 0
|
|
647
|
+
|
|
642
648
|
# Identify groups and time periods
|
|
643
649
|
time_periods = sorted(df[time].unique())
|
|
644
650
|
treatment_groups = sorted([g for g in df[first_treat].unique() if g > 0])
|
|
645
651
|
|
|
646
|
-
# Never-treated indicator (first_treat = 0 or inf)
|
|
647
|
-
df['_never_treated'] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
|
|
648
|
-
|
|
649
652
|
# Get unique units
|
|
650
653
|
unit_info = df.groupby(unit).agg({
|
|
651
654
|
first_treat: 'first',
|
|
@@ -456,9 +456,9 @@ class SunAbraham:
|
|
|
456
456
|
covariates : list, optional
|
|
457
457
|
List of covariate column names to include in regression.
|
|
458
458
|
min_pre_periods : int, default=1
|
|
459
|
-
|
|
459
|
+
**Deprecated**: Accepted but ignored. Will be removed in a future version.
|
|
460
460
|
min_post_periods : int, default=1
|
|
461
|
-
|
|
461
|
+
**Deprecated**: Accepted but ignored. Will be removed in a future version.
|
|
462
462
|
|
|
463
463
|
Returns
|
|
464
464
|
-------
|
|
@@ -470,6 +470,22 @@ class SunAbraham:
|
|
|
470
470
|
ValueError
|
|
471
471
|
If required columns are missing or data validation fails.
|
|
472
472
|
"""
|
|
473
|
+
# Deprecation warnings for unimplemented parameters
|
|
474
|
+
if min_pre_periods != 1:
|
|
475
|
+
warnings.warn(
|
|
476
|
+
"min_pre_periods is not yet implemented and will be ignored. "
|
|
477
|
+
"This parameter will be removed in a future version.",
|
|
478
|
+
FutureWarning,
|
|
479
|
+
stacklevel=2,
|
|
480
|
+
)
|
|
481
|
+
if min_post_periods != 1:
|
|
482
|
+
warnings.warn(
|
|
483
|
+
"min_post_periods is not yet implemented and will be ignored. "
|
|
484
|
+
"This parameter will be removed in a future version.",
|
|
485
|
+
FutureWarning,
|
|
486
|
+
stacklevel=2,
|
|
487
|
+
)
|
|
488
|
+
|
|
473
489
|
# Validate inputs
|
|
474
490
|
required_cols = [outcome, unit, time, first_treat]
|
|
475
491
|
if covariates:
|
|
@@ -486,13 +502,15 @@ class SunAbraham:
|
|
|
486
502
|
df[time] = pd.to_numeric(df[time])
|
|
487
503
|
df[first_treat] = pd.to_numeric(df[first_treat])
|
|
488
504
|
|
|
505
|
+
# Never-treated indicator (must precede treatment_groups to exclude np.inf)
|
|
506
|
+
df["_never_treated"] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
|
|
507
|
+
# Normalize np.inf → 0 so all downstream `> 0` checks exclude never-treated
|
|
508
|
+
df.loc[df[first_treat] == np.inf, first_treat] = 0
|
|
509
|
+
|
|
489
510
|
# Identify groups and time periods
|
|
490
511
|
time_periods = sorted(df[time].unique())
|
|
491
512
|
treatment_groups = sorted([g for g in df[first_treat].unique() if g > 0])
|
|
492
513
|
|
|
493
|
-
# Never-treated indicator
|
|
494
|
-
df["_never_treated"] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
|
|
495
|
-
|
|
496
514
|
# Get unique units
|
|
497
515
|
unit_info = (
|
|
498
516
|
df.groupby(unit)
|
|
@@ -533,9 +551,9 @@ class SunAbraham:
|
|
|
533
551
|
|
|
534
552
|
all_rel_times_sorted = sorted(all_rel_times)
|
|
535
553
|
|
|
536
|
-
#
|
|
537
|
-
min_rel =
|
|
538
|
-
max_rel =
|
|
554
|
+
# Use full range of relative times (no artificial truncation, matches R's fixest::sunab())
|
|
555
|
+
min_rel = min(all_rel_times_sorted)
|
|
556
|
+
max_rel = max(all_rel_times_sorted)
|
|
539
557
|
|
|
540
558
|
# Reference period: last pre-treatment period (typically -1)
|
|
541
559
|
self._reference_period = -1 - self.anticipation
|
|
@@ -765,12 +783,18 @@ class SunAbraham:
|
|
|
765
783
|
|
|
766
784
|
# Fit OLS using LinearRegression helper (more stable than manual X'X inverse)
|
|
767
785
|
cluster_ids = df_demeaned[cluster_var].values
|
|
786
|
+
|
|
787
|
+
# Degrees of freedom adjustment for absorbed unit and time fixed effects
|
|
788
|
+
n_units_fe = df[unit].nunique()
|
|
789
|
+
n_times_fe = df[time].nunique()
|
|
790
|
+
df_adj = n_units_fe + n_times_fe - 1
|
|
791
|
+
|
|
768
792
|
reg = LinearRegression(
|
|
769
793
|
include_intercept=False, # Already demeaned, no intercept needed
|
|
770
794
|
robust=True,
|
|
771
795
|
cluster_ids=cluster_ids,
|
|
772
796
|
rank_deficient_action=self.rank_deficient_action,
|
|
773
|
-
).fit(X, y)
|
|
797
|
+
).fit(X, y, df_adjustment=df_adj)
|
|
774
798
|
|
|
775
799
|
coefficients = reg.coefficients_
|
|
776
800
|
vcov = reg.vcov_
|
|
@@ -821,7 +845,8 @@ class SunAbraham:
|
|
|
821
845
|
|
|
822
846
|
β_e = Σ_g w_{g,e} × δ_{g,e}
|
|
823
847
|
|
|
824
|
-
where w_{g,e}
|
|
848
|
+
where w_{g,e} = n_{g,e} / Σ_g n_{g,e} is the share of observations from cohort g
|
|
849
|
+
at event-time e among all treated observations at that event-time.
|
|
825
850
|
|
|
826
851
|
Returns
|
|
827
852
|
-------
|
|
@@ -833,9 +858,8 @@ class SunAbraham:
|
|
|
833
858
|
event_study_effects: Dict[int, Dict[str, Any]] = {}
|
|
834
859
|
cohort_weights: Dict[int, Dict[Any, float]] = {}
|
|
835
860
|
|
|
836
|
-
#
|
|
837
|
-
|
|
838
|
-
cohort_sizes = unit_cohorts[unit_cohorts > 0].value_counts().to_dict()
|
|
861
|
+
# Pre-compute per-event-time observation counts: n_{g,e}
|
|
862
|
+
event_time_counts = df[df[first_treat] > 0].groupby([first_treat, "_rel_time"]).size()
|
|
839
863
|
|
|
840
864
|
for e in rel_periods:
|
|
841
865
|
# Get cohorts that have observations at this relative time
|
|
@@ -847,13 +871,13 @@ class SunAbraham:
|
|
|
847
871
|
if not cohorts_at_e:
|
|
848
872
|
continue
|
|
849
873
|
|
|
850
|
-
# Compute IW weights:
|
|
874
|
+
# Compute IW weights: n_{g,e} / Σ_g n_{g,e}
|
|
851
875
|
weights = {}
|
|
852
876
|
total_size = 0
|
|
853
877
|
for g in cohorts_at_e:
|
|
854
|
-
|
|
855
|
-
weights[g] =
|
|
856
|
-
total_size +=
|
|
878
|
+
n_g_e = event_time_counts.get((g, e), 0)
|
|
879
|
+
weights[g] = n_g_e
|
|
880
|
+
total_size += n_g_e
|
|
857
881
|
|
|
858
882
|
if total_size == 0:
|
|
859
883
|
continue
|
|
@@ -915,7 +939,7 @@ class SunAbraham:
|
|
|
915
939
|
]
|
|
916
940
|
|
|
917
941
|
if not post_effects:
|
|
918
|
-
return
|
|
942
|
+
return np.nan, np.nan
|
|
919
943
|
|
|
920
944
|
# Weight by number of treated observations at each relative time
|
|
921
945
|
post_weights = []
|
|
@@ -948,7 +972,13 @@ class SunAbraham:
|
|
|
948
972
|
overall_weights_by_coef[key] += period_weight * cw
|
|
949
973
|
|
|
950
974
|
if not overall_weights_by_coef:
|
|
951
|
-
# Fallback to
|
|
975
|
+
# Fallback to simplified variance that ignores covariances between periods
|
|
976
|
+
warnings.warn(
|
|
977
|
+
"Could not construct full weight vector for overall ATT SE. "
|
|
978
|
+
"Using simplified variance that ignores covariances between periods.",
|
|
979
|
+
UserWarning,
|
|
980
|
+
stacklevel=2,
|
|
981
|
+
)
|
|
952
982
|
overall_var = float(
|
|
953
983
|
np.sum((post_weights ** 2) * np.array([eff["se"] ** 2 for _, eff in post_effects]))
|
|
954
984
|
)
|
|
@@ -1029,6 +1059,7 @@ class SunAbraham:
|
|
|
1029
1059
|
df_b[time] - df_b[first_treat],
|
|
1030
1060
|
np.nan
|
|
1031
1061
|
)
|
|
1062
|
+
# np.inf was normalized to 0 in fit(), so the np.inf check is defensive only
|
|
1032
1063
|
df_b["_never_treated"] = (
|
|
1033
1064
|
(df_b[first_treat] == 0) | (df_b[first_treat] == np.inf)
|
|
1034
1065
|
)
|
|
@@ -1113,11 +1144,16 @@ class SunAbraham:
|
|
|
1113
1144
|
event_study_p_values[e] = p_value
|
|
1114
1145
|
|
|
1115
1146
|
# Overall ATT statistics
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1147
|
+
if not np.isfinite(original_overall_att):
|
|
1148
|
+
overall_se = np.nan
|
|
1149
|
+
overall_ci = (np.nan, np.nan)
|
|
1150
|
+
overall_p = np.nan
|
|
1151
|
+
else:
|
|
1152
|
+
overall_se = float(np.std(bootstrap_overall, ddof=1))
|
|
1153
|
+
overall_ci = self._compute_percentile_ci(bootstrap_overall, self.alpha)
|
|
1154
|
+
overall_p = self._compute_bootstrap_pvalue(
|
|
1155
|
+
original_overall_att, bootstrap_overall
|
|
1156
|
+
)
|
|
1121
1157
|
|
|
1122
1158
|
return SABootstrapResults(
|
|
1123
1159
|
n_bootstrap=self.n_bootstrap,
|