diff-diff 2.2.1__tar.gz → 2.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diff_diff-2.2.1 → diff_diff-2.3.1}/PKG-INFO +125 -12
- {diff_diff-2.2.1 → diff_diff-2.3.1}/README.md +123 -11
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/__init__.py +11 -1
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/_backend.py +20 -0
- diff_diff-2.3.1/diff_diff/imputation.py +2480 -0
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/results.py +13 -5
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/synthetic_did.py +246 -126
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/twfe.py +39 -8
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/utils.py +393 -32
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/visualization.py +4 -2
- {diff_diff-2.2.1 → diff_diff-2.3.1}/pyproject.toml +2 -1
- {diff_diff-2.2.1 → diff_diff-2.3.1}/rust/Cargo.lock +20 -20
- {diff_diff-2.2.1 → diff_diff-2.3.1}/rust/Cargo.toml +1 -1
- {diff_diff-2.2.1 → diff_diff-2.3.1}/rust/src/lib.rs +7 -1
- diff_diff-2.3.1/rust/src/weights.rs +713 -0
- diff_diff-2.2.1/rust/src/weights.rs +0 -220
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/bacon.py +0 -0
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/datasets.py +0 -0
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/diagnostics.py +0 -0
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/estimators.py +0 -0
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/honest_did.py +0 -0
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/linalg.py +0 -0
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/power.py +0 -0
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/prep.py +0 -0
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/prep_dgp.py +0 -0
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/pretrends.py +0 -0
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/staggered.py +0 -0
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/staggered_aggregation.py +0 -0
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/staggered_bootstrap.py +0 -0
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/staggered_results.py +0 -0
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/sun_abraham.py +0 -0
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/triple_diff.py +0 -0
- {diff_diff-2.2.1 → diff_diff-2.3.1}/diff_diff/trop.py +0 -0
- {diff_diff-2.2.1 → diff_diff-2.3.1}/rust/src/bootstrap.rs +0 -0
- {diff_diff-2.2.1 → diff_diff-2.3.1}/rust/src/linalg.rs +0 -0
- {diff_diff-2.2.1 → diff_diff-2.3.1}/rust/src/trop.rs +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: diff-diff
|
|
3
|
-
Version: 2.2.1
|
|
3
|
+
Version: 2.3.1
|
|
4
4
|
Classifier: Development Status :: 5 - Production/Stable
|
|
5
5
|
Classifier: Intended Audience :: Science/Research
|
|
6
6
|
Classifier: Operating System :: OS Independent
|
|
@@ -14,6 +14,7 @@ Requires-Dist: numpy>=1.20.0
|
|
|
14
14
|
Requires-Dist: pandas>=1.3.0
|
|
15
15
|
Requires-Dist: scipy>=1.7.0
|
|
16
16
|
Requires-Dist: pytest>=7.0 ; extra == 'dev'
|
|
17
|
+
Requires-Dist: pytest-xdist>=3.0 ; extra == 'dev'
|
|
17
18
|
Requires-Dist: pytest-cov>=4.0 ; extra == 'dev'
|
|
18
19
|
Requires-Dist: black>=23.0 ; extra == 'dev'
|
|
19
20
|
Requires-Dist: ruff>=0.1.0 ; extra == 'dev'
|
|
@@ -106,7 +107,7 @@ Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1
|
|
|
106
107
|
- **Wild cluster bootstrap**: Valid inference with few clusters (<50) using Rademacher, Webb, or Mammen weights
|
|
107
108
|
- **Panel data support**: Two-way fixed effects estimator for panel designs
|
|
108
109
|
- **Multi-period analysis**: Event-study style DiD with period-specific treatment effects
|
|
109
|
-
- **Staggered adoption**: Callaway-Sant'Anna (2021)
|
|
110
|
+
- **Staggered adoption**: Callaway-Sant'Anna (2021), Sun-Abraham (2021), and Borusyak-Jaravel-Spiess (2024) imputation estimators for heterogeneous treatment timing
|
|
110
111
|
- **Triple Difference (DDD)**: Ortiz-Villavicencio & Sant'Anna (2025) estimators with proper covariate handling
|
|
111
112
|
- **Synthetic DiD**: Combined DiD with synthetic control for improved robustness
|
|
112
113
|
- **Triply Robust Panel (TROP)**: Factor-adjusted DiD with synthetic weights (Athey et al. 2025)
|
|
@@ -915,6 +916,54 @@ print(f"Sun-Abraham ATT: {sa_results.overall_att:.3f}")
|
|
|
915
916
|
# If results differ substantially, investigate heterogeneity
|
|
916
917
|
```
|
|
917
918
|
|
|
919
|
+
### Borusyak-Jaravel-Spiess Imputation Estimator
|
|
920
|
+
|
|
921
|
+
The Borusyak et al. (2024) imputation estimator is the **efficient** estimator for staggered DiD under parallel trends, producing ~50% shorter confidence intervals than Callaway-Sant'Anna and 2-3.5x shorter than Sun-Abraham under homogeneous treatment effects.
|
|
922
|
+
|
|
923
|
+
```python
|
|
924
|
+
from diff_diff import ImputationDiD, imputation_did
|
|
925
|
+
|
|
926
|
+
# Basic usage
|
|
927
|
+
est = ImputationDiD()
|
|
928
|
+
results = est.fit(data, outcome='outcome', unit='unit',
|
|
929
|
+
time='period', first_treat='first_treat')
|
|
930
|
+
results.print_summary()
|
|
931
|
+
|
|
932
|
+
# Event study
|
|
933
|
+
results = est.fit(data, outcome='outcome', unit='unit',
|
|
934
|
+
time='period', first_treat='first_treat',
|
|
935
|
+
aggregate='event_study')
|
|
936
|
+
|
|
937
|
+
# Pre-trend test (Equation 9)
|
|
938
|
+
pt = results.pretrend_test(n_leads=3)
|
|
939
|
+
print(f"F-stat: {pt['f_stat']:.3f}, p-value: {pt['p_value']:.4f}")
|
|
940
|
+
|
|
941
|
+
# Convenience function
|
|
942
|
+
results = imputation_did(data, 'outcome', 'unit', 'period', 'first_treat',
|
|
943
|
+
aggregate='all')
|
|
944
|
+
```
|
|
945
|
+
|
|
946
|
+
```python
|
|
947
|
+
ImputationDiD(
|
|
948
|
+
anticipation=0, # Number of anticipation periods
|
|
949
|
+
alpha=0.05, # Significance level
|
|
950
|
+
cluster=None, # Cluster variable (defaults to unit)
|
|
951
|
+
n_bootstrap=0, # Bootstrap iterations (0=analytical inference)
|
|
952
|
+
seed=None, # Random seed
|
|
953
|
+
horizon_max=None, # Max event-study horizon
|
|
954
|
+
aux_partition="cohort_horizon", # Variance partition: "cohort_horizon", "cohort", "horizon"
|
|
955
|
+
)
|
|
956
|
+
```
|
|
957
|
+
|
|
958
|
+
**When to use Imputation DiD vs Callaway-Sant'Anna:**
|
|
959
|
+
|
|
960
|
+
| Aspect | Imputation DiD | Callaway-Sant'Anna |
|
|
961
|
+
|--------|---------------|-------------------|
|
|
962
|
+
| Efficiency | Most efficient under homogeneous effects | Less efficient but more robust to heterogeneity |
|
|
963
|
+
| Control group | Always uses all untreated obs | Choice of never-treated or not-yet-treated |
|
|
964
|
+
| Inference | Conservative variance (Theorem 3) | Multiplier bootstrap |
|
|
965
|
+
| Pre-trends | Built-in F-test (Equation 9) | Separate testing |
|
|
966
|
+
|
|
918
967
|
### Triple Difference (DDD)
|
|
919
968
|
|
|
920
969
|
Triple Difference (DDD) is used when treatment requires satisfying two criteria: belonging to a treated **group** AND being in an eligible **partition**. The `TripleDifference` class implements the methodology from Ortiz-Villavicencio & Sant'Anna (2025), which correctly handles covariate adjustment (unlike naive implementations).
|
|
@@ -1151,11 +1200,12 @@ Use Synthetic DiD instead of standard DiD when:
|
|
|
1151
1200
|
|
|
1152
1201
|
```python
|
|
1153
1202
|
SyntheticDiD(
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
alpha=0.05,
|
|
1157
|
-
|
|
1158
|
-
|
|
1203
|
+
zeta_omega=None, # Unit weight regularization (None = auto-computed from data)
|
|
1204
|
+
zeta_lambda=None, # Time weight regularization (None = auto-computed from data)
|
|
1205
|
+
alpha=0.05, # Significance level
|
|
1206
|
+
variance_method="placebo", # "placebo" (default, matches R) or "bootstrap"
|
|
1207
|
+
n_bootstrap=200, # Replications for SE estimation
|
|
1208
|
+
seed=None # Random seed for reproducibility
|
|
1159
1209
|
)
|
|
1160
1210
|
```
|
|
1161
1211
|
|
|
@@ -1860,11 +1910,12 @@ MultiPeriodDiD(
|
|
|
1860
1910
|
|
|
1861
1911
|
```python
|
|
1862
1912
|
SyntheticDiD(
|
|
1863
|
-
|
|
1864
|
-
|
|
1865
|
-
alpha=0.05,
|
|
1866
|
-
|
|
1867
|
-
|
|
1913
|
+
zeta_omega=None, # Unit weight regularization (None = auto from data)
|
|
1914
|
+
zeta_lambda=None, # Time weight regularization (None = auto from data)
|
|
1915
|
+
alpha=0.05, # Significance level for CIs
|
|
1916
|
+
variance_method="placebo", # "placebo" (R default) or "bootstrap"
|
|
1917
|
+
n_bootstrap=200, # Replications for SE estimation
|
|
1918
|
+
seed=None # Random seed for reproducibility
|
|
1868
1919
|
)
|
|
1869
1920
|
```
|
|
1870
1921
|
|
|
@@ -2036,6 +2087,60 @@ SunAbraham(
|
|
|
2036
2087
|
| `print_summary(alpha)` | Print summary to stdout |
|
|
2037
2088
|
| `to_dataframe(level)` | Convert to DataFrame ('event_study' or 'cohort') |
|
|
2038
2089
|
|
|
2090
|
+
### ImputationDiD
|
|
2091
|
+
|
|
2092
|
+
```python
|
|
2093
|
+
ImputationDiD(
|
|
2094
|
+
anticipation=0, # Periods of anticipation effects
|
|
2095
|
+
alpha=0.05, # Significance level for CIs
|
|
2096
|
+
cluster=None, # Column for cluster-robust SEs
|
|
2097
|
+
n_bootstrap=0, # Bootstrap iterations (0 = analytical)
|
|
2098
|
+
seed=None, # Random seed
|
|
2099
|
+
rank_deficient_action='warn', # 'warn', 'error', or 'silent'
|
|
2100
|
+
horizon_max=None, # Max event-study horizon
|
|
2101
|
+
aux_partition='cohort_horizon', # Variance partition
|
|
2102
|
+
)
|
|
2103
|
+
```
|
|
2104
|
+
|
|
2105
|
+
**fit() Parameters:**
|
|
2106
|
+
|
|
2107
|
+
| Parameter | Type | Description |
|
|
2108
|
+
|-----------|------|-------------|
|
|
2109
|
+
| `data` | DataFrame | Panel data |
|
|
2110
|
+
| `outcome` | str | Outcome variable column name |
|
|
2111
|
+
| `unit` | str | Unit identifier column |
|
|
2112
|
+
| `time` | str | Time period column |
|
|
2113
|
+
| `first_treat` | str | First treatment period column (0 for never-treated) |
|
|
2114
|
+
| `covariates` | list | Covariate column names |
|
|
2115
|
+
| `aggregate` | str | Aggregation: None, "event_study", "group", "all" |
|
|
2116
|
+
| `balance_e` | int | Balance event study to this many pre-treatment periods |
|
|
2117
|
+
|
|
2118
|
+
### ImputationDiDResults
|
|
2119
|
+
|
|
2120
|
+
**Attributes:**
|
|
2121
|
+
|
|
2122
|
+
| Attribute | Description |
|
|
2123
|
+
|-----------|-------------|
|
|
2124
|
+
| `overall_att` | Overall average treatment effect on the treated |
|
|
2125
|
+
| `overall_se` | Standard error (conservative, Theorem 3) |
|
|
2126
|
+
| `overall_t_stat` | T-statistic |
|
|
2127
|
+
| `overall_p_value` | P-value for H0: ATT = 0 |
|
|
2128
|
+
| `overall_conf_int` | Confidence interval |
|
|
2129
|
+
| `event_study_effects` | Dict of relative time -> effect dict (if `aggregate='event_study'` or `'all'`) |
|
|
2130
|
+
| `group_effects` | Dict of cohort -> effect dict (if `aggregate='group'` or `'all'`) |
|
|
2131
|
+
| `treatment_effects` | DataFrame of unit-level imputed treatment effects |
|
|
2132
|
+
| `n_treated_obs` | Number of treated observations |
|
|
2133
|
+
| `n_untreated_obs` | Number of untreated observations |
|
|
2134
|
+
|
|
2135
|
+
**Methods:**
|
|
2136
|
+
|
|
2137
|
+
| Method | Description |
|
|
2138
|
+
|--------|-------------|
|
|
2139
|
+
| `summary(alpha)` | Get formatted summary string |
|
|
2140
|
+
| `print_summary(alpha)` | Print summary to stdout |
|
|
2141
|
+
| `to_dataframe(level)` | Convert to DataFrame ('observation', 'event_study', 'group') |
|
|
2142
|
+
| `pretrend_test(n_leads)` | Run pre-trend F-test (Equation 9) |
|
|
2143
|
+
|
|
2039
2144
|
### TripleDifference
|
|
2040
2145
|
|
|
2041
2146
|
```python
|
|
@@ -2500,6 +2605,14 @@ The `HonestDiD` module implements sensitivity analysis methods for relaxing the
|
|
|
2500
2605
|
|
|
2501
2606
|
### Multi-Period and Staggered Adoption
|
|
2502
2607
|
|
|
2608
|
+
- **Borusyak, K., Jaravel, X., & Spiess, J. (2024).** "Revisiting Event-Study Designs: Robust and Efficient Estimation." *Review of Economic Studies*, 91(6), 3253-3285. [https://doi.org/10.1093/restud/rdae007](https://doi.org/10.1093/restud/rdae007)
|
|
2609
|
+
|
|
2610
|
+
This paper introduces the imputation estimator implemented in our `ImputationDiD` class:
|
|
2611
|
+
- **Efficient imputation**: OLS on untreated observations → impute counterfactuals → aggregate
|
|
2612
|
+
- **Conservative variance**: Theorem 3 clustered variance estimator with auxiliary model
|
|
2613
|
+
- **Pre-trend test**: Independent of treatment effect estimation (Proposition 9)
|
|
2614
|
+
- **Efficiency gains**: ~50% shorter CIs than Callaway-Sant'Anna under homogeneous effects
|
|
2615
|
+
|
|
2503
2616
|
- **Callaway, B., & Sant'Anna, P. H. C. (2021).** "Difference-in-Differences with Multiple Time Periods." *Journal of Econometrics*, 225(2), 200-230. [https://doi.org/10.1016/j.jeconom.2020.12.001](https://doi.org/10.1016/j.jeconom.2020.12.001)
|
|
2504
2617
|
|
|
2505
2618
|
- **Sant'Anna, P. H. C., & Zhao, J. (2020).** "Doubly Robust Difference-in-Differences Estimators." *Journal of Econometrics*, 219(1), 101-122. [https://doi.org/10.1016/j.jeconom.2020.06.003](https://doi.org/10.1016/j.jeconom.2020.06.003)
|
|
@@ -70,7 +70,7 @@ Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1
|
|
|
70
70
|
- **Wild cluster bootstrap**: Valid inference with few clusters (<50) using Rademacher, Webb, or Mammen weights
|
|
71
71
|
- **Panel data support**: Two-way fixed effects estimator for panel designs
|
|
72
72
|
- **Multi-period analysis**: Event-study style DiD with period-specific treatment effects
|
|
73
|
-
- **Staggered adoption**: Callaway-Sant'Anna (2021)
|
|
73
|
+
- **Staggered adoption**: Callaway-Sant'Anna (2021), Sun-Abraham (2021), and Borusyak-Jaravel-Spiess (2024) imputation estimators for heterogeneous treatment timing
|
|
74
74
|
- **Triple Difference (DDD)**: Ortiz-Villavicencio & Sant'Anna (2025) estimators with proper covariate handling
|
|
75
75
|
- **Synthetic DiD**: Combined DiD with synthetic control for improved robustness
|
|
76
76
|
- **Triply Robust Panel (TROP)**: Factor-adjusted DiD with synthetic weights (Athey et al. 2025)
|
|
@@ -879,6 +879,54 @@ print(f"Sun-Abraham ATT: {sa_results.overall_att:.3f}")
|
|
|
879
879
|
# If results differ substantially, investigate heterogeneity
|
|
880
880
|
```
|
|
881
881
|
|
|
882
|
+
### Borusyak-Jaravel-Spiess Imputation Estimator
|
|
883
|
+
|
|
884
|
+
The Borusyak et al. (2024) imputation estimator is the **efficient** estimator for staggered DiD under parallel trends, producing ~50% shorter confidence intervals than Callaway-Sant'Anna and 2-3.5x shorter than Sun-Abraham under homogeneous treatment effects.
|
|
885
|
+
|
|
886
|
+
```python
|
|
887
|
+
from diff_diff import ImputationDiD, imputation_did
|
|
888
|
+
|
|
889
|
+
# Basic usage
|
|
890
|
+
est = ImputationDiD()
|
|
891
|
+
results = est.fit(data, outcome='outcome', unit='unit',
|
|
892
|
+
time='period', first_treat='first_treat')
|
|
893
|
+
results.print_summary()
|
|
894
|
+
|
|
895
|
+
# Event study
|
|
896
|
+
results = est.fit(data, outcome='outcome', unit='unit',
|
|
897
|
+
time='period', first_treat='first_treat',
|
|
898
|
+
aggregate='event_study')
|
|
899
|
+
|
|
900
|
+
# Pre-trend test (Equation 9)
|
|
901
|
+
pt = results.pretrend_test(n_leads=3)
|
|
902
|
+
print(f"F-stat: {pt['f_stat']:.3f}, p-value: {pt['p_value']:.4f}")
|
|
903
|
+
|
|
904
|
+
# Convenience function
|
|
905
|
+
results = imputation_did(data, 'outcome', 'unit', 'period', 'first_treat',
|
|
906
|
+
aggregate='all')
|
|
907
|
+
```
|
|
908
|
+
|
|
909
|
+
```python
|
|
910
|
+
ImputationDiD(
|
|
911
|
+
anticipation=0, # Number of anticipation periods
|
|
912
|
+
alpha=0.05, # Significance level
|
|
913
|
+
cluster=None, # Cluster variable (defaults to unit)
|
|
914
|
+
n_bootstrap=0, # Bootstrap iterations (0=analytical inference)
|
|
915
|
+
seed=None, # Random seed
|
|
916
|
+
horizon_max=None, # Max event-study horizon
|
|
917
|
+
aux_partition="cohort_horizon", # Variance partition: "cohort_horizon", "cohort", "horizon"
|
|
918
|
+
)
|
|
919
|
+
```
|
|
920
|
+
|
|
921
|
+
**When to use Imputation DiD vs Callaway-Sant'Anna:**
|
|
922
|
+
|
|
923
|
+
| Aspect | Imputation DiD | Callaway-Sant'Anna |
|
|
924
|
+
|--------|---------------|-------------------|
|
|
925
|
+
| Efficiency | Most efficient under homogeneous effects | Less efficient but more robust to heterogeneity |
|
|
926
|
+
| Control group | Always uses all untreated obs | Choice of never-treated or not-yet-treated |
|
|
927
|
+
| Inference | Conservative variance (Theorem 3) | Multiplier bootstrap |
|
|
928
|
+
| Pre-trends | Built-in F-test (Equation 9) | Separate testing |
|
|
929
|
+
|
|
882
930
|
### Triple Difference (DDD)
|
|
883
931
|
|
|
884
932
|
Triple Difference (DDD) is used when treatment requires satisfying two criteria: belonging to a treated **group** AND being in an eligible **partition**. The `TripleDifference` class implements the methodology from Ortiz-Villavicencio & Sant'Anna (2025), which correctly handles covariate adjustment (unlike naive implementations).
|
|
@@ -1115,11 +1163,12 @@ Use Synthetic DiD instead of standard DiD when:
|
|
|
1115
1163
|
|
|
1116
1164
|
```python
|
|
1117
1165
|
SyntheticDiD(
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
alpha=0.05,
|
|
1121
|
-
|
|
1122
|
-
|
|
1166
|
+
zeta_omega=None, # Unit weight regularization (None = auto-computed from data)
|
|
1167
|
+
zeta_lambda=None, # Time weight regularization (None = auto-computed from data)
|
|
1168
|
+
alpha=0.05, # Significance level
|
|
1169
|
+
variance_method="placebo", # "placebo" (default, matches R) or "bootstrap"
|
|
1170
|
+
n_bootstrap=200, # Replications for SE estimation
|
|
1171
|
+
seed=None # Random seed for reproducibility
|
|
1123
1172
|
)
|
|
1124
1173
|
```
|
|
1125
1174
|
|
|
@@ -1824,11 +1873,12 @@ MultiPeriodDiD(
|
|
|
1824
1873
|
|
|
1825
1874
|
```python
|
|
1826
1875
|
SyntheticDiD(
|
|
1827
|
-
|
|
1828
|
-
|
|
1829
|
-
alpha=0.05,
|
|
1830
|
-
|
|
1831
|
-
|
|
1876
|
+
zeta_omega=None, # Unit weight regularization (None = auto from data)
|
|
1877
|
+
zeta_lambda=None, # Time weight regularization (None = auto from data)
|
|
1878
|
+
alpha=0.05, # Significance level for CIs
|
|
1879
|
+
variance_method="placebo", # "placebo" (R default) or "bootstrap"
|
|
1880
|
+
n_bootstrap=200, # Replications for SE estimation
|
|
1881
|
+
seed=None # Random seed for reproducibility
|
|
1832
1882
|
)
|
|
1833
1883
|
```
|
|
1834
1884
|
|
|
@@ -2000,6 +2050,60 @@ SunAbraham(
|
|
|
2000
2050
|
| `print_summary(alpha)` | Print summary to stdout |
|
|
2001
2051
|
| `to_dataframe(level)` | Convert to DataFrame ('event_study' or 'cohort') |
|
|
2002
2052
|
|
|
2053
|
+
### ImputationDiD
|
|
2054
|
+
|
|
2055
|
+
```python
|
|
2056
|
+
ImputationDiD(
|
|
2057
|
+
anticipation=0, # Periods of anticipation effects
|
|
2058
|
+
alpha=0.05, # Significance level for CIs
|
|
2059
|
+
cluster=None, # Column for cluster-robust SEs
|
|
2060
|
+
n_bootstrap=0, # Bootstrap iterations (0 = analytical)
|
|
2061
|
+
seed=None, # Random seed
|
|
2062
|
+
rank_deficient_action='warn', # 'warn', 'error', or 'silent'
|
|
2063
|
+
horizon_max=None, # Max event-study horizon
|
|
2064
|
+
aux_partition='cohort_horizon', # Variance partition
|
|
2065
|
+
)
|
|
2066
|
+
```
|
|
2067
|
+
|
|
2068
|
+
**fit() Parameters:**
|
|
2069
|
+
|
|
2070
|
+
| Parameter | Type | Description |
|
|
2071
|
+
|-----------|------|-------------|
|
|
2072
|
+
| `data` | DataFrame | Panel data |
|
|
2073
|
+
| `outcome` | str | Outcome variable column name |
|
|
2074
|
+
| `unit` | str | Unit identifier column |
|
|
2075
|
+
| `time` | str | Time period column |
|
|
2076
|
+
| `first_treat` | str | First treatment period column (0 for never-treated) |
|
|
2077
|
+
| `covariates` | list | Covariate column names |
|
|
2078
|
+
| `aggregate` | str | Aggregation: None, "event_study", "group", "all" |
|
|
2079
|
+
| `balance_e` | int | Balance event study to this many pre-treatment periods |
|
|
2080
|
+
|
|
2081
|
+
### ImputationDiDResults
|
|
2082
|
+
|
|
2083
|
+
**Attributes:**
|
|
2084
|
+
|
|
2085
|
+
| Attribute | Description |
|
|
2086
|
+
|-----------|-------------|
|
|
2087
|
+
| `overall_att` | Overall average treatment effect on the treated |
|
|
2088
|
+
| `overall_se` | Standard error (conservative, Theorem 3) |
|
|
2089
|
+
| `overall_t_stat` | T-statistic |
|
|
2090
|
+
| `overall_p_value` | P-value for H0: ATT = 0 |
|
|
2091
|
+
| `overall_conf_int` | Confidence interval |
|
|
2092
|
+
| `event_study_effects` | Dict of relative time -> effect dict (if `aggregate='event_study'` or `'all'`) |
|
|
2093
|
+
| `group_effects` | Dict of cohort -> effect dict (if `aggregate='group'` or `'all'`) |
|
|
2094
|
+
| `treatment_effects` | DataFrame of unit-level imputed treatment effects |
|
|
2095
|
+
| `n_treated_obs` | Number of treated observations |
|
|
2096
|
+
| `n_untreated_obs` | Number of untreated observations |
|
|
2097
|
+
|
|
2098
|
+
**Methods:**
|
|
2099
|
+
|
|
2100
|
+
| Method | Description |
|
|
2101
|
+
|--------|-------------|
|
|
2102
|
+
| `summary(alpha)` | Get formatted summary string |
|
|
2103
|
+
| `print_summary(alpha)` | Print summary to stdout |
|
|
2104
|
+
| `to_dataframe(level)` | Convert to DataFrame ('observation', 'event_study', 'group') |
|
|
2105
|
+
| `pretrend_test(n_leads)` | Run pre-trend F-test (Equation 9) |
|
|
2106
|
+
|
|
2003
2107
|
### TripleDifference
|
|
2004
2108
|
|
|
2005
2109
|
```python
|
|
@@ -2464,6 +2568,14 @@ The `HonestDiD` module implements sensitivity analysis methods for relaxing the
|
|
|
2464
2568
|
|
|
2465
2569
|
### Multi-Period and Staggered Adoption
|
|
2466
2570
|
|
|
2571
|
+
- **Borusyak, K., Jaravel, X., & Spiess, J. (2024).** "Revisiting Event-Study Designs: Robust and Efficient Estimation." *Review of Economic Studies*, 91(6), 3253-3285. [https://doi.org/10.1093/restud/rdae007](https://doi.org/10.1093/restud/rdae007)
|
|
2572
|
+
|
|
2573
|
+
This paper introduces the imputation estimator implemented in our `ImputationDiD` class:
|
|
2574
|
+
- **Efficient imputation**: OLS on untreated observations → impute counterfactuals → aggregate
|
|
2575
|
+
- **Conservative variance**: Theorem 3 clustered variance estimator with auxiliary model
|
|
2576
|
+
- **Pre-trend test**: Independent of treatment effect estimation (Proposition 9)
|
|
2577
|
+
- **Efficiency gains**: ~50% shorter CIs than Callaway-Sant'Anna under homogeneous effects
|
|
2578
|
+
|
|
2467
2579
|
- **Callaway, B., & Sant'Anna, P. H. C. (2021).** "Difference-in-Differences with Multiple Time Periods." *Journal of Econometrics*, 225(2), 200-230. [https://doi.org/10.1016/j.jeconom.2020.12.001](https://doi.org/10.1016/j.jeconom.2020.12.001)
|
|
2468
2580
|
|
|
2469
2581
|
- **Sant'Anna, P. H. C., & Zhao, J. (2020).** "Doubly Robust Difference-in-Differences Estimators." *Journal of Econometrics*, 219(1), 101-122. [https://doi.org/10.1016/j.jeconom.2020.06.003](https://doi.org/10.1016/j.jeconom.2020.06.003)
|
|
@@ -95,6 +95,12 @@ from diff_diff.staggered import (
|
|
|
95
95
|
CSBootstrapResults,
|
|
96
96
|
GroupTimeEffect,
|
|
97
97
|
)
|
|
98
|
+
from diff_diff.imputation import (
|
|
99
|
+
ImputationBootstrapResults,
|
|
100
|
+
ImputationDiD,
|
|
101
|
+
ImputationDiDResults,
|
|
102
|
+
imputation_did,
|
|
103
|
+
)
|
|
98
104
|
from diff_diff.sun_abraham import (
|
|
99
105
|
SABootstrapResults,
|
|
100
106
|
SunAbraham,
|
|
@@ -136,7 +142,7 @@ from diff_diff.datasets import (
|
|
|
136
142
|
load_mpdta,
|
|
137
143
|
)
|
|
138
144
|
|
|
139
|
-
__version__ = "2.2.1"
|
|
145
|
+
__version__ = "2.3.1"
|
|
140
146
|
__all__ = [
|
|
141
147
|
# Estimators
|
|
142
148
|
"DifferenceInDifferences",
|
|
@@ -145,6 +151,7 @@ __all__ = [
|
|
|
145
151
|
"SyntheticDiD",
|
|
146
152
|
"CallawaySantAnna",
|
|
147
153
|
"SunAbraham",
|
|
154
|
+
"ImputationDiD",
|
|
148
155
|
"TripleDifference",
|
|
149
156
|
"TROP",
|
|
150
157
|
# Bacon Decomposition
|
|
@@ -163,6 +170,9 @@ __all__ = [
|
|
|
163
170
|
"GroupTimeEffect",
|
|
164
171
|
"SunAbrahamResults",
|
|
165
172
|
"SABootstrapResults",
|
|
173
|
+
"ImputationDiDResults",
|
|
174
|
+
"ImputationBootstrapResults",
|
|
175
|
+
"imputation_did",
|
|
166
176
|
"TripleDifferenceResults",
|
|
167
177
|
"triple_difference",
|
|
168
178
|
"TROPResults",
|
|
@@ -30,6 +30,11 @@ try:
|
|
|
30
30
|
# TROP estimator acceleration (joint method)
|
|
31
31
|
loocv_grid_search_joint as _rust_loocv_grid_search_joint,
|
|
32
32
|
bootstrap_trop_variance_joint as _rust_bootstrap_trop_variance_joint,
|
|
33
|
+
# SDID weights (Frank-Wolfe matching R's synthdid)
|
|
34
|
+
compute_sdid_unit_weights as _rust_sdid_unit_weights,
|
|
35
|
+
compute_time_weights as _rust_compute_time_weights,
|
|
36
|
+
compute_noise_level as _rust_compute_noise_level,
|
|
37
|
+
sc_weight_fw as _rust_sc_weight_fw,
|
|
33
38
|
)
|
|
34
39
|
_rust_available = True
|
|
35
40
|
except ImportError:
|
|
@@ -46,6 +51,11 @@ except ImportError:
|
|
|
46
51
|
# TROP estimator acceleration (joint method)
|
|
47
52
|
_rust_loocv_grid_search_joint = None
|
|
48
53
|
_rust_bootstrap_trop_variance_joint = None
|
|
54
|
+
# SDID weights (Frank-Wolfe matching R's synthdid)
|
|
55
|
+
_rust_sdid_unit_weights = None
|
|
56
|
+
_rust_compute_time_weights = None
|
|
57
|
+
_rust_compute_noise_level = None
|
|
58
|
+
_rust_sc_weight_fw = None
|
|
49
59
|
|
|
50
60
|
# Determine final backend based on environment variable and availability
|
|
51
61
|
if _backend_env == 'python':
|
|
@@ -63,6 +73,11 @@ if _backend_env == 'python':
|
|
|
63
73
|
# TROP estimator acceleration (joint method)
|
|
64
74
|
_rust_loocv_grid_search_joint = None
|
|
65
75
|
_rust_bootstrap_trop_variance_joint = None
|
|
76
|
+
# SDID weights (Frank-Wolfe matching R's synthdid)
|
|
77
|
+
_rust_sdid_unit_weights = None
|
|
78
|
+
_rust_compute_time_weights = None
|
|
79
|
+
_rust_compute_noise_level = None
|
|
80
|
+
_rust_sc_weight_fw = None
|
|
66
81
|
elif _backend_env == 'rust':
|
|
67
82
|
# Force Rust mode - fail if not available
|
|
68
83
|
if not _rust_available:
|
|
@@ -89,4 +104,9 @@ __all__ = [
|
|
|
89
104
|
# TROP estimator acceleration (joint method)
|
|
90
105
|
'_rust_loocv_grid_search_joint',
|
|
91
106
|
'_rust_bootstrap_trop_variance_joint',
|
|
107
|
+
# SDID weights (Frank-Wolfe matching R's synthdid)
|
|
108
|
+
'_rust_sdid_unit_weights',
|
|
109
|
+
'_rust_compute_time_weights',
|
|
110
|
+
'_rust_compute_noise_level',
|
|
111
|
+
'_rust_sc_weight_fw',
|
|
92
112
|
]
|