diff-diff 2.3.2__tar.gz → 2.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. {diff_diff-2.3.2 → diff_diff-2.4.1}/PKG-INFO +105 -2
  2. {diff_diff-2.3.2 → diff_diff-2.4.1}/README.md +104 -1
  3. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/__init__.py +11 -1
  4. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/diagnostics.py +4 -10
  5. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/estimators.py +5 -19
  6. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/imputation.py +18 -739
  7. diff_diff-2.4.1/diff_diff/imputation_bootstrap.py +310 -0
  8. diff_diff-2.4.1/diff_diff/imputation_results.py +426 -0
  9. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/linalg.py +6 -6
  10. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/staggered.py +16 -30
  11. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/staggered_aggregation.py +3 -10
  12. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/sun_abraham.py +7 -12
  13. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/synthetic_did.py +8 -19
  14. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/triple_diff.py +6 -11
  15. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/trop.py +12 -325
  16. diff_diff-2.4.1/diff_diff/trop_results.py +322 -0
  17. diff_diff-2.4.1/diff_diff/two_stage.py +1398 -0
  18. diff_diff-2.4.1/diff_diff/two_stage_bootstrap.py +449 -0
  19. diff_diff-2.4.1/diff_diff/two_stage_results.py +379 -0
  20. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/utils.py +36 -3
  21. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/visualization.py +2 -0
  22. {diff_diff-2.3.2 → diff_diff-2.4.1}/pyproject.toml +1 -1
  23. {diff_diff-2.3.2 → diff_diff-2.4.1}/rust/Cargo.lock +1 -1
  24. {diff_diff-2.3.2 → diff_diff-2.4.1}/rust/Cargo.toml +1 -1
  25. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/_backend.py +0 -0
  26. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/bacon.py +0 -0
  27. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/datasets.py +0 -0
  28. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/honest_did.py +0 -0
  29. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/power.py +0 -0
  30. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/prep.py +0 -0
  31. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/prep_dgp.py +0 -0
  32. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/pretrends.py +0 -0
  33. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/results.py +0 -0
  34. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/staggered_bootstrap.py +0 -0
  35. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/staggered_results.py +0 -0
  36. {diff_diff-2.3.2 → diff_diff-2.4.1}/diff_diff/twfe.py +0 -0
  37. {diff_diff-2.3.2 → diff_diff-2.4.1}/rust/src/bootstrap.rs +0 -0
  38. {diff_diff-2.3.2 → diff_diff-2.4.1}/rust/src/lib.rs +0 -0
  39. {diff_diff-2.3.2 → diff_diff-2.4.1}/rust/src/linalg.rs +0 -0
  40. {diff_diff-2.3.2 → diff_diff-2.4.1}/rust/src/trop.rs +0 -0
  41. {diff_diff-2.3.2 → diff_diff-2.4.1}/rust/src/weights.rs +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diff-diff
3
- Version: 2.3.2
3
+ Version: 2.4.1
4
4
  Classifier: Development Status :: 5 - Production/Stable
5
5
  Classifier: Intended Audience :: Science/Research
6
6
  Classifier: Operating System :: OS Independent
@@ -108,7 +108,7 @@ Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1
108
108
  - **Wild cluster bootstrap**: Valid inference with few clusters (<50) using Rademacher, Webb, or Mammen weights
109
109
  - **Panel data support**: Two-way fixed effects estimator for panel designs
110
110
  - **Multi-period analysis**: Event-study style DiD with period-specific treatment effects
111
- - **Staggered adoption**: Callaway-Sant'Anna (2021), Sun-Abraham (2021), and Borusyak-Jaravel-Spiess (2024) imputation estimators for heterogeneous treatment timing
111
+ - **Staggered adoption**: Callaway-Sant'Anna (2021), Sun-Abraham (2021), Borusyak-Jaravel-Spiess (2024) imputation, and Two-Stage DiD (Gardner 2022) estimators for heterogeneous treatment timing
112
112
  - **Triple Difference (DDD)**: Ortiz-Villavicencio & Sant'Anna (2025) estimators with proper covariate handling
113
113
  - **Synthetic DiD**: Combined DiD with synthetic control for improved robustness
114
114
  - **Triply Robust Panel (TROP)**: Factor-adjusted DiD with synthetic weights (Athey et al. 2025)
@@ -965,6 +965,53 @@ ImputationDiD(
965
965
  | Inference | Conservative variance (Theorem 3) | Multiplier bootstrap |
966
966
  | Pre-trends | Built-in F-test (Equation 9) | Separate testing |
967
967
 
968
+ ### Two-Stage DiD (Gardner 2022)
969
+
970
+ Two-Stage DiD addresses TWFE bias in staggered adoption designs by estimating unit and time fixed effects on untreated observations only, then regressing the residualized outcomes on treatment indicators. Point estimates match the Imputation DiD estimator (Borusyak et al. 2024); the key difference is that Two-Stage DiD uses a GMM sandwich variance estimator that accounts for first-stage estimation error, while Imputation DiD uses a conservative variance (Theorem 3).
971
+
972
+ ```python
973
+ from diff_diff import TwoStageDiD
974
+
975
+ # Basic usage
976
+ est = TwoStageDiD()
977
+ results = est.fit(data, outcome='outcome', unit='unit', time='period', first_treat='first_treat')
978
+ results.print_summary()
979
+ ```
980
+
981
+ **Event study:**
982
+
983
+ ```python
984
+ # Event study aggregation with visualization
985
+ results = est.fit(data, outcome='outcome', unit='unit', time='period',
986
+ first_treat='first_treat', aggregate='event_study')
987
+ plot_event_study(results)
988
+ ```
989
+
990
+ **Parameters:**
991
+
992
+ ```python
993
+ TwoStageDiD(
994
+ anticipation=0, # Periods of anticipation effects
995
+ alpha=0.05, # Significance level for CIs
996
+ cluster=None, # Column for cluster-robust SEs (defaults to unit)
997
+ n_bootstrap=0, # Bootstrap iterations (0 = analytical GMM SEs)
998
+ seed=None, # Random seed
999
+ rank_deficient_action='warn', # 'warn', 'error', or 'silent'
1000
+ horizon_max=None, # Max event-study horizon
1001
+ )
1002
+ ```
1003
+
1004
+ **When to use Two-Stage DiD vs Imputation DiD:**
1005
+
1006
+ | Aspect | Two-Stage DiD | Imputation DiD |
1007
+ |--------|--------------|---------------|
1008
+ | Point estimates | Identical | Identical |
1009
+ | Variance | GMM sandwich (accounts for first-stage error) | Conservative (Theorem 3, may overcover) |
1010
+ | Intuition | Residualize then regress | Impute counterfactuals then aggregate |
1011
+ | Reference impl. | R `did2s` package | R `didimputation` package |
1012
+
1013
+ Both estimators are efficient under homogeneous treatment effects, producing shorter confidence intervals than Callaway-Sant'Anna or Sun-Abraham.
1014
+
968
1015
  ### Triple Difference (DDD)
969
1016
 
970
1017
  Triple Difference (DDD) is used when treatment requires satisfying two criteria: belonging to a treated **group** AND being in an eligible **partition**. The `TripleDifference` class implements the methodology from Ortiz-Villavicencio & Sant'Anna (2025), which correctly handles covariate adjustment (unlike naive implementations).
@@ -2142,6 +2189,58 @@ ImputationDiD(
2142
2189
  | `to_dataframe(level)` | Convert to DataFrame ('observation', 'event_study', 'group') |
2143
2190
  | `pretrend_test(n_leads)` | Run pre-trend F-test (Equation 9) |
2144
2191
 
2192
+ ### TwoStageDiD
2193
+
2194
+ ```python
2195
+ TwoStageDiD(
2196
+ anticipation=0, # Periods of anticipation effects
2197
+ alpha=0.05, # Significance level for CIs
2198
+ cluster=None, # Column for cluster-robust SEs (defaults to unit)
2199
+ n_bootstrap=0, # Bootstrap iterations (0 = analytical GMM SEs)
2200
+ seed=None, # Random seed
2201
+ rank_deficient_action='warn', # 'warn', 'error', or 'silent'
2202
+ horizon_max=None, # Max event-study horizon
2203
+ )
2204
+ ```
2205
+
2206
+ **fit() Parameters:**
2207
+
2208
+ | Parameter | Type | Description |
2209
+ |-----------|------|-------------|
2210
+ | `data` | DataFrame | Panel data |
2211
+ | `outcome` | str | Outcome variable column name |
2212
+ | `unit` | str | Unit identifier column |
2213
+ | `time` | str | Time period column |
2214
+ | `first_treat` | str | First treatment period column (0 for never-treated) |
2215
+ | `covariates` | list | Covariate column names |
2216
+ | `aggregate` | str | Aggregation: None, "event_study", "group", "all" |
2217
+ | `balance_e` | int | Balance event study to this many pre-treatment periods |
2218
+
2219
+ ### TwoStageDiDResults
2220
+
2221
+ **Attributes:**
2222
+
2223
+ | Attribute | Description |
2224
+ |-----------|-------------|
2225
+ | `overall_att` | Overall average treatment effect on the treated |
2226
+ | `overall_se` | Standard error (GMM sandwich variance) |
2227
+ | `overall_t_stat` | T-statistic |
2228
+ | `overall_p_value` | P-value for H0: ATT = 0 |
2229
+ | `overall_conf_int` | Confidence interval |
2230
+ | `event_study_effects` | Dict of relative time -> effect dict (if `aggregate='event_study'` or `'all'`) |
2231
+ | `group_effects` | Dict of cohort -> effect dict (if `aggregate='group'` or `'all'`) |
2232
+ | `treatment_effects` | DataFrame of unit-level treatment effects |
2233
+ | `n_treated_obs` | Number of treated observations |
2234
+ | `n_untreated_obs` | Number of untreated observations |
2235
+
2236
+ **Methods:**
2237
+
2238
+ | Method | Description |
2239
+ |--------|-------------|
2240
+ | `summary(alpha)` | Get formatted summary string |
2241
+ | `print_summary(alpha)` | Print summary to stdout |
2242
+ | `to_dataframe(level)` | Convert to DataFrame ('observation', 'event_study', 'group') |
2243
+
2145
2244
  ### TripleDifference
2146
2245
 
2147
2246
  ```python
@@ -2620,6 +2719,10 @@ The `HonestDiD` module implements sensitivity analysis methods for relaxing the
2620
2719
 
2621
2720
  - **Sun, L., & Abraham, S. (2021).** "Estimating Dynamic Treatment Effects in Event Studies with Heterogeneous Treatment Effects." *Journal of Econometrics*, 225(2), 175-199. [https://doi.org/10.1016/j.jeconom.2020.09.006](https://doi.org/10.1016/j.jeconom.2020.09.006)
2622
2721
 
2722
+ - **Gardner, J. (2022).** "Two-stage differences in differences." *arXiv preprint arXiv:2207.05943*. [https://arxiv.org/abs/2207.05943](https://arxiv.org/abs/2207.05943)
2723
+
2724
+ - **Butts, K., & Gardner, J. (2022).** "did2s: Two-Stage Difference-in-Differences." *The R Journal*, 14(1), 162-173. [https://doi.org/10.32614/RJ-2022-048](https://doi.org/10.32614/RJ-2022-048)
2725
+
2623
2726
  - **de Chaisemartin, C., & D'Haultfœuille, X. (2020).** "Two-Way Fixed Effects Estimators with Heterogeneous Treatment Effects." *American Economic Review*, 110(9), 2964-2996. [https://doi.org/10.1257/aer.20181169](https://doi.org/10.1257/aer.20181169)
2624
2727
 
2625
2728
  - **Goodman-Bacon, A. (2021).** "Difference-in-Differences with Variation in Treatment Timing." *Journal of Econometrics*, 225(2), 254-277. [https://doi.org/10.1016/j.jeconom.2021.03.014](https://doi.org/10.1016/j.jeconom.2021.03.014)
@@ -70,7 +70,7 @@ Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1
70
70
  - **Wild cluster bootstrap**: Valid inference with few clusters (<50) using Rademacher, Webb, or Mammen weights
71
71
  - **Panel data support**: Two-way fixed effects estimator for panel designs
72
72
  - **Multi-period analysis**: Event-study style DiD with period-specific treatment effects
73
- - **Staggered adoption**: Callaway-Sant'Anna (2021), Sun-Abraham (2021), and Borusyak-Jaravel-Spiess (2024) imputation estimators for heterogeneous treatment timing
73
+ - **Staggered adoption**: Callaway-Sant'Anna (2021), Sun-Abraham (2021), Borusyak-Jaravel-Spiess (2024) imputation, and Two-Stage DiD (Gardner 2022) estimators for heterogeneous treatment timing
74
74
  - **Triple Difference (DDD)**: Ortiz-Villavicencio & Sant'Anna (2025) estimators with proper covariate handling
75
75
  - **Synthetic DiD**: Combined DiD with synthetic control for improved robustness
76
76
  - **Triply Robust Panel (TROP)**: Factor-adjusted DiD with synthetic weights (Athey et al. 2025)
@@ -927,6 +927,53 @@ ImputationDiD(
927
927
  | Inference | Conservative variance (Theorem 3) | Multiplier bootstrap |
928
928
  | Pre-trends | Built-in F-test (Equation 9) | Separate testing |
929
929
 
930
+ ### Two-Stage DiD (Gardner 2022)
931
+
932
+ Two-Stage DiD addresses TWFE bias in staggered adoption designs by estimating unit and time fixed effects on untreated observations only, then regressing the residualized outcomes on treatment indicators. Point estimates match the Imputation DiD estimator (Borusyak et al. 2024); the key difference is that Two-Stage DiD uses a GMM sandwich variance estimator that accounts for first-stage estimation error, while Imputation DiD uses a conservative variance (Theorem 3).
933
+
934
+ ```python
935
+ from diff_diff import TwoStageDiD
936
+
937
+ # Basic usage
938
+ est = TwoStageDiD()
939
+ results = est.fit(data, outcome='outcome', unit='unit', time='period', first_treat='first_treat')
940
+ results.print_summary()
941
+ ```
942
+
943
+ **Event study:**
944
+
945
+ ```python
946
+ # Event study aggregation with visualization
947
+ results = est.fit(data, outcome='outcome', unit='unit', time='period',
948
+ first_treat='first_treat', aggregate='event_study')
949
+ plot_event_study(results)
950
+ ```
951
+
952
+ **Parameters:**
953
+
954
+ ```python
955
+ TwoStageDiD(
956
+ anticipation=0, # Periods of anticipation effects
957
+ alpha=0.05, # Significance level for CIs
958
+ cluster=None, # Column for cluster-robust SEs (defaults to unit)
959
+ n_bootstrap=0, # Bootstrap iterations (0 = analytical GMM SEs)
960
+ seed=None, # Random seed
961
+ rank_deficient_action='warn', # 'warn', 'error', or 'silent'
962
+ horizon_max=None, # Max event-study horizon
963
+ )
964
+ ```
965
+
966
+ **When to use Two-Stage DiD vs Imputation DiD:**
967
+
968
+ | Aspect | Two-Stage DiD | Imputation DiD |
969
+ |--------|--------------|---------------|
970
+ | Point estimates | Identical | Identical |
971
+ | Variance | GMM sandwich (accounts for first-stage error) | Conservative (Theorem 3, may overcover) |
972
+ | Intuition | Residualize then regress | Impute counterfactuals then aggregate |
973
+ | Reference impl. | R `did2s` package | R `didimputation` package |
974
+
975
+ Both estimators are efficient under homogeneous treatment effects, producing shorter confidence intervals than Callaway-Sant'Anna or Sun-Abraham.
976
+
930
977
  ### Triple Difference (DDD)
931
978
 
932
979
  Triple Difference (DDD) is used when treatment requires satisfying two criteria: belonging to a treated **group** AND being in an eligible **partition**. The `TripleDifference` class implements the methodology from Ortiz-Villavicencio & Sant'Anna (2025), which correctly handles covariate adjustment (unlike naive implementations).
@@ -2104,6 +2151,58 @@ ImputationDiD(
2104
2151
  | `to_dataframe(level)` | Convert to DataFrame ('observation', 'event_study', 'group') |
2105
2152
  | `pretrend_test(n_leads)` | Run pre-trend F-test (Equation 9) |
2106
2153
 
2154
+ ### TwoStageDiD
2155
+
2156
+ ```python
2157
+ TwoStageDiD(
2158
+ anticipation=0, # Periods of anticipation effects
2159
+ alpha=0.05, # Significance level for CIs
2160
+ cluster=None, # Column for cluster-robust SEs (defaults to unit)
2161
+ n_bootstrap=0, # Bootstrap iterations (0 = analytical GMM SEs)
2162
+ seed=None, # Random seed
2163
+ rank_deficient_action='warn', # 'warn', 'error', or 'silent'
2164
+ horizon_max=None, # Max event-study horizon
2165
+ )
2166
+ ```
2167
+
2168
+ **fit() Parameters:**
2169
+
2170
+ | Parameter | Type | Description |
2171
+ |-----------|------|-------------|
2172
+ | `data` | DataFrame | Panel data |
2173
+ | `outcome` | str | Outcome variable column name |
2174
+ | `unit` | str | Unit identifier column |
2175
+ | `time` | str | Time period column |
2176
+ | `first_treat` | str | First treatment period column (0 for never-treated) |
2177
+ | `covariates` | list | Covariate column names |
2178
+ | `aggregate` | str | Aggregation: None, "event_study", "group", "all" |
2179
+ | `balance_e` | int | Balance event study to this many pre-treatment periods |
2180
+
2181
+ ### TwoStageDiDResults
2182
+
2183
+ **Attributes:**
2184
+
2185
+ | Attribute | Description |
2186
+ |-----------|-------------|
2187
+ | `overall_att` | Overall average treatment effect on the treated |
2188
+ | `overall_se` | Standard error (GMM sandwich variance) |
2189
+ | `overall_t_stat` | T-statistic |
2190
+ | `overall_p_value` | P-value for H0: ATT = 0 |
2191
+ | `overall_conf_int` | Confidence interval |
2192
+ | `event_study_effects` | Dict of relative time -> effect dict (if `aggregate='event_study'` or `'all'`) |
2193
+ | `group_effects` | Dict of cohort -> effect dict (if `aggregate='group'` or `'all'`) |
2194
+ | `treatment_effects` | DataFrame of unit-level treatment effects |
2195
+ | `n_treated_obs` | Number of treated observations |
2196
+ | `n_untreated_obs` | Number of untreated observations |
2197
+
2198
+ **Methods:**
2199
+
2200
+ | Method | Description |
2201
+ |--------|-------------|
2202
+ | `summary(alpha)` | Get formatted summary string |
2203
+ | `print_summary(alpha)` | Print summary to stdout |
2204
+ | `to_dataframe(level)` | Convert to DataFrame ('observation', 'event_study', 'group') |
2205
+
2107
2206
  ### TripleDifference
2108
2207
 
2109
2208
  ```python
@@ -2582,6 +2681,10 @@ The `HonestDiD` module implements sensitivity analysis methods for relaxing the
2582
2681
 
2583
2682
  - **Sun, L., & Abraham, S. (2021).** "Estimating Dynamic Treatment Effects in Event Studies with Heterogeneous Treatment Effects." *Journal of Econometrics*, 225(2), 175-199. [https://doi.org/10.1016/j.jeconom.2020.09.006](https://doi.org/10.1016/j.jeconom.2020.09.006)
2584
2683
 
2684
+ - **Gardner, J. (2022).** "Two-stage differences in differences." *arXiv preprint arXiv:2207.05943*. [https://arxiv.org/abs/2207.05943](https://arxiv.org/abs/2207.05943)
2685
+
2686
+ - **Butts, K., & Gardner, J. (2022).** "did2s: Two-Stage Difference-in-Differences." *The R Journal*, 14(1), 162-173. [https://doi.org/10.32614/RJ-2022-048](https://doi.org/10.32614/RJ-2022-048)
2687
+
2585
2688
  - **de Chaisemartin, C., & D'Haultfœuille, X. (2020).** "Two-Way Fixed Effects Estimators with Heterogeneous Treatment Effects." *American Economic Review*, 110(9), 2964-2996. [https://doi.org/10.1257/aer.20181169](https://doi.org/10.1257/aer.20181169)
2586
2689
 
2587
2690
  - **Goodman-Bacon, A. (2021).** "Difference-in-Differences with Variation in Treatment Timing." *Journal of Econometrics*, 225(2), 254-277. [https://doi.org/10.1016/j.jeconom.2021.03.014](https://doi.org/10.1016/j.jeconom.2021.03.014)
@@ -101,6 +101,12 @@ from diff_diff.imputation import (
101
101
  ImputationDiDResults,
102
102
  imputation_did,
103
103
  )
104
+ from diff_diff.two_stage import (
105
+ TwoStageBootstrapResults,
106
+ TwoStageDiD,
107
+ TwoStageDiDResults,
108
+ two_stage_did,
109
+ )
104
110
  from diff_diff.sun_abraham import (
105
111
  SABootstrapResults,
106
112
  SunAbraham,
@@ -142,7 +148,7 @@ from diff_diff.datasets import (
142
148
  load_mpdta,
143
149
  )
144
150
 
145
- __version__ = "2.3.2"
151
+ __version__ = "2.4.1"
146
152
  __all__ = [
147
153
  # Estimators
148
154
  "DifferenceInDifferences",
@@ -152,6 +158,7 @@ __all__ = [
152
158
  "CallawaySantAnna",
153
159
  "SunAbraham",
154
160
  "ImputationDiD",
161
+ "TwoStageDiD",
155
162
  "TripleDifference",
156
163
  "TROP",
157
164
  # Bacon Decomposition
@@ -173,6 +180,9 @@ __all__ = [
173
180
  "ImputationDiDResults",
174
181
  "ImputationBootstrapResults",
175
182
  "imputation_did",
183
+ "TwoStageDiDResults",
184
+ "TwoStageBootstrapResults",
185
+ "two_stage_did",
176
186
  "TripleDifferenceResults",
177
187
  "triple_difference",
178
188
  "TROPResults",
@@ -19,7 +19,7 @@ import pandas as pd
19
19
 
20
20
  from diff_diff.estimators import DifferenceInDifferences
21
21
  from diff_diff.results import _get_significance_stars
22
- from diff_diff.utils import compute_confidence_interval, compute_p_value
22
+ from diff_diff.utils import safe_inference
23
23
 
24
24
 
25
25
  @dataclass
@@ -661,7 +661,7 @@ def permutation_test(
661
661
  ci_lower = np.percentile(valid_effects, alpha / 2 * 100)
662
662
  ci_upper = np.percentile(valid_effects, (1 - alpha / 2) * 100)
663
663
 
664
- # T-stat from original estimate
664
+ # NOTE: Not using safe_inference — p_value is permutation-based, CI is percentile-based.
665
665
  t_stat = original_att / se if np.isfinite(se) and se > 0 else np.nan
666
666
 
667
667
  return PlaceboTestResults(
@@ -782,15 +782,9 @@ def leave_one_out_test(
782
782
 
783
783
  # Statistics of LOO distribution
784
784
  mean_effect = np.mean(valid_effects)
785
- se = np.std(valid_effects, ddof=1) if len(valid_effects) > 1 else 0.0
786
- t_stat = mean_effect / se if np.isfinite(se) and se > 0 else np.nan
787
-
788
- # Use t-distribution for p-value
785
+ se = np.std(valid_effects, ddof=1) if len(valid_effects) > 1 else np.nan
789
786
  df = len(valid_effects) - 1 if len(valid_effects) > 1 else 1
790
- p_value = compute_p_value(t_stat, df=df)
791
-
792
- # CI
793
- conf_int = compute_confidence_interval(mean_effect, se, alpha, df=df) if np.isfinite(se) and se > 0 else (np.nan, np.nan)
787
+ t_stat, p_value, conf_int = safe_inference(mean_effect, se, alpha=alpha, df=df)
794
788
 
795
789
  return PlaceboTestResults(
796
790
  test_type="leave_one_out",
@@ -27,9 +27,8 @@ from diff_diff.linalg import (
27
27
  from diff_diff.results import DiDResults, MultiPeriodDiDResults, PeriodEffect
28
28
  from diff_diff.utils import (
29
29
  WildBootstrapResults,
30
- compute_confidence_interval,
31
- compute_p_value,
32
30
  demean_by_group,
31
+ safe_inference,
33
32
  validate_binary,
34
33
  wild_bootstrap_se,
35
34
  )
@@ -1034,14 +1033,7 @@ class MultiPeriodDiD(DifferenceInDifferences):
1034
1033
  idx = interaction_indices[period]
1035
1034
  effect = coefficients[idx]
1036
1035
  se = np.sqrt(vcov[idx, idx])
1037
- if np.isfinite(se) and se > 0:
1038
- t_stat = effect / se
1039
- p_value = compute_p_value(t_stat, df=df)
1040
- conf_int = compute_confidence_interval(effect, se, self.alpha, df=df)
1041
- else:
1042
- t_stat = np.nan
1043
- p_value = np.nan
1044
- conf_int = (np.nan, np.nan)
1036
+ t_stat, p_value, conf_int = safe_inference(effect, se, alpha=self.alpha, df=df)
1045
1037
 
1046
1038
  period_effects[period] = PeriodEffect(
1047
1039
  period=period,
@@ -1085,15 +1077,9 @@ class MultiPeriodDiD(DifferenceInDifferences):
1085
1077
  avg_conf_int = (np.nan, np.nan)
1086
1078
  else:
1087
1079
  avg_se = float(np.sqrt(avg_var))
1088
- if np.isfinite(avg_se) and avg_se > 0:
1089
- avg_t_stat = avg_att / avg_se
1090
- avg_p_value = compute_p_value(avg_t_stat, df=df)
1091
- avg_conf_int = compute_confidence_interval(avg_att, avg_se, self.alpha, df=df)
1092
- else:
1093
- # Zero SE (degenerate case)
1094
- avg_t_stat = np.nan
1095
- avg_p_value = np.nan
1096
- avg_conf_int = (np.nan, np.nan)
1080
+ avg_t_stat, avg_p_value, avg_conf_int = safe_inference(
1081
+ avg_att, avg_se, alpha=self.alpha, df=df
1082
+ )
1097
1083
 
1098
1084
  # Count observations
1099
1085
  n_treated = int(np.sum(d))