diff-diff 2.3.0__tar.gz → 2.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diff_diff-2.3.0 → diff_diff-2.3.2}/PKG-INFO +16 -13
- {diff_diff-2.3.0 → diff_diff-2.3.2}/README.md +13 -11
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/__init__.py +1 -1
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/_backend.py +20 -0
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/results.py +13 -5
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/staggered.py +6 -3
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/sun_abraham.py +60 -24
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/synthetic_did.py +246 -126
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/utils.py +393 -32
- {diff_diff-2.3.0 → diff_diff-2.3.2}/pyproject.toml +3 -2
- {diff_diff-2.3.0 → diff_diff-2.3.2}/rust/Cargo.lock +20 -20
- {diff_diff-2.3.0 → diff_diff-2.3.2}/rust/Cargo.toml +1 -1
- {diff_diff-2.3.0 → diff_diff-2.3.2}/rust/src/lib.rs +7 -1
- diff_diff-2.3.2/rust/src/weights.rs +713 -0
- diff_diff-2.3.0/rust/src/weights.rs +0 -220
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/bacon.py +0 -0
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/datasets.py +0 -0
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/diagnostics.py +0 -0
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/estimators.py +0 -0
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/honest_did.py +0 -0
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/imputation.py +0 -0
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/linalg.py +0 -0
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/power.py +0 -0
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/prep.py +0 -0
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/prep_dgp.py +0 -0
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/pretrends.py +0 -0
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/staggered_aggregation.py +0 -0
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/staggered_bootstrap.py +0 -0
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/staggered_results.py +0 -0
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/triple_diff.py +0 -0
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/trop.py +0 -0
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/twfe.py +0 -0
- {diff_diff-2.3.0 → diff_diff-2.3.2}/diff_diff/visualization.py +0 -0
- {diff_diff-2.3.0 → diff_diff-2.3.2}/rust/src/bootstrap.rs +0 -0
- {diff_diff-2.3.0 → diff_diff-2.3.2}/rust/src/linalg.rs +0 -0
- {diff_diff-2.3.0 → diff_diff-2.3.2}/rust/src/trop.rs +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: diff-diff
|
|
3
|
-
Version: 2.3.0
|
|
3
|
+
Version: 2.3.2
|
|
4
4
|
Classifier: Development Status :: 5 - Production/Stable
|
|
5
5
|
Classifier: Intended Audience :: Science/Research
|
|
6
6
|
Classifier: Operating System :: OS Independent
|
|
@@ -9,6 +9,7 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
9
9
|
Classifier: Programming Language :: Python :: 3.10
|
|
10
10
|
Classifier: Programming Language :: Python :: 3.11
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
12
13
|
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
13
14
|
Requires-Dist: numpy>=1.20.0
|
|
14
15
|
Requires-Dist: pandas>=1.3.0
|
|
@@ -28,7 +29,7 @@ Summary: A library for Difference-in-Differences causal inference analysis
|
|
|
28
29
|
Keywords: causal-inference,difference-in-differences,econometrics,statistics,treatment-effects
|
|
29
30
|
Author: diff-diff contributors
|
|
30
31
|
License-Expression: MIT
|
|
31
|
-
Requires-Python: >=3.9
|
|
32
|
+
Requires-Python: >=3.9, <3.14
|
|
32
33
|
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
33
34
|
Project-URL: Documentation, https://diff-diff.readthedocs.io
|
|
34
35
|
Project-URL: Homepage, https://github.com/igerber/diff-diff
|
|
@@ -1200,11 +1201,12 @@ Use Synthetic DiD instead of standard DiD when:
|
|
|
1200
1201
|
|
|
1201
1202
|
```python
|
|
1202
1203
|
SyntheticDiD(
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
alpha=0.05,
|
|
1206
|
-
|
|
1207
|
-
|
|
1204
|
+
zeta_omega=None, # Unit weight regularization (None = auto-computed from data)
|
|
1205
|
+
zeta_lambda=None, # Time weight regularization (None = auto-computed from data)
|
|
1206
|
+
alpha=0.05, # Significance level
|
|
1207
|
+
variance_method="placebo", # "placebo" (default, matches R) or "bootstrap"
|
|
1208
|
+
n_bootstrap=200, # Replications for SE estimation
|
|
1209
|
+
seed=None # Random seed for reproducibility
|
|
1208
1210
|
)
|
|
1209
1211
|
```
|
|
1210
1212
|
|
|
@@ -1909,11 +1911,12 @@ MultiPeriodDiD(
|
|
|
1909
1911
|
|
|
1910
1912
|
```python
|
|
1911
1913
|
SyntheticDiD(
|
|
1912
|
-
|
|
1913
|
-
|
|
1914
|
-
alpha=0.05,
|
|
1915
|
-
|
|
1916
|
-
|
|
1914
|
+
zeta_omega=None, # Unit weight regularization (None = auto from data)
|
|
1915
|
+
zeta_lambda=None, # Time weight regularization (None = auto from data)
|
|
1916
|
+
alpha=0.05, # Significance level for CIs
|
|
1917
|
+
variance_method="placebo", # "placebo" (R default) or "bootstrap"
|
|
1918
|
+
n_bootstrap=200, # Replications for SE estimation
|
|
1919
|
+
seed=None # Random seed for reproducibility
|
|
1917
1920
|
)
|
|
1918
1921
|
```
|
|
1919
1922
|
|
|
@@ -2487,7 +2490,7 @@ Returns DataFrame with columns: `unit`, `quality_score`, `outcome_trend_score`,
|
|
|
2487
2490
|
|
|
2488
2491
|
## Requirements
|
|
2489
2492
|
|
|
2490
|
-
- Python >= 3.9
|
|
2493
|
+
- Python 3.9 - 3.13
|
|
2491
2494
|
- numpy >= 1.20
|
|
2492
2495
|
- pandas >= 1.3
|
|
2493
2496
|
- scipy >= 1.7
|
|
@@ -1163,11 +1163,12 @@ Use Synthetic DiD instead of standard DiD when:
|
|
|
1163
1163
|
|
|
1164
1164
|
```python
|
|
1165
1165
|
SyntheticDiD(
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
alpha=0.05,
|
|
1169
|
-
|
|
1170
|
-
|
|
1166
|
+
zeta_omega=None, # Unit weight regularization (None = auto-computed from data)
|
|
1167
|
+
zeta_lambda=None, # Time weight regularization (None = auto-computed from data)
|
|
1168
|
+
alpha=0.05, # Significance level
|
|
1169
|
+
variance_method="placebo", # "placebo" (default, matches R) or "bootstrap"
|
|
1170
|
+
n_bootstrap=200, # Replications for SE estimation
|
|
1171
|
+
seed=None # Random seed for reproducibility
|
|
1171
1172
|
)
|
|
1172
1173
|
```
|
|
1173
1174
|
|
|
@@ -1872,11 +1873,12 @@ MultiPeriodDiD(
|
|
|
1872
1873
|
|
|
1873
1874
|
```python
|
|
1874
1875
|
SyntheticDiD(
|
|
1875
|
-
|
|
1876
|
-
|
|
1877
|
-
alpha=0.05,
|
|
1878
|
-
|
|
1879
|
-
|
|
1876
|
+
zeta_omega=None, # Unit weight regularization (None = auto from data)
|
|
1877
|
+
zeta_lambda=None, # Time weight regularization (None = auto from data)
|
|
1878
|
+
alpha=0.05, # Significance level for CIs
|
|
1879
|
+
variance_method="placebo", # "placebo" (R default) or "bootstrap"
|
|
1880
|
+
n_bootstrap=200, # Replications for SE estimation
|
|
1881
|
+
seed=None # Random seed for reproducibility
|
|
1880
1882
|
)
|
|
1881
1883
|
```
|
|
1882
1884
|
|
|
@@ -2450,7 +2452,7 @@ Returns DataFrame with columns: `unit`, `quality_score`, `outcome_trend_score`,
|
|
|
2450
2452
|
|
|
2451
2453
|
## Requirements
|
|
2452
2454
|
|
|
2453
|
-
- Python >= 3.9
|
|
2455
|
+
- Python 3.9 - 3.13
|
|
2454
2456
|
- numpy >= 1.20
|
|
2455
2457
|
- pandas >= 1.3
|
|
2456
2458
|
- scipy >= 1.7
|
|
@@ -30,6 +30,11 @@ try:
|
|
|
30
30
|
# TROP estimator acceleration (joint method)
|
|
31
31
|
loocv_grid_search_joint as _rust_loocv_grid_search_joint,
|
|
32
32
|
bootstrap_trop_variance_joint as _rust_bootstrap_trop_variance_joint,
|
|
33
|
+
# SDID weights (Frank-Wolfe matching R's synthdid)
|
|
34
|
+
compute_sdid_unit_weights as _rust_sdid_unit_weights,
|
|
35
|
+
compute_time_weights as _rust_compute_time_weights,
|
|
36
|
+
compute_noise_level as _rust_compute_noise_level,
|
|
37
|
+
sc_weight_fw as _rust_sc_weight_fw,
|
|
33
38
|
)
|
|
34
39
|
_rust_available = True
|
|
35
40
|
except ImportError:
|
|
@@ -46,6 +51,11 @@ except ImportError:
|
|
|
46
51
|
# TROP estimator acceleration (joint method)
|
|
47
52
|
_rust_loocv_grid_search_joint = None
|
|
48
53
|
_rust_bootstrap_trop_variance_joint = None
|
|
54
|
+
# SDID weights (Frank-Wolfe matching R's synthdid)
|
|
55
|
+
_rust_sdid_unit_weights = None
|
|
56
|
+
_rust_compute_time_weights = None
|
|
57
|
+
_rust_compute_noise_level = None
|
|
58
|
+
_rust_sc_weight_fw = None
|
|
49
59
|
|
|
50
60
|
# Determine final backend based on environment variable and availability
|
|
51
61
|
if _backend_env == 'python':
|
|
@@ -63,6 +73,11 @@ if _backend_env == 'python':
|
|
|
63
73
|
# TROP estimator acceleration (joint method)
|
|
64
74
|
_rust_loocv_grid_search_joint = None
|
|
65
75
|
_rust_bootstrap_trop_variance_joint = None
|
|
76
|
+
# SDID weights (Frank-Wolfe matching R's synthdid)
|
|
77
|
+
_rust_sdid_unit_weights = None
|
|
78
|
+
_rust_compute_time_weights = None
|
|
79
|
+
_rust_compute_noise_level = None
|
|
80
|
+
_rust_sc_weight_fw = None
|
|
66
81
|
elif _backend_env == 'rust':
|
|
67
82
|
# Force Rust mode - fail if not available
|
|
68
83
|
if not _rust_available:
|
|
@@ -89,4 +104,9 @@ __all__ = [
|
|
|
89
104
|
# TROP estimator acceleration (joint method)
|
|
90
105
|
'_rust_loocv_grid_search_joint',
|
|
91
106
|
'_rust_bootstrap_trop_variance_joint',
|
|
107
|
+
# SDID weights (Frank-Wolfe matching R's synthdid)
|
|
108
|
+
'_rust_sdid_unit_weights',
|
|
109
|
+
'_rust_compute_time_weights',
|
|
110
|
+
'_rust_compute_noise_level',
|
|
111
|
+
'_rust_sc_weight_fw',
|
|
92
112
|
]
|
|
@@ -605,8 +605,10 @@ class SyntheticDiDResults:
|
|
|
605
605
|
pre_periods: List[Any]
|
|
606
606
|
post_periods: List[Any]
|
|
607
607
|
alpha: float = 0.05
|
|
608
|
-
variance_method: str = field(default="
|
|
609
|
-
|
|
608
|
+
variance_method: str = field(default="placebo")
|
|
609
|
+
noise_level: Optional[float] = field(default=None)
|
|
610
|
+
zeta_omega: Optional[float] = field(default=None)
|
|
611
|
+
zeta_lambda: Optional[float] = field(default=None)
|
|
610
612
|
pre_treatment_fit: Optional[float] = field(default=None)
|
|
611
613
|
placebo_effects: Optional[np.ndarray] = field(default=None)
|
|
612
614
|
n_bootstrap: Optional[int] = field(default=None)
|
|
@@ -650,8 +652,12 @@ class SyntheticDiDResults:
|
|
|
650
652
|
f"{'Post-treatment periods:':<25} {len(self.post_periods):>10}",
|
|
651
653
|
]
|
|
652
654
|
|
|
653
|
-
if self.
|
|
654
|
-
lines.append(f"{'
|
|
655
|
+
if self.zeta_omega is not None:
|
|
656
|
+
lines.append(f"{'Zeta (unit weights):':<25} {self.zeta_omega:>10.4f}")
|
|
657
|
+
if self.zeta_lambda is not None:
|
|
658
|
+
lines.append(f"{'Zeta (time weights):':<25} {self.zeta_lambda:>10.6f}")
|
|
659
|
+
if self.noise_level is not None:
|
|
660
|
+
lines.append(f"{'Noise level:':<25} {self.noise_level:>10.4f}")
|
|
655
661
|
|
|
656
662
|
if self.pre_treatment_fit is not None:
|
|
657
663
|
lines.append(f"{'Pre-treatment fit (RMSE):':<25} {self.pre_treatment_fit:>10.4f}")
|
|
@@ -731,7 +737,9 @@ class SyntheticDiDResults:
|
|
|
731
737
|
"n_pre_periods": len(self.pre_periods),
|
|
732
738
|
"n_post_periods": len(self.post_periods),
|
|
733
739
|
"variance_method": self.variance_method,
|
|
734
|
-
"
|
|
740
|
+
"noise_level": self.noise_level,
|
|
741
|
+
"zeta_omega": self.zeta_omega,
|
|
742
|
+
"zeta_lambda": self.zeta_lambda,
|
|
735
743
|
"pre_treatment_fit": self.pre_treatment_fit,
|
|
736
744
|
}
|
|
737
745
|
if self.n_bootstrap is not None:
|
|
@@ -415,6 +415,7 @@ class CallawaySantAnna(
|
|
|
415
415
|
cohort_masks[g] = (unit_cohorts == g)
|
|
416
416
|
|
|
417
417
|
# Never-treated mask
|
|
418
|
+
# np.inf was normalized to 0 in fit(), so the np.inf check is defensive only
|
|
418
419
|
never_treated_mask = (unit_cohorts == 0) | (unit_cohorts == np.inf)
|
|
419
420
|
|
|
420
421
|
# Pre-compute covariate matrices by time period if needed
|
|
@@ -639,13 +640,15 @@ class CallawaySantAnna(
|
|
|
639
640
|
# This avoids hardcoding column names in internal methods
|
|
640
641
|
df['first_treat'] = df[first_treat]
|
|
641
642
|
|
|
643
|
+
# Never-treated indicator (must precede treatment_groups to exclude np.inf)
|
|
644
|
+
df['_never_treated'] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
|
|
645
|
+
# Normalize np.inf → 0 so all downstream `> 0` checks exclude never-treated
|
|
646
|
+
df.loc[df[first_treat] == np.inf, first_treat] = 0
|
|
647
|
+
|
|
642
648
|
# Identify groups and time periods
|
|
643
649
|
time_periods = sorted(df[time].unique())
|
|
644
650
|
treatment_groups = sorted([g for g in df[first_treat].unique() if g > 0])
|
|
645
651
|
|
|
646
|
-
# Never-treated indicator (first_treat = 0 or inf)
|
|
647
|
-
df['_never_treated'] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
|
|
648
|
-
|
|
649
652
|
# Get unique units
|
|
650
653
|
unit_info = df.groupby(unit).agg({
|
|
651
654
|
first_treat: 'first',
|
|
@@ -456,9 +456,9 @@ class SunAbraham:
|
|
|
456
456
|
covariates : list, optional
|
|
457
457
|
List of covariate column names to include in regression.
|
|
458
458
|
min_pre_periods : int, default=1
|
|
459
|
-
|
|
459
|
+
**Deprecated**: Accepted but ignored. Will be removed in a future version.
|
|
460
460
|
min_post_periods : int, default=1
|
|
461
|
-
|
|
461
|
+
**Deprecated**: Accepted but ignored. Will be removed in a future version.
|
|
462
462
|
|
|
463
463
|
Returns
|
|
464
464
|
-------
|
|
@@ -470,6 +470,22 @@ class SunAbraham:
|
|
|
470
470
|
ValueError
|
|
471
471
|
If required columns are missing or data validation fails.
|
|
472
472
|
"""
|
|
473
|
+
# Deprecation warnings for unimplemented parameters
|
|
474
|
+
if min_pre_periods != 1:
|
|
475
|
+
warnings.warn(
|
|
476
|
+
"min_pre_periods is not yet implemented and will be ignored. "
|
|
477
|
+
"This parameter will be removed in a future version.",
|
|
478
|
+
FutureWarning,
|
|
479
|
+
stacklevel=2,
|
|
480
|
+
)
|
|
481
|
+
if min_post_periods != 1:
|
|
482
|
+
warnings.warn(
|
|
483
|
+
"min_post_periods is not yet implemented and will be ignored. "
|
|
484
|
+
"This parameter will be removed in a future version.",
|
|
485
|
+
FutureWarning,
|
|
486
|
+
stacklevel=2,
|
|
487
|
+
)
|
|
488
|
+
|
|
473
489
|
# Validate inputs
|
|
474
490
|
required_cols = [outcome, unit, time, first_treat]
|
|
475
491
|
if covariates:
|
|
@@ -486,13 +502,15 @@ class SunAbraham:
|
|
|
486
502
|
df[time] = pd.to_numeric(df[time])
|
|
487
503
|
df[first_treat] = pd.to_numeric(df[first_treat])
|
|
488
504
|
|
|
505
|
+
# Never-treated indicator (must precede treatment_groups to exclude np.inf)
|
|
506
|
+
df["_never_treated"] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
|
|
507
|
+
# Normalize np.inf → 0 so all downstream `> 0` checks exclude never-treated
|
|
508
|
+
df.loc[df[first_treat] == np.inf, first_treat] = 0
|
|
509
|
+
|
|
489
510
|
# Identify groups and time periods
|
|
490
511
|
time_periods = sorted(df[time].unique())
|
|
491
512
|
treatment_groups = sorted([g for g in df[first_treat].unique() if g > 0])
|
|
492
513
|
|
|
493
|
-
# Never-treated indicator
|
|
494
|
-
df["_never_treated"] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
|
|
495
|
-
|
|
496
514
|
# Get unique units
|
|
497
515
|
unit_info = (
|
|
498
516
|
df.groupby(unit)
|
|
@@ -533,9 +551,9 @@ class SunAbraham:
|
|
|
533
551
|
|
|
534
552
|
all_rel_times_sorted = sorted(all_rel_times)
|
|
535
553
|
|
|
536
|
-
#
|
|
537
|
-
min_rel =
|
|
538
|
-
max_rel =
|
|
554
|
+
# Use full range of relative times (no artificial truncation, matches R's fixest::sunab())
|
|
555
|
+
min_rel = min(all_rel_times_sorted)
|
|
556
|
+
max_rel = max(all_rel_times_sorted)
|
|
539
557
|
|
|
540
558
|
# Reference period: last pre-treatment period (typically -1)
|
|
541
559
|
self._reference_period = -1 - self.anticipation
|
|
@@ -765,12 +783,18 @@ class SunAbraham:
|
|
|
765
783
|
|
|
766
784
|
# Fit OLS using LinearRegression helper (more stable than manual X'X inverse)
|
|
767
785
|
cluster_ids = df_demeaned[cluster_var].values
|
|
786
|
+
|
|
787
|
+
# Degrees of freedom adjustment for absorbed unit and time fixed effects
|
|
788
|
+
n_units_fe = df[unit].nunique()
|
|
789
|
+
n_times_fe = df[time].nunique()
|
|
790
|
+
df_adj = n_units_fe + n_times_fe - 1
|
|
791
|
+
|
|
768
792
|
reg = LinearRegression(
|
|
769
793
|
include_intercept=False, # Already demeaned, no intercept needed
|
|
770
794
|
robust=True,
|
|
771
795
|
cluster_ids=cluster_ids,
|
|
772
796
|
rank_deficient_action=self.rank_deficient_action,
|
|
773
|
-
).fit(X, y)
|
|
797
|
+
).fit(X, y, df_adjustment=df_adj)
|
|
774
798
|
|
|
775
799
|
coefficients = reg.coefficients_
|
|
776
800
|
vcov = reg.vcov_
|
|
@@ -821,7 +845,8 @@ class SunAbraham:
|
|
|
821
845
|
|
|
822
846
|
β_e = Σ_g w_{g,e} × δ_{g,e}
|
|
823
847
|
|
|
824
|
-
where w_{g,e}
|
|
848
|
+
where w_{g,e} = n_{g,e} / Σ_g n_{g,e} is the share of observations from cohort g
|
|
849
|
+
at event-time e among all treated observations at that event-time.
|
|
825
850
|
|
|
826
851
|
Returns
|
|
827
852
|
-------
|
|
@@ -833,9 +858,8 @@ class SunAbraham:
|
|
|
833
858
|
event_study_effects: Dict[int, Dict[str, Any]] = {}
|
|
834
859
|
cohort_weights: Dict[int, Dict[Any, float]] = {}
|
|
835
860
|
|
|
836
|
-
#
|
|
837
|
-
|
|
838
|
-
cohort_sizes = unit_cohorts[unit_cohorts > 0].value_counts().to_dict()
|
|
861
|
+
# Pre-compute per-event-time observation counts: n_{g,e}
|
|
862
|
+
event_time_counts = df[df[first_treat] > 0].groupby([first_treat, "_rel_time"]).size()
|
|
839
863
|
|
|
840
864
|
for e in rel_periods:
|
|
841
865
|
# Get cohorts that have observations at this relative time
|
|
@@ -847,13 +871,13 @@ class SunAbraham:
|
|
|
847
871
|
if not cohorts_at_e:
|
|
848
872
|
continue
|
|
849
873
|
|
|
850
|
-
# Compute IW weights:
|
|
874
|
+
# Compute IW weights: n_{g,e} / Σ_g n_{g,e}
|
|
851
875
|
weights = {}
|
|
852
876
|
total_size = 0
|
|
853
877
|
for g in cohorts_at_e:
|
|
854
|
-
|
|
855
|
-
weights[g] =
|
|
856
|
-
total_size +=
|
|
878
|
+
n_g_e = event_time_counts.get((g, e), 0)
|
|
879
|
+
weights[g] = n_g_e
|
|
880
|
+
total_size += n_g_e
|
|
857
881
|
|
|
858
882
|
if total_size == 0:
|
|
859
883
|
continue
|
|
@@ -915,7 +939,7 @@ class SunAbraham:
|
|
|
915
939
|
]
|
|
916
940
|
|
|
917
941
|
if not post_effects:
|
|
918
|
-
return
|
|
942
|
+
return np.nan, np.nan
|
|
919
943
|
|
|
920
944
|
# Weight by number of treated observations at each relative time
|
|
921
945
|
post_weights = []
|
|
@@ -948,7 +972,13 @@ class SunAbraham:
|
|
|
948
972
|
overall_weights_by_coef[key] += period_weight * cw
|
|
949
973
|
|
|
950
974
|
if not overall_weights_by_coef:
|
|
951
|
-
# Fallback to
|
|
975
|
+
# Fallback to simplified variance that ignores covariances between periods
|
|
976
|
+
warnings.warn(
|
|
977
|
+
"Could not construct full weight vector for overall ATT SE. "
|
|
978
|
+
"Using simplified variance that ignores covariances between periods.",
|
|
979
|
+
UserWarning,
|
|
980
|
+
stacklevel=2,
|
|
981
|
+
)
|
|
952
982
|
overall_var = float(
|
|
953
983
|
np.sum((post_weights ** 2) * np.array([eff["se"] ** 2 for _, eff in post_effects]))
|
|
954
984
|
)
|
|
@@ -1029,6 +1059,7 @@ class SunAbraham:
|
|
|
1029
1059
|
df_b[time] - df_b[first_treat],
|
|
1030
1060
|
np.nan
|
|
1031
1061
|
)
|
|
1062
|
+
# np.inf was normalized to 0 in fit(), so the np.inf check is defensive only
|
|
1032
1063
|
df_b["_never_treated"] = (
|
|
1033
1064
|
(df_b[first_treat] == 0) | (df_b[first_treat] == np.inf)
|
|
1034
1065
|
)
|
|
@@ -1113,11 +1144,16 @@ class SunAbraham:
|
|
|
1113
1144
|
event_study_p_values[e] = p_value
|
|
1114
1145
|
|
|
1115
1146
|
# Overall ATT statistics
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1147
|
+
if not np.isfinite(original_overall_att):
|
|
1148
|
+
overall_se = np.nan
|
|
1149
|
+
overall_ci = (np.nan, np.nan)
|
|
1150
|
+
overall_p = np.nan
|
|
1151
|
+
else:
|
|
1152
|
+
overall_se = float(np.std(bootstrap_overall, ddof=1))
|
|
1153
|
+
overall_ci = self._compute_percentile_ci(bootstrap_overall, self.alpha)
|
|
1154
|
+
overall_p = self._compute_bootstrap_pvalue(
|
|
1155
|
+
original_overall_att, bootstrap_overall
|
|
1156
|
+
)
|
|
1121
1157
|
|
|
1122
1158
|
return SABootstrapResults(
|
|
1123
1159
|
n_bootstrap=self.n_bootstrap,
|