diff-diff 3.0.1__cp314-cp314-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diff_diff/__init__.py +382 -0
- diff_diff/_backend.py +134 -0
- diff_diff/_rust_backend.cp314-win_amd64.pyd +0 -0
- diff_diff/bacon.py +1140 -0
- diff_diff/bootstrap_utils.py +730 -0
- diff_diff/continuous_did.py +1626 -0
- diff_diff/continuous_did_bspline.py +190 -0
- diff_diff/continuous_did_results.py +374 -0
- diff_diff/datasets.py +815 -0
- diff_diff/diagnostics.py +882 -0
- diff_diff/efficient_did.py +1770 -0
- diff_diff/efficient_did_bootstrap.py +359 -0
- diff_diff/efficient_did_covariates.py +899 -0
- diff_diff/efficient_did_results.py +368 -0
- diff_diff/efficient_did_weights.py +617 -0
- diff_diff/estimators.py +1501 -0
- diff_diff/honest_did.py +2585 -0
- diff_diff/imputation.py +2458 -0
- diff_diff/imputation_bootstrap.py +418 -0
- diff_diff/imputation_results.py +448 -0
- diff_diff/linalg.py +2538 -0
- diff_diff/power.py +2588 -0
- diff_diff/practitioner.py +869 -0
- diff_diff/prep.py +1738 -0
- diff_diff/prep_dgp.py +1718 -0
- diff_diff/pretrends.py +1105 -0
- diff_diff/results.py +918 -0
- diff_diff/stacked_did.py +1049 -0
- diff_diff/stacked_did_results.py +339 -0
- diff_diff/staggered.py +3895 -0
- diff_diff/staggered_aggregation.py +864 -0
- diff_diff/staggered_bootstrap.py +752 -0
- diff_diff/staggered_results.py +416 -0
- diff_diff/staggered_triple_diff.py +1545 -0
- diff_diff/staggered_triple_diff_results.py +416 -0
- diff_diff/sun_abraham.py +1685 -0
- diff_diff/survey.py +1981 -0
- diff_diff/synthetic_did.py +1136 -0
- diff_diff/triple_diff.py +2047 -0
- diff_diff/trop.py +952 -0
- diff_diff/trop_global.py +1270 -0
- diff_diff/trop_local.py +1307 -0
- diff_diff/trop_results.py +356 -0
- diff_diff/twfe.py +542 -0
- diff_diff/two_stage.py +1952 -0
- diff_diff/two_stage_bootstrap.py +520 -0
- diff_diff/two_stage_results.py +400 -0
- diff_diff/utils.py +1902 -0
- diff_diff/visualization/__init__.py +61 -0
- diff_diff/visualization/_common.py +328 -0
- diff_diff/visualization/_continuous.py +274 -0
- diff_diff/visualization/_diagnostic.py +817 -0
- diff_diff/visualization/_event_study.py +1086 -0
- diff_diff/visualization/_power.py +661 -0
- diff_diff/visualization/_staggered.py +833 -0
- diff_diff/visualization/_synthetic.py +197 -0
- diff_diff/wooldridge.py +1285 -0
- diff_diff/wooldridge_results.py +349 -0
- diff_diff-3.0.1.dist-info/METADATA +2997 -0
- diff_diff-3.0.1.dist-info/RECORD +62 -0
- diff_diff-3.0.1.dist-info/WHEEL +4 -0
- diff_diff-3.0.1.dist-info/sboms/diff_diff_rust.cyclonedx.json +5843 -0
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Result containers for the Stacked DiD estimator.
|
|
3
|
+
|
|
4
|
+
This module contains StackedDiDResults dataclass for Wing, Freedman &
|
|
5
|
+
Hollingsworth (2024) stacked difference-in-differences estimation.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
import pandas as pd
|
|
13
|
+
|
|
14
|
+
from diff_diff.results import _format_survey_block, _get_significance_stars
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"StackedDiDResults",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class StackedDiDResults:
    """
    Results from Stacked DiD estimation (Wing, Freedman & Hollingsworth 2024).

    Attributes
    ----------
    overall_att : float
        Overall average treatment effect on the treated (average of
        post-treatment event-study coefficients).
    overall_se : float
        Standard error of overall ATT (delta method on VCV).
    overall_t_stat : float
        T-statistic for overall ATT.
    overall_p_value : float
        P-value for overall ATT.
    overall_conf_int : tuple
        Confidence interval for overall ATT.
    event_study_effects : dict, optional
        Dictionary mapping event time h to effect dict with keys:
        'effect', 'se', 't_stat', 'p_value', 'conf_int', 'n_obs'.
    group_effects : dict, optional
        Dictionary mapping cohort g to effect dict.
    stacked_data : pd.DataFrame
        Full stacked dataset with _sub_exp, _event_time, _D_sa,
        _Q_weight columns. Accessible for custom analysis.
    groups : list
        Adoption events in the trimmed set (Omega_kappa).
    trimmed_groups : list
        Adoption events excluded by IC1/IC2.
    time_periods : list
        All time periods in the original data.
    n_obs : int
        Number of observations in the original data.
    n_stacked_obs : int
        Number of observations in the stacked dataset.
    n_sub_experiments : int
        Number of sub-experiments in the stack.
    n_treated_units : int
        Distinct treated units across trimmed set.
    n_control_units : int
        Distinct control units across trimmed set.
    kappa_pre : int
        Pre-treatment event-time window size.
    kappa_post : int
        Post-treatment event-time window size.
    weighting : str
        Weighting scheme used.
    clean_control : str
        Clean control definition used.
    alpha : float
        Significance level used.
    survey_metadata : object, optional
        Survey design metadata (SurveyMetadata instance from
        diff_diff.survey). When set, ``summary()`` includes a survey block.
    """

    overall_att: float
    overall_se: float
    overall_t_stat: float
    overall_p_value: float
    overall_conf_int: Tuple[float, float]
    event_study_effects: Optional[Dict[int, Dict[str, Any]]]
    group_effects: Optional[Dict[Any, Dict[str, Any]]]
    stacked_data: pd.DataFrame = field(repr=False)
    groups: List[Any] = field(default_factory=list)
    trimmed_groups: List[Any] = field(default_factory=list)
    time_periods: List[Any] = field(default_factory=list)
    n_obs: int = 0
    n_stacked_obs: int = 0
    n_sub_experiments: int = 0
    n_treated_units: int = 0
    n_control_units: int = 0
    kappa_pre: int = 1
    kappa_post: int = 1
    weighting: str = "aggregate"
    clean_control: str = "not_yet_treated"
    alpha: float = 0.05
    # Survey design metadata (SurveyMetadata instance from diff_diff.survey)
    survey_metadata: Optional[Any] = field(default=None)

    # Total width of the text report. Deliberately unannotated so that
    # @dataclass does not turn it into an init field.
    _WIDTH = 85

    def __repr__(self) -> str:
        """Concise string representation."""
        sig = _get_significance_stars(self.overall_p_value)
        return (
            f"StackedDiDResults(ATT={self.overall_att:.4f}{sig}, "
            f"SE={self.overall_se:.4f}, "
            f"n_sub_exp={self.n_sub_experiments}, "
            f"n_stacked_obs={self.n_stacked_obs})"
        )

    @property
    def coef_var(self) -> float:
        """Coefficient of variation: SE / |overall ATT|. NaN when ATT is 0 or SE non-finite."""
        if not (np.isfinite(self.overall_se) and self.overall_se >= 0):
            return np.nan
        if not np.isfinite(self.overall_att) or self.overall_att == 0:
            return np.nan
        return self.overall_se / abs(self.overall_att)

    @staticmethod
    def _table_header(first_column: str) -> str:
        """Return the column-title line of a coefficient table."""
        return (
            f"{first_column:<15} {'Estimate':>12} {'Std. Err.':>12} "
            f"{'t-stat':>10} {'P>|t|':>10} {'Sig.':>6}"
        )

    @staticmethod
    def _format_row(label: Any, estimate: float, se: float, t_stat: float, p_value: float) -> str:
        """Format one coefficient row; non-finite t/p cells are shown as 'NaN'."""
        t_cell = f"{t_stat:>10.3f}" if np.isfinite(t_stat) else f"{'NaN':>10}"
        p_cell = f"{p_value:>10.4f}" if np.isfinite(p_value) else f"{'NaN':>10}"
        stars = _get_significance_stars(p_value)
        return f"{label:<15} {estimate:>12.4f} {se:>12.4f} {t_cell} {p_cell} {stars:>6}"

    @classmethod
    def _format_effect_row(cls, label: Any, eff: Dict[str, Any]) -> str:
        """Format an effect dict as a table row; a NaN effect yields an all-NaN row."""
        if np.isnan(eff["effect"]):
            return f"{label:<15} {'NaN':>12} {'NaN':>12} {'NaN':>10} {'NaN':>10} {'':>6}"
        return cls._format_row(label, eff["effect"], eff["se"], eff["t_stat"], eff["p_value"])

    def summary(self, alpha: Optional[float] = None) -> str:
        """
        Generate formatted summary of estimation results.

        Parameters
        ----------
        alpha : float, optional
            Significance level used for the confidence-level label.
            Defaults to alpha used in estimation. Only the label changes:
            the interval printed is always the stored ``overall_conf_int``.

        Returns
        -------
        str
            Formatted summary.
        """
        # `is None` rather than truthiness so an explicit alpha=0 is honoured.
        if alpha is None:
            alpha = self.alpha
        # ':g' avoids the truncation int() applied to values such as
        # (1 - 0.07) * 100 == 92.99999999999999, and keeps e.g. 97.5 intact.
        conf_level = f"{(1 - alpha) * 100:g}"

        width = self._WIDTH
        banner = "=" * width
        rule = "-" * width

        overview = [
            ("Original observations:", self.n_obs),
            ("Stacked observations:", self.n_stacked_obs),
            ("Sub-experiments:", self.n_sub_experiments),
            ("Treated units:", self.n_treated_units),
            ("Control units:", self.n_control_units),
            ("Treatment cohorts:", len(self.groups)),
            ("Trimmed cohorts:", len(self.trimmed_groups)),
            ("Event window:", f"[{-self.kappa_pre}, {self.kappa_post}]"),
            ("Weighting:", self.weighting),
            ("Clean control:", self.clean_control),
        ]

        lines = [
            banner,
            "Stacked DiD Estimator Results (Wing, Freedman & Hollingsworth 2024)".center(width),
            banner,
            "",
        ]
        lines.extend(f"{label:<30} {value:>10}" for label, value in overview)
        lines.append("")

        # Add survey design info
        if self.survey_metadata is not None:
            lines.extend(_format_survey_block(self.survey_metadata, width))

        # Overall ATT
        lines.extend(
            [
                rule,
                "Overall Average Treatment Effect on the Treated".center(width),
                rule,
                self._table_header("Parameter"),
                rule,
                self._format_row(
                    "ATT",
                    self.overall_att,
                    self.overall_se,
                    self.overall_t_stat,
                    self.overall_p_value,
                ),
                rule,
                "",
                f"{conf_level}% Confidence Interval: "
                f"[{self.overall_conf_int[0]:.4f}, {self.overall_conf_int[1]:.4f}]",
            ]
        )

        cv = self.coef_var
        if np.isfinite(cv):
            lines.append(f"{'CV (SE/|ATT|):':<25} {cv:>10.4f}")

        lines.append("")

        # Event study effects
        if self.event_study_effects:
            lines.extend(
                [
                    rule,
                    "Event Study (Dynamic) Effects".center(width),
                    rule,
                    self._table_header("Rel. Period"),
                    rule,
                ]
            )
            for h in sorted(self.event_study_effects):
                eff = self.event_study_effects[h]
                if eff.get("n_obs", 1) == 0:
                    # Reference period marker. Uses the same column widths as
                    # every other row so the table stays aligned.
                    ref_label = f"[ref: {h}]"
                    lines.append(
                        f"{ref_label:<15} {'0.0000':>12} {'---':>12} "
                        f"{'---':>10} {'---':>10} {'':>6}"
                    )
                else:
                    lines.append(self._format_effect_row(h, eff))
            lines.extend([rule, ""])

        # Group effects
        if self.group_effects:
            lines.extend(
                [
                    rule,
                    "Group (Cohort) Effects".center(width),
                    rule,
                    self._table_header("Cohort"),
                    rule,
                ]
            )
            for g in sorted(self.group_effects):
                lines.append(self._format_effect_row(g, self.group_effects[g]))
            lines.extend([rule, ""])

        lines.extend(
            [
                "Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1",
                banner,
            ]
        )

        return "\n".join(lines)

    def print_summary(self, alpha: Optional[float] = None) -> None:
        """Print summary to stdout."""
        print(self.summary(alpha))

    def to_dataframe(self, level: str = "event_study") -> pd.DataFrame:
        """
        Convert results to DataFrame.

        Parameters
        ----------
        level : str, default="event_study"
            Level of aggregation. Only "event_study" (effects by relative
            time) produces a frame; "group" is recognised but always raises
            because the pooled stacked regression has no cohort-level output.

        Returns
        -------
        pd.DataFrame
            One row per relative period, sorted by period, with columns
            relative_period, effect, se, t_stat, p_value, conf_int_lower,
            conf_int_upper, n_obs. The columns are present even when there
            are no rows.

        Raises
        ------
        ValueError
            If event-study effects were not computed, if ``level`` is
            "group", or if ``level`` is not a recognised value.
        """
        if level == "event_study":
            if self.event_study_effects is None:
                raise ValueError(
                    "Event study effects not computed. " "Use aggregate='event_study'."
                )
            columns = [
                "relative_period",
                "effect",
                "se",
                "t_stat",
                "p_value",
                "conf_int_lower",
                "conf_int_upper",
                "n_obs",
            ]
            rows = []
            for h in sorted(self.event_study_effects):
                data = self.event_study_effects[h]
                rows.append(
                    {
                        "relative_period": h,
                        "effect": data["effect"],
                        "se": data["se"],
                        "t_stat": data["t_stat"],
                        "p_value": data["p_value"],
                        "conf_int_lower": data["conf_int"][0],
                        "conf_int_upper": data["conf_int"][1],
                        "n_obs": data.get("n_obs", np.nan),
                    }
                )
            # Explicit columns keep the schema stable for an empty mapping.
            return pd.DataFrame(rows, columns=columns)

        elif level == "group":
            raise ValueError(
                "Group aggregation is not supported by StackedDiD. "
                "The pooled stacked regression cannot produce cohort-specific "
                "effects. Use CallawaySantAnna or ImputationDiD for "
                "cohort-level estimates."
            )

        else:
            raise ValueError(f"Unknown level: {level}. Use 'event_study' or 'group'.")

    @property
    def is_significant(self) -> bool:
        """Check if overall ATT is significant (p-value below ``alpha``)."""
        return bool(self.overall_p_value < self.alpha)

    @property
    def significance_stars(self) -> str:
        """Significance stars for overall ATT."""
        return _get_significance_stars(self.overall_p_value)
|