diff-diff 2.1.0__cp39-cp39-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diff_diff/__init__.py +234 -0
- diff_diff/_backend.py +64 -0
- diff_diff/_rust_backend.cpython-39-darwin.so +0 -0
- diff_diff/bacon.py +979 -0
- diff_diff/datasets.py +708 -0
- diff_diff/diagnostics.py +927 -0
- diff_diff/estimators.py +1000 -0
- diff_diff/honest_did.py +1493 -0
- diff_diff/linalg.py +980 -0
- diff_diff/power.py +1350 -0
- diff_diff/prep.py +1338 -0
- diff_diff/pretrends.py +1067 -0
- diff_diff/results.py +703 -0
- diff_diff/staggered.py +2297 -0
- diff_diff/sun_abraham.py +1176 -0
- diff_diff/synthetic_did.py +738 -0
- diff_diff/triple_diff.py +1291 -0
- diff_diff/trop.py +1348 -0
- diff_diff/twfe.py +344 -0
- diff_diff/utils.py +1481 -0
- diff_diff/visualization.py +1627 -0
- diff_diff-2.1.0.dist-info/METADATA +2511 -0
- diff_diff-2.1.0.dist-info/RECORD +24 -0
- diff_diff-2.1.0.dist-info/WHEEL +4 -0
diff_diff/diagnostics.py
ADDED
@@ -0,0 +1,927 @@
+"""
+Diagnostic tools for validating Difference-in-Differences assumptions.
+
+This module provides placebo tests and other diagnostic tools for assessing
+the validity of the parallel trends assumption in DiD designs.
+
+References
+----------
+Bertrand, M., Duflo, E., & Mullainathan, S. (2004). How Much Should We Trust
+Differences-in-Differences Estimates? The Quarterly Journal of Economics,
+119(1), 249-275.
+"""
+
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+import numpy as np
+import pandas as pd
+
+from diff_diff.estimators import DifferenceInDifferences
+from diff_diff.results import _get_significance_stars
+from diff_diff.utils import compute_confidence_interval, compute_p_value
+
+
+@dataclass
+class PlaceboTestResults:
+    """
+    Results from a placebo test for DiD assumption validation.
+
+    Attributes
+    ----------
+    test_type : str
+        Type of placebo test performed.
+    placebo_effect : float
+        Estimated placebo treatment effect.
+    se : float
+        Standard error of the placebo effect.
+    t_stat : float
+        T-statistic for the placebo effect.
+    p_value : float
+        P-value for testing placebo_effect = 0.
+    conf_int : tuple
+        Confidence interval for the placebo effect.
+    n_obs : int
+        Number of observations used in the test.
+    is_significant : bool
+        Whether the placebo effect is significant at alpha=0.05.
+    original_effect : float, optional
+        Original ATT estimate for comparison.
+    original_se : float, optional
+        Original SE for comparison.
+    permutation_distribution : np.ndarray, optional
+        Distribution of permuted effects (for permutation test).
+    leave_one_out_effects : dict, optional
+        Unit-specific effects (for leave-one-out test).
+    fake_period : any, optional
+        The fake treatment period used (for timing test).
+    fake_group : list, optional
+        The fake treatment group used (for group test).
+    """
+
+    test_type: str
+    placebo_effect: float
+    se: float
+    t_stat: float
+    p_value: float
+    conf_int: Tuple[float, float]
+    n_obs: int
+    is_significant: bool
+    alpha: float = 0.05
+
+    # Optional fields for specific test types
+    original_effect: Optional[float] = None
+    original_se: Optional[float] = None
+    permutation_distribution: Optional[np.ndarray] = field(default=None, repr=False)
+    leave_one_out_effects: Optional[Dict[Any, float]] = field(default=None)
+    fake_period: Optional[Any] = None
+    fake_group: Optional[List[Any]] = field(default=None)
+    n_permutations: Optional[int] = None
+
+    @property
+    def significance_stars(self) -> str:
+        """Return significance stars based on p-value."""
+        return _get_significance_stars(self.p_value)
+
+    def summary(self) -> str:
+        """Generate formatted summary of placebo test results."""
+        conf_level = int((1 - self.alpha) * 100)
+
+        lines = [
+            "=" * 65,
+            f"Placebo Test Results: {self.test_type}".center(65),
+            "=" * 65,
+            "",
+            f"{'Placebo effect:':<25} {self.placebo_effect:>12.4f}",
+            f"{'Standard error:':<25} {self.se:>12.4f}",
+            f"{'T-statistic:':<25} {self.t_stat:>12.4f}",
+            f"{'P-value:':<25} {self.p_value:>12.4f}",
+            f"{conf_level}% CI: [{self.conf_int[0]:.4f}, {self.conf_int[1]:.4f}]",
+            "",
+            f"{'Observations:':<25} {self.n_obs:>12}",
+        ]
+
+        if self.original_effect is not None:
+            lines.extend([
+                "",
+                "-" * 65,
+                "Comparison with Original Estimate".center(65),
+                "-" * 65,
+                f"{'Original ATT:':<25} {self.original_effect:>12.4f}",
+            ])
+            if self.original_se is not None:
+                lines.append(f"{'Original SE:':<25} {self.original_se:>12.4f}")
+
+        if self.n_permutations is not None:
+            lines.append(f"{'Number of permutations:':<25} {self.n_permutations:>12}")
+
+        if self.fake_period is not None:
+            lines.append(f"{'Fake treatment period:':<25} {str(self.fake_period):>12}")
+
+        if self.leave_one_out_effects is not None:
+            n_units = len(self.leave_one_out_effects)
+            effects = list(self.leave_one_out_effects.values())
+            lines.extend([
+                "",
+                "-" * 65,
+                "Leave-One-Out Summary".center(65),
+                "-" * 65,
+                f"{'Units analyzed:':<25} {n_units:>12}",
+                f"{'Mean effect:':<25} {np.mean(effects):>12.4f}",
+                f"{'Std. dev.:':<25} {np.std(effects, ddof=1):>12.4f}",
+                f"{'Min effect:':<25} {np.min(effects):>12.4f}",
+                f"{'Max effect:':<25} {np.max(effects):>12.4f}",
+            ])
+
+        # Interpretation
+        lines.extend([
+            "",
+            "-" * 65,
+            "Interpretation".center(65),
+            "-" * 65,
+        ])
+
+        if self.is_significant:
+            lines.append(
+                "WARNING: Significant placebo effect detected (p < 0.05)."
+            )
+            lines.append(
+                "This suggests potential violations of the parallel trends assumption."
+            )
+        else:
+            lines.append(
+                "No significant placebo effect detected (p >= 0.05)."
+            )
+            lines.append(
+                "This is consistent with the parallel trends assumption."
+            )
+
+        lines.append("=" * 65)
+
+        return "\n".join(lines)
+
+    def print_summary(self) -> None:
+        """Print summary to stdout."""
+        print(self.summary())
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert results to a dictionary."""
+        result = {
+            "test_type": self.test_type,
+            "placebo_effect": self.placebo_effect,
+            "se": self.se,
+            "t_stat": self.t_stat,
+            "p_value": self.p_value,
+            "conf_int_lower": self.conf_int[0],
+            "conf_int_upper": self.conf_int[1],
+            "n_obs": self.n_obs,
+            "is_significant": self.is_significant,
+        }
+
+        if self.original_effect is not None:
+            result["original_effect"] = self.original_effect
+        if self.original_se is not None:
+            result["original_se"] = self.original_se
+        if self.n_permutations is not None:
+            result["n_permutations"] = self.n_permutations
+
+        return result
+
+    def to_dataframe(self) -> pd.DataFrame:
+        """Convert results to a DataFrame."""
+        return pd.DataFrame([self.to_dict()])
+
+
+def run_placebo_test(
+    data: pd.DataFrame,
+    outcome: str,
+    treatment: str,
+    time: str,
+    unit: Optional[str] = None,
+    test_type: str = "fake_timing",
+    fake_treatment_period: Optional[Any] = None,
+    fake_treatment_group: Optional[List[Any]] = None,
+    post_periods: Optional[List[Any]] = None,
+    n_permutations: int = 1000,
+    alpha: float = 0.05,
+    seed: Optional[int] = None,
+    **estimator_kwargs
+) -> PlaceboTestResults:
+    """
+    Run a placebo test to validate DiD assumptions.
+
+    Placebo tests provide evidence on the validity of the parallel trends
+    assumption by testing whether "fake" treatments produce significant effects.
+    A significant placebo effect suggests the parallel trends assumption may
+    be violated.
+
+    Parameters
+    ----------
+    data : pd.DataFrame
+        Panel data for DiD analysis.
+    outcome : str
+        Name of outcome variable column.
+    treatment : str
+        Name of treatment indicator column (0/1).
+    time : str
+        Name of time period column.
+    unit : str, optional
+        Name of unit identifier column. Required for some test types.
+    test_type : str, default="fake_timing"
+        Type of placebo test:
+        - "fake_timing": Assign treatment at a fake (earlier) time period
+        - "fake_group": Run DiD designating some control units as "fake treated"
+        - "permutation": Randomly reassign treatment and compute distribution
+        - "leave_one_out": Drop each treated unit and re-estimate
+    fake_treatment_period : any, optional
+        For "fake_timing": The fake treatment period to test.
+        Should be a pre-treatment period.
+    fake_treatment_group : list, optional
+        For "fake_group": List of control unit IDs to designate as fake treated.
+    post_periods : list, optional
+        List of post-treatment periods. Required for fake_timing test.
+    n_permutations : int, default=1000
+        For "permutation": Number of random treatment assignments.
+    alpha : float, default=0.05
+        Significance level.
+    seed : int, optional
+        Random seed for reproducibility.
+    **estimator_kwargs
+        Additional arguments passed to the DiD estimator.
+
+    Returns
+    -------
+    PlaceboTestResults
+        Object containing placebo effect estimates, p-values, and diagnostics.
+
+    Examples
+    --------
+    Fake timing test:
+
+    >>> results = run_placebo_test(
+    ...     data, outcome='sales', treatment='treated', time='period',
+    ...     test_type='fake_timing',
+    ...     fake_treatment_period=1,  # Pre-treatment period
+    ...     post_periods=[2, 3, 4]
+    ... )
+    >>> if results.is_significant:
+    ...     print("Warning: Pre-treatment differential trends detected!")
+
+    Permutation test:
+
+    >>> results = run_placebo_test(
+    ...     data, outcome='sales', treatment='treated', time='period',
+    ...     unit='unit_id',
+    ...     test_type='permutation',
+    ...     n_permutations=1000,
+    ...     seed=42
+    ... )
+    >>> print(f"Permutation p-value: {results.p_value:.4f}")
+
+    References
+    ----------
+    Bertrand, M., Duflo, E., & Mullainathan, S. (2004). How Much Should
+    We Trust Differences-in-Differences Estimates? The Quarterly Journal
+    of Economics, 119(1), 249-275.
+    """
+    test_type = test_type.lower()
+    valid_types = ["fake_timing", "fake_group", "permutation", "leave_one_out"]
+
+    if test_type not in valid_types:
+        raise ValueError(
+            f"test_type must be one of {valid_types}, got '{test_type}'"
+        )
+
+    if test_type == "fake_timing":
+        return placebo_timing_test(
+            data=data,
+            outcome=outcome,
+            treatment=treatment,
+            time=time,
+            fake_treatment_period=fake_treatment_period,
+            post_periods=post_periods,
+            alpha=alpha,
+            **estimator_kwargs
+        )
+
+    elif test_type == "fake_group":
+        if unit is None:
+            raise ValueError("unit is required for fake_group test")
+        if fake_treatment_group is None or len(fake_treatment_group) == 0:
+            raise ValueError("fake_treatment_group is required for fake_group test")
+        return placebo_group_test(
+            data=data,
+            outcome=outcome,
+            time=time,
+            unit=unit,
+            fake_treated_units=fake_treatment_group,
+            post_periods=post_periods,
+            alpha=alpha,
+            **estimator_kwargs
+        )
+
+    elif test_type == "permutation":
+        if unit is None:
+            raise ValueError("unit is required for permutation test")
+        return permutation_test(
+            data=data,
+            outcome=outcome,
+            treatment=treatment,
+            time=time,
+            unit=unit,
+            n_permutations=n_permutations,
+            alpha=alpha,
+            seed=seed,
+            **estimator_kwargs
+        )
+
+    elif test_type == "leave_one_out":
+        if unit is None:
+            raise ValueError("unit is required for leave_one_out test")
+        return leave_one_out_test(
+            data=data,
+            outcome=outcome,
+            treatment=treatment,
+            time=time,
+            unit=unit,
+            alpha=alpha,
+            **estimator_kwargs
+        )
+
+    # This should never be reached due to validation above
+    raise ValueError(f"Unknown test type: {test_type}")
+
+
+def placebo_timing_test(
+    data: pd.DataFrame,
+    outcome: str,
+    treatment: str,
+    time: str,
+    fake_treatment_period: Any,
+    post_periods: Optional[List[Any]] = None,
+    alpha: float = 0.05,
+    **estimator_kwargs
+) -> PlaceboTestResults:
+    """
+    Test for pre-treatment effects by moving treatment timing earlier.
+
+    Creates a fake "post" indicator using pre-treatment data only, then
+    estimates a DiD model. A significant effect suggests pre-existing
+    differential trends.
+
+    Parameters
+    ----------
+    data : pd.DataFrame
+        Panel data.
+    outcome : str
+        Outcome variable column.
+    treatment : str
+        Treatment indicator column.
+    time : str
+        Time period column.
+    fake_treatment_period : any
+        Period to use as fake treatment timing (should be a pre-treatment period).
+    post_periods : list, optional
+        List of actual post-treatment periods. If None, infers from data.
+    alpha : float, default=0.05
+        Significance level.
+    **estimator_kwargs
+        Arguments passed to DifferenceInDifferences.
+
+    Returns
+    -------
+    PlaceboTestResults
+        Results of the fake timing placebo test.
+    """
+    all_periods = sorted(data[time].unique())
+
+    # Infer post periods if not provided
+    if post_periods is None:
+        # Use second half of periods as post
+        mid = len(all_periods) // 2
+        post_periods = all_periods[mid:]
+
+    # Validate fake_treatment_period is pre-treatment
+    if fake_treatment_period in post_periods:
+        raise ValueError(
+            f"fake_treatment_period ({fake_treatment_period}) must be a "
+            f"pre-treatment period, not in post_periods ({post_periods})"
+        )
+
+    # Use only pre-treatment data
+    pre_periods = [p for p in all_periods if p not in post_periods]
+    pre_data = data[data[time].isin(pre_periods)].copy()
+
+    # Create fake post indicator
+    pre_data["_fake_post"] = (pre_data[time] >= fake_treatment_period).astype(int)
+
+    # Fit DiD on pre-treatment data with fake post
+    did = DifferenceInDifferences(**estimator_kwargs)
+    results = did.fit(
+        pre_data,
+        outcome=outcome,
+        treatment=treatment,
+        time="_fake_post"
+    )
+
+    # Also fit on full data for comparison
+    data_with_post = data.copy()
+    data_with_post["_post"] = data_with_post[time].isin(post_periods).astype(int)
+    did_full = DifferenceInDifferences(**estimator_kwargs)
+    results_full = did_full.fit(
+        data_with_post,
+        outcome=outcome,
+        treatment=treatment,
+        time="_post"
+    )
+
+    return PlaceboTestResults(
+        test_type="fake_timing",
+        placebo_effect=results.att,
+        se=results.se,
+        t_stat=results.t_stat,
+        p_value=results.p_value,
+        conf_int=results.conf_int,
+        n_obs=results.n_obs,
+        is_significant=bool(results.p_value < alpha),
+        alpha=alpha,
+        original_effect=results_full.att,
+        original_se=results_full.se,
+        fake_period=fake_treatment_period,
+    )
+
+
+def placebo_group_test(
+    data: pd.DataFrame,
+    outcome: str,
+    time: str,
+    unit: str,
+    fake_treated_units: List[Any],
+    post_periods: Optional[List[Any]] = None,
+    alpha: float = 0.05,
+    **estimator_kwargs
+) -> PlaceboTestResults:
+    """
+    Test for differential trends among never-treated units.
+
+    Assigns some never-treated units as "fake treated" and estimates a
+    DiD model using only never-treated data. A significant effect suggests
+    heterogeneous trends in the control group.
+
+    Parameters
+    ----------
+    data : pd.DataFrame
+        Panel data.
+    outcome : str
+        Outcome variable column.
+    time : str
+        Time period column.
+    unit : str
+        Unit identifier column.
+    fake_treated_units : list
+        List of control unit IDs to designate as "fake treated".
+    post_periods : list, optional
+        List of post-treatment period values.
+    alpha : float, default=0.05
+        Significance level.
+    **estimator_kwargs
+        Arguments passed to DifferenceInDifferences.
+
+    Returns
+    -------
+    PlaceboTestResults
+        Results of the fake group placebo test.
+    """
+    if fake_treated_units is None or len(fake_treated_units) == 0:
+        raise ValueError("fake_treated_units must be a non-empty list")
+
+    all_periods = sorted(data[time].unique())
+
+    # Infer post periods if not provided
+    if post_periods is None:
+        mid = len(all_periods) // 2
+        post_periods = all_periods[mid:]
+
+    # Create fake treatment indicator
+    fake_data = data.copy()
+    fake_data["_fake_treated"] = fake_data[unit].isin(fake_treated_units).astype(int)
+    fake_data["_post"] = fake_data[time].isin(post_periods).astype(int)
+
+    # Fit DiD
+    did = DifferenceInDifferences(**estimator_kwargs)
+    results = did.fit(
+        fake_data,
+        outcome=outcome,
+        treatment="_fake_treated",
+        time="_post"
+    )
+
+    return PlaceboTestResults(
+        test_type="fake_group",
+        placebo_effect=results.att,
+        se=results.se,
+        t_stat=results.t_stat,
+        p_value=results.p_value,
+        conf_int=results.conf_int,
+        n_obs=results.n_obs,
+        is_significant=bool(results.p_value < alpha),
+        alpha=alpha,
+        fake_group=list(fake_treated_units),
+    )
+
+
+def permutation_test(
+    data: pd.DataFrame,
+    outcome: str,
+    treatment: str,
+    time: str,
+    unit: str,
+    n_permutations: int = 1000,
+    alpha: float = 0.05,
+    seed: Optional[int] = None,
+    **estimator_kwargs
+) -> PlaceboTestResults:
+    """
+    Compute permutation-based p-value for DiD estimate.
+
+    Randomly reassigns treatment status at the unit level and computes the
+    DiD estimate for each permutation. The p-value is the proportion of
+    permuted estimates at least as extreme as the original.
+
+    Parameters
+    ----------
+    data : pd.DataFrame
+        Panel data.
+    outcome : str
+        Outcome variable column.
+    treatment : str
+        Treatment indicator column.
+    time : str
+        Time period column.
+    unit : str
+        Unit identifier column.
+    n_permutations : int, default=1000
+        Number of random permutations.
+    alpha : float, default=0.05
+        Significance level.
+    seed : int, optional
+        Random seed for reproducibility.
+    **estimator_kwargs
+        Arguments passed to DifferenceInDifferences.
+
+    Returns
+    -------
+    PlaceboTestResults
+        Results with permutation distribution and p-value.
+
+    Notes
+    -----
+    The permutation test is exact and does not rely on asymptotic
+    approximations, making it valid with any sample size.
+    """
+    rng = np.random.default_rng(seed)
+
+    # First, fit original model
+    did = DifferenceInDifferences(**estimator_kwargs)
+    original_results = did.fit(
+        data,
+        outcome=outcome,
+        treatment=treatment,
+        time=time
+    )
+    original_att = original_results.att
+
+    # Get unit-level treatment assignment
+    unit_treatment = (
+        data.groupby(unit)[treatment]
+        .first()
+        .reset_index()
+    )
+    units = unit_treatment[unit].values
+    n_treated = int(unit_treatment[treatment].sum())
+
+    # Permutation loop
+    permuted_effects = np.zeros(n_permutations)
+
+    for i in range(n_permutations):
+        # Randomly assign treatment to units
+        perm_treated_units = rng.choice(units, size=n_treated, replace=False)
+
+        # Create permuted data
+        perm_data = data.copy()
+        perm_data["_perm_treatment"] = perm_data[unit].isin(perm_treated_units).astype(int)
+
+        # Fit DiD
+        try:
+            perm_did = DifferenceInDifferences(**estimator_kwargs)
+            perm_results = perm_did.fit(
+                perm_data,
+                outcome=outcome,
+                treatment="_perm_treatment",
+                time=time
+            )
+            permuted_effects[i] = perm_results.att
+        except (ValueError, KeyError, np.linalg.LinAlgError):
+            # Handle edge cases where fitting fails
+            permuted_effects[i] = np.nan
+
+    # Remove any NaN values and track failure rate
+    valid_effects = permuted_effects[~np.isnan(permuted_effects)]
+    n_failed = n_permutations - len(valid_effects)
+
+    if len(valid_effects) == 0:
+        raise RuntimeError(
+            f"All {n_permutations} permutations failed. This typically occurs when:\n"
+            f"  - Treatment/control groups are too small for valid permutation\n"
+            f"  - Data contains collinearity or singular matrices after permutation\n"
+            f"  - There are too few observations per time period\n"
+            f"Consider checking data quality with validate_did_data() from diff_diff.prep."
+        )
+
+    # Warn if significant number of permutations failed
+    if n_failed > 0:
+        failure_rate = n_failed / n_permutations
+        if failure_rate > 0.1:
+            import warnings
+            warnings.warn(
+                f"{n_failed}/{n_permutations} permutations failed ({failure_rate:.1%}). "
+                f"Results based on {len(valid_effects)} successful permutations.",
+                UserWarning,
+                stacklevel=2
+            )
+
+    # Compute p-value: proportion of |permuted| >= |original|
+    p_value = np.mean(np.abs(valid_effects) >= np.abs(original_att))
+
+    # Ensure p-value is at least 1/(n_permutations + 1)
+    p_value = max(p_value, 1 / (len(valid_effects) + 1))
+
+    # Compute SE and CI from permutation distribution
+    se = np.std(valid_effects, ddof=1)
+    ci_lower = np.percentile(valid_effects, alpha / 2 * 100)
+    ci_upper = np.percentile(valid_effects, (1 - alpha / 2) * 100)
+
+    # T-stat from original estimate
+    t_stat = original_att / se if se > 0 else 0.0
+
+    return PlaceboTestResults(
+        test_type="permutation",
+        placebo_effect=np.mean(valid_effects),  # Mean of null distribution
+        se=se,
+        t_stat=t_stat,
+        p_value=p_value,
+        conf_int=(ci_lower, ci_upper),
+        n_obs=len(data),
+        is_significant=bool(p_value < alpha),
+        alpha=alpha,
+        original_effect=original_att,
+        original_se=original_results.se,
+        permutation_distribution=valid_effects,
+        n_permutations=len(valid_effects),
+    )
+
+
+def leave_one_out_test(
+    data: pd.DataFrame,
+    outcome: str,
+    treatment: str,
+    time: str,
+    unit: str,
+    alpha: float = 0.05,
+    **estimator_kwargs
+) -> PlaceboTestResults:
+    """
+    Assess sensitivity by dropping each treated unit in turn.
+
+    For each treated unit, drops that unit and re-estimates the DiD model.
+    Large variation in estimates suggests results are driven by a single unit.
+
+    Parameters
+    ----------
+    data : pd.DataFrame
+        Panel data.
+    outcome : str
+        Outcome variable column.
+    treatment : str
+        Treatment indicator column.
+    time : str
+        Time period column.
+    unit : str
+        Unit identifier column.
+    alpha : float, default=0.05
+        Significance level.
+    **estimator_kwargs
+        Arguments passed to DifferenceInDifferences.
+
+    Returns
+    -------
+    PlaceboTestResults
+        Results with leave_one_out_effects dict mapping unit -> ATT estimate.
+    """
+    # Fit original model
+    did = DifferenceInDifferences(**estimator_kwargs)
+    original_results = did.fit(
+        data,
+        outcome=outcome,
+        treatment=treatment,
+        time=time
+    )
+    original_att = original_results.att
+
+    # Get treated units
+    treated_units = data[data[treatment] == 1][unit].unique()
+
+    # Leave-one-out loop
+    loo_effects = {}
+
+    for u in treated_units:
+        # Drop this unit
+        loo_data = data[data[unit] != u].copy()
+
+        # Check we still have treated units
+        if loo_data[treatment].sum() == 0:
+            continue
+
+        try:
+            loo_did = DifferenceInDifferences(**estimator_kwargs)
+            loo_results = loo_did.fit(
+                loo_data,
+                outcome=outcome,
+                treatment=treatment,
+                time=time
+            )
+            loo_effects[u] = loo_results.att
+        except (ValueError, KeyError, np.linalg.LinAlgError):
+            # Skip units that cause fitting issues
+            loo_effects[u] = np.nan
+
+    # Remove NaN values for statistics and track failures
+    valid_effects = [v for v in loo_effects.values() if not np.isnan(v)]
+    n_total = len(loo_effects)
+    n_failed = n_total - len(valid_effects)
+
+    if len(valid_effects) == 0:
+        raise RuntimeError(
+            f"All {n_total} leave-one-out estimates failed. This typically occurs when:\n"
+            f"  - Removing any single treated unit causes model fitting to fail\n"
+            f"  - Very few treated units (need at least 2 for LOO)\n"
+            f"  - Data has collinearity issues that manifest when units are removed\n"
+            f"Consider checking data quality and ensuring sufficient treated units."
+        )
+
+    # Warn if significant number of LOO iterations failed
+    if n_failed > 0:
+        import warnings
+        failed_units = [u for u, v in loo_effects.items() if np.isnan(v)]
+        warnings.warn(
+            f"{n_failed}/{n_total} leave-one-out estimates failed for units: {failed_units}. "
+            f"Results based on {len(valid_effects)} successful iterations.",
+            UserWarning,
+            stacklevel=2
+        )
+
+    # Statistics of LOO distribution
+    mean_effect = np.mean(valid_effects)
+    se = np.std(valid_effects, ddof=1) if len(valid_effects) > 1 else 0.0
+    t_stat = mean_effect / se if se > 0 else 0.0
+
+    # Use t-distribution for p-value
+    df = len(valid_effects) - 1 if len(valid_effects) > 1 else 1
+    p_value = compute_p_value(t_stat, df=df)
+
+    # CI
+    conf_int = compute_confidence_interval(mean_effect, se, alpha, df=df)
+
+    return PlaceboTestResults(
+        test_type="leave_one_out",
+        placebo_effect=mean_effect,
+        se=se,
+        t_stat=t_stat,
+        p_value=p_value,
+        conf_int=conf_int,
+        n_obs=len(data),
+        is_significant=bool(p_value < alpha),
+        alpha=alpha,
+        original_effect=original_att,
+        original_se=original_results.se,
+        leave_one_out_effects=loo_effects,
+    )
+
+
+def run_all_placebo_tests(
+    data: pd.DataFrame,
+    outcome: str,
+    treatment: str,
+    time: str,
+    unit: str,
+    pre_periods: List[Any],
+    post_periods: List[Any],
+    n_permutations: int = 500,
+    alpha: float = 0.05,
+    seed: Optional[int] = None,
+    **estimator_kwargs
+) -> Dict[str, Union[PlaceboTestResults, Dict[str, str]]]:
+    """
+    Run a comprehensive suite of placebo tests.
+
+    Runs fake timing tests for each pre-period, a permutation test, and
+    a leave-one-out sensitivity analysis. If a test fails, the result
+    will be a dict with an "error" key containing the error message.
+
+    Parameters
+    ----------
+    data : pd.DataFrame
+        Panel data.
+    outcome : str
+        Outcome variable column.
+    treatment : str
+        Treatment indicator column.
+    time : str
+        Time period column.
+    unit : str
+        Unit identifier column.
+    pre_periods : list
+        List of pre-treatment periods.
+    post_periods : list
+        List of post-treatment periods.
+    n_permutations : int, default=500
+        Permutations for permutation test.
+    alpha : float, default=0.05
+        Significance level.
+    seed : int, optional
+        Random seed.
+    **estimator_kwargs
+        Arguments passed to estimators.
+
+    Returns
+    -------
+    dict
+        Dictionary mapping test names to PlaceboTestResults.
+        Keys: "fake_timing_{period}", "permutation", "leave_one_out"
+    """
+    results = {}
+
+    # Fake timing tests for each pre-period (except first)
+    for period in pre_periods[1:]:  # Skip first period
+        try:
+            test_result = placebo_timing_test(
+                data=data,
+                outcome=outcome,
+                treatment=treatment,
+                time=time,
+                fake_treatment_period=period,
+                post_periods=post_periods,
+                alpha=alpha,
+                **estimator_kwargs
+            )
+            results[f"fake_timing_{period}"] = test_result
+        except Exception as e:
+            # Store structured error info for debugging
+            results[f"fake_timing_{period}"] = {
+                "error": str(e),
+                "error_type": type(e).__name__,
+                "test_type": "fake_timing",
+                "period": period
+            }
+
+    # Permutation test
+    try:
+        perm_result = permutation_test(
+            data=data,
+            outcome=outcome,
+            treatment=treatment,
+            time=time,
+            unit=unit,
+            n_permutations=n_permutations,
+            alpha=alpha,
+            seed=seed,
+            **estimator_kwargs
+        )
+        results["permutation"] = perm_result
+    except Exception as e:
+        results["permutation"] = {
+            "error": str(e),
+            "error_type": type(e).__name__,
+            "test_type": "permutation"
+        }
+
+    # Leave-one-out test
+    try:
+        loo_result = leave_one_out_test(
+            data=data,
+            outcome=outcome,
+            treatment=treatment,
+            time=time,
+            unit=unit,
+            alpha=alpha,
+            **estimator_kwargs
+        )
+        results["leave_one_out"] = loo_result
+    except Exception as e:
+        results["leave_one_out"] = {
+            "error": str(e),
+            "error_type": type(e).__name__,
+            "test_type": "leave_one_out"
+        }
+
+    return results
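
As an illustration of the API added in diagnostics.py (not part of the wheel contents above), the sketch below shows one way the placebo tests might be driven end to end. The DataFrame, file name, column names, and period values are assumptions, chosen to match the docstring examples (sales, treated, period, unit_id).

import pandas as pd
from diff_diff.diagnostics import run_all_placebo_tests, run_placebo_test

# Hypothetical panel dataset with outcome, 0/1 treatment, time, and unit columns.
df = pd.read_csv("panel.csv")

# Single fake-timing placebo on a pre-treatment period (mirrors the docstring example).
timing = run_placebo_test(
    df, outcome="sales", treatment="treated", time="period",
    test_type="fake_timing", fake_treatment_period=1, post_periods=[2, 3, 4],
)
timing.print_summary()

# Full suite: per-period fake-timing tests, a permutation test, and leave-one-out.
# Per the run_all_placebo_tests docstring, a failed test comes back as a dict
# with "error" and "error_type" keys instead of a PlaceboTestResults object.
suite = run_all_placebo_tests(
    df, outcome="sales", treatment="treated", time="period", unit="unit_id",
    pre_periods=[0, 1], post_periods=[2, 3, 4], n_permutations=500, seed=42,
)
for name, res in suite.items():
    if isinstance(res, dict):
        print(f"{name}: failed ({res['error_type']})")
    else:
        print(f"{name}: effect={res.placebo_effect:.4f}, p={res.p_value:.4f}")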