diff-diff 3.0.1__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. diff_diff/__init__.py +382 -0
  2. diff_diff/_backend.py +134 -0
  3. diff_diff/_rust_backend.cp314-win_amd64.pyd +0 -0
  4. diff_diff/bacon.py +1140 -0
  5. diff_diff/bootstrap_utils.py +730 -0
  6. diff_diff/continuous_did.py +1626 -0
  7. diff_diff/continuous_did_bspline.py +190 -0
  8. diff_diff/continuous_did_results.py +374 -0
  9. diff_diff/datasets.py +815 -0
  10. diff_diff/diagnostics.py +882 -0
  11. diff_diff/efficient_did.py +1770 -0
  12. diff_diff/efficient_did_bootstrap.py +359 -0
  13. diff_diff/efficient_did_covariates.py +899 -0
  14. diff_diff/efficient_did_results.py +368 -0
  15. diff_diff/efficient_did_weights.py +617 -0
  16. diff_diff/estimators.py +1501 -0
  17. diff_diff/honest_did.py +2585 -0
  18. diff_diff/imputation.py +2458 -0
  19. diff_diff/imputation_bootstrap.py +418 -0
  20. diff_diff/imputation_results.py +448 -0
  21. diff_diff/linalg.py +2538 -0
  22. diff_diff/power.py +2588 -0
  23. diff_diff/practitioner.py +869 -0
  24. diff_diff/prep.py +1738 -0
  25. diff_diff/prep_dgp.py +1718 -0
  26. diff_diff/pretrends.py +1105 -0
  27. diff_diff/results.py +918 -0
  28. diff_diff/stacked_did.py +1049 -0
  29. diff_diff/stacked_did_results.py +339 -0
  30. diff_diff/staggered.py +3895 -0
  31. diff_diff/staggered_aggregation.py +864 -0
  32. diff_diff/staggered_bootstrap.py +752 -0
  33. diff_diff/staggered_results.py +416 -0
  34. diff_diff/staggered_triple_diff.py +1545 -0
  35. diff_diff/staggered_triple_diff_results.py +416 -0
  36. diff_diff/sun_abraham.py +1685 -0
  37. diff_diff/survey.py +1981 -0
  38. diff_diff/synthetic_did.py +1136 -0
  39. diff_diff/triple_diff.py +2047 -0
  40. diff_diff/trop.py +952 -0
  41. diff_diff/trop_global.py +1270 -0
  42. diff_diff/trop_local.py +1307 -0
  43. diff_diff/trop_results.py +356 -0
  44. diff_diff/twfe.py +542 -0
  45. diff_diff/two_stage.py +1952 -0
  46. diff_diff/two_stage_bootstrap.py +520 -0
  47. diff_diff/two_stage_results.py +400 -0
  48. diff_diff/utils.py +1902 -0
  49. diff_diff/visualization/__init__.py +61 -0
  50. diff_diff/visualization/_common.py +328 -0
  51. diff_diff/visualization/_continuous.py +274 -0
  52. diff_diff/visualization/_diagnostic.py +817 -0
  53. diff_diff/visualization/_event_study.py +1086 -0
  54. diff_diff/visualization/_power.py +661 -0
  55. diff_diff/visualization/_staggered.py +833 -0
  56. diff_diff/visualization/_synthetic.py +197 -0
  57. diff_diff/wooldridge.py +1285 -0
  58. diff_diff/wooldridge_results.py +349 -0
  59. diff_diff-3.0.1.dist-info/METADATA +2997 -0
  60. diff_diff-3.0.1.dist-info/RECORD +62 -0
  61. diff_diff-3.0.1.dist-info/WHEEL +4 -0
  62. diff_diff-3.0.1.dist-info/sboms/diff_diff_rust.cyclonedx.json +5843 -0
@@ -0,0 +1,882 @@
1
+ """
2
+ Diagnostic tools for validating Difference-in-Differences assumptions.
3
+
4
+ This module provides placebo tests and other diagnostic tools for assessing
5
+ the validity of the parallel trends assumption in DiD designs.
6
+
7
+ References
8
+ ----------
9
+ Bertrand, M., Duflo, E., & Mullainathan, S. (2004). How Much Should We Trust
10
+ Differences-in-Differences Estimates? The Quarterly Journal of Economics,
11
+ 119(1), 249-275.
12
+ """
13
+
14
+ from dataclasses import dataclass, field
15
+ from typing import Any, Dict, List, Optional, Tuple, Union
16
+
17
+ import numpy as np
18
+ import pandas as pd
19
+
20
+ from diff_diff.estimators import DifferenceInDifferences
21
+ from diff_diff.results import _get_significance_stars
22
+ from diff_diff.utils import safe_inference
23
+
24
+
25
@dataclass
class PlaceboTestResults:
    """
    Results from a placebo test for DiD assumption validation.

    Attributes
    ----------
    test_type : str
        Type of placebo test performed.
    placebo_effect : float
        Estimated placebo treatment effect.
    se : float
        Standard error of the placebo effect.
    t_stat : float
        T-statistic for the placebo effect.
    p_value : float
        P-value for testing placebo_effect = 0.
    conf_int : tuple
        Confidence interval for the placebo effect.
    n_obs : int
        Number of observations used in the test.
    is_significant : bool
        Whether the placebo effect is significant at the ``alpha`` level.
    alpha : float, default=0.05
        Significance level; drives the CI label and the interpretation
        text produced by ``summary()``.
    original_effect : float, optional
        Original ATT estimate for comparison.
    original_se : float, optional
        Original SE for comparison.
    permutation_distribution : np.ndarray, optional
        Distribution of permuted effects (for permutation test).
    leave_one_out_effects : dict, optional
        Unit-specific effects (for leave-one-out test). Entries may be NaN
        for units whose re-estimation failed.
    fake_period : any, optional
        The fake treatment period used (for timing test).
    fake_group : list, optional
        The fake treatment group used (for group test).
    n_permutations : int, optional
        Number of permutations the result is based on (for permutation test).
    """

    test_type: str
    placebo_effect: float
    se: float
    t_stat: float
    p_value: float
    conf_int: Tuple[float, float]
    n_obs: int
    is_significant: bool
    alpha: float = 0.05

    # Optional fields for specific test types
    original_effect: Optional[float] = None
    original_se: Optional[float] = None
    permutation_distribution: Optional[np.ndarray] = field(default=None, repr=False)
    leave_one_out_effects: Optional[Dict[Any, float]] = field(default=None)
    fake_period: Optional[Any] = None
    fake_group: Optional[List[Any]] = field(default=None)
    n_permutations: Optional[int] = None

    @property
    def significance_stars(self) -> str:
        """Return significance stars based on p-value."""
        return _get_significance_stars(self.p_value)

    def summary(self) -> str:
        """
        Generate formatted summary of placebo test results.

        Returns
        -------
        str
            Multi-line report. Optional sections (original estimate,
            permutation count, fake period, leave-one-out statistics) are
            included only when the corresponding field is set.
        """
        # ":g" avoids the truncation int() applies to values such as 97.5
        # or 92.99999999999999 while still printing "95" for alpha=0.05.
        conf_level = f"{(1 - self.alpha) * 100:g}"
        alpha_label = f"{self.alpha:g}"

        lines = [
            "=" * 65,
            f"Placebo Test Results: {self.test_type}".center(65),
            "=" * 65,
            "",
            f"{'Placebo effect:':<25} {self.placebo_effect:>12.4f}",
            f"{'Standard error:':<25} {self.se:>12.4f}",
            f"{'T-statistic:':<25} {self.t_stat:>12.4f}",
            f"{'P-value:':<25} {self.p_value:>12.4f}",
            f"{conf_level}% CI: [{self.conf_int[0]:.4f}, {self.conf_int[1]:.4f}]",
            "",
            f"{'Observations:':<25} {self.n_obs:>12}",
        ]

        if self.original_effect is not None:
            lines.extend(
                [
                    "",
                    "-" * 65,
                    "Comparison with Original Estimate".center(65),
                    "-" * 65,
                    f"{'Original ATT:':<25} {self.original_effect:>12.4f}",
                ]
            )
            if self.original_se is not None:
                lines.append(f"{'Original SE:':<25} {self.original_se:>12.4f}")

        if self.n_permutations is not None:
            lines.append(f"{'Number of permutations:':<25} {self.n_permutations:>12}")

        if self.fake_period is not None:
            lines.append(f"{'Fake treatment period:':<25} {str(self.fake_period):>12}")

        if self.leave_one_out_effects is not None:
            n_units = len(self.leave_one_out_effects)
            effects = np.asarray(list(self.leave_one_out_effects.values()), dtype=float)
            # Failed units are stored as NaN; exclude them so one failure
            # does not turn every statistic below into NaN.
            usable = effects[~np.isnan(effects)]
            nan = float("nan")
            mean_effect = np.mean(usable) if usable.size else nan
            # Sample std needs at least two values; avoid numpy's
            # degrees-of-freedom RuntimeWarning otherwise.
            std_effect = np.std(usable, ddof=1) if usable.size > 1 else nan
            min_effect = np.min(usable) if usable.size else nan
            max_effect = np.max(usable) if usable.size else nan
            lines.extend(
                [
                    "",
                    "-" * 65,
                    "Leave-One-Out Summary".center(65),
                    "-" * 65,
                    f"{'Units analyzed:':<25} {n_units:>12}",
                    f"{'Mean effect:':<25} {mean_effect:>12.4f}",
                    f"{'Std. dev.:':<25} {std_effect:>12.4f}",
                    f"{'Min effect:':<25} {min_effect:>12.4f}",
                    f"{'Max effect:':<25} {max_effect:>12.4f}",
                ]
            )

        # Interpretation
        lines.extend(
            [
                "",
                "-" * 65,
                "Interpretation".center(65),
                "-" * 65,
            ]
        )

        # Report the threshold the test was actually run at, not a fixed 0.05.
        if self.is_significant:
            lines.append(
                f"WARNING: Significant placebo effect detected (p < {alpha_label})."
            )
            lines.append("This suggests potential violations of the parallel trends assumption.")
        else:
            lines.append(f"No significant placebo effect detected (p >= {alpha_label}).")
            lines.append("This is consistent with the parallel trends assumption.")

        lines.append("=" * 65)

        return "\n".join(lines)

    def print_summary(self) -> None:
        """Print summary to stdout."""
        print(self.summary())

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert results to a dictionary.

        Returns
        -------
        dict
            Core statistics, with the confidence interval split into
            ``conf_int_lower`` / ``conf_int_upper``. ``original_effect``,
            ``original_se`` and ``n_permutations`` are included only when set.
        """
        result = {
            "test_type": self.test_type,
            "placebo_effect": self.placebo_effect,
            "se": self.se,
            "t_stat": self.t_stat,
            "p_value": self.p_value,
            "conf_int_lower": self.conf_int[0],
            "conf_int_upper": self.conf_int[1],
            "n_obs": self.n_obs,
            "is_significant": self.is_significant,
        }

        if self.original_effect is not None:
            result["original_effect"] = self.original_effect
        if self.original_se is not None:
            result["original_se"] = self.original_se
        if self.n_permutations is not None:
            result["n_permutations"] = self.n_permutations

        return result

    def to_dataframe(self) -> pd.DataFrame:
        """Convert results to a single-row DataFrame built from ``to_dict()``."""
        return pd.DataFrame([self.to_dict()])
191
+
192
+
193
+ def run_placebo_test(
194
+ data: pd.DataFrame,
195
+ outcome: str,
196
+ treatment: str,
197
+ time: str,
198
+ unit: Optional[str] = None,
199
+ test_type: str = "fake_timing",
200
+ fake_treatment_period: Optional[Any] = None,
201
+ fake_treatment_group: Optional[List[Any]] = None,
202
+ post_periods: Optional[List[Any]] = None,
203
+ n_permutations: int = 1000,
204
+ alpha: float = 0.05,
205
+ seed: Optional[int] = None,
206
+ **estimator_kwargs,
207
+ ) -> PlaceboTestResults:
208
+ """
209
+ Run a placebo test to validate DiD assumptions.
210
+
211
+ Placebo tests provide evidence on the validity of the parallel trends
212
+ assumption by testing whether "fake" treatments produce significant effects.
213
+ A significant placebo effect suggests the parallel trends assumption may
214
+ be violated.
215
+
216
+ Parameters
217
+ ----------
218
+ data : pd.DataFrame
219
+ Panel data for DiD analysis.
220
+ outcome : str
221
+ Name of outcome variable column.
222
+ treatment : str
223
+ Name of treatment indicator column (0/1).
224
+ time : str
225
+ Name of time period column.
226
+ unit : str, optional
227
+ Name of unit identifier column. Required for some test types.
228
+ test_type : str, default="fake_timing"
229
+ Type of placebo test:
230
+ - "fake_timing": Assign treatment at a fake (earlier) time period
231
+ - "fake_group": Run DiD designating some control units as "fake treated"
232
+ - "permutation": Randomly reassign treatment and compute distribution
233
+ - "leave_one_out": Drop each treated unit and re-estimate
234
+ fake_treatment_period : any, optional
235
+ For "fake_timing": The fake treatment period to test.
236
+ Should be a pre-treatment period.
237
+ fake_treatment_group : list, optional
238
+ For "fake_group": List of control unit IDs to designate as fake treated.
239
+ post_periods : list, optional
240
+ List of post-treatment periods. Required for fake_timing test.
241
+ n_permutations : int, default=1000
242
+ For "permutation": Number of random treatment assignments.
243
+ alpha : float, default=0.05
244
+ Significance level.
245
+ seed : int, optional
246
+ Random seed for reproducibility.
247
+ **estimator_kwargs
248
+ Additional arguments passed to the DiD estimator.
249
+
250
+ Returns
251
+ -------
252
+ PlaceboTestResults
253
+ Object containing placebo effect estimates, p-values, and diagnostics.
254
+
255
+ Examples
256
+ --------
257
+ Fake timing test:
258
+
259
+ >>> results = run_placebo_test(
260
+ ... data, outcome='sales', treatment='treated', time='period',
261
+ ... test_type='fake_timing',
262
+ ... fake_treatment_period=1, # Pre-treatment period
263
+ ... post_periods=[2, 3, 4]
264
+ ... )
265
+ >>> if results.is_significant:
266
+ ... print("Warning: Pre-treatment differential trends detected!")
267
+
268
+ Permutation test:
269
+
270
+ >>> results = run_placebo_test(
271
+ ... data, outcome='sales', treatment='treated', time='period',
272
+ ... unit='unit_id',
273
+ ... test_type='permutation',
274
+ ... n_permutations=1000,
275
+ ... seed=42
276
+ ... )
277
+ >>> print(f"Permutation p-value: {results.p_value:.4f}")
278
+
279
+ References
280
+ ----------
281
+ Bertrand, M., Duflo, E., & Mullainathan, S. (2004). How Much Should
282
+ We Trust Differences-in-Differences Estimates? The Quarterly Journal
283
+ of Economics, 119(1), 249-275.
284
+ """
285
+ test_type = test_type.lower()
286
+ valid_types = ["fake_timing", "fake_group", "permutation", "leave_one_out"]
287
+
288
+ if test_type not in valid_types:
289
+ raise ValueError(f"test_type must be one of {valid_types}, got '{test_type}'")
290
+
291
+ if test_type == "fake_timing":
292
+ return placebo_timing_test(
293
+ data=data,
294
+ outcome=outcome,
295
+ treatment=treatment,
296
+ time=time,
297
+ fake_treatment_period=fake_treatment_period,
298
+ post_periods=post_periods,
299
+ alpha=alpha,
300
+ **estimator_kwargs,
301
+ )
302
+
303
+ elif test_type == "fake_group":
304
+ if unit is None:
305
+ raise ValueError("unit is required for fake_group test")
306
+ if fake_treatment_group is None or len(fake_treatment_group) == 0:
307
+ raise ValueError("fake_treatment_group is required for fake_group test")
308
+ return placebo_group_test(
309
+ data=data,
310
+ outcome=outcome,
311
+ time=time,
312
+ unit=unit,
313
+ fake_treated_units=fake_treatment_group,
314
+ post_periods=post_periods,
315
+ alpha=alpha,
316
+ **estimator_kwargs,
317
+ )
318
+
319
+ elif test_type == "permutation":
320
+ if unit is None:
321
+ raise ValueError("unit is required for permutation test")
322
+ return permutation_test(
323
+ data=data,
324
+ outcome=outcome,
325
+ treatment=treatment,
326
+ time=time,
327
+ unit=unit,
328
+ n_permutations=n_permutations,
329
+ alpha=alpha,
330
+ seed=seed,
331
+ **estimator_kwargs,
332
+ )
333
+
334
+ elif test_type == "leave_one_out":
335
+ if unit is None:
336
+ raise ValueError("unit is required for leave_one_out test")
337
+ return leave_one_out_test(
338
+ data=data,
339
+ outcome=outcome,
340
+ treatment=treatment,
341
+ time=time,
342
+ unit=unit,
343
+ alpha=alpha,
344
+ **estimator_kwargs,
345
+ )
346
+
347
+ # This should never be reached due to validation above
348
+ raise ValueError(f"Unknown test type: {test_type}")
349
+
350
+
351
+ def placebo_timing_test(
352
+ data: pd.DataFrame,
353
+ outcome: str,
354
+ treatment: str,
355
+ time: str,
356
+ fake_treatment_period: Any,
357
+ post_periods: Optional[List[Any]] = None,
358
+ alpha: float = 0.05,
359
+ **estimator_kwargs,
360
+ ) -> PlaceboTestResults:
361
+ """
362
+ Test for pre-treatment effects by moving treatment timing earlier.
363
+
364
+ Creates a fake "post" indicator using pre-treatment data only, then
365
+ estimates a DiD model. A significant effect suggests pre-existing
366
+ differential trends.
367
+
368
+ Parameters
369
+ ----------
370
+ data : pd.DataFrame
371
+ Panel data.
372
+ outcome : str
373
+ Outcome variable column.
374
+ treatment : str
375
+ Treatment indicator column.
376
+ time : str
377
+ Time period column.
378
+ fake_treatment_period : any
379
+ Period to use as fake treatment timing (should be a pre-treatment period).
380
+ post_periods : list, optional
381
+ List of actual post-treatment periods. If None, infers from data.
382
+ alpha : float, default=0.05
383
+ Significance level.
384
+ **estimator_kwargs
385
+ Arguments passed to DifferenceInDifferences.
386
+
387
+ Returns
388
+ -------
389
+ PlaceboTestResults
390
+ Results of the fake timing placebo test.
391
+ """
392
+ all_periods = sorted(data[time].unique())
393
+
394
+ # Infer post periods if not provided
395
+ if post_periods is None:
396
+ # Use second half of periods as post
397
+ mid = len(all_periods) // 2
398
+ post_periods = all_periods[mid:]
399
+
400
+ # Validate fake_treatment_period is pre-treatment
401
+ if fake_treatment_period in post_periods:
402
+ raise ValueError(
403
+ f"fake_treatment_period ({fake_treatment_period}) must be a "
404
+ f"pre-treatment period, not in post_periods ({post_periods})"
405
+ )
406
+
407
+ # Use only pre-treatment data
408
+ pre_periods = [p for p in all_periods if p not in post_periods]
409
+ pre_data = data[data[time].isin(pre_periods)].copy()
410
+
411
+ # Create fake post indicator
412
+ pre_data["_fake_post"] = (pre_data[time] >= fake_treatment_period).astype(int)
413
+
414
+ # Fit DiD on pre-treatment data with fake post
415
+ did = DifferenceInDifferences(**estimator_kwargs)
416
+ results = did.fit(pre_data, outcome=outcome, treatment=treatment, time="_fake_post")
417
+
418
+ # Also fit on full data for comparison
419
+ data_with_post = data.copy()
420
+ data_with_post["_post"] = data_with_post[time].isin(post_periods).astype(int)
421
+ did_full = DifferenceInDifferences(**estimator_kwargs)
422
+ results_full = did_full.fit(data_with_post, outcome=outcome, treatment=treatment, time="_post")
423
+
424
+ return PlaceboTestResults(
425
+ test_type="fake_timing",
426
+ placebo_effect=results.att,
427
+ se=results.se,
428
+ t_stat=results.t_stat,
429
+ p_value=results.p_value,
430
+ conf_int=results.conf_int,
431
+ n_obs=results.n_obs,
432
+ is_significant=bool(results.p_value < alpha),
433
+ alpha=alpha,
434
+ original_effect=results_full.att,
435
+ original_se=results_full.se,
436
+ fake_period=fake_treatment_period,
437
+ )
438
+
439
+
440
+ def placebo_group_test(
441
+ data: pd.DataFrame,
442
+ outcome: str,
443
+ time: str,
444
+ unit: str,
445
+ fake_treated_units: List[Any],
446
+ post_periods: Optional[List[Any]] = None,
447
+ alpha: float = 0.05,
448
+ **estimator_kwargs,
449
+ ) -> PlaceboTestResults:
450
+ """
451
+ Test for differential trends among never-treated units.
452
+
453
+ Assigns some never-treated units as "fake treated" and estimates a
454
+ DiD model using only never-treated data. A significant effect suggests
455
+ heterogeneous trends in the control group.
456
+
457
+ Parameters
458
+ ----------
459
+ data : pd.DataFrame
460
+ Panel data.
461
+ outcome : str
462
+ Outcome variable column.
463
+ time : str
464
+ Time period column.
465
+ unit : str
466
+ Unit identifier column.
467
+ fake_treated_units : list
468
+ List of control unit IDs to designate as "fake treated".
469
+ post_periods : list, optional
470
+ List of post-treatment period values.
471
+ alpha : float, default=0.05
472
+ Significance level.
473
+ **estimator_kwargs
474
+ Arguments passed to DifferenceInDifferences.
475
+
476
+ Returns
477
+ -------
478
+ PlaceboTestResults
479
+ Results of the fake group placebo test.
480
+ """
481
+ if fake_treated_units is None or len(fake_treated_units) == 0:
482
+ raise ValueError("fake_treated_units must be a non-empty list")
483
+
484
+ all_periods = sorted(data[time].unique())
485
+
486
+ # Infer post periods if not provided
487
+ if post_periods is None:
488
+ mid = len(all_periods) // 2
489
+ post_periods = all_periods[mid:]
490
+
491
+ # Create fake treatment indicator
492
+ fake_data = data.copy()
493
+ fake_data["_fake_treated"] = fake_data[unit].isin(fake_treated_units).astype(int)
494
+ fake_data["_post"] = fake_data[time].isin(post_periods).astype(int)
495
+
496
+ # Fit DiD
497
+ did = DifferenceInDifferences(**estimator_kwargs)
498
+ results = did.fit(fake_data, outcome=outcome, treatment="_fake_treated", time="_post")
499
+
500
+ return PlaceboTestResults(
501
+ test_type="fake_group",
502
+ placebo_effect=results.att,
503
+ se=results.se,
504
+ t_stat=results.t_stat,
505
+ p_value=results.p_value,
506
+ conf_int=results.conf_int,
507
+ n_obs=results.n_obs,
508
+ is_significant=bool(results.p_value < alpha),
509
+ alpha=alpha,
510
+ fake_group=list(fake_treated_units),
511
+ )
512
+
513
+
514
+ def permutation_test(
515
+ data: pd.DataFrame,
516
+ outcome: str,
517
+ treatment: str,
518
+ time: str,
519
+ unit: str,
520
+ n_permutations: int = 1000,
521
+ alpha: float = 0.05,
522
+ seed: Optional[int] = None,
523
+ **estimator_kwargs,
524
+ ) -> PlaceboTestResults:
525
+ """
526
+ Compute permutation-based p-value for DiD estimate.
527
+
528
+ Randomly reassigns treatment status at the unit level and computes the
529
+ DiD estimate for each permutation. The p-value is the proportion of
530
+ permuted estimates at least as extreme as the original.
531
+
532
+ Parameters
533
+ ----------
534
+ data : pd.DataFrame
535
+ Panel data.
536
+ outcome : str
537
+ Outcome variable column.
538
+ treatment : str
539
+ Treatment indicator column.
540
+ time : str
541
+ Time period column.
542
+ unit : str
543
+ Unit identifier column.
544
+ n_permutations : int, default=1000
545
+ Number of random permutations.
546
+ alpha : float, default=0.05
547
+ Significance level.
548
+ seed : int, optional
549
+ Random seed for reproducibility.
550
+ **estimator_kwargs
551
+ Arguments passed to DifferenceInDifferences.
552
+
553
+ Returns
554
+ -------
555
+ PlaceboTestResults
556
+ Results with permutation distribution and p-value.
557
+
558
+ Notes
559
+ -----
560
+ The permutation test is exact and does not rely on asymptotic
561
+ approximations, making it valid with any sample size.
562
+ """
563
+ rng = np.random.default_rng(seed)
564
+
565
+ # First, fit original model
566
+ did = DifferenceInDifferences(**estimator_kwargs)
567
+ original_results = did.fit(data, outcome=outcome, treatment=treatment, time=time)
568
+ original_att = original_results.att
569
+
570
+ # Get unit-level treatment assignment
571
+ unit_treatment = data.groupby(unit)[treatment].first().reset_index()
572
+ units = unit_treatment[unit].values
573
+ n_treated = int(unit_treatment[treatment].sum())
574
+
575
+ # Permutation loop
576
+ permuted_effects = np.zeros(n_permutations)
577
+
578
+ for i in range(n_permutations):
579
+ # Randomly assign treatment to units
580
+ perm_treated_units = rng.choice(units, size=n_treated, replace=False)
581
+
582
+ # Create permuted data
583
+ perm_data = data.copy()
584
+ perm_data["_perm_treatment"] = perm_data[unit].isin(perm_treated_units).astype(int)
585
+
586
+ # Fit DiD
587
+ try:
588
+ perm_did = DifferenceInDifferences(**estimator_kwargs)
589
+ perm_results = perm_did.fit(
590
+ perm_data, outcome=outcome, treatment="_perm_treatment", time=time
591
+ )
592
+ permuted_effects[i] = perm_results.att
593
+ except (ValueError, KeyError, np.linalg.LinAlgError):
594
+ # Handle edge cases where fitting fails
595
+ permuted_effects[i] = np.nan
596
+
597
+ # Remove any NaN values and track failure rate
598
+ valid_effects = permuted_effects[~np.isnan(permuted_effects)]
599
+ n_failed = n_permutations - len(valid_effects)
600
+
601
+ if len(valid_effects) == 0:
602
+ raise RuntimeError(
603
+ f"All {n_permutations} permutations failed. This typically occurs when:\n"
604
+ f" - Treatment/control groups are too small for valid permutation\n"
605
+ f" - Data contains collinearity or singular matrices after permutation\n"
606
+ f" - There are too few observations per time period\n"
607
+ f"Consider checking data quality with validate_did_data() from diff_diff.prep."
608
+ )
609
+
610
+ # Warn if significant number of permutations failed
611
+ if n_failed > 0:
612
+ failure_rate = n_failed / n_permutations
613
+ if failure_rate > 0.1:
614
+ import warnings
615
+
616
+ warnings.warn(
617
+ f"{n_failed}/{n_permutations} permutations failed ({failure_rate:.1%}). "
618
+ f"Results based on {len(valid_effects)} successful permutations.",
619
+ UserWarning,
620
+ stacklevel=2,
621
+ )
622
+
623
+ # Compute p-value: proportion of |permuted| >= |original|
624
+ p_value = np.mean(np.abs(valid_effects) >= np.abs(original_att))
625
+
626
+ # Ensure p-value is at least 1/(n_permutations + 1)
627
+ p_value = max(p_value, 1 / (len(valid_effects) + 1))
628
+
629
+ # Compute SE and CI from permutation distribution
630
+ se = np.std(valid_effects, ddof=1)
631
+ ci_lower = np.percentile(valid_effects, alpha / 2 * 100)
632
+ ci_upper = np.percentile(valid_effects, (1 - alpha / 2) * 100)
633
+
634
+ # NOTE: Not using safe_inference — p_value is permutation-based, CI is percentile-based.
635
+ t_stat = original_att / se if np.isfinite(se) and se > 0 else np.nan
636
+
637
+ return PlaceboTestResults(
638
+ test_type="permutation",
639
+ placebo_effect=np.mean(valid_effects), # Mean of null distribution
640
+ se=se,
641
+ t_stat=t_stat,
642
+ p_value=p_value,
643
+ conf_int=(ci_lower, ci_upper),
644
+ n_obs=len(data),
645
+ is_significant=bool(p_value < alpha),
646
+ alpha=alpha,
647
+ original_effect=original_att,
648
+ original_se=original_results.se,
649
+ permutation_distribution=valid_effects,
650
+ n_permutations=len(valid_effects),
651
+ )
652
+
653
+
654
def leave_one_out_test(
    data: pd.DataFrame,
    outcome: str,
    treatment: str,
    time: str,
    unit: str,
    alpha: float = 0.05,
    **estimator_kwargs,
) -> PlaceboTestResults:
    """
    Gauge how much the DiD estimate depends on any single treated unit.

    Each unit that has at least one row with ``treatment == 1`` is removed
    in turn and the model is re-fitted on the remaining rows. A wide spread
    across the re-estimated ATTs indicates the result hinges on one unit.

    Parameters
    ----------
    data : pd.DataFrame
        Panel data.
    outcome : str
        Outcome variable column.
    treatment : str
        Treatment indicator column.
    time : str
        Time period column.
    unit : str
        Unit identifier column.
    alpha : float, default=0.05
        Significance level.
    **estimator_kwargs
        Arguments passed to DifferenceInDifferences.

    Returns
    -------
    PlaceboTestResults
        ``leave_one_out_effects`` maps each dropped unit to the ATT
        estimated without it (NaN where the fit failed).
        ``placebo_effect`` is the mean of the successful estimates.

    Raises
    ------
    RuntimeError
        If no leave-one-out estimate could be computed.
    """
    # Baseline estimate on the full sample, reported for comparison.
    baseline = DifferenceInDifferences(**estimator_kwargs).fit(
        data, outcome=outcome, treatment=treatment, time=time
    )
    baseline_att = baseline.att

    # Units with at least one treated row are the drop candidates.
    treated_rows = data[treatment] == 1
    candidates = data.loc[treated_rows, unit].unique()

    att_without = {}
    for dropped in candidates:
        remaining = data[data[unit] != dropped].copy()

        # Nothing to estimate once the last treated observation is gone.
        if remaining[treatment].sum() == 0:
            continue

        try:
            refit = DifferenceInDifferences(**estimator_kwargs).fit(
                remaining, outcome=outcome, treatment=treatment, time=time
            )
            att_without[dropped] = refit.att
        except (ValueError, KeyError, np.linalg.LinAlgError):
            # Record the failure so it can be reported below.
            att_without[dropped] = np.nan

    usable = [att for att in att_without.values() if not np.isnan(att)]
    n_total = len(att_without)
    n_usable = len(usable)
    n_failed = n_total - n_usable

    if not usable:
        raise RuntimeError(
            f"All {n_total} leave-one-out estimates failed. This typically occurs when:\n"
            f" - Removing any single treated unit causes model fitting to fail\n"
            f" - Very few treated units (need at least 2 for LOO)\n"
            f" - Data has collinearity issues that manifest when units are removed\n"
            f"Consider checking data quality and ensuring sufficient treated units."
        )

    if n_failed > 0:
        import warnings

        failed_units = [key for key, att in att_without.items() if np.isnan(att)]
        warnings.warn(
            f"{n_failed}/{n_total} leave-one-out estimates failed for units: {failed_units}. "
            f"Results based on {n_usable} successful iterations.",
            UserWarning,
            stacklevel=2,
        )

    # Summarise the leave-one-out distribution; a spread needs two or more
    # successful estimates.
    mean_effect = np.mean(usable)
    if n_usable > 1:
        se = np.std(usable, ddof=1)
        df = n_usable - 1
    else:
        se = np.nan
        df = 1
    t_stat, p_value, conf_int = safe_inference(mean_effect, se, alpha=alpha, df=df)

    return PlaceboTestResults(
        test_type="leave_one_out",
        placebo_effect=mean_effect,
        se=se,
        t_stat=t_stat,
        p_value=p_value,
        conf_int=conf_int,
        n_obs=len(data),
        is_significant=bool(p_value < alpha),
        alpha=alpha,
        original_effect=baseline_att,
        original_se=baseline.se,
        leave_one_out_effects=att_without,
    )
764
+
765
+
766
+ def run_all_placebo_tests(
767
+ data: pd.DataFrame,
768
+ outcome: str,
769
+ treatment: str,
770
+ time: str,
771
+ unit: str,
772
+ pre_periods: List[Any],
773
+ post_periods: List[Any],
774
+ n_permutations: int = 500,
775
+ alpha: float = 0.05,
776
+ seed: Optional[int] = None,
777
+ **estimator_kwargs,
778
+ ) -> Dict[str, Union[PlaceboTestResults, Dict[str, str]]]:
779
+ """
780
+ Run a comprehensive suite of placebo tests.
781
+
782
+ Runs fake timing tests for each pre-period, a permutation test, and
783
+ a leave-one-out sensitivity analysis. If a test fails, the result
784
+ will be a dict with an "error" key containing the error message.
785
+
786
+ Parameters
787
+ ----------
788
+ data : pd.DataFrame
789
+ Panel data.
790
+ outcome : str
791
+ Outcome variable column.
792
+ treatment : str
793
+ Treatment indicator column.
794
+ time : str
795
+ Time period column.
796
+ unit : str
797
+ Unit identifier column.
798
+ pre_periods : list
799
+ List of pre-treatment periods.
800
+ post_periods : list
801
+ List of post-treatment periods.
802
+ n_permutations : int, default=500
803
+ Permutations for permutation test.
804
+ alpha : float, default=0.05
805
+ Significance level.
806
+ seed : int, optional
807
+ Random seed.
808
+ **estimator_kwargs
809
+ Arguments passed to estimators.
810
+
811
+ Returns
812
+ -------
813
+ dict
814
+ Dictionary mapping test names to PlaceboTestResults.
815
+ Keys: "fake_timing_{period}", "permutation", "leave_one_out"
816
+ """
817
+ results = {}
818
+
819
+ # Fake timing tests for each pre-period (except first)
820
+ for period in pre_periods[1:]: # Skip first period
821
+ try:
822
+ test_result = placebo_timing_test(
823
+ data=data,
824
+ outcome=outcome,
825
+ treatment=treatment,
826
+ time=time,
827
+ fake_treatment_period=period,
828
+ post_periods=post_periods,
829
+ alpha=alpha,
830
+ **estimator_kwargs,
831
+ )
832
+ results[f"fake_timing_{period}"] = test_result
833
+ except Exception as e:
834
+ # Store structured error info for debugging
835
+ results[f"fake_timing_{period}"] = {
836
+ "error": str(e),
837
+ "error_type": type(e).__name__,
838
+ "test_type": "fake_timing",
839
+ "period": period,
840
+ }
841
+
842
+ # Permutation test
843
+ try:
844
+ perm_result = permutation_test(
845
+ data=data,
846
+ outcome=outcome,
847
+ treatment=treatment,
848
+ time=time,
849
+ unit=unit,
850
+ n_permutations=n_permutations,
851
+ alpha=alpha,
852
+ seed=seed,
853
+ **estimator_kwargs,
854
+ )
855
+ results["permutation"] = perm_result
856
+ except Exception as e:
857
+ results["permutation"] = {
858
+ "error": str(e),
859
+ "error_type": type(e).__name__,
860
+ "test_type": "permutation",
861
+ }
862
+
863
+ # Leave-one-out test
864
+ try:
865
+ loo_result = leave_one_out_test(
866
+ data=data,
867
+ outcome=outcome,
868
+ treatment=treatment,
869
+ time=time,
870
+ unit=unit,
871
+ alpha=alpha,
872
+ **estimator_kwargs,
873
+ )
874
+ results["leave_one_out"] = loo_result
875
+ except Exception as e:
876
+ results["leave_one_out"] = {
877
+ "error": str(e),
878
+ "error_type": type(e).__name__,
879
+ "test_type": "leave_one_out",
880
+ }
881
+
882
+ return results