diff-diff 2.1.0__cp39-cp39-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,927 @@
+ """
+ Diagnostic tools for validating Difference-in-Differences assumptions.
+
+ This module provides placebo tests and other diagnostic tools for assessing
+ the validity of the parallel trends assumption in DiD designs.
+
+ References
+ ----------
+ Bertrand, M., Duflo, E., & Mullainathan, S. (2004). How Much Should We Trust
+ Differences-in-Differences Estimates? The Quarterly Journal of Economics,
+ 119(1), 249-275.
+ """
+
+ from dataclasses import dataclass, field
+ from typing import Any, Dict, List, Optional, Tuple, Union
+
+ import numpy as np
+ import pandas as pd
+
+ from diff_diff.estimators import DifferenceInDifferences
+ from diff_diff.results import _get_significance_stars
+ from diff_diff.utils import compute_confidence_interval, compute_p_value
+
+
+ @dataclass
+ class PlaceboTestResults:
+     """
+     Results from a placebo test for DiD assumption validation.
+
+     Attributes
+     ----------
+     test_type : str
+         Type of placebo test performed.
+     placebo_effect : float
+         Estimated placebo treatment effect.
+     se : float
+         Standard error of the placebo effect.
+     t_stat : float
+         T-statistic for the placebo effect.
+     p_value : float
+         P-value for testing placebo_effect = 0.
+     conf_int : tuple
+         Confidence interval for the placebo effect.
+     n_obs : int
+         Number of observations used in the test.
+     is_significant : bool
+         Whether the placebo effect is significant at the chosen level (alpha).
+     alpha : float
+         Significance level used for the test (default 0.05).
+     original_effect : float, optional
+         Original ATT estimate for comparison.
+     original_se : float, optional
+         Original SE for comparison.
+     permutation_distribution : np.ndarray, optional
+         Distribution of permuted effects (for permutation test).
+     leave_one_out_effects : dict, optional
+         Unit-specific effects (for leave-one-out test).
+     fake_period : any, optional
+         The fake treatment period used (for timing test).
+     fake_group : list, optional
+         The fake treatment group used (for group test).
+     n_permutations : int, optional
+         Number of valid permutations used (for permutation test).
+     """
+
+     test_type: str
+     placebo_effect: float
+     se: float
+     t_stat: float
+     p_value: float
+     conf_int: Tuple[float, float]
+     n_obs: int
+     is_significant: bool
+     alpha: float = 0.05
+
+     # Optional fields for specific test types
+     original_effect: Optional[float] = None
+     original_se: Optional[float] = None
+     permutation_distribution: Optional[np.ndarray] = field(default=None, repr=False)
+     leave_one_out_effects: Optional[Dict[Any, float]] = field(default=None)
+     fake_period: Optional[Any] = None
+     fake_group: Optional[List[Any]] = field(default=None)
+     n_permutations: Optional[int] = None
+
+     @property
+     def significance_stars(self) -> str:
+         """Return significance stars based on p-value."""
+         return _get_significance_stars(self.p_value)
+
+     def summary(self) -> str:
+         """Generate formatted summary of placebo test results."""
+         conf_level = int((1 - self.alpha) * 100)
+
+         lines = [
+             "=" * 65,
+             f"Placebo Test Results: {self.test_type}".center(65),
+             "=" * 65,
+             "",
+             f"{'Placebo effect:':<25} {self.placebo_effect:>12.4f}",
+             f"{'Standard error:':<25} {self.se:>12.4f}",
+             f"{'T-statistic:':<25} {self.t_stat:>12.4f}",
+             f"{'P-value:':<25} {self.p_value:>12.4f}",
+             f"{conf_level}% CI: [{self.conf_int[0]:.4f}, {self.conf_int[1]:.4f}]",
+             "",
+             f"{'Observations:':<25} {self.n_obs:>12}",
+         ]
+
+         if self.original_effect is not None:
+             lines.extend([
+                 "",
+                 "-" * 65,
+                 "Comparison with Original Estimate".center(65),
+                 "-" * 65,
+                 f"{'Original ATT:':<25} {self.original_effect:>12.4f}",
+             ])
+             if self.original_se is not None:
+                 lines.append(f"{'Original SE:':<25} {self.original_se:>12.4f}")
+
+         if self.n_permutations is not None:
+             lines.append(f"{'Number of permutations:':<25} {self.n_permutations:>12}")
+
+         if self.fake_period is not None:
+             lines.append(f"{'Fake treatment period:':<25} {str(self.fake_period):>12}")
+
+         if self.leave_one_out_effects is not None:
+             n_units = len(self.leave_one_out_effects)
+             effects = list(self.leave_one_out_effects.values())
+             lines.extend([
+                 "",
+                 "-" * 65,
+                 "Leave-One-Out Summary".center(65),
+                 "-" * 65,
+                 f"{'Units analyzed:':<25} {n_units:>12}",
+                 f"{'Mean effect:':<25} {np.mean(effects):>12.4f}",
+                 f"{'Std. dev.:':<25} {np.std(effects, ddof=1):>12.4f}",
+                 f"{'Min effect:':<25} {np.min(effects):>12.4f}",
+                 f"{'Max effect:':<25} {np.max(effects):>12.4f}",
+             ])
+
+         # Interpretation
+         lines.extend([
+             "",
+             "-" * 65,
+             "Interpretation".center(65),
+             "-" * 65,
+         ])
+
+         if self.is_significant:
+             lines.append(
+                 f"WARNING: Significant placebo effect detected (p < {self.alpha:g})."
+             )
+             lines.append(
+                 "This suggests potential violations of the parallel trends assumption."
+             )
+         else:
+             lines.append(
+                 f"No significant placebo effect detected (p >= {self.alpha:g})."
+             )
+             lines.append(
+                 "This is consistent with the parallel trends assumption."
+             )
+
+         lines.append("=" * 65)
+
+         return "\n".join(lines)
+
+     def print_summary(self) -> None:
+         """Print summary to stdout."""
+         print(self.summary())
+
+     def to_dict(self) -> Dict[str, Any]:
+         """Convert results to a dictionary."""
+         result = {
+             "test_type": self.test_type,
+             "placebo_effect": self.placebo_effect,
+             "se": self.se,
+             "t_stat": self.t_stat,
+             "p_value": self.p_value,
+             "conf_int_lower": self.conf_int[0],
+             "conf_int_upper": self.conf_int[1],
+             "n_obs": self.n_obs,
+             "is_significant": self.is_significant,
+         }
+
+         if self.original_effect is not None:
+             result["original_effect"] = self.original_effect
+         if self.original_se is not None:
+             result["original_se"] = self.original_se
+         if self.n_permutations is not None:
+             result["n_permutations"] = self.n_permutations
+
+         return result
+
+     def to_dataframe(self) -> pd.DataFrame:
+         """Convert results to a DataFrame."""
+         return pd.DataFrame([self.to_dict()])
+
+
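For orientation, a minimal sketch of how a PlaceboTestResults object might be consumed once one of the test functions defined below has produced it; `results`, the panel DataFrame behind it, and the printed values are illustrative assumptions, not output from the package:

    # `results` is assumed to come from one of the placebo tests below,
    # e.g. permutation_test(...) or placebo_timing_test(...).
    results.print_summary()              # formatted text report
    row = results.to_dataframe()         # single-row pandas DataFrame
    print(results.placebo_effect, results.significance_stars)
    if results.is_significant:
        print("Placebo effect significant at alpha =", results.alpha)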
+ def run_placebo_test(
+     data: pd.DataFrame,
+     outcome: str,
+     treatment: str,
+     time: str,
+     unit: Optional[str] = None,
+     test_type: str = "fake_timing",
+     fake_treatment_period: Optional[Any] = None,
+     fake_treatment_group: Optional[List[Any]] = None,
+     post_periods: Optional[List[Any]] = None,
+     n_permutations: int = 1000,
+     alpha: float = 0.05,
+     seed: Optional[int] = None,
+     **estimator_kwargs
+ ) -> PlaceboTestResults:
+     """
+     Run a placebo test to validate DiD assumptions.
+
+     Placebo tests provide evidence on the validity of the parallel trends
+     assumption by testing whether "fake" treatments produce significant effects.
+     A significant placebo effect suggests the parallel trends assumption may
+     be violated.
+
+     Parameters
+     ----------
+     data : pd.DataFrame
+         Panel data for DiD analysis.
+     outcome : str
+         Name of outcome variable column.
+     treatment : str
+         Name of treatment indicator column (0/1).
+     time : str
+         Name of time period column.
+     unit : str, optional
+         Name of unit identifier column. Required for the fake_group,
+         permutation, and leave_one_out tests.
+     test_type : str, default="fake_timing"
+         Type of placebo test:
+         - "fake_timing": Assign treatment at a fake (earlier) time period
+         - "fake_group": Run DiD designating some control units as "fake treated"
+         - "permutation": Randomly reassign treatment and compute distribution
+         - "leave_one_out": Drop each treated unit and re-estimate
+     fake_treatment_period : any, optional
+         For "fake_timing": The fake treatment period to test. Required for
+         that test; should be a pre-treatment period.
+     fake_treatment_group : list, optional
+         For "fake_group": List of control unit IDs to designate as fake treated.
+     post_periods : list, optional
+         List of post-treatment periods. If None, the later half of the
+         observed periods is used.
+     n_permutations : int, default=1000
+         For "permutation": Number of random treatment assignments.
+     alpha : float, default=0.05
+         Significance level.
+     seed : int, optional
+         Random seed for reproducibility.
+     **estimator_kwargs
+         Additional arguments passed to the DiD estimator.
+
+     Returns
+     -------
+     PlaceboTestResults
+         Object containing placebo effect estimates, p-values, and diagnostics.
+
+     Examples
+     --------
+     Fake timing test:
+
+     >>> results = run_placebo_test(
+     ...     data, outcome='sales', treatment='treated', time='period',
+     ...     test_type='fake_timing',
+     ...     fake_treatment_period=1,  # Pre-treatment period
+     ...     post_periods=[2, 3, 4]
+     ... )
+     >>> if results.is_significant:
+     ...     print("Warning: Pre-treatment differential trends detected!")
+
+     Permutation test:
+
+     >>> results = run_placebo_test(
+     ...     data, outcome='sales', treatment='treated', time='period',
+     ...     unit='unit_id',
+     ...     test_type='permutation',
+     ...     n_permutations=1000,
+     ...     seed=42
+     ... )
+     >>> print(f"Permutation p-value: {results.p_value:.4f}")
+
+     References
+     ----------
+     Bertrand, M., Duflo, E., & Mullainathan, S. (2004). How Much Should
+     We Trust Differences-in-Differences Estimates? The Quarterly Journal
+     of Economics, 119(1), 249-275.
+     """
+     test_type = test_type.lower()
+     valid_types = ["fake_timing", "fake_group", "permutation", "leave_one_out"]
+
+     if test_type not in valid_types:
+         raise ValueError(
+             f"test_type must be one of {valid_types}, got '{test_type}'"
+         )
+
+     if test_type == "fake_timing":
+         if fake_treatment_period is None:
+             raise ValueError("fake_treatment_period is required for fake_timing test")
+         return placebo_timing_test(
+             data=data,
+             outcome=outcome,
+             treatment=treatment,
+             time=time,
+             fake_treatment_period=fake_treatment_period,
+             post_periods=post_periods,
+             alpha=alpha,
+             **estimator_kwargs
+         )
+
+     elif test_type == "fake_group":
+         if unit is None:
+             raise ValueError("unit is required for fake_group test")
+         if fake_treatment_group is None or len(fake_treatment_group) == 0:
+             raise ValueError("fake_treatment_group is required for fake_group test")
+         # Restrict to never-treated units so the actual treatment effect
+         # does not contaminate the placebo comparison.
+         treated_ids = data.loc[data[treatment] == 1, unit].unique()
+         control_data = data[~data[unit].isin(treated_ids)]
+         return placebo_group_test(
+             data=control_data,
+             outcome=outcome,
+             time=time,
+             unit=unit,
+             fake_treated_units=fake_treatment_group,
+             post_periods=post_periods,
+             alpha=alpha,
+             **estimator_kwargs
+         )
+
+     elif test_type == "permutation":
+         if unit is None:
+             raise ValueError("unit is required for permutation test")
+         return permutation_test(
+             data=data,
+             outcome=outcome,
+             treatment=treatment,
+             time=time,
+             unit=unit,
+             n_permutations=n_permutations,
+             alpha=alpha,
+             seed=seed,
+             **estimator_kwargs
+         )
+
+     elif test_type == "leave_one_out":
+         if unit is None:
+             raise ValueError("unit is required for leave_one_out test")
+         return leave_one_out_test(
+             data=data,
+             outcome=outcome,
+             treatment=treatment,
+             time=time,
+             unit=unit,
+             alpha=alpha,
+             **estimator_kwargs
+         )
+
+     # This should never be reached due to validation above
+     raise ValueError(f"Unknown test type: {test_type}")
+
+
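The docstring above illustrates the fake_timing and permutation variants; a comparable sketch for the two remaining test types, assuming example columns 'sales', 'treated', 'period', 'unit_id' and made-up unit IDs:

    # Fake-group placebo: pretend some never-treated units were treated.
    group_res = run_placebo_test(
        data, outcome='sales', treatment='treated', time='period',
        unit='unit_id',
        test_type='fake_group',
        fake_treatment_group=[101, 102, 103],  # assumed control unit IDs
        post_periods=[2, 3, 4],
    )

    # Leave-one-out sensitivity: drop each treated unit in turn.
    loo_res = run_placebo_test(
        data, outcome='sales', treatment='treated', time='period',
        unit='unit_id',
        test_type='leave_one_out',
    )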
+ def placebo_timing_test(
+     data: pd.DataFrame,
+     outcome: str,
+     treatment: str,
+     time: str,
+     fake_treatment_period: Any,
+     post_periods: Optional[List[Any]] = None,
+     alpha: float = 0.05,
+     **estimator_kwargs
+ ) -> PlaceboTestResults:
+     """
+     Test for pre-treatment effects by moving treatment timing earlier.
+
+     Creates a fake "post" indicator using pre-treatment data only, then
+     estimates a DiD model. A significant effect suggests pre-existing
+     differential trends.
+
+     Parameters
+     ----------
+     data : pd.DataFrame
+         Panel data.
+     outcome : str
+         Outcome variable column.
+     treatment : str
+         Treatment indicator column.
+     time : str
+         Time period column.
+     fake_treatment_period : any
+         Period to use as fake treatment timing. Should be a pre-treatment
+         period later than the earliest period, so that the fake design has
+         both "pre" and "post" observations.
+     post_periods : list, optional
+         List of actual post-treatment periods. If None, the later half of
+         the observed periods is used.
+     alpha : float, default=0.05
+         Significance level.
+     **estimator_kwargs
+         Arguments passed to DifferenceInDifferences.
+
+     Returns
+     -------
+     PlaceboTestResults
+         Results of the fake timing placebo test.
+     """
+     all_periods = sorted(data[time].unique())
+
+     # Infer post periods if not provided
+     if post_periods is None:
+         # Use second half of periods as post
+         mid = len(all_periods) // 2
+         post_periods = all_periods[mid:]
+
+     # Validate fake_treatment_period is pre-treatment
+     if fake_treatment_period in post_periods:
+         raise ValueError(
+             f"fake_treatment_period ({fake_treatment_period}) must be a "
+             f"pre-treatment period, not in post_periods ({post_periods})"
+         )
+
+     # Use only pre-treatment data
+     pre_periods = [p for p in all_periods if p not in post_periods]
+     pre_data = data[data[time].isin(pre_periods)].copy()
+
+     # Create fake post indicator
+     pre_data["_fake_post"] = (pre_data[time] >= fake_treatment_period).astype(int)
+
+     # Fit DiD on pre-treatment data with fake post
+     did = DifferenceInDifferences(**estimator_kwargs)
+     results = did.fit(
+         pre_data,
+         outcome=outcome,
+         treatment=treatment,
+         time="_fake_post"
+     )
+
+     # Also fit on full data for comparison
+     data_with_post = data.copy()
+     data_with_post["_post"] = data_with_post[time].isin(post_periods).astype(int)
+     did_full = DifferenceInDifferences(**estimator_kwargs)
+     results_full = did_full.fit(
+         data_with_post,
+         outcome=outcome,
+         treatment=treatment,
+         time="_post"
+     )
+
+     return PlaceboTestResults(
+         test_type="fake_timing",
+         placebo_effect=results.att,
+         se=results.se,
+         t_stat=results.t_stat,
+         p_value=results.p_value,
+         conf_int=results.conf_int,
+         n_obs=results.n_obs,
+         is_significant=bool(results.p_value < alpha),
+         alpha=alpha,
+         original_effect=results_full.att,
+         original_se=results_full.se,
+         fake_period=fake_treatment_period,
+     )
+
+
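A usage sketch for calling placebo_timing_test directly and comparing the placebo estimate with the full-sample ATT it stores for reference; the column names and period values are assumed example data:

    timing = placebo_timing_test(
        data, outcome='sales', treatment='treated', time='period',
        fake_treatment_period=1,   # a pre-treatment period
        post_periods=[2, 3, 4],
    )
    print(f"Placebo effect: {timing.placebo_effect:.3f} (p = {timing.p_value:.3f})")
    print(f"Original ATT:   {timing.original_effect:.3f}")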
+ def placebo_group_test(
+     data: pd.DataFrame,
+     outcome: str,
+     time: str,
+     unit: str,
+     fake_treated_units: List[Any],
+     post_periods: Optional[List[Any]] = None,
+     alpha: float = 0.05,
+     **estimator_kwargs
+ ) -> PlaceboTestResults:
+     """
+     Test for differential trends among never-treated units.
+
+     Designates some never-treated units as "fake treated" and estimates a
+     DiD model on the supplied data. A significant effect suggests
+     heterogeneous trends in the control group.
+
+     Note that this function uses the data exactly as given and does not
+     itself drop actually treated units; pass a DataFrame restricted to
+     never-treated units for a clean comparison (run_placebo_test does this
+     when called with test_type="fake_group").
+
+     Parameters
+     ----------
+     data : pd.DataFrame
+         Panel data, ideally restricted to never-treated units.
+     outcome : str
+         Outcome variable column.
+     time : str
+         Time period column.
+     unit : str
+         Unit identifier column.
+     fake_treated_units : list
+         List of control unit IDs to designate as "fake treated".
+     post_periods : list, optional
+         List of post-treatment period values. If None, the later half of
+         the observed periods is used.
+     alpha : float, default=0.05
+         Significance level.
+     **estimator_kwargs
+         Arguments passed to DifferenceInDifferences.
+
+     Returns
+     -------
+     PlaceboTestResults
+         Results of the fake group placebo test.
+     """
+     if fake_treated_units is None or len(fake_treated_units) == 0:
+         raise ValueError("fake_treated_units must be a non-empty list")
+
+     all_periods = sorted(data[time].unique())
+
+     # Infer post periods if not provided
+     if post_periods is None:
+         mid = len(all_periods) // 2
+         post_periods = all_periods[mid:]
+
+     # Create fake treatment indicator
+     fake_data = data.copy()
+     fake_data["_fake_treated"] = fake_data[unit].isin(fake_treated_units).astype(int)
+     fake_data["_post"] = fake_data[time].isin(post_periods).astype(int)
+
+     # Fit DiD
+     did = DifferenceInDifferences(**estimator_kwargs)
+     results = did.fit(
+         fake_data,
+         outcome=outcome,
+         treatment="_fake_treated",
+         time="_post"
+     )
+
+     return PlaceboTestResults(
+         test_type="fake_group",
+         placebo_effect=results.att,
+         se=results.se,
+         t_stat=results.t_stat,
+         p_value=results.p_value,
+         conf_int=results.conf_int,
+         n_obs=results.n_obs,
+         is_significant=bool(results.p_value < alpha),
+         alpha=alpha,
+         fake_group=list(fake_treated_units),
+     )
+
+
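Because placebo_group_test uses the data exactly as supplied, a direct call would typically restrict to never-treated units first. A sketch, with the 'treated' and 'unit_id' columns and the half-sample of fake-treated units assumed for illustration:

    controls = data[data['treated'] == 0]
    control_ids = pd.Series(controls['unit_id'].unique())
    fake_ids = control_ids.sample(frac=0.5, random_state=0).tolist()

    group = placebo_group_test(
        controls, outcome='sales', time='period', unit='unit_id',
        fake_treated_units=fake_ids,
        post_periods=[2, 3, 4],
    )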
+ def permutation_test(
+     data: pd.DataFrame,
+     outcome: str,
+     treatment: str,
+     time: str,
+     unit: str,
+     n_permutations: int = 1000,
+     alpha: float = 0.05,
+     seed: Optional[int] = None,
+     **estimator_kwargs
+ ) -> PlaceboTestResults:
+     """
+     Compute permutation-based p-value for DiD estimate.
+
+     Randomly reassigns treatment status at the unit level and computes the
+     DiD estimate for each permutation. The p-value is the proportion of
+     permuted estimates at least as extreme as the original.
+
+     Parameters
+     ----------
+     data : pd.DataFrame
+         Panel data.
+     outcome : str
+         Outcome variable column.
+     treatment : str
+         Treatment indicator column.
+     time : str
+         Time period column.
+     unit : str
+         Unit identifier column.
+     n_permutations : int, default=1000
+         Number of random permutations.
+     alpha : float, default=0.05
+         Significance level.
+     seed : int, optional
+         Random seed for reproducibility.
+     **estimator_kwargs
+         Arguments passed to DifferenceInDifferences.
+
+     Returns
+     -------
+     PlaceboTestResults
+         Results with permutation distribution and p-value.
+
+     Notes
+     -----
+     The permutation (randomization) test does not rely on asymptotic
+     approximations, so it remains valid in small samples. With a finite
+     number of random permutations the reported p-value is a Monte Carlo
+     approximation to the exact randomization p-value.
+     """
+     rng = np.random.default_rng(seed)
+
+     # First, fit original model
+     did = DifferenceInDifferences(**estimator_kwargs)
+     original_results = did.fit(
+         data,
+         outcome=outcome,
+         treatment=treatment,
+         time=time
+     )
+     original_att = original_results.att
+
+     # Get unit-level treatment assignment
+     unit_treatment = (
+         data.groupby(unit)[treatment]
+         .first()
+         .reset_index()
+     )
+     units = unit_treatment[unit].values
+     n_treated = int(unit_treatment[treatment].sum())
+
+     # Permutation loop
+     permuted_effects = np.zeros(n_permutations)
+
+     for i in range(n_permutations):
+         # Randomly assign treatment to units
+         perm_treated_units = rng.choice(units, size=n_treated, replace=False)
+
+         # Create permuted data
+         perm_data = data.copy()
+         perm_data["_perm_treatment"] = perm_data[unit].isin(perm_treated_units).astype(int)
+
+         # Fit DiD
+         try:
+             perm_did = DifferenceInDifferences(**estimator_kwargs)
+             perm_results = perm_did.fit(
+                 perm_data,
+                 outcome=outcome,
+                 treatment="_perm_treatment",
+                 time=time
+             )
+             permuted_effects[i] = perm_results.att
+         except (ValueError, KeyError, np.linalg.LinAlgError):
+             # Handle edge cases where fitting fails
+             permuted_effects[i] = np.nan
+
+     # Remove any NaN values and track failure rate
+     valid_effects = permuted_effects[~np.isnan(permuted_effects)]
+     n_failed = n_permutations - len(valid_effects)
+
+     if len(valid_effects) == 0:
+         raise RuntimeError(
+             f"All {n_permutations} permutations failed. This typically occurs when:\n"
+             f" - Treatment/control groups are too small for valid permutation\n"
+             f" - Data contains collinearity or singular matrices after permutation\n"
+             f" - There are too few observations per time period\n"
+             f"Consider checking data quality with validate_did_data() from diff_diff.prep."
+         )
+
+     # Warn if a noticeable share of permutations failed
+     if n_failed > 0:
+         failure_rate = n_failed / n_permutations
+         if failure_rate > 0.1:
+             import warnings
+             warnings.warn(
+                 f"{n_failed}/{n_permutations} permutations failed ({failure_rate:.1%}). "
+                 f"Results based on {len(valid_effects)} successful permutations.",
+                 UserWarning,
+                 stacklevel=2
+             )
+
+     # Compute p-value: proportion of |permuted| >= |original|
+     p_value = np.mean(np.abs(valid_effects) >= np.abs(original_att))
+
+     # Ensure p-value is at least 1/(number of valid permutations + 1)
+     p_value = max(p_value, 1 / (len(valid_effects) + 1))
+
+     # Compute SE and CI from permutation distribution
+     se = np.std(valid_effects, ddof=1)
+     ci_lower = np.percentile(valid_effects, alpha / 2 * 100)
+     ci_upper = np.percentile(valid_effects, (1 - alpha / 2) * 100)
+
+     # T-stat from original estimate
+     t_stat = original_att / se if se > 0 else 0.0
+
+     return PlaceboTestResults(
+         test_type="permutation",
+         placebo_effect=np.mean(valid_effects),  # Mean of null distribution
+         se=se,
+         t_stat=t_stat,
+         p_value=p_value,
+         conf_int=(ci_lower, ci_upper),
+         n_obs=len(data),
+         is_significant=bool(p_value < alpha),
+         alpha=alpha,
+         original_effect=original_att,
+         original_se=original_results.se,
+         permutation_distribution=valid_effects,
+         n_permutations=len(valid_effects),
+     )
+
+
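A sketch of how the stored permutation distribution might be inspected after the call; the column names are assumptions, while the attributes are those populated by the function above:

    perm = permutation_test(
        data, outcome='sales', treatment='treated', time='period',
        unit='unit_id', n_permutations=1000, seed=42,
    )
    # Share of placebo assignments at least as extreme as the real estimate,
    # i.e. the quantity behind perm.p_value (before the floor applied above).
    extreme = np.mean(np.abs(perm.permutation_distribution) >= abs(perm.original_effect))
    print(f"p-value: {perm.p_value:.4f} (raw extreme share: {extreme:.4f})")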
+ def leave_one_out_test(
+     data: pd.DataFrame,
+     outcome: str,
+     treatment: str,
+     time: str,
+     unit: str,
+     alpha: float = 0.05,
+     **estimator_kwargs
+ ) -> PlaceboTestResults:
+     """
+     Assess sensitivity by dropping each treated unit in turn.
+
+     For each treated unit, drops that unit and re-estimates the DiD model.
+     Large variation in estimates suggests results are driven by a single unit.
+
+     Parameters
+     ----------
+     data : pd.DataFrame
+         Panel data.
+     outcome : str
+         Outcome variable column.
+     treatment : str
+         Treatment indicator column.
+     time : str
+         Time period column.
+     unit : str
+         Unit identifier column.
+     alpha : float, default=0.05
+         Significance level.
+     **estimator_kwargs
+         Arguments passed to DifferenceInDifferences.
+
+     Returns
+     -------
+     PlaceboTestResults
+         Results with leave_one_out_effects dict mapping unit -> ATT estimate.
+     """
+     # Fit original model
+     did = DifferenceInDifferences(**estimator_kwargs)
+     original_results = did.fit(
+         data,
+         outcome=outcome,
+         treatment=treatment,
+         time=time
+     )
+     original_att = original_results.att
+
+     # Get treated units
+     treated_units = data[data[treatment] == 1][unit].unique()
+
+     # Leave-one-out loop
+     loo_effects = {}
+
+     for u in treated_units:
+         # Drop this unit
+         loo_data = data[data[unit] != u].copy()
+
+         # Check we still have treated units
+         if loo_data[treatment].sum() == 0:
+             continue
+
+         try:
+             loo_did = DifferenceInDifferences(**estimator_kwargs)
+             loo_results = loo_did.fit(
+                 loo_data,
+                 outcome=outcome,
+                 treatment=treatment,
+                 time=time
+             )
+             loo_effects[u] = loo_results.att
+         except (ValueError, KeyError, np.linalg.LinAlgError):
+             # Skip units that cause fitting issues
+             loo_effects[u] = np.nan
+
+     # Remove NaN values for statistics and track failures
+     valid_effects = [v for v in loo_effects.values() if not np.isnan(v)]
+     n_total = len(loo_effects)
+     n_failed = n_total - len(valid_effects)
+
+     if len(valid_effects) == 0:
+         raise RuntimeError(
+             f"All {n_total} leave-one-out estimates failed. This typically occurs when:\n"
+             f" - Removing any single treated unit causes model fitting to fail\n"
+             f" - Very few treated units (need at least 2 for LOO)\n"
+             f" - Data has collinearity issues that manifest when units are removed\n"
+             f"Consider checking data quality and ensuring sufficient treated units."
+         )
+
+     # Warn if some LOO iterations failed
+     if n_failed > 0:
+         import warnings
+         failed_units = [u for u, v in loo_effects.items() if np.isnan(v)]
+         warnings.warn(
+             f"{n_failed}/{n_total} leave-one-out estimates failed for units: {failed_units}. "
+             f"Results based on {len(valid_effects)} successful iterations.",
+             UserWarning,
+             stacklevel=2
+         )
+
+     # Statistics of LOO distribution
+     mean_effect = np.mean(valid_effects)
+     se = np.std(valid_effects, ddof=1) if len(valid_effects) > 1 else 0.0
+     t_stat = mean_effect / se if se > 0 else 0.0
+
+     # Use t-distribution for p-value
+     df = len(valid_effects) - 1 if len(valid_effects) > 1 else 1
+     p_value = compute_p_value(t_stat, df=df)
+
+     # CI
+     conf_int = compute_confidence_interval(mean_effect, se, alpha, df=df)
+
+     return PlaceboTestResults(
+         test_type="leave_one_out",
+         placebo_effect=mean_effect,
+         se=se,
+         t_stat=t_stat,
+         p_value=p_value,
+         conf_int=conf_int,
+         n_obs=len(data),
+         is_significant=bool(p_value < alpha),
+         alpha=alpha,
+         original_effect=original_att,
+         original_se=original_results.se,
+         leave_one_out_effects=loo_effects,
+     )
+
+
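A sketch for identifying the treated unit whose removal moves the estimate the most; the column names are assumed, and leave_one_out_effects maps each treated unit ID to the ATT obtained without it:

    loo = leave_one_out_test(
        data, outcome='sales', treatment='treated', time='period', unit='unit_id',
    )
    shifts = {u: abs(att - loo.original_effect)
              for u, att in loo.leave_one_out_effects.items()
              if not np.isnan(att)}
    most_influential = max(shifts, key=shifts.get)
    print(most_influential, loo.leave_one_out_effects[most_influential])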
+ def run_all_placebo_tests(
+     data: pd.DataFrame,
+     outcome: str,
+     treatment: str,
+     time: str,
+     unit: str,
+     pre_periods: List[Any],
+     post_periods: List[Any],
+     n_permutations: int = 500,
+     alpha: float = 0.05,
+     seed: Optional[int] = None,
+     **estimator_kwargs
+ ) -> Dict[str, Union[PlaceboTestResults, Dict[str, str]]]:
+     """
+     Run a comprehensive suite of placebo tests.
+
+     Runs fake timing tests for each pre-period, a permutation test, and
+     a leave-one-out sensitivity analysis. If a test fails, the result
+     will be a dict with an "error" key containing the error message.
+
+     Parameters
+     ----------
+     data : pd.DataFrame
+         Panel data.
+     outcome : str
+         Outcome variable column.
+     treatment : str
+         Treatment indicator column.
+     time : str
+         Time period column.
+     unit : str
+         Unit identifier column.
+     pre_periods : list
+         List of pre-treatment periods.
+     post_periods : list
+         List of post-treatment periods.
+     n_permutations : int, default=500
+         Permutations for permutation test.
+     alpha : float, default=0.05
+         Significance level.
+     seed : int, optional
+         Random seed.
+     **estimator_kwargs
+         Arguments passed to estimators.
+
+     Returns
+     -------
+     dict
+         Dictionary mapping test names to PlaceboTestResults, or to an error
+         dict if that test failed.
+         Keys: "fake_timing_{period}", "permutation", "leave_one_out"
+     """
+     results = {}
+
+     # Fake timing tests for each pre-period (except first)
+     for period in pre_periods[1:]:  # Skip first period
+         try:
+             test_result = placebo_timing_test(
+                 data=data,
+                 outcome=outcome,
+                 treatment=treatment,
+                 time=time,
+                 fake_treatment_period=period,
+                 post_periods=post_periods,
+                 alpha=alpha,
+                 **estimator_kwargs
+             )
+             results[f"fake_timing_{period}"] = test_result
+         except Exception as e:
+             # Store structured error info for debugging
+             results[f"fake_timing_{period}"] = {
+                 "error": str(e),
+                 "error_type": type(e).__name__,
+                 "test_type": "fake_timing",
+                 "period": period
+             }
+
+     # Permutation test
+     try:
+         perm_result = permutation_test(
+             data=data,
+             outcome=outcome,
+             treatment=treatment,
+             time=time,
+             unit=unit,
+             n_permutations=n_permutations,
+             alpha=alpha,
+             seed=seed,
+             **estimator_kwargs
+         )
+         results["permutation"] = perm_result
+     except Exception as e:
+         results["permutation"] = {
+             "error": str(e),
+             "error_type": type(e).__name__,
+             "test_type": "permutation"
+         }
+
+     # Leave-one-out test
+     try:
+         loo_result = leave_one_out_test(
+             data=data,
+             outcome=outcome,
+             treatment=treatment,
+             time=time,
+             unit=unit,
+             alpha=alpha,
+             **estimator_kwargs
+         )
+         results["leave_one_out"] = loo_result
+     except Exception as e:
+         results["leave_one_out"] = {
+             "error": str(e),
+             "error_type": type(e).__name__,
+             "test_type": "leave_one_out"
+         }
+
+     return results
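Finally, a sketch of consuming the dictionary returned by run_all_placebo_tests, including the error-dict case described in its docstring; the columns and period lists are assumed example data:

    suite = run_all_placebo_tests(
        data, outcome='sales', treatment='treated', time='period', unit='unit_id',
        pre_periods=[0, 1], post_periods=[2, 3, 4], seed=42,
    )
    for name, res in suite.items():
        if isinstance(res, dict):  # a test that failed
            print(f"{name}: FAILED ({res['error_type']}: {res['error']})")
        else:
            print(f"{name}: effect = {res.placebo_effect:.3f}, p = {res.p_value:.3f}")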