diff-diff 3.0.1__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. diff_diff/__init__.py +382 -0
  2. diff_diff/_backend.py +134 -0
  3. diff_diff/_rust_backend.cp314-win_amd64.pyd +0 -0
  4. diff_diff/bacon.py +1140 -0
  5. diff_diff/bootstrap_utils.py +730 -0
  6. diff_diff/continuous_did.py +1626 -0
  7. diff_diff/continuous_did_bspline.py +190 -0
  8. diff_diff/continuous_did_results.py +374 -0
  9. diff_diff/datasets.py +815 -0
  10. diff_diff/diagnostics.py +882 -0
  11. diff_diff/efficient_did.py +1770 -0
  12. diff_diff/efficient_did_bootstrap.py +359 -0
  13. diff_diff/efficient_did_covariates.py +899 -0
  14. diff_diff/efficient_did_results.py +368 -0
  15. diff_diff/efficient_did_weights.py +617 -0
  16. diff_diff/estimators.py +1501 -0
  17. diff_diff/honest_did.py +2585 -0
  18. diff_diff/imputation.py +2458 -0
  19. diff_diff/imputation_bootstrap.py +418 -0
  20. diff_diff/imputation_results.py +448 -0
  21. diff_diff/linalg.py +2538 -0
  22. diff_diff/power.py +2588 -0
  23. diff_diff/practitioner.py +869 -0
  24. diff_diff/prep.py +1738 -0
  25. diff_diff/prep_dgp.py +1718 -0
  26. diff_diff/pretrends.py +1105 -0
  27. diff_diff/results.py +918 -0
  28. diff_diff/stacked_did.py +1049 -0
  29. diff_diff/stacked_did_results.py +339 -0
  30. diff_diff/staggered.py +3895 -0
  31. diff_diff/staggered_aggregation.py +864 -0
  32. diff_diff/staggered_bootstrap.py +752 -0
  33. diff_diff/staggered_results.py +416 -0
  34. diff_diff/staggered_triple_diff.py +1545 -0
  35. diff_diff/staggered_triple_diff_results.py +416 -0
  36. diff_diff/sun_abraham.py +1685 -0
  37. diff_diff/survey.py +1981 -0
  38. diff_diff/synthetic_did.py +1136 -0
  39. diff_diff/triple_diff.py +2047 -0
  40. diff_diff/trop.py +952 -0
  41. diff_diff/trop_global.py +1270 -0
  42. diff_diff/trop_local.py +1307 -0
  43. diff_diff/trop_results.py +356 -0
  44. diff_diff/twfe.py +542 -0
  45. diff_diff/two_stage.py +1952 -0
  46. diff_diff/two_stage_bootstrap.py +520 -0
  47. diff_diff/two_stage_results.py +400 -0
  48. diff_diff/utils.py +1902 -0
  49. diff_diff/visualization/__init__.py +61 -0
  50. diff_diff/visualization/_common.py +328 -0
  51. diff_diff/visualization/_continuous.py +274 -0
  52. diff_diff/visualization/_diagnostic.py +817 -0
  53. diff_diff/visualization/_event_study.py +1086 -0
  54. diff_diff/visualization/_power.py +661 -0
  55. diff_diff/visualization/_staggered.py +833 -0
  56. diff_diff/visualization/_synthetic.py +197 -0
  57. diff_diff/wooldridge.py +1285 -0
  58. diff_diff/wooldridge_results.py +349 -0
  59. diff_diff-3.0.1.dist-info/METADATA +2997 -0
  60. diff_diff-3.0.1.dist-info/RECORD +62 -0
  61. diff_diff-3.0.1.dist-info/WHEEL +4 -0
  62. diff_diff-3.0.1.dist-info/sboms/diff_diff_rust.cyclonedx.json +5843 -0
@@ -0,0 +1,2585 @@
1
+ """
2
+ Honest DiD sensitivity analysis (Rambachan & Roth 2023).
3
+
4
+ Provides robust inference for difference-in-differences designs when
5
+ parallel trends may be violated. Instead of assuming parallel trends
6
+ holds exactly, this module allows for bounded violations and computes
7
+ partially identified treatment effect bounds.
8
+
9
+ References
10
+ ----------
11
+ Rambachan, A., & Roth, J. (2023). A More Credible Approach to Parallel Trends.
12
+ The Review of Economic Studies, 90(5), 2555-2591.
13
+ https://doi.org/10.1093/restud/rdad018
14
+
15
+ See Also
16
+ --------
17
+ https://github.com/asheshrambachan/HonestDiD - R package implementation
18
+ """
19
+
20
+ from dataclasses import dataclass, field
21
+ from typing import Any, Dict, List, Literal, Optional, Tuple, Union
22
+
23
+ import numpy as np
24
+ import pandas as pd
25
+ from scipy import optimize
26
+
27
+ from diff_diff.results import (
28
+ MultiPeriodDiDResults,
29
+ )
30
+ from diff_diff.utils import _get_critical_value
31
+
32
+ # =============================================================================
33
+ # Delta Restriction Classes
34
+ # =============================================================================
35
+
36
+
37
@dataclass
class DeltaSD:
    """
    Smoothness restriction on trend violations (Delta^{SD}).

    Restricts the second differences of the trend violations:
        |delta_{t+1} - 2*delta_t + delta_{t-1}| <= M

    When M=0, this enforces that violations follow a linear trend
    (linear extrapolation of pre-trends). Larger M allows more
    curvature in the violation path.

    Parameters
    ----------
    M : float
        Maximum allowed second difference. M=0 means linear trends only.

    Raises
    ------
    ValueError
        If M is negative or NaN.

    Examples
    --------
    >>> delta = DeltaSD(M=0.5)
    >>> delta.M
    0.5
    """

    M: float = 0.0

    def __post_init__(self):
        # Written as `not (M >= 0)` instead of `M < 0`: any comparison with
        # NaN is False, so the negated form rejects NaN as well as negatives.
        if not (self.M >= 0):
            raise ValueError(f"M must be non-negative, got M={self.M}")

    def __repr__(self) -> str:
        return f"DeltaSD(M={self.M})"
69
+
70
+
71
@dataclass
class DeltaRM:
    """
    Relative magnitudes restriction on trend violations (Delta^{RM}).

    Post-treatment consecutive first differences are bounded by Mbar
    times the maximum pre-treatment first difference:
        |delta_{t+1} - delta_t| <= Mbar * max_{s<0} |delta_{s+1} - delta_s|

    When Mbar=0, this enforces zero post-treatment first differences.
    Mbar=1 means post-period first differences can be as large as the
    worst observed pre-period first difference.

    Parameters
    ----------
    Mbar : float
        Scaling factor for maximum pre-period first difference.

    Raises
    ------
    ValueError
        If Mbar is negative or NaN.

    Examples
    --------
    >>> delta = DeltaRM(Mbar=1.0)
    >>> delta.Mbar
    1.0
    """

    Mbar: float = 1.0

    def __post_init__(self):
        # Written as `not (Mbar >= 0)` instead of `Mbar < 0`: any comparison
        # with NaN is False, so the negated form rejects NaN too.
        if not (self.Mbar >= 0):
            raise ValueError(f"Mbar must be non-negative, got Mbar={self.Mbar}")

    def __repr__(self) -> str:
        return f"DeltaRM(Mbar={self.Mbar})"
104
+
105
+
106
@dataclass
class DeltaSDRM:
    """
    Combined smoothness and relative magnitudes restriction.

    Imposes both:
    1. Smoothness: |delta_{t+1} - 2*delta_t + delta_{t-1}| <= M
    2. Relative magnitudes: |delta_{t+1} - delta_t| <= Mbar * max_{s<0} |delta_{s+1} - delta_s|

    This is more restrictive than either constraint alone.

    Parameters
    ----------
    M : float
        Maximum allowed second difference (smoothness).
    Mbar : float
        Scaling factor for maximum pre-period first difference (relative magnitudes).

    Raises
    ------
    ValueError
        If M or Mbar is negative or NaN.

    Examples
    --------
    >>> delta = DeltaSDRM(M=0.5, Mbar=1.0)
    """

    M: float = 0.0
    Mbar: float = 1.0

    def __post_init__(self):
        # `not (x >= 0)` instead of `x < 0`: any comparison with NaN is
        # False, so the negated form rejects NaN as well as negatives.
        if not (self.M >= 0):
            raise ValueError(f"M must be non-negative, got M={self.M}")
        if not (self.Mbar >= 0):
            raise ValueError(f"Mbar must be non-negative, got Mbar={self.Mbar}")

    def __repr__(self) -> str:
        return f"DeltaSDRM(M={self.M}, Mbar={self.Mbar})"
140
+
141
+
142
# Type alias covering the three restriction classes defined above, so a single
# annotation can accept any of them.
DeltaType = Union[DeltaSD, DeltaRM, DeltaSDRM]
143
+
144
+
145
+ # =============================================================================
146
+ # Results Classes
147
+ # =============================================================================
148
+
149
+
150
@dataclass
class HonestDiDResults:
    """
    Results from Honest DiD sensitivity analysis.

    Contains bounds on the treatment effect under the specified
    restrictions on violations of parallel trends.

    Attributes
    ----------
    lb : float
        Lower bound of identified set.
    ub : float
        Upper bound of identified set.
    ci_lb : float
        Lower bound of robust confidence interval.
    ci_ub : float
        Upper bound of robust confidence interval.
    M : float
        The restriction parameter value used.
    method : str
        The type of restriction ("smoothness", "relative_magnitude", or "combined").
    original_estimate : float
        The original point estimate (under parallel trends).
    original_se : float
        The original standard error.
    alpha : float
        Significance level for confidence interval.
    ci_method : str
        Method used for CI construction ("FLCI" or "C-LF").
    original_results : Any
        The original estimation results object.
    event_study_bounds : dict, optional
        Optional event study bounds, keyed by period, each value a dict of
        named floats. Excluded from the dataclass repr.
    survey_metadata : Any, optional
        Optional survey design metadata. Excluded from the dataclass repr.
    df_survey : int, optional
        Optional survey degrees of freedom. Excluded from the dataclass repr.
    """

    lb: float
    ub: float
    ci_lb: float
    ci_ub: float
    M: float
    method: str
    original_estimate: float
    original_se: float
    alpha: float = 0.05
    ci_method: str = "FLCI"
    original_results: Optional[Any] = field(default=None, repr=False)
    # Event study bounds (optional)
    event_study_bounds: Optional[Dict[Any, Dict[str, float]]] = field(default=None, repr=False)
    # Survey design metadata (Phase 7d)
    survey_metadata: Optional[Any] = field(default=None, repr=False)
    df_survey: Optional[int] = field(default=None, repr=False)

    def _ci_is_finite(self) -> bool:
        """Check if CI endpoints are finite (not NaN/inf)."""
        # np.isfinite returns a NumPy bool; coerce so the annotated return
        # type (plain bool) is what callers actually receive.
        return bool(np.isfinite(self.ci_lb) and np.isfinite(self.ci_ub))

    def __repr__(self) -> str:
        if not self._ci_is_finite():
            # Skip the fixed-width formatting and flag the CI as undefined.
            return (
                f"HonestDiDResults(bounds=[{self.lb}, {self.ub}], "
                f"CI=[{self.ci_lb}, {self.ci_ub}] (undefined), "
                f"M={self.M})"
            )
        sig = "" if self.ci_lb <= 0 <= self.ci_ub else "*"
        return (
            f"HonestDiDResults(bounds=[{self.lb:.4f}, {self.ub:.4f}], "
            f"CI=[{self.ci_lb:.4f}, {self.ci_ub:.4f}]{sig}, "
            f"M={self.M})"
        )

    @property
    def is_significant(self) -> bool:
        """Check if CI excludes zero (effect is robust to violations).

        Returns False for undefined (NaN) CIs.
        """
        if not self._ci_is_finite():
            return False
        return not (self.ci_lb <= 0 <= self.ci_ub)

    @property
    def significance_stars(self) -> str:
        """
        Return significance indicator if robust CI excludes zero.

        Note: Unlike point estimation, partial identification does not yield
        a single p-value. This returns "*" if the robust CI excludes zero
        at the specified alpha level, indicating the effect is robust to
        the assumed violations of parallel trends.
        """
        return "*" if self.is_significant else ""

    @property
    def identified_set_width(self) -> float:
        """Width of the identified set."""
        return self.ub - self.lb

    @property
    def ci_width(self) -> float:
        """Width of the confidence interval."""
        return self.ci_ub - self.ci_lb

    def summary(self) -> str:
        """
        Generate formatted summary of sensitivity analysis results.

        Returns
        -------
        str
            Formatted summary.
        """
        # round(), not int(): (1 - alpha) * 100 carries float noise
        # (e.g. alpha=0.9 gives 9.999999999999998), and int() would truncate
        # that to the wrong confidence level.
        conf_level = round((1 - self.alpha) * 100)
        ci_label = f"{conf_level}% Robust CI:"

        method_names = {
            "smoothness": "Smoothness (Delta^SD)",
            "relative_magnitude": "Relative Magnitudes (Delta^RM)",
            "combined": "Combined (Delta^SDRM)",
        }
        # Unknown method names are shown verbatim.
        method_display = method_names.get(self.method, self.method)

        lines = [
            "=" * 70,
            "Honest DiD Sensitivity Analysis Results".center(70),
            "(Rambachan & Roth 2023)".center(70),
            "=" * 70,
            "",
            f"{'Method:':<30} {method_display}",
            f"{'Restriction parameter (M):':<30} {self.M:.4f}",
            f"{'CI method:':<30} {self.ci_method}",
            "",
            "-" * 70,
            "Original Estimate (under parallel trends)".center(70),
            "-" * 70,
            f"{'Point estimate:':<30} {self.original_estimate:.4f}",
            f"{'Standard error:':<30} {self.original_se:.4f}",
            "",
            "-" * 70,
            "Robust Results (allowing for violations)".center(70),
            "-" * 70,
            f"{'Identified set:':<30} [{self.lb:.4f}, {self.ub:.4f}]",
            f"{ci_label:<30} [{self.ci_lb:.4f}, {self.ci_ub:.4f}]",
            "",
            f"{'Effect robust to violations:':<30} {'Yes' if self.is_significant else 'No'}",
            "",
        ]

        # Interpretation
        lines.extend(
            [
                "-" * 70,
                "Interpretation".center(70),
                "-" * 70,
            ]
        )

        if self.method == "relative_magnitude":
            lines.append(
                f"Post-treatment first differences bounded at {self.M:.1f}x max pre-period first difference."
            )
        elif self.method == "smoothness":
            if self.M == 0:
                lines.append("Violations follow linear extrapolation of pre-trends.")
            else:
                lines.append(
                    f"Violation curvature (second diff) bounded by {self.M:.4f} per period."
                )
        else:
            lines.append(f"Combined smoothness (M={self.M:.2f}) and relative magnitude bounds.")

        if self.is_significant:
            # A significant CI lies entirely on one side of zero, so the sign
            # of the lower endpoint determines the direction.
            if self.ci_lb > 0:
                lines.append(f"Effect remains POSITIVE even with violations up to M={self.M}.")
            else:
                lines.append(f"Effect remains NEGATIVE even with violations up to M={self.M}.")
        else:
            lines.append(f"Cannot rule out zero effect when allowing violations up to M={self.M}.")

        lines.extend(["", "=" * 70])

        return "\n".join(lines)

    def print_summary(self) -> None:
        """Print summary to stdout."""
        print(self.summary())

    def to_dict(self) -> Dict[str, Any]:
        """Convert the scalar results and derived properties to a dictionary."""
        return {
            "lb": self.lb,
            "ub": self.ub,
            "ci_lb": self.ci_lb,
            "ci_ub": self.ci_ub,
            "M": self.M,
            "method": self.method,
            "original_estimate": self.original_estimate,
            "original_se": self.original_se,
            "alpha": self.alpha,
            "ci_method": self.ci_method,
            "is_significant": self.is_significant,
            "identified_set_width": self.identified_set_width,
            "ci_width": self.ci_width,
        }

    def to_dataframe(self) -> pd.DataFrame:
        """Convert results to a single-row DataFrame built from ``to_dict()``."""
        return pd.DataFrame([self.to_dict()])
355
+
356
+
357
@dataclass
class SensitivityResults:
    """
    Results from sensitivity analysis over a grid of M values.

    Contains bounds and confidence intervals for each M value,
    plus the breakdown value.

    Attributes
    ----------
    M_values : np.ndarray
        Grid of M parameter values.
    bounds : List[Tuple[float, float]]
        List of (lb, ub) identified set bounds for each M.
    robust_cis : List[Tuple[float, float]]
        List of (ci_lb, ci_ub) robust CIs for each M.
    breakdown_M : float or None
        Smallest M where robust CI includes zero; None when there is no
        breakdown value. A value of 0.0 is a valid breakdown value.
    method : str
        Type of restriction used.
    original_estimate : float
        Original point estimate.
    original_se : float
        Original standard error.
    alpha : float
        Significance level.
    """

    M_values: np.ndarray
    bounds: List[Tuple[float, float]]
    robust_cis: List[Tuple[float, float]]
    breakdown_M: Optional[float]
    method: str
    original_estimate: float
    original_se: float
    alpha: float = 0.05

    def __repr__(self) -> str:
        # Test against None explicitly: a breakdown at M=0.0 is falsy but is
        # still a real breakdown value and must not be rendered as "None".
        breakdown_str = f"{self.breakdown_M:.4f}" if self.breakdown_M is not None else "None"
        return f"SensitivityResults(n_M={len(self.M_values)}, " f"breakdown_M={breakdown_str})"

    @property
    def has_breakdown(self) -> bool:
        """Check if there is a finite breakdown value."""
        return self.breakdown_M is not None

    def summary(self) -> str:
        """Generate formatted summary with one table row per M value."""
        lines = [
            "=" * 70,
            "Honest DiD Sensitivity Analysis".center(70),
            "=" * 70,
            "",
            f"{'Method:':<30} {self.method}",
            f"{'Original estimate:':<30} {self.original_estimate:.4f}",
            f"{'Original SE:':<30} {self.original_se:.4f}",
            f"{'M values tested:':<30} {len(self.M_values)}",
            "",
        ]

        if self.breakdown_M is not None:
            lines.append(f"{'Breakdown value:':<30} {self.breakdown_M:.4f}")
            lines.append("")
            lines.append(f"Result is robust to violations up to M = {self.breakdown_M:.4f}")
        else:
            lines.append(f"{'Breakdown value:':<30} None (always significant)")

        lines.extend(
            [
                "",
                "-" * 70,
                f"{'M':<10} {'Lower Bound':>12} {'Upper Bound':>12} {'CI Lower':>12} {'CI Upper':>12}",
                "-" * 70,
            ]
        )

        # bounds and robust_cis are parallel to M_values.
        for M, (lb, ub), (ci_lb, ci_ub) in zip(self.M_values, self.bounds, self.robust_cis):
            lines.append(f"{M:<10.4f} {lb:>12.4f} {ub:>12.4f} {ci_lb:>12.4f} {ci_ub:>12.4f}")

        lines.extend(["", "=" * 70])

        return "\n".join(lines)

    def print_summary(self) -> None:
        """Print summary to stdout."""
        print(self.summary())

    def to_dataframe(self) -> pd.DataFrame:
        """Convert to DataFrame with one row per M value."""
        rows = []
        for i, M in enumerate(self.M_values):
            lb, ub = self.bounds[i]
            ci_lb, ci_ub = self.robust_cis[i]
            rows.append(
                {
                    "M": M,
                    "lb": lb,
                    "ub": ub,
                    "ci_lb": ci_lb,
                    "ci_ub": ci_ub,
                    # Non-finite CIs are never reported as significant.
                    "is_significant": (
                        np.isfinite(ci_lb) and np.isfinite(ci_ub) and not (ci_lb <= 0 <= ci_ub)
                    ),
                }
            )
        return pd.DataFrame(rows)

    def plot(
        self,
        ax=None,
        show_bounds: bool = True,
        show_ci: bool = True,
        breakdown_line: bool = True,
        **kwargs,
    ):
        """
        Plot sensitivity analysis results.

        Parameters
        ----------
        ax : matplotlib.axes.Axes, optional
            Axes to plot on. If None, creates new figure.
        show_bounds : bool
            Whether to show identified set bounds.
        show_ci : bool
            Whether to show confidence intervals.
        breakdown_line : bool
            Whether to show vertical line at breakdown value.
        **kwargs
            Accepted for call compatibility; this implementation does not
            forward them to any plotting call.

        Returns
        -------
        ax : matplotlib.axes.Axes
            The axes with the plot.

        Raises
        ------
        ImportError
            If matplotlib is not installed.
        """
        try:
            import matplotlib.pyplot as plt
        except ImportError as exc:
            # Chain the original error so the underlying cause stays visible.
            raise ImportError("matplotlib is required for plotting") from exc

        if ax is None:
            _, ax = plt.subplots(figsize=(10, 6))

        M = self.M_values
        bounds_arr = np.array(self.bounds)
        ci_arr = np.array(self.robust_cis)

        # Plot original estimate
        ax.axhline(
            y=self.original_estimate,
            color="black",
            linestyle="-",
            linewidth=1.5,
            label="Original estimate",
            alpha=0.7,
        )

        # Plot zero line
        ax.axhline(y=0, color="gray", linestyle="--", linewidth=1, alpha=0.5)

        if show_bounds:
            ax.fill_between(
                M,
                bounds_arr[:, 0],
                bounds_arr[:, 1],
                alpha=0.3,
                color="blue",
                label="Identified set",
            )

        if show_ci:
            # Only the lower curve is labelled so the legend has one CI entry.
            ax.plot(M, ci_arr[:, 0], "b-", linewidth=1.5, label="Robust CI")
            ax.plot(M, ci_arr[:, 1], "b-", linewidth=1.5)

        if breakdown_line and self.breakdown_M is not None:
            ax.axvline(
                x=self.breakdown_M,
                color="red",
                linestyle=":",
                linewidth=2,
                label=f"Breakdown (M={self.breakdown_M:.2f})",
            )

        ax.set_xlabel("M (restriction parameter)")
        ax.set_ylabel("Treatment Effect")
        ax.set_title("Sensitivity Analysis: Treatment Effect Bounds")
        ax.legend(loc="best")

        return ax
547
+
548
+
549
+ # =============================================================================
550
+ # Helper Functions
551
+ # =============================================================================
552
+
553
+
554
def _survey_df_from_results(results: Any) -> Optional[int]:
    """Return survey degrees of freedom from ``results.survey_metadata``.

    Returns None when there is no survey metadata. When the metadata has no
    ``df_survey`` but does carry a ``replicate_method``, returns the sentinel
    0 (undefined replicate df).
    """
    survey_meta = getattr(results, "survey_metadata", None)
    if survey_meta is None:
        return None
    df_survey = getattr(survey_meta, "df_survey", None)
    if df_survey is None and getattr(survey_meta, "replicate_method", None) is not None:
        return 0  # undefined replicate df -> sentinel
    return df_survey


def _extract_event_study_params(
    results: Union[MultiPeriodDiDResults, Any],
) -> Tuple[np.ndarray, np.ndarray, int, int, List[Any], List[Any], Optional[int]]:
    """
    Extract event study parameters from results objects.

    Parameters
    ----------
    results : MultiPeriodDiDResults or CallawaySantAnnaResults
        Estimation results with event study structure.

    Returns
    -------
    beta_hat : np.ndarray
        Vector of event study coefficients (pre + post periods).
    sigma : np.ndarray
        Variance-covariance matrix of coefficients.
    num_pre_periods : int
        Number of pre-treatment periods.
    num_post_periods : int
        Number of post-treatment periods.
    pre_periods : list
        Pre-period identifiers.
    post_periods : list
        Post-period identifiers.
    df_survey : int or None
        Survey degrees of freedom for t-distribution inference.

    Raises
    ------
    ValueError
        If no finite estimates (or no finite pre-period estimates) are
        available, if CallawaySantAnna results lack event study effects, or
        if the retained CallawaySantAnna event-time grid has gaps.
    TypeError
        If ``results`` is neither a MultiPeriodDiDResults nor a
        CallawaySantAnnaResults (including when the latter cannot be imported).
    """
    import warnings

    if isinstance(results, MultiPeriodDiDResults):
        pre_periods = results.pre_periods
        post_periods = results.post_periods

        # Filter periods with finite effects/SEs, maintaining pre-then-post order
        finite_periods = {
            p
            for p, period_effect in results.period_effects.items()
            if np.isfinite(period_effect.effect) and np.isfinite(period_effect.se)
        }

        pre_estimated = [p for p in pre_periods if p in finite_periods]
        post_estimated = [p for p in post_periods if p in finite_periods]
        all_estimated = pre_estimated + post_estimated

        if not all_estimated:
            raise ValueError(
                "No period effects with finite estimates found. " "Cannot compute HonestDiD bounds."
            )

        effects = [results.period_effects[p].effect for p in all_estimated]
        ses = [results.period_effects[p].se for p in all_estimated]

        beta_hat = np.array(effects)
        num_pre_periods = sum(1 for p in all_estimated if p in pre_periods)
        num_post_periods = sum(1 for p in all_estimated if p in post_periods)

        if num_pre_periods == 0:
            raise ValueError(
                "No pre-period effects with finite estimates found. "
                "HonestDiD requires at least one identified pre-period "
                "coefficient."
            )

        # Extract proper sub-VCV for interaction terms
        if (
            results.vcov is not None
            and getattr(results, "interaction_indices", None) is not None
        ):
            indices = [results.interaction_indices[p] for p in all_estimated]
            sigma = results.vcov[np.ix_(indices, indices)]
        else:
            # Fallback: diagonal from SEs
            sigma = np.diag(np.array(ses) ** 2)

        # NOTE(review): the unfiltered pre_periods/post_periods lists are
        # returned, while the counts and beta_hat only cover periods with
        # finite estimates. Confirm callers do not index these lists by
        # coefficient position.
        return (
            beta_hat,
            sigma,
            num_pre_periods,
            num_post_periods,
            pre_periods,
            post_periods,
            _survey_df_from_results(results),
        )

    # Lazy import, guarded narrowly: only a failure of this import falls
    # through to the TypeError below, so an ImportError raised anywhere else
    # is not mistaken for "unsupported type".
    try:
        from diff_diff.staggered import CallawaySantAnnaResults
    except ImportError:
        CallawaySantAnnaResults = None

    if CallawaySantAnnaResults is None or not isinstance(results, CallawaySantAnnaResults):
        raise TypeError(
            f"Unsupported results type: {type(results)}. "
            "Expected MultiPeriodDiDResults or CallawaySantAnnaResults."
        )

    if results.event_study_effects is None:
        raise ValueError(
            "CallawaySantAnnaResults must have event_study_effects for HonestDiD. "
            "Re-run CallawaySantAnna.fit() with aggregate='event_study' to compute "
            "event study effects."
        )

    # Warn if not using universal base period (R's HonestDiD requires it)
    if getattr(results, "base_period", "universal") != "universal":
        warnings.warn(
            "HonestDiD sensitivity analysis on CallawaySantAnna results "
            "requires base_period='universal' for valid interpretation. "
            "With base_period='varying', pre-treatment coefficients use "
            "consecutive comparisons (not a common reference period), "
            "which changes the meaning of the parallel trends restriction. "
            "Re-run with CallawaySantAnna(base_period='universal') for "
            "methodologically valid HonestDiD bounds.",
            UserWarning,
            stacklevel=3,
        )

    # Extract event study effects by relative time
    # Filter out normalization constraints (n_groups=0) and non-finite SEs
    event_effects = {
        t: data
        for t, data in results.event_study_effects.items()
        if data.get("n_groups", 1) > 0 and np.isfinite(data.get("se", np.nan))
    }
    rel_times = sorted(event_effects.keys())

    # Infer the omitted reference period from the normalization marker.
    # The reference has the exact signature: effect=0.0, se=NaN, n_groups=0.
    # Other empty bins may also have n_groups=0 but with NaN effect.
    ref_period = None
    for t, data in results.event_study_effects.items():
        if (
            data.get("n_groups", 1) == 0
            and data.get("effect", None) == 0.0
            and not np.isfinite(data.get("se", 0.0))
        ):
            ref_period = t
            break

    if ref_period is not None:
        # Universal base: split relative to the reference period
        pre_times = [t for t in rel_times if t < ref_period]
        post_times = [t for t in rel_times if t > ref_period]
    else:
        # Varying base or no reference marker: split at t < 0 / t >= 0
        pre_times = [t for t in rel_times if t < 0]
        post_times = [t for t in rel_times if t >= 0]

    if len(pre_times) == 0:
        raise ValueError(
            "No pre-period effects with finite estimates found in "
            "CallawaySantAnna event study. HonestDiD requires at "
            "least one identified pre-period coefficient."
        )

    effects = [event_effects[t]["effect"] for t in rel_times]
    ses = [event_effects[t]["se"] for t in rel_times]
    beta_hat = np.array(effects)

    # Use full event-study VCV if available (Phase 7d),
    # otherwise fall back to diagonal from SEs
    vcov = getattr(results, "event_study_vcov", None)
    if vcov is not None:
        # VCV is indexed by the aggregated event times (stored in
        # event_study_vcov_index), NOT by event_study_effects keys
        # (which may include an injected reference period).
        # Subset to match the surviving rel_times.
        vcov_index = getattr(results, "event_study_vcov_index", None)
        if vcov_index is not None and len(rel_times) < len(vcov_index):
            idx = [vcov_index.index(t) for t in rel_times if t in vcov_index]
            if len(idx) == len(rel_times):
                sigma = vcov[np.ix_(idx, idx)]
            else:
                sigma = np.diag(np.array(ses) ** 2)
        elif vcov.shape[0] == len(rel_times):
            sigma = vcov
        else:
            sigma = np.diag(np.array(ses) ** 2)
    else:
        # No full VCV available. Check if this is a bootstrap fit
        # (VCV was cleared to prevent mixing analytical/bootstrap).
        if getattr(results, "bootstrap_results", None) is not None:
            warnings.warn(
                "HonestDiD on bootstrap-fitted CallawaySantAnna results "
                "uses a diagonal covariance matrix (cross-event-time "
                "covariance is not available from bootstrap). For full "
                "covariance structure, use analytical SEs (n_bootstrap=0).",
                UserWarning,
                stacklevel=4,
            )
        sigma = np.diag(np.array(ses) ** 2)

    # Validate the full event-time grid is consecutive.
    # Universal base: pre[-1]+1 = ref, ref+1 = post[0] -> distance of 2.
    # Varying base: pre ends at -1, post starts at 0 -> distance of 1.
    # pre_times is non-empty here; with no post periods there is no
    # boundary distance to check.
    expected_boundary = 2 if ref_period is not None else 1
    has_gap = bool(post_times) and (post_times[0] - pre_times[-1]) != expected_boundary
    # Also check within-block consecutiveness
    for block in (pre_times, post_times):
        if any(later - earlier != 1 for earlier, later in zip(block, block[1:])):
            has_gap = True
    if has_gap:
        raise ValueError(
            "HonestDiD requires a consecutive event-time grid "
            "around the omitted reference period. Retained "
            f"pre-periods {pre_times} and post-periods "
            f"{post_times} have gaps. This can happen when "
            "some event-study horizons have non-finite SEs. "
            "Ensure all event-study periods have valid estimates, "
            "or use balance_e to restrict to a balanced subset."
        )

    return (
        beta_hat,
        sigma,
        len(pre_times),
        len(post_times),
        pre_times,
        post_times,
        _survey_df_from_results(results),
    )
824
+
825
+
826
def _construct_A_sd(num_pre_periods: int, num_post_periods: int) -> np.ndarray:
    """
    Build the second-difference operator used by the smoothness restriction.

    The returned matrix A satisfies ``A @ delta == second differences of
    delta``, where delta is laid out as

        [delta_{-T}, ..., delta_{-1}, delta_1, ..., delta_{Tbar}]

    and the reference period delta_0 = 0 is not stored. Rows whose stencil
    touches delta_0 simply drop that term:

        centre t = -1:  delta_{-2} - 2*delta_{-1}        (needs T >= 2)
        centre t =  0:  delta_{-1} + delta_1             (needs T >= 1, Tbar >= 1)
        centre t =  1:  -2*delta_1 + delta_2             (needs Tbar >= 2)

    Rows are ordered by the event time of the stencil centre.

    Parameters
    ----------
    num_pre_periods : int
        Number of pre-treatment periods (T).
    num_post_periods : int
        Number of post-treatment periods (Tbar).

    Returns
    -------
    A : np.ndarray
        Array of shape (n_constraints, T + Tbar). When T + Tbar >= 2 there
        are T + Tbar - 1 rows; with fewer than two periods the result has
        zero rows.
    """
    n_pre, n_post = num_pre_periods, num_post_periods
    width = n_pre + n_post

    if width < 2:
        return np.zeros((0, width))

    def _stencil(*entries):
        # One constraint row from (column, coefficient) pairs.
        out = np.zeros(width)
        for col, coef in entries:
            out[col] = coef
        return out

    # Stencils lying entirely inside the pre-period block (centres -T+1..-2).
    stencils = [
        _stencil((k, 1), (k + 1, -2), (k + 2, 1)) for k in range(n_pre - 2)
    ]

    # Centre t = -1: the delta_0 term vanishes.
    if n_pre >= 2:
        stencils.append(_stencil((n_pre - 2, 1), (n_pre - 1, -2)))

    # Centre t = 0: the -2*delta_0 term vanishes, leaving the two neighbours.
    if n_pre >= 1 and n_post >= 1:
        stencils.append(_stencil((n_pre - 1, 1), (n_pre, 1)))

    # Centre t = 1: the delta_0 term vanishes.
    if n_post >= 2:
        stencils.append(_stencil((n_pre, -2), (n_pre + 1, 1)))

    # Stencils lying entirely inside the post-period block (centres 2..Tbar-1).
    stencils.extend(
        _stencil((n_pre + t - 2, 1), (n_pre + t - 1, -2), (n_pre + t, 1))
        for t in range(2, n_post)
    )

    if not stencils:
        return np.zeros((0, width))

    return np.array(stencils)
910
+
911
+
912
def _construct_constraints_sd(
    num_pre_periods: int, num_post_periods: int, M: float
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Express Delta^SD(M) as a one-sided linear inequality system.

    The two-sided restriction ``|D @ delta| <= M`` (D being the
    second-difference matrix from ``_construct_A_sd``, which already accounts
    for the delta_0 = 0 normalization) is rewritten as
    ``A_ineq @ delta <= b_ineq`` by stacking D on top of -D.

    Parameters
    ----------
    num_pre_periods : int
        Number of pre-treatment periods.
    num_post_periods : int
        Number of post-treatment periods.
    M : float
        Smoothness parameter (largest allowed absolute second difference).

    Returns
    -------
    A_ineq : np.ndarray
        Stacked matrix ``[D; -D]``; has zero rows when D has none.
    b_ineq : np.ndarray
        Vector filled with M, one entry per row of ``A_ineq``.
    """
    second_diff = _construct_A_sd(num_pre_periods, num_post_periods)
    n_rows = second_diff.shape[0]

    if n_rows == 0:
        width = num_pre_periods + num_post_periods
        return np.zeros((0, width)), np.zeros(0)

    # D @ delta <= M together with -D @ delta <= M encodes |D @ delta| <= M.
    stacked = np.vstack((second_diff, -second_diff))
    rhs = np.full(2 * n_rows, M)

    return stacked, rhs
949
+
950
+
951
def _construct_constraints_rm_component(
    num_pre_periods: int,
    num_post_periods: int,
    Mbar: float,
    max_pre_first_diff: float,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Build the inequality system for a single component of Delta^RM.

    The restriction acts on post-treatment FIRST DIFFERENCES, not levels:

        |delta_{t+1} - delta_t| <= Mbar * max_pre_first_diff,   t = 0..Tbar-1

    Because delta_0 = 0 is not stored in the delta vector, the t = 0
    restriction reduces to ``|delta_1| <= bound``. Each absolute-value
    restriction contributes two consecutive rows (``+`` then ``-``).

    Parameters
    ----------
    num_pre_periods : int
        Number of pre-treatment periods (T); only used to offset columns.
    num_post_periods : int
        Number of post-treatment periods (Tbar).
    Mbar : float
        Relative-magnitude scaling factor.
    max_pre_first_diff : float
        Pre-period first difference that defines this component.

    Returns
    -------
    A_ineq : np.ndarray
        Matrix of shape (2 * Tbar, T + Tbar); zero rows when Tbar == 0.
    b_ineq : np.ndarray
        Vector filled with ``Mbar * max_pre_first_diff``.
    """
    n_pre = num_pre_periods
    n_post = num_post_periods
    width = n_pre + n_post
    limit = Mbar * max_pre_first_diff

    constraint_rows = []
    for step in range(n_post):
        later = n_pre + step  # column of delta_{step+1}
        for sign in (1, -1):
            entry = np.zeros(width)
            entry[later] = sign
            if step > 0:
                # delta_step is stored; for step == 0 it is delta_0 = 0.
                entry[later - 1] = -sign
            constraint_rows.append(entry)

    if not constraint_rows:
        return np.zeros((0, width)), np.zeros(0)

    return np.array(constraint_rows), np.full(len(constraint_rows), limit)
1018
+
1019
+
1020
def _compute_pre_first_differences(beta_pre: np.ndarray) -> np.ndarray:
    """
    Absolute pre-period first differences used by Delta^RM.

    With ``beta_pre = [beta_{-T}, ..., beta_{-1}]`` and the normalization
    delta_0 = 0, the first differences are

        fd_s    = beta_{s+1} - beta_s      for s = -T, ..., -2
        fd_{-1} = 0 - beta_{-1}            (step into the reference period)

    Parameters
    ----------
    beta_pre : np.ndarray
        Pre-period coefficient estimates, ordered from earliest to latest.

    Returns
    -------
    first_diffs : np.ndarray
        ``[|fd_{-T}|, ..., |fd_{-1}|]`` (same length as ``beta_pre``); an
        empty array when ``beta_pre`` is empty.
    """
    if len(beta_pre) == 0:
        return np.array([])

    # Consecutive gaps inside the pre-period block.
    gaps = [abs(beta_pre[pos + 1] - beta_pre[pos]) for pos in range(len(beta_pre) - 1)]
    # Final gap runs from beta_{-1} to the normalized delta_0 = 0.
    gaps.append(abs(beta_pre[-1]))

    return np.array(gaps)
1054
+
1055
+
1056
def _solve_rm_bounds_union(
    beta_pre: np.ndarray,
    beta_post: np.ndarray,
    l_vec: np.ndarray,
    num_pre_periods: int,
    Mbar: float,
    lp_method: str = "highs",
) -> Tuple[float, float]:
    """
    Identified-set bounds for Delta^RM.

    Delta^RM is a union of polyhedra, one per location of the largest
    pre-period first difference. Once delta_pre is pinned to beta_pre each
    pre-period first difference is a known number, so every component only
    differs in the size of its box on the post-period first differences.
    This function therefore solves a single LP using the largest pre-period
    first difference, on the reasoning that the smaller components yield
    intervals nested inside that one.

    Parameters
    ----------
    beta_pre : np.ndarray
        Pre-period coefficients.
    beta_post : np.ndarray
        Post-period coefficients.
    l_vec : np.ndarray
        Weighting vector defining the target ``l' tau_post``.
    num_pre_periods : int
        Number of pre-periods.
    Mbar : float
        Relative-magnitudes scaling factor.
    lp_method : str
        Solver name forwarded to the LP routine.

    Returns
    -------
    lb : float
        Lower bound of the identified set.
    ub : float
        Upper bound of the identified set. When there are no pre-period
        differences, or all of them are zero, both bounds equal
        ``l_vec @ beta_post``.
    """
    pre_fd = _compute_pre_first_differences(beta_pre)
    n_post = len(beta_post)

    largest_fd = float(np.max(pre_fd)) if len(pre_fd) > 0 else 0.0
    if largest_fd == 0:
        # No admissible violation: the set collapses to the point estimate.
        point = np.dot(l_vec, beta_post)
        return point, point

    A_rm, b_rm = _construct_constraints_rm_component(
        num_pre_periods, n_post, Mbar, largest_fd
    )
    return _solve_bounds_lp(
        beta_pre, beta_post, l_vec, A_rm, b_rm, num_pre_periods, lp_method
    )
1115
+
1116
+
1117
def _solve_bounds_lp(
    beta_pre: np.ndarray,
    beta_post: np.ndarray,
    l_vec: np.ndarray,
    A_ineq: np.ndarray,
    b_ineq: np.ndarray,
    num_pre_periods: int,
    lp_method: str = "highs",
) -> Tuple[float, float]:
    """
    Identified-set bounds via two linear programs.

    Implements the bounds described for Rambachan & Roth (2023) Eqs. 5-6:

        theta^lb = l'beta_post - max{ l'delta_post : A delta <= b, delta_pre = beta_pre }
        theta^ub = l'beta_post - min{ l'delta_post : A delta <= b, delta_pre = beta_pre }

    Both programs share the same feasible set; only the sign of the
    objective differs.

    Parameters
    ----------
    beta_pre : np.ndarray
        Pre-period coefficient estimates; imposed as ``delta_pre = beta_pre``.
    beta_post : np.ndarray
        Post-period coefficient estimates.
    l_vec : np.ndarray
        Weighting vector for aggregation.
    A_ineq : np.ndarray
        Inequality matrix over the full delta vector (pre then post).
    b_ineq : np.ndarray
        Inequality right-hand side.
    num_pre_periods : int
        Number of pre-periods (column offset of the post block).
    lp_method : str
        ``method`` argument passed to ``scipy.optimize.linprog``.

    Returns
    -------
    lb : float
        Lower bound of the identified set.
    ub : float
        Upper bound of the identified set. ``(-inf, inf)`` is returned when
        there are neither inequalities nor pre-periods; ``(nan, nan)`` when
        the solver reports infeasibility (status 2); an individual bound is
        infinite when its LP fails for any other reason.
    """
    n_post = len(beta_post)
    has_ineq = A_ineq.shape[0] > 0
    n_vars = A_ineq.shape[1] if has_ineq else num_pre_periods + n_post

    # Objective -l' delta_post, with zeros on the pre-period coordinates.
    cost = np.zeros(n_vars)
    cost[num_pre_periods : num_pre_periods + n_post] = -l_vec

    # Identity on the leading block pins delta_pre to beta_pre.
    pin = np.zeros((num_pre_periods, n_vars))
    pre_cols = np.arange(num_pre_periods)
    pin[pre_cols, pre_cols] = 1.0

    if not has_ineq and num_pre_periods == 0:
        return -np.inf, np.inf

    solver_args = {
        "A_ub": A_ineq if has_ineq else None,
        "b_ub": b_ineq if has_ineq else None,
        "A_eq": pin,
        "b_eq": beta_pre,
        "bounds": (None, None),
        "method": lp_method,
    }

    def _minimum(objective):
        # Minimal objective value; -inf if the solve fails or is unbounded,
        # None if the solver flags the problem as infeasible.
        try:
            outcome = optimize.linprog(objective, **solver_args)
        except (ValueError, TypeError):
            return -np.inf
        if outcome.success:
            return outcome.fun
        if outcome.status == 2:
            return None
        return -np.inf

    # min(-l' delta_post) = -max(l' delta_post): shift for the LOWER bound.
    shift_lo = _minimum(cost)
    if shift_lo is None:
        return np.nan, np.nan

    # max(-l' delta_post) = -min(l' delta_post): shift for the UPPER bound.
    negated_hi = _minimum(-cost)
    if negated_hi is None:
        return np.nan, np.nan
    shift_hi = -negated_hi

    centre = np.dot(l_vec, beta_post)
    return centre + shift_lo, centre + shift_hi
1218
+
1219
+
1220
def _compute_flci(
    lb: float,
    ub: float,
    se: float,
    alpha: float = 0.05,
    df: Optional[int] = None,
) -> Tuple[float, float]:
    """
    Widen an identified set into a fixed-length confidence interval.

    Each end of ``[lb, ub]`` is pushed outwards by ``critical value * se``,
    where the critical value comes from ``_get_critical_value(alpha, df)``.

    Parameters
    ----------
    lb : float
        Lower bound of the identified set.
    ub : float
        Upper bound of the identified set.
    se : float
        Standard error of the estimator.
    alpha : float
        Significance level.
    df : int, optional
        Degrees of freedom forwarded to ``_get_critical_value``.

    Returns
    -------
    ci_lb : float
        ``lb - critical value * se``.
    ci_ub : float
        ``ub + critical value * se``.

    Raises
    ------
    ValueError
        If ``se <= 0`` or ``alpha`` is not strictly between 0 and 1.
    """
    if se <= 0:
        raise ValueError(f"Standard error must be positive, got se={se}")
    if not (0 < alpha < 1):
        raise ValueError(f"alpha must be between 0 and 1, got alpha={alpha}")

    margin = _get_critical_value(alpha, df) * se
    return lb - margin, ub + margin
1268
+
1269
+
1270
def _cv_alpha(t: float, alpha: float, df: Optional[int] = None) -> float:
    """
    (1 - alpha) quantile of the folded distribution |X|.

    ``X ~ N(|t|, 1)`` when ``df`` is None or not positive, and
    ``X ~ nct(df, |t|)`` (non-central t) when ``df > 0``. The quantile is
    found by Newton iteration on ``P(|X| <= x) - (1 - alpha)``; the last
    iterate is returned even if the tolerance was not reached.

    Parameters
    ----------
    t : float
        Non-centrality parameter (bias / se ratio); only ``|t|`` matters.
    alpha : float
        Significance level.
    df : int, optional
        Degrees of freedom for the non-central t. None selects the normal.

    Returns
    -------
    cv : float
        Critical value with ``P(|X| <= cv)`` approximately ``1 - alpha``.
    """
    from scipy.stats import norm

    coverage = 1 - alpha
    nc = abs(t)

    def _newton(gap, slope, start, max_iter, tol):
        # Newton steps clamped at zero; stops on a flat slope or a small step.
        current = start
        for _ in range(max_iter):
            miss = gap(current)
            rate = slope(current)
            if rate < 1e-15:
                break
            proposal = max(current - miss / rate, 0.0)
            if abs(proposal - current) < tol:
                break
            current = proposal
        return current

    if df is not None and df > 0:
        # P(|nct(df, nc)| <= x) = F(x; df, nc) - F(-x; df, nc)
        from scipy.stats import nct as nct_dist

        return _newton(
            lambda x: nct_dist.cdf(x, df, nc) - nct_dist.cdf(-x, df, nc) - coverage,
            lambda x: nct_dist.pdf(x, df, nc) + nct_dist.pdf(-x, df, nc),
            nct_dist.ppf(1 - alpha / 2, df, nc) + 1.0,  # generous start
            30,
            1e-10,
        )

    # P(|N(nc, 1)| <= x) = Phi(x - nc) - Phi(-x - nc)
    return _newton(
        lambda x: norm.cdf(x - nc) - norm.cdf(-x - nc) - coverage,
        lambda x: norm.pdf(x - nc) + norm.pdf(-x - nc),
        norm.ppf(1 - alpha / 2) + nc,
        20,
        1e-12,
    )
1329
+
1330
+
1331
def _build_fd_transform(num_pre: int, num_post: int) -> np.ndarray:
    """
    Matrix C that rebuilds levels from first differences: ``delta = C @ fd``.

    ``fd`` has T + Tbar entries, stored in event-time order
    ``[fd_{-T}, ..., fd_{-1}, fd_0, ..., fd_{Tbar-1}]`` with
    ``fd_s = delta_{s+1} - delta_s`` and delta_0 = 0. ``delta`` is
    ``[delta_{-T}, ..., delta_{-1}, delta_1, ..., delta_{Tbar}]``.

    In terms of array positions:

    * pre-period row r (0 <= r < T) is minus the sum of fd positions
      r..T-1, i.e. walking backwards from delta_0 = 0;
    * post-period row T + k (0 <= k < Tbar) is the sum of fd positions
      T..T+k, i.e. walking forwards from delta_0 = 0.

    Parameters
    ----------
    num_pre : int
        Number of pre-periods (T).
    num_post : int
        Number of post-periods (Tbar).

    Returns
    -------
    C : np.ndarray
        Square array of shape (T + Tbar, T + Tbar).
    """
    n_pre = num_pre
    n_post = num_post
    size = n_pre + n_post
    transform = np.zeros((size, size))

    # Pre block: upper-triangular (diagonal included) filled with -1.
    for r in range(n_pre):
        transform[r, r:n_pre] = -1.0

    # Post block: lower-triangular (diagonal included) filled with +1.
    for k in range(n_post):
        transform[n_pre + k, n_pre : n_pre + k + 1] = 1.0

    return transform
1370
+
1371
+
1372
def _build_fd_smoothness_constraints(
    num_fd: int, M: float
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Smoothness restriction written on first differences.

    Encodes ``|fd_{i+1} - fd_i| <= M`` for every consecutive pair as
    ``A @ fd <= b``. Each pair contributes two adjacent rows: first
    ``fd_{i+1} - fd_i <= M``, then ``fd_i - fd_{i+1} <= M``.

    Parameters
    ----------
    num_fd : int
        Length of the first-difference vector.
    M : float
        Smoothness parameter.

    Returns
    -------
    A : np.ndarray
        Matrix of shape (2 * (num_fd - 1), num_fd); zero rows if
        ``num_fd < 2``.
    b : np.ndarray
        Vector filled with M, one entry per row of A.
    """
    if num_fd < 2:
        return np.zeros((0, num_fd)), np.zeros(0)

    pair_rows = []
    for left in range(num_fd - 1):
        for sign in (1, -1):
            entry = np.zeros(num_fd)
            entry[left + 1] = sign
            entry[left] = -sign
            pair_rows.append(entry)

    return np.array(pair_rows), np.full(len(pair_rows), M)
1399
+
1400
+
1401
def _w_to_v(w: np.ndarray, l: np.ndarray, num_pre: int) -> np.ndarray:
    """
    Convert slope weights into the coefficient direction of the estimator.

    The estimator is

        theta_hat = l' beta_post - sum_s w_s * (beta_s - beta_{s-1})

    over the T slopes s = -T+1, ..., 0, with beta_0 = 0 in the last slope.
    Collecting the coefficient on each beta gives ``theta_hat = v' beta``
    with ``v = (v_pre, l)``, where ``v_pre[0] = w[0]`` and
    ``v_pre[k] = w[k] - w[k-1]`` for k = 1..T-1.

    Parameters
    ----------
    w : np.ndarray
        Slope weights (length T), including the boundary slope at s = 0.
    l : np.ndarray
        Target parameter weights (length Tbar).
    num_pre : int
        Number of pre-periods (T).

    Returns
    -------
    v : np.ndarray
        Direction of length T + Tbar; the pre block stays zero when ``w``
        is empty.
    """
    n_pre = num_pre
    direction = np.zeros(n_pre + len(l))

    if len(w) > 0:
        # beta_{-T} only appears (negatively) in the first slope.
        direction[0] = w[0]
        # Every later pre-period coefficient appears in two adjacent slopes.
        for idx in range(1, n_pre):
            direction[idx] = w[idx] - w[idx - 1]

    direction[n_pre:] = l
    return direction
1433
+
1434
+
1435
def _compute_worst_case_bias(
    w: np.ndarray,
    l: np.ndarray,
    num_pre: int,
    num_post: int,
    M: float,
) -> float:
    """
    Worst-case bias of the affine FLCI estimator under Delta^SD(M).

    The bias of ``v' beta_hat`` is ``v' delta``; it is maximized here in
    first-difference coordinates, where ``delta = C @ fd`` and the
    restriction reads ``|fd_{i+1} - fd_i| <= M``. The LP objective is
    therefore ``(C' v)' fd``. Only the maximum is solved for; the code
    relies on the symmetry of the feasible set to equate it with
    ``max |v' delta|``.

    Parameters
    ----------
    w : np.ndarray
        Slope weights (length T).
    l : np.ndarray
        Target parameter weights.
    num_pre : int
        Number of pre-periods (T).
    num_post : int
        Number of post-periods (Tbar).
    M : float
        Smoothness parameter.

    Returns
    -------
    bias : float
        Maximal bias. ``0.0`` when ``M == 0`` or when there are no
        smoothness constraints; ``np.inf`` when the LP does not report
        success or raises ``ValueError``/``TypeError``.
    """
    if M == 0:
        # Short-circuit: callers are expected to pass trend-neutral weights.
        return 0.0

    n_fd = num_pre + num_post
    direction = _w_to_v(w, l, num_pre)
    to_levels = _build_fd_transform(num_pre, num_post)
    A_fd, b_fd = _build_fd_smoothness_constraints(n_fd, M)

    # v' delta = v' C fd = (C' v)' fd
    fd_objective = to_levels.T @ direction

    if A_fd.shape[0] == 0:
        return 0.0

    try:
        outcome = optimize.linprog(
            -fd_objective,
            A_ub=A_fd,
            b_ub=b_fd,
            bounds=(None, None),
            method="highs",
        )
    except (ValueError, TypeError):
        return np.inf

    if not outcome.success:
        return np.inf
    # linprog minimizes, so flip the sign back to obtain the maximum.
    return -outcome.fun
1496
+
1497
+
1498
def _compute_optimal_flci(
    beta_pre: np.ndarray,
    beta_post: np.ndarray,
    sigma: np.ndarray,
    l_vec: np.ndarray,
    num_pre: int,
    num_post: int,
    M: float,
    alpha: float = 0.05,
    df: Optional[int] = None,
) -> Tuple[float, float]:
    """
    Optimal fixed-length confidence interval under Delta^SD(M).

    The interval has the form ``v' beta_hat +/- chi``. The direction v is
    parameterized by slope weights w on the T pre-treatment first
    differences,

        theta_hat = l' beta_post - sum_s w_s * (beta_s - beta_{s-1}),

    subject to ``sum(w) = sum_j j * l_j`` so that a linear trend produces no
    bias. For a given w the half-length is ``sd * cv``, where ``sd`` is the
    standard deviation of ``v' beta_hat``, and ``cv`` is the folded-quantile
    from ``_cv_alpha`` evaluated at ``worst-case bias / sd``. With more than
    one slope, the first T-1 weights are chosen by Nelder-Mead and the last
    one follows from the sum constraint.

    Parameters
    ----------
    beta_pre : np.ndarray
        Pre-period coefficients.
    beta_post : np.ndarray
        Post-period coefficients.
    sigma : np.ndarray
        Full variance-covariance matrix (pre then post).
    l_vec : np.ndarray
        Target parameter weights.
    num_pre : int
        Number of pre-periods (T).
    num_post : int
        Number of post-periods (Tbar).
    M : float
        Smoothness parameter.
    alpha : float
        Significance level.
    df : int, optional
        Degrees of freedom passed on to ``_cv_alpha``.

    Returns
    -------
    ci_lb : float
        Lower end of the interval.
    ci_ub : float
        Upper end of the interval. Both ends are NaN when ``df`` is given
        and ``df <= 0``, or when the resulting half-length is not finite.
    """
    n_pre = num_pre
    n_post = num_post

    # A non-positive df is treated as a sentinel for "no valid inference".
    if df is not None and df <= 0:
        return np.nan, np.nan

    # One weight per slope s = -T+1, ..., 0 (the last touches beta_0 = 0).
    slope_count = n_pre
    weight_total = float(np.dot(np.arange(1, n_post + 1), l_vec))

    def _complete(free):
        # Expand the free weights to the full vector obeying the sum constraint.
        if slope_count == 1:
            return np.array([weight_total])
        if len(free) == slope_count - 1:
            return np.concatenate([free, [weight_total - np.sum(free)]])
        return free

    def flci_half_length(w_free):
        """Half-length of the interval implied by the free slope weights."""
        weights = _complete(w_free)

        direction = _w_to_v(weights, l_vec, n_pre)
        sd = np.sqrt(float(direction @ sigma @ direction))
        if sd <= 0:
            return np.inf

        bias = _compute_worst_case_bias(weights, l_vec, n_pre, n_post, M)
        if not np.isfinite(bias):
            return np.inf

        ratio = float(bias / sd)
        return float(sd * _cv_alpha(ratio, alpha, df=df))

    from scipy.optimize import minimize as scipy_minimize

    if slope_count == 1:
        # The single weight is fully determined by the sum constraint.
        w_opt = np.array([weight_total])
        chi = flci_half_length(w_opt)
    else:
        # Start from equal weights; the last weight is implied.
        start = np.full(slope_count - 1, weight_total / slope_count)
        fit = scipy_minimize(
            flci_half_length,
            x0=start,
            method="Nelder-Mead",
            options={"maxiter": 500, "xatol": 1e-5, "fatol": 1e-6},
        )
        w_opt = np.concatenate([fit.x, [weight_total - np.sum(fit.x)]])
        chi = flci_half_length(fit.x)

    # Centre of the interval: v' beta with the selected weights.
    stacked_beta = np.concatenate([beta_pre, beta_post])
    best_direction = _w_to_v(w_opt, l_vec, n_pre)
    centre = float(best_direction @ stacked_beta)

    if not np.isfinite(chi):
        return np.nan, np.nan

    return centre - chi, centre + chi
1620
+
1621
+
1622
def _setup_moment_inequalities(
    beta_hat: np.ndarray,
    sigma_hat: np.ndarray,
    A: np.ndarray,
    d: np.ndarray,
    l: np.ndarray,
    theta_bar: float,
    num_pre: int,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Transform H0: theta = theta_bar into moment inequality form.

    Follows the construction described for Rambachan & Roth (2023)
    Eqs. 12-13. With ``delta = beta - L_post tau_post`` and
    ``A delta <= d``, the post-period effects are split as

        tau_post = l * theta / (l'l) + N eta,      N' l = 0,

    so that ``l' tau_post = theta`` for every nuisance value eta. Under the
    null this gives ``E[Y_tilde] - X_tilde eta <= 0`` with

        Y_tilde = A beta_hat - d - (A L_post l / (l'l)) * theta_bar
        X_tilde = A L_post N

    The target column is computed directly from ``l`` rather than from the
    first column of a QR factor: that column equals ``+/- l / ||l||``, so
    using it would mis-scale (and possibly sign-flip) ``theta_bar`` whenever
    ``l`` is not a positive unit vector.

    Parameters
    ----------
    beta_hat : np.ndarray
        Full coefficient vector, pre-periods first.
    sigma_hat : np.ndarray
        Covariance matrix of ``beta_hat``.
    A : np.ndarray
        Polyhedral constraint matrix.
    d : np.ndarray
        Polyhedral constraint vector.
    l : np.ndarray
        Target parameter weights (length = number of post-periods); must
        not be the zero vector.
    theta_bar : float
        Hypothesized value of ``l' tau_post``.
    num_pre : int
        Number of pre-periods.

    Returns
    -------
    Y_tilde : np.ndarray
        Transformed statistic, one entry per row of A.
    X_tilde : np.ndarray
        Nuisance matrix of shape (n_constraints, num_post - 1).
    Sigma_tilde : np.ndarray
        ``A @ sigma_hat @ A.T``.
    """
    num_post = len(beta_hat) - num_pre

    # Y_n = A @ beta_hat - d
    Y_n = A @ beta_hat - d

    # A @ L_post just selects the post-period columns of A.
    L_post = np.zeros((len(beta_hat), num_post))
    L_post[num_pre:, :] = np.eye(num_post)
    A_tilde = A @ L_post  # shape: (n_constraints, num_post)

    target = np.asarray(l, dtype=float).reshape(-1)

    # Full QR of [l, e_1, ..., e_{num_post-1}]: Q is square and orthogonal,
    # so its trailing columns form an orthonormal basis of the complement
    # of l regardless of the sign/scale chosen for the first column.
    Q, _ = np.linalg.qr(
        np.hstack([target.reshape(-1, 1), np.eye(num_post)[:, : num_post - 1]])
    )
    X_tilde = A_tilde @ Q[:, 1:]

    # Column multiplying theta: A L_post l / (l'l), so that the first
    # coordinate of the reparameterization is exactly theta = l' tau_post.
    target_col = (A_tilde @ target) / float(target @ target)
    Y_tilde = Y_n - target_col * theta_bar

    Sigma_tilde = A @ sigma_hat @ A.T

    return Y_tilde, X_tilde, Sigma_tilde
1675
+
1676
+
1677
def _enumerate_vertices(
    X_tilde: np.ndarray,
    sigma_tilde_diag: np.ndarray,
    n_moments: int,
) -> List[np.ndarray]:
    """
    List basic feasible solutions of the dual feasible set

        {gamma >= 0 : gamma' X_tilde = 0, gamma' sigma_tilde_diag = 1}.

    Every subset of ``n_nuisance + 1`` moments is tried as a basis: the
    square system for the basic entries is solved and the solution kept
    when all entries are non-negative up to a tolerance of 1e-10 (tiny
    negatives are then clipped to zero). Exactly singular bases are
    skipped. The number of subsets grows combinatorially with
    ``n_moments``, so this is only practical for small problems, and the
    same vertex can be returned more than once when it is degenerate.

    Parameters
    ----------
    X_tilde : np.ndarray
        Nuisance matrix, shape (n_moments, n_nuisance). A 1-D array is
        treated as having no nuisance columns.
    sigma_tilde_diag : np.ndarray
        Normalization weights, shape (n_moments,).
    n_moments : int
        Number of moment inequalities.

    Returns
    -------
    vertices : list of np.ndarray
        Feasible gamma vectors of length ``n_moments``; empty when there
        are fewer moments than equality constraints.
    """
    import itertools

    n_nuisance = X_tilde.shape[1] if X_tilde.ndim > 1 else 0
    basis_size = n_nuisance + 1  # nuisance zero conditions + normalization

    if basis_size > n_moments:
        return []

    # Right-hand side is the same for every basis: zeros, then the
    # normalization gamma' sigma = 1.
    rhs = np.zeros(basis_size)
    rhs[-1] = 1.0

    found = []
    for combo in itertools.combinations(range(n_moments), basis_size):
        picked = list(combo)

        scale_row = sigma_tilde_diag[picked].reshape(1, -1)
        if n_nuisance > 0:
            system = np.vstack([X_tilde[picked, :].T, scale_row])
        else:
            system = scale_row

        try:
            basic = np.linalg.solve(system, rhs)
        except np.linalg.LinAlgError:
            continue

        if not np.all(basic >= -1e-10):
            continue

        gamma = np.zeros(n_moments)
        gamma[picked] = np.maximum(basic, 0)
        found.append(gamma)

    return found
1745
+
1746
+
1747
def _compute_arp_test(
    Y_tilde: np.ndarray,
    X_tilde: np.ndarray,
    Sigma_tilde: np.ndarray,
    alpha: float,
    kappa: Optional[float] = None,
) -> bool:
    """
    Run the ARP conditional / least-favorable (LF) hybrid test.

    Stage 1 rejects when ``eta_hat = max_gamma gamma' Y_tilde`` (maximum
    over the enumerated dual vertices) exceeds a simulated LF critical
    value of size ``kappa``. Otherwise stage 2 compares ``eta_hat`` with a
    truncated-normal critical value of size ``(alpha - kappa) / (1 - kappa)``,
    truncated below at the second-largest vertex value and above at the LF
    critical value.

    Every situation in which the test cannot be carried out (a zero
    variance on the diagonal, no dual vertices, a degenerate truncation
    interval, a failing or non-finite truncated-normal quantile) results in
    non-rejection.

    Parameters
    ----------
    Y_tilde : np.ndarray
        Transformed statistic.
    X_tilde : np.ndarray
        Nuisance matrix.
    Sigma_tilde : np.ndarray
        Covariance of ``Y_tilde``.
    alpha : float
        Overall significance level.
    kappa : float, optional
        Size of the first-stage LF test. Default: ``alpha / 10``.

    Returns
    -------
    reject : bool
        True if H0 is rejected.
    """
    from scipy.stats import truncnorm

    if kappa is None:
        kappa = alpha / 10.0

    n_moments = len(Y_tilde)
    sigma_tilde_diag = np.sqrt(np.maximum(np.diag(Sigma_tilde), 0))

    # A zero standard deviation would make the dual normalization singular.
    if np.any(sigma_tilde_diag <= 0):
        return False

    vertices = _enumerate_vertices(X_tilde, sigma_tilde_diag, n_moments)
    if not vertices:
        # Cannot enumerate vertices; fall back to conservative non-rejection.
        return False

    vertex_matrix = np.array(vertices)  # (n_vertices, n_moments)

    # eta_hat = max over vertices of gamma' Y_tilde
    eta_values = vertex_matrix @ Y_tilde
    opt_idx = int(np.argmax(eta_values))
    eta_hat = eta_values[opt_idx]
    gamma_star = vertex_matrix[opt_idx]

    # ---- Stage 1: LF test (size kappa) ---------------------------------
    # c_LF = (1 - kappa) quantile of max_gamma gamma' xi, xi ~ N(0, Sigma).
    rng = np.random.default_rng(42)  # fixed seed keeps the test deterministic
    n_sim = 5000
    try:
        factor = np.linalg.cholesky(Sigma_tilde + 1e-12 * np.eye(n_moments))
    except np.linalg.LinAlgError:
        # Sigma_tilde = A Sigma A' is typically rank-deficient; when the
        # jitter is not enough, use an eigenvalue square root with negative
        # round-off eigenvalues clipped to zero.
        evals, evecs = np.linalg.eigh((Sigma_tilde + Sigma_tilde.T) / 2.0)
        factor = evecs * np.sqrt(np.clip(evals, 0.0, None))

    # gamma' xi = (gamma' factor) z with z standard normal. Drawing all z at
    # once consumes the random stream in the same order as one draw per
    # simulation.
    projected = vertex_matrix @ factor
    normals = rng.standard_normal((n_sim, n_moments))
    max_draws = np.empty(n_sim)
    # Process draws in chunks so the (draws x vertices) product stays small
    # even when many vertices were enumerated.
    chunk = max(1, 10**6 // len(vertices))
    for start in range(0, n_sim, chunk):
        stop = start + chunk
        max_draws[start:stop] = (normals[start:stop] @ projected.T).max(axis=1)
    c_LF = np.quantile(max_draws, 1 - kappa)

    if eta_hat > c_LF:
        return True  # Reject via LF test

    # ---- Stage 2: conditional test -------------------------------------
    alpha_cond = (alpha - kappa) / (1 - kappa)

    gamma_var = gamma_star @ Sigma_tilde @ gamma_star
    if gamma_var <= 0:
        return False
    sigma_gamma = np.sqrt(gamma_var)

    # Lower truncation: the next-best vertex value.
    other_eta = np.delete(eta_values, opt_idx)
    v_lo = other_eta.max() if other_eta.size else -np.inf

    # Upper truncation: stage 1 did not reject, so eta_hat <= c_LF.
    v_up = c_LF

    if v_lo >= v_up:
        # Degenerate truncation interval
        return False

    # truncnorm takes its bounds in standardized units; mean 0 is used as
    # the least-favorable value under H0.
    a = v_lo / sigma_gamma
    b = v_up / sigma_gamma

    try:
        c_cond = truncnorm.ppf(1 - alpha_cond, a, b, loc=0, scale=sigma_gamma)
    except (ValueError, RuntimeError):
        return False

    # max(0, nan) evaluates to 0, which would turn a failed quantile into a
    # test against zero; treat it like the other failures instead.
    if not np.isfinite(c_cond):
        return False

    return bool(eta_hat > max(0, c_cond))
1849
+
1850
+
1851
def _arp_confidence_set(
    beta_hat: np.ndarray,
    sigma_hat: np.ndarray,
    A: np.ndarray,
    d: np.ndarray,
    l: np.ndarray,
    num_pre: int,
    alpha: float = 0.05,
    kappa: Optional[float] = None,
    n_grid: int = 200,
) -> Tuple[float, float]:
    """
    Compute ARP hybrid confidence set by test inversion.

    Per Rambachan & Roth (2023), the confidence set is:
    C = {theta_bar : ARP hybrid test does not reject H0: theta = theta_bar}

    The test is evaluated on an evenly spaced grid centered on the point
    estimate. The reported interval is the hull of the accepted grid points,
    and each end is then sharpened by bisection between the outermost
    accepted grid point and its rejected neighbour.

    Parameters
    ----------
    beta_hat : np.ndarray
        Full event-study coefficient vector [pre, post].
    sigma_hat : np.ndarray
        Full covariance matrix.
    A : np.ndarray
        Polyhedral constraint matrix (for Delta).
    d : np.ndarray
        Polyhedral constraint vector.
    l : np.ndarray
        Target parameter weights.
    num_pre : int
        Number of pre-periods.
    alpha : float
        Significance level.
    kappa : float, optional
        Hybrid test first-stage size.
    n_grid : int
        Number of grid points for test inversion.

    Returns
    -------
    ci_lb : float
        Lower bound of confidence set.
    ci_ub : float
        Upper bound of confidence set.
        If every grid point is rejected, both bounds equal the point
        estimate. If an end of the grid is itself accepted, that bound is
        left at the grid edge (the set may extend further).
    """
    n_refine = 15  # bisection steps per boundary

    beta_post = beta_hat[num_pre:]

    # Point estimate and SE for grid centering
    theta_hat = l @ beta_post
    se = np.sqrt(l @ sigma_hat[num_pre:, num_pre:] @ l)

    # Grid half-width: 5 SEs, but never narrower than 1.0. A non-finite SE
    # would otherwise turn every grid point into NaN.
    grid_half = max(5 * se, 1.0) if np.isfinite(se) else 1.0
    theta_grid = np.linspace(theta_hat - grid_half, theta_hat + grid_half, n_grid)

    def _is_accepted(theta_bar):
        """Return True when the ARP test does not reject theta = theta_bar."""
        Y_tilde, X_tilde, Sigma_tilde = _setup_moment_inequalities(
            beta_hat, sigma_hat, A, d, l, theta_bar, num_pre
        )
        return not _compute_arp_test(Y_tilde, X_tilde, Sigma_tilde, alpha, kappa)

    def _bisect(rejected_pt, accepted_pt):
        """Shrink [rejected_pt, accepted_pt] and return the accepted end."""
        for _ in range(n_refine):
            mid = (rejected_pt + accepted_pt) / 2
            if _is_accepted(mid):
                accepted_pt = mid
            else:
                rejected_pt = mid
        return accepted_pt

    # Test inversion: indices of grid values that are not rejected
    accepted_idx = [
        i for i, theta_bar in enumerate(theta_grid) if _is_accepted(theta_bar)
    ]

    if not accepted_idx:
        # Everything rejected — empty confidence set (unusual)
        return theta_hat, theta_hat

    first, last = accepted_idx[0], accepted_idx[-1]
    ci_lb = theta_grid[first]
    ci_ub = theta_grid[last]

    # Refine each boundary only when a rejected grid neighbour brackets it.
    if first > 0:
        ci_lb = _bisect(theta_grid[first - 1], ci_lb)
    if last < len(theta_grid) - 1:
        ci_ub = _bisect(theta_grid[last + 1], ci_ub)

    return float(ci_lb), float(ci_ub)
1945
+
1946
+
1947
+ # =============================================================================
1948
+ # Main Class
1949
+ # =============================================================================
1950
+
1951
+
1952
+ class HonestDiD:
1953
+ """
1954
+ Honest DiD sensitivity analysis (Rambachan & Roth 2023).
1955
+
1956
+ Computes robust inference for difference-in-differences allowing
1957
+ for bounded violations of parallel trends.
1958
+
1959
+ Parameters
1960
+ ----------
1961
+ method : {"smoothness", "relative_magnitude", "combined"}
1962
+ Type of restriction on trend violations:
1963
+ - "smoothness": Bounds on second differences of trend violations (Delta^SD)
1964
+ - "relative_magnitude": Post first differences <= M * max pre first difference (Delta^RM)
1965
+ - "combined": Both restrictions (Delta^SDRM)
1966
+ M : float, optional
1967
+ Restriction parameter. Interpretation depends on method:
1968
+ - smoothness: Max second difference
1969
+ - relative_magnitude: Scaling factor for max pre-period first difference
1970
+ Default is 1.0 for relative_magnitude, 0.0 for smoothness.
1971
+ alpha : float
1972
+ Significance level for confidence intervals.
1973
+ l_vec : array-like or None
1974
+ Weighting vector for scalar parameter (length = num_post_periods).
1975
+ If None, uses uniform weights (average effect).
1976
+
1977
+ Examples
1978
+ --------
1979
+ >>> from diff_diff import MultiPeriodDiD
1980
+ >>> from diff_diff.honest_did import HonestDiD
1981
+ >>>
1982
+ >>> # Fit event study
1983
+ >>> mp_did = MultiPeriodDiD()
1984
+ >>> results = mp_did.fit(data, outcome='y', treatment='treated',
1985
+ ... time='period', post_periods=[4,5,6,7])
1986
+ >>>
1987
+ >>> # Sensitivity analysis with relative magnitudes
1988
+ >>> honest = HonestDiD(method='relative_magnitude', M=1.0)
1989
+ >>> bounds = honest.fit(results)
1990
+ >>> print(bounds.summary())
1991
+ >>>
1992
+ >>> # Sensitivity curve over M values
1993
+ >>> sensitivity = honest.sensitivity_analysis(results, M_grid=[0, 0.5, 1, 1.5, 2])
1994
+ >>> sensitivity.plot()
1995
+ """
1996
+
1997
+ def __init__(
1998
+ self,
1999
+ method: Literal["smoothness", "relative_magnitude", "combined"] = "relative_magnitude",
2000
+ M: Optional[float] = None,
2001
+ alpha: float = 0.05,
2002
+ l_vec: Optional[np.ndarray] = None,
2003
+ ):
2004
+ self.method = method
2005
+ self.alpha = alpha
2006
+ self.l_vec = l_vec
2007
+
2008
+ # Set default M based on method
2009
+ if M is None:
2010
+ self.M = 1.0 if method == "relative_magnitude" else 0.0
2011
+ else:
2012
+ self.M = M
2013
+
2014
+ self._validate_params()
2015
+
2016
+ def _validate_params(self):
2017
+ """Validate initialization parameters."""
2018
+ if self.method not in ["smoothness", "relative_magnitude", "combined"]:
2019
+ raise ValueError(
2020
+ f"method must be 'smoothness', 'relative_magnitude', or 'combined', "
2021
+ f"got method='{self.method}'"
2022
+ )
2023
+ if self.M < 0:
2024
+ raise ValueError(f"M must be non-negative, got M={self.M}")
2025
+ if not 0 < self.alpha < 1:
2026
+ raise ValueError(f"alpha must be between 0 and 1, got alpha={self.alpha}")
2027
+
2028
+ def get_params(self) -> Dict[str, Any]:
2029
+ """Get parameters for this estimator."""
2030
+ return {
2031
+ "method": self.method,
2032
+ "M": self.M,
2033
+ "alpha": self.alpha,
2034
+ "l_vec": self.l_vec,
2035
+ }
2036
+
2037
+ def set_params(self, **params) -> "HonestDiD":
2038
+ """Set parameters for this estimator."""
2039
+ for key, value in params.items():
2040
+ if hasattr(self, key):
2041
+ setattr(self, key, value)
2042
+ else:
2043
+ raise ValueError(f"Invalid parameter: {key}")
2044
+ self._validate_params()
2045
+ return self
2046
+
2047
+ def fit(
2048
+ self,
2049
+ results: Union[MultiPeriodDiDResults, Any],
2050
+ M: Optional[float] = None,
2051
+ ) -> HonestDiDResults:
2052
+ """
2053
+ Compute bounds and robust confidence intervals.
2054
+
2055
+ Parameters
2056
+ ----------
2057
+ results : MultiPeriodDiDResults or CallawaySantAnnaResults
2058
+ Results from event study estimation.
2059
+ M : float, optional
2060
+ Override the M parameter for this fit.
2061
+
2062
+ Returns
2063
+ -------
2064
+ HonestDiDResults
2065
+ Results containing bounds and robust confidence intervals.
2066
+ """
2067
+ M = M if M is not None else self.M
2068
+
2069
+ # Extract event study parameters
2070
+ (beta_hat, sigma, num_pre, num_post, pre_periods, post_periods, df_survey) = (
2071
+ _extract_event_study_params(results)
2072
+ )
2073
+
2074
+ # beta_hat contains [pre-period effects, post-period effects] in order.
2075
+ # Extract pre and post components for the identified set LP.
2076
+ # The LP pins delta_pre = beta_pre (Rambachan & Roth Eqs 5-6).
2077
+ if len(beta_hat) == num_pre + num_post:
2078
+ beta_pre = beta_hat[:num_pre]
2079
+ beta_post = beta_hat[num_pre:]
2080
+ elif len(beta_hat) == num_post:
2081
+ beta_pre = np.zeros(num_pre)
2082
+ beta_post = beta_hat
2083
+ else:
2084
+ beta_pre = np.zeros(num_pre)
2085
+ beta_post = beta_hat
2086
+ num_post = len(beta_hat)
2087
+
2088
+ # Handle sigma extraction for post periods
2089
+ if sigma.shape[0] == num_post and sigma.shape[0] == len(beta_post):
2090
+ sigma_post = sigma
2091
+ elif sigma.shape[0] == num_pre + num_post:
2092
+ sigma_post = sigma[num_pre:, num_pre:]
2093
+ else:
2094
+ sigma_post = sigma[: len(beta_post), : len(beta_post)]
2095
+
2096
+ # Update num_post to match actual data
2097
+ num_post = len(beta_post)
2098
+
2099
+ if num_post == 0:
2100
+ raise ValueError(
2101
+ "No post-period effects with finite estimates found. "
2102
+ "HonestDiD requires at least one identified post-period "
2103
+ "coefficient to compute bounds."
2104
+ )
2105
+
2106
+ # Set up weighting vector
2107
+ if self.l_vec is None:
2108
+ l_vec = np.ones(num_post) / num_post # Uniform weights
2109
+ else:
2110
+ l_vec = np.asarray(self.l_vec)
2111
+ if len(l_vec) != num_post:
2112
+ raise ValueError(f"l_vec must have length {num_post}, got {len(l_vec)}")
2113
+
2114
+ # Compute original estimate and SE
2115
+ original_estimate = np.dot(l_vec, beta_post)
2116
+ original_se = np.sqrt(l_vec @ sigma_post @ l_vec)
2117
+
2118
+ # Compute bounds based on method
2119
+ if self.method == "smoothness":
2120
+ lb, ub, ci_lb, ci_ub = self._compute_smoothness_bounds(
2121
+ beta_pre, beta_post, sigma, sigma_post, l_vec,
2122
+ num_pre, num_post, M, df=df_survey,
2123
+ )
2124
+ ci_method = "FLCI"
2125
+
2126
+ elif self.method == "relative_magnitude":
2127
+ lb, ub, ci_lb, ci_ub = self._compute_rm_bounds(
2128
+ beta_pre,
2129
+ beta_post,
2130
+ sigma,
2131
+ sigma_post,
2132
+ l_vec,
2133
+ num_pre,
2134
+ num_post,
2135
+ M,
2136
+ pre_periods,
2137
+ results,
2138
+ df=df_survey,
2139
+ )
2140
+ ci_method = "FLCI"
2141
+
2142
+ else: # combined
2143
+ lb, ub, ci_lb, ci_ub = self._compute_combined_bounds(
2144
+ beta_pre,
2145
+ beta_post,
2146
+ sigma,
2147
+ sigma_post,
2148
+ l_vec,
2149
+ num_pre,
2150
+ num_post,
2151
+ M,
2152
+ pre_periods,
2153
+ results,
2154
+ df=df_survey,
2155
+ )
2156
+ ci_method = "FLCI"
2157
+
2158
+ # Extract survey_metadata for storage on results
2159
+ survey_metadata = getattr(results, "survey_metadata", None)
2160
+
2161
+ return HonestDiDResults(
2162
+ lb=lb,
2163
+ ub=ub,
2164
+ ci_lb=ci_lb,
2165
+ ci_ub=ci_ub,
2166
+ M=M,
2167
+ method=self.method,
2168
+ original_estimate=original_estimate,
2169
+ original_se=original_se,
2170
+ alpha=self.alpha,
2171
+ ci_method=ci_method,
2172
+ original_results=results,
2173
+ survey_metadata=survey_metadata,
2174
+ df_survey=df_survey,
2175
+ )
2176
+
2177
+ def _compute_smoothness_bounds(
2178
+ self,
2179
+ beta_pre: np.ndarray,
2180
+ beta_post: np.ndarray,
2181
+ sigma_full: np.ndarray,
2182
+ sigma_post: np.ndarray,
2183
+ l_vec: np.ndarray,
2184
+ num_pre: int,
2185
+ num_post: int,
2186
+ M: float,
2187
+ df: Optional[int] = None,
2188
+ ) -> Tuple[float, float, float, float]:
2189
+ """Compute bounds under smoothness restriction (Delta^SD).
2190
+
2191
+ Uses the optimal FLCI from Rambachan & Roth (2023) Section 4.1,
2192
+ which jointly optimizes the affine estimator direction to minimize
2193
+ CI width. Falls back to naive FLCI if the full covariance matrix
2194
+ is not available.
2195
+ """
2196
+ # Construct constraints
2197
+ A_ineq, b_ineq = _construct_constraints_sd(num_pre, num_post, M)
2198
+
2199
+ # Solve for identified set bounds with delta_pre = beta_pre pinned
2200
+ lb, ub = _solve_bounds_lp(
2201
+ beta_pre, beta_post, l_vec, A_ineq, b_ineq, num_pre
2202
+ )
2203
+
2204
+ # Propagate infeasibility: if bounds are NaN, CI is NaN too
2205
+ if np.isnan(lb) or np.isnan(ub):
2206
+ return np.nan, np.nan, np.nan, np.nan
2207
+
2208
+ # Compute optimal FLCI (Rambachan & Roth Section 4.1)
2209
+ if sigma_full.shape[0] == num_pre + num_post:
2210
+ ci_lb, ci_ub = _compute_optimal_flci(
2211
+ beta_pre, beta_post, sigma_full, l_vec,
2212
+ num_pre, num_post, M, self.alpha, df=df,
2213
+ )
2214
+ else:
2215
+ # Fallback to naive FLCI when full sigma unavailable
2216
+ se = np.sqrt(l_vec @ sigma_post @ l_vec)
2217
+ ci_lb, ci_ub = _compute_flci(lb, ub, se, self.alpha, df=df)
2218
+
2219
+ return lb, ub, ci_lb, ci_ub
2220
+
2221
+ def _compute_rm_bounds(
2222
+ self,
2223
+ beta_pre: np.ndarray,
2224
+ beta_post: np.ndarray,
2225
+ sigma_full: np.ndarray,
2226
+ sigma_post: np.ndarray,
2227
+ l_vec: np.ndarray,
2228
+ num_pre: int,
2229
+ num_post: int,
2230
+ Mbar: float,
2231
+ pre_periods: List,
2232
+ results: Any,
2233
+ df: Optional[int] = None,
2234
+ ) -> Tuple[float, float, float, float]:
2235
+ """Compute bounds under relative magnitudes restriction (Delta^RM).
2236
+
2237
+ Uses union-of-polyhedra decomposition per Lemma 2.2 of
2238
+ Rambachan & Roth (2023). Delta^RM constrains post-treatment
2239
+ first differences relative to the max pre-treatment first difference.
2240
+
2241
+ CI construction uses naive FLCI (conservative). The paper recommends
2242
+ ARP hybrid confidence sets (Sections 3.2.1-3.2.2); infrastructure
2243
+ is implemented but disabled pending calibration of the moment
2244
+ inequality transformation.
2245
+ """
2246
+ # Solve identified set via union of polyhedra
2247
+ lb, ub = _solve_rm_bounds_union(
2248
+ beta_pre, beta_post, l_vec, num_pre, Mbar
2249
+ )
2250
+
2251
+ # CI construction for Delta^RM.
2252
+ # The paper recommends ARP conditional/hybrid confidence sets
2253
+ # (Sections 3.2.1-3.2.2). The ARP infrastructure is implemented
2254
+ # (_arp_confidence_set) but the moment inequality transformation
2255
+ # requires further calibration to produce valid CIs consistently.
2256
+ # Currently uses conservative naive FLCI (extends identified set
2257
+ # by z*se); ARP will be enabled once calibrated.
2258
+ # TODO: enable ARP hybrid for RM once transformation is validated
2259
+ se = np.sqrt(l_vec @ sigma_post @ l_vec)
2260
+ if np.isfinite(lb) and np.isfinite(ub):
2261
+ ci_lb, ci_ub = _compute_flci(lb, ub, se, self.alpha, df=df)
2262
+ else:
2263
+ ci_lb, ci_ub = -np.inf, np.inf
2264
+
2265
+ return lb, ub, ci_lb, ci_ub
2266
+
2267
+ def _compute_combined_bounds(
2268
+ self,
2269
+ beta_pre: np.ndarray,
2270
+ beta_post: np.ndarray,
2271
+ sigma_full: np.ndarray,
2272
+ sigma_post: np.ndarray,
2273
+ l_vec: np.ndarray,
2274
+ num_pre: int,
2275
+ num_post: int,
2276
+ M: float,
2277
+ pre_periods: List,
2278
+ results: Any,
2279
+ df: Optional[int] = None,
2280
+ ) -> Tuple[float, float, float, float]:
2281
+ """Compute bounds under combined smoothness + RM restriction."""
2282
+ import warnings
2283
+
2284
+ warnings.warn(
2285
+ "HonestDiD method='combined' (Delta^SDRM) uses naive FLCI on the "
2286
+ "intersection of Delta^SD and Delta^RM bounds. The paper proves "
2287
+ "FLCI is NOT consistent for Delta^SDRM (Proposition 4.2). "
2288
+ "Consider using method='smoothness' or method='relative_magnitude' "
2289
+ "separately for paper-supported inference.",
2290
+ UserWarning,
2291
+ stacklevel=3,
2292
+ )
2293
+ # Get smoothness bounds
2294
+ lb_sd, ub_sd, _, _ = self._compute_smoothness_bounds(
2295
+ beta_pre, beta_post, sigma_full, sigma_post, l_vec,
2296
+ num_pre, num_post, M, df=df,
2297
+ )
2298
+
2299
+ # Get RM bounds (use M as Mbar for combined)
2300
+ lb_rm, ub_rm, _, _ = self._compute_rm_bounds(
2301
+ beta_pre, beta_post, sigma_full, sigma_post, l_vec,
2302
+ num_pre, num_post, M, pre_periods, results, df=df,
2303
+ )
2304
+
2305
+ # Combined bounds are intersection
2306
+ lb = max(lb_sd, lb_rm)
2307
+ ub = min(ub_sd, ub_rm)
2308
+
2309
+ # If bounds cross, use the original estimate
2310
+ if lb > ub:
2311
+ theta = np.dot(l_vec, beta_post)
2312
+ lb = ub = theta
2313
+
2314
+ # Compute FLCI on combined bounds
2315
+ se = np.sqrt(l_vec @ sigma_post @ l_vec)
2316
+ ci_lb, ci_ub = _compute_flci(lb, ub, se, self.alpha, df=df)
2317
+
2318
+ return lb, ub, ci_lb, ci_ub
2319
+
2320
+ def _estimate_max_pre_violation(self, results: Any, pre_periods: List) -> float:
2321
+ """
2322
+ Estimate the maximum pre-period violation.
2323
+
2324
+ Uses pre-period coefficients if available, otherwise returns
2325
+ a default based on the overall SE.
2326
+ """
2327
+ if isinstance(results, MultiPeriodDiDResults):
2328
+ # Pre-period effects are now in period_effects directly
2329
+ # Filter out non-finite effects (e.g. from rank-deficient designs)
2330
+ pre_effects = [
2331
+ abs(results.period_effects[p].effect)
2332
+ for p in pre_periods
2333
+ if p in results.period_effects and np.isfinite(results.period_effects[p].effect)
2334
+ ]
2335
+ if pre_effects:
2336
+ return max(pre_effects)
2337
+
2338
+ # Fallback: use avg_se as a scale
2339
+ return results.avg_se
2340
+
2341
+ # For CallawaySantAnna, use pre-period event study effects
2342
+ try:
2343
+ from diff_diff.staggered import CallawaySantAnnaResults
2344
+
2345
+ if isinstance(results, CallawaySantAnnaResults):
2346
+ if results.event_study_effects:
2347
+ # Use the reference-aware pre_periods from _extract_event_study_params
2348
+ pre_set = set(pre_periods) if pre_periods else set()
2349
+ pre_effects = [
2350
+ abs(results.event_study_effects[t]["effect"])
2351
+ for t in results.event_study_effects
2352
+ if t in pre_set and results.event_study_effects[t].get("n_groups", 1) > 0
2353
+ ]
2354
+ if pre_effects:
2355
+ return max(pre_effects)
2356
+ # No valid pre-effects — should have been caught by
2357
+ # _extract_event_study_params pre-period validation
2358
+ return 0.0
2359
+ except ImportError:
2360
+ pass
2361
+
2362
+ # Default fallback
2363
+ return 0.1
2364
+
2365
+ def sensitivity_analysis(
2366
+ self,
2367
+ results: Union[MultiPeriodDiDResults, Any],
2368
+ M_grid: Optional[List[float]] = None,
2369
+ ) -> SensitivityResults:
2370
+ """
2371
+ Perform sensitivity analysis over a grid of M values.
2372
+
2373
+ Parameters
2374
+ ----------
2375
+ results : MultiPeriodDiDResults or CallawaySantAnnaResults
2376
+ Results from event study estimation.
2377
+ M_grid : list of float, optional
2378
+ Grid of M values to evaluate. If None, uses default grid
2379
+ based on method.
2380
+
2381
+ Returns
2382
+ -------
2383
+ SensitivityResults
2384
+ Results containing bounds and CIs for each M value.
2385
+ """
2386
+ if M_grid is None:
2387
+ if self.method == "relative_magnitude":
2388
+ M_grid = [0, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0]
2389
+ else:
2390
+ M_grid = [0, 0.1, 0.2, 0.3, 0.5, 0.75, 1.0]
2391
+
2392
+ M_values = np.array(M_grid)
2393
+ bounds_list = []
2394
+ ci_list = []
2395
+
2396
+ for M in M_values:
2397
+ result = self.fit(results, M=M)
2398
+ bounds_list.append((result.lb, result.ub))
2399
+ ci_list.append((result.ci_lb, result.ci_ub))
2400
+
2401
+ # Find breakdown value
2402
+ breakdown_M = self._find_breakdown(results, M_values, ci_list)
2403
+
2404
+ # Get original estimate info
2405
+ first_result = self.fit(results, M=0)
2406
+
2407
+ return SensitivityResults(
2408
+ M_values=M_values,
2409
+ bounds=bounds_list,
2410
+ robust_cis=ci_list,
2411
+ breakdown_M=breakdown_M,
2412
+ method=self.method,
2413
+ original_estimate=first_result.original_estimate,
2414
+ original_se=first_result.original_se,
2415
+ alpha=self.alpha,
2416
+ )
2417
+
2418
+ def _find_breakdown(
2419
+ self, results: Any, M_values: np.ndarray, ci_list: List[Tuple[float, float]]
2420
+ ) -> Optional[float]:
2421
+ """
2422
+ Find the breakdown value where CI first includes zero.
2423
+
2424
+ Uses binary search for precision.
2425
+ """
2426
+ # Check if any CI includes zero (NaN CIs are treated as undefined, not significant)
2427
+ def _ci_includes_zero(ci_lb, ci_ub):
2428
+ if not (np.isfinite(ci_lb) and np.isfinite(ci_ub)):
2429
+ return True # Undefined CIs are not "significant"
2430
+ return ci_lb <= 0 <= ci_ub
2431
+
2432
+ includes_zero = [_ci_includes_zero(ci_lb, ci_ub) for ci_lb, ci_ub in ci_list]
2433
+
2434
+ if not any(includes_zero):
2435
+ # Always significant - no breakdown
2436
+ return None
2437
+
2438
+ if all(includes_zero):
2439
+ # Never significant - breakdown at 0
2440
+ return 0.0
2441
+
2442
+ # Find first transition point
2443
+ for i, (inc, M) in enumerate(zip(includes_zero, M_values)):
2444
+ if inc and (i == 0 or not includes_zero[i - 1]):
2445
+ # Binary search between M_values[i-1] and M_values[i]
2446
+ if i == 0:
2447
+ return 0.0
2448
+
2449
+ lo, hi = M_values[i - 1], M_values[i]
2450
+
2451
+ for _ in range(20): # 20 iterations for precision
2452
+ mid = (lo + hi) / 2
2453
+ result = self.fit(results, M=mid)
2454
+ if _ci_includes_zero(result.ci_lb, result.ci_ub):
2455
+ hi = mid
2456
+ else:
2457
+ lo = mid
2458
+
2459
+ return (lo + hi) / 2
2460
+
2461
+ return None
2462
+
2463
+ def breakdown_value(
2464
+ self, results: Union[MultiPeriodDiDResults, Any], tol: float = 0.01
2465
+ ) -> Optional[float]:
2466
+ """
2467
+ Find the breakdown value directly using binary search.
2468
+
2469
+ The breakdown value is the smallest M where the robust
2470
+ confidence interval includes zero.
2471
+
2472
+ Parameters
2473
+ ----------
2474
+ results : MultiPeriodDiDResults or CallawaySantAnnaResults
2475
+ Results from event study estimation.
2476
+ tol : float
2477
+ Tolerance for binary search.
2478
+
2479
+ Returns
2480
+ -------
2481
+ float or None
2482
+ Breakdown value, or None if effect is always significant.
2483
+ """
2484
+ def _ci_covers_zero(r):
2485
+ if not (np.isfinite(r.ci_lb) and np.isfinite(r.ci_ub)):
2486
+ return True # Undefined CIs are not "significant"
2487
+ return r.ci_lb <= 0 <= r.ci_ub
2488
+
2489
+ # Check at M=0
2490
+ result_0 = self.fit(results, M=0)
2491
+ if _ci_covers_zero(result_0):
2492
+ return 0.0
2493
+
2494
+ # Check if significant even for large M
2495
+ result_large = self.fit(results, M=10)
2496
+ if not _ci_covers_zero(result_large):
2497
+ return None # Always significant
2498
+
2499
+ # Binary search
2500
+ lo, hi = 0.0, 10.0
2501
+
2502
+ while hi - lo > tol:
2503
+ mid = (lo + hi) / 2
2504
+ result = self.fit(results, M=mid)
2505
+ if _ci_covers_zero(result):
2506
+ hi = mid
2507
+ else:
2508
+ lo = mid
2509
+
2510
+ return (lo + hi) / 2
2511
+
2512
+
2513
+ # =============================================================================
2514
+ # Convenience Functions
2515
+ # =============================================================================
2516
+
2517
+
2518
def compute_honest_did(
    results: Union[MultiPeriodDiDResults, Any],
    method: str = "relative_magnitude",
    M: float = 1.0,
    alpha: float = 0.05,
) -> HonestDiDResults:
    """
    One-call shortcut: build a HonestDiD analyzer and fit it.

    Parameters
    ----------
    results : MultiPeriodDiDResults or CallawaySantAnnaResults
        Event study estimation output to analyze.
    method : str
        Restriction class: "smoothness", "relative_magnitude" or "combined".
    M : float
        Restriction parameter forwarded to HonestDiD.
    alpha : float
        Significance level for the robust confidence interval.

    Returns
    -------
    HonestDiDResults
        Identified-set bounds together with the robust confidence interval.

    Examples
    --------
    >>> bounds = compute_honest_did(event_study_results, method='relative_magnitude', M=1.0)
    >>> print(f"Robust CI: [{bounds.ci_lb:.3f}, {bounds.ci_ub:.3f}]")
    """
    return HonestDiD(method=method, M=M, alpha=alpha).fit(results)
2550
+
2551
+
2552
def sensitivity_plot(
    results: Union[MultiPeriodDiDResults, Any],
    method: str = "relative_magnitude",
    M_grid: Optional[List[float]] = None,
    alpha: float = 0.05,
    ax=None,
    **kwargs,
):
    """
    Run a sensitivity analysis over M and draw it.

    Parameters
    ----------
    results : MultiPeriodDiDResults or CallawaySantAnnaResults
        Event study estimation output to analyze.
    method : str
        Restriction class passed to HonestDiD.
    M_grid : list of float, optional
        M values to evaluate; None selects the analyzer's default grid.
    alpha : float
        Significance level for the robust confidence intervals.
    ax : matplotlib.axes.Axes, optional
        Existing axes to draw on.
    **kwargs
        Extra keyword arguments forwarded to the results' ``plot`` method.

    Returns
    -------
    ax : matplotlib.axes.Axes
        Whatever the results' ``plot`` method returns (the plotted axes).
    """
    analyzer = HonestDiD(method=method, alpha=alpha)
    curve = analyzer.sensitivity_analysis(results, M_grid=M_grid)
    plotted = curve.plot(ax=ax, **kwargs)
    return plotted