diff-diff 3.0.1__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. diff_diff/__init__.py +382 -0
  2. diff_diff/_backend.py +134 -0
  3. diff_diff/_rust_backend.cp314-win_amd64.pyd +0 -0
  4. diff_diff/bacon.py +1140 -0
  5. diff_diff/bootstrap_utils.py +730 -0
  6. diff_diff/continuous_did.py +1626 -0
  7. diff_diff/continuous_did_bspline.py +190 -0
  8. diff_diff/continuous_did_results.py +374 -0
  9. diff_diff/datasets.py +815 -0
  10. diff_diff/diagnostics.py +882 -0
  11. diff_diff/efficient_did.py +1770 -0
  12. diff_diff/efficient_did_bootstrap.py +359 -0
  13. diff_diff/efficient_did_covariates.py +899 -0
  14. diff_diff/efficient_did_results.py +368 -0
  15. diff_diff/efficient_did_weights.py +617 -0
  16. diff_diff/estimators.py +1501 -0
  17. diff_diff/honest_did.py +2585 -0
  18. diff_diff/imputation.py +2458 -0
  19. diff_diff/imputation_bootstrap.py +418 -0
  20. diff_diff/imputation_results.py +448 -0
  21. diff_diff/linalg.py +2538 -0
  22. diff_diff/power.py +2588 -0
  23. diff_diff/practitioner.py +869 -0
  24. diff_diff/prep.py +1738 -0
  25. diff_diff/prep_dgp.py +1718 -0
  26. diff_diff/pretrends.py +1105 -0
  27. diff_diff/results.py +918 -0
  28. diff_diff/stacked_did.py +1049 -0
  29. diff_diff/stacked_did_results.py +339 -0
  30. diff_diff/staggered.py +3895 -0
  31. diff_diff/staggered_aggregation.py +864 -0
  32. diff_diff/staggered_bootstrap.py +752 -0
  33. diff_diff/staggered_results.py +416 -0
  34. diff_diff/staggered_triple_diff.py +1545 -0
  35. diff_diff/staggered_triple_diff_results.py +416 -0
  36. diff_diff/sun_abraham.py +1685 -0
  37. diff_diff/survey.py +1981 -0
  38. diff_diff/synthetic_did.py +1136 -0
  39. diff_diff/triple_diff.py +2047 -0
  40. diff_diff/trop.py +952 -0
  41. diff_diff/trop_global.py +1270 -0
  42. diff_diff/trop_local.py +1307 -0
  43. diff_diff/trop_results.py +356 -0
  44. diff_diff/twfe.py +542 -0
  45. diff_diff/two_stage.py +1952 -0
  46. diff_diff/two_stage_bootstrap.py +520 -0
  47. diff_diff/two_stage_results.py +400 -0
  48. diff_diff/utils.py +1902 -0
  49. diff_diff/visualization/__init__.py +61 -0
  50. diff_diff/visualization/_common.py +328 -0
  51. diff_diff/visualization/_continuous.py +274 -0
  52. diff_diff/visualization/_diagnostic.py +817 -0
  53. diff_diff/visualization/_event_study.py +1086 -0
  54. diff_diff/visualization/_power.py +661 -0
  55. diff_diff/visualization/_staggered.py +833 -0
  56. diff_diff/visualization/_synthetic.py +197 -0
  57. diff_diff/wooldridge.py +1285 -0
  58. diff_diff/wooldridge_results.py +349 -0
  59. diff_diff-3.0.1.dist-info/METADATA +2997 -0
  60. diff_diff-3.0.1.dist-info/RECORD +62 -0
  61. diff_diff-3.0.1.dist-info/WHEEL +4 -0
  62. diff_diff-3.0.1.dist-info/sboms/diff_diff_rust.cyclonedx.json +5843 -0
@@ -0,0 +1,448 @@
1
+ """
2
+ Result containers for the Imputation DiD estimator.
3
+
4
+ This module contains ImputationBootstrapResults and ImputationDiDResults
5
+ dataclasses. Extracted from imputation.py for module size management.
6
+ """
7
+
8
+ from dataclasses import dataclass, field
9
+ from typing import Any, Dict, List, Optional, Tuple
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+
14
+ from diff_diff.results import _format_survey_block, _get_significance_stars
15
+
16
+ __all__ = [
17
+ "ImputationBootstrapResults",
18
+ "ImputationDiDResults",
19
+ ]
20
+
21
+
22
@dataclass
class ImputationBootstrapResults:
    """
    Container for the bootstrap inference output of ImputationDiD.

    The bootstrap is a library extension: Borusyak et al. (2024) propose only
    analytical inference based on the conservative variance estimator. It is
    offered here for consistency with CallawaySantAnna and SunAbraham.

    Attributes
    ----------
    n_bootstrap : int
        How many bootstrap iterations were run.
    weight_type : str
        Bootstrap weight scheme: "rademacher", "mammen", or "webb".
    alpha : float
        Significance level the confidence intervals were built at.
    overall_att_se : float
        Bootstrap standard error of the overall ATT.
    overall_att_ci : tuple
        Bootstrap confidence interval of the overall ATT.
    overall_att_p_value : float
        Bootstrap p-value of the overall ATT.
    event_study_ses : dict, optional
        Bootstrap standard errors of the event study effects.
    event_study_cis : dict, optional
        Bootstrap confidence intervals of the event study effects.
    event_study_p_values : dict, optional
        Bootstrap p-values of the event study effects.
    group_ses : dict, optional
        Bootstrap standard errors of the group effects.
    group_cis : dict, optional
        Bootstrap confidence intervals of the group effects.
    group_p_values : dict, optional
        Bootstrap p-values of the group effects.
    bootstrap_distribution : np.ndarray, optional
        Full bootstrap distribution of the overall ATT (left out of repr).
    """

    # Bootstrap configuration (required).
    n_bootstrap: int
    weight_type: str
    alpha: float

    # Overall ATT inference (required).
    overall_att_se: float
    overall_att_ci: Tuple[float, float]
    overall_att_p_value: float

    # Event study inference; stays None unless supplied.
    event_study_ses: Optional[Dict[int, float]] = None
    event_study_cis: Optional[Dict[int, Tuple[float, float]]] = None
    event_study_p_values: Optional[Dict[int, float]] = None

    # Group inference; stays None unless supplied.
    group_ses: Optional[Dict[Any, float]] = None
    group_cis: Optional[Dict[Any, Tuple[float, float]]] = None
    group_p_values: Optional[Dict[Any, float]] = None

    # Raw draws are excluded from repr so printing the object stays short.
    bootstrap_distribution: Optional[np.ndarray] = field(repr=False, default=None)
74
+
75
+
76
@dataclass
class ImputationDiDResults:
    """
    Results from Borusyak-Jaravel-Spiess (2024) imputation DiD estimation.

    Attributes
    ----------
    treatment_effects : pd.DataFrame
        Unit-level treatment effects with columns: unit, time, tau_hat, weight.
    overall_att : float
        Overall average treatment effect on the treated.
    overall_se : float
        Standard error of overall ATT.
    overall_t_stat : float
        T-statistic for overall ATT.
    overall_p_value : float
        P-value for overall ATT.
    overall_conf_int : tuple
        Confidence interval for overall ATT.
    event_study_effects : dict, optional
        Dictionary mapping relative time h to effect dict with keys:
        'effect', 'se', 't_stat', 'p_value', 'conf_int', 'n_obs'.
    group_effects : dict, optional
        Dictionary mapping cohort g to effect dict.
    groups : list
        List of treatment cohorts.
    time_periods : list
        List of all time periods.
    n_obs : int
        Total number of observations.
    n_treated_obs : int
        Number of treated observations (|Omega_1|).
    n_untreated_obs : int
        Number of untreated observations (|Omega_0|).
    n_treated_units : int
        Number of ever-treated units.
    n_control_units : int
        Number of units contributing to Omega_0.
    alpha : float
        Significance level used.
    pretrend_results : dict, optional
        Populated by pretrend_test().
    bootstrap_results : ImputationBootstrapResults, optional
        Bootstrap inference results.
    survey_metadata : optional
        Survey design metadata (SurveyMetadata instance from diff_diff.survey).
        When set, summary() renders a survey block.
    """

    treatment_effects: pd.DataFrame
    overall_att: float
    overall_se: float
    overall_t_stat: float
    overall_p_value: float
    overall_conf_int: Tuple[float, float]
    event_study_effects: Optional[Dict[int, Dict[str, Any]]]
    group_effects: Optional[Dict[Any, Dict[str, Any]]]
    groups: List[Any]
    time_periods: List[Any]
    n_obs: int
    n_treated_obs: int
    n_untreated_obs: int
    n_treated_units: int
    n_control_units: int
    alpha: float = 0.05
    pretrend_results: Optional[Dict[str, Any]] = field(default=None, repr=False)
    # Forward reference: keeps this class independent of definition order.
    bootstrap_results: Optional["ImputationBootstrapResults"] = field(default=None, repr=False)
    # Internal: stores data needed for pretrend_test()
    _estimator_ref: Optional[Any] = field(default=None, repr=False)
    # Survey design metadata (SurveyMetadata instance from diff_diff.survey)
    survey_metadata: Optional[Any] = field(default=None, repr=False)

    # Width of the rendered summary. Unannotated on purpose so that the
    # dataclass machinery does not turn it into a field.
    _WIDTH = 85

    def __repr__(self) -> str:
        """Concise string representation."""
        sig = _get_significance_stars(self.overall_p_value)
        return (
            f"ImputationDiDResults(ATT={self.overall_att:.4f}{sig}, "
            f"SE={self.overall_se:.4f}, "
            f"n_groups={len(self.groups)}, "
            f"n_treated_obs={self.n_treated_obs})"
        )

    @property
    def coef_var(self) -> float:
        """Coefficient of variation: SE / |overall ATT|. NaN when ATT is 0 or SE non-finite."""
        if not (np.isfinite(self.overall_se) and self.overall_se >= 0):
            return np.nan
        if not np.isfinite(self.overall_att) or self.overall_att == 0:
            return np.nan
        return self.overall_se / abs(self.overall_att)

    # ------------------------------------------------------------------
    # Private formatting helpers shared by summary()
    # ------------------------------------------------------------------

    @staticmethod
    def _format_conf_level(alpha: float) -> str:
        """Return the confidence level implied by ``alpha`` as percent text.

        Uses ``:g`` formatting rather than ``int()`` truncation, so 0.025
        yields "97.5" (not "97") and a product that lands just below an
        integer because of floating-point error is not reported one too low.
        """
        return f"{(1 - alpha) * 100:g}"

    @staticmethod
    def _finite_or_nan(value: float, spec: str) -> str:
        """Format ``value`` with ``spec``; non-finite values become 'NaN' right-aligned to 10."""
        return format(value, spec) if np.isfinite(value) else f"{'NaN':>10}"

    @classmethod
    def _table_header(cls, title: str, first_col: str) -> List[str]:
        """Return the rule / centered title / column-header lines that open a table."""
        rule = "-" * cls._WIDTH
        return [
            rule,
            title.center(cls._WIDTH),
            rule,
            f"{first_col:<15} {'Estimate':>12} {'Std. Err.':>12} "
            f"{'t-stat':>10} {'P>|t|':>10} {'Sig.':>6}",
            rule,
        ]

    @classmethod
    def _effect_row(cls, label: Any, eff: Dict[str, Any]) -> str:
        """Render one event-study or cohort row; an all-NaN row when the effect is NaN."""
        if np.isnan(eff["effect"]):
            return f"{label:<15} {'NaN':>12} {'NaN':>12} {'NaN':>10} {'NaN':>10} {'':>6}"
        stars = _get_significance_stars(eff["p_value"])
        t_txt = cls._finite_or_nan(eff["t_stat"], ">10.3f")
        p_txt = cls._finite_or_nan(eff["p_value"], ">10.4f")
        return (
            f"{label:<15} {eff['effect']:>12.4f} {eff['se']:>12.4f} "
            f"{t_txt} {p_txt} {stars:>6}"
        )

    def summary(self, alpha: Optional[float] = None) -> str:
        """
        Generate formatted summary of estimation results.

        Parameters
        ----------
        alpha : float, optional
            Significance level used for the confidence-interval label.
            Defaults to the alpha used in estimation. The interval itself
            is the stored ``overall_conf_int``; it is not recomputed here,
            so passing a different alpha only changes the label.

        Returns
        -------
        str
            Formatted summary.
        """
        # `is None` rather than `or`, so an explicit value is never silently
        # replaced by the estimation alpha.
        level = self.alpha if alpha is None else alpha
        width = self._WIDTH
        rule = "-" * width
        banner = "=" * width

        lines = [
            banner,
            "Imputation DiD Estimator Results (Borusyak et al. 2024)".center(width),
            banner,
            "",
            f"{'Total observations:':<30} {self.n_obs:>10}",
            f"{'Treated observations:':<30} {self.n_treated_obs:>10}",
            f"{'Untreated observations:':<30} {self.n_untreated_obs:>10}",
            f"{'Treated units:':<30} {self.n_treated_units:>10}",
            f"{'Control units:':<30} {self.n_control_units:>10}",
            f"{'Treatment cohorts:':<30} {len(self.groups):>10}",
            f"{'Time periods:':<30} {len(self.time_periods):>10}",
            "",
        ]

        # Survey design info
        if self.survey_metadata is not None:
            lines.extend(_format_survey_block(self.survey_metadata, width))

        # Overall ATT
        lines.extend(
            self._table_header("Overall Average Treatment Effect on the Treated", "Parameter")
        )
        t_str = self._finite_or_nan(self.overall_t_stat, ">10.3f")
        p_str = self._finite_or_nan(self.overall_p_value, ">10.4f")
        sig = _get_significance_stars(self.overall_p_value)
        lines.extend(
            [
                f"{'ATT':<15} {self.overall_att:>12.4f} {self.overall_se:>12.4f} "
                f"{t_str} {p_str} {sig:>6}",
                rule,
                "",
                f"{self._format_conf_level(level)}% Confidence Interval: "
                f"[{self.overall_conf_int[0]:.4f}, {self.overall_conf_int[1]:.4f}]",
            ]
        )

        cv = self.coef_var
        if np.isfinite(cv):
            lines.append(f"{'CV (SE/|ATT|):':<25} {cv:>10.4f}")

        lines.append("")

        # Event study effects
        if self.event_study_effects:
            lines.extend(self._table_header("Event Study (Dynamic) Effects", "Rel. Period"))
            for h in sorted(self.event_study_effects):
                eff = self.event_study_effects[h]
                if eff.get("n_obs", 1) == 0:
                    # Reference period marker
                    lines.append(
                        f"[ref: {h}]" f"{'0.0000':>17} {'---':>12} {'---':>10} {'---':>10} {'':>6}"
                    )
                else:
                    lines.append(self._effect_row(h, eff))
            lines.extend([rule, ""])

        # Group effects
        if self.group_effects:
            lines.extend(self._table_header("Group (Cohort) Effects", "Cohort"))
            for g in sorted(self.group_effects):
                lines.append(self._effect_row(g, self.group_effects[g]))
            lines.extend([rule, ""])

        # Pre-trend test
        if self.pretrend_results is not None:
            pt = self.pretrend_results
            lines.extend(
                [
                    rule,
                    "Pre-Trend Test (Equation 9)".center(width),
                    rule,
                    f"{'F-statistic:':<30} {pt['f_stat']:>10.3f}",
                    f"{'P-value:':<30} {pt['p_value']:>10.4f}",
                    f"{'Degrees of freedom:':<30} {pt['df']:>10}",
                    f"{'Number of leads:':<30} {pt['n_leads']:>10}",
                    rule,
                    "",
                ]
            )

        lines.extend(
            [
                "Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1",
                banner,
            ]
        )

        return "\n".join(lines)

    def print_summary(self, alpha: Optional[float] = None) -> None:
        """Print summary to stdout."""
        print(self.summary(alpha))

    @staticmethod
    def _effects_to_frame(effects: Dict[Any, Dict[str, Any]], key_column: str) -> pd.DataFrame:
        """Flatten an effects dict into one row per key, ordered by key.

        The column list is passed explicitly so that an empty ``effects``
        dict still produces a frame with the full set of columns.
        """
        columns = [
            key_column,
            "effect",
            "se",
            "t_stat",
            "p_value",
            "conf_int_lower",
            "conf_int_upper",
            "n_obs",
        ]
        rows = [
            {
                key_column: key,
                "effect": data["effect"],
                "se": data["se"],
                "t_stat": data["t_stat"],
                "p_value": data["p_value"],
                "conf_int_lower": data["conf_int"][0],
                "conf_int_upper": data["conf_int"][1],
                "n_obs": data.get("n_obs", np.nan),
            }
            # Sort on the key alone so the effect dicts are never compared.
            for key, data in sorted(effects.items(), key=lambda item: item[0])
        ]
        return pd.DataFrame(rows, columns=columns)

    def to_dataframe(self, level: str = "observation") -> pd.DataFrame:
        """
        Convert results to DataFrame.

        Parameters
        ----------
        level : str, default="observation"
            Level of aggregation:
            - "observation": Unit-level treatment effects
            - "event_study": Event study effects by relative time
            - "group": Group (cohort) effects

        Returns
        -------
        pd.DataFrame
            Results as DataFrame. For "observation" this is a copy of
            ``treatment_effects``. For the other levels the columns are the
            key column ("relative_period" or "group") followed by effect, se,
            t_stat, p_value, conf_int_lower, conf_int_upper and n_obs.

        Raises
        ------
        ValueError
            If ``level`` is unknown, or the requested effects are None.
        """
        if level == "observation":
            return self.treatment_effects.copy()

        if level == "event_study":
            if self.event_study_effects is None:
                raise ValueError(
                    "Event study effects not computed. "
                    "Use aggregate='event_study' or aggregate='all'."
                )
            return self._effects_to_frame(self.event_study_effects, "relative_period")

        if level == "group":
            if self.group_effects is None:
                raise ValueError(
                    "Group effects not computed. " "Use aggregate='group' or aggregate='all'."
                )
            return self._effects_to_frame(self.group_effects, "group")

        raise ValueError(
            f"Unknown level: {level}. Use 'observation', 'event_study', or 'group'."
        )

    def pretrend_test(self, n_leads: Optional[int] = None) -> Dict[str, Any]:
        """
        Run a pre-trend test (Equation 9 of Borusyak et al. 2024).

        Adds pre-treatment lead indicators to the Step 1 OLS and tests
        their joint significance via a Wald F-test (cluster-robust, or
        design-based survey VCV when survey_design was provided at fit).
        The computation is delegated to the stored estimator reference;
        the result is also cached on ``pretrend_results`` so that
        summary() can display it.

        Parameters
        ----------
        n_leads : int, optional
            Number of pre-treatment leads to include. If None, uses all
            available pre-treatment periods minus one (for the reference period).

        Returns
        -------
        dict
            Dictionary with keys: 'f_stat', 'p_value', 'df', 'n_leads',
            'lead_coefficients'.

        Raises
        ------
        RuntimeError
            If no internal estimator reference is attached.
        """
        if self._estimator_ref is None:
            raise RuntimeError(
                "Pre-trend test requires internal estimator reference. "
                "Re-fit the model to use this method."
            )
        result = self._estimator_ref._pretrend_test(n_leads=n_leads)
        self.pretrend_results = result
        return result

    @property
    def is_significant(self) -> bool:
        """Check if overall ATT is significant."""
        return bool(self.overall_p_value < self.alpha)

    @property
    def significance_stars(self) -> str:
        """Significance stars for overall ATT."""
        return _get_significance_stars(self.overall_p_value)