diff-diff 3.0.1__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. diff_diff/__init__.py +382 -0
  2. diff_diff/_backend.py +134 -0
  3. diff_diff/_rust_backend.cp314-win_amd64.pyd +0 -0
  4. diff_diff/bacon.py +1140 -0
  5. diff_diff/bootstrap_utils.py +730 -0
  6. diff_diff/continuous_did.py +1626 -0
  7. diff_diff/continuous_did_bspline.py +190 -0
  8. diff_diff/continuous_did_results.py +374 -0
  9. diff_diff/datasets.py +815 -0
  10. diff_diff/diagnostics.py +882 -0
  11. diff_diff/efficient_did.py +1770 -0
  12. diff_diff/efficient_did_bootstrap.py +359 -0
  13. diff_diff/efficient_did_covariates.py +899 -0
  14. diff_diff/efficient_did_results.py +368 -0
  15. diff_diff/efficient_did_weights.py +617 -0
  16. diff_diff/estimators.py +1501 -0
  17. diff_diff/honest_did.py +2585 -0
  18. diff_diff/imputation.py +2458 -0
  19. diff_diff/imputation_bootstrap.py +418 -0
  20. diff_diff/imputation_results.py +448 -0
  21. diff_diff/linalg.py +2538 -0
  22. diff_diff/power.py +2588 -0
  23. diff_diff/practitioner.py +869 -0
  24. diff_diff/prep.py +1738 -0
  25. diff_diff/prep_dgp.py +1718 -0
  26. diff_diff/pretrends.py +1105 -0
  27. diff_diff/results.py +918 -0
  28. diff_diff/stacked_did.py +1049 -0
  29. diff_diff/stacked_did_results.py +339 -0
  30. diff_diff/staggered.py +3895 -0
  31. diff_diff/staggered_aggregation.py +864 -0
  32. diff_diff/staggered_bootstrap.py +752 -0
  33. diff_diff/staggered_results.py +416 -0
  34. diff_diff/staggered_triple_diff.py +1545 -0
  35. diff_diff/staggered_triple_diff_results.py +416 -0
  36. diff_diff/sun_abraham.py +1685 -0
  37. diff_diff/survey.py +1981 -0
  38. diff_diff/synthetic_did.py +1136 -0
  39. diff_diff/triple_diff.py +2047 -0
  40. diff_diff/trop.py +952 -0
  41. diff_diff/trop_global.py +1270 -0
  42. diff_diff/trop_local.py +1307 -0
  43. diff_diff/trop_results.py +356 -0
  44. diff_diff/twfe.py +542 -0
  45. diff_diff/two_stage.py +1952 -0
  46. diff_diff/two_stage_bootstrap.py +520 -0
  47. diff_diff/two_stage_results.py +400 -0
  48. diff_diff/utils.py +1902 -0
  49. diff_diff/visualization/__init__.py +61 -0
  50. diff_diff/visualization/_common.py +328 -0
  51. diff_diff/visualization/_continuous.py +274 -0
  52. diff_diff/visualization/_diagnostic.py +817 -0
  53. diff_diff/visualization/_event_study.py +1086 -0
  54. diff_diff/visualization/_power.py +661 -0
  55. diff_diff/visualization/_staggered.py +833 -0
  56. diff_diff/visualization/_synthetic.py +197 -0
  57. diff_diff/wooldridge.py +1285 -0
  58. diff_diff/wooldridge_results.py +349 -0
  59. diff_diff-3.0.1.dist-info/METADATA +2997 -0
  60. diff_diff-3.0.1.dist-info/RECORD +62 -0
  61. diff_diff-3.0.1.dist-info/WHEEL +4 -0
  62. diff_diff-3.0.1.dist-info/sboms/diff_diff_rust.cyclonedx.json +5843 -0
@@ -0,0 +1,400 @@
1
+ """
2
+ Result containers for the Two-Stage DiD estimator.
3
+
4
+ This module contains TwoStageBootstrapResults and TwoStageDiDResults
5
+ dataclasses. Extracted from two_stage.py for module size management.
6
+ """
7
+
8
+ from dataclasses import dataclass, field
9
+ from typing import Any, Dict, List, Optional, Tuple
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+
14
+ from diff_diff.results import _format_survey_block, _get_significance_stars
15
+
16
+ __all__ = [
17
+ "TwoStageBootstrapResults",
18
+ "TwoStageDiDResults",
19
+ ]
20
+
21
+
22
@dataclass
class TwoStageBootstrapResults:
    """
    Container for the bootstrap inference output of TwoStageDiD.

    The bootstrap is a multiplier bootstrap applied to the GMM influence
    function, in line with the other estimators of this library. The R
    `did2s` package defaults to a block bootstrap instead; the multiplier
    bootstrap is asymptotically equivalent.

    Attributes
    ----------
    n_bootstrap : int
        How many bootstrap iterations were run.
    weight_type : str
        Bootstrap weight scheme: "rademacher", "mammen", or "webb".
    alpha : float
        Significance level the confidence intervals were built with.
    overall_att_se : float
        Bootstrap standard error of the overall ATT.
    overall_att_ci : tuple
        Bootstrap confidence interval of the overall ATT.
    overall_att_p_value : float
        Bootstrap p-value of the overall ATT.
    event_study_ses : dict, optional
        Bootstrap standard errors of the event study effects.
    event_study_cis : dict, optional
        Bootstrap confidence intervals of the event study effects.
    event_study_p_values : dict, optional
        Bootstrap p-values of the event study effects.
    group_ses : dict, optional
        Bootstrap standard errors of the group effects.
    group_cis : dict, optional
        Bootstrap confidence intervals of the group effects.
    group_p_values : dict, optional
        Bootstrap p-values of the group effects.
    bootstrap_distribution : np.ndarray, optional
        Complete bootstrap distribution of the overall ATT. Left out of
        the generated ``repr``.
    """

    # --- bootstrap configuration (required) ---
    n_bootstrap: int
    weight_type: str
    alpha: float

    # --- overall ATT inference (required) ---
    overall_att_se: float
    overall_att_ci: Tuple[float, float]
    overall_att_p_value: float

    # --- event study inference (optional) ---
    event_study_ses: Optional[Dict[int, float]] = None
    event_study_cis: Optional[Dict[int, Tuple[float, float]]] = None
    event_study_p_values: Optional[Dict[int, float]] = None

    # --- group (cohort) inference (optional) ---
    group_ses: Optional[Dict[Any, float]] = None
    group_cis: Optional[Dict[Any, Tuple[float, float]]] = None
    group_p_values: Optional[Dict[Any, float]] = None

    # --- raw draws; excluded from repr ---
    bootstrap_distribution: Optional[np.ndarray] = field(default=None, repr=False)
75
+
76
+
77
@dataclass
class TwoStageDiDResults:
    """
    Results from Gardner (2022) two-stage DiD estimation.

    Attributes
    ----------
    treatment_effects : pd.DataFrame
        Per-observation treatment effects with columns: unit, time,
        tau_hat, weight. tau_hat is the residualized outcome y_tilde
        for treated observations; weight is 1/n_treated.
    overall_att : float
        Overall average treatment effect on the treated.
    overall_se : float
        Standard error of overall ATT (GMM sandwich).
    overall_t_stat : float
        T-statistic for overall ATT.
    overall_p_value : float
        P-value for overall ATT.
    overall_conf_int : tuple
        Confidence interval for overall ATT.
    event_study_effects : dict, optional
        Dictionary mapping relative time h to effect dict with keys:
        'effect', 'se', 't_stat', 'p_value', 'conf_int', 'n_obs'.
    group_effects : dict, optional
        Dictionary mapping cohort g to effect dict.
    groups : list
        List of treatment cohorts.
    time_periods : list
        List of all time periods.
    n_obs : int
        Total number of observations.
    n_treated_obs : int
        Number of treated observations.
    n_untreated_obs : int
        Number of untreated observations.
    n_treated_units : int
        Number of ever-treated units.
    n_control_units : int
        Number of units contributing to untreated observations.
    alpha : float
        Significance level used.
    bootstrap_results : TwoStageBootstrapResults, optional
        Bootstrap inference results. Excluded from the dataclass repr.
    survey_metadata : object, optional
        Survey design metadata; when set, ``summary`` passes it to
        ``_format_survey_block``. Presumably a ``SurveyMetadata`` instance
        from ``diff_diff.survey`` -- confirm against the estimator.
    """

    treatment_effects: pd.DataFrame
    overall_att: float
    overall_se: float
    overall_t_stat: float
    overall_p_value: float
    overall_conf_int: Tuple[float, float]
    event_study_effects: Optional[Dict[int, Dict[str, Any]]]
    group_effects: Optional[Dict[Any, Dict[str, Any]]]
    groups: List[Any]
    time_periods: List[Any]
    n_obs: int
    n_treated_obs: int
    n_untreated_obs: int
    n_treated_units: int
    n_control_units: int
    alpha: float = 0.05
    # String forward reference: the class body does not need the sibling
    # class to be importable at definition time.
    bootstrap_results: Optional["TwoStageBootstrapResults"] = field(default=None, repr=False)
    # Survey design metadata (SurveyMetadata instance from diff_diff.survey)
    survey_metadata: Optional[Any] = field(default=None, repr=False)

    def __repr__(self) -> str:
        """Concise string representation."""
        sig = _get_significance_stars(self.overall_p_value)
        return (
            f"TwoStageDiDResults(ATT={self.overall_att:.4f}{sig}, "
            f"SE={self.overall_se:.4f}, "
            f"n_groups={len(self.groups)}, "
            f"n_treated_obs={self.n_treated_obs})"
        )

    @property
    def coef_var(self) -> float:
        """Coefficient of variation: SE / |overall ATT|.

        Returns NaN when the SE is non-finite or negative, or when the
        ATT is non-finite or exactly zero.
        """
        if not (np.isfinite(self.overall_se) and self.overall_se >= 0):
            return np.nan
        if not np.isfinite(self.overall_att) or self.overall_att == 0:
            return np.nan
        return self.overall_se / abs(self.overall_att)

    # ------------------------------------------------------------------
    # Private formatting helpers used by summary()
    # ------------------------------------------------------------------

    @staticmethod
    def _fmt_stat(value: float, spec: str) -> str:
        """Format ``value`` with ``spec``; non-finite values become a 10-wide 'NaN'."""
        return format(value, spec) if np.isfinite(value) else f"{'NaN':>10}"

    @staticmethod
    def _table_header(title: str, first_col: str, width: int) -> List[str]:
        """Return the rule / centered title / column-header lines of one table."""
        rule = "-" * width
        return [
            rule,
            title.center(width),
            rule,
            f"{first_col:<15} {'Estimate':>12} {'Std. Err.':>12} "
            f"{'t-stat':>10} {'P>|t|':>10} {'Sig.':>6}",
            rule,
        ]

    @classmethod
    def _effect_row(cls, label: Any, eff: Dict[str, Any]) -> str:
        """Render one effect dict as a table row labelled with ``label``."""
        # Convert to str before padding: alignment specs are not honoured by
        # every key type (tuple/None raise TypeError, datetime treats the
        # spec as a strftime pattern). int/float/str keys render unchanged.
        name = f"{str(label):<15}"
        if np.isnan(eff["effect"]):
            return f"{name} {'NaN':>12} {'NaN':>12} {'NaN':>10} {'NaN':>10} {'':>6}"
        stars = _get_significance_stars(eff["p_value"])
        t_txt = cls._fmt_stat(eff["t_stat"], ">10.3f")
        p_txt = cls._fmt_stat(eff["p_value"], ">10.4f")
        return (
            f"{name} {eff['effect']:>12.4f} {eff['se']:>12.4f} "
            f"{t_txt} {p_txt} {stars:>6}"
        )

    def summary(self, alpha: Optional[float] = None) -> str:
        """
        Generate formatted summary of estimation results.

        Parameters
        ----------
        alpha : float, optional
            Significance level used for the confidence-level label.
            Defaults to the alpha stored on the results. Only the label
            changes: the printed bounds are always the stored
            ``overall_conf_int``.

        Returns
        -------
        str
            Formatted summary.
        """
        width = 85
        level = self.alpha if alpha is None else alpha
        # ':g' avoids the truncation of int(): 99.9 stays "99.9" and a
        # float artefact such as 92.99999999999999 prints as "93".
        conf_label = f"{(1 - level) * 100:g}"
        heavy_rule = "=" * width
        light_rule = "-" * width

        lines = [
            heavy_rule,
            "Two-Stage DiD Estimator Results (Gardner 2022)".center(width),
            heavy_rule,
            "",
            f"{'Total observations:':<30} {self.n_obs:>10}",
            f"{'Treated observations:':<30} {self.n_treated_obs:>10}",
            f"{'Untreated observations:':<30} {self.n_untreated_obs:>10}",
            f"{'Treated units:':<30} {self.n_treated_units:>10}",
            f"{'Control units:':<30} {self.n_control_units:>10}",
            f"{'Treatment cohorts:':<30} {len(self.groups):>10}",
            f"{'Time periods:':<30} {len(self.time_periods):>10}",
            "",
        ]

        # Survey design info
        if self.survey_metadata is not None:
            lines.extend(_format_survey_block(self.survey_metadata, width))

        # Overall ATT
        lines.extend(
            self._table_header(
                "Overall Average Treatment Effect on the Treated", "Parameter", width
            )
        )
        t_str = self._fmt_stat(self.overall_t_stat, ">10.3f")
        p_str = self._fmt_stat(self.overall_p_value, ">10.4f")
        sig = _get_significance_stars(self.overall_p_value)
        lines.extend(
            [
                f"{'ATT':<15} {self.overall_att:>12.4f} {self.overall_se:>12.4f} "
                f"{t_str} {p_str} {sig:>6}",
                light_rule,
                "",
                f"{conf_label}% Confidence Interval: "
                f"[{self.overall_conf_int[0]:.4f}, {self.overall_conf_int[1]:.4f}]",
            ]
        )

        cv = self.coef_var
        if np.isfinite(cv):
            lines.append(f"{'CV (SE/|ATT|):':<25} {cv:>10.4f}")

        lines.append("")

        # Event study effects
        if self.event_study_effects:
            lines.extend(
                self._table_header("Event Study (Dynamic) Effects", "Rel. Period", width)
            )
            for h in sorted(self.event_study_effects):
                eff = self.event_study_effects[h]
                if eff.get("n_obs", 1) == 0:
                    # Reference period marker, padded like every other row
                    # so the columns line up with the header.
                    ref_label = f"[ref: {h}]"
                    lines.append(
                        f"{ref_label:<15} {'0.0000':>12} {'---':>12} "
                        f"{'---':>10} {'---':>10} {'':>6}"
                    )
                else:
                    lines.append(self._effect_row(h, eff))
            lines.extend([light_rule, ""])

        # Group effects
        if self.group_effects:
            lines.extend(self._table_header("Group (Cohort) Effects", "Cohort", width))
            for g in sorted(self.group_effects):
                lines.append(self._effect_row(g, self.group_effects[g]))
            lines.extend([light_rule, ""])

        lines.extend(
            [
                "Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1",
                heavy_rule,
            ]
        )

        return "\n".join(lines)

    def print_summary(self, alpha: Optional[float] = None) -> None:
        """Print summary to stdout."""
        print(self.summary(alpha))

    @staticmethod
    def _effects_to_frame(effects: Dict[Any, Dict[str, Any]], key_name: str) -> pd.DataFrame:
        """Flatten a ``{key: effect dict}`` mapping into one row per key, sorted by key."""
        columns = [
            key_name,
            "effect",
            "se",
            "t_stat",
            "p_value",
            "conf_int_lower",
            "conf_int_upper",
            "n_obs",
        ]
        rows = [
            {
                key_name: key,
                "effect": effects[key]["effect"],
                "se": effects[key]["se"],
                "t_stat": effects[key]["t_stat"],
                "p_value": effects[key]["p_value"],
                "conf_int_lower": effects[key]["conf_int"][0],
                "conf_int_upper": effects[key]["conf_int"][1],
                "n_obs": effects[key].get("n_obs", np.nan),
            }
            for key in sorted(effects)
        ]
        # Explicit columns keep the schema stable when ``effects`` is empty.
        return pd.DataFrame(rows, columns=columns)

    def to_dataframe(self, level: str = "event_study") -> pd.DataFrame:
        """
        Convert results to DataFrame.

        Parameters
        ----------
        level : str, default="event_study"
            Level of aggregation:
            - "event_study": Event study effects by relative time
            - "group": Group (cohort) effects
            - "observation": Per-observation treatment effects

        Returns
        -------
        pd.DataFrame
            Results as DataFrame. For "event_study" and "group" the
            columns are the key column ("relative_period" or "group"),
            effect, se, t_stat, p_value, conf_int_lower, conf_int_upper
            and n_obs, even when there are no rows. For "observation" a
            copy of ``treatment_effects`` is returned.

        Raises
        ------
        ValueError
            If the requested effects are ``None`` or ``level`` is unknown.
        """
        if level == "observation":
            return self.treatment_effects.copy()

        if level == "event_study":
            if self.event_study_effects is None:
                raise ValueError(
                    "Event study effects not computed. "
                    "Use aggregate='event_study' or aggregate='all'."
                )
            return self._effects_to_frame(self.event_study_effects, "relative_period")

        if level == "group":
            if self.group_effects is None:
                raise ValueError(
                    "Group effects not computed. Use aggregate='group' or aggregate='all'."
                )
            return self._effects_to_frame(self.group_effects, "group")

        raise ValueError(
            f"Unknown level: {level}. Use 'event_study', 'group', or 'observation'."
        )

    @property
    def is_significant(self) -> bool:
        """Check if overall ATT is significant (p-value strictly below alpha)."""
        return bool(self.overall_p_value < self.alpha)

    @property
    def significance_stars(self) -> str:
        """Significance stars for overall ATT."""
        return _get_significance_stars(self.overall_p_value)