diff-diff 3.0.1__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62):
  1. diff_diff/__init__.py +382 -0
  2. diff_diff/_backend.py +134 -0
  3. diff_diff/_rust_backend.cp314-win_amd64.pyd +0 -0
  4. diff_diff/bacon.py +1140 -0
  5. diff_diff/bootstrap_utils.py +730 -0
  6. diff_diff/continuous_did.py +1626 -0
  7. diff_diff/continuous_did_bspline.py +190 -0
  8. diff_diff/continuous_did_results.py +374 -0
  9. diff_diff/datasets.py +815 -0
  10. diff_diff/diagnostics.py +882 -0
  11. diff_diff/efficient_did.py +1770 -0
  12. diff_diff/efficient_did_bootstrap.py +359 -0
  13. diff_diff/efficient_did_covariates.py +899 -0
  14. diff_diff/efficient_did_results.py +368 -0
  15. diff_diff/efficient_did_weights.py +617 -0
  16. diff_diff/estimators.py +1501 -0
  17. diff_diff/honest_did.py +2585 -0
  18. diff_diff/imputation.py +2458 -0
  19. diff_diff/imputation_bootstrap.py +418 -0
  20. diff_diff/imputation_results.py +448 -0
  21. diff_diff/linalg.py +2538 -0
  22. diff_diff/power.py +2588 -0
  23. diff_diff/practitioner.py +869 -0
  24. diff_diff/prep.py +1738 -0
  25. diff_diff/prep_dgp.py +1718 -0
  26. diff_diff/pretrends.py +1105 -0
  27. diff_diff/results.py +918 -0
  28. diff_diff/stacked_did.py +1049 -0
  29. diff_diff/stacked_did_results.py +339 -0
  30. diff_diff/staggered.py +3895 -0
  31. diff_diff/staggered_aggregation.py +864 -0
  32. diff_diff/staggered_bootstrap.py +752 -0
  33. diff_diff/staggered_results.py +416 -0
  34. diff_diff/staggered_triple_diff.py +1545 -0
  35. diff_diff/staggered_triple_diff_results.py +416 -0
  36. diff_diff/sun_abraham.py +1685 -0
  37. diff_diff/survey.py +1981 -0
  38. diff_diff/synthetic_did.py +1136 -0
  39. diff_diff/triple_diff.py +2047 -0
  40. diff_diff/trop.py +952 -0
  41. diff_diff/trop_global.py +1270 -0
  42. diff_diff/trop_local.py +1307 -0
  43. diff_diff/trop_results.py +356 -0
  44. diff_diff/twfe.py +542 -0
  45. diff_diff/two_stage.py +1952 -0
  46. diff_diff/two_stage_bootstrap.py +520 -0
  47. diff_diff/two_stage_results.py +400 -0
  48. diff_diff/utils.py +1902 -0
  49. diff_diff/visualization/__init__.py +61 -0
  50. diff_diff/visualization/_common.py +328 -0
  51. diff_diff/visualization/_continuous.py +274 -0
  52. diff_diff/visualization/_diagnostic.py +817 -0
  53. diff_diff/visualization/_event_study.py +1086 -0
  54. diff_diff/visualization/_power.py +661 -0
  55. diff_diff/visualization/_staggered.py +833 -0
  56. diff_diff/visualization/_synthetic.py +197 -0
  57. diff_diff/wooldridge.py +1285 -0
  58. diff_diff/wooldridge_results.py +349 -0
  59. diff_diff-3.0.1.dist-info/METADATA +2997 -0
  60. diff_diff-3.0.1.dist-info/RECORD +62 -0
  61. diff_diff-3.0.1.dist-info/WHEEL +4 -0
  62. diff_diff-3.0.1.dist-info/sboms/diff_diff_rust.cyclonedx.json +5843 -0
@@ -0,0 +1,368 @@
1
+ """
2
+ Result container for the Efficient DiD estimator.
3
+
4
+ Follows the CallawaySantAnnaResults pattern: dataclass with summary(),
5
+ to_dataframe(), and significance properties.
6
+ """
7
+
8
+ from dataclasses import dataclass, field
9
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+
14
+ from diff_diff.results import _format_survey_block, _get_significance_stars
15
+
16
+ if TYPE_CHECKING:
17
+ from diff_diff.efficient_did_bootstrap import EDiDBootstrapResults
18
+
19
+
20
@dataclass
class HausmanPretestResult:
    """Outcome of the Hausman pretest comparing PT-All with PT-Post (Theorem A.1).

    Under the null hypothesis (PT-All holds) both estimators are consistent
    and PT-All is the efficient one. A rejection suggests PT-All is too
    strong, in which case PT-Post should be used.

    Attributes
    ----------
    statistic : float
        Hausman H statistic.
    p_value : float
        Chi-squared p-value.
    df : int
        Degrees of freedom (effective rank of V).
    reject : bool
        True if ``p_value < alpha``.
    alpha : float
        Significance level used.
    att_all : float
        Overall ATT under PT-All.
    att_post : float
        Overall ATT under PT-Post.
    recommendation : str
        ``"pt_all"`` if fail to reject, ``"pt_post"`` if reject,
        ``"inconclusive"`` if test unavailable.
    gt_details : pandas.DataFrame, optional
        Per-event-study-horizon details: relative_period, es_all, es_post,
        delta.
    """

    statistic: float
    p_value: float
    df: int
    reject: bool
    alpha: float
    att_all: float
    att_post: float
    recommendation: str
    gt_details: Optional[pd.DataFrame] = None

    def __repr__(self) -> str:
        """Return a compact one-line description of the test outcome."""
        parts = (
            f"H={self.statistic:.3f}",
            f"p={self.p_value:.4f}",
            f"df={self.df}",
            f"recommend={self.recommendation}",
        )
        return "HausmanPretestResult(" + ", ".join(parts) + ")"
52
+
53
+
54
@dataclass
class EfficientDiDResults:
    """
    Results from Efficient DiD (Chen, Sant'Anna & Xie 2025) estimation.

    Stores group-time ATT(g,t) estimates with efficient weights, plus
    optional aggregations (overall ATT, event study, group effects).

    Attributes
    ----------
    group_time_effects : dict
        ``{(g, t): {'effect', 'se', 't_stat', 'p_value', 'conf_int',
        'n_treated', 'n_control'}}``
    overall_att : float
        Overall ATT (cohort-size weighted average of post-treatment
        group-time effects, matching CallawaySantAnna convention).
    overall_se : float
        Standard error of overall ATT.
    overall_t_stat : float
        t-statistic for overall ATT.
    overall_p_value : float
        p-value for overall ATT.
    overall_conf_int : tuple
        Confidence interval for overall ATT.
    groups : list
        Treatment cohort identifiers.
    time_periods : list
        All time periods.
    n_obs : int
        Total observations (units x periods).
    n_treated_units : int
        Number of ever-treated units.
    n_control_units : int
        Number of never-treated units.
    alpha : float
        Significance level.
    pt_assumption : str
        ``"all"`` or ``"post"``.
    anticipation : int
        Number of anticipation periods used.
    n_bootstrap : int
        Number of bootstrap iterations (0 = analytical only).
    bootstrap_weights : str
        Bootstrap weight distribution (``"rademacher"``, ``"mammen"``, ``"webb"``).
    seed : int or None
        Random seed used for bootstrap.
    event_study_effects : dict, optional
        ``{relative_time: effect_dict}``
    group_effects : dict, optional
        ``{group: effect_dict}``
    efficient_weights : dict, optional
        ``{(g, t): ndarray}`` — diagnostic: weight vector per target.
    omega_condition_numbers : dict, optional
        ``{(g, t): float}`` — diagnostic: Omega* condition numbers.
    control_group : str
        Label of the control group; defaults to ``"never_treated"``. It is
        shown in :meth:`summary` only when it differs from that default.
    influence_functions : dict, optional
        ``{(g, t): ndarray(n_units,)}`` — per-unit EIF values for each
        group-time cell. Only populated when ``store_eif=True`` in
        :meth:`~EfficientDiD.fit` (used internally by ``hausman_pretest``).
    bootstrap_results : EDiDBootstrapResults, optional
        Bootstrap inference results.
    estimation_path : str
        ``"nocov"`` or ``"dr"`` — which estimation path was used.
    sieve_k_max : int or None
        Maximum polynomial degree for sieve ratio estimation.
    sieve_criterion : str
        Information criterion used (``"aic"`` or ``"bic"``).
    ratio_clip : float
        Clipping bound for sieve propensity ratios.
    kernel_bandwidth : float or None
        Bandwidth used for kernel-smoothed conditional Omega*.
    survey_metadata : object, optional
        Survey design metadata (SurveyMetadata instance from
        ``diff_diff.survey``). When set, :meth:`summary` adds a survey block.
    """

    group_time_effects: Dict[Tuple[Any, Any], Dict[str, Any]]
    overall_att: float
    overall_se: float
    overall_t_stat: float
    overall_p_value: float
    overall_conf_int: Tuple[float, float]
    groups: List[Any]
    time_periods: List[Any]
    n_obs: int
    n_treated_units: int
    n_control_units: int
    alpha: float = 0.05
    pt_assumption: str = "all"
    anticipation: int = 0
    n_bootstrap: int = 0
    bootstrap_weights: str = "rademacher"
    seed: Optional[int] = None
    event_study_effects: Optional[Dict[int, Dict[str, Any]]] = field(default=None)
    group_effects: Optional[Dict[Any, Dict[str, Any]]] = field(default=None)
    efficient_weights: Optional[Dict[Tuple[Any, Any], "np.ndarray"]] = field(
        default=None, repr=False
    )
    omega_condition_numbers: Optional[Dict[Tuple[Any, Any], float]] = field(
        default=None, repr=False
    )
    control_group: str = "never_treated"
    influence_functions: Optional[Dict[Tuple[Any, Any], "np.ndarray"]] = field(
        default=None, repr=False
    )
    bootstrap_results: Optional["EDiDBootstrapResults"] = field(default=None, repr=False)
    estimation_path: str = "nocov"
    sieve_k_max: Optional[int] = None
    sieve_criterion: str = "bic"
    ratio_clip: float = 20.0
    kernel_bandwidth: Optional[float] = None
    # Survey design metadata (SurveyMetadata instance from diff_diff.survey)
    survey_metadata: Optional[Any] = field(default=None)

    # Layout constants. They are deliberately un-annotated so that the
    # dataclass machinery does not turn them into fields.
    _WIDTH = 85
    _STAT_COLUMNS = (
        "effect",
        "se",
        "t_stat",
        "p_value",
        "conf_int_lower",
        "conf_int_upper",
    )

    def __repr__(self) -> str:
        """Return a compact one-line description of the overall estimate."""
        sig = _get_significance_stars(self.overall_p_value)
        path = "DR" if self.estimation_path == "dr" else "nocov"
        return (
            f"EfficientDiDResults(ATT={self.overall_att:.4f}{sig}, "
            f"SE={self.overall_se:.4f}, "
            f"pt={self.pt_assumption}, path={path}, "
            f"n_groups={len(self.groups)}, "
            f"n_periods={len(self.time_periods)})"
        )

    @property
    def coef_var(self) -> float:
        """Coefficient of variation: SE / |overall ATT|. NaN when ATT is 0 or SE non-finite."""
        if not (np.isfinite(self.overall_se) and self.overall_se >= 0):
            return np.nan
        if not np.isfinite(self.overall_att) or self.overall_att == 0:
            return np.nan
        return self.overall_se / abs(self.overall_att)

    @staticmethod
    def _format_row(label: Any, effect: float, se: float, t_stat: float, p_value: float) -> str:
        """Format one table row: label, estimate, SE, t-stat, p-value, stars."""
        stars = _get_significance_stars(p_value)
        return (
            f"{label:<15} {effect:>12.4f} {se:>12.4f} "
            f"{t_stat:>10.3f} {p_value:>10.4f} {stars:>6}"
        )

    @classmethod
    def _table_header(cls, title: str, label: str) -> List[str]:
        """Return the rule / centered title / rule / column header / rule lines."""
        rule = "-" * cls._WIDTH
        return [
            rule,
            title.center(cls._WIDTH),
            rule,
            f"{label:<15} {'Estimate':>12} {'Std. Err.':>12} "
            f"{'t-stat':>10} {'P>|t|':>10} {'Sig.':>6}",
            rule,
        ]

    @classmethod
    def _effects_table(cls, title: str, label: str, effects: Dict[Any, Dict[str, Any]]) -> List[str]:
        """Render a full table for ``effects``, one row per key in sorted order."""
        block = cls._table_header(title, label)
        for key in sorted(effects):
            eff = effects[key]
            block.append(
                cls._format_row(key, eff["effect"], eff["se"], eff["t_stat"], eff["p_value"])
            )
        block.extend(["-" * cls._WIDTH, ""])
        return block

    def summary(self, alpha: Optional[float] = None) -> str:
        """Generate formatted summary of estimation results.

        Parameters
        ----------
        alpha : float, optional
            Significance level used for the confidence-level label. Falls
            back to ``self.alpha`` when omitted. Only the label depends on
            it: the interval bounds printed are the stored
            ``overall_conf_int`` values and are not recomputed.

        Returns
        -------
        str
            Multi-line, fixed-width report.
        """
        # ``is None`` rather than ``or`` so an explicit alpha=0.0 is honoured.
        if alpha is None:
            alpha = self.alpha
        # ``:g`` avoids the truncation of int(): 97.5 stays "97.5" and float
        # noise such as 92.99999999999999 is not cut down to 92.
        conf_level = f"{100 * (1 - alpha):g}"

        width = self._WIDTH
        heavy_rule = "=" * width
        light_rule = "-" * width
        path_label = "doubly robust" if self.estimation_path == "dr" else "no covariates"

        lines = [
            heavy_rule,
            "Efficient DiD (Chen-Sant'Anna-Xie 2025) Results".center(width),
            heavy_rule,
            "",
            f"{'Total observations:':<30} {self.n_obs:>10}",
            f"{'Treated units:':<30} {self.n_treated_units:>10}",
            f"{'Control units:':<30} {self.n_control_units:>10}",
            f"{'Treatment cohorts:':<30} {len(self.groups):>10}",
            f"{'Time periods:':<30} {len(self.time_periods):>10}",
            f"{'PT assumption:':<30} {self.pt_assumption:>10}",
            f"{'Estimation path:':<30} {path_label:>10}",
        ]
        # Optional settings are listed only when they deviate from defaults.
        if self.control_group != "never_treated":
            lines.append(f"{'Control group:':<30} {self.control_group:>10}")
        if self.anticipation > 0:
            lines.append(f"{'Anticipation periods:':<30} {self.anticipation:>10}")
        if self.n_bootstrap > 0:
            lines.append(f"{'Bootstrap:':<30} {self.n_bootstrap:>10} ({self.bootstrap_weights})")
        lines.append("")

        # Add survey design info
        if self.survey_metadata is not None:
            lines.extend(_format_survey_block(self.survey_metadata, width))

        # Overall ATT
        lines.extend(
            self._table_header("Overall Average Treatment Effect on the Treated", "Parameter")
        )
        lines.extend(
            [
                self._format_row(
                    "ATT",
                    self.overall_att,
                    self.overall_se,
                    self.overall_t_stat,
                    self.overall_p_value,
                ),
                light_rule,
                "",
                f"{conf_level}% Confidence Interval: "
                f"[{self.overall_conf_int[0]:.4f}, {self.overall_conf_int[1]:.4f}]",
            ]
        )

        cv = self.coef_var
        if np.isfinite(cv):
            lines.append(f"{'CV (SE/|ATT|):':<25} {cv:>10.4f}")

        lines.append("")

        # Event study effects (skipped when None or empty)
        if self.event_study_effects:
            lines.extend(
                self._effects_table(
                    "Event Study (Dynamic) Effects", "Rel. Period", self.event_study_effects
                )
            )

        # Group effects (skipped when None or empty)
        if self.group_effects:
            lines.extend(
                self._effects_table("Effects by Treatment Cohort", "Cohort", self.group_effects)
            )

        lines.extend(
            [
                "Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1",
                heavy_rule,
            ]
        )
        return "\n".join(lines)

    def print_summary(self, alpha: Optional[float] = None) -> None:
        """Print summary to stdout."""
        print(self.summary(alpha))

    @staticmethod
    def _stat_record(data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract the statistic columns shared by every ``to_dataframe`` level."""
        return {
            "effect": data["effect"],
            "se": data["se"],
            "t_stat": data["t_stat"],
            "p_value": data["p_value"],
            "conf_int_lower": data["conf_int"][0],
            "conf_int_upper": data["conf_int"][1],
        }

    def to_dataframe(self, level: str = "group_time") -> pd.DataFrame:
        """Convert results to DataFrame.

        Parameters
        ----------
        level : str
            ``"group_time"``, ``"event_study"``, or ``"group"``.

        Returns
        -------
        pandas.DataFrame
            One row per effect. The identifier column(s) come first
            (``group``/``time``, ``relative_period``, or ``group``), followed
            by ``effect``, ``se``, ``t_stat``, ``p_value``,
            ``conf_int_lower`` and ``conf_int_upper``. The columns are
            present even when there are no rows.

        Raises
        ------
        ValueError
            If ``level`` is unknown, or if the requested aggregation was not
            computed (the corresponding attribute is ``None``).
        """
        if level == "group_time":
            id_columns = ["group", "time"]
            rows = [
                {"group": g, "time": t, **self._stat_record(data)}
                for (g, t), data in self.group_time_effects.items()
            ]

        elif level == "event_study":
            if self.event_study_effects is None:
                raise ValueError("Event study effects not computed. Use aggregate='event_study'.")
            id_columns = ["relative_period"]
            rows = [
                {"relative_period": rel_t, **self._stat_record(self.event_study_effects[rel_t])}
                for rel_t in sorted(self.event_study_effects)
            ]

        elif level == "group":
            if self.group_effects is None:
                raise ValueError("Group effects not computed. Use aggregate='group'.")
            id_columns = ["group"]
            rows = [
                {"group": group, **self._stat_record(self.group_effects[group])}
                for group in sorted(self.group_effects)
            ]

        else:
            raise ValueError(
                f"Unknown level: {level}. " "Use 'group_time', 'event_study', or 'group'."
            )

        # Passing ``columns`` keeps the schema stable when ``rows`` is empty;
        # for non-empty input it matches the key order of the records.
        return pd.DataFrame(rows, columns=[*id_columns, *self._STAT_COLUMNS])

    @property
    def is_significant(self) -> bool:
        """Check if overall ATT is significant."""
        return bool(self.overall_p_value < self.alpha)

    @property
    def significance_stars(self) -> str:
        """Significance stars for overall ATT."""
        return _get_significance_stars(self.overall_p_value)