diff-diff 3.0.1__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. diff_diff/__init__.py +382 -0
  2. diff_diff/_backend.py +134 -0
  3. diff_diff/_rust_backend.cp314-win_amd64.pyd +0 -0
  4. diff_diff/bacon.py +1140 -0
  5. diff_diff/bootstrap_utils.py +730 -0
  6. diff_diff/continuous_did.py +1626 -0
  7. diff_diff/continuous_did_bspline.py +190 -0
  8. diff_diff/continuous_did_results.py +374 -0
  9. diff_diff/datasets.py +815 -0
  10. diff_diff/diagnostics.py +882 -0
  11. diff_diff/efficient_did.py +1770 -0
  12. diff_diff/efficient_did_bootstrap.py +359 -0
  13. diff_diff/efficient_did_covariates.py +899 -0
  14. diff_diff/efficient_did_results.py +368 -0
  15. diff_diff/efficient_did_weights.py +617 -0
  16. diff_diff/estimators.py +1501 -0
  17. diff_diff/honest_did.py +2585 -0
  18. diff_diff/imputation.py +2458 -0
  19. diff_diff/imputation_bootstrap.py +418 -0
  20. diff_diff/imputation_results.py +448 -0
  21. diff_diff/linalg.py +2538 -0
  22. diff_diff/power.py +2588 -0
  23. diff_diff/practitioner.py +869 -0
  24. diff_diff/prep.py +1738 -0
  25. diff_diff/prep_dgp.py +1718 -0
  26. diff_diff/pretrends.py +1105 -0
  27. diff_diff/results.py +918 -0
  28. diff_diff/stacked_did.py +1049 -0
  29. diff_diff/stacked_did_results.py +339 -0
  30. diff_diff/staggered.py +3895 -0
  31. diff_diff/staggered_aggregation.py +864 -0
  32. diff_diff/staggered_bootstrap.py +752 -0
  33. diff_diff/staggered_results.py +416 -0
  34. diff_diff/staggered_triple_diff.py +1545 -0
  35. diff_diff/staggered_triple_diff_results.py +416 -0
  36. diff_diff/sun_abraham.py +1685 -0
  37. diff_diff/survey.py +1981 -0
  38. diff_diff/synthetic_did.py +1136 -0
  39. diff_diff/triple_diff.py +2047 -0
  40. diff_diff/trop.py +952 -0
  41. diff_diff/trop_global.py +1270 -0
  42. diff_diff/trop_local.py +1307 -0
  43. diff_diff/trop_results.py +356 -0
  44. diff_diff/twfe.py +542 -0
  45. diff_diff/two_stage.py +1952 -0
  46. diff_diff/two_stage_bootstrap.py +520 -0
  47. diff_diff/two_stage_results.py +400 -0
  48. diff_diff/utils.py +1902 -0
  49. diff_diff/visualization/__init__.py +61 -0
  50. diff_diff/visualization/_common.py +328 -0
  51. diff_diff/visualization/_continuous.py +274 -0
  52. diff_diff/visualization/_diagnostic.py +817 -0
  53. diff_diff/visualization/_event_study.py +1086 -0
  54. diff_diff/visualization/_power.py +661 -0
  55. diff_diff/visualization/_staggered.py +833 -0
  56. diff_diff/visualization/_synthetic.py +197 -0
  57. diff_diff/wooldridge.py +1285 -0
  58. diff_diff/wooldridge_results.py +349 -0
  59. diff_diff-3.0.1.dist-info/METADATA +2997 -0
  60. diff_diff-3.0.1.dist-info/RECORD +62 -0
  61. diff_diff-3.0.1.dist-info/WHEEL +4 -0
  62. diff_diff-3.0.1.dist-info/sboms/diff_diff_rust.cyclonedx.json +5843 -0
@@ -0,0 +1,418 @@
1
+ """
2
+ Bootstrap inference methods for the Imputation DiD estimator.
3
+
4
+ This module contains ImputationDiDBootstrapMixin, which provides multiplier
5
+ bootstrap inference. Extracted from imputation.py for module size management.
6
+ """
7
+
8
+ import warnings
9
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+
14
+ from diff_diff.bootstrap_utils import (
15
+ compute_effect_bootstrap_stats as _compute_effect_bootstrap_stats,
16
+ )
17
+ from diff_diff.bootstrap_utils import (
18
+ generate_bootstrap_weights_batch as _generate_bootstrap_weights_batch,
19
+ )
20
+ from diff_diff.bootstrap_utils import (
21
+ generate_survey_multiplier_weights_batch as _generate_survey_multiplier_weights_batch,
22
+ )
23
+ from diff_diff.imputation_results import ImputationBootstrapResults
24
+
25
+ __all__ = [
26
+ "ImputationDiDBootstrapMixin",
27
+ ]
28
+
29
+
30
def _compute_target_weights(
    tau_hat: np.ndarray,
    target_mask: np.ndarray,
) -> Tuple[np.ndarray, int]:
    """
    Equal weights for finite tau_hat observations within target_mask.

    Used by both aggregation and bootstrap paths to avoid weight logic
    duplication.

    Parameters
    ----------
    tau_hat : np.ndarray
        Per-observation treatment effects (may contain NaN).
    target_mask : np.ndarray
        Boolean mask selecting the target subset within tau_hat.
        Array-likes that are not already boolean ndarrays (0/1 integer
        arrays, lists, pandas Series) are coerced to a boolean ndarray and
        applied positionally.

    Returns
    -------
    weights : np.ndarray
        Weight array (same length as tau_hat). 1/n_valid for finite
        observations in target_mask, 0 elsewhere.
    n_valid : int
        Number of finite observations in the target subset.
    """
    tau_arr = np.asarray(tau_hat)
    # Coerce once so the mask is always a plain boolean ndarray; this keeps
    # the `&` below and the masked assignment purely positional.
    finite_target = np.isfinite(tau_arr) & np.asarray(target_mask, dtype=bool)
    n_valid = int(finite_target.sum())

    weights = np.zeros(len(tau_arr))
    if n_valid > 0:
        # Boolean-mask assignment; no need to materialise index positions.
        weights[finite_target] = 1.0 / n_valid
    return weights, n_valid
61
+
62
+
63
class ImputationDiDBootstrapMixin:
    """
    Mixin providing bootstrap inference methods for ImputationDiD.

    The mixin has two stages:

    1. ``_precompute_bootstrap_psi`` builds, for every aggregation target
       (overall, per-horizon, per-group), the cluster-level influence
       function sums returned by ``_compute_cluster_psi_sums``.
    2. ``_run_bootstrap`` draws multiplier weights once and reuses them for
       every target to obtain SEs, confidence intervals and p-values.

    The host class must supply the attributes annotated below and the
    methods declared under ``TYPE_CHECKING``.
    """

    # Type hints for attributes accessed from the main class
    n_bootstrap: int
    bootstrap_weights: str
    alpha: float
    seed: Optional[int]
    anticipation: int
    horizon_max: Optional[int]

    if TYPE_CHECKING:
        # Signatures of methods implemented by the host class; declared here
        # only so static type checkers can resolve the calls made below.

        def _compute_cluster_psi_sums(
            self,
            df: pd.DataFrame,
            outcome: str,
            unit: str,
            time: str,
            first_treat: str,
            covariates: Optional[List[str]],
            omega_0_mask: pd.Series,
            omega_1_mask: pd.Series,
            unit_fe: Dict[Any, float],
            time_fe: Dict[Any, float],
            grand_mean: float,
            delta_hat: Optional[np.ndarray],
            weights: np.ndarray,
            cluster_var: str,
            kept_cov_mask: Optional[np.ndarray] = None,
            survey_weights_0: Optional[np.ndarray] = None,
        ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: ...

        @staticmethod
        def _build_cohort_rel_times(
            df: pd.DataFrame,
            first_treat: str,
        ) -> Dict[Any, Set[int]]: ...

        @staticmethod
        def _compute_balanced_cohort_mask(
            df_treated: pd.DataFrame,
            first_treat: str,
            all_horizons: List[int],
            balance_e: int,
            cohort_rel_times: Dict[Any, Set[int]],
        ) -> np.ndarray: ...

    @staticmethod
    def _compute_survey_target_weights(
        tau_hat: np.ndarray,
        target_mask: np.ndarray,
        survey_weights: np.ndarray,
    ) -> Tuple[np.ndarray, int]:
        """
        Survey-weight-proportional weights for finite tau_hat in target_mask.

        Survey-weighted counterpart of ``_compute_target_weights``; shared by
        the event-study and group branches of ``_precompute_bootstrap_psi``.

        Parameters
        ----------
        tau_hat : np.ndarray
            Per-observation treatment effects (may contain NaN).
        target_mask : np.ndarray
            Boolean mask selecting the target subset within tau_hat.
        survey_weights : np.ndarray
            Survey weights of the treated observations, indexed with the
            same boolean mask as tau_hat.

        Returns
        -------
        weights : np.ndarray
            Weight array (same length as tau_hat). Survey weights normalised
            to sum to one over the finite observations in target_mask, 0
            elsewhere.
        n_valid : int
            Number of finite observations in the target subset.
        """
        finite_target = np.isfinite(tau_hat) & target_mask
        n_valid = int(finite_target.sum())
        weights = np.zeros(len(tau_hat))
        if n_valid == 0:
            return weights, n_valid

        sw_finite = survey_weights[finite_target]
        sw_total = sw_finite.sum()
        # If the selected survey weights do not sum to a positive number the
        # weights stay all-zero while n_valid stays positive, so the caller
        # still computes psi for this target (unchanged from the previous
        # inline logic).
        if sw_total > 0:
            weights[finite_target] = sw_finite / sw_total
        return weights, n_valid

    def _precompute_bootstrap_psi(
        self,
        df: pd.DataFrame,
        outcome: str,
        unit: str,
        time: str,
        first_treat: str,
        covariates: Optional[List[str]],
        omega_0_mask: pd.Series,
        omega_1_mask: pd.Series,
        unit_fe: Dict[Any, float],
        time_fe: Dict[Any, float],
        grand_mean: float,
        delta_hat: Optional[np.ndarray],
        cluster_var: str,
        kept_cov_mask: Optional[np.ndarray],
        overall_weights: np.ndarray,
        event_study_effects: Optional[Dict[int, Dict[str, Any]]],
        group_effects: Optional[Dict[Any, Dict[str, Any]]],
        treatment_groups: List[Any],
        tau_hat: np.ndarray,
        balance_e: Optional[int],
        survey_weights_0: Optional[np.ndarray] = None,
        survey_weights_1: Optional[np.ndarray] = None,
    ) -> Dict[str, Any]:
        """
        Pre-compute cluster-level influence function sums for each bootstrap target.

        For each aggregation target (overall, per-horizon, per-group), computes
        psi_i = sum_t v_it * epsilon_tilde_it for each cluster. The multiplier
        bootstrap then perturbs these psi sums with multiplier weights
        (rademacher/mammen/webb; configurable via ``bootstrap_weights``).

        Computational cost scales with the number of aggregation targets, since
        each target requires its own v_untreated computation (weight-dependent).

        Returns
        -------
        dict
            ``"overall"`` maps to ``(psi, cluster_ids)``. ``"event_study"``
            (present only when ``event_study_effects`` is non-empty) maps
            horizon -> psi. ``"group"`` (present only when ``group_effects``
            is non-empty) maps group -> psi. Targets that are skipped below
            have no entry.
        """
        result: Dict[str, Any] = {}

        # Arguments shared by every _compute_cluster_psi_sums call; only the
        # aggregation ``weights`` differ between targets.
        common = dict(
            df=df,
            outcome=outcome,
            unit=unit,
            time=time,
            first_treat=first_treat,
            covariates=covariates,
            omega_0_mask=omega_0_mask,
            omega_1_mask=omega_1_mask,
            unit_fe=unit_fe,
            time_fe=time_fe,
            grand_mean=grand_mean,
            delta_hat=delta_hat,
            cluster_var=cluster_var,
            kept_cov_mask=kept_cov_mask,
            survey_weights_0=survey_weights_0,
        )

        # Overall ATT
        overall_psi, cluster_ids, _ = self._compute_cluster_psi_sums(
            **common, weights=overall_weights
        )
        result["overall"] = (overall_psi, cluster_ids)

        # Event study: per-horizon weights
        if event_study_effects:
            result["event_study"] = {}
            df_1 = df.loc[omega_1_mask]
            rel_times = df_1["_rel_time"].values

            all_horizons = sorted({int(h) for h in rel_times if np.isfinite(h)})
            if self.horizon_max is not None:
                all_horizons = [h for h in all_horizons if abs(h) <= self.horizon_max]

            # Balanced cohort mask (same logic as _aggregate_event_study)
            balanced_mask = None
            if balance_e is not None:
                cohort_rel_times = self._build_cohort_rel_times(df, first_treat)
                balanced_mask = self._compute_balanced_cohort_mask(
                    df_1, first_treat, all_horizons, balance_e, cohort_rel_times
                )

            ref_period = -1 - self.anticipation

            for h in event_study_effects:
                if event_study_effects[h].get("n_obs", 0) == 0:
                    continue
                if h == ref_period:
                    continue
                if not np.isfinite(event_study_effects[h].get("effect", np.nan)):
                    continue

                # Skip pre-period horizons — their SEs come from Test 1
                # lead regression, not bootstrap
                if h < -self.anticipation:
                    continue

                h_mask = rel_times == h
                if balanced_mask is not None:
                    h_mask = h_mask & balanced_mask

                # When survey weights are provided, build weights proportional
                # to treated-observation survey weights (matching the analytical
                # path in _aggregate_event_study). Otherwise use equal weights.
                if survey_weights_1 is not None:
                    weights_h, n_valid_h = self._compute_survey_target_weights(
                        tau_hat, h_mask, survey_weights_1
                    )
                else:
                    weights_h, n_valid_h = _compute_target_weights(tau_hat, h_mask)
                if n_valid_h == 0:
                    continue

                psi_h, _, _ = self._compute_cluster_psi_sums(**common, weights=weights_h)
                result["event_study"][h] = psi_h

        # Group effects: per-group weights
        if group_effects:
            result["group"] = {}
            df_1 = df.loc[omega_1_mask]
            cohorts = df_1[first_treat].values

            for g in group_effects:
                if group_effects[g].get("n_obs", 0) == 0:
                    continue
                if not np.isfinite(group_effects[g].get("effect", np.nan)):
                    continue
                g_mask = cohorts == g

                # When survey weights are provided, build weights proportional
                # to treated-observation survey weights (matching the analytical
                # path in _aggregate_group). Otherwise use equal weights.
                if survey_weights_1 is not None:
                    weights_g, n_valid_g = self._compute_survey_target_weights(
                        tau_hat, g_mask, survey_weights_1
                    )
                else:
                    weights_g, n_valid_g = _compute_target_weights(tau_hat, g_mask)
                if n_valid_g == 0:
                    continue

                psi_g, _, _ = self._compute_cluster_psi_sums(**common, weights=weights_g)
                result["group"][g] = psi_g

        return result

    def _run_bootstrap(
        self,
        original_att: float,
        original_event_study: Optional[Dict[int, Dict[str, Any]]],
        original_group: Optional[Dict[Any, Dict[str, Any]]],
        psi_data: Dict[str, Any],
        resolved_survey: Optional[Any] = None,
    ) -> ImputationBootstrapResults:
        """
        Run multiplier bootstrap on pre-computed influence function sums.

        Uses T_b = sum_i w_b_i * psi_i where w_b_i are multiplier weights
        (rademacher/mammen/webb; configurable via ``bootstrap_weights``)
        and psi_i are cluster-level influence function sums from Theorem 3.
        SE = std(T_b, ddof=1).

        When ``resolved_survey`` carries PSU/strata/FPC structure, weights are
        generated via ``generate_survey_multiplier_weights_batch`` so the
        bootstrap variance respects the survey design (stratification and FPC
        scaling).

        Parameters
        ----------
        original_att : float
            Point estimate of the overall ATT; added back to the draws.
        original_event_study : dict, optional
            Horizon -> result dict; the ``"effect"`` entry is read for every
            horizon present in ``psi_data["event_study"]``.
        original_group : dict, optional
            Group -> result dict; the ``"effect"`` entry is read for every
            group present in ``psi_data["group"]``.
        psi_data : dict
            Output of ``_precompute_bootstrap_psi``.
        resolved_survey : object, optional
            Survey design whose ``strata``, ``psu`` and ``fpc`` attributes
            are inspected to decide whether survey-aware weights are used.

        Returns
        -------
        ImputationBootstrapResults
            SEs, confidence intervals and p-values for the overall ATT and,
            when available, per-horizon and per-group effects, plus the
            shifted overall bootstrap distribution.
        """
        if self.n_bootstrap < 50:
            warnings.warn(
                f"n_bootstrap={self.n_bootstrap} is low. Consider n_bootstrap >= 199 "
                "for reliable inference.",
                UserWarning,
                stacklevel=3,
            )

        rng = np.random.default_rng(self.seed)

        overall_psi, cluster_ids = psi_data["overall"]
        n_clusters = len(cluster_ids)

        # Determine whether to use survey-aware bootstrap weights
        _use_survey_bootstrap = resolved_survey is not None and (
            resolved_survey.strata is not None
            or resolved_survey.psu is not None
            or resolved_survey.fpc is not None
        )

        # Generate ALL weights upfront: shape (n_bootstrap, n_clusters)
        if _use_survey_bootstrap:
            psu_weights, psu_ids = _generate_survey_multiplier_weights_batch(
                self.n_bootstrap, resolved_survey, self.bootstrap_weights, rng
            )
            # Reindex PSU weights to match cluster_ids ordering.
            # cluster_ids are unique PSU values from _compute_cluster_psi_sums;
            # psu_ids are unique PSU values from the survey weight generator.
            # Build a map from psu_id -> column index in psu_weights.
            # NOTE(review): ids are cast with int(), so this path presumably
            # expects integer-coded PSU identifiers — confirm against the
            # survey resolver.
            psu_id_to_col = {int(p): c for c, p in enumerate(psu_ids)}
            cluster_to_psu_col = np.array([psu_id_to_col[int(cid)] for cid in cluster_ids])
            all_weights = psu_weights[:, cluster_to_psu_col]
        else:
            all_weights = _generate_bootstrap_weights_batch(
                self.n_bootstrap, n_clusters, self.bootstrap_weights, rng
            )

        # Overall ATT bootstrap draws
        boot_overall = np.dot(all_weights, overall_psi)  # (n_bootstrap,)

        # Event study: loop over horizons (same weight draws as the overall ATT)
        boot_event_study: Optional[Dict[int, np.ndarray]] = None
        if original_event_study and "event_study" in psi_data:
            boot_event_study = {}
            for h, psi_h in psi_data["event_study"].items():
                boot_event_study[h] = np.dot(all_weights, psi_h)

        # Group effects: loop over groups (same weight draws as the overall ATT)
        boot_group: Optional[Dict[Any, np.ndarray]] = None
        if original_group and "group" in psi_data:
            boot_group = {}
            for g, psi_g in psi_data["group"].items():
                boot_group[g] = np.dot(all_weights, psi_g)

        # --- Inference (percentile bootstrap, matching CS/SA convention) ---
        # Shift perturbation-centered draws to effect-centered draws.
        # The multiplier bootstrap produces T_b = sum w_b_i * psi_i centered at 0.
        # CS adds the original effect back (L411 of staggered_bootstrap.py).
        # We do the same here so percentile CIs and empirical p-values work correctly.
        boot_overall_shifted = boot_overall + original_att

        overall_se, overall_ci, overall_p = _compute_effect_bootstrap_stats(
            original_att,
            boot_overall_shifted,
            alpha=self.alpha,
            context="ImputationDiD overall ATT",
        )

        event_study_ses = None
        event_study_cis = None
        event_study_p_values = None
        if boot_event_study and original_event_study:
            event_study_ses = {}
            event_study_cis = {}
            event_study_p_values = {}
            for h in boot_event_study:
                orig_eff = original_event_study[h]["effect"]
                shifted_h = boot_event_study[h] + orig_eff
                se_h, ci_h, p_h = _compute_effect_bootstrap_stats(
                    orig_eff,
                    shifted_h,
                    alpha=self.alpha,
                    context=f"ImputationDiD event study (h={h})",
                )
                event_study_ses[h] = se_h
                event_study_cis[h] = ci_h
                event_study_p_values[h] = p_h

        group_ses = None
        group_cis = None
        group_p_values = None
        if boot_group and original_group:
            group_ses = {}
            group_cis = {}
            group_p_values = {}
            for g in boot_group:
                orig_eff = original_group[g]["effect"]
                shifted_g = boot_group[g] + orig_eff
                se_g, ci_g, p_g = _compute_effect_bootstrap_stats(
                    orig_eff,
                    shifted_g,
                    alpha=self.alpha,
                    context=f"ImputationDiD group effect (g={g})",
                )
                group_ses[g] = se_g
                group_cis[g] = ci_g
                group_p_values[g] = p_g

        return ImputationBootstrapResults(
            n_bootstrap=self.n_bootstrap,
            weight_type=self.bootstrap_weights,
            alpha=self.alpha,
            overall_att_se=overall_se,
            overall_att_ci=overall_ci,
            overall_att_p_value=overall_p,
            event_study_ses=event_study_ses,
            event_study_cis=event_study_cis,
            event_study_p_values=event_study_p_values,
            group_ses=group_ses,
            group_cis=group_cis,
            group_p_values=group_p_values,
            bootstrap_distribution=boot_overall_shifted,
        )