diff-diff 3.0.1__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. diff_diff/__init__.py +382 -0
  2. diff_diff/_backend.py +134 -0
  3. diff_diff/_rust_backend.cp314-win_amd64.pyd +0 -0
  4. diff_diff/bacon.py +1140 -0
  5. diff_diff/bootstrap_utils.py +730 -0
  6. diff_diff/continuous_did.py +1626 -0
  7. diff_diff/continuous_did_bspline.py +190 -0
  8. diff_diff/continuous_did_results.py +374 -0
  9. diff_diff/datasets.py +815 -0
  10. diff_diff/diagnostics.py +882 -0
  11. diff_diff/efficient_did.py +1770 -0
  12. diff_diff/efficient_did_bootstrap.py +359 -0
  13. diff_diff/efficient_did_covariates.py +899 -0
  14. diff_diff/efficient_did_results.py +368 -0
  15. diff_diff/efficient_did_weights.py +617 -0
  16. diff_diff/estimators.py +1501 -0
  17. diff_diff/honest_did.py +2585 -0
  18. diff_diff/imputation.py +2458 -0
  19. diff_diff/imputation_bootstrap.py +418 -0
  20. diff_diff/imputation_results.py +448 -0
  21. diff_diff/linalg.py +2538 -0
  22. diff_diff/power.py +2588 -0
  23. diff_diff/practitioner.py +869 -0
  24. diff_diff/prep.py +1738 -0
  25. diff_diff/prep_dgp.py +1718 -0
  26. diff_diff/pretrends.py +1105 -0
  27. diff_diff/results.py +918 -0
  28. diff_diff/stacked_did.py +1049 -0
  29. diff_diff/stacked_did_results.py +339 -0
  30. diff_diff/staggered.py +3895 -0
  31. diff_diff/staggered_aggregation.py +864 -0
  32. diff_diff/staggered_bootstrap.py +752 -0
  33. diff_diff/staggered_results.py +416 -0
  34. diff_diff/staggered_triple_diff.py +1545 -0
  35. diff_diff/staggered_triple_diff_results.py +416 -0
  36. diff_diff/sun_abraham.py +1685 -0
  37. diff_diff/survey.py +1981 -0
  38. diff_diff/synthetic_did.py +1136 -0
  39. diff_diff/triple_diff.py +2047 -0
  40. diff_diff/trop.py +952 -0
  41. diff_diff/trop_global.py +1270 -0
  42. diff_diff/trop_local.py +1307 -0
  43. diff_diff/trop_results.py +356 -0
  44. diff_diff/twfe.py +542 -0
  45. diff_diff/two_stage.py +1952 -0
  46. diff_diff/two_stage_bootstrap.py +520 -0
  47. diff_diff/two_stage_results.py +400 -0
  48. diff_diff/utils.py +1902 -0
  49. diff_diff/visualization/__init__.py +61 -0
  50. diff_diff/visualization/_common.py +328 -0
  51. diff_diff/visualization/_continuous.py +274 -0
  52. diff_diff/visualization/_diagnostic.py +817 -0
  53. diff_diff/visualization/_event_study.py +1086 -0
  54. diff_diff/visualization/_power.py +661 -0
  55. diff_diff/visualization/_staggered.py +833 -0
  56. diff_diff/visualization/_synthetic.py +197 -0
  57. diff_diff/wooldridge.py +1285 -0
  58. diff_diff/wooldridge_results.py +349 -0
  59. diff_diff-3.0.1.dist-info/METADATA +2997 -0
  60. diff_diff-3.0.1.dist-info/RECORD +62 -0
  61. diff_diff-3.0.1.dist-info/WHEEL +4 -0
  62. diff_diff-3.0.1.dist-info/sboms/diff_diff_rust.cyclonedx.json +5843 -0
@@ -0,0 +1,1049 @@
1
+ """
2
+ Wing, Freedman & Hollingsworth (2024) Stacked Difference-in-Differences Estimator.
3
+
4
+ Implements the stacked DiD estimator from Wing, Freedman & Hollingsworth (2024),
5
+ NBER Working Paper 32054. The key contribution: naive stacked DiD regressions are
6
+ biased because they implicitly weight treatment and control group trends differently
7
+ across sub-experiments. The authors derive corrective Q-weights that make a weighted
8
+ stacked regression identify the "trimmed aggregate ATT" — a well-defined convex
9
+ combination of group-time ATTs with stable composition across event time.
10
+
11
+ The implementation follows the R reference code at
12
+ https://github.com/hollina/stacked-did-weights.
13
+
14
+ References
15
+ ----------
16
+ Wing, C., Freedman, S. M., & Hollingsworth, A. (2024). Stacked
17
+ Difference-in-Differences. NBER Working Paper 32054.
18
+ """
19
+
20
+ import copy
21
+ import warnings
22
+ from typing import Any, Dict, List, Optional, Tuple
23
+
24
+ import numpy as np
25
+ import pandas as pd
26
+
27
+ from diff_diff.linalg import solve_ols
28
+ from diff_diff.stacked_did_results import StackedDiDResults # noqa: F401 (re-export)
29
+ from diff_diff.utils import safe_inference
30
+
31
+ __all__ = [
32
+ "StackedDiD",
33
+ "StackedDiDResults",
34
+ "stacked_did",
35
+ ]
36
+
37
+
38
+ class StackedDiD:
39
+ """
40
+ Stacked Difference-in-Differences estimator.
41
+
42
+ Implements Wing, Freedman & Hollingsworth (2024). Builds a stacked
43
+ dataset of sub-experiments (one per adoption cohort), applies
44
+ corrective Q-weights to address implicit weighting bias in naive
45
+ stacked regressions, and runs a weighted event-study regression.
46
+
47
+ Parameters
48
+ ----------
49
+ kappa_pre : int, default=1
50
+ Number of pre-treatment event-time periods in the event window.
51
+ The event window spans [-kappa_pre, ..., kappa_post].
52
+ kappa_post : int, default=1
53
+ Number of post-treatment event-time periods.
54
+ weighting : str, default="aggregate"
55
+ Target estimand weighting scheme per Table 1 of the paper:
56
+ - "aggregate": Equal weight per adoption event (trimmed aggregate ATT)
57
+ - "population": Weight by population size of treated cohort
58
+ - "sample_share": Weight by sample share of each sub-experiment
59
+ clean_control : str, default="not_yet_treated"
60
+ How to define clean controls per Appendix A of the paper:
61
+ - "not_yet_treated": Units with A_s > a + kappa_post
62
+ - "strict": Units with A_s > a + kappa_post + kappa_pre
63
+ - "never_treated": Only units with A_s = infinity
64
+ cluster : str, default="unit"
65
+ Clustering level for standard errors:
66
+ - "unit": Cluster on original unit identifier
67
+ - "unit_subexp": Cluster on (unit, sub_experiment) pairs
68
+ alpha : float, default=0.05
69
+ Significance level for confidence intervals.
70
+ anticipation : int, default=0
71
+ Number of anticipation periods. When anticipation > 0:
72
+ - Reference period shifts from e=-1 to e=-1-anticipation
73
+ - Post-treatment includes anticipation periods (e >= -anticipation)
74
+ - Event window expands by anticipation pre-periods
75
+ Consistent with ImputationDiD, TwoStageDiD, SunAbraham.
76
+ rank_deficient_action : str, default="warn"
77
+ Action when design matrix is rank-deficient:
78
+ - "warn": Issue warning and drop linearly dependent columns
79
+ - "error": Raise ValueError
80
+ - "silent": Drop columns silently
81
+
82
+ Attributes
83
+ ----------
84
+ results_ : StackedDiDResults
85
+ Estimation results after calling fit().
86
+ is_fitted_ : bool
87
+ Whether the model has been fitted.
88
+
89
+ Examples
90
+ --------
91
+ Basic usage:
92
+
93
+ >>> from diff_diff import StackedDiD, generate_staggered_data
94
+ >>> data = generate_staggered_data(n_units=200, seed=42)
95
+ >>> est = StackedDiD(kappa_pre=2, kappa_post=2)
96
+ >>> results = est.fit(data, outcome='outcome', unit='unit',
97
+ ... time='period', first_treat='first_treat')
98
+ >>> results.print_summary()
99
+
100
+ With event study:
101
+
102
+ >>> results = est.fit(data, outcome='outcome', unit='unit',
103
+ ... time='period', first_treat='first_treat',
104
+ ... aggregate='event_study')
105
+ >>> from diff_diff import plot_event_study
106
+ >>> plot_event_study(results)
107
+
108
+ Notes
109
+ -----
110
+ The stacked estimator addresses TWFE bias by:
111
+ 1. Creating one sub-experiment per adoption cohort with clean controls
112
+ 2. Applying Q-weights to reweight the stacked regression
113
+ 3. Running a single event-study WLS regression on the weighted stack
114
+
115
+ References
116
+ ----------
117
+ Wing, C., Freedman, S. M., & Hollingsworth, A. (2024). Stacked
118
+ Difference-in-Differences. NBER Working Paper 32054.
119
+ """
120
+
121
def __init__(
    self,
    kappa_pre: int = 1,
    kappa_post: int = 1,
    weighting: str = "aggregate",
    clean_control: str = "not_yet_treated",
    cluster: str = "unit",
    alpha: float = 0.05,
    anticipation: int = 0,
    rank_deficient_action: str = "warn",
):
    """
    Store the estimator configuration after validating categorical options.

    Only the string-valued options (``weighting``, ``clean_control``,
    ``cluster``, ``rank_deficient_action``) are validated here; the
    numeric options are stored as given.

    Raises
    ------
    ValueError
        If any categorical option is not one of its allowed values.
    """
    # Each entry: (supplied value, allowed values, message template).
    # Entries are checked in order, so the first invalid option is reported.
    categorical_options = (
        (
            weighting,
            ("aggregate", "population", "sample_share"),
            "weighting must be 'aggregate', 'population', or 'sample_share', "
            "got '{}'",
        ),
        (
            clean_control,
            ("not_yet_treated", "strict", "never_treated"),
            "clean_control must be 'not_yet_treated', 'strict', or "
            "'never_treated', got '{}'",
        ),
        (
            cluster,
            ("unit", "unit_subexp"),
            "cluster must be 'unit' or 'unit_subexp', got '{}'",
        ),
        (
            rank_deficient_action,
            ("warn", "error", "silent"),
            "rank_deficient_action must be 'warn', 'error', or 'silent', "
            "got '{}'",
        ),
    )
    for supplied, allowed, template in categorical_options:
        if supplied not in allowed:
            raise ValueError(template.format(supplied))

    # Event-window configuration
    self.kappa_pre = kappa_pre
    self.kappa_post = kappa_post
    self.anticipation = anticipation

    # Estimand / control-group / inference configuration
    self.weighting = weighting
    self.clean_control = clean_control
    self.cluster = cluster
    self.alpha = alpha
    self.rank_deficient_action = rank_deficient_action

    # Fitted state, populated by fit()
    self.results_: Optional[StackedDiDResults] = None
    self.is_fitted_ = False
161
+
162
def fit(
    self,
    data: pd.DataFrame,
    outcome: str,
    unit: str,
    time: str,
    first_treat: str,
    aggregate: Optional[str] = None,
    population: Optional[str] = None,
    survey_design=None,
) -> StackedDiDResults:
    """
    Fit the stacked DiD estimator.

    The method builds one sub-experiment per retained adoption event,
    stacks them, attaches Q-weights, and runs a single weighted
    event-study regression. The overall ATT is the simple average of the
    post-treatment interaction coefficients (event times
    ``h >= -anticipation``), with a delta-method standard error.

    Parameters
    ----------
    data : pd.DataFrame
        Panel data with unit and time identifiers. The input is copied,
        not modified.
    outcome : str
        Name of outcome variable column.
    unit : str
        Name of unit identifier column.
    time : str
        Name of time period column (coerced with ``pd.to_numeric``).
    first_treat : str
        Name of column indicating when unit was first treated.
        Use 0 or np.inf for never-treated units (0 is recoded to inf).
    aggregate : str, optional
        Aggregation mode: None/"simple" (overall ATT only) or
        "event_study". Group aggregation is not supported because
        the pooled stacked regression cannot produce cohort-specific
        effects. Use CallawaySantAnna or ImputationDiD for
        cohort-level estimates.
    population : str, optional
        Column name for population weights. Required only when
        weighting="population".
    survey_design : SurveyDesign, optional
        Survey design specification for design-based inference. When
        provided, survey weights are multiplied into the Q-weights and
        the variance is replaced by a replicate-refit or Taylor Series
        Linearization estimate.

    Returns
    -------
    StackedDiDResults
        Object containing all estimation results. Also stored on
        ``self.results_``; ``self.is_fitted_`` is set to True.

    Raises
    ------
    ValueError
        If ``aggregate`` is unsupported, required columns are missing,
        ``population`` is not given with weighting="population", the
        survey design uses fweight/aweight, no treated units exist, or
        every sub-experiment is empty.
    """
    # ---- Validate inputs ----
    if aggregate in ("group", "all"):
        raise ValueError(
            f"aggregate='{aggregate}' is not supported by StackedDiD. "
            "The pooled stacked regression cannot produce cohort-specific "
            "effects. Use CallawaySantAnna or ImputationDiD for "
            "cohort-level estimates."
        )
    if aggregate not in (None, "simple", "event_study"):
        raise ValueError(
            f"aggregate must be None, 'simple', or 'event_study', " f"got '{aggregate}'"
        )

    required_cols = [outcome, unit, time, first_treat]
    if population is not None:
        required_cols.append(population)
    missing = [c for c in required_cols if c not in data.columns]
    if missing:
        raise ValueError(f"Missing columns: {missing}")

    if self.weighting == "population" and population is None:
        raise ValueError("population column must be specified when weighting='population'")

    # ---- Resolve survey design ----
    from diff_diff.survey import (
        SurveyDesign,
        _resolve_survey_for_fit,
    )

    resolved_survey, survey_weights, survey_weight_type, survey_metadata = (
        _resolve_survey_for_fit(survey_design, data, "analytical")
    )
    _uses_replicate_sd = (
        resolved_survey is not None and resolved_survey.uses_replicate_variance
    )

    # Reject fweight and aweight — Q-weight composition is ratio-valued
    # and breaks both frequency-weight (integer) and analytic-weight
    # (inverse-variance) semantics after multiplicative composition
    if (
        survey_design is not None
        and hasattr(survey_design, "weight_type")
        and survey_design.weight_type in ("fweight", "aweight")
    ):
        raise ValueError(
            f"StackedDiD does not support weight_type='{survey_design.weight_type}' "
            "because Q-weight composition changes the weight semantics. "
            "Use weight_type='pweight' (default) instead."
        )

    # Collect survey design column names for propagation through sub-experiments
    survey_cols: List[str] = []
    if survey_design is not None and isinstance(survey_design, SurveyDesign):
        for attr in ("weights", "strata", "psu", "fpc"):
            col_name = getattr(survey_design, attr, None)
            if col_name is not None:
                survey_cols.append(col_name)
        # Propagate replicate weight columns through stacked dataset
        if survey_design.replicate_weights is not None:
            survey_cols.extend(survey_design.replicate_weights)

    df = data.copy()
    df[time] = pd.to_numeric(df[time])
    df[first_treat] = pd.to_numeric(df[first_treat])

    # ---- Data setup ----
    # Handle never-treated encoding: both 0 and inf -> inf
    df[first_treat] = df[first_treat].replace(0, np.inf)

    # Build unit_info: one row per unit (first observed first_treat value)
    unit_info = (
        df.groupby(unit)
        .agg({first_treat: "first"})
        .reset_index()
        .rename(columns={first_treat: "_first_treat"})
    )

    T_min = int(df[time].min())
    T_max = int(df[time].max())
    time_periods = sorted(df[time].unique())

    # Extract unique adoption events (finite first_treat values)
    omega_A = sorted([a for a in unit_info["_first_treat"].unique() if np.isfinite(a)])

    if len(omega_A) == 0:
        raise ValueError(
            "No treated units found. Check 'first_treat' column "
            "(use 0 or np.inf for never-treated units)."
        )

    # ---- Trim adoption events (IC1 + IC2) ----
    omega_kappa, trimmed = self._trim_adoption_events(omega_A, T_min, T_max, unit_info)

    # ---- Build stacked dataset ----
    sub_experiments = []
    skipped_events = []
    for a in omega_kappa:
        sub_exp = self._build_sub_experiment(
            df,
            unit_info,
            a,
            unit,
            time,
            first_treat,
            outcome,
            extra_cols=survey_cols,
        )
        if sub_exp is not None and len(sub_exp) > 0:
            sub_experiments.append(sub_exp)
        else:
            skipped_events.append(a)

    if skipped_events:
        warnings.warn(
            f"Sub-experiments for events {skipped_events} were empty " f"after filtering.",
            UserWarning,
            stacklevel=2,
        )

    if len(sub_experiments) == 0:
        raise ValueError(
            "All sub-experiments are empty after filtering. "
            "Check your data or reduce kappa values."
        )

    stacked_df = pd.concat(sub_experiments, ignore_index=True)

    # ---- Compute Q-weights ----
    stacked_df = self._compute_q_weights(stacked_df, unit, population)

    # ---- Count units ----
    treated_units = stacked_df.loc[stacked_df["_D_sa"] == 1, unit].unique()
    control_units = stacked_df.loc[stacked_df["_D_sa"] == 0, unit].unique()
    n_treated_units = len(treated_units)
    n_control_units = len(control_units)

    # ---- Build design matrix and run WLS ----
    # Always run event study regression (Equation 3 in paper)
    # Reference period: e = -1 - anticipation (shifts when anticipation > 0)
    ref_period = -1 - self.anticipation
    event_times = sorted(
        [
            h
            for h in range(-self.kappa_pre - self.anticipation, self.kappa_post + 1)
            if h != ref_period
        ]
    )

    n = len(stacked_df)
    n_event_dummies = len(event_times)

    # Track column indices for VCV extraction
    # [0] intercept, [1] D_sa, [2..K+1] event-time dummies,
    # [K+2..2K+1] D_sa * event-time interactions
    interaction_indices: Dict[int, int] = {}

    # Build design matrix
    X = np.zeros((n, 2 + 2 * n_event_dummies))
    X[:, 0] = 1.0  # intercept
    X[:, 1] = stacked_df["_D_sa"].values  # treatment indicator

    et_vals = stacked_df["_event_time"].values
    d_vals = stacked_df["_D_sa"].values

    for j, h in enumerate(event_times):
        col_lambda = 2 + j  # event-time dummy
        col_delta = 2 + n_event_dummies + j  # interaction
        mask = et_vals == h
        X[mask, col_lambda] = 1.0
        X[mask, col_delta] = d_vals[mask]
        interaction_indices[h] = col_delta

    # WLS via sqrt(w) transformation
    Q_weights = stacked_df["_Q_weight"].values
    n_stacked = len(stacked_df)

    def _stacked_survey_weights() -> np.ndarray:
        # Raw survey weights re-extracted from the stacked data so lengths
        # match the stacked design; all-ones when the design has no weight
        # column. Returns a fresh array on every call.
        if survey_design.weights is not None:
            return stacked_df[survey_design.weights].values.astype(np.float64)
        return np.ones(n_stacked, dtype=np.float64)

    # Compose Q-weights with survey weights if survey design is present
    if resolved_survey is not None and survey_weights is not None:
        # Survey weights were resolved on the original data; the stacked
        # dataset carries the survey weight column through _build_sub_experiment.
        composed_weights = Q_weights * _stacked_survey_weights()
        # Normalize composed weights to sum = n_stacked
        composed_weights = composed_weights * (n_stacked / np.sum(composed_weights))
    else:
        composed_weights = Q_weights

    sqrt_w = np.sqrt(composed_weights)
    Y = stacked_df[outcome].values
    Y_t = Y * sqrt_w
    X_t = X * sqrt_w[:, np.newaxis]

    # Cluster IDs
    if self.cluster == "unit":
        cluster_ids = stacked_df[unit].values
    else:  # unit_subexp
        cluster_ids = (
            stacked_df[unit].astype(str) + "_" + stacked_df["_sub_exp"].astype(str)
        ).values

    # Run OLS on transformed data (= WLS)
    coef, residuals, vcov = solve_ols(
        X_t,
        Y_t,
        cluster_ids=cluster_ids,
        return_vcov=True,
        rank_deficient_action=self.rank_deficient_action,
    )
    assert vcov is not None

    # ---- Survey VCV override ----
    _n_valid_rep_sd = None
    resolved_stacked = None
    if resolved_survey is not None and _uses_replicate_sd:
        # Replicate variance: re-run WLS per replicate with composed weights
        from diff_diff.survey import compute_replicate_refit_variance, compute_survey_metadata

        resolved_stacked = survey_design.resolve(stacked_df)

        # Refit closure: compose Q-weights with replicate survey weights
        def _refit_stacked(w_r):
            composed_r = Q_weights * w_r
            w_sum = np.sum(composed_r)
            if w_sum > 0:
                composed_r = composed_r * (n_stacked / w_sum)
            sqrt_w_r = np.sqrt(composed_r)
            coef_r, _, _ = solve_ols(
                X * sqrt_w_r[:, np.newaxis], Y * sqrt_w_r,
                cluster_ids=cluster_ids,
                rank_deficient_action="silent", return_vcov=False,
            )
            return coef_r

        # Replicate-based VCV around the full-sample coefficient vector
        vcov, _n_valid_rep_sd = compute_replicate_refit_variance(
            _refit_stacked, coef, resolved_stacked
        )

        # Compute survey metadata
        survey_metadata = compute_survey_metadata(
            resolved_stacked, _stacked_survey_weights()
        )
    elif resolved_survey is not None:
        from diff_diff.survey import (
            _inject_cluster_as_psu,
            _resolve_effective_cluster,
            compute_survey_metadata,
            compute_survey_vcov,
        )

        # Re-resolve survey design on the stacked data so that strata/PSU
        # arrays have the correct length for TSL variance estimation.
        resolved_stacked = survey_design.resolve(stacked_df)

        # Create a copy with composed weights (normalized to sum=n_stacked)
        resolved_composed = copy.copy(resolved_stacked)
        resolved_composed.weights = composed_weights

        # Original-scale residuals for TSL variance
        resid_orig = Y - X @ coef

        # Inject cluster as PSU when survey design has no explicit PSU
        resolved_composed = _inject_cluster_as_psu(resolved_composed, cluster_ids)

        # Resolve effective cluster (PSU overrides user-specified cluster)
        _resolve_effective_cluster(resolved_composed, cluster_ids, self.cluster)

        # Compute TSL variance
        vcov = compute_survey_vcov(X, resid_orig, resolved_composed)

        # Recompute survey metadata on the stacked resolved design
        survey_metadata = compute_survey_metadata(
            resolved_composed, _stacked_survey_weights()
        )

    # ---- Resolve survey degrees of freedom (once) ----
    # The df does not depend on the event time, so it is computed a single
    # time here and shared by the event-study and overall-ATT inference.
    if survey_metadata is not None and survey_metadata.df_survey is not None:
        survey_df = max(survey_metadata.df_survey, 1)
    else:
        survey_df = 0 if _uses_replicate_sd else None
    # Override df when replicates were dropped during the refit, and keep
    # the reported metadata consistent with the df actually used.
    if (
        _n_valid_rep_sd is not None
        and resolved_stacked is not None
        and _n_valid_rep_sd < resolved_stacked.n_replicates
    ):
        survey_df = _n_valid_rep_sd - 1 if _n_valid_rep_sd > 1 else 0
        if survey_metadata is not None:
            survey_metadata.df_survey = survey_df if survey_df > 0 else None

    # ---- Extract event study effects ----
    event_study_effects: Optional[Dict[int, Dict[str, Any]]] = None
    if aggregate == "event_study":
        event_study_effects = {}
        # Reference period (e = -1 - anticipation) is normalized to zero
        event_study_effects[ref_period] = {
            "effect": 0.0,
            "se": 0.0,
            "t_stat": np.nan,
            "p_value": np.nan,
            "conf_int": (np.nan, np.nan),
            "n_obs": 0,
        }
        for h in event_times:
            idx = interaction_indices[h]
            effect = float(coef[idx])
            # Clamp tiny negative variances from numerical error to zero
            se = float(np.sqrt(max(vcov[idx, idx], 0.0)))
            t_stat, p_value, conf_int = safe_inference(
                effect, se, alpha=self.alpha, df=survey_df
            )
            n_obs_h = int(np.sum((et_vals == h) & (d_vals == 1)))
            event_study_effects[h] = {
                "effect": effect,
                "se": se,
                "t_stat": t_stat,
                "p_value": p_value,
                "conf_int": conf_int,
                "n_obs": n_obs_h,
            }

    # ---- Compute overall ATT ----
    # Average of post-treatment delta_h coefficients with delta-method SE
    # Post-treatment includes anticipation periods (h >= -anticipation)
    post_event_times = [
        h for h in event_times if h >= -self.anticipation and h in interaction_indices
    ]
    post_indices = [interaction_indices[h] for h in post_event_times]
    K = len(post_indices)

    if K > 0:
        overall_att = sum(float(coef[i]) for i in post_indices) / K
        # Delta method: gradient = 1/K for each post-period coefficient
        sub_vcv = vcov[np.ix_(post_indices, post_indices)]
        ones = np.ones(K)
        overall_se = float(np.sqrt(max(ones @ sub_vcv @ ones, 0.0))) / K
    else:
        overall_att = np.nan
        overall_se = np.nan

    overall_t, overall_p, overall_ci = safe_inference(
        overall_att, overall_se, alpha=self.alpha, df=survey_df
    )

    # ---- Construct results ----
    self.results_ = StackedDiDResults(
        overall_att=overall_att,
        overall_se=overall_se,
        overall_t_stat=overall_t,
        overall_p_value=overall_p,
        overall_conf_int=overall_ci,
        event_study_effects=event_study_effects,
        group_effects=None,
        stacked_data=stacked_df,
        groups=list(omega_kappa),
        trimmed_groups=list(trimmed),
        time_periods=time_periods,
        n_obs=len(data),
        n_stacked_obs=n,
        n_sub_experiments=len(sub_experiments),
        n_treated_units=n_treated_units,
        n_control_units=n_control_units,
        kappa_pre=self.kappa_pre,
        kappa_post=self.kappa_post,
        weighting=self.weighting,
        clean_control=self.clean_control,
        alpha=self.alpha,
        survey_metadata=survey_metadata,
    )

    self.is_fitted_ = True
    return self.results_
601
+
602
+ # =========================================================================
603
+ # Trimming (IC1 + IC2)
604
+ # =========================================================================
605
+
606
def _trim_adoption_events(
    self,
    adoption_events: List[Any],
    T_min: int,
    T_max: int,
    unit_info: pd.DataFrame,
) -> Tuple[List[Any], List[Any]]:
    """
    Trim adoption events based on IC1 (window) and IC2 (controls).

    IC1: a - kappa_pre - anticipation >= T_min AND a + kappa_post <= T_max
         (matches R reference: focalAdoptionTime - kappa_pre >= minTime
          AND focalAdoptionTime + kappa_post <= maxTime, with the window
          start shifted earlier by ``anticipation`` periods).

    IC2: Clean controls exist for this adoption event, as decided by
         ``_check_clean_controls_exist``.

    Parameters
    ----------
    adoption_events : list
        Unique finite adoption event times.
    T_min, T_max : int
        Min and max time periods in the data.
    unit_info : pd.DataFrame
        One row per unit with _first_treat column.

    Returns
    -------
    omega_kappa : list
        Included adoption events (original values, input order).
    trimmed : list
        Excluded adoption events.

    Warns
    -----
    UserWarning
        If at least one adoption event is trimmed.

    Raises
    ------
    ValueError
        If every adoption event is trimmed.
    """
    omega_kappa = []
    trimmed = []

    # First event time of the window; anticipation extends it backwards.
    window_start = -self.kappa_pre - self.anticipation

    for a in adoption_events:
        a_int = int(a)

        # IC1: Event window fits in data
        ic1 = (a_int + window_start) >= T_min and (a_int + self.kappa_post) <= T_max

        # IC2: Clean controls exist
        ic2 = self._check_clean_controls_exist(a_int, unit_info)

        if ic1 and ic2:
            omega_kappa.append(a)
        else:
            trimmed.append(a)

    if trimmed:
        # Report the window that was actually checked (including any
        # anticipation periods), not just [-kappa_pre, kappa_post].
        warnings.warn(
            f"Trimmed {len(trimmed)} adoption event(s) that don't satisfy "
            f"inclusion criteria: {trimmed}. "
            f"IC1 requires event window [{window_start}, {self.kappa_post}] "
            f"to fit within data range [{T_min}, {T_max}]. "
            f"IC2 requires clean controls to exist.",
            UserWarning,
            # Attribute the warning to the caller of fit()
            stacklevel=3,
        )

    if len(omega_kappa) == 0:
        raise ValueError(
            f"All {len(adoption_events)} adoption events were trimmed. "
            f"No valid sub-experiments can be constructed. "
            f"Consider reducing kappa_pre (currently {self.kappa_pre}) "
            f"or kappa_post (currently {self.kappa_post}), or check that "
            f"clean control units exist."
        )

    return omega_kappa, trimmed
682
+
683
def _check_clean_controls_exist(self, a: int, unit_info: pd.DataFrame) -> bool:
    """
    Check IC2: report whether any unit qualifies as a clean control for event ``a``.

    "not_yet_treated" requires a first-treatment time above
    ``a + kappa_post``; "strict" additionally adds ``kappa_pre`` to that
    cutoff; any other mode (never_treated) requires an infinite
    first-treatment time.
    """
    first_treat_times = unit_info["_first_treat"].values
    mode = self.clean_control

    # never_treated: only units that are never adopted count as controls
    if mode not in ("not_yet_treated", "strict"):
        return bool(np.any(np.isinf(first_treat_times)))

    # Threshold the first-treatment time must exceed
    cutoff = a + self.kappa_post
    if mode == "strict":
        cutoff = cutoff + self.kappa_pre
    return bool(np.any(first_treat_times > cutoff))
692
+
693
+ # =========================================================================
694
+ # Sub-experiment construction
695
+ # =========================================================================
696
+
697
def _build_sub_experiment(
    self,
    df: pd.DataFrame,
    unit_info: pd.DataFrame,
    a: Any,
    unit: str,
    time: str,
    first_treat: str,
    outcome: str,
    extra_cols: Optional[List[str]] = None,
) -> Optional[pd.DataFrame]:
    """
    Assemble the sub-experiment for a single adoption event.

    Rows are kept when the unit is either treated at ``a`` or a clean
    control, and the period lies in
    ``[a - kappa_pre - anticipation, a + kappa_post]``. Every column of
    ``df`` is carried over; ``first_treat``, ``outcome`` and
    ``extra_cols`` are accepted but not read by this method.

    Parameters
    ----------
    df : pd.DataFrame
        Full panel data.
    unit_info : pd.DataFrame
        One row per unit with the unit column and _first_treat.
    a : int/float
        Adoption event time.
    unit, time, first_treat, outcome : str
        Column names.
    extra_cols : list of str, optional
        Names of additional columns (e.g., survey design columns) that
        callers expect to be present in the sub-experiment.

    Returns
    -------
    pd.DataFrame or None
        Copy of the selected rows with _sub_exp, _event_time and _D_sa
        columns added, or None when there are no treated units, no
        control units, or no rows inside the window.
    """
    event_period = int(a)
    cohort_of = unit_info["_first_treat"].values
    ids = unit_info[unit].values

    # Treated units adopt exactly at this event (A_s = a)
    treated_units = set(ids[cohort_of == a])

    # Clean controls according to the configured definition
    if self.clean_control == "not_yet_treated":
        is_control = cohort_of > event_period + self.kappa_post
    elif self.clean_control == "strict":
        is_control = cohort_of > event_period + self.kappa_post + self.kappa_pre
    else:  # never_treated
        is_control = np.isinf(cohort_of)
    control_units = set(ids[is_control])

    if not treated_units or not control_units:
        return None

    # Calendar window; anticipation pushes the start further back.
    # Matches R reference: (focalAdoptionTime - kappa_pre):(focalAdoptionTime + kappa_post)
    window_lo = event_period - self.kappa_pre - self.anticipation
    window_hi = event_period + self.kappa_post

    member_rows = df[unit].isin(treated_units | control_units)
    window_rows = df[time].between(window_lo, window_hi)  # inclusive on both ends
    sub_df = df.loc[member_rows & window_rows].copy()

    if sub_df.shape[0] == 0:
        return None

    # Sub-experiment bookkeeping columns
    sub_df["_sub_exp"] = a
    sub_df["_event_time"] = sub_df[time] - event_period
    sub_df["_D_sa"] = sub_df[unit].isin(treated_units).astype(int)

    return sub_df
772
+
773
+ # =========================================================================
774
+ # Q-weight computation
775
+ # =========================================================================
776
+
777
+ def _compute_q_weights(
778
+ self,
779
+ stacked_df: pd.DataFrame,
780
+ unit_col: str,
781
+ population_col: Optional[str],
782
+ ) -> pd.DataFrame:
783
+ """
784
+ Compute Q-weights per Table 1 of Wing et al. (2024).
785
+
786
+ Treated observations always get Q = 1.
787
+ Control observations get Q based on the weighting scheme.
788
+
789
+ For aggregate weighting, Q-weights are computed using observation
790
+ counts per (event_time, sub_exp), matching the R reference
791
+ ``compute_weights()``. For balanced panels this is equivalent to
792
+ unit counts per sub-experiment. For unbalanced panels the weights
793
+ adjust for varying observation density per event time.
794
+
795
+ Population and sample_share weighting use unit counts per
796
+ sub-experiment, following the paper's notation (N_a^D, N_a^C).
797
+
798
+ Parameters
799
+ ----------
800
+ stacked_df : pd.DataFrame
801
+ Stacked dataset with _sub_exp, _event_time, and _D_sa columns.
802
+ unit_col : str
803
+ Unit column name.
804
+ population_col : str, optional
805
+ Population column name (for weighting="population").
806
+
807
+ Returns
808
+ -------
809
+ pd.DataFrame
810
+ stacked_df with _Q_weight column added.
811
+ """
812
+ if self.weighting == "aggregate":
813
+ return self._compute_q_weights_aggregate(stacked_df)
814
+
815
+ # --- Population and sample_share: unit-count-based formulas ---
816
+
817
+ # Count distinct units per sub-experiment
818
+ sub_exp_stats = (
819
+ stacked_df.groupby(["_sub_exp", "_D_sa"])[unit_col].nunique().unstack(fill_value=0)
820
+ )
821
+
822
+ # N_a^D and N_a^C per sub-experiment
823
+ N_D = sub_exp_stats.get(1, pd.Series(dtype=float)).to_dict()
824
+ N_C = sub_exp_stats.get(0, pd.Series(dtype=float)).to_dict()
825
+
826
+ # Totals
827
+ N_Omega_C = sum(N_C.values())
828
+
829
+ if self.weighting == "population":
830
+ # Pop_a^D: sum of population values for treated units per sub-exp
831
+ treated_pop = (
832
+ stacked_df[stacked_df["_D_sa"] == 1]
833
+ .drop_duplicates(subset=[unit_col, "_sub_exp"])
834
+ .groupby("_sub_exp")[population_col]
835
+ .sum()
836
+ .to_dict()
837
+ )
838
+ Pop_D_total = sum(treated_pop.values())
839
+
840
+ q_control: Dict[Any, float] = {}
841
+ for a in N_D:
842
+ n_c = N_C.get(a, 0)
843
+ if n_c == 0 or N_Omega_C == 0:
844
+ q_control[a] = 1.0
845
+ continue
846
+ control_share = n_c / N_Omega_C
847
+ pop_d = treated_pop.get(a, 0)
848
+ pop_share = pop_d / Pop_D_total if Pop_D_total > 0 else 0.0
849
+ q_control[a] = pop_share / control_share if control_share > 0 else 1.0
850
+
851
+ else: # sample_share
852
+ N_Omega_D = sum(N_D.values())
853
+ N_total = {a: N_D.get(a, 0) + N_C.get(a, 0) for a in N_D}
854
+ N_grand = N_Omega_D + N_Omega_C
855
+
856
+ q_control = {}
857
+ for a in N_D:
858
+ n_c = N_C.get(a, 0)
859
+ if n_c == 0 or N_Omega_C == 0:
860
+ q_control[a] = 1.0
861
+ continue
862
+ control_share = n_c / N_Omega_C
863
+ n_total_a = N_total.get(a, 0)
864
+ sample_share = n_total_a / N_grand if N_grand > 0 else 0.0
865
+ q_control[a] = sample_share / control_share if control_share > 0 else 1.0
866
+
867
+ # Assign weights: treated=1, control=q_control[sub_exp]
868
+ sub_exp_vals = stacked_df["_sub_exp"].values
869
+ d_vals = stacked_df["_D_sa"].values
870
+ weights = np.ones(len(stacked_df))
871
+ for i in range(len(stacked_df)):
872
+ if d_vals[i] == 0:
873
+ weights[i] = q_control.get(sub_exp_vals[i], 1.0)
874
+
875
+ stacked_df["_Q_weight"] = weights
876
+ return stacked_df
877
+
878
+ def _compute_q_weights_aggregate(self, stacked_df: pd.DataFrame) -> pd.DataFrame:
879
+ """
880
+ Compute aggregate Q-weights using observation counts per (event_time, sub_exp).
881
+
882
+ Matches the R reference ``compute_weights()`` which computes shares at the
883
+ (event_time, sub_exp) level, not the sub_exp level. For balanced panels the
884
+ two approaches are equivalent. For unbalanced panels this adjusts for varying
885
+ observation density per event time.
886
+
887
+ R reference pattern::
888
+
889
+ stack_treat_n = count(D==1) BY event_time
890
+ stack_control_n = count(D==0) BY event_time
891
+ sub_treat_n = count(D==1) BY (sub_exp, event_time)
892
+ sub_control_n = count(D==0) BY (sub_exp, event_time)
893
+ sub_treat_share = sub_treat_n / stack_treat_n
894
+ sub_control_share = sub_control_n / stack_control_n
895
+ Q = sub_treat_share / sub_control_share (for controls)
896
+ Q = 1 (for treated)
897
+ """
898
+ # Step 1: Stack-level totals by (event_time, D_sa)
899
+ stack_counts = stacked_df.groupby(["_event_time", "_D_sa"]).size().unstack(fill_value=0)
900
+ stack_treat_n = stack_counts.get(1, pd.Series(0, index=stack_counts.index))
901
+ stack_control_n = stack_counts.get(0, pd.Series(0, index=stack_counts.index))
902
+
903
+ # Step 2: Sub-experiment-level counts by (event_time, sub_exp, D_sa)
904
+ sub_counts = (
905
+ stacked_df.groupby(["_event_time", "_sub_exp", "_D_sa"]).size().unstack(fill_value=0)
906
+ )
907
+ sub_treat_n = sub_counts.get(1, pd.Series(0, index=sub_counts.index))
908
+ sub_control_n = sub_counts.get(0, pd.Series(0, index=sub_counts.index))
909
+
910
+ # Step 3: Compute shares and Q per (event_time, sub_exp)
911
+ # Q = (sub_treat_n / stack_treat_n) / (sub_control_n / stack_control_n)
912
+ q_lookup: Dict[Tuple[Any, Any], float] = {}
913
+ for et, sub_exp in sub_counts.index:
914
+ s_treat = sub_treat_n.get((et, sub_exp), 0)
915
+ s_control = sub_control_n.get((et, sub_exp), 0)
916
+ st_treat = stack_treat_n.get(et, 0)
917
+ st_control = stack_control_n.get(et, 0)
918
+
919
+ if s_control == 0 or st_treat == 0 or st_control == 0:
920
+ q_lookup[(et, sub_exp)] = 1.0
921
+ else:
922
+ treat_share = s_treat / st_treat
923
+ control_share = s_control / st_control
924
+ q_lookup[(et, sub_exp)] = treat_share / control_share if control_share > 0 else 1.0
925
+
926
+ # Step 4: Assign weights via vectorized merge
927
+ et_vals = stacked_df["_event_time"].values
928
+ sub_exp_vals = stacked_df["_sub_exp"].values
929
+ d_vals = stacked_df["_D_sa"].values
930
+ weights = np.ones(len(stacked_df))
931
+
932
+ for i in range(len(stacked_df)):
933
+ if d_vals[i] == 0:
934
+ weights[i] = q_lookup.get((et_vals[i], sub_exp_vals[i]), 1.0)
935
+
936
+ stacked_df["_Q_weight"] = weights
937
+ return stacked_df
938
+
939
+ # =========================================================================
940
+ # sklearn-compatible interface
941
+ # =========================================================================
942
+
943
def get_params(self) -> Dict[str, Any]:
    """Return the estimator's configuration as a name -> value dict (sklearn-compatible)."""
    # Order here fixes the key order of the returned dict.
    param_names = (
        "kappa_pre",
        "kappa_post",
        "weighting",
        "clean_control",
        "cluster",
        "alpha",
        "anticipation",
        "rank_deficient_action",
    )
    return {name: getattr(self, name) for name in param_names}
955
+
956
def set_params(self, **params: Any) -> "StackedDiD":
    """
    Update estimator parameters in place (sklearn-compatible).

    Parameters are applied one at a time in keyword order; the first name
    that is not an existing attribute raises ``ValueError``, leaving any
    earlier assignments in effect. Returns ``self`` to allow chaining.
    """
    # NOTE(review): the membership test is ``hasattr``, so any existing
    # attribute name is accepted, not only the keys of get_params().
    for name in params:
        if not hasattr(self, name):
            raise ValueError(f"Unknown parameter: {name}")
        setattr(self, name, params[name])
    return self
964
+
965
def summary(self) -> str:
    """
    Return the text summary of the fitted results.

    Raises
    ------
    RuntimeError
        If ``is_fitted_`` is falsy, i.e. the model has not been fitted.
    """
    if self.is_fitted_:
        fitted_results = self.results_
        # A fitted model is expected to carry results; this also narrows
        # the Optional type for static checkers.
        assert fitted_results is not None
        return fitted_results.summary()
    raise RuntimeError("Model must be fitted before calling summary()")
971
+
972
def print_summary(self) -> None:
    """Write the text returned by ``summary()`` to stdout."""
    report = self.summary()
    print(report)
975
+
976
+
977
+ # =============================================================================
978
+ # Convenience function
979
+ # =============================================================================
980
+
981
+
982
def stacked_did(
    data: pd.DataFrame,
    outcome: str,
    unit: str,
    time: str,
    first_treat: str,
    kappa_pre: int = 1,
    kappa_post: int = 1,
    aggregate: Optional[str] = None,
    population: Optional[str] = None,
    survey_design=None,
    **kwargs: Any,
) -> StackedDiDResults:
    """
    Run stacked DiD estimation in a single call.

    Builds a ``StackedDiD`` estimator from ``kappa_pre``, ``kappa_post`` and
    any extra keyword arguments, then returns the result of its ``fit()``.

    Parameters
    ----------
    data : pd.DataFrame
        Panel data.
    outcome : str
        Outcome variable column name.
    unit : str
        Unit identifier column name.
    time : str
        Time period column name.
    first_treat : str
        Column indicating first treatment period (0 or inf for never-treated).
    kappa_pre : int, default=1
        Pre-treatment event-time periods.
    kappa_post : int, default=1
        Post-treatment event-time periods.
    aggregate : str, optional
        Aggregation mode: None, "simple", or "event_study".
    population : str, optional
        Population column for weighting="population".
    survey_design : SurveyDesign, optional
        Survey design specification for design-based inference.
    **kwargs
        Additional keyword arguments passed to the StackedDiD constructor.

    Returns
    -------
    StackedDiDResults
        Estimation results.

    Examples
    --------
    >>> from diff_diff import stacked_did, generate_staggered_data
    >>> data = generate_staggered_data(seed=42)
    >>> results = stacked_did(data, 'outcome', 'unit', 'period',
    ...                       'first_treat', kappa_pre=2, kappa_post=2,
    ...                       aggregate='event_study')
    >>> results.print_summary()
    """
    # Window lengths and extra options configure the estimator itself ...
    estimator = StackedDiD(kappa_pre=kappa_pre, kappa_post=kappa_post, **kwargs)
    # ... while column names and per-fit options go to fit().
    fit_options = {
        "outcome": outcome,
        "unit": unit,
        "time": time,
        "first_treat": first_treat,
        "aggregate": aggregate,
        "population": population,
        "survey_design": survey_design,
    }
    return estimator.fit(data, **fit_options)