diff-diff 3.0.1__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. diff_diff/__init__.py +382 -0
  2. diff_diff/_backend.py +134 -0
  3. diff_diff/_rust_backend.cp314-win_amd64.pyd +0 -0
  4. diff_diff/bacon.py +1140 -0
  5. diff_diff/bootstrap_utils.py +730 -0
  6. diff_diff/continuous_did.py +1626 -0
  7. diff_diff/continuous_did_bspline.py +190 -0
  8. diff_diff/continuous_did_results.py +374 -0
  9. diff_diff/datasets.py +815 -0
  10. diff_diff/diagnostics.py +882 -0
  11. diff_diff/efficient_did.py +1770 -0
  12. diff_diff/efficient_did_bootstrap.py +359 -0
  13. diff_diff/efficient_did_covariates.py +899 -0
  14. diff_diff/efficient_did_results.py +368 -0
  15. diff_diff/efficient_did_weights.py +617 -0
  16. diff_diff/estimators.py +1501 -0
  17. diff_diff/honest_did.py +2585 -0
  18. diff_diff/imputation.py +2458 -0
  19. diff_diff/imputation_bootstrap.py +418 -0
  20. diff_diff/imputation_results.py +448 -0
  21. diff_diff/linalg.py +2538 -0
  22. diff_diff/power.py +2588 -0
  23. diff_diff/practitioner.py +869 -0
  24. diff_diff/prep.py +1738 -0
  25. diff_diff/prep_dgp.py +1718 -0
  26. diff_diff/pretrends.py +1105 -0
  27. diff_diff/results.py +918 -0
  28. diff_diff/stacked_did.py +1049 -0
  29. diff_diff/stacked_did_results.py +339 -0
  30. diff_diff/staggered.py +3895 -0
  31. diff_diff/staggered_aggregation.py +864 -0
  32. diff_diff/staggered_bootstrap.py +752 -0
  33. diff_diff/staggered_results.py +416 -0
  34. diff_diff/staggered_triple_diff.py +1545 -0
  35. diff_diff/staggered_triple_diff_results.py +416 -0
  36. diff_diff/sun_abraham.py +1685 -0
  37. diff_diff/survey.py +1981 -0
  38. diff_diff/synthetic_did.py +1136 -0
  39. diff_diff/triple_diff.py +2047 -0
  40. diff_diff/trop.py +952 -0
  41. diff_diff/trop_global.py +1270 -0
  42. diff_diff/trop_local.py +1307 -0
  43. diff_diff/trop_results.py +356 -0
  44. diff_diff/twfe.py +542 -0
  45. diff_diff/two_stage.py +1952 -0
  46. diff_diff/two_stage_bootstrap.py +520 -0
  47. diff_diff/two_stage_results.py +400 -0
  48. diff_diff/utils.py +1902 -0
  49. diff_diff/visualization/__init__.py +61 -0
  50. diff_diff/visualization/_common.py +328 -0
  51. diff_diff/visualization/_continuous.py +274 -0
  52. diff_diff/visualization/_diagnostic.py +817 -0
  53. diff_diff/visualization/_event_study.py +1086 -0
  54. diff_diff/visualization/_power.py +661 -0
  55. diff_diff/visualization/_staggered.py +833 -0
  56. diff_diff/visualization/_synthetic.py +197 -0
  57. diff_diff/wooldridge.py +1285 -0
  58. diff_diff/wooldridge_results.py +349 -0
  59. diff_diff-3.0.1.dist-info/METADATA +2997 -0
  60. diff_diff-3.0.1.dist-info/RECORD +62 -0
  61. diff_diff-3.0.1.dist-info/WHEEL +4 -0
  62. diff_diff-3.0.1.dist-info/sboms/diff_diff_rust.cyclonedx.json +5843 -0
@@ -0,0 +1,1136 @@
1
+ """
2
+ Synthetic Difference-in-Differences estimator.
3
+ """
4
+
5
+ import warnings
6
+ from typing import Any, Dict, List, Optional, Tuple
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+ from numpy.linalg import LinAlgError
11
+
12
+ from diff_diff.estimators import DifferenceInDifferences
13
+ from diff_diff.linalg import solve_ols
14
+ from diff_diff.results import SyntheticDiDResults
15
+ from diff_diff.utils import (
16
+ _compute_regularization,
17
+ _sum_normalize,
18
+ compute_sdid_estimator,
19
+ compute_sdid_unit_weights,
20
+ compute_time_weights,
21
+ safe_inference,
22
+ validate_binary,
23
+ )
24
+
25
+
26
+ class SyntheticDiD(DifferenceInDifferences):
27
+ """
28
+ Synthetic Difference-in-Differences (SDID) estimator.
29
+
30
+ Combines the strengths of Difference-in-Differences and Synthetic Control
31
+ methods by re-weighting control units to better match treated units'
32
+ pre-treatment trends.
33
+
34
+ This method is particularly useful when:
35
+ - You have few treated units (possibly just one)
36
+ - Parallel trends assumption may be questionable
37
+ - Control units are heterogeneous and need reweighting
38
+ - You want robustness to pre-treatment differences
39
+
40
+ Parameters
41
+ ----------
42
+ zeta_omega : float, optional
43
+ Regularization for unit weights. If None (default), auto-computed
44
+ from data as ``(N1 * T1)^(1/4) * noise_level`` matching R's synthdid.
45
+ zeta_lambda : float, optional
46
+ Regularization for time weights. If None (default), auto-computed
47
+ from data as ``1e-6 * noise_level`` matching R's synthdid.
48
+ alpha : float, default=0.05
49
+ Significance level for confidence intervals.
50
+ variance_method : str, default="placebo"
51
+ Method for variance estimation:
52
+ - "placebo": Placebo-based variance matching R's synthdid::vcov(method="placebo").
53
+ Implements Algorithm 4 from Arkhangelsky et al. (2021). This is R's default.
54
+ - "bootstrap": Bootstrap at unit level with fixed weights matching R's
55
+ synthdid::vcov(method="bootstrap").
56
+ n_bootstrap : int, default=200
57
+ Number of replications for variance estimation. Used for both:
58
+ - Bootstrap: Number of bootstrap samples
59
+ - Placebo: Number of random permutations (matches R's `replications` argument)
60
+ seed : int, optional
61
+ Random seed for reproducibility. If None (default), results
62
+ will vary between runs.
63
+
64
+ Attributes
65
+ ----------
66
+ results_ : SyntheticDiDResults
67
+ Estimation results after calling fit().
68
+ is_fitted_ : bool
69
+ Whether the model has been fitted.
70
+
71
+ Examples
72
+ --------
73
+ Basic usage with panel data:
74
+
75
+ >>> import pandas as pd
76
+ >>> from diff_diff import SyntheticDiD
77
+ >>>
78
+ >>> # Panel data with units observed over multiple time periods
79
+ >>> # Treatment occurs at period 5 for treated units
80
+ >>> data = pd.DataFrame({
81
+ ... 'unit': [...], # Unit identifier
82
+ ... 'period': [...], # Time period
83
+ ... 'outcome': [...], # Outcome variable
84
+ ... 'treated': [...] # 1 if unit is ever treated, 0 otherwise
85
+ ... })
86
+ >>>
87
+ >>> # Fit SDID model
88
+ >>> sdid = SyntheticDiD()
89
+ >>> results = sdid.fit(
90
+ ... data,
91
+ ... outcome='outcome',
92
+ ... treatment='treated',
93
+ ... unit='unit',
94
+ ... time='period',
95
+ ... post_periods=[5, 6, 7, 8]
96
+ ... )
97
+ >>>
98
+ >>> # View results
99
+ >>> results.print_summary()
100
+ >>> print(f"ATT: {results.att:.3f} (SE: {results.se:.3f})")
101
+ >>>
102
+ >>> # Examine unit weights
103
+ >>> weights_df = results.get_unit_weights_df()
104
+ >>> print(weights_df.head(10))
105
+
106
+ Notes
107
+ -----
108
+ The SDID estimator (Arkhangelsky et al., 2021) computes:
109
+
110
+ τ̂ = (Ȳ_treated,post - Σ_t λ_t * Y_treated,t)
111
+ - Σ_j ω_j * (Ȳ_j,post - Σ_t λ_t * Y_j,t)
112
+
113
+ Where:
114
+ - ω_j are unit weights (sum to 1, non-negative)
115
+ - λ_t are time weights (sum to 1, non-negative)
116
+
117
+ Unit weights ω are chosen to match pre-treatment outcomes:
118
+ min ||Σ_j ω_j * Y_j,pre - Y_treated,pre||²
119
+
120
+ This interpolates between:
121
+ - Standard DiD (uniform weights): ω_j = 1/N_control
122
+ - Synthetic Control (exact matching): concentrated weights
123
+
124
+ References
125
+ ----------
126
+ Arkhangelsky, D., Athey, S., Hirshberg, D. A., Imbens, G. W., & Wager, S.
127
+ (2021). Synthetic Difference-in-Differences. American Economic Review,
128
+ 111(12), 4088-4118.
129
+ """
130
+
131
def __init__(
    self,
    zeta_omega: Optional[float] = None,
    zeta_lambda: Optional[float] = None,
    alpha: float = 0.05,
    variance_method: str = "placebo",
    n_bootstrap: int = 200,
    seed: Optional[int] = None,
    # Deprecated — accepted for backward compat, ignored with warning
    lambda_reg: Optional[float] = None,
    zeta: Optional[float] = None,
):
    """
    Configure the estimator.

    Parameters
    ----------
    zeta_omega : float, optional
        Override for the unit-weight regularization; stored as given.
    zeta_lambda : float, optional
        Override for the time-weight regularization; stored as given.
    alpha : float, default=0.05
        Significance level, forwarded to the parent initializer.
    variance_method : str, default="placebo"
        Either "bootstrap" or "placebo".
    n_bootstrap : int, default=200
        Number of replications; must be at least 2.
    seed : int, optional
        Random seed, stored as given.
    lambda_reg, zeta : float, optional
        Deprecated. A non-None value only triggers a DeprecationWarning;
        the value itself is not used.

    Raises
    ------
    ValueError
        If ``n_bootstrap < 2`` or ``variance_method`` is not recognised.
    """
    # Retired keyword arguments paired with the notice shown when supplied.
    retired_arguments = (
        (
            lambda_reg,
            "lambda_reg is deprecated and ignored. Regularization is now "
            "auto-computed from data. Use zeta_omega to override unit weight "
            "regularization. Will be removed in v3.1.",
        ),
        (
            zeta,
            "zeta is deprecated and ignored. Use zeta_lambda to override "
            "time weight regularization. Will be removed in v3.1.",
        ),
    )
    for supplied, notice in retired_arguments:
        if supplied is not None:
            # stacklevel=2 attributes the warning to the code constructing
            # the estimator rather than to this initializer.
            warnings.warn(notice, DeprecationWarning, stacklevel=2)

    super().__init__(robust=True, cluster=None, alpha=alpha)
    self.zeta_omega = zeta_omega
    self.zeta_lambda = zeta_lambda
    self.variance_method = variance_method
    self.n_bootstrap = n_bootstrap
    self.seed = seed

    # A standard error cannot be estimated from fewer than two replications.
    if n_bootstrap < 2:
        raise ValueError(
            f"n_bootstrap must be >= 2 (got {n_bootstrap}). At least 2 "
            f"iterations are needed to estimate standard errors."
        )

    valid_methods = ("bootstrap", "placebo")
    if variance_method not in valid_methods:
        raise ValueError(
            f"variance_method must be one of {valid_methods}, " f"got '{variance_method}'"
        )

    # Populated by fit() with the raw optimizer outputs.
    self._unit_weights = None
    self._time_weights = None
182
+
183
def fit(  # type: ignore[override]
    self,
    data: pd.DataFrame,
    outcome: str,
    treatment: str,
    unit: str,
    time: str,
    post_periods: Optional[List[Any]] = None,
    covariates: Optional[List[str]] = None,
    survey_design=None,
) -> SyntheticDiDResults:
    """
    Fit the Synthetic Difference-in-Differences model.

    Parameters
    ----------
    data : pd.DataFrame
        Panel data with observations for multiple units over multiple
        time periods. The frame is not modified.
    outcome : str
        Name of the outcome variable column.
    treatment : str
        Name of the treatment group indicator column (0/1).
        Should be 1 for all observations of treated units
        (both pre and post treatment).
    unit : str
        Name of the unit identifier column.
    time : str
        Name of the time period column.
    post_periods : list, optional
        List of time period values that are post-treatment.
        If None, the sorted periods are split at ``len(periods) // 2``
        and the later part is used as the post-treatment block (with an
        odd number of periods the post block is one period longer).
    covariates : list, optional
        List of covariate column names. Covariates are residualized
        out before computing the SDID estimator.
    survey_design : SurveyDesign, optional
        Survey design specification. Only pweight weight_type is supported.
        Strata/PSU/FPC are supported via Rao-Wu rescaled bootstrap when
        variance_method='bootstrap'. Placebo variance does not support
        strata/PSU/FPC; use variance_method='bootstrap' for full designs.

    Returns
    -------
    SyntheticDiDResults
        Object containing the ATT estimate, standard error,
        unit weights, and time weights.

    Raises
    ------
    ValueError
        If required parameters are missing, data validation fails,
        or a non-pweight survey design is provided.
    NotImplementedError
        If the survey design uses replicate-weight variance, or if
        strata/PSU/FPC are combined with variance_method='placebo'.

    Notes
    -----
    With placebo variance the reported p-value is the share of placebo
    effects whose magnitude is at least ``|ATT|``, floored at
    ``1 / (n_effects + 1)``. With bootstrap variance the p-value returned
    by ``safe_inference`` is reported instead: bootstrap replicates are
    centred on the estimate rather than on zero, so they cannot serve as
    a null distribution in that formula.
    """
    # Validate inputs
    if outcome is None or treatment is None or unit is None or time is None:
        raise ValueError("Must provide 'outcome', 'treatment', 'unit', and 'time'")

    # Check columns exist
    required_cols = [outcome, treatment, unit, time]
    if covariates:
        required_cols.extend(covariates)

    missing = [c for c in required_cols if c not in data.columns]
    if missing:
        raise ValueError(f"Missing columns: {missing}")

    # Resolve survey design
    from diff_diff.survey import (
        _extract_unit_survey_weights,
        _resolve_survey_for_fit,
        _validate_unit_constant_survey,
    )
    from diff_diff.utils import _compute_noise_level

    resolved_survey, survey_weights, survey_weight_type, survey_metadata = (
        _resolve_survey_for_fit(survey_design, data, "analytical")
    )
    # Reject replicate-weight designs — SyntheticDiD uses bootstrap variance
    if resolved_survey is not None and resolved_survey.uses_replicate_variance:
        raise NotImplementedError(
            "SyntheticDiD does not yet support replicate-weight survey "
            "designs. Use a TSL-based survey design (strata/psu/fpc)."
        )
    # Validate pweight only (strata/PSU/FPC are allowed for Rao-Wu bootstrap)
    if resolved_survey is not None and resolved_survey.weight_type != "pweight":
        raise ValueError(
            "SyntheticDiD survey support requires weight_type='pweight'. "
            f"Got '{resolved_survey.weight_type}'."
        )

    # True when the design carries structure beyond plain weights. Computed
    # once and reused for both the placebo guard and the Rao-Wu setup.
    has_full_design = resolved_survey is not None and (
        resolved_survey.strata is not None
        or resolved_survey.psu is not None
        or resolved_survey.fpc is not None
    )

    # Reject placebo + full survey design (strata/PSU/FPC would be silently ignored)
    if has_full_design and self.variance_method == "placebo":
        raise NotImplementedError(
            "SyntheticDiD with variance_method='placebo' does not support strata/PSU/FPC. "
            "Use variance_method='bootstrap' for full survey design support."
        )

    # Validate treatment is binary
    validate_binary(data[treatment].values, "treatment")

    # Get all unique time periods
    all_periods = sorted(data[time].unique())

    if len(all_periods) < 2:
        raise ValueError("Need at least 2 time periods")

    # Determine pre and post periods
    if post_periods is None:
        mid = len(all_periods) // 2
        post_periods = list(all_periods[mid:])
        pre_periods = list(all_periods[:mid])
    else:
        post_periods = list(post_periods)
        pre_periods = [p for p in all_periods if p not in post_periods]

    if len(post_periods) == 0:
        raise ValueError("Must have at least one post-treatment period")
    if len(pre_periods) == 0:
        raise ValueError("Must have at least one pre-treatment period")

    # Validate post_periods are in data
    for p in post_periods:
        if p not in all_periods:
            raise ValueError(f"Post-period '{p}' not found in time column")

    # Validate treatment is constant within unit (SDID requires block treatment)
    treatment_nunique = data.groupby(unit)[treatment].nunique()
    varying_units = treatment_nunique[treatment_nunique > 1]
    if len(varying_units) > 0:
        example_unit = varying_units.index[0]
        example_vals = sorted(data.loc[data[unit] == example_unit, treatment].unique())
        raise ValueError(
            f"Treatment indicator varies within {len(varying_units)} unit(s) "
            f"(e.g., unit '{example_unit}' has values {example_vals}). "
            f"SyntheticDiD requires 'block' treatment where treatment is "
            f"constant within each unit across all time periods. "
            f"For staggered adoption designs, use CallawaySantAnna or "
            f"ImputationDiD instead."
        )

    # Identify treated and control units (indicator is unit-constant here)
    unit_treatment = data.groupby(unit)[treatment].first()
    treated_units = unit_treatment[unit_treatment == 1].index.tolist()
    control_units = unit_treatment[unit_treatment == 0].index.tolist()

    if len(treated_units) == 0:
        raise ValueError("No treated units found")
    if len(control_units) == 0:
        raise ValueError("No control units found")

    # Validate balanced panel (SDID requires all units observed in all periods)
    periods_per_unit = data.groupby(unit)[time].nunique()
    expected_n_periods = len(all_periods)
    unbalanced_units = periods_per_unit[periods_per_unit != expected_n_periods]
    if len(unbalanced_units) > 0:
        example_unit = unbalanced_units.index[0]
        actual_count = unbalanced_units.iloc[0]
        raise ValueError(
            f"Panel is not balanced: {len(unbalanced_units)} unit(s) do not "
            f"have observations in all {expected_n_periods} periods "
            f"(e.g., unit '{example_unit}' has {actual_count} periods). "
            f"SyntheticDiD requires a balanced panel. Use "
            f"diff_diff.prep.balance_panel() to balance the panel first."
        )

    # Validate and extract survey weights.
    # A unit-level ResolvedSurveyDesign is built for the Rao-Wu bootstrap
    # only when strata/PSU/FPC are present.
    _unit_resolved_survey = None
    w_treated = None
    w_control = None
    if resolved_survey is not None:
        _validate_unit_constant_survey(data, unit, survey_design)
        w_treated = _extract_unit_survey_weights(data, unit, survey_design, treated_units)
        w_control = _extract_unit_survey_weights(data, unit, survey_design, control_units)

        if has_full_design:
            _unit_resolved_survey = self._build_unit_resolved_survey(
                data,
                unit,
                survey_design,
                control_units,
                treated_units,
            )

    # Residualize covariates if provided. No defensive copy is taken here:
    # _residualize_covariates works on its own copy, and without covariates
    # the frame is only read (pivoted) below.
    working_data = data
    if covariates:
        working_data = self._residualize_covariates(
            data,
            outcome,
            covariates,
            unit,
            time,
            survey_weights=survey_weights,
            survey_weight_type=survey_weight_type,
        )

    # Create outcome matrices
    # Shape: (n_periods, n_units)
    Y_pre_control, Y_post_control, Y_pre_treated, Y_post_treated = (
        self._create_outcome_matrices(
            working_data,
            outcome,
            unit,
            time,
            pre_periods,
            post_periods,
            treated_units,
            control_units,
        )
    )

    # Compute auto-regularization (or use user overrides)
    auto_zeta_omega, auto_zeta_lambda = _compute_regularization(
        Y_pre_control, len(treated_units), len(post_periods)
    )
    zeta_omega = self.zeta_omega if self.zeta_omega is not None else auto_zeta_omega
    zeta_lambda = self.zeta_lambda if self.zeta_lambda is not None else auto_zeta_lambda

    # Store noise level for diagnostics
    noise_level = _compute_noise_level(Y_pre_control)

    # Data-dependent convergence threshold (matches R's 1e-5 * noise.level).
    # Floor of 1e-5 when noise_level == 0: R would use 0.0, causing FW to
    # run all max_iter iterations. The result is equivalent (zero-noise
    # data has no variation to optimize), but the floor enables early stop.
    min_decrease = 1e-5 * noise_level if noise_level > 0 else 1e-5

    # Treated-group mean trajectories; survey weights enter here.
    if w_treated is not None:
        Y_pre_treated_mean = np.average(Y_pre_treated, axis=1, weights=w_treated)
        Y_post_treated_mean = np.average(Y_post_treated, axis=1, weights=w_treated)
    else:
        Y_pre_treated_mean = np.mean(Y_pre_treated, axis=1)
        Y_post_treated_mean = np.mean(Y_post_treated, axis=1)

    # Compute unit weights (Frank-Wolfe with sparsification)
    unit_weights = compute_sdid_unit_weights(
        Y_pre_control,
        Y_pre_treated_mean,
        zeta_omega=zeta_omega,
        min_decrease=min_decrease,
    )

    # Compute time weights (Frank-Wolfe on collapsed form)
    time_weights = compute_time_weights(
        Y_pre_control,
        Y_post_control,
        zeta_lambda=zeta_lambda,
        min_decrease=min_decrease,
    )

    # Compose ω with control survey weights (WLS regression interpretation).
    # Frank-Wolfe finds best trajectory match; survey weights reweight by
    # population importance post-optimization.
    if w_control is not None:
        omega_eff = unit_weights * w_control
        omega_eff = omega_eff / omega_eff.sum()
    else:
        omega_eff = unit_weights

    # Compute SDID estimate
    att = compute_sdid_estimator(
        Y_pre_control,
        Y_post_control,
        Y_pre_treated_mean,
        Y_post_treated_mean,
        omega_eff,
        time_weights,
    )

    # Compute pre-treatment fit (RMSE) using composed weights
    synthetic_pre = Y_pre_control @ omega_eff
    pre_fit_rmse = np.sqrt(np.mean((Y_pre_treated_mean - synthetic_pre) ** 2))

    # Warn if pre-treatment fit is poor (Registry requirement).
    # Threshold: 1× SD of treated pre-treatment outcomes — a natural baseline
    # since RMSE exceeding natural variation indicates the synthetic control
    # fails to reproduce the treated series' level or trend.
    pre_treatment_sd = (
        np.std(Y_pre_treated_mean, ddof=1) if len(Y_pre_treated_mean) > 1 else 0.0
    )
    if pre_treatment_sd > 0 and pre_fit_rmse > pre_treatment_sd:
        warnings.warn(
            f"Pre-treatment fit is poor: RMSE ({pre_fit_rmse:.4f}) exceeds "
            f"the standard deviation of treated pre-treatment outcomes "
            f"({pre_treatment_sd:.4f}). The synthetic control may not "
            f"adequately reproduce treated unit trends. Consider adding "
            f"more control units or adjusting regularization.",
            UserWarning,
            stacklevel=2,
        )

    # Compute standard errors based on variance_method
    if self.variance_method == "bootstrap":
        se, bootstrap_estimates = self._bootstrap_se(
            Y_pre_control,
            Y_post_control,
            Y_pre_treated,
            Y_post_treated,
            unit_weights,
            time_weights,
            w_treated=w_treated,
            w_control=w_control,
            resolved_survey=_unit_resolved_survey,
        )
        # Stored under the results' `placebo_effects` field for inspection.
        placebo_effects = bootstrap_estimates
        inference_method = "bootstrap"
    else:
        # Use placebo-based variance (R's synthdid Algorithm 4)
        se, placebo_effects = self._placebo_variance_se(
            Y_pre_control,
            Y_post_control,
            Y_pre_treated_mean,
            Y_post_treated_mean,
            n_treated=len(treated_units),
            zeta_omega=zeta_omega,
            zeta_lambda=zeta_lambda,
            min_decrease=min_decrease,
            replications=self.n_bootstrap,
            w_control=w_control,
        )
        inference_method = "placebo"

    # Compute test statistics
    t_stat, p_value_analytical, conf_int = safe_inference(att, se, alpha=self.alpha)

    # The empirical tail share is only meaningful against a distribution
    # generated under "no effect", i.e. the placebo effects. Bootstrap
    # replicates scatter around the ATT itself, so for them the SE-based
    # p-value from safe_inference is used.
    use_empirical_p = (
        inference_method == "placebo"
        and len(placebo_effects) > 0
        and np.isfinite(t_stat)
    )
    if use_empirical_p:
        p_value = max(
            np.mean(np.abs(placebo_effects) >= np.abs(att)),
            1.0 / (len(placebo_effects) + 1),
        )
    else:
        p_value = p_value_analytical

    # Create weight dictionaries. When survey weights are active, store
    # the effective (composed) weights that were actually used for the ATT
    # so that results.unit_weights matches the estimator.
    unit_weights_dict = dict(zip(control_units, omega_eff))
    time_weights_dict = dict(zip(pre_periods, time_weights))

    # Store results
    self.results_ = SyntheticDiDResults(
        att=att,
        se=se,
        t_stat=t_stat,
        p_value=p_value,
        conf_int=conf_int,
        n_obs=len(data),
        n_treated=len(treated_units),
        n_control=len(control_units),
        unit_weights=unit_weights_dict,
        time_weights=time_weights_dict,
        pre_periods=pre_periods,
        post_periods=post_periods,
        alpha=self.alpha,
        variance_method=inference_method,
        noise_level=noise_level,
        zeta_omega=zeta_omega,
        zeta_lambda=zeta_lambda,
        pre_treatment_fit=pre_fit_rmse,
        placebo_effects=placebo_effects if len(placebo_effects) > 0 else None,
        n_bootstrap=self.n_bootstrap if inference_method == "bootstrap" else None,
        survey_metadata=survey_metadata,
    )

    # Keep the raw (un-composed) optimizer weights on the estimator.
    self._unit_weights = unit_weights
    self._time_weights = time_weights
    self.is_fitted_ = True

    return self.results_
575
+
576
def _create_outcome_matrices(
    self,
    data: pd.DataFrame,
    outcome: str,
    unit: str,
    time: str,
    pre_periods: List[Any],
    post_periods: List[Any],
    treated_units: List[Any],
    control_units: List[Any],
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """
    Build the four outcome blocks used by the SDID estimator.

    The long-format panel is pivoted to a periods-by-units table, from
    which the pre/post rows and control/treated columns are selected in
    the order given by the caller's lists.

    Returns
    -------
    tuple
        (Y_pre_control, Y_post_control, Y_pre_treated, Y_post_treated)
        Each is a 2D float array with shape (n_periods, n_units)
    """
    # Rows are time periods, columns are units.
    wide = data.pivot(index=time, columns=unit, values=outcome)

    # Selection order mirrors the documented return order.
    selections = (
        (pre_periods, control_units),
        (post_periods, control_units),
        (pre_periods, treated_units),
        (post_periods, treated_units),
    )
    raw_blocks = [wide.loc[periods, units].values for periods, units in selections]

    # Cast only after every slice has been taken.
    return tuple(block.astype(float) for block in raw_blocks)
611
+
612
def _residualize_covariates(
    self,
    data: pd.DataFrame,
    outcome: str,
    covariates: List[str],
    unit: str,
    time: str,
    survey_weights=None,
    survey_weight_type=None,
) -> pd.DataFrame:
    """
    Replace the outcome with re-centered regression residuals.

    The outcome is regressed on an intercept, the covariates, and unit
    and time dummies (first level of each dropped). The residuals, shifted
    by the outcome mean, are written back to the outcome column of a copy
    of ``data``; the caller's frame is left untouched. When survey weights
    are given they are passed to the solver and also used for the mean
    that is added back.

    Returns
    -------
    pd.DataFrame
        Copy of ``data`` whose ``outcome`` column holds the adjusted values.
    """
    adjusted = data.copy()
    n_rows = len(adjusted)

    # Regressors: intercept | covariates | unit dummies | time dummies.
    covariate_block = adjusted[covariates].values.astype(float)
    unit_block = pd.get_dummies(adjusted[unit], prefix="u", drop_first=True).values
    time_block = pd.get_dummies(adjusted[time], prefix="t", drop_first=True).values
    design = np.column_stack([np.ones(n_rows), covariate_block, unit_block, time_block])

    response = adjusted[outcome].values.astype(float)

    # Only the residuals of the fit are needed.
    _coeffs, resid, _unused = solve_ols(
        design,
        response,
        return_vcov=False,
        weights=survey_weights,
        weight_type=survey_weight_type,
    )

    # Shift the residuals back to the outcome's (weighted) mean level so
    # the adjusted series stays on an interpretable scale.
    center = (
        np.mean(response)
        if survey_weights is None
        else np.average(response, weights=survey_weights)
    )
    adjusted[outcome] = resid + center

    return adjusted
659
+
660
@staticmethod
def _build_unit_resolved_survey(data, unit_col, survey_design, control_units, treated_units):
    """Build a unit-level ResolvedSurveyDesign for Rao-Wu bootstrap.

    Collapses ``data`` to one record per unit and arranges the records
    control units first, then treated units, so positions line up with
    the columns of the panel matrix. The survey columns are assumed to be
    constant within a unit (the collapse uses ``groupby(...).first()``).

    Weights are rescaled to average one across units; when the design has
    no weight column every unit gets weight 1. Strata and PSU labels are
    converted to codes with ``_factorize_cluster_ids``; with ``nest`` set
    and strata present, PSU labels are first combined with their stratum
    code.
    """
    from diff_diff.linalg import _factorize_cluster_ids
    from diff_diff.survey import ResolvedSurveyDesign

    ordered_units = [*control_units, *treated_units]
    unit_count = len(ordered_units)
    # One record per unit, in control-then-treated order.
    per_unit = data.groupby(unit_col).first().loc[ordered_units]

    # Weights (normalized pweights, mean=1)
    if survey_design.weights is None:
        unit_w = np.ones(unit_count, dtype=np.float64)
    else:
        raw = per_unit[survey_design.weights].values.astype(np.float64)
        unit_w = raw * (unit_count / np.sum(raw))

    # Strata
    strata_codes, strata_count = None, 0
    if survey_design.strata is not None:
        strata_codes = _factorize_cluster_ids(per_unit[survey_design.strata].values)
        strata_count = len(np.unique(strata_codes))

    # PSU
    psu_codes, psu_count = None, 0
    if survey_design.psu is not None:
        psu_labels = per_unit[survey_design.psu].values
        if survey_design.nest and strata_codes is not None:
            # Nested PSUs: qualify each label with its stratum code before coding.
            psu_labels = np.array([f"{s}_{p}" for s, p in zip(strata_codes, psu_labels)])
        psu_codes = _factorize_cluster_ids(psu_labels)
        psu_count = len(np.unique(psu_codes))

    # FPC
    fpc_values = (
        None
        if survey_design.fpc is None
        else per_unit[survey_design.fpc].values.astype(np.float64)
    )

    return ResolvedSurveyDesign(
        weights=unit_w,
        weight_type=survey_design.weight_type,
        strata=strata_codes,
        psu=psu_codes,
        fpc=fpc_values,
        n_strata=strata_count,
        n_psu=psu_count,
        lonely_psu=survey_design.lonely_psu,
    )
716
+
717
+ def _bootstrap_se(
718
+ self,
719
+ Y_pre_control: np.ndarray,
720
+ Y_post_control: np.ndarray,
721
+ Y_pre_treated: np.ndarray,
722
+ Y_post_treated: np.ndarray,
723
+ unit_weights: np.ndarray,
724
+ time_weights: np.ndarray,
725
+ w_treated=None,
726
+ w_control=None,
727
+ resolved_survey=None,
728
+ ) -> Tuple[float, np.ndarray]:
729
+ """Compute bootstrap standard error matching R's synthdid bootstrap_sample.
730
+
731
+ Resamples all units (control + treated) with replacement, renormalizes
732
+ original unit weights for the resampled controls, and computes the
733
+ SDID estimator with **fixed** weights (no re-estimation).
734
+
735
+ When ``resolved_survey`` is provided (unit-level ResolvedSurveyDesign
736
+ with strata/PSU/FPC), uses Rao-Wu rescaled bootstrap instead of the
737
+ simple pairs bootstrap. The Rao-Wu weights are per-unit rescaled
738
+ survey weights; they composite with SDID unit weights the same way
739
+ pweights do in the weights-only path.
740
+
741
+ This matches R's ``synthdid::vcov(method="bootstrap")``.
742
+ """
743
+ from diff_diff.bootstrap_utils import generate_rao_wu_weights
744
+
745
+ rng = np.random.default_rng(self.seed)
746
+ n_control = Y_pre_control.shape[1]
747
+ n_treated = Y_pre_treated.shape[1]
748
+ n_total = n_control + n_treated
749
+
750
+ # Build full panel matrix: (n_pre+n_post, n_control+n_treated)
751
+ Y_full = np.block([[Y_pre_control, Y_pre_treated], [Y_post_control, Y_post_treated]])
752
+ n_pre = Y_pre_control.shape[0]
753
+
754
+ # Determine whether to use Rao-Wu (full design) or pairs bootstrap
755
+ _use_rao_wu = resolved_survey is not None
756
+
757
+ # Check for unidentified variance (single unstratified PSU)
758
+ if (
759
+ _use_rao_wu
760
+ and resolved_survey.psu is not None
761
+ and resolved_survey.n_psu < 2
762
+ and resolved_survey.strata is None
763
+ ):
764
+ return np.nan, np.array([])
765
+
766
+ bootstrap_estimates = []
767
+
768
+ for _ in range(self.n_bootstrap):
769
+ if _use_rao_wu:
770
+ # --- Rao-Wu rescaled bootstrap path ---
771
+ # generate_rao_wu_weights returns per-unit rescaled survey
772
+ # weights (shape n_total). Units whose PSU was not drawn
773
+ # get weight 0, effectively dropping them.
774
+ try:
775
+ boot_rw = generate_rao_wu_weights(resolved_survey, rng)
776
+
777
+ rw_control = boot_rw[:n_control]
778
+ rw_treated = boot_rw[n_control:]
779
+
780
+ # Skip if all control or all treated weights are zero
781
+ if rw_control.sum() == 0 or rw_treated.sum() == 0:
782
+ continue
783
+
784
+ # Composite SDID unit weights with Rao-Wu rescaled weights
785
+ boot_omega_eff = unit_weights * rw_control
786
+ if boot_omega_eff.sum() > 0:
787
+ boot_omega_eff = boot_omega_eff / boot_omega_eff.sum()
788
+ else:
789
+ continue
790
+
791
+ # Treated mean weighted by Rao-Wu weights
792
+ Y_boot_pre_t_mean = np.average(
793
+ Y_pre_treated,
794
+ axis=1,
795
+ weights=rw_treated,
796
+ )
797
+ Y_boot_post_t_mean = np.average(
798
+ Y_post_treated,
799
+ axis=1,
800
+ weights=rw_treated,
801
+ )
802
+
803
+ tau = compute_sdid_estimator(
804
+ Y_pre_control,
805
+ Y_post_control,
806
+ Y_boot_pre_t_mean,
807
+ Y_boot_post_t_mean,
808
+ boot_omega_eff,
809
+ time_weights,
810
+ )
811
+ if np.isfinite(tau):
812
+ bootstrap_estimates.append(tau)
813
+
814
+ except (ValueError, LinAlgError):
815
+ continue
816
+ else:
817
+ # --- Standard pairs bootstrap path (weights-only or no survey) ---
818
+ # Resample ALL units with replacement
819
+ boot_idx = rng.choice(n_total, size=n_total, replace=True)
820
+
821
+ # Identify which resampled units are control vs treated
822
+ boot_is_control = boot_idx < n_control
823
+ boot_control_idx = boot_idx[boot_is_control]
824
+ boot_treated_idx = boot_idx[~boot_is_control]
825
+
826
+ # Skip if no control or no treated units in bootstrap sample
827
+ if len(boot_control_idx) == 0 or len(boot_treated_idx) == 0:
828
+ continue
829
+
830
+ try:
831
+ # Renormalize original unit weights for the resampled controls
832
+ boot_omega = _sum_normalize(unit_weights[boot_control_idx])
833
+
834
+ # Compose with control survey weights if present
835
+ if w_control is not None:
836
+ boot_w_c = w_control[boot_idx[boot_is_control]]
837
+ boot_omega_eff = boot_omega * boot_w_c
838
+ boot_omega_eff = boot_omega_eff / boot_omega_eff.sum()
839
+ else:
840
+ boot_omega_eff = boot_omega
841
+
842
+ # Extract resampled outcome matrices
843
+ Y_boot = Y_full[:, boot_idx]
844
+ Y_boot_pre_c = Y_boot[:n_pre, boot_is_control]
845
+ Y_boot_post_c = Y_boot[n_pre:, boot_is_control]
846
+ Y_boot_pre_t = Y_boot[:n_pre, ~boot_is_control]
847
+ Y_boot_post_t = Y_boot[n_pre:, ~boot_is_control]
848
+
849
+ # Compute ATT with FIXED weights (do NOT re-estimate).
850
+ # boot_idx[~boot_is_control] maps to original index space;
851
+ # subtract n_control to index into w_treated. Duplicate draws
852
+ # carry identical weights -> alignment is safe.
853
+ if w_treated is not None:
854
+ boot_w_t = w_treated[boot_idx[~boot_is_control] - n_control]
855
+ Y_boot_pre_t_mean = np.average(
856
+ Y_boot_pre_t,
857
+ axis=1,
858
+ weights=boot_w_t,
859
+ )
860
+ Y_boot_post_t_mean = np.average(
861
+ Y_boot_post_t,
862
+ axis=1,
863
+ weights=boot_w_t,
864
+ )
865
+ else:
866
+ Y_boot_pre_t_mean = np.mean(Y_boot_pre_t, axis=1)
867
+ Y_boot_post_t_mean = np.mean(Y_boot_post_t, axis=1)
868
+
869
+ tau = compute_sdid_estimator(
870
+ Y_boot_pre_c,
871
+ Y_boot_post_c,
872
+ Y_boot_pre_t_mean,
873
+ Y_boot_post_t_mean,
874
+ boot_omega_eff,
875
+ time_weights,
876
+ )
877
+ if np.isfinite(tau):
878
+ bootstrap_estimates.append(tau)
879
+
880
+ except (ValueError, LinAlgError):
881
+ continue
882
+
883
+ bootstrap_estimates = np.array(bootstrap_estimates)
884
+
885
+ # Check bootstrap success rate and handle failures
886
+ n_successful = len(bootstrap_estimates)
887
+ failure_rate = 1 - (n_successful / self.n_bootstrap)
888
+
889
+ if n_successful == 0:
890
+ raise ValueError(
891
+ f"All {self.n_bootstrap} bootstrap iterations failed. "
892
+ f"This typically occurs when:\n"
893
+ f" - Sample size is too small for reliable resampling\n"
894
+ f" - Weight matrices are singular or near-singular\n"
895
+ f" - Insufficient pre-treatment periods for weight estimation\n"
896
+ f" - Too few control units relative to treated units\n"
897
+ f"Consider using variance_method='placebo' or increasing "
898
+ f"the regularization parameters (zeta_omega, zeta_lambda)."
899
+ )
900
+ elif n_successful == 1:
901
+ warnings.warn(
902
+ f"Only 1/{self.n_bootstrap} bootstrap iteration succeeded. "
903
+ f"Standard error cannot be computed reliably (requires at least 2). "
904
+ f"Returning SE=0.0. Consider using variance_method='placebo' or "
905
+ f"increasing the regularization (zeta_omega, zeta_lambda).",
906
+ UserWarning,
907
+ stacklevel=2,
908
+ )
909
+ se = 0.0
910
+ elif failure_rate > 0.05:
911
+ warnings.warn(
912
+ f"Only {n_successful}/{self.n_bootstrap} bootstrap iterations succeeded "
913
+ f"({failure_rate:.1%} failure rate). Standard errors may be unreliable. "
914
+ f"This can occur with small samples or insufficient pre-treatment periods.",
915
+ UserWarning,
916
+ stacklevel=2,
917
+ )
918
+ se = float(np.std(bootstrap_estimates, ddof=1))
919
+ else:
920
+ se = float(np.std(bootstrap_estimates, ddof=1))
921
+
922
+ return se, bootstrap_estimates
923
+
924
+ def _placebo_variance_se(
925
+ self,
926
+ Y_pre_control: np.ndarray,
927
+ Y_post_control: np.ndarray,
928
+ Y_pre_treated_mean: np.ndarray,
929
+ Y_post_treated_mean: np.ndarray,
930
+ n_treated: int,
931
+ zeta_omega: float = 0.0,
932
+ zeta_lambda: float = 0.0,
933
+ min_decrease: float = 1e-5,
934
+ replications: int = 200,
935
+ w_control=None,
936
+ ) -> Tuple[float, np.ndarray]:
937
+ """
938
+ Compute placebo-based variance matching R's synthdid methodology.
939
+
940
+ This implements Algorithm 4 from Arkhangelsky et al. (2021),
941
+ matching R's synthdid::vcov(method = "placebo"):
942
+
943
+ 1. Randomly sample N₀ control indices (permutation)
944
+ 2. Designate last N₁ as pseudo-treated, first (N₀-N₁) as pseudo-controls
945
+ 3. Re-estimate both omega and lambda on the permuted data (from
946
+ uniform initialization, fresh start), matching R's behavior where
947
+ ``update.omega=TRUE, update.lambda=TRUE`` are passed via ``opts``
948
+ 4. Compute SDID estimate with re-estimated weights
949
+ 5. Repeat `replications` times
950
+ 6. SE = sqrt((r-1)/r) * sd(estimates)
951
+
952
+ Parameters
953
+ ----------
954
+ Y_pre_control : np.ndarray
955
+ Control outcomes in pre-treatment periods, shape (n_pre, n_control).
956
+ Y_post_control : np.ndarray
957
+ Control outcomes in post-treatment periods, shape (n_post, n_control).
958
+ Y_pre_treated_mean : np.ndarray
959
+ Mean treated outcomes in pre-treatment periods, shape (n_pre,).
960
+ Y_post_treated_mean : np.ndarray
961
+ Mean treated outcomes in post-treatment periods, shape (n_post,).
962
+ n_treated : int
963
+ Number of treated units in the original estimation.
964
+ zeta_omega : float
965
+ Regularization parameter for unit weights (for re-estimation).
966
+ zeta_lambda : float
967
+ Regularization parameter for time weights (for re-estimation).
968
+ min_decrease : float
969
+ Convergence threshold for Frank-Wolfe (for re-estimation).
970
+ replications : int, default=200
971
+ Number of placebo replications.
972
+
973
+ Returns
974
+ -------
975
+ tuple
976
+ (se, placebo_effects) where se is the standard error and
977
+ placebo_effects is the array of placebo treatment effects.
978
+
979
+ References
980
+ ----------
981
+ Arkhangelsky, D., Athey, S., Hirshberg, D. A., Imbens, G. W., & Wager, S.
982
+ (2021). Synthetic Difference-in-Differences. American Economic Review,
983
+ 111(12), 4088-4118. Algorithm 4.
984
+ """
985
+ rng = np.random.default_rng(self.seed)
986
+ n_pre, n_control = Y_pre_control.shape
987
+
988
+ # Ensure we have enough controls for the split
989
+ n_pseudo_control = n_control - n_treated
990
+ if n_pseudo_control < 1:
991
+ warnings.warn(
992
+ f"Not enough control units ({n_control}) for placebo variance "
993
+ f"estimation with {n_treated} treated units. "
994
+ f"Consider using variance_method='bootstrap'.",
995
+ UserWarning,
996
+ stacklevel=3,
997
+ )
998
+ return 0.0, np.array([])
999
+
1000
+ placebo_estimates = []
1001
+
1002
+ for _ in range(replications):
1003
+ try:
1004
+ # Random permutation of control indices (Algorithm 4, step 1)
1005
+ perm = rng.permutation(n_control)
1006
+
1007
+ # Split into pseudo-controls and pseudo-treated (step 2)
1008
+ pseudo_control_idx = perm[:n_pseudo_control]
1009
+ pseudo_treated_idx = perm[n_pseudo_control:]
1010
+
1011
+ # Get pseudo-control and pseudo-treated outcomes
1012
+ Y_pre_pseudo_control = Y_pre_control[:, pseudo_control_idx]
1013
+ Y_post_pseudo_control = Y_post_control[:, pseudo_control_idx]
1014
+
1015
+ # Pseudo-treated means: survey-weighted when available
1016
+ if w_control is not None:
1017
+ pseudo_w_tr = w_control[pseudo_treated_idx]
1018
+ Y_pre_pseudo_treated_mean = np.average(
1019
+ Y_pre_control[:, pseudo_treated_idx],
1020
+ axis=1,
1021
+ weights=pseudo_w_tr,
1022
+ )
1023
+ Y_post_pseudo_treated_mean = np.average(
1024
+ Y_post_control[:, pseudo_treated_idx],
1025
+ axis=1,
1026
+ weights=pseudo_w_tr,
1027
+ )
1028
+ else:
1029
+ Y_pre_pseudo_treated_mean = np.mean(
1030
+ Y_pre_control[:, pseudo_treated_idx], axis=1
1031
+ )
1032
+ Y_post_pseudo_treated_mean = np.mean(
1033
+ Y_post_control[:, pseudo_treated_idx], axis=1
1034
+ )
1035
+
1036
+ # Re-estimate weights on permuted data (matching R's behavior)
1037
+ # R passes update.omega=TRUE, update.lambda=TRUE via opts,
1038
+ # re-estimating weights from uniform initialization (fresh start).
1039
+ # Unit weights: re-estimate on pseudo-control/pseudo-treated data
1040
+ pseudo_omega = compute_sdid_unit_weights(
1041
+ Y_pre_pseudo_control,
1042
+ Y_pre_pseudo_treated_mean,
1043
+ zeta_omega=zeta_omega,
1044
+ min_decrease=min_decrease,
1045
+ )
1046
+
1047
+ # Compose pseudo_omega with control survey weights
1048
+ if w_control is not None:
1049
+ pseudo_w_co = w_control[pseudo_control_idx]
1050
+ pseudo_omega_eff = pseudo_omega * pseudo_w_co
1051
+ pseudo_omega_eff = pseudo_omega_eff / pseudo_omega_eff.sum()
1052
+ else:
1053
+ pseudo_omega_eff = pseudo_omega
1054
+
1055
+ # Time weights: re-estimate on pseudo-control data
1056
+ pseudo_lambda = compute_time_weights(
1057
+ Y_pre_pseudo_control,
1058
+ Y_post_pseudo_control,
1059
+ zeta_lambda=zeta_lambda,
1060
+ min_decrease=min_decrease,
1061
+ )
1062
+
1063
+ # Compute placebo SDID estimate (step 4)
1064
+ tau = compute_sdid_estimator(
1065
+ Y_pre_pseudo_control,
1066
+ Y_post_pseudo_control,
1067
+ Y_pre_pseudo_treated_mean,
1068
+ Y_post_pseudo_treated_mean,
1069
+ pseudo_omega_eff,
1070
+ pseudo_lambda,
1071
+ )
1072
+ if np.isfinite(tau):
1073
+ placebo_estimates.append(tau)
1074
+
1075
+ except (ValueError, LinAlgError, ZeroDivisionError):
1076
+ # Skip failed iterations
1077
+ continue
1078
+
1079
+ placebo_estimates = np.array(placebo_estimates)
1080
+ n_successful = len(placebo_estimates)
1081
+
1082
+ if n_successful < 2:
1083
+ warnings.warn(
1084
+ f"Only {n_successful} placebo replications completed successfully. "
1085
+ f"Standard error cannot be estimated reliably. "
1086
+ f"Consider using variance_method='bootstrap' or increasing "
1087
+ f"the number of control units.",
1088
+ UserWarning,
1089
+ stacklevel=3,
1090
+ )
1091
+ return 0.0, placebo_estimates
1092
+
1093
+ # Warn if many replications failed
1094
+ failure_rate = 1 - (n_successful / replications)
1095
+ if failure_rate > 0.05:
1096
+ warnings.warn(
1097
+ f"Only {n_successful}/{replications} placebo replications succeeded "
1098
+ f"({failure_rate:.1%} failure rate). Standard errors may be unreliable.",
1099
+ UserWarning,
1100
+ stacklevel=3,
1101
+ )
1102
+
1103
+ # Compute SE using R's formula: sqrt((r-1)/r) * sd(estimates)
1104
+ # This matches synthdid::vcov.R exactly
1105
+ se = np.sqrt((n_successful - 1) / n_successful) * np.std(placebo_estimates, ddof=1)
1106
+
1107
+ return se, placebo_estimates
1108
+
1109
def get_params(self) -> Dict[str, Any]:
    """Return the estimator's configurable parameters as a name -> value dict.

    The keys appear in a fixed order: ``zeta_omega``, ``zeta_lambda``,
    ``alpha``, ``variance_method``, ``n_bootstrap``, ``seed``. Each value
    is read from the attribute of the same name on this instance.
    """
    param_names = (
        "zeta_omega",
        "zeta_lambda",
        "alpha",
        "variance_method",
        "n_bootstrap",
        "seed",
    )
    return {name: getattr(self, name) for name in param_names}
1119
+
1120
def set_params(self, **params) -> "SyntheticDiD":
    """Set estimator parameters.

    Every key is validated before anything is assigned, so a call that
    contains an unknown parameter raises without modifying the estimator.

    Parameters
    ----------
    **params
        Parameter names mapped to their new values. A name is accepted
        when the instance already has an attribute with that name. The
        deprecated names ``lambda_reg`` and ``zeta`` are accepted but
        ignored, each emitting a ``DeprecationWarning``.

    Returns
    -------
    SyntheticDiD
        This estimator, to allow call chaining.

    Raises
    ------
    ValueError
        If a key is neither a deprecated name nor an existing attribute.
        No parameter is changed in that case.
    """
    # Deprecated parameter names — emit warning and ignore
    _deprecated = {"lambda_reg", "zeta"}

    # Pass 1: reject unknown names up front. Interleaving validation with
    # assignment would leave earlier keys applied when a later one fails.
    for key in params:
        if key not in _deprecated and not hasattr(self, key):
            raise ValueError(f"Unknown parameter: {key}")

    # Pass 2: all names are known, so apply them.
    for key, value in params.items():
        if key in _deprecated:
            warnings.warn(
                f"{key} is deprecated and ignored. Use zeta_omega/zeta_lambda "
                f"instead. Will be removed in v3.1.",
                DeprecationWarning,
                stacklevel=2,
            )
        else:
            setattr(self, key, value)
    return self