diff-diff 2.8.2__tar.gz → 2.8.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diff_diff-2.8.2 → diff_diff-2.8.4}/PKG-INFO +1 -1
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/__init__.py +3 -1
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/bootstrap_utils.py +84 -14
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/continuous_did_results.py +15 -1
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/efficient_did_results.py +15 -1
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/estimators.py +165 -21
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/imputation.py +563 -114
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/imputation_bootstrap.py +17 -7
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/imputation_results.py +17 -2
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/prep.py +75 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/prep_dgp.py +16 -1
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/results.py +39 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/stacked_did.py +55 -13
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/stacked_did_results.py +15 -1
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/staggered.py +124 -55
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/staggered_aggregation.py +4 -11
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/staggered_results.py +25 -13
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/staggered_triple_diff_results.py +20 -17
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/sun_abraham.py +159 -14
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/survey.py +253 -7
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/trop.py +11 -7
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/trop_global.py +24 -5
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/trop_local.py +46 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/trop_results.py +13 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/twfe.py +65 -13
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/two_stage.py +283 -39
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/two_stage_bootstrap.py +5 -2
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/two_stage_results.py +15 -1
- {diff_diff-2.8.2 → diff_diff-2.8.4}/pyproject.toml +1 -1
- {diff_diff-2.8.2 → diff_diff-2.8.4}/rust/Cargo.lock +3 -3
- {diff_diff-2.8.2 → diff_diff-2.8.4}/rust/Cargo.toml +1 -1
- {diff_diff-2.8.2 → diff_diff-2.8.4}/README.md +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/_backend.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/bacon.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/continuous_did.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/continuous_did_bspline.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/datasets.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/diagnostics.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/efficient_did.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/efficient_did_bootstrap.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/efficient_did_covariates.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/efficient_did_weights.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/honest_did.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/linalg.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/power.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/practitioner.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/pretrends.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/staggered_bootstrap.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/staggered_triple_diff.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/synthetic_did.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/triple_diff.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/utils.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/visualization/__init__.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/visualization/_common.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/visualization/_continuous.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/visualization/_diagnostic.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/visualization/_event_study.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/visualization/_power.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/visualization/_staggered.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/diff_diff/visualization/_synthetic.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/rust/build.rs +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/rust/src/bootstrap.rs +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/rust/src/lib.rs +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/rust/src/linalg.rs +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/rust/src/trop.rs +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.4}/rust/src/weights.rs +0 -0
|
@@ -94,6 +94,7 @@ from diff_diff.prep import (
|
|
|
94
94
|
make_treatment_indicator,
|
|
95
95
|
rank_control_units,
|
|
96
96
|
summarize_did_data,
|
|
97
|
+
trim_weights,
|
|
97
98
|
validate_did_data,
|
|
98
99
|
wide_to_long,
|
|
99
100
|
)
|
|
@@ -210,7 +211,7 @@ Stacked = StackedDiD
|
|
|
210
211
|
Bacon = BaconDecomposition
|
|
211
212
|
EDiD = EfficientDiD
|
|
212
213
|
|
|
213
|
-
__version__ = "2.8.2"
|
|
214
|
+
__version__ = "2.8.4"
|
|
214
215
|
__all__ = [
|
|
215
216
|
# Estimators
|
|
216
217
|
"DifferenceInDifferences",
|
|
@@ -307,6 +308,7 @@ __all__ = [
|
|
|
307
308
|
"make_post_indicator",
|
|
308
309
|
"wide_to_long",
|
|
309
310
|
"balance_panel",
|
|
311
|
+
"trim_weights",
|
|
310
312
|
"validate_did_data",
|
|
311
313
|
"summarize_did_data",
|
|
312
314
|
"generate_did_data",
|
|
@@ -433,6 +433,10 @@ def generate_survey_multiplier_weights_batch(
|
|
|
433
433
|
is present, weights are scaled by ``sqrt(1 - f_h)`` per stratum so
|
|
434
434
|
the bootstrap variance matches the TSL variance.
|
|
435
435
|
|
|
436
|
+
For ``lonely_psu="adjust"``, singleton PSUs from different strata are
|
|
437
|
+
pooled into a combined pseudo-stratum and weights are generated for
|
|
438
|
+
the pooled group (no FPC scaling on pooled singletons).
|
|
439
|
+
|
|
436
440
|
Parameters
|
|
437
441
|
----------
|
|
438
442
|
n_bootstrap : int
|
|
@@ -454,11 +458,7 @@ def generate_survey_multiplier_weights_batch(
|
|
|
454
458
|
psu = resolved_survey.psu
|
|
455
459
|
strata = resolved_survey.strata
|
|
456
460
|
|
|
457
|
-
|
|
458
|
-
raise NotImplementedError(
|
|
459
|
-
"lonely_psu='adjust' is not yet supported for survey-aware bootstrap. "
|
|
460
|
-
"Use lonely_psu='remove' or 'certainty', or use analytical inference."
|
|
461
|
-
)
|
|
461
|
+
_lonely_psu = resolved_survey.lonely_psu
|
|
462
462
|
|
|
463
463
|
if psu is None:
|
|
464
464
|
# Each observation is its own PSU
|
|
@@ -499,6 +499,7 @@ def generate_survey_multiplier_weights_batch(
|
|
|
499
499
|
psu_to_col = {int(p): i for i, p in enumerate(psu_ids)}
|
|
500
500
|
|
|
501
501
|
unique_strata = np.unique(strata)
|
|
502
|
+
_singleton_cols = [] # For lonely_psu="adjust" pooling
|
|
502
503
|
for h in unique_strata:
|
|
503
504
|
mask_h = strata == h
|
|
504
505
|
|
|
@@ -511,8 +512,12 @@ def generate_survey_multiplier_weights_batch(
|
|
|
511
512
|
cols = np.array([psu_to_col[int(p)] for p in psus_in_h])
|
|
512
513
|
|
|
513
514
|
if n_h < 2:
|
|
514
|
-
|
|
515
|
-
|
|
515
|
+
if _lonely_psu == "adjust":
|
|
516
|
+
# Collect for pooled pseudo-stratum processing
|
|
517
|
+
_singleton_cols.extend(cols.tolist())
|
|
518
|
+
else:
|
|
519
|
+
# remove / certainty — zero weight
|
|
520
|
+
weights[:, cols] = 0.0
|
|
516
521
|
continue
|
|
517
522
|
|
|
518
523
|
# Generate weights for this stratum
|
|
@@ -536,6 +541,31 @@ def generate_survey_multiplier_weights_batch(
|
|
|
536
541
|
|
|
537
542
|
weights[:, cols] = stratum_weights
|
|
538
543
|
|
|
544
|
+
# Pool singleton PSUs into a pseudo-stratum for "adjust"
|
|
545
|
+
if _singleton_cols:
|
|
546
|
+
n_pooled = len(_singleton_cols)
|
|
547
|
+
if n_pooled >= 2:
|
|
548
|
+
pooled_weights = generate_bootstrap_weights_batch_numpy(
|
|
549
|
+
n_bootstrap, n_pooled, weight_type, rng
|
|
550
|
+
)
|
|
551
|
+
# No FPC scaling for pooled singletons (conservative)
|
|
552
|
+
pooled_cols = np.array(_singleton_cols)
|
|
553
|
+
weights[:, pooled_cols] = pooled_weights
|
|
554
|
+
else:
|
|
555
|
+
# Single singleton — cannot pool, zero weight (library-specific
|
|
556
|
+
# fallback; bootstrap adjust with one singleton = remove).
|
|
557
|
+
import warnings
|
|
558
|
+
|
|
559
|
+
warnings.warn(
|
|
560
|
+
"lonely_psu='adjust' with only 1 singleton stratum in "
|
|
561
|
+
"bootstrap: singleton PSU contributes zero variance "
|
|
562
|
+
"(same as 'remove'). At least 2 singleton strata are "
|
|
563
|
+
"needed for pooled pseudo-stratum bootstrap.",
|
|
564
|
+
UserWarning,
|
|
565
|
+
stacklevel=3,
|
|
566
|
+
)
|
|
567
|
+
weights[:, _singleton_cols[0]] = 0.0
|
|
568
|
+
|
|
539
569
|
return weights, psu_ids
|
|
540
570
|
|
|
541
571
|
|
|
@@ -553,6 +583,9 @@ def generate_rao_wu_weights(
|
|
|
553
583
|
With FPC: ``m_h = max(1, round((1 - f_h) * (n_h - 1)))``
|
|
554
584
|
(Rao, Wu & Yue 1992, Section 3).
|
|
555
585
|
|
|
586
|
+
For ``lonely_psu="adjust"``, singleton PSUs are pooled into a combined
|
|
587
|
+
pseudo-stratum and resampled together (no FPC scaling on pooled group).
|
|
588
|
+
|
|
556
589
|
Parameters
|
|
557
590
|
----------
|
|
558
591
|
resolved_survey : ResolvedSurveyDesign
|
|
@@ -570,11 +603,7 @@ def generate_rao_wu_weights(
|
|
|
570
603
|
psu = resolved_survey.psu
|
|
571
604
|
strata = resolved_survey.strata
|
|
572
605
|
|
|
573
|
-
|
|
574
|
-
raise NotImplementedError(
|
|
575
|
-
"lonely_psu='adjust' is not yet supported for survey-aware bootstrap. "
|
|
576
|
-
"Use lonely_psu='remove' or 'certainty', or use analytical inference."
|
|
577
|
-
)
|
|
606
|
+
_lonely_psu_rw = resolved_survey.lonely_psu
|
|
578
607
|
|
|
579
608
|
rescaled = np.zeros(n_obs, dtype=np.float64)
|
|
580
609
|
|
|
@@ -589,14 +618,20 @@ def generate_rao_wu_weights(
|
|
|
589
618
|
unique_strata = np.unique(strata)
|
|
590
619
|
strata_masks = [strata == h for h in unique_strata]
|
|
591
620
|
|
|
621
|
+
# Collect singleton PSUs for "adjust" pooling
|
|
622
|
+
_singleton_info = [] # list of (mask_h, unique_psu_h) tuples
|
|
623
|
+
|
|
592
624
|
for mask_h in strata_masks:
|
|
593
625
|
psu_h = obs_psu[mask_h]
|
|
594
626
|
unique_psu_h = np.unique(psu_h)
|
|
595
627
|
n_h = len(unique_psu_h)
|
|
596
628
|
|
|
597
629
|
if n_h < 2:
|
|
598
|
-
|
|
599
|
-
|
|
630
|
+
if _lonely_psu_rw == "adjust":
|
|
631
|
+
_singleton_info.append((mask_h, unique_psu_h))
|
|
632
|
+
else:
|
|
633
|
+
# remove / certainty — keep original weights (zero variance)
|
|
634
|
+
rescaled[mask_h] = base_weights[mask_h]
|
|
600
635
|
continue
|
|
601
636
|
|
|
602
637
|
# Compute resample size
|
|
@@ -629,6 +664,41 @@ def generate_rao_wu_weights(
|
|
|
629
664
|
local_indices = np.array([psu_to_local[int(obs_psu[idx])] for idx in obs_in_h])
|
|
630
665
|
rescaled[obs_in_h] = base_weights[obs_in_h] * scale_per_psu[local_indices]
|
|
631
666
|
|
|
667
|
+
# Pool singleton PSUs into a pseudo-stratum for "adjust"
|
|
668
|
+
if _singleton_info:
|
|
669
|
+
# Combine all singleton PSUs into one group
|
|
670
|
+
pooled_psus = np.concatenate([p for _, p in _singleton_info])
|
|
671
|
+
n_pooled = len(pooled_psus)
|
|
672
|
+
|
|
673
|
+
if n_pooled >= 2:
|
|
674
|
+
m_pooled = n_pooled - 1 # No FPC for pooled singletons
|
|
675
|
+
drawn = rng.choice(n_pooled, size=m_pooled, replace=True)
|
|
676
|
+
counts = np.bincount(drawn, minlength=n_pooled)
|
|
677
|
+
scale_per_psu = (n_pooled / m_pooled) * counts.astype(np.float64)
|
|
678
|
+
|
|
679
|
+
# Build PSU → scale mapping and apply
|
|
680
|
+
psu_scale_map = {int(pooled_psus[i]): scale_per_psu[i] for i in range(n_pooled)}
|
|
681
|
+
for mask_h, _ in _singleton_info:
|
|
682
|
+
obs_in_h = np.where(mask_h)[0]
|
|
683
|
+
for idx in obs_in_h:
|
|
684
|
+
p = int(obs_psu[idx])
|
|
685
|
+
rescaled[idx] = base_weights[idx] * psu_scale_map.get(p, 1.0)
|
|
686
|
+
else:
|
|
687
|
+
# Single singleton — cannot pool, keep base weights (library-specific
|
|
688
|
+
# fallback; bootstrap adjust with one singleton = remove).
|
|
689
|
+
import warnings
|
|
690
|
+
|
|
691
|
+
warnings.warn(
|
|
692
|
+
"lonely_psu='adjust' with only 1 singleton stratum in "
|
|
693
|
+
"bootstrap: singleton PSU contributes zero variance "
|
|
694
|
+
"(same as 'remove'). At least 2 singleton strata are "
|
|
695
|
+
"needed for pooled pseudo-stratum bootstrap.",
|
|
696
|
+
UserWarning,
|
|
697
|
+
stacklevel=2,
|
|
698
|
+
)
|
|
699
|
+
for mask_h, _ in _singleton_info:
|
|
700
|
+
rescaled[mask_h] = base_weights[mask_h]
|
|
701
|
+
|
|
632
702
|
return rescaled
|
|
633
703
|
|
|
634
704
|
|
|
@@ -154,6 +154,15 @@ class ContinuousDiDResults:
|
|
|
154
154
|
f"n_periods={len(self.time_periods)})"
|
|
155
155
|
)
|
|
156
156
|
|
|
157
|
+
@property
|
|
158
|
+
def coef_var(self) -> float:
|
|
159
|
+
"""Coefficient of variation: SE / |overall ATT|. NaN when ATT is 0 or SE non-finite."""
|
|
160
|
+
if not (np.isfinite(self.overall_att_se) and self.overall_att_se >= 0):
|
|
161
|
+
return np.nan
|
|
162
|
+
if not np.isfinite(self.overall_att) or self.overall_att == 0:
|
|
163
|
+
return np.nan
|
|
164
|
+
return self.overall_att_se / abs(self.overall_att)
|
|
165
|
+
|
|
157
166
|
def summary(self, alpha: Optional[float] = None) -> str:
|
|
158
167
|
"""Generate formatted summary."""
|
|
159
168
|
alpha = alpha or self.alpha
|
|
@@ -223,10 +232,15 @@ class ContinuousDiDResults:
|
|
|
223
232
|
f"[{self.overall_att_conf_int[0]:.4f}, {self.overall_att_conf_int[1]:.4f}]",
|
|
224
233
|
f"{conf_level}% CI for ACRT_glob: "
|
|
225
234
|
f"[{self.overall_acrt_conf_int[0]:.4f}, {self.overall_acrt_conf_int[1]:.4f}]",
|
|
226
|
-
"",
|
|
227
235
|
]
|
|
228
236
|
)
|
|
229
237
|
|
|
238
|
+
cv = self.coef_var
|
|
239
|
+
if np.isfinite(cv):
|
|
240
|
+
lines.append(f"{'CV (SE/|ATT|):':<25} {cv:>10.4f}")
|
|
241
|
+
|
|
242
|
+
lines.append("")
|
|
243
|
+
|
|
230
244
|
# Dose-response curve summary (first/mid/last points)
|
|
231
245
|
if len(self.dose_grid) > 0:
|
|
232
246
|
lines.extend(
|
|
@@ -172,6 +172,15 @@ class EfficientDiDResults:
|
|
|
172
172
|
f"n_periods={len(self.time_periods)})"
|
|
173
173
|
)
|
|
174
174
|
|
|
175
|
+
@property
|
|
176
|
+
def coef_var(self) -> float:
|
|
177
|
+
"""Coefficient of variation: SE / |overall ATT|. NaN when ATT is 0 or SE non-finite."""
|
|
178
|
+
if not (np.isfinite(self.overall_se) and self.overall_se >= 0):
|
|
179
|
+
return np.nan
|
|
180
|
+
if not np.isfinite(self.overall_att) or self.overall_att == 0:
|
|
181
|
+
return np.nan
|
|
182
|
+
return self.overall_se / abs(self.overall_att)
|
|
183
|
+
|
|
175
184
|
def summary(self, alpha: Optional[float] = None) -> str:
|
|
176
185
|
"""Generate formatted summary of estimation results."""
|
|
177
186
|
alpha = alpha or self.alpha
|
|
@@ -219,10 +228,15 @@ class EfficientDiDResults:
|
|
|
219
228
|
"",
|
|
220
229
|
f"{conf_level}% Confidence Interval: "
|
|
221
230
|
f"[{self.overall_conf_int[0]:.4f}, {self.overall_conf_int[1]:.4f}]",
|
|
222
|
-
"",
|
|
223
231
|
]
|
|
224
232
|
)
|
|
225
233
|
|
|
234
|
+
cv = self.coef_var
|
|
235
|
+
if np.isfinite(cv):
|
|
236
|
+
lines.append(f"{'CV (SE/|ATT|):':<25} {cv:>10.4f}")
|
|
237
|
+
|
|
238
|
+
lines.append("")
|
|
239
|
+
|
|
226
240
|
# Event study effects
|
|
227
241
|
if self.event_study_effects:
|
|
228
242
|
lines.extend(
|
|
@@ -240,14 +240,14 @@ class DifferenceInDifferences:
|
|
|
240
240
|
resolved_survey, survey_weights, survey_weight_type, survey_metadata = (
|
|
241
241
|
_resolve_survey_for_fit(survey_design, data, self.inference)
|
|
242
242
|
)
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
"
|
|
249
|
-
"
|
|
250
|
-
"
|
|
243
|
+
_uses_replicate = (
|
|
244
|
+
resolved_survey is not None and resolved_survey.uses_replicate_variance
|
|
245
|
+
)
|
|
246
|
+
if _uses_replicate and self.inference == "wild_bootstrap":
|
|
247
|
+
raise ValueError(
|
|
248
|
+
"Cannot use inference='wild_bootstrap' with replicate-weight "
|
|
249
|
+
"survey designs. Replicate weights provide their own variance "
|
|
250
|
+
"estimation."
|
|
251
251
|
)
|
|
252
252
|
|
|
253
253
|
# Handle absorbed fixed effects (within-transformation)
|
|
@@ -358,6 +358,13 @@ class DifferenceInDifferences:
|
|
|
358
358
|
)
|
|
359
359
|
survey_metadata = compute_survey_metadata(resolved_survey, raw_w)
|
|
360
360
|
|
|
361
|
+
# When absorb + replicate: pass survey_design=None to prevent
|
|
362
|
+
# LinearRegression from computing replicate vcov on already-demeaned
|
|
363
|
+
# data (demeaning depends on weights, so replicate refits must re-demean).
|
|
364
|
+
_lr_survey = resolved_survey
|
|
365
|
+
if _uses_replicate and absorbed_vars:
|
|
366
|
+
_lr_survey = None
|
|
367
|
+
|
|
361
368
|
reg = LinearRegression(
|
|
362
369
|
include_intercept=False, # Intercept already in X
|
|
363
370
|
robust=self.robust,
|
|
@@ -366,7 +373,7 @@ class DifferenceInDifferences:
|
|
|
366
373
|
rank_deficient_action=self.rank_deficient_action,
|
|
367
374
|
weights=survey_weights,
|
|
368
375
|
weight_type=survey_weight_type,
|
|
369
|
-
survey_design=resolved_survey,
|
|
376
|
+
survey_design=_lr_survey,
|
|
370
377
|
).fit(X, y, df_adjustment=n_absorbed_effects)
|
|
371
378
|
|
|
372
379
|
coefficients = reg.coefficients_
|
|
@@ -375,14 +382,69 @@ class DifferenceInDifferences:
|
|
|
375
382
|
assert coefficients is not None
|
|
376
383
|
att = coefficients[att_idx]
|
|
377
384
|
|
|
378
|
-
# Get inference -
|
|
379
|
-
if self.inference == "wild_bootstrap" and self.cluster is not None:
|
|
385
|
+
# Get inference - replicate absorb override, bootstrap, or analytical
|
|
386
|
+
if _uses_replicate and absorbed_vars:
|
|
387
|
+
# Estimator-level replicate variance: re-demean + re-solve per replicate
|
|
388
|
+
from diff_diff.survey import compute_replicate_refit_variance
|
|
389
|
+
from diff_diff.utils import safe_inference
|
|
390
|
+
|
|
391
|
+
_absorb_list = list(absorbed_vars) # capture for closure
|
|
392
|
+
|
|
393
|
+
# Handle rank-deficient nuisance: refit only identified columns
|
|
394
|
+
_id_mask = ~np.isnan(coefficients)
|
|
395
|
+
_id_cols = np.where(_id_mask)[0]
|
|
396
|
+
_att_idx_reduced = int(np.searchsorted(_id_cols, att_idx))
|
|
397
|
+
|
|
398
|
+
def _refit_did_absorb(w_r):
|
|
399
|
+
nz = w_r > 0
|
|
400
|
+
wd = data[nz].copy()
|
|
401
|
+
w_nz = w_r[nz]
|
|
402
|
+
wd["_treat_time"] = (
|
|
403
|
+
wd[treatment].values.astype(float) * wd[time].values.astype(float)
|
|
404
|
+
)
|
|
405
|
+
vars_dm = [outcome, treatment, time, "_treat_time"] + (covariates or [])
|
|
406
|
+
for ab_var in _absorb_list:
|
|
407
|
+
wd, _ = demean_by_group(wd, vars_dm, ab_var, inplace=True, weights=w_nz)
|
|
408
|
+
y_r = wd[outcome].values.astype(float)
|
|
409
|
+
d_r = wd[treatment].values.astype(float)
|
|
410
|
+
t_r = wd[time].values.astype(float)
|
|
411
|
+
dt_r = wd["_treat_time"].values.astype(float)
|
|
412
|
+
X_r = np.column_stack([np.ones(len(y_r)), d_r, t_r, dt_r])
|
|
413
|
+
if covariates:
|
|
414
|
+
for cov in covariates:
|
|
415
|
+
X_r = np.column_stack([X_r, wd[cov].values.astype(float)])
|
|
416
|
+
coef_r, _, _ = solve_ols(
|
|
417
|
+
X_r[:, _id_cols], y_r,
|
|
418
|
+
weights=w_nz, weight_type=survey_weight_type,
|
|
419
|
+
rank_deficient_action="silent", return_vcov=False,
|
|
420
|
+
)
|
|
421
|
+
return coef_r
|
|
422
|
+
|
|
423
|
+
vcov_reduced, _n_valid_rep = compute_replicate_refit_variance(
|
|
424
|
+
_refit_did_absorb, coefficients[_id_mask], resolved_survey
|
|
425
|
+
)
|
|
426
|
+
vcov = _expand_vcov_with_nan(vcov_reduced, len(coefficients), _id_cols)
|
|
427
|
+
se = float(np.sqrt(max(vcov[att_idx, att_idx], 0.0)))
|
|
428
|
+
_df_rep = (
|
|
429
|
+
survey_metadata.df_survey
|
|
430
|
+
if survey_metadata and survey_metadata.df_survey
|
|
431
|
+
else 0 # rank-deficient replicate → NaN inference
|
|
432
|
+
)
|
|
433
|
+
if _n_valid_rep < resolved_survey.n_replicates:
|
|
434
|
+
_df_rep = _n_valid_rep - 1 if _n_valid_rep > 1 else 0
|
|
435
|
+
if survey_metadata is not None:
|
|
436
|
+
survey_metadata.df_survey = _df_rep if _df_rep > 0 else None
|
|
437
|
+
t_stat, p_value, conf_int = safe_inference(
|
|
438
|
+
att, se, alpha=self.alpha, df=_df_rep
|
|
439
|
+
)
|
|
440
|
+
elif self.inference == "wild_bootstrap" and self.cluster is not None:
|
|
380
441
|
# Override with wild cluster bootstrap inference
|
|
381
442
|
se, p_value, conf_int, t_stat, vcov, _ = self._run_wild_bootstrap_inference(
|
|
382
443
|
X, y, residuals, cluster_ids, att_idx
|
|
383
444
|
)
|
|
384
445
|
else:
|
|
385
446
|
# Use analytical inference from LinearRegression
|
|
447
|
+
# (handles replicate vcov for no-absorb path automatically)
|
|
386
448
|
vcov = reg.vcov_
|
|
387
449
|
inference = reg.get_inference(att_idx)
|
|
388
450
|
se = inference.se
|
|
@@ -1017,14 +1079,14 @@ class MultiPeriodDiD(DifferenceInDifferences):
|
|
|
1017
1079
|
resolved_survey, survey_weights, survey_weight_type, survey_metadata = (
|
|
1018
1080
|
_resolve_survey_for_fit(survey_design, data, effective_inference)
|
|
1019
1081
|
)
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
"
|
|
1026
|
-
"
|
|
1027
|
-
"
|
|
1082
|
+
_uses_replicate_mp = (
|
|
1083
|
+
resolved_survey is not None and resolved_survey.uses_replicate_variance
|
|
1084
|
+
)
|
|
1085
|
+
if _uses_replicate_mp and effective_inference == "wild_bootstrap":
|
|
1086
|
+
raise ValueError(
|
|
1087
|
+
"Cannot use inference='wild_bootstrap' with replicate-weight "
|
|
1088
|
+
"survey designs. Replicate weights provide their own variance "
|
|
1089
|
+
"estimation."
|
|
1028
1090
|
)
|
|
1029
1091
|
|
|
1030
1092
|
# Handle absorbed fixed effects (within-transformation)
|
|
@@ -1177,7 +1239,80 @@ class MultiPeriodDiD(DifferenceInDifferences):
|
|
|
1177
1239
|
)
|
|
1178
1240
|
|
|
1179
1241
|
# Compute survey vcov if applicable
|
|
1180
|
-
|
|
1242
|
+
_n_valid_rep_mp = None
|
|
1243
|
+
if _use_survey_vcov and _uses_replicate_mp and absorb:
|
|
1244
|
+
# Absorb + replicate: estimator-level refit (demeaning depends on weights)
|
|
1245
|
+
from diff_diff.survey import compute_replicate_refit_variance
|
|
1246
|
+
|
|
1247
|
+
_absorb_list_mp = list(absorb)
|
|
1248
|
+
# Handle rank-deficient nuisance: refit only identified columns
|
|
1249
|
+
_id_mask_mp = ~np.isnan(coefficients)
|
|
1250
|
+
_id_cols_mp = np.where(_id_mask_mp)[0]
|
|
1251
|
+
|
|
1252
|
+
def _refit_mp_absorb(w_r):
|
|
1253
|
+
nz = w_r > 0
|
|
1254
|
+
wd = data[nz].copy()
|
|
1255
|
+
w_nz = w_r[nz]
|
|
1256
|
+
d_raw_ = wd[treatment].values.astype(float)
|
|
1257
|
+
t_raw_ = wd[time].values
|
|
1258
|
+
wd["_did_treatment"] = d_raw_
|
|
1259
|
+
for period_ in non_ref_periods:
|
|
1260
|
+
wd[f"_did_period_{period_}"] = (t_raw_ == period_).astype(float)
|
|
1261
|
+
wd[f"_did_interact_{period_}"] = d_raw_ * (t_raw_ == period_).astype(float)
|
|
1262
|
+
vars_dm_ = (
|
|
1263
|
+
[outcome, "_did_treatment"]
|
|
1264
|
+
+ [f"_did_period_{p}" for p in non_ref_periods]
|
|
1265
|
+
+ [f"_did_interact_{p}" for p in non_ref_periods]
|
|
1266
|
+
+ (covariates or [])
|
|
1267
|
+
)
|
|
1268
|
+
for ab_var_ in _absorb_list_mp:
|
|
1269
|
+
wd, _ = demean_by_group(wd, vars_dm_, ab_var_, inplace=True, weights=w_nz)
|
|
1270
|
+
y_r = wd[outcome].values.astype(float)
|
|
1271
|
+
d_r = wd["_did_treatment"].values.astype(float)
|
|
1272
|
+
X_r = np.column_stack([np.ones(len(y_r)), d_r])
|
|
1273
|
+
for period_ in non_ref_periods:
|
|
1274
|
+
X_r = np.column_stack(
|
|
1275
|
+
[X_r, wd[f"_did_period_{period_}"].values.astype(float)]
|
|
1276
|
+
)
|
|
1277
|
+
for period_ in non_ref_periods:
|
|
1278
|
+
X_r = np.column_stack(
|
|
1279
|
+
[X_r, wd[f"_did_interact_{period_}"].values.astype(float)]
|
|
1280
|
+
)
|
|
1281
|
+
if covariates:
|
|
1282
|
+
for cov_ in covariates:
|
|
1283
|
+
X_r = np.column_stack([X_r, wd[cov_].values.astype(float)])
|
|
1284
|
+
coef_r, _, _ = solve_ols(
|
|
1285
|
+
X_r[:, _id_cols_mp], y_r,
|
|
1286
|
+
weights=w_nz, weight_type=survey_weight_type,
|
|
1287
|
+
rank_deficient_action="silent", return_vcov=False,
|
|
1288
|
+
)
|
|
1289
|
+
return coef_r
|
|
1290
|
+
|
|
1291
|
+
vcov_reduced_mp, _n_valid_rep_mp = compute_replicate_refit_variance(
|
|
1292
|
+
_refit_mp_absorb, coefficients[_id_mask_mp], resolved_survey
|
|
1293
|
+
)
|
|
1294
|
+
vcov = _expand_vcov_with_nan(vcov_reduced_mp, len(coefficients), _id_cols_mp)
|
|
1295
|
+
elif _use_survey_vcov and _uses_replicate_mp:
|
|
1296
|
+
# No absorb + replicate: X is fixed, use compute_replicate_vcov directly
|
|
1297
|
+
from diff_diff.survey import compute_replicate_vcov
|
|
1298
|
+
|
|
1299
|
+
nan_mask = np.isnan(coefficients)
|
|
1300
|
+
if np.any(nan_mask):
|
|
1301
|
+
kept_cols = np.where(~nan_mask)[0]
|
|
1302
|
+
if len(kept_cols) > 0:
|
|
1303
|
+
vcov_reduced, _n_valid_rep_mp = compute_replicate_vcov(
|
|
1304
|
+
X[:, kept_cols], y, coefficients[kept_cols], resolved_survey,
|
|
1305
|
+
weight_type=survey_weight_type,
|
|
1306
|
+
)
|
|
1307
|
+
vcov = _expand_vcov_with_nan(vcov_reduced, X.shape[1], kept_cols)
|
|
1308
|
+
else:
|
|
1309
|
+
vcov = np.full((X.shape[1], X.shape[1]), np.nan)
|
|
1310
|
+
_n_valid_rep_mp = 0
|
|
1311
|
+
else:
|
|
1312
|
+
vcov, _n_valid_rep_mp = compute_replicate_vcov(
|
|
1313
|
+
X, y, coefficients, resolved_survey, weight_type=survey_weight_type,
|
|
1314
|
+
)
|
|
1315
|
+
elif _use_survey_vcov:
|
|
1181
1316
|
from diff_diff.survey import compute_survey_vcov
|
|
1182
1317
|
|
|
1183
1318
|
nan_mask = np.isnan(coefficients)
|
|
@@ -1201,9 +1336,18 @@ class MultiPeriodDiD(DifferenceInDifferences):
|
|
|
1201
1336
|
df = n_eff_df - k_effective - n_absorbed_effects
|
|
1202
1337
|
if resolved_survey is not None and resolved_survey.df_survey is not None:
|
|
1203
1338
|
df = resolved_survey.df_survey
|
|
1339
|
+
# Replicate df: rank-deficient → NaN inference; dropped replicates → n_valid-1
|
|
1340
|
+
if _uses_replicate_mp:
|
|
1341
|
+
if resolved_survey.df_survey is None:
|
|
1342
|
+
df = 0 # rank-deficient replicate → NaN inference
|
|
1343
|
+
if _n_valid_rep_mp is not None and _n_valid_rep_mp < resolved_survey.n_replicates:
|
|
1344
|
+
df = _n_valid_rep_mp - 1 if _n_valid_rep_mp > 1 else 0
|
|
1345
|
+
if survey_metadata is not None:
|
|
1346
|
+
survey_metadata.df_survey = df if df > 0 else None
|
|
1204
1347
|
|
|
1205
1348
|
# Guard: fall back to normal distribution if df is non-positive
|
|
1206
|
-
|
|
1349
|
+
# Skip for replicate designs — df=0 is intentional for NaN inference
|
|
1350
|
+
if df is not None and df <= 0 and not _uses_replicate_mp:
|
|
1207
1351
|
warnings.warn(
|
|
1208
1352
|
f"Degrees of freedom is non-positive (df={df}). "
|
|
1209
1353
|
"Using normal distribution instead of t-distribution for inference.",
|