diff-diff 2.8.3__tar.gz → 2.8.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diff_diff-2.8.3 → diff_diff-2.8.4}/PKG-INFO +1 -1
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/__init__.py +3 -1
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/bootstrap_utils.py +84 -14
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/continuous_did_results.py +15 -1
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/efficient_did_results.py +15 -1
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/imputation.py +247 -72
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/imputation_bootstrap.py +6 -4
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/imputation_results.py +17 -2
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/prep.py +75 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/prep_dgp.py +16 -1
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/results.py +39 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/stacked_did_results.py +15 -1
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/staggered_results.py +25 -13
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/staggered_triple_diff_results.py +20 -17
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/sun_abraham.py +57 -16
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/survey.py +100 -7
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/trop_results.py +13 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/two_stage.py +69 -15
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/two_stage_results.py +15 -1
- {diff_diff-2.8.3 → diff_diff-2.8.4}/pyproject.toml +1 -1
- {diff_diff-2.8.3 → diff_diff-2.8.4}/rust/Cargo.lock +3 -3
- {diff_diff-2.8.3 → diff_diff-2.8.4}/rust/Cargo.toml +1 -1
- {diff_diff-2.8.3 → diff_diff-2.8.4}/README.md +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/_backend.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/bacon.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/continuous_did.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/continuous_did_bspline.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/datasets.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/diagnostics.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/efficient_did.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/efficient_did_bootstrap.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/efficient_did_covariates.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/efficient_did_weights.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/estimators.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/honest_did.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/linalg.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/power.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/practitioner.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/pretrends.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/stacked_did.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/staggered.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/staggered_aggregation.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/staggered_bootstrap.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/staggered_triple_diff.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/synthetic_did.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/triple_diff.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/trop.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/trop_global.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/trop_local.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/twfe.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/two_stage_bootstrap.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/utils.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/visualization/__init__.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/visualization/_common.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/visualization/_continuous.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/visualization/_diagnostic.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/visualization/_event_study.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/visualization/_power.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/visualization/_staggered.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/visualization/_synthetic.py +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/rust/build.rs +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/rust/src/bootstrap.rs +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/rust/src/lib.rs +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/rust/src/linalg.rs +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/rust/src/trop.rs +0 -0
- {diff_diff-2.8.3 → diff_diff-2.8.4}/rust/src/weights.rs +0 -0
|
@@ -94,6 +94,7 @@ from diff_diff.prep import (
|
|
|
94
94
|
make_treatment_indicator,
|
|
95
95
|
rank_control_units,
|
|
96
96
|
summarize_did_data,
|
|
97
|
+
trim_weights,
|
|
97
98
|
validate_did_data,
|
|
98
99
|
wide_to_long,
|
|
99
100
|
)
|
|
@@ -210,7 +211,7 @@ Stacked = StackedDiD
|
|
|
210
211
|
Bacon = BaconDecomposition
|
|
211
212
|
EDiD = EfficientDiD
|
|
212
213
|
|
|
213
|
-
__version__ = "2.8.3"
|
|
214
|
+
__version__ = "2.8.4"
|
|
214
215
|
__all__ = [
|
|
215
216
|
# Estimators
|
|
216
217
|
"DifferenceInDifferences",
|
|
@@ -307,6 +308,7 @@ __all__ = [
|
|
|
307
308
|
"make_post_indicator",
|
|
308
309
|
"wide_to_long",
|
|
309
310
|
"balance_panel",
|
|
311
|
+
"trim_weights",
|
|
310
312
|
"validate_did_data",
|
|
311
313
|
"summarize_did_data",
|
|
312
314
|
"generate_did_data",
|
|
@@ -433,6 +433,10 @@ def generate_survey_multiplier_weights_batch(
|
|
|
433
433
|
is present, weights are scaled by ``sqrt(1 - f_h)`` per stratum so
|
|
434
434
|
the bootstrap variance matches the TSL variance.
|
|
435
435
|
|
|
436
|
+
For ``lonely_psu="adjust"``, singleton PSUs from different strata are
|
|
437
|
+
pooled into a combined pseudo-stratum and weights are generated for
|
|
438
|
+
the pooled group (no FPC scaling on pooled singletons).
|
|
439
|
+
|
|
436
440
|
Parameters
|
|
437
441
|
----------
|
|
438
442
|
n_bootstrap : int
|
|
@@ -454,11 +458,7 @@ def generate_survey_multiplier_weights_batch(
|
|
|
454
458
|
psu = resolved_survey.psu
|
|
455
459
|
strata = resolved_survey.strata
|
|
456
460
|
|
|
457
|
-
|
|
458
|
-
raise NotImplementedError(
|
|
459
|
-
"lonely_psu='adjust' is not yet supported for survey-aware bootstrap. "
|
|
460
|
-
"Use lonely_psu='remove' or 'certainty', or use analytical inference."
|
|
461
|
-
)
|
|
461
|
+
_lonely_psu = resolved_survey.lonely_psu
|
|
462
462
|
|
|
463
463
|
if psu is None:
|
|
464
464
|
# Each observation is its own PSU
|
|
@@ -499,6 +499,7 @@ def generate_survey_multiplier_weights_batch(
|
|
|
499
499
|
psu_to_col = {int(p): i for i, p in enumerate(psu_ids)}
|
|
500
500
|
|
|
501
501
|
unique_strata = np.unique(strata)
|
|
502
|
+
_singleton_cols = [] # For lonely_psu="adjust" pooling
|
|
502
503
|
for h in unique_strata:
|
|
503
504
|
mask_h = strata == h
|
|
504
505
|
|
|
@@ -511,8 +512,12 @@ def generate_survey_multiplier_weights_batch(
|
|
|
511
512
|
cols = np.array([psu_to_col[int(p)] for p in psus_in_h])
|
|
512
513
|
|
|
513
514
|
if n_h < 2:
|
|
514
|
-
|
|
515
|
-
|
|
515
|
+
if _lonely_psu == "adjust":
|
|
516
|
+
# Collect for pooled pseudo-stratum processing
|
|
517
|
+
_singleton_cols.extend(cols.tolist())
|
|
518
|
+
else:
|
|
519
|
+
# remove / certainty — zero weight
|
|
520
|
+
weights[:, cols] = 0.0
|
|
516
521
|
continue
|
|
517
522
|
|
|
518
523
|
# Generate weights for this stratum
|
|
@@ -536,6 +541,31 @@ def generate_survey_multiplier_weights_batch(
|
|
|
536
541
|
|
|
537
542
|
weights[:, cols] = stratum_weights
|
|
538
543
|
|
|
544
|
+
# Pool singleton PSUs into a pseudo-stratum for "adjust"
|
|
545
|
+
if _singleton_cols:
|
|
546
|
+
n_pooled = len(_singleton_cols)
|
|
547
|
+
if n_pooled >= 2:
|
|
548
|
+
pooled_weights = generate_bootstrap_weights_batch_numpy(
|
|
549
|
+
n_bootstrap, n_pooled, weight_type, rng
|
|
550
|
+
)
|
|
551
|
+
# No FPC scaling for pooled singletons (conservative)
|
|
552
|
+
pooled_cols = np.array(_singleton_cols)
|
|
553
|
+
weights[:, pooled_cols] = pooled_weights
|
|
554
|
+
else:
|
|
555
|
+
# Single singleton — cannot pool, zero weight (library-specific
|
|
556
|
+
# fallback; bootstrap adjust with one singleton = remove).
|
|
557
|
+
import warnings
|
|
558
|
+
|
|
559
|
+
warnings.warn(
|
|
560
|
+
"lonely_psu='adjust' with only 1 singleton stratum in "
|
|
561
|
+
"bootstrap: singleton PSU contributes zero variance "
|
|
562
|
+
"(same as 'remove'). At least 2 singleton strata are "
|
|
563
|
+
"needed for pooled pseudo-stratum bootstrap.",
|
|
564
|
+
UserWarning,
|
|
565
|
+
stacklevel=3,
|
|
566
|
+
)
|
|
567
|
+
weights[:, _singleton_cols[0]] = 0.0
|
|
568
|
+
|
|
539
569
|
return weights, psu_ids
|
|
540
570
|
|
|
541
571
|
|
|
@@ -553,6 +583,9 @@ def generate_rao_wu_weights(
|
|
|
553
583
|
With FPC: ``m_h = max(1, round((1 - f_h) * (n_h - 1)))``
|
|
554
584
|
(Rao, Wu & Yue 1992, Section 3).
|
|
555
585
|
|
|
586
|
+
For ``lonely_psu="adjust"``, singleton PSUs are pooled into a combined
|
|
587
|
+
pseudo-stratum and resampled together (no FPC scaling on pooled group).
|
|
588
|
+
|
|
556
589
|
Parameters
|
|
557
590
|
----------
|
|
558
591
|
resolved_survey : ResolvedSurveyDesign
|
|
@@ -570,11 +603,7 @@ def generate_rao_wu_weights(
|
|
|
570
603
|
psu = resolved_survey.psu
|
|
571
604
|
strata = resolved_survey.strata
|
|
572
605
|
|
|
573
|
-
|
|
574
|
-
raise NotImplementedError(
|
|
575
|
-
"lonely_psu='adjust' is not yet supported for survey-aware bootstrap. "
|
|
576
|
-
"Use lonely_psu='remove' or 'certainty', or use analytical inference."
|
|
577
|
-
)
|
|
606
|
+
_lonely_psu_rw = resolved_survey.lonely_psu
|
|
578
607
|
|
|
579
608
|
rescaled = np.zeros(n_obs, dtype=np.float64)
|
|
580
609
|
|
|
@@ -589,14 +618,20 @@ def generate_rao_wu_weights(
|
|
|
589
618
|
unique_strata = np.unique(strata)
|
|
590
619
|
strata_masks = [strata == h for h in unique_strata]
|
|
591
620
|
|
|
621
|
+
# Collect singleton PSUs for "adjust" pooling
|
|
622
|
+
_singleton_info = [] # list of (mask_h, unique_psu_h) tuples
|
|
623
|
+
|
|
592
624
|
for mask_h in strata_masks:
|
|
593
625
|
psu_h = obs_psu[mask_h]
|
|
594
626
|
unique_psu_h = np.unique(psu_h)
|
|
595
627
|
n_h = len(unique_psu_h)
|
|
596
628
|
|
|
597
629
|
if n_h < 2:
|
|
598
|
-
|
|
599
|
-
|
|
630
|
+
if _lonely_psu_rw == "adjust":
|
|
631
|
+
_singleton_info.append((mask_h, unique_psu_h))
|
|
632
|
+
else:
|
|
633
|
+
# remove / certainty — keep original weights (zero variance)
|
|
634
|
+
rescaled[mask_h] = base_weights[mask_h]
|
|
600
635
|
continue
|
|
601
636
|
|
|
602
637
|
# Compute resample size
|
|
@@ -629,6 +664,41 @@ def generate_rao_wu_weights(
|
|
|
629
664
|
local_indices = np.array([psu_to_local[int(obs_psu[idx])] for idx in obs_in_h])
|
|
630
665
|
rescaled[obs_in_h] = base_weights[obs_in_h] * scale_per_psu[local_indices]
|
|
631
666
|
|
|
667
|
+
# Pool singleton PSUs into a pseudo-stratum for "adjust"
|
|
668
|
+
if _singleton_info:
|
|
669
|
+
# Combine all singleton PSUs into one group
|
|
670
|
+
pooled_psus = np.concatenate([p for _, p in _singleton_info])
|
|
671
|
+
n_pooled = len(pooled_psus)
|
|
672
|
+
|
|
673
|
+
if n_pooled >= 2:
|
|
674
|
+
m_pooled = n_pooled - 1 # No FPC for pooled singletons
|
|
675
|
+
drawn = rng.choice(n_pooled, size=m_pooled, replace=True)
|
|
676
|
+
counts = np.bincount(drawn, minlength=n_pooled)
|
|
677
|
+
scale_per_psu = (n_pooled / m_pooled) * counts.astype(np.float64)
|
|
678
|
+
|
|
679
|
+
# Build PSU → scale mapping and apply
|
|
680
|
+
psu_scale_map = {int(pooled_psus[i]): scale_per_psu[i] for i in range(n_pooled)}
|
|
681
|
+
for mask_h, _ in _singleton_info:
|
|
682
|
+
obs_in_h = np.where(mask_h)[0]
|
|
683
|
+
for idx in obs_in_h:
|
|
684
|
+
p = int(obs_psu[idx])
|
|
685
|
+
rescaled[idx] = base_weights[idx] * psu_scale_map.get(p, 1.0)
|
|
686
|
+
else:
|
|
687
|
+
# Single singleton — cannot pool, keep base weights (library-specific
|
|
688
|
+
# fallback; bootstrap adjust with one singleton = remove).
|
|
689
|
+
import warnings
|
|
690
|
+
|
|
691
|
+
warnings.warn(
|
|
692
|
+
"lonely_psu='adjust' with only 1 singleton stratum in "
|
|
693
|
+
"bootstrap: singleton PSU contributes zero variance "
|
|
694
|
+
"(same as 'remove'). At least 2 singleton strata are "
|
|
695
|
+
"needed for pooled pseudo-stratum bootstrap.",
|
|
696
|
+
UserWarning,
|
|
697
|
+
stacklevel=2,
|
|
698
|
+
)
|
|
699
|
+
for mask_h, _ in _singleton_info:
|
|
700
|
+
rescaled[mask_h] = base_weights[mask_h]
|
|
701
|
+
|
|
632
702
|
return rescaled
|
|
633
703
|
|
|
634
704
|
|
|
@@ -154,6 +154,15 @@ class ContinuousDiDResults:
|
|
|
154
154
|
f"n_periods={len(self.time_periods)})"
|
|
155
155
|
)
|
|
156
156
|
|
|
157
|
+
@property
|
|
158
|
+
def coef_var(self) -> float:
|
|
159
|
+
"""Coefficient of variation: SE / |overall ATT|. NaN when ATT is 0 or SE non-finite."""
|
|
160
|
+
if not (np.isfinite(self.overall_att_se) and self.overall_att_se >= 0):
|
|
161
|
+
return np.nan
|
|
162
|
+
if not np.isfinite(self.overall_att) or self.overall_att == 0:
|
|
163
|
+
return np.nan
|
|
164
|
+
return self.overall_att_se / abs(self.overall_att)
|
|
165
|
+
|
|
157
166
|
def summary(self, alpha: Optional[float] = None) -> str:
|
|
158
167
|
"""Generate formatted summary."""
|
|
159
168
|
alpha = alpha or self.alpha
|
|
@@ -223,10 +232,15 @@ class ContinuousDiDResults:
|
|
|
223
232
|
f"[{self.overall_att_conf_int[0]:.4f}, {self.overall_att_conf_int[1]:.4f}]",
|
|
224
233
|
f"{conf_level}% CI for ACRT_glob: "
|
|
225
234
|
f"[{self.overall_acrt_conf_int[0]:.4f}, {self.overall_acrt_conf_int[1]:.4f}]",
|
|
226
|
-
"",
|
|
227
235
|
]
|
|
228
236
|
)
|
|
229
237
|
|
|
238
|
+
cv = self.coef_var
|
|
239
|
+
if np.isfinite(cv):
|
|
240
|
+
lines.append(f"{'CV (SE/|ATT|):':<25} {cv:>10.4f}")
|
|
241
|
+
|
|
242
|
+
lines.append("")
|
|
243
|
+
|
|
230
244
|
# Dose-response curve summary (first/mid/last points)
|
|
231
245
|
if len(self.dose_grid) > 0:
|
|
232
246
|
lines.extend(
|
|
@@ -172,6 +172,15 @@ class EfficientDiDResults:
|
|
|
172
172
|
f"n_periods={len(self.time_periods)})"
|
|
173
173
|
)
|
|
174
174
|
|
|
175
|
+
@property
|
|
176
|
+
def coef_var(self) -> float:
|
|
177
|
+
"""Coefficient of variation: SE / |overall ATT|. NaN when ATT is 0 or SE non-finite."""
|
|
178
|
+
if not (np.isfinite(self.overall_se) and self.overall_se >= 0):
|
|
179
|
+
return np.nan
|
|
180
|
+
if not np.isfinite(self.overall_att) or self.overall_att == 0:
|
|
181
|
+
return np.nan
|
|
182
|
+
return self.overall_se / abs(self.overall_att)
|
|
183
|
+
|
|
175
184
|
def summary(self, alpha: Optional[float] = None) -> str:
|
|
176
185
|
"""Generate formatted summary of estimation results."""
|
|
177
186
|
alpha = alpha or self.alpha
|
|
@@ -219,10 +228,15 @@ class EfficientDiDResults:
|
|
|
219
228
|
"",
|
|
220
229
|
f"{conf_level}% Confidence Interval: "
|
|
221
230
|
f"[{self.overall_conf_int[0]:.4f}, {self.overall_conf_int[1]:.4f}]",
|
|
222
|
-
"",
|
|
223
231
|
]
|
|
224
232
|
)
|
|
225
233
|
|
|
234
|
+
cv = self.coef_var
|
|
235
|
+
if np.isfinite(cv):
|
|
236
|
+
lines.append(f"{'CV (SE/|ATT|):':<25} {cv:>10.4f}")
|
|
237
|
+
|
|
238
|
+
lines.append("")
|
|
239
|
+
|
|
226
240
|
# Event study effects
|
|
227
241
|
if self.event_study_effects:
|
|
228
242
|
lines.extend(
|