diff-diff 2.8.2__tar.gz → 2.8.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diff_diff-2.8.2 → diff_diff-2.8.3}/PKG-INFO +1 -1
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/__init__.py +1 -1
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/estimators.py +165 -21
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/imputation.py +395 -121
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/imputation_bootstrap.py +11 -3
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/stacked_did.py +55 -13
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/staggered.py +124 -55
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/staggered_aggregation.py +4 -11
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/sun_abraham.py +115 -11
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/survey.py +155 -2
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/trop.py +11 -7
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/trop_global.py +24 -5
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/trop_local.py +46 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/twfe.py +65 -13
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/two_stage.py +214 -24
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/two_stage_bootstrap.py +5 -2
- {diff_diff-2.8.2 → diff_diff-2.8.3}/pyproject.toml +1 -1
- {diff_diff-2.8.2 → diff_diff-2.8.3}/rust/Cargo.lock +1 -1
- {diff_diff-2.8.2 → diff_diff-2.8.3}/rust/Cargo.toml +1 -1
- {diff_diff-2.8.2 → diff_diff-2.8.3}/README.md +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/_backend.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/bacon.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/bootstrap_utils.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/continuous_did.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/continuous_did_bspline.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/continuous_did_results.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/datasets.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/diagnostics.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/efficient_did.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/efficient_did_bootstrap.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/efficient_did_covariates.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/efficient_did_results.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/efficient_did_weights.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/honest_did.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/imputation_results.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/linalg.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/power.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/practitioner.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/prep.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/prep_dgp.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/pretrends.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/results.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/stacked_did_results.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/staggered_bootstrap.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/staggered_results.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/staggered_triple_diff.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/staggered_triple_diff_results.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/synthetic_did.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/triple_diff.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/trop_results.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/two_stage_results.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/utils.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/visualization/__init__.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/visualization/_common.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/visualization/_continuous.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/visualization/_diagnostic.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/visualization/_event_study.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/visualization/_power.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/visualization/_staggered.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/diff_diff/visualization/_synthetic.py +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/rust/build.rs +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/rust/src/bootstrap.rs +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/rust/src/lib.rs +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/rust/src/linalg.rs +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/rust/src/trop.rs +0 -0
- {diff_diff-2.8.2 → diff_diff-2.8.3}/rust/src/weights.rs +0 -0
|
@@ -240,14 +240,14 @@ class DifferenceInDifferences:
|
|
|
240
240
|
resolved_survey, survey_weights, survey_weight_type, survey_metadata = (
|
|
241
241
|
_resolve_survey_for_fit(survey_design, data, self.inference)
|
|
242
242
|
)
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
"
|
|
249
|
-
"
|
|
250
|
-
"
|
|
243
|
+
_uses_replicate = (
|
|
244
|
+
resolved_survey is not None and resolved_survey.uses_replicate_variance
|
|
245
|
+
)
|
|
246
|
+
if _uses_replicate and self.inference == "wild_bootstrap":
|
|
247
|
+
raise ValueError(
|
|
248
|
+
"Cannot use inference='wild_bootstrap' with replicate-weight "
|
|
249
|
+
"survey designs. Replicate weights provide their own variance "
|
|
250
|
+
"estimation."
|
|
251
251
|
)
|
|
252
252
|
|
|
253
253
|
# Handle absorbed fixed effects (within-transformation)
|
|
@@ -358,6 +358,13 @@ class DifferenceInDifferences:
|
|
|
358
358
|
)
|
|
359
359
|
survey_metadata = compute_survey_metadata(resolved_survey, raw_w)
|
|
360
360
|
|
|
361
|
+
# When absorb + replicate: pass survey_design=None to prevent
|
|
362
|
+
# LinearRegression from computing replicate vcov on already-demeaned
|
|
363
|
+
# data (demeaning depends on weights, so replicate refits must re-demean).
|
|
364
|
+
_lr_survey = resolved_survey
|
|
365
|
+
if _uses_replicate and absorbed_vars:
|
|
366
|
+
_lr_survey = None
|
|
367
|
+
|
|
361
368
|
reg = LinearRegression(
|
|
362
369
|
include_intercept=False, # Intercept already in X
|
|
363
370
|
robust=self.robust,
|
|
@@ -366,7 +373,7 @@ class DifferenceInDifferences:
|
|
|
366
373
|
rank_deficient_action=self.rank_deficient_action,
|
|
367
374
|
weights=survey_weights,
|
|
368
375
|
weight_type=survey_weight_type,
|
|
369
|
-
survey_design=
|
|
376
|
+
survey_design=_lr_survey,
|
|
370
377
|
).fit(X, y, df_adjustment=n_absorbed_effects)
|
|
371
378
|
|
|
372
379
|
coefficients = reg.coefficients_
|
|
@@ -375,14 +382,69 @@ class DifferenceInDifferences:
|
|
|
375
382
|
assert coefficients is not None
|
|
376
383
|
att = coefficients[att_idx]
|
|
377
384
|
|
|
378
|
-
# Get inference -
|
|
379
|
-
if
|
|
385
|
+
# Get inference - replicate absorb override, bootstrap, or analytical
|
|
386
|
+
if _uses_replicate and absorbed_vars:
|
|
387
|
+
# Estimator-level replicate variance: re-demean + re-solve per replicate
|
|
388
|
+
from diff_diff.survey import compute_replicate_refit_variance
|
|
389
|
+
from diff_diff.utils import safe_inference
|
|
390
|
+
|
|
391
|
+
_absorb_list = list(absorbed_vars) # capture for closure
|
|
392
|
+
|
|
393
|
+
# Handle rank-deficient nuisance: refit only identified columns
|
|
394
|
+
_id_mask = ~np.isnan(coefficients)
|
|
395
|
+
_id_cols = np.where(_id_mask)[0]
|
|
396
|
+
_att_idx_reduced = int(np.searchsorted(_id_cols, att_idx))
|
|
397
|
+
|
|
398
|
+
def _refit_did_absorb(w_r):
|
|
399
|
+
nz = w_r > 0
|
|
400
|
+
wd = data[nz].copy()
|
|
401
|
+
w_nz = w_r[nz]
|
|
402
|
+
wd["_treat_time"] = (
|
|
403
|
+
wd[treatment].values.astype(float) * wd[time].values.astype(float)
|
|
404
|
+
)
|
|
405
|
+
vars_dm = [outcome, treatment, time, "_treat_time"] + (covariates or [])
|
|
406
|
+
for ab_var in _absorb_list:
|
|
407
|
+
wd, _ = demean_by_group(wd, vars_dm, ab_var, inplace=True, weights=w_nz)
|
|
408
|
+
y_r = wd[outcome].values.astype(float)
|
|
409
|
+
d_r = wd[treatment].values.astype(float)
|
|
410
|
+
t_r = wd[time].values.astype(float)
|
|
411
|
+
dt_r = wd["_treat_time"].values.astype(float)
|
|
412
|
+
X_r = np.column_stack([np.ones(len(y_r)), d_r, t_r, dt_r])
|
|
413
|
+
if covariates:
|
|
414
|
+
for cov in covariates:
|
|
415
|
+
X_r = np.column_stack([X_r, wd[cov].values.astype(float)])
|
|
416
|
+
coef_r, _, _ = solve_ols(
|
|
417
|
+
X_r[:, _id_cols], y_r,
|
|
418
|
+
weights=w_nz, weight_type=survey_weight_type,
|
|
419
|
+
rank_deficient_action="silent", return_vcov=False,
|
|
420
|
+
)
|
|
421
|
+
return coef_r
|
|
422
|
+
|
|
423
|
+
vcov_reduced, _n_valid_rep = compute_replicate_refit_variance(
|
|
424
|
+
_refit_did_absorb, coefficients[_id_mask], resolved_survey
|
|
425
|
+
)
|
|
426
|
+
vcov = _expand_vcov_with_nan(vcov_reduced, len(coefficients), _id_cols)
|
|
427
|
+
se = float(np.sqrt(max(vcov[att_idx, att_idx], 0.0)))
|
|
428
|
+
_df_rep = (
|
|
429
|
+
survey_metadata.df_survey
|
|
430
|
+
if survey_metadata and survey_metadata.df_survey
|
|
431
|
+
else 0 # rank-deficient replicate → NaN inference
|
|
432
|
+
)
|
|
433
|
+
if _n_valid_rep < resolved_survey.n_replicates:
|
|
434
|
+
_df_rep = _n_valid_rep - 1 if _n_valid_rep > 1 else 0
|
|
435
|
+
if survey_metadata is not None:
|
|
436
|
+
survey_metadata.df_survey = _df_rep if _df_rep > 0 else None
|
|
437
|
+
t_stat, p_value, conf_int = safe_inference(
|
|
438
|
+
att, se, alpha=self.alpha, df=_df_rep
|
|
439
|
+
)
|
|
440
|
+
elif self.inference == "wild_bootstrap" and self.cluster is not None:
|
|
380
441
|
# Override with wild cluster bootstrap inference
|
|
381
442
|
se, p_value, conf_int, t_stat, vcov, _ = self._run_wild_bootstrap_inference(
|
|
382
443
|
X, y, residuals, cluster_ids, att_idx
|
|
383
444
|
)
|
|
384
445
|
else:
|
|
385
446
|
# Use analytical inference from LinearRegression
|
|
447
|
+
# (handles replicate vcov for no-absorb path automatically)
|
|
386
448
|
vcov = reg.vcov_
|
|
387
449
|
inference = reg.get_inference(att_idx)
|
|
388
450
|
se = inference.se
|
|
@@ -1017,14 +1079,14 @@ class MultiPeriodDiD(DifferenceInDifferences):
|
|
|
1017
1079
|
resolved_survey, survey_weights, survey_weight_type, survey_metadata = (
|
|
1018
1080
|
_resolve_survey_for_fit(survey_design, data, effective_inference)
|
|
1019
1081
|
)
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
"
|
|
1026
|
-
"
|
|
1027
|
-
"
|
|
1082
|
+
_uses_replicate_mp = (
|
|
1083
|
+
resolved_survey is not None and resolved_survey.uses_replicate_variance
|
|
1084
|
+
)
|
|
1085
|
+
if _uses_replicate_mp and effective_inference == "wild_bootstrap":
|
|
1086
|
+
raise ValueError(
|
|
1087
|
+
"Cannot use inference='wild_bootstrap' with replicate-weight "
|
|
1088
|
+
"survey designs. Replicate weights provide their own variance "
|
|
1089
|
+
"estimation."
|
|
1028
1090
|
)
|
|
1029
1091
|
|
|
1030
1092
|
# Handle absorbed fixed effects (within-transformation)
|
|
@@ -1177,7 +1239,80 @@ class MultiPeriodDiD(DifferenceInDifferences):
|
|
|
1177
1239
|
)
|
|
1178
1240
|
|
|
1179
1241
|
# Compute survey vcov if applicable
|
|
1180
|
-
|
|
1242
|
+
_n_valid_rep_mp = None
|
|
1243
|
+
if _use_survey_vcov and _uses_replicate_mp and absorb:
|
|
1244
|
+
# Absorb + replicate: estimator-level refit (demeaning depends on weights)
|
|
1245
|
+
from diff_diff.survey import compute_replicate_refit_variance
|
|
1246
|
+
|
|
1247
|
+
_absorb_list_mp = list(absorb)
|
|
1248
|
+
# Handle rank-deficient nuisance: refit only identified columns
|
|
1249
|
+
_id_mask_mp = ~np.isnan(coefficients)
|
|
1250
|
+
_id_cols_mp = np.where(_id_mask_mp)[0]
|
|
1251
|
+
|
|
1252
|
+
def _refit_mp_absorb(w_r):
|
|
1253
|
+
nz = w_r > 0
|
|
1254
|
+
wd = data[nz].copy()
|
|
1255
|
+
w_nz = w_r[nz]
|
|
1256
|
+
d_raw_ = wd[treatment].values.astype(float)
|
|
1257
|
+
t_raw_ = wd[time].values
|
|
1258
|
+
wd["_did_treatment"] = d_raw_
|
|
1259
|
+
for period_ in non_ref_periods:
|
|
1260
|
+
wd[f"_did_period_{period_}"] = (t_raw_ == period_).astype(float)
|
|
1261
|
+
wd[f"_did_interact_{period_}"] = d_raw_ * (t_raw_ == period_).astype(float)
|
|
1262
|
+
vars_dm_ = (
|
|
1263
|
+
[outcome, "_did_treatment"]
|
|
1264
|
+
+ [f"_did_period_{p}" for p in non_ref_periods]
|
|
1265
|
+
+ [f"_did_interact_{p}" for p in non_ref_periods]
|
|
1266
|
+
+ (covariates or [])
|
|
1267
|
+
)
|
|
1268
|
+
for ab_var_ in _absorb_list_mp:
|
|
1269
|
+
wd, _ = demean_by_group(wd, vars_dm_, ab_var_, inplace=True, weights=w_nz)
|
|
1270
|
+
y_r = wd[outcome].values.astype(float)
|
|
1271
|
+
d_r = wd["_did_treatment"].values.astype(float)
|
|
1272
|
+
X_r = np.column_stack([np.ones(len(y_r)), d_r])
|
|
1273
|
+
for period_ in non_ref_periods:
|
|
1274
|
+
X_r = np.column_stack(
|
|
1275
|
+
[X_r, wd[f"_did_period_{period_}"].values.astype(float)]
|
|
1276
|
+
)
|
|
1277
|
+
for period_ in non_ref_periods:
|
|
1278
|
+
X_r = np.column_stack(
|
|
1279
|
+
[X_r, wd[f"_did_interact_{period_}"].values.astype(float)]
|
|
1280
|
+
)
|
|
1281
|
+
if covariates:
|
|
1282
|
+
for cov_ in covariates:
|
|
1283
|
+
X_r = np.column_stack([X_r, wd[cov_].values.astype(float)])
|
|
1284
|
+
coef_r, _, _ = solve_ols(
|
|
1285
|
+
X_r[:, _id_cols_mp], y_r,
|
|
1286
|
+
weights=w_nz, weight_type=survey_weight_type,
|
|
1287
|
+
rank_deficient_action="silent", return_vcov=False,
|
|
1288
|
+
)
|
|
1289
|
+
return coef_r
|
|
1290
|
+
|
|
1291
|
+
vcov_reduced_mp, _n_valid_rep_mp = compute_replicate_refit_variance(
|
|
1292
|
+
_refit_mp_absorb, coefficients[_id_mask_mp], resolved_survey
|
|
1293
|
+
)
|
|
1294
|
+
vcov = _expand_vcov_with_nan(vcov_reduced_mp, len(coefficients), _id_cols_mp)
|
|
1295
|
+
elif _use_survey_vcov and _uses_replicate_mp:
|
|
1296
|
+
# No absorb + replicate: X is fixed, use compute_replicate_vcov directly
|
|
1297
|
+
from diff_diff.survey import compute_replicate_vcov
|
|
1298
|
+
|
|
1299
|
+
nan_mask = np.isnan(coefficients)
|
|
1300
|
+
if np.any(nan_mask):
|
|
1301
|
+
kept_cols = np.where(~nan_mask)[0]
|
|
1302
|
+
if len(kept_cols) > 0:
|
|
1303
|
+
vcov_reduced, _n_valid_rep_mp = compute_replicate_vcov(
|
|
1304
|
+
X[:, kept_cols], y, coefficients[kept_cols], resolved_survey,
|
|
1305
|
+
weight_type=survey_weight_type,
|
|
1306
|
+
)
|
|
1307
|
+
vcov = _expand_vcov_with_nan(vcov_reduced, X.shape[1], kept_cols)
|
|
1308
|
+
else:
|
|
1309
|
+
vcov = np.full((X.shape[1], X.shape[1]), np.nan)
|
|
1310
|
+
_n_valid_rep_mp = 0
|
|
1311
|
+
else:
|
|
1312
|
+
vcov, _n_valid_rep_mp = compute_replicate_vcov(
|
|
1313
|
+
X, y, coefficients, resolved_survey, weight_type=survey_weight_type,
|
|
1314
|
+
)
|
|
1315
|
+
elif _use_survey_vcov:
|
|
1181
1316
|
from diff_diff.survey import compute_survey_vcov
|
|
1182
1317
|
|
|
1183
1318
|
nan_mask = np.isnan(coefficients)
|
|
@@ -1201,9 +1336,18 @@ class MultiPeriodDiD(DifferenceInDifferences):
|
|
|
1201
1336
|
df = n_eff_df - k_effective - n_absorbed_effects
|
|
1202
1337
|
if resolved_survey is not None and resolved_survey.df_survey is not None:
|
|
1203
1338
|
df = resolved_survey.df_survey
|
|
1339
|
+
# Replicate df: rank-deficient → NaN inference; dropped replicates → n_valid-1
|
|
1340
|
+
if _uses_replicate_mp:
|
|
1341
|
+
if resolved_survey.df_survey is None:
|
|
1342
|
+
df = 0 # rank-deficient replicate → NaN inference
|
|
1343
|
+
if _n_valid_rep_mp is not None and _n_valid_rep_mp < resolved_survey.n_replicates:
|
|
1344
|
+
df = _n_valid_rep_mp - 1 if _n_valid_rep_mp > 1 else 0
|
|
1345
|
+
if survey_metadata is not None:
|
|
1346
|
+
survey_metadata.df_survey = df if df > 0 else None
|
|
1204
1347
|
|
|
1205
1348
|
# Guard: fall back to normal distribution if df is non-positive
|
|
1206
|
-
|
|
1349
|
+
# Skip for replicate designs — df=0 is intentional for NaN inference
|
|
1350
|
+
if df is not None and df <= 0 and not _uses_replicate_mp:
|
|
1207
1351
|
warnings.warn(
|
|
1208
1352
|
f"Degrees of freedom is non-positive (df={df}). "
|
|
1209
1353
|
"Using normal distribution instead of t-distribution for inference.",
|