diff-diff 2.9.0__tar.gz → 2.9.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. {diff_diff-2.9.0 → diff_diff-2.9.1}/PKG-INFO +1 -1
  2. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/__init__.py +1 -1
  3. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/prep_dgp.py +312 -23
  4. {diff_diff-2.9.0 → diff_diff-2.9.1}/pyproject.toml +1 -1
  5. {diff_diff-2.9.0 → diff_diff-2.9.1}/rust/Cargo.lock +1 -1
  6. {diff_diff-2.9.0 → diff_diff-2.9.1}/rust/Cargo.toml +1 -1
  7. {diff_diff-2.9.0 → diff_diff-2.9.1}/README.md +0 -0
  8. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/_backend.py +0 -0
  9. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/bacon.py +0 -0
  10. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/bootstrap_utils.py +0 -0
  11. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/continuous_did.py +0 -0
  12. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/continuous_did_bspline.py +0 -0
  13. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/continuous_did_results.py +0 -0
  14. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/datasets.py +0 -0
  15. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/diagnostics.py +0 -0
  16. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/efficient_did.py +0 -0
  17. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/efficient_did_bootstrap.py +0 -0
  18. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/efficient_did_covariates.py +0 -0
  19. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/efficient_did_results.py +0 -0
  20. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/efficient_did_weights.py +0 -0
  21. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/estimators.py +0 -0
  22. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/honest_did.py +0 -0
  23. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/imputation.py +0 -0
  24. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/imputation_bootstrap.py +0 -0
  25. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/imputation_results.py +0 -0
  26. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/linalg.py +0 -0
  27. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/power.py +0 -0
  28. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/practitioner.py +0 -0
  29. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/prep.py +0 -0
  30. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/pretrends.py +0 -0
  31. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/results.py +0 -0
  32. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/stacked_did.py +0 -0
  33. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/stacked_did_results.py +0 -0
  34. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/staggered.py +0 -0
  35. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/staggered_aggregation.py +0 -0
  36. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/staggered_bootstrap.py +0 -0
  37. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/staggered_results.py +0 -0
  38. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/staggered_triple_diff.py +0 -0
  39. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/staggered_triple_diff_results.py +0 -0
  40. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/sun_abraham.py +0 -0
  41. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/survey.py +0 -0
  42. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/synthetic_did.py +0 -0
  43. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/triple_diff.py +0 -0
  44. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/trop.py +0 -0
  45. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/trop_global.py +0 -0
  46. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/trop_local.py +0 -0
  47. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/trop_results.py +0 -0
  48. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/twfe.py +0 -0
  49. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/two_stage.py +0 -0
  50. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/two_stage_bootstrap.py +0 -0
  51. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/two_stage_results.py +0 -0
  52. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/utils.py +0 -0
  53. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/visualization/__init__.py +0 -0
  54. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/visualization/_common.py +0 -0
  55. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/visualization/_continuous.py +0 -0
  56. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/visualization/_diagnostic.py +0 -0
  57. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/visualization/_event_study.py +0 -0
  58. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/visualization/_power.py +0 -0
  59. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/visualization/_staggered.py +0 -0
  60. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/visualization/_synthetic.py +0 -0
  61. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/wooldridge.py +0 -0
  62. {diff_diff-2.9.0 → diff_diff-2.9.1}/diff_diff/wooldridge_results.py +0 -0
  63. {diff_diff-2.9.0 → diff_diff-2.9.1}/rust/build.rs +0 -0
  64. {diff_diff-2.9.0 → diff_diff-2.9.1}/rust/src/bootstrap.rs +0 -0
  65. {diff_diff-2.9.0 → diff_diff-2.9.1}/rust/src/lib.rs +0 -0
  66. {diff_diff-2.9.0 → diff_diff-2.9.1}/rust/src/linalg.rs +0 -0
  67. {diff_diff-2.9.0 → diff_diff-2.9.1}/rust/src/trop.rs +0 -0
  68. {diff_diff-2.9.0 → diff_diff-2.9.1}/rust/src/weights.rs +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diff-diff
3
- Version: 2.9.0
3
+ Version: 2.9.1
4
4
  Classifier: Development Status :: 5 - Production/Stable
5
5
  Classifier: Intended Audience :: Science/Research
6
6
  Classifier: Operating System :: OS Independent
@@ -214,7 +214,7 @@ Bacon = BaconDecomposition
214
214
  EDiD = EfficientDiD
215
215
  ETWFE = WooldridgeDiD
216
216
 
217
- __version__ = "2.9.0"
217
+ __version__ = "2.9.1"
218
218
  __all__ = [
219
219
  # Estimators
220
220
  "DifferenceInDifferences",
@@ -1129,6 +1129,37 @@ def generate_staggered_ddd_data(
1129
1129
  return pd.DataFrame(records)
1130
1130
 
1131
1131
 
1132
+ def _rank_pair_weights(
1133
+ unit_weight: np.ndarray,
1134
+ unit_stratum: np.ndarray,
1135
+ y0: np.ndarray,
1136
+ n_strata: int,
1137
+ ) -> None:
1138
+ """Rank-pair weights with Y(0) within each stratum (in-place).
1139
+
1140
+ High-outcome units receive higher weights, modeling informative sampling
1141
+ where hard-to-reach (high-outcome) subpopulations are under-covered
1142
+ and therefore carry larger inverse-selection-probability weights.
1143
+ """
1144
+ for s in range(n_strata):
1145
+ mask = unit_stratum == s
1146
+ n_s = mask.sum()
1147
+ if n_s <= 1:
1148
+ continue
1149
+ idx_s = np.where(mask)[0]
1150
+ w_vals = unit_weight[idx_s].copy()
1151
+ if w_vals.std() < 1e-10:
1152
+ # No within-stratum variation: create rank-based weights
1153
+ # scaled to preserve stratum baseline weight level
1154
+ ranks = np.argsort(np.argsort(y0[idx_s])).astype(float) + 1.0
1155
+ unit_weight[idx_s] = ranks / ranks.mean() * w_vals.mean()
1156
+ else:
1157
+ # Rank-pair: highest Y(0) gets heaviest weight
1158
+ y0_order = np.argsort(-y0[idx_s])
1159
+ w_sorted = np.sort(w_vals)[::-1] # heaviest first
1160
+ unit_weight[idx_s[y0_order]] = w_sorted
1161
+
1162
+
1132
1163
  def generate_survey_did_data(
1133
1164
  n_units: int = 200,
1134
1165
  n_periods: int = 8,
@@ -1149,6 +1180,15 @@ def generate_survey_did_data(
1149
1180
  add_covariates: bool = False,
1150
1181
  panel: bool = True,
1151
1182
  seed: Optional[int] = None,
1183
+ # --- Research-grade DGP parameters ---
1184
+ icc: Optional[float] = None,
1185
+ weight_cv: Optional[float] = None,
1186
+ informative_sampling: bool = False,
1187
+ heterogeneous_te_by_strata: bool = False,
1188
+ strata_sizes: Optional[List[int]] = None,
1189
+ return_true_population_att: bool = False,
1190
+ covariate_effects: Optional[tuple] = None,
1191
+ te_covariate_interaction: float = 0.0,
1152
1192
  ) -> pd.DataFrame:
1153
1193
  """
1154
1194
  Generate synthetic staggered DiD data with survey structure.
@@ -1215,6 +1255,52 @@ def generate_survey_did_data(
1215
1255
  CallawaySantAnna(panel=False)).
1216
1256
  seed : int, optional
1217
1257
  Random seed for reproducibility.
1258
+ icc : float, optional
1259
+ Target intra-class correlation coefficient (0 < icc < 1). Overrides
1260
+ ``psu_re_sd`` via the variance decomposition:
1261
+ ``psu_re_sd = sqrt(icc * (sigma2_unit + sigma2_noise + sigma2_cov) /
1262
+ ((1 - icc) * (1 + psu_period_factor^2)))`` where ``sigma2_cov``
1263
+ includes covariate variance when ``add_covariates=True``.
1264
+ Cannot be combined with a non-default ``psu_re_sd``.
1265
+ weight_cv : float, optional
1266
+ Target coefficient of variation for sampling weights. Generates
1267
+ LogNormal weights normalized to mean 1, bypassing ``weight_variation``.
1268
+ Cannot be combined with a non-default ``weight_variation``.
1269
+ informative_sampling : bool, default=False
1270
+ If True, sampling weights correlate with Y(0) — high-outcome units
1271
+ receive higher weights (under-coverage → larger inverse-selection-
1272
+ probability weights). Uses rank-pairing within each stratum. For
1273
+ panel data, ranking is done once from period-1 outcomes. For
1274
+ repeated cross-sections, ranking is refreshed each period. Within
1275
+ each stratum, rank-based weights are scaled to preserve the
1276
+ stratum's baseline weight level from ``weight_variation``.
1277
+ When ``add_covariates=True``, covariate contributions are
1278
+ included in the Y(0) ranking.
1279
+ heterogeneous_te_by_strata : bool, default=False
1280
+ If True, treatment effect varies by stratum:
1281
+ ``TE_h = TE * (1 + 0.5 * (h - mean) / std)``. Creates a gap
1282
+ between unweighted and population ATT. With ``n_strata=1``,
1283
+ all units receive the base ``treatment_effect``.
1284
+ strata_sizes : list of int, optional
1285
+ Custom per-stratum unit counts. Must have length ``n_strata`` and
1286
+ sum to ``n_units``. Replaces equal allocation across strata.
1287
+ return_true_population_att : bool, default=False
1288
+ If True, attaches a diagnostic dict to ``df.attrs["dgp_truth"]``
1289
+ with keys: ``population_att`` (weight-weighted average of treated
1290
+ true effects), ``deff_kish`` (1 + CV(w)^2), ``base_stratum_effects``
1291
+ (base stratum TEs before dynamic/covariate modifiers),
1292
+ ``icc_realized`` (ANOVA-based
1293
+ ICC computed on period-1 data).
1294
+ covariate_effects : tuple of (float, float), optional
1295
+ Coefficients ``(beta1, beta2)`` for covariates x1 and x2 in the
1296
+ outcome equation ``y += beta1 * x1 + beta2 * x2``. Default uses
1297
+ ``(0.5, 0.3)``. Only used when ``add_covariates=True``. The ICC
1298
+ calibration automatically adjusts for the implied covariate variance.
1299
+ te_covariate_interaction : float, default=0.0
1300
+ Coefficient for treatment-by-covariate interaction:
1301
+ ``TE_i = base_TE + te_covariate_interaction * x1_i``. Creates
1302
+ unit-level treatment effect heterogeneity driven by the continuous
1303
+ covariate. Requires ``add_covariates=True``.
1218
1304
 
1219
1305
  Returns
1220
1306
  -------
@@ -1222,6 +1308,8 @@ def generate_survey_did_data(
1222
1308
  Columns: unit, period, outcome, first_treat, treated, true_effect,
1223
1309
  stratum, psu, fpc, weight. Also rep_0..rep_K if
1224
1310
  include_replicate_weights=True, and x1, x2 if add_covariates=True.
1311
+ If ``return_true_population_att=True``, ``df.attrs["dgp_truth"]``
1312
+ contains DGP diagnostics.
1225
1313
  """
1226
1314
  rng = np.random.default_rng(seed)
1227
1315
 
@@ -1284,30 +1372,120 @@ def generate_survey_did_data(
1284
1372
  f"weight_variation must be one of {valid_wv}, got {weight_variation!r}"
1285
1373
  )
1286
1374
 
1375
+ # --- Validate research-grade DGP parameters ---
1376
+ if icc is not None:
1377
+ if not (0 < icc < 1):
1378
+ raise ValueError(f"icc must be between 0 and 1 (exclusive), got {icc}")
1379
+ if psu_re_sd != 2.0:
1380
+ raise ValueError(
1381
+ "Cannot specify both icc and a non-default psu_re_sd. "
1382
+ "icc overrides psu_re_sd via the ICC formula."
1383
+ )
1384
+
1385
+ if weight_cv is not None:
1386
+ if not np.isfinite(weight_cv) or weight_cv <= 0:
1387
+ raise ValueError(
1388
+ f"weight_cv must be finite and positive, got {weight_cv}"
1389
+ )
1390
+ if weight_variation != "moderate":
1391
+ raise ValueError(
1392
+ "Cannot specify both weight_cv and a non-default "
1393
+ "weight_variation. weight_cv overrides weight_variation."
1394
+ )
1395
+
1396
+ if strata_sizes is not None:
1397
+ strata_sizes = list(strata_sizes)
1398
+ for ss in strata_sizes:
1399
+ if isinstance(ss, bool) or not isinstance(ss, (int, np.integer)):
1400
+ raise ValueError(
1401
+ f"strata_sizes must contain integers, got {ss!r}"
1402
+ )
1403
+ if len(strata_sizes) != n_strata:
1404
+ raise ValueError(
1405
+ f"strata_sizes must have length n_strata={n_strata}, "
1406
+ f"got {len(strata_sizes)}"
1407
+ )
1408
+ if any(s < 1 for s in strata_sizes):
1409
+ raise ValueError("All strata_sizes must be >= 1")
1410
+ if sum(strata_sizes) != n_units:
1411
+ raise ValueError(
1412
+ f"strata_sizes must sum to n_units={n_units}, "
1413
+ f"got {sum(strata_sizes)}"
1414
+ )
1415
+
1416
+ # --- Validate and resolve covariate coefficients ---
1417
+ if covariate_effects is not None:
1418
+ covariate_effects = tuple(covariate_effects)
1419
+ if len(covariate_effects) != 2:
1420
+ raise ValueError(
1421
+ f"covariate_effects must have length 2, got {len(covariate_effects)}"
1422
+ )
1423
+ if not all(np.isfinite(c) for c in covariate_effects):
1424
+ raise ValueError(
1425
+ f"covariate_effects must be finite, got {covariate_effects}"
1426
+ )
1427
+ _beta1, _beta2 = covariate_effects if covariate_effects is not None else (0.5, 0.3)
1428
+
1429
+ if not np.isfinite(te_covariate_interaction):
1430
+ raise ValueError(
1431
+ f"te_covariate_interaction must be finite, got {te_covariate_interaction}"
1432
+ )
1433
+ if te_covariate_interaction != 0.0 and not add_covariates:
1434
+ raise ValueError(
1435
+ "te_covariate_interaction requires add_covariates=True"
1436
+ )
1437
+
1438
+ # --- ICC -> psu_re_sd resolution ---
1439
+ if icc is not None:
1440
+ # Covariate variance: Var(beta1*x1) + Var(beta2*x2)
1441
+ # where x1 ~ N(0,1), x2 ~ Bernoulli(0.5)
1442
+ cov_var = (_beta1**2 * 1.0 + _beta2**2 * 0.25) if add_covariates else 0.0
1443
+ non_psu_var = unit_fe_sd**2 + noise_sd**2 + cov_var
1444
+ if non_psu_var < 1e-12:
1445
+ raise ValueError(
1446
+ "icc requires non-zero non-PSU variance "
1447
+ "(unit_fe_sd, noise_sd, or add_covariates must contribute variance)"
1448
+ )
1449
+ psu_re_sd = np.sqrt(
1450
+ icc * non_psu_var
1451
+ / ((1 - icc) * (1 + psu_period_factor**2))
1452
+ )
1453
+
1287
1454
  # --- Survey structure: assign units to strata and PSUs ---
1288
1455
  n_psu_total = n_strata * psu_per_stratum
1289
- units_per_stratum = n_units // n_strata
1290
- remainder = n_units % n_strata
1456
+
1457
+ if strata_sizes is not None:
1458
+ stratum_n = strata_sizes
1459
+ else:
1460
+ units_per_stratum = n_units // n_strata
1461
+ remainder = n_units % n_strata
1462
+ stratum_n = [
1463
+ units_per_stratum + (1 if s < remainder else 0)
1464
+ for s in range(n_strata)
1465
+ ]
1291
1466
 
1292
1467
  unit_stratum = np.empty(n_units, dtype=int)
1293
1468
  unit_psu = np.empty(n_units, dtype=int)
1294
1469
  idx = 0
1295
1470
  for s in range(n_strata):
1296
- # Distribute remainder units across first strata
1297
- n_s = units_per_stratum + (1 if s < remainder else 0)
1471
+ n_s = stratum_n[s]
1298
1472
  unit_stratum[idx : idx + n_s] = s
1299
-
1300
- # Assign PSUs within this stratum
1301
1473
  psu_start = s * psu_per_stratum
1302
1474
  for j in range(n_s):
1303
1475
  unit_psu[idx + j] = psu_start + (j % psu_per_stratum)
1304
1476
  idx += n_s
1305
1477
 
1306
- # Sampling weights: vary by stratum (inverse selection probability)
1307
- scale_map = {"none": 0.0, "moderate": 1.0, "high": 3.0}
1308
- scale = scale_map.get(weight_variation, 1.0)
1309
- denom = max(n_strata - 1, 1)
1310
- unit_weight = 1.0 + scale * (unit_stratum / denom)
1478
+ # Sampling weights
1479
+ if weight_cv is not None:
1480
+ sigma_ln = np.sqrt(np.log(1 + weight_cv**2))
1481
+ raw_w = rng.lognormal(-sigma_ln**2 / 2, sigma_ln, size=n_units)
1482
+ unit_weight = raw_w / raw_w.mean()
1483
+ else:
1484
+ # Stratum-based weights (inverse selection probability)
1485
+ scale_map = {"none": 0.0, "moderate": 1.0, "high": 3.0}
1486
+ scale = scale_map.get(weight_variation, 1.0)
1487
+ denom = max(n_strata - 1, 1)
1488
+ unit_weight = 1.0 + scale * (unit_stratum / denom)
1311
1489
 
1312
1490
  # --- Treatment assignment (cohort structure) ---
1313
1491
  n_never = int(n_units * never_treated_frac)
@@ -1344,6 +1522,37 @@ def generate_survey_did_data(
1344
1522
  0, psu_re_sd * psu_period_factor, size=(n_psu_total, n_periods)
1345
1523
  )
1346
1524
 
1525
+ # --- Informative sampling (panel path): pre-draw FEs, rank-pair weights ---
1526
+ if informative_sampling and panel:
1527
+ _panel_unit_fe = rng.normal(0, unit_fe_sd, size=n_units)
1528
+ y0_period1 = (
1529
+ _panel_unit_fe
1530
+ + psu_re[unit_psu]
1531
+ + psu_period_re[unit_psu, 0]
1532
+ + 0.5
1533
+ )
1534
+ if add_covariates:
1535
+ _panel_x1 = rng.normal(0, 1, size=n_units)
1536
+ _panel_x2 = rng.choice([0, 1], size=n_units)
1537
+ y0_period1 = y0_period1 + _beta1 * _panel_x1 + _beta2 * _panel_x2
1538
+ _rank_pair_weights(unit_weight, unit_stratum, y0_period1, n_strata)
1539
+
1540
+ # Save base weights for cross-section informative sampling (reset each period)
1541
+ if informative_sampling and not panel:
1542
+ _base_weight = unit_weight.copy()
1543
+
1544
+ # --- Heterogeneous treatment effects by stratum ---
1545
+ if heterogeneous_te_by_strata:
1546
+ if n_strata == 1:
1547
+ te_by_stratum = np.array([treatment_effect])
1548
+ else:
1549
+ strata_idx = np.arange(n_strata, dtype=float)
1550
+ te_by_stratum = treatment_effect * (
1551
+ 1 + 0.5 * (strata_idx - strata_idx.mean()) / strata_idx.std()
1552
+ )
1553
+ else:
1554
+ te_by_stratum = None
1555
+
1347
1556
  # --- Generate panel or repeated cross-sections ---
1348
1557
  records = []
1349
1558
  for t in range(1, n_periods + 1):
@@ -1351,21 +1560,47 @@ def generate_survey_did_data(
1351
1560
  unit_fe = rng.normal(0, unit_fe_sd, size=n_units)
1352
1561
  if panel and t > 1:
1353
1562
  pass # reuse unit_fe from first period (set below)
1354
- if panel and t == 1:
1563
+ if informative_sampling and panel:
1564
+ unit_fe = _panel_unit_fe # use pre-drawn FEs
1565
+ elif panel and t == 1:
1355
1566
  _panel_unit_fe = unit_fe # save for reuse
1356
- if panel and t > 1:
1567
+ elif panel and t > 1:
1357
1568
  unit_fe = _panel_unit_fe # type: ignore[possibly-undefined]
1358
1569
 
1359
- x1 = rng.normal(0, 1, size=n_units) if add_covariates else None
1360
- if panel and t > 1 and add_covariates:
1570
+ # Cross-section informative sampling: re-rank weights each period
1571
+ if informative_sampling and not panel:
1572
+ # Draw covariates early so they can be included in Y(0) ranking
1573
+ if add_covariates:
1574
+ x1 = rng.normal(0, 1, size=n_units)
1575
+ x2 = rng.choice([0, 1], size=n_units)
1576
+ unit_weight = _base_weight.copy() # type: ignore[possibly-undefined]
1577
+ y0_t = (
1578
+ unit_fe
1579
+ + psu_re[unit_psu]
1580
+ + psu_period_re[unit_psu, t - 1]
1581
+ + 0.5 * t
1582
+ )
1583
+ if add_covariates:
1584
+ y0_t = y0_t + _beta1 * x1 + _beta2 * x2
1585
+ _rank_pair_weights(unit_weight, unit_stratum, y0_t, n_strata)
1586
+
1587
+ # Covariates — may already be drawn by informative sampling above
1588
+ if informative_sampling and panel and add_covariates:
1589
+ x1 = _panel_x1 # pre-drawn before loop for ranking
1590
+ x2 = _panel_x2
1591
+ elif informative_sampling and not panel and add_covariates:
1592
+ pass # x1, x2 already drawn in cross-section ranking block
1593
+ elif add_covariates:
1594
+ x1 = rng.normal(0, 1, size=n_units)
1595
+ x2 = rng.choice([0, 1], size=n_units)
1596
+ else:
1597
+ x1 = None
1598
+ x2 = None
1599
+ if not informative_sampling and panel and t > 1 and add_covariates:
1361
1600
  x1 = _panel_x1 # type: ignore[possibly-undefined]
1362
- elif panel and t == 1 and add_covariates:
1363
- _panel_x1 = x1
1364
-
1365
- x2 = rng.choice([0, 1], size=n_units) if add_covariates else None
1366
- if panel and t > 1 and add_covariates:
1367
1601
  x2 = _panel_x2 # type: ignore[possibly-undefined]
1368
- elif panel and t == 1 and add_covariates:
1602
+ elif not informative_sampling and panel and t == 1 and add_covariates:
1603
+ _panel_x1 = x1
1369
1604
  _panel_x2 = x2
1370
1605
 
1371
1606
  for i in range(n_units):
@@ -1374,12 +1609,17 @@ def generate_survey_did_data(
1374
1609
  y = unit_fe[i] + psu_re[unit_psu[i]] + psu_period_re[unit_psu[i], t - 1] + 0.5 * t
1375
1610
 
1376
1611
  if add_covariates:
1377
- y += 0.5 * x1[i] + 0.3 * x2[i]
1612
+ y += _beta1 * x1[i] + _beta2 * x2[i]
1378
1613
 
1379
1614
  treated = int(g_i > 0 and t >= g_i)
1380
1615
  true_eff = 0.0
1381
1616
  if treated:
1382
- true_eff = treatment_effect
1617
+ if te_by_stratum is not None:
1618
+ true_eff = float(te_by_stratum[unit_stratum[i]])
1619
+ else:
1620
+ true_eff = treatment_effect
1621
+ if te_covariate_interaction != 0.0:
1622
+ true_eff += te_covariate_interaction * x1[i]
1383
1623
  if dynamic_effects:
1384
1624
  true_eff *= 1 + effect_growth * (t - g_i)
1385
1625
  y += true_eff
@@ -1426,4 +1666,53 @@ def generate_survey_did_data(
1426
1666
  w_r[w_r > 0] *= n_rep / (n_rep - 1)
1427
1667
  df[f"rep_{r}"] = w_r
1428
1668
 
1669
+ # --- DGP truth diagnostics ---
1670
+ if return_true_population_att:
1671
+ treated_mask = df["treated"] == 1
1672
+ if treated_mask.any():
1673
+ w_treated = df.loc[treated_mask, "weight"].values
1674
+ te_treated = df.loc[treated_mask, "true_effect"].values
1675
+ population_att = float(np.average(te_treated, weights=w_treated))
1676
+ else:
1677
+ population_att = float("nan")
1678
+
1679
+ if te_by_stratum is not None:
1680
+ stratum_effects = {
1681
+ int(s): float(te_by_stratum[s]) for s in range(n_strata)
1682
+ }
1683
+ else:
1684
+ stratum_effects = {
1685
+ int(s): float(treatment_effect) for s in range(n_strata)
1686
+ }
1687
+
1688
+ # Kish DEFF from weight variation
1689
+ w_all = df.groupby("unit")["weight"].first().values
1690
+ cv_w = float(w_all.std() / w_all.mean()) if w_all.mean() > 0 else 0.0
1691
+ deff_kish = 1 + cv_w**2
1692
+
1693
+ # Realized ICC (ANOVA-based, period-1 only to avoid TE contamination)
1694
+ _p1 = df[df["period"] == 1]
1695
+ _groups = _p1.groupby("psu")["outcome"]
1696
+ _n_total = len(_p1)
1697
+ _n_groups = _groups.ngroups
1698
+ # ICC undefined with < 2 groups or no within-group replication
1699
+ if _n_groups < 2 or _n_total <= _n_groups:
1700
+ icc_realized = float("nan")
1701
+ else:
1702
+ _n_bar = _n_total / _n_groups
1703
+ _grand_mean = _p1["outcome"].mean()
1704
+ _ssb = (_groups.size() * (_groups.mean() - _grand_mean) ** 2).sum()
1705
+ _msb = _ssb / (_n_groups - 1)
1706
+ _ssw = _groups.apply(lambda x: ((x - x.mean()) ** 2).sum()).sum()
1707
+ _msw = _ssw / (_n_total - _n_groups)
1708
+ _denom = _msb + (_n_bar - 1) * _msw
1709
+ icc_realized = float((_msb - _msw) / _denom) if _denom > 0 else float("nan")
1710
+
1711
+ df.attrs["dgp_truth"] = {
1712
+ "population_att": population_att,
1713
+ "deff_kish": float(deff_kish),
1714
+ "base_stratum_effects": stratum_effects,
1715
+ "icc_realized": icc_realized,
1716
+ }
1717
+
1429
1718
  return df
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "diff-diff"
7
- version = "2.9.0"
7
+ version = "2.9.1"
8
8
  description = "Difference-in-Differences causal inference with sklearn-like API. Callaway-Sant'Anna, Synthetic DiD, Honest DiD, event studies, parallel trends."
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -197,7 +197,7 @@ checksum = "930c7171c8df9fb1782bdf9b918ed9ed2d33d1d22300abb754f9085bc48bf8e8"
197
197
 
198
198
  [[package]]
199
199
  name = "diff_diff_rust"
200
- version = "2.9.0"
200
+ version = "2.9.1"
201
201
  dependencies = [
202
202
  "blas-src",
203
203
  "faer",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "diff_diff_rust"
3
- version = "2.9.0"
3
+ version = "2.9.1"
4
4
  edition = "2021"
5
5
  description = "Rust backend for diff-diff DiD library"
6
6
  license = "MIT"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes