diff-diff 2.3.1__tar.gz → 2.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. {diff_diff-2.3.1 → diff_diff-2.3.2}/PKG-INFO +4 -3
  2. {diff_diff-2.3.1 → diff_diff-2.3.2}/README.md +1 -1
  3. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/__init__.py +1 -1
  4. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/staggered.py +6 -3
  5. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/sun_abraham.py +60 -24
  6. {diff_diff-2.3.1 → diff_diff-2.3.2}/pyproject.toml +3 -2
  7. {diff_diff-2.3.1 → diff_diff-2.3.2}/rust/Cargo.lock +3 -3
  8. {diff_diff-2.3.1 → diff_diff-2.3.2}/rust/Cargo.toml +1 -1
  9. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/_backend.py +0 -0
  10. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/bacon.py +0 -0
  11. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/datasets.py +0 -0
  12. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/diagnostics.py +0 -0
  13. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/estimators.py +0 -0
  14. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/honest_did.py +0 -0
  15. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/imputation.py +0 -0
  16. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/linalg.py +0 -0
  17. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/power.py +0 -0
  18. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/prep.py +0 -0
  19. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/prep_dgp.py +0 -0
  20. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/pretrends.py +0 -0
  21. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/results.py +0 -0
  22. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/staggered_aggregation.py +0 -0
  23. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/staggered_bootstrap.py +0 -0
  24. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/staggered_results.py +0 -0
  25. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/synthetic_did.py +0 -0
  26. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/triple_diff.py +0 -0
  27. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/trop.py +0 -0
  28. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/twfe.py +0 -0
  29. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/utils.py +0 -0
  30. {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/visualization.py +0 -0
  31. {diff_diff-2.3.1 → diff_diff-2.3.2}/rust/src/bootstrap.rs +0 -0
  32. {diff_diff-2.3.1 → diff_diff-2.3.2}/rust/src/lib.rs +0 -0
  33. {diff_diff-2.3.1 → diff_diff-2.3.2}/rust/src/linalg.rs +0 -0
  34. {diff_diff-2.3.1 → diff_diff-2.3.2}/rust/src/trop.rs +0 -0
  35. {diff_diff-2.3.1 → diff_diff-2.3.2}/rust/src/weights.rs +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diff-diff
3
- Version: 2.3.1
3
+ Version: 2.3.2
4
4
  Classifier: Development Status :: 5 - Production/Stable
5
5
  Classifier: Intended Audience :: Science/Research
6
6
  Classifier: Operating System :: OS Independent
@@ -9,6 +9,7 @@ Classifier: Programming Language :: Python :: 3.9
9
9
  Classifier: Programming Language :: Python :: 3.10
10
10
  Classifier: Programming Language :: Python :: 3.11
11
11
  Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
12
13
  Classifier: Topic :: Scientific/Engineering :: Mathematics
13
14
  Requires-Dist: numpy>=1.20.0
14
15
  Requires-Dist: pandas>=1.3.0
@@ -28,7 +29,7 @@ Summary: A library for Difference-in-Differences causal inference analysis
28
29
  Keywords: causal-inference,difference-in-differences,econometrics,statistics,treatment-effects
29
30
  Author: diff-diff contributors
30
31
  License-Expression: MIT
31
- Requires-Python: >=3.9
32
+ Requires-Python: >=3.9, <3.14
32
33
  Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
33
34
  Project-URL: Documentation, https://diff-diff.readthedocs.io
34
35
  Project-URL: Homepage, https://github.com/igerber/diff-diff
@@ -2489,7 +2490,7 @@ Returns DataFrame with columns: `unit`, `quality_score`, `outcome_trend_score`,
2489
2490
 
2490
2491
  ## Requirements
2491
2492
 
2492
- - Python >= 3.9
2493
+ - Python 3.9 - 3.13
2493
2494
  - numpy >= 1.20
2494
2495
  - pandas >= 1.3
2495
2496
  - scipy >= 1.7
@@ -2452,7 +2452,7 @@ Returns DataFrame with columns: `unit`, `quality_score`, `outcome_trend_score`,
2452
2452
 
2453
2453
  ## Requirements
2454
2454
 
2455
- - Python >= 3.9
2455
+ - Python 3.9 - 3.13
2456
2456
  - numpy >= 1.20
2457
2457
  - pandas >= 1.3
2458
2458
  - scipy >= 1.7
@@ -142,7 +142,7 @@ from diff_diff.datasets import (
142
142
  load_mpdta,
143
143
  )
144
144
 
145
- __version__ = "2.3.1"
145
+ __version__ = "2.3.2"
146
146
  __all__ = [
147
147
  # Estimators
148
148
  "DifferenceInDifferences",
@@ -415,6 +415,7 @@ class CallawaySantAnna(
415
415
  cohort_masks[g] = (unit_cohorts == g)
416
416
 
417
417
  # Never-treated mask
418
+ # np.inf was normalized to 0 in fit(), so the np.inf check is defensive only
418
419
  never_treated_mask = (unit_cohorts == 0) | (unit_cohorts == np.inf)
419
420
 
420
421
  # Pre-compute covariate matrices by time period if needed
@@ -639,13 +640,15 @@ class CallawaySantAnna(
639
640
  # This avoids hardcoding column names in internal methods
640
641
  df['first_treat'] = df[first_treat]
641
642
 
643
+ # Never-treated indicator (must precede treatment_groups to exclude np.inf)
644
+ df['_never_treated'] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
645
+ # Normalize np.inf → 0 so all downstream `> 0` checks exclude never-treated
646
+ df.loc[df[first_treat] == np.inf, first_treat] = 0
647
+
642
648
  # Identify groups and time periods
643
649
  time_periods = sorted(df[time].unique())
644
650
  treatment_groups = sorted([g for g in df[first_treat].unique() if g > 0])
645
651
 
646
- # Never-treated indicator (first_treat = 0 or inf)
647
- df['_never_treated'] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
648
-
649
652
  # Get unique units
650
653
  unit_info = df.groupby(unit).agg({
651
654
  first_treat: 'first',
@@ -456,9 +456,9 @@ class SunAbraham:
456
456
  covariates : list, optional
457
457
  List of covariate column names to include in regression.
458
458
  min_pre_periods : int, default=1
459
- Minimum number of pre-treatment periods to include in event study.
459
+ **Deprecated**: Accepted but ignored. Will be removed in a future version.
460
460
  min_post_periods : int, default=1
461
- Minimum number of post-treatment periods to include in event study.
461
+ **Deprecated**: Accepted but ignored. Will be removed in a future version.
462
462
 
463
463
  Returns
464
464
  -------
@@ -470,6 +470,22 @@ class SunAbraham:
470
470
  ValueError
471
471
  If required columns are missing or data validation fails.
472
472
  """
473
+ # Deprecation warnings for unimplemented parameters
474
+ if min_pre_periods != 1:
475
+ warnings.warn(
476
+ "min_pre_periods is not yet implemented and will be ignored. "
477
+ "This parameter will be removed in a future version.",
478
+ FutureWarning,
479
+ stacklevel=2,
480
+ )
481
+ if min_post_periods != 1:
482
+ warnings.warn(
483
+ "min_post_periods is not yet implemented and will be ignored. "
484
+ "This parameter will be removed in a future version.",
485
+ FutureWarning,
486
+ stacklevel=2,
487
+ )
488
+
473
489
  # Validate inputs
474
490
  required_cols = [outcome, unit, time, first_treat]
475
491
  if covariates:
@@ -486,13 +502,15 @@ class SunAbraham:
486
502
  df[time] = pd.to_numeric(df[time])
487
503
  df[first_treat] = pd.to_numeric(df[first_treat])
488
504
 
505
+ # Never-treated indicator (must precede treatment_groups to exclude np.inf)
506
+ df["_never_treated"] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
507
+ # Normalize np.inf → 0 so all downstream `> 0` checks exclude never-treated
508
+ df.loc[df[first_treat] == np.inf, first_treat] = 0
509
+
489
510
  # Identify groups and time periods
490
511
  time_periods = sorted(df[time].unique())
491
512
  treatment_groups = sorted([g for g in df[first_treat].unique() if g > 0])
492
513
 
493
- # Never-treated indicator
494
- df["_never_treated"] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
495
-
496
514
  # Get unique units
497
515
  unit_info = (
498
516
  df.groupby(unit)
@@ -533,9 +551,9 @@ class SunAbraham:
533
551
 
534
552
  all_rel_times_sorted = sorted(all_rel_times)
535
553
 
536
- # Filter to reasonable range
537
- min_rel = max(min(all_rel_times_sorted), -20) # cap at -20
538
- max_rel = min(max(all_rel_times_sorted), 20) # cap at +20
554
+ # Use full range of relative times (no artificial truncation, matches R's fixest::sunab())
555
+ min_rel = min(all_rel_times_sorted)
556
+ max_rel = max(all_rel_times_sorted)
539
557
 
540
558
  # Reference period: last pre-treatment period (typically -1)
541
559
  self._reference_period = -1 - self.anticipation
@@ -765,12 +783,18 @@ class SunAbraham:
765
783
 
766
784
  # Fit OLS using LinearRegression helper (more stable than manual X'X inverse)
767
785
  cluster_ids = df_demeaned[cluster_var].values
786
+
787
+ # Degrees of freedom adjustment for absorbed unit and time fixed effects
788
+ n_units_fe = df[unit].nunique()
789
+ n_times_fe = df[time].nunique()
790
+ df_adj = n_units_fe + n_times_fe - 1
791
+
768
792
  reg = LinearRegression(
769
793
  include_intercept=False, # Already demeaned, no intercept needed
770
794
  robust=True,
771
795
  cluster_ids=cluster_ids,
772
796
  rank_deficient_action=self.rank_deficient_action,
773
- ).fit(X, y)
797
+ ).fit(X, y, df_adjustment=df_adj)
774
798
 
775
799
  coefficients = reg.coefficients_
776
800
  vcov = reg.vcov_
@@ -821,7 +845,8 @@ class SunAbraham:
821
845
 
822
846
  β_e = Σ_g w_{g,e} × δ_{g,e}
823
847
 
824
- where w_{g,e} is the share of cohort g among treated units at relative time e.
848
+ where w_{g,e} = n_{g,e} / Σ_g n_{g,e} is the share of observations from cohort g
849
+ at event-time e among all treated observations at that event-time.
825
850
 
826
851
  Returns
827
852
  -------
@@ -833,9 +858,8 @@ class SunAbraham:
833
858
  event_study_effects: Dict[int, Dict[str, Any]] = {}
834
859
  cohort_weights: Dict[int, Dict[Any, float]] = {}
835
860
 
836
- # Get cohort sizes
837
- unit_cohorts = df.groupby(unit)[first_treat].first()
838
- cohort_sizes = unit_cohorts[unit_cohorts > 0].value_counts().to_dict()
861
+ # Pre-compute per-event-time observation counts: n_{g,e}
862
+ event_time_counts = df[df[first_treat] > 0].groupby([first_treat, "_rel_time"]).size()
839
863
 
840
864
  for e in rel_periods:
841
865
  # Get cohorts that have observations at this relative time
@@ -847,13 +871,13 @@ class SunAbraham:
847
871
  if not cohorts_at_e:
848
872
  continue
849
873
 
850
- # Compute IW weights: share of each cohort among those observed at e
874
+ # Compute IW weights: n_{g,e} / Σ_g n_{g,e}
851
875
  weights = {}
852
876
  total_size = 0
853
877
  for g in cohorts_at_e:
854
- n_g = cohort_sizes.get(g, 0)
855
- weights[g] = n_g
856
- total_size += n_g
878
+ n_g_e = event_time_counts.get((g, e), 0)
879
+ weights[g] = n_g_e
880
+ total_size += n_g_e
857
881
 
858
882
  if total_size == 0:
859
883
  continue
@@ -915,7 +939,7 @@ class SunAbraham:
915
939
  ]
916
940
 
917
941
  if not post_effects:
918
- return 0.0, 0.0
942
+ return np.nan, np.nan
919
943
 
920
944
  # Weight by number of treated observations at each relative time
921
945
  post_weights = []
@@ -948,7 +972,13 @@ class SunAbraham:
948
972
  overall_weights_by_coef[key] += period_weight * cw
949
973
 
950
974
  if not overall_weights_by_coef:
951
- # Fallback to simple variance calculation
975
+ # Fallback to simplified variance that ignores covariances between periods
976
+ warnings.warn(
977
+ "Could not construct full weight vector for overall ATT SE. "
978
+ "Using simplified variance that ignores covariances between periods.",
979
+ UserWarning,
980
+ stacklevel=2,
981
+ )
952
982
  overall_var = float(
953
983
  np.sum((post_weights ** 2) * np.array([eff["se"] ** 2 for _, eff in post_effects]))
954
984
  )
@@ -1029,6 +1059,7 @@ class SunAbraham:
1029
1059
  df_b[time] - df_b[first_treat],
1030
1060
  np.nan
1031
1061
  )
1062
+ # np.inf was normalized to 0 in fit(), so the np.inf check is defensive only
1032
1063
  df_b["_never_treated"] = (
1033
1064
  (df_b[first_treat] == 0) | (df_b[first_treat] == np.inf)
1034
1065
  )
@@ -1113,11 +1144,16 @@ class SunAbraham:
1113
1144
  event_study_p_values[e] = p_value
1114
1145
 
1115
1146
  # Overall ATT statistics
1116
- overall_se = float(np.std(bootstrap_overall, ddof=1))
1117
- overall_ci = self._compute_percentile_ci(bootstrap_overall, self.alpha)
1118
- overall_p = self._compute_bootstrap_pvalue(
1119
- original_overall_att, bootstrap_overall
1120
- )
1147
+ if not np.isfinite(original_overall_att):
1148
+ overall_se = np.nan
1149
+ overall_ci = (np.nan, np.nan)
1150
+ overall_p = np.nan
1151
+ else:
1152
+ overall_se = float(np.std(bootstrap_overall, ddof=1))
1153
+ overall_ci = self._compute_percentile_ci(bootstrap_overall, self.alpha)
1154
+ overall_p = self._compute_bootstrap_pvalue(
1155
+ original_overall_att, bootstrap_overall
1156
+ )
1121
1157
 
1122
1158
  return SABootstrapResults(
1123
1159
  n_bootstrap=self.n_bootstrap,
@@ -4,11 +4,11 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "diff-diff"
7
- version = "2.3.1"
7
+ version = "2.3.2"
8
8
  description = "A library for Difference-in-Differences causal inference analysis"
9
9
  readme = "README.md"
10
10
  license = "MIT"
11
- requires-python = ">=3.9"
11
+ requires-python = ">=3.9,<3.14"
12
12
  authors = [
13
13
  {name = "diff-diff contributors"}
14
14
  ]
@@ -28,6 +28,7 @@ classifiers = [
28
28
  "Programming Language :: Python :: 3.10",
29
29
  "Programming Language :: Python :: 3.11",
30
30
  "Programming Language :: Python :: 3.12",
31
+ "Programming Language :: Python :: 3.13",
31
32
  "Topic :: Scientific/Engineering :: Mathematics",
32
33
  ]
33
34
  dependencies = [
@@ -173,7 +173,7 @@ checksum = "930c7171c8df9fb1782bdf9b918ed9ed2d33d1d22300abb754f9085bc48bf8e8"
173
173
 
174
174
  [[package]]
175
175
  name = "diff_diff_rust"
176
- version = "2.3.1"
176
+ version = "2.3.2"
177
177
  dependencies = [
178
178
  "faer",
179
179
  "ndarray",
@@ -1376,9 +1376,9 @@ checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
1376
1376
 
1377
1377
  [[package]]
1378
1378
  name = "unicode-ident"
1379
- version = "1.0.23"
1379
+ version = "1.0.24"
1380
1380
  source = "registry+https://github.com/rust-lang/crates.io-index"
1381
- checksum = "537dd038a89878be9b64dd4bd1b260315c1bb94f4d784956b81e27a088d9a09e"
1381
+ checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
1382
1382
 
1383
1383
  [[package]]
1384
1384
  name = "unindent"
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "diff_diff_rust"
3
- version = "2.3.1"
3
+ version = "2.3.2"
4
4
  edition = "2021"
5
5
  description = "Rust backend for diff-diff DiD library"
6
6
  license = "MIT"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes