diff-diff 2.3.1__tar.gz → 2.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diff_diff-2.3.1 → diff_diff-2.3.2}/PKG-INFO +4 -3
- {diff_diff-2.3.1 → diff_diff-2.3.2}/README.md +1 -1
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/__init__.py +1 -1
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/staggered.py +6 -3
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/sun_abraham.py +60 -24
- {diff_diff-2.3.1 → diff_diff-2.3.2}/pyproject.toml +3 -2
- {diff_diff-2.3.1 → diff_diff-2.3.2}/rust/Cargo.lock +3 -3
- {diff_diff-2.3.1 → diff_diff-2.3.2}/rust/Cargo.toml +1 -1
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/_backend.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/bacon.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/datasets.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/diagnostics.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/estimators.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/honest_did.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/imputation.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/linalg.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/power.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/prep.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/prep_dgp.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/pretrends.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/results.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/staggered_aggregation.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/staggered_bootstrap.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/staggered_results.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/synthetic_did.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/triple_diff.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/trop.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/twfe.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/utils.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/diff_diff/visualization.py +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/rust/src/bootstrap.rs +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/rust/src/lib.rs +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/rust/src/linalg.rs +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/rust/src/trop.rs +0 -0
- {diff_diff-2.3.1 → diff_diff-2.3.2}/rust/src/weights.rs +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: diff-diff
|
|
3
|
-
Version: 2.3.1
|
|
3
|
+
Version: 2.3.2
|
|
4
4
|
Classifier: Development Status :: 5 - Production/Stable
|
|
5
5
|
Classifier: Intended Audience :: Science/Research
|
|
6
6
|
Classifier: Operating System :: OS Independent
|
|
@@ -9,6 +9,7 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
9
9
|
Classifier: Programming Language :: Python :: 3.10
|
|
10
10
|
Classifier: Programming Language :: Python :: 3.11
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
12
13
|
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
13
14
|
Requires-Dist: numpy>=1.20.0
|
|
14
15
|
Requires-Dist: pandas>=1.3.0
|
|
@@ -28,7 +29,7 @@ Summary: A library for Difference-in-Differences causal inference analysis
|
|
|
28
29
|
Keywords: causal-inference,difference-in-differences,econometrics,statistics,treatment-effects
|
|
29
30
|
Author: diff-diff contributors
|
|
30
31
|
License-Expression: MIT
|
|
31
|
-
Requires-Python: >=3.9
|
|
32
|
+
Requires-Python: >=3.9, <3.14
|
|
32
33
|
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
33
34
|
Project-URL: Documentation, https://diff-diff.readthedocs.io
|
|
34
35
|
Project-URL: Homepage, https://github.com/igerber/diff-diff
|
|
@@ -2489,7 +2490,7 @@ Returns DataFrame with columns: `unit`, `quality_score`, `outcome_trend_score`,
|
|
|
2489
2490
|
|
|
2490
2491
|
## Requirements
|
|
2491
2492
|
|
|
2492
|
-
- Python
|
|
2493
|
+
- Python 3.9 - 3.13
|
|
2493
2494
|
- numpy >= 1.20
|
|
2494
2495
|
- pandas >= 1.3
|
|
2495
2496
|
- scipy >= 1.7
|
|
@@ -415,6 +415,7 @@ class CallawaySantAnna(
|
|
|
415
415
|
cohort_masks[g] = (unit_cohorts == g)
|
|
416
416
|
|
|
417
417
|
# Never-treated mask
|
|
418
|
+
# np.inf was normalized to 0 in fit(), so the np.inf check is defensive only
|
|
418
419
|
never_treated_mask = (unit_cohorts == 0) | (unit_cohorts == np.inf)
|
|
419
420
|
|
|
420
421
|
# Pre-compute covariate matrices by time period if needed
|
|
@@ -639,13 +640,15 @@ class CallawaySantAnna(
|
|
|
639
640
|
# This avoids hardcoding column names in internal methods
|
|
640
641
|
df['first_treat'] = df[first_treat]
|
|
641
642
|
|
|
643
|
+
# Never-treated indicator (must precede treatment_groups to exclude np.inf)
|
|
644
|
+
df['_never_treated'] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
|
|
645
|
+
# Normalize np.inf → 0 so all downstream `> 0` checks exclude never-treated
|
|
646
|
+
df.loc[df[first_treat] == np.inf, first_treat] = 0
|
|
647
|
+
|
|
642
648
|
# Identify groups and time periods
|
|
643
649
|
time_periods = sorted(df[time].unique())
|
|
644
650
|
treatment_groups = sorted([g for g in df[first_treat].unique() if g > 0])
|
|
645
651
|
|
|
646
|
-
# Never-treated indicator (first_treat = 0 or inf)
|
|
647
|
-
df['_never_treated'] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
|
|
648
|
-
|
|
649
652
|
# Get unique units
|
|
650
653
|
unit_info = df.groupby(unit).agg({
|
|
651
654
|
first_treat: 'first',
|
|
@@ -456,9 +456,9 @@ class SunAbraham:
|
|
|
456
456
|
covariates : list, optional
|
|
457
457
|
List of covariate column names to include in regression.
|
|
458
458
|
min_pre_periods : int, default=1
|
|
459
|
-
|
|
459
|
+
**Deprecated**: Accepted but ignored. Will be removed in a future version.
|
|
460
460
|
min_post_periods : int, default=1
|
|
461
|
-
|
|
461
|
+
**Deprecated**: Accepted but ignored. Will be removed in a future version.
|
|
462
462
|
|
|
463
463
|
Returns
|
|
464
464
|
-------
|
|
@@ -470,6 +470,22 @@ class SunAbraham:
|
|
|
470
470
|
ValueError
|
|
471
471
|
If required columns are missing or data validation fails.
|
|
472
472
|
"""
|
|
473
|
+
# Deprecation warnings for unimplemented parameters
|
|
474
|
+
if min_pre_periods != 1:
|
|
475
|
+
warnings.warn(
|
|
476
|
+
"min_pre_periods is not yet implemented and will be ignored. "
|
|
477
|
+
"This parameter will be removed in a future version.",
|
|
478
|
+
FutureWarning,
|
|
479
|
+
stacklevel=2,
|
|
480
|
+
)
|
|
481
|
+
if min_post_periods != 1:
|
|
482
|
+
warnings.warn(
|
|
483
|
+
"min_post_periods is not yet implemented and will be ignored. "
|
|
484
|
+
"This parameter will be removed in a future version.",
|
|
485
|
+
FutureWarning,
|
|
486
|
+
stacklevel=2,
|
|
487
|
+
)
|
|
488
|
+
|
|
473
489
|
# Validate inputs
|
|
474
490
|
required_cols = [outcome, unit, time, first_treat]
|
|
475
491
|
if covariates:
|
|
@@ -486,13 +502,15 @@ class SunAbraham:
|
|
|
486
502
|
df[time] = pd.to_numeric(df[time])
|
|
487
503
|
df[first_treat] = pd.to_numeric(df[first_treat])
|
|
488
504
|
|
|
505
|
+
# Never-treated indicator (must precede treatment_groups to exclude np.inf)
|
|
506
|
+
df["_never_treated"] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
|
|
507
|
+
# Normalize np.inf → 0 so all downstream `> 0` checks exclude never-treated
|
|
508
|
+
df.loc[df[first_treat] == np.inf, first_treat] = 0
|
|
509
|
+
|
|
489
510
|
# Identify groups and time periods
|
|
490
511
|
time_periods = sorted(df[time].unique())
|
|
491
512
|
treatment_groups = sorted([g for g in df[first_treat].unique() if g > 0])
|
|
492
513
|
|
|
493
|
-
# Never-treated indicator
|
|
494
|
-
df["_never_treated"] = (df[first_treat] == 0) | (df[first_treat] == np.inf)
|
|
495
|
-
|
|
496
514
|
# Get unique units
|
|
497
515
|
unit_info = (
|
|
498
516
|
df.groupby(unit)
|
|
@@ -533,9 +551,9 @@ class SunAbraham:
|
|
|
533
551
|
|
|
534
552
|
all_rel_times_sorted = sorted(all_rel_times)
|
|
535
553
|
|
|
536
|
-
#
|
|
537
|
-
min_rel =
|
|
538
|
-
max_rel =
|
|
554
|
+
# Use full range of relative times (no artificial truncation, matches R's fixest::sunab())
|
|
555
|
+
min_rel = min(all_rel_times_sorted)
|
|
556
|
+
max_rel = max(all_rel_times_sorted)
|
|
539
557
|
|
|
540
558
|
# Reference period: last pre-treatment period (typically -1)
|
|
541
559
|
self._reference_period = -1 - self.anticipation
|
|
@@ -765,12 +783,18 @@ class SunAbraham:
|
|
|
765
783
|
|
|
766
784
|
# Fit OLS using LinearRegression helper (more stable than manual X'X inverse)
|
|
767
785
|
cluster_ids = df_demeaned[cluster_var].values
|
|
786
|
+
|
|
787
|
+
# Degrees of freedom adjustment for absorbed unit and time fixed effects
|
|
788
|
+
n_units_fe = df[unit].nunique()
|
|
789
|
+
n_times_fe = df[time].nunique()
|
|
790
|
+
df_adj = n_units_fe + n_times_fe - 1
|
|
791
|
+
|
|
768
792
|
reg = LinearRegression(
|
|
769
793
|
include_intercept=False, # Already demeaned, no intercept needed
|
|
770
794
|
robust=True,
|
|
771
795
|
cluster_ids=cluster_ids,
|
|
772
796
|
rank_deficient_action=self.rank_deficient_action,
|
|
773
|
-
).fit(X, y)
|
|
797
|
+
).fit(X, y, df_adjustment=df_adj)
|
|
774
798
|
|
|
775
799
|
coefficients = reg.coefficients_
|
|
776
800
|
vcov = reg.vcov_
|
|
@@ -821,7 +845,8 @@ class SunAbraham:
|
|
|
821
845
|
|
|
822
846
|
β_e = Σ_g w_{g,e} × δ_{g,e}
|
|
823
847
|
|
|
824
|
-
where w_{g,e}
|
|
848
|
+
where w_{g,e} = n_{g,e} / Σ_g n_{g,e} is the share of observations from cohort g
|
|
849
|
+
at event-time e among all treated observations at that event-time.
|
|
825
850
|
|
|
826
851
|
Returns
|
|
827
852
|
-------
|
|
@@ -833,9 +858,8 @@ class SunAbraham:
|
|
|
833
858
|
event_study_effects: Dict[int, Dict[str, Any]] = {}
|
|
834
859
|
cohort_weights: Dict[int, Dict[Any, float]] = {}
|
|
835
860
|
|
|
836
|
-
#
|
|
837
|
-
|
|
838
|
-
cohort_sizes = unit_cohorts[unit_cohorts > 0].value_counts().to_dict()
|
|
861
|
+
# Pre-compute per-event-time observation counts: n_{g,e}
|
|
862
|
+
event_time_counts = df[df[first_treat] > 0].groupby([first_treat, "_rel_time"]).size()
|
|
839
863
|
|
|
840
864
|
for e in rel_periods:
|
|
841
865
|
# Get cohorts that have observations at this relative time
|
|
@@ -847,13 +871,13 @@ class SunAbraham:
|
|
|
847
871
|
if not cohorts_at_e:
|
|
848
872
|
continue
|
|
849
873
|
|
|
850
|
-
# Compute IW weights:
|
|
874
|
+
# Compute IW weights: n_{g,e} / Σ_g n_{g,e}
|
|
851
875
|
weights = {}
|
|
852
876
|
total_size = 0
|
|
853
877
|
for g in cohorts_at_e:
|
|
854
|
-
|
|
855
|
-
weights[g] =
|
|
856
|
-
total_size +=
|
|
878
|
+
n_g_e = event_time_counts.get((g, e), 0)
|
|
879
|
+
weights[g] = n_g_e
|
|
880
|
+
total_size += n_g_e
|
|
857
881
|
|
|
858
882
|
if total_size == 0:
|
|
859
883
|
continue
|
|
@@ -915,7 +939,7 @@ class SunAbraham:
|
|
|
915
939
|
]
|
|
916
940
|
|
|
917
941
|
if not post_effects:
|
|
918
|
-
return
|
|
942
|
+
return np.nan, np.nan
|
|
919
943
|
|
|
920
944
|
# Weight by number of treated observations at each relative time
|
|
921
945
|
post_weights = []
|
|
@@ -948,7 +972,13 @@ class SunAbraham:
|
|
|
948
972
|
overall_weights_by_coef[key] += period_weight * cw
|
|
949
973
|
|
|
950
974
|
if not overall_weights_by_coef:
|
|
951
|
-
# Fallback to
|
|
975
|
+
# Fallback to simplified variance that ignores covariances between periods
|
|
976
|
+
warnings.warn(
|
|
977
|
+
"Could not construct full weight vector for overall ATT SE. "
|
|
978
|
+
"Using simplified variance that ignores covariances between periods.",
|
|
979
|
+
UserWarning,
|
|
980
|
+
stacklevel=2,
|
|
981
|
+
)
|
|
952
982
|
overall_var = float(
|
|
953
983
|
np.sum((post_weights ** 2) * np.array([eff["se"] ** 2 for _, eff in post_effects]))
|
|
954
984
|
)
|
|
@@ -1029,6 +1059,7 @@ class SunAbraham:
|
|
|
1029
1059
|
df_b[time] - df_b[first_treat],
|
|
1030
1060
|
np.nan
|
|
1031
1061
|
)
|
|
1062
|
+
# np.inf was normalized to 0 in fit(), so the np.inf check is defensive only
|
|
1032
1063
|
df_b["_never_treated"] = (
|
|
1033
1064
|
(df_b[first_treat] == 0) | (df_b[first_treat] == np.inf)
|
|
1034
1065
|
)
|
|
@@ -1113,11 +1144,16 @@ class SunAbraham:
|
|
|
1113
1144
|
event_study_p_values[e] = p_value
|
|
1114
1145
|
|
|
1115
1146
|
# Overall ATT statistics
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1147
|
+
if not np.isfinite(original_overall_att):
|
|
1148
|
+
overall_se = np.nan
|
|
1149
|
+
overall_ci = (np.nan, np.nan)
|
|
1150
|
+
overall_p = np.nan
|
|
1151
|
+
else:
|
|
1152
|
+
overall_se = float(np.std(bootstrap_overall, ddof=1))
|
|
1153
|
+
overall_ci = self._compute_percentile_ci(bootstrap_overall, self.alpha)
|
|
1154
|
+
overall_p = self._compute_bootstrap_pvalue(
|
|
1155
|
+
original_overall_att, bootstrap_overall
|
|
1156
|
+
)
|
|
1121
1157
|
|
|
1122
1158
|
return SABootstrapResults(
|
|
1123
1159
|
n_bootstrap=self.n_bootstrap,
|
|
@@ -4,11 +4,11 @@ build-backend = "maturin"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "diff-diff"
|
|
7
|
-
version = "2.3.1"
|
|
7
|
+
version = "2.3.2"
|
|
8
8
|
description = "A library for Difference-in-Differences causal inference analysis"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
11
|
-
requires-python = ">=3.9"
|
|
11
|
+
requires-python = ">=3.9,<3.14"
|
|
12
12
|
authors = [
|
|
13
13
|
{name = "diff-diff contributors"}
|
|
14
14
|
]
|
|
@@ -28,6 +28,7 @@ classifiers = [
|
|
|
28
28
|
"Programming Language :: Python :: 3.10",
|
|
29
29
|
"Programming Language :: Python :: 3.11",
|
|
30
30
|
"Programming Language :: Python :: 3.12",
|
|
31
|
+
"Programming Language :: Python :: 3.13",
|
|
31
32
|
"Topic :: Scientific/Engineering :: Mathematics",
|
|
32
33
|
]
|
|
33
34
|
dependencies = [
|
|
@@ -173,7 +173,7 @@ checksum = "930c7171c8df9fb1782bdf9b918ed9ed2d33d1d22300abb754f9085bc48bf8e8"
|
|
|
173
173
|
|
|
174
174
|
[[package]]
|
|
175
175
|
name = "diff_diff_rust"
|
|
176
|
-
version = "2.3.1"
|
|
176
|
+
version = "2.3.2"
|
|
177
177
|
dependencies = [
|
|
178
178
|
"faer",
|
|
179
179
|
"ndarray",
|
|
@@ -1376,9 +1376,9 @@ checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
|
|
|
1376
1376
|
|
|
1377
1377
|
[[package]]
|
|
1378
1378
|
name = "unicode-ident"
|
|
1379
|
-
version = "1.0.
|
|
1379
|
+
version = "1.0.24"
|
|
1380
1380
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1381
|
-
checksum = "
|
|
1381
|
+
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
|
1382
1382
|
|
|
1383
1383
|
[[package]]
|
|
1384
1384
|
name = "unindent"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|