diff-diff 2.1.2__tar.gz → 2.1.4__tar.gz
This diff shows the differences in content between two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {diff_diff-2.1.2 → diff_diff-2.1.4}/PKG-INFO +1 -1
- {diff_diff-2.1.2 → diff_diff-2.1.4}/diff_diff/__init__.py +1 -1
- {diff_diff-2.1.2 → diff_diff-2.1.4}/diff_diff/estimators.py +78 -31
- {diff_diff-2.1.2 → diff_diff-2.1.4}/diff_diff/linalg.py +587 -71
- {diff_diff-2.1.2 → diff_diff-2.1.4}/diff_diff/results.py +8 -1
- diff_diff-2.1.4/diff_diff/staggered.py +1066 -0
- diff_diff-2.1.4/diff_diff/staggered_aggregation.py +435 -0
- diff_diff-2.1.4/diff_diff/staggered_bootstrap.py +643 -0
- diff_diff-2.1.4/diff_diff/staggered_results.py +294 -0
- {diff_diff-2.1.2 → diff_diff-2.1.4}/diff_diff/sun_abraham.py +15 -0
- {diff_diff-2.1.2 → diff_diff-2.1.4}/diff_diff/triple_diff.py +33 -2
- {diff_diff-2.1.2 → diff_diff-2.1.4}/diff_diff/trop.py +184 -46
- {diff_diff-2.1.2 → diff_diff-2.1.4}/diff_diff/twfe.py +59 -6
- {diff_diff-2.1.2 → diff_diff-2.1.4}/pyproject.toml +1 -1
- {diff_diff-2.1.2 → diff_diff-2.1.4}/rust/Cargo.lock +3 -3
- {diff_diff-2.1.2 → diff_diff-2.1.4}/rust/Cargo.toml +1 -1
- {diff_diff-2.1.2 → diff_diff-2.1.4}/rust/src/linalg.rs +72 -11
- {diff_diff-2.1.2 → diff_diff-2.1.4}/rust/src/trop.rs +131 -60
- diff_diff-2.1.2/diff_diff/staggered.py +0 -2301
- {diff_diff-2.1.2 → diff_diff-2.1.4}/README.md +0 -0
- {diff_diff-2.1.2 → diff_diff-2.1.4}/diff_diff/_backend.py +0 -0
- {diff_diff-2.1.2 → diff_diff-2.1.4}/diff_diff/bacon.py +0 -0
- {diff_diff-2.1.2 → diff_diff-2.1.4}/diff_diff/datasets.py +0 -0
- {diff_diff-2.1.2 → diff_diff-2.1.4}/diff_diff/diagnostics.py +0 -0
- {diff_diff-2.1.2 → diff_diff-2.1.4}/diff_diff/honest_did.py +0 -0
- {diff_diff-2.1.2 → diff_diff-2.1.4}/diff_diff/power.py +0 -0
- {diff_diff-2.1.2 → diff_diff-2.1.4}/diff_diff/prep.py +0 -0
- {diff_diff-2.1.2 → diff_diff-2.1.4}/diff_diff/pretrends.py +0 -0
- {diff_diff-2.1.2 → diff_diff-2.1.4}/diff_diff/synthetic_did.py +0 -0
- {diff_diff-2.1.2 → diff_diff-2.1.4}/diff_diff/utils.py +0 -0
- {diff_diff-2.1.2 → diff_diff-2.1.4}/diff_diff/visualization.py +0 -0
- {diff_diff-2.1.2 → diff_diff-2.1.4}/rust/src/bootstrap.rs +0 -0
- {diff_diff-2.1.2 → diff_diff-2.1.4}/rust/src/lib.rs +0 -0
- {diff_diff-2.1.2 → diff_diff-2.1.4}/rust/src/weights.rs +0 -0
|
@@ -64,6 +64,11 @@ class DifferenceInDifferences:
|
|
|
64
64
|
seed : int, optional
|
|
65
65
|
Random seed for reproducibility when using bootstrap inference.
|
|
66
66
|
If None (default), results will vary between runs.
|
|
67
|
+
rank_deficient_action : str, default "warn"
|
|
68
|
+
Action when design matrix is rank-deficient (linearly dependent columns):
|
|
69
|
+
- "warn": Issue warning and drop linearly dependent columns (default)
|
|
70
|
+
- "error": Raise ValueError
|
|
71
|
+
- "silent": Drop columns silently without warning
|
|
67
72
|
|
|
68
73
|
Attributes
|
|
69
74
|
----------
|
|
@@ -120,7 +125,8 @@ class DifferenceInDifferences:
|
|
|
120
125
|
inference: str = "analytical",
|
|
121
126
|
n_bootstrap: int = 999,
|
|
122
127
|
bootstrap_weights: str = "rademacher",
|
|
123
|
-
seed: Optional[int] = None
|
|
128
|
+
seed: Optional[int] = None,
|
|
129
|
+
rank_deficient_action: str = "warn",
|
|
124
130
|
):
|
|
125
131
|
self.robust = robust
|
|
126
132
|
self.cluster = cluster
|
|
@@ -129,6 +135,7 @@ class DifferenceInDifferences:
|
|
|
129
135
|
self.n_bootstrap = n_bootstrap
|
|
130
136
|
self.bootstrap_weights = bootstrap_weights
|
|
131
137
|
self.seed = seed
|
|
138
|
+
self.rank_deficient_action = rank_deficient_action
|
|
132
139
|
|
|
133
140
|
self.is_fitted_ = False
|
|
134
141
|
self.results_ = None
|
|
@@ -283,6 +290,7 @@ class DifferenceInDifferences:
|
|
|
283
290
|
robust=self.robust,
|
|
284
291
|
cluster_ids=cluster_ids if self.inference != "wild_bootstrap" else None,
|
|
285
292
|
alpha=self.alpha,
|
|
293
|
+
rank_deficient_action=self.rank_deficient_action,
|
|
286
294
|
).fit(X, y, df_adjustment=n_absorbed_effects)
|
|
287
295
|
|
|
288
296
|
coefficients = reg.coefficients_
|
|
@@ -596,6 +604,7 @@ class DifferenceInDifferences:
|
|
|
596
604
|
"n_bootstrap": self.n_bootstrap,
|
|
597
605
|
"bootstrap_weights": self.bootstrap_weights,
|
|
598
606
|
"seed": self.seed,
|
|
607
|
+
"rank_deficient_action": self.rank_deficient_action,
|
|
599
608
|
}
|
|
600
609
|
|
|
601
610
|
def set_params(self, **params) -> "DifferenceInDifferences":
|
|
@@ -873,29 +882,43 @@ class MultiPeriodDiD(DifferenceInDifferences):
|
|
|
873
882
|
var_names.append(col)
|
|
874
883
|
|
|
875
884
|
# Fit OLS using unified backend
|
|
876
|
-
|
|
877
|
-
|
|
885
|
+
# Pass cluster_ids to solve_ols for proper vcov computation
|
|
886
|
+
# This handles rank-deficient matrices by returning NaN for dropped columns
|
|
887
|
+
cluster_ids = data[self.cluster].values if self.cluster is not None else None
|
|
888
|
+
|
|
889
|
+
# Note: Wild bootstrap for multi-period effects is complex (multiple coefficients)
|
|
890
|
+
# For now, we use analytical inference even if inference="wild_bootstrap"
|
|
891
|
+
coefficients, residuals, fitted, vcov = solve_ols(
|
|
892
|
+
X, y,
|
|
893
|
+
return_fitted=True,
|
|
894
|
+
return_vcov=True,
|
|
895
|
+
cluster_ids=cluster_ids,
|
|
896
|
+
column_names=var_names,
|
|
897
|
+
rank_deficient_action=self.rank_deficient_action,
|
|
878
898
|
)
|
|
879
899
|
r_squared = compute_r_squared(y, residuals)
|
|
880
900
|
|
|
881
|
-
# Degrees of freedom
|
|
882
|
-
|
|
901
|
+
# Degrees of freedom using effective rank (non-NaN coefficients)
|
|
902
|
+
k_effective = int(np.sum(~np.isnan(coefficients)))
|
|
903
|
+
df = len(y) - k_effective - n_absorbed_effects
|
|
883
904
|
|
|
884
|
-
#
|
|
885
|
-
#
|
|
886
|
-
|
|
887
|
-
if self.cluster is not None:
|
|
888
|
-
cluster_ids = data[self.cluster].values
|
|
889
|
-
vcov = compute_robust_vcov(X, residuals, cluster_ids)
|
|
890
|
-
elif self.robust:
|
|
891
|
-
vcov = compute_robust_vcov(X, residuals)
|
|
892
|
-
else:
|
|
905
|
+
# For non-robust, non-clustered case, we need homoskedastic vcov
|
|
906
|
+
# solve_ols returns HC1 by default, so compute homoskedastic if needed
|
|
907
|
+
if not self.robust and self.cluster is None:
|
|
893
908
|
n = len(y)
|
|
894
|
-
|
|
895
|
-
mse = np.sum(residuals**2) / (n - k)
|
|
909
|
+
mse = np.sum(residuals**2) / (n - k_effective)
|
|
896
910
|
# Use solve() instead of inv() for numerical stability
|
|
897
|
-
#
|
|
898
|
-
|
|
911
|
+
# Only compute for identified columns (non-NaN coefficients)
|
|
912
|
+
identified_mask = ~np.isnan(coefficients)
|
|
913
|
+
if np.all(identified_mask):
|
|
914
|
+
vcov = np.linalg.solve(X.T @ X, mse * np.eye(X.shape[1]))
|
|
915
|
+
else:
|
|
916
|
+
# For rank-deficient case, compute vcov on reduced matrix then expand
|
|
917
|
+
X_reduced = X[:, identified_mask]
|
|
918
|
+
vcov_reduced = np.linalg.solve(X_reduced.T @ X_reduced, mse * np.eye(X_reduced.shape[1]))
|
|
919
|
+
# Expand to full size with NaN for dropped columns
|
|
920
|
+
vcov = np.full((X.shape[1], X.shape[1]), np.nan)
|
|
921
|
+
vcov[np.ix_(identified_mask, identified_mask)] = vcov_reduced
|
|
899
922
|
|
|
900
923
|
# Extract period-specific treatment effects
|
|
901
924
|
period_effects = {}
|
|
@@ -922,19 +945,43 @@ class MultiPeriodDiD(DifferenceInDifferences):
|
|
|
922
945
|
effect_indices.append(idx)
|
|
923
946
|
|
|
924
947
|
# Compute average treatment effect
|
|
925
|
-
#
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
948
|
+
# R-style NA propagation: if ANY period effect is NaN, average is undefined
|
|
949
|
+
effect_arr = np.array(effect_values)
|
|
950
|
+
|
|
951
|
+
if np.any(np.isnan(effect_arr)):
|
|
952
|
+
# Some period effects are NaN (unidentified) - cannot compute valid average
|
|
953
|
+
# This follows R's default behavior where mean(c(1, 2, NA)) returns NA
|
|
954
|
+
avg_att = np.nan
|
|
955
|
+
avg_se = np.nan
|
|
956
|
+
avg_t_stat = np.nan
|
|
957
|
+
avg_p_value = np.nan
|
|
958
|
+
avg_conf_int = (np.nan, np.nan)
|
|
959
|
+
else:
|
|
960
|
+
# All effects identified - compute average normally
|
|
961
|
+
avg_att = float(np.mean(effect_arr))
|
|
962
|
+
|
|
963
|
+
# Standard error of average: need to account for covariance
|
|
964
|
+
n_post = len(post_periods)
|
|
965
|
+
sub_vcov = vcov[np.ix_(effect_indices, effect_indices)]
|
|
966
|
+
avg_var = np.sum(sub_vcov) / (n_post ** 2)
|
|
967
|
+
|
|
968
|
+
if np.isnan(avg_var) or avg_var < 0:
|
|
969
|
+
# Vcov has NaN (dropped columns) - propagate NaN
|
|
970
|
+
avg_se = np.nan
|
|
971
|
+
avg_t_stat = np.nan
|
|
972
|
+
avg_p_value = np.nan
|
|
973
|
+
avg_conf_int = (np.nan, np.nan)
|
|
974
|
+
else:
|
|
975
|
+
avg_se = float(np.sqrt(avg_var))
|
|
976
|
+
if avg_se > 0:
|
|
977
|
+
avg_t_stat = avg_att / avg_se
|
|
978
|
+
avg_p_value = compute_p_value(avg_t_stat, df=df)
|
|
979
|
+
avg_conf_int = compute_confidence_interval(avg_att, avg_se, self.alpha, df=df)
|
|
980
|
+
else:
|
|
981
|
+
# Zero SE (degenerate case)
|
|
982
|
+
avg_t_stat = np.nan
|
|
983
|
+
avg_p_value = np.nan
|
|
984
|
+
avg_conf_int = (np.nan, np.nan)
|
|
938
985
|
|
|
939
986
|
# Count observations
|
|
940
987
|
n_treated = int(np.sum(d))
|