diff-diff 2.1.3__tar.gz → 2.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {diff_diff-2.1.3 → diff_diff-2.1.4}/PKG-INFO +1 -1
  2. {diff_diff-2.1.3 → diff_diff-2.1.4}/diff_diff/__init__.py +1 -1
  3. {diff_diff-2.1.3 → diff_diff-2.1.4}/diff_diff/estimators.py +78 -31
  4. {diff_diff-2.1.3 → diff_diff-2.1.4}/diff_diff/linalg.py +587 -71
  5. {diff_diff-2.1.3 → diff_diff-2.1.4}/diff_diff/results.py +8 -1
  6. diff_diff-2.1.4/diff_diff/staggered.py +1066 -0
  7. diff_diff-2.1.4/diff_diff/staggered_aggregation.py +435 -0
  8. diff_diff-2.1.4/diff_diff/staggered_bootstrap.py +643 -0
  9. diff_diff-2.1.4/diff_diff/staggered_results.py +294 -0
  10. {diff_diff-2.1.3 → diff_diff-2.1.4}/diff_diff/sun_abraham.py +15 -0
  11. {diff_diff-2.1.3 → diff_diff-2.1.4}/diff_diff/triple_diff.py +33 -2
  12. {diff_diff-2.1.3 → diff_diff-2.1.4}/diff_diff/twfe.py +59 -6
  13. {diff_diff-2.1.3 → diff_diff-2.1.4}/pyproject.toml +1 -1
  14. {diff_diff-2.1.3 → diff_diff-2.1.4}/rust/Cargo.lock +3 -3
  15. {diff_diff-2.1.3 → diff_diff-2.1.4}/rust/Cargo.toml +1 -1
  16. {diff_diff-2.1.3 → diff_diff-2.1.4}/rust/src/linalg.rs +72 -11
  17. diff_diff-2.1.3/diff_diff/staggered.py +0 -2301
  18. {diff_diff-2.1.3 → diff_diff-2.1.4}/README.md +0 -0
  19. {diff_diff-2.1.3 → diff_diff-2.1.4}/diff_diff/_backend.py +0 -0
  20. {diff_diff-2.1.3 → diff_diff-2.1.4}/diff_diff/bacon.py +0 -0
  21. {diff_diff-2.1.3 → diff_diff-2.1.4}/diff_diff/datasets.py +0 -0
  22. {diff_diff-2.1.3 → diff_diff-2.1.4}/diff_diff/diagnostics.py +0 -0
  23. {diff_diff-2.1.3 → diff_diff-2.1.4}/diff_diff/honest_did.py +0 -0
  24. {diff_diff-2.1.3 → diff_diff-2.1.4}/diff_diff/power.py +0 -0
  25. {diff_diff-2.1.3 → diff_diff-2.1.4}/diff_diff/prep.py +0 -0
  26. {diff_diff-2.1.3 → diff_diff-2.1.4}/diff_diff/pretrends.py +0 -0
  27. {diff_diff-2.1.3 → diff_diff-2.1.4}/diff_diff/synthetic_did.py +0 -0
  28. {diff_diff-2.1.3 → diff_diff-2.1.4}/diff_diff/trop.py +0 -0
  29. {diff_diff-2.1.3 → diff_diff-2.1.4}/diff_diff/utils.py +0 -0
  30. {diff_diff-2.1.3 → diff_diff-2.1.4}/diff_diff/visualization.py +0 -0
  31. {diff_diff-2.1.3 → diff_diff-2.1.4}/rust/src/bootstrap.rs +0 -0
  32. {diff_diff-2.1.3 → diff_diff-2.1.4}/rust/src/lib.rs +0 -0
  33. {diff_diff-2.1.3 → diff_diff-2.1.4}/rust/src/trop.rs +0 -0
  34. {diff_diff-2.1.3 → diff_diff-2.1.4}/rust/src/weights.rs +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diff-diff
3
- Version: 2.1.3
3
+ Version: 2.1.4
4
4
  Classifier: Development Status :: 5 - Production/Stable
5
5
  Classifier: Intended Audience :: Science/Research
6
6
  Classifier: Operating System :: OS Independent
@@ -136,7 +136,7 @@ from diff_diff.datasets import (
136
136
  load_mpdta,
137
137
  )
138
138
 
139
- __version__ = "2.1.3"
139
+ __version__ = "2.1.4"
140
140
  __all__ = [
141
141
  # Estimators
142
142
  "DifferenceInDifferences",
@@ -64,6 +64,11 @@ class DifferenceInDifferences:
64
64
  seed : int, optional
65
65
  Random seed for reproducibility when using bootstrap inference.
66
66
  If None (default), results will vary between runs.
67
+ rank_deficient_action : str, default "warn"
68
+ Action when design matrix is rank-deficient (linearly dependent columns):
69
+ - "warn": Issue warning and drop linearly dependent columns (default)
70
+ - "error": Raise ValueError
71
+ - "silent": Drop columns silently without warning
67
72
 
68
73
  Attributes
69
74
  ----------
@@ -120,7 +125,8 @@ class DifferenceInDifferences:
120
125
  inference: str = "analytical",
121
126
  n_bootstrap: int = 999,
122
127
  bootstrap_weights: str = "rademacher",
123
- seed: Optional[int] = None
128
+ seed: Optional[int] = None,
129
+ rank_deficient_action: str = "warn",
124
130
  ):
125
131
  self.robust = robust
126
132
  self.cluster = cluster
@@ -129,6 +135,7 @@ class DifferenceInDifferences:
129
135
  self.n_bootstrap = n_bootstrap
130
136
  self.bootstrap_weights = bootstrap_weights
131
137
  self.seed = seed
138
+ self.rank_deficient_action = rank_deficient_action
132
139
 
133
140
  self.is_fitted_ = False
134
141
  self.results_ = None
@@ -283,6 +290,7 @@ class DifferenceInDifferences:
283
290
  robust=self.robust,
284
291
  cluster_ids=cluster_ids if self.inference != "wild_bootstrap" else None,
285
292
  alpha=self.alpha,
293
+ rank_deficient_action=self.rank_deficient_action,
286
294
  ).fit(X, y, df_adjustment=n_absorbed_effects)
287
295
 
288
296
  coefficients = reg.coefficients_
@@ -596,6 +604,7 @@ class DifferenceInDifferences:
596
604
  "n_bootstrap": self.n_bootstrap,
597
605
  "bootstrap_weights": self.bootstrap_weights,
598
606
  "seed": self.seed,
607
+ "rank_deficient_action": self.rank_deficient_action,
599
608
  }
600
609
 
601
610
  def set_params(self, **params) -> "DifferenceInDifferences":
@@ -873,29 +882,43 @@ class MultiPeriodDiD(DifferenceInDifferences):
873
882
  var_names.append(col)
874
883
 
875
884
  # Fit OLS using unified backend
876
- coefficients, residuals, fitted, _ = solve_ols(
877
- X, y, return_fitted=True, return_vcov=False
885
+ # Pass cluster_ids to solve_ols for proper vcov computation
886
+ # This handles rank-deficient matrices by returning NaN for dropped columns
887
+ cluster_ids = data[self.cluster].values if self.cluster is not None else None
888
+
889
+ # Note: Wild bootstrap for multi-period effects is complex (multiple coefficients)
890
+ # For now, we use analytical inference even if inference="wild_bootstrap"
891
+ coefficients, residuals, fitted, vcov = solve_ols(
892
+ X, y,
893
+ return_fitted=True,
894
+ return_vcov=True,
895
+ cluster_ids=cluster_ids,
896
+ column_names=var_names,
897
+ rank_deficient_action=self.rank_deficient_action,
878
898
  )
879
899
  r_squared = compute_r_squared(y, residuals)
880
900
 
881
- # Degrees of freedom
882
- df = len(y) - X.shape[1] - n_absorbed_effects
901
+ # Degrees of freedom using effective rank (non-NaN coefficients)
902
+ k_effective = int(np.sum(~np.isnan(coefficients)))
903
+ df = len(y) - k_effective - n_absorbed_effects
883
904
 
884
- # Compute standard errors
885
- # Note: Wild bootstrap for multi-period effects is complex (multiple coefficients)
886
- # For now, we use analytical inference even if inference="wild_bootstrap"
887
- if self.cluster is not None:
888
- cluster_ids = data[self.cluster].values
889
- vcov = compute_robust_vcov(X, residuals, cluster_ids)
890
- elif self.robust:
891
- vcov = compute_robust_vcov(X, residuals)
892
- else:
905
+ # For non-robust, non-clustered case, we need homoskedastic vcov
906
+ # solve_ols returns HC1 by default, so compute homoskedastic if needed
907
+ if not self.robust and self.cluster is None:
893
908
  n = len(y)
894
- k = X.shape[1]
895
- mse = np.sum(residuals**2) / (n - k)
909
+ mse = np.sum(residuals**2) / (n - k_effective)
896
910
  # Use solve() instead of inv() for numerical stability
897
- # solve(A, B) computes X where AX=B, so this yields (X'X)^{-1} * mse
898
- vcov = np.linalg.solve(X.T @ X, mse * np.eye(k))
911
+ # Only compute for identified columns (non-NaN coefficients)
912
+ identified_mask = ~np.isnan(coefficients)
913
+ if np.all(identified_mask):
914
+ vcov = np.linalg.solve(X.T @ X, mse * np.eye(X.shape[1]))
915
+ else:
916
+ # For rank-deficient case, compute vcov on reduced matrix then expand
917
+ X_reduced = X[:, identified_mask]
918
+ vcov_reduced = np.linalg.solve(X_reduced.T @ X_reduced, mse * np.eye(X_reduced.shape[1]))
919
+ # Expand to full size with NaN for dropped columns
920
+ vcov = np.full((X.shape[1], X.shape[1]), np.nan)
921
+ vcov[np.ix_(identified_mask, identified_mask)] = vcov_reduced
899
922
 
900
923
  # Extract period-specific treatment effects
901
924
  period_effects = {}
@@ -922,19 +945,43 @@ class MultiPeriodDiD(DifferenceInDifferences):
922
945
  effect_indices.append(idx)
923
946
 
924
947
  # Compute average treatment effect
925
- # Average ATT = mean of period-specific effects
926
- avg_att = np.mean(effect_values)
927
-
928
- # Standard error of average: need to account for covariance
929
- # Var(avg) = (1/n^2) * sum of all elements in the sub-covariance matrix
930
- n_post = len(post_periods)
931
- sub_vcov = vcov[np.ix_(effect_indices, effect_indices)]
932
- avg_var = np.sum(sub_vcov) / (n_post ** 2)
933
- avg_se = np.sqrt(avg_var)
934
-
935
- avg_t_stat = avg_att / avg_se if avg_se > 0 else 0.0
936
- avg_p_value = compute_p_value(avg_t_stat, df=df)
937
- avg_conf_int = compute_confidence_interval(avg_att, avg_se, self.alpha, df=df)
948
+ # R-style NA propagation: if ANY period effect is NaN, average is undefined
949
+ effect_arr = np.array(effect_values)
950
+
951
+ if np.any(np.isnan(effect_arr)):
952
+ # Some period effects are NaN (unidentified) - cannot compute valid average
953
+ # This follows R's default behavior where mean(c(1, 2, NA)) returns NA
954
+ avg_att = np.nan
955
+ avg_se = np.nan
956
+ avg_t_stat = np.nan
957
+ avg_p_value = np.nan
958
+ avg_conf_int = (np.nan, np.nan)
959
+ else:
960
+ # All effects identified - compute average normally
961
+ avg_att = float(np.mean(effect_arr))
962
+
963
+ # Standard error of average: need to account for covariance
964
+ n_post = len(post_periods)
965
+ sub_vcov = vcov[np.ix_(effect_indices, effect_indices)]
966
+ avg_var = np.sum(sub_vcov) / (n_post ** 2)
967
+
968
+ if np.isnan(avg_var) or avg_var < 0:
969
+ # Vcov has NaN (dropped columns) - propagate NaN
970
+ avg_se = np.nan
971
+ avg_t_stat = np.nan
972
+ avg_p_value = np.nan
973
+ avg_conf_int = (np.nan, np.nan)
974
+ else:
975
+ avg_se = float(np.sqrt(avg_var))
976
+ if avg_se > 0:
977
+ avg_t_stat = avg_att / avg_se
978
+ avg_p_value = compute_p_value(avg_t_stat, df=df)
979
+ avg_conf_int = compute_confidence_interval(avg_att, avg_se, self.alpha, df=df)
980
+ else:
981
+ # Zero SE (degenerate case)
982
+ avg_t_stat = np.nan
983
+ avg_p_value = np.nan
984
+ avg_conf_int = (np.nan, np.nan)
938
985
 
939
986
  # Count observations
940
987
  n_treated = int(np.sum(d))