cbps 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. cbps/__init__.py +3462 -0
  2. cbps/constants.py +46 -0
  3. cbps/core/__init__.py +93 -0
  4. cbps/core/cbps_binary.py +1943 -0
  5. cbps/core/cbps_continuous.py +945 -0
  6. cbps/core/cbps_multitreat.py +1123 -0
  7. cbps/core/cbps_optimal.py +507 -0
  8. cbps/core/results.py +1447 -0
  9. cbps/data/Blackwell.csv +571 -0
  10. cbps/data/LaLonde.csv +3213 -0
  11. cbps/data/npcbps_continuous_sim.csv +501 -0
  12. cbps/data/nsw.csv +723 -0
  13. cbps/data/nsw_dw.csv +446 -0
  14. cbps/data/political_ads_urban_niebler.csv +16266 -0
  15. cbps/data/psid_controls.csv +2491 -0
  16. cbps/data/psid_controls2.csv +254 -0
  17. cbps/data/psid_controls3.csv +129 -0
  18. cbps/data/simulation_dgp1_seed12345.csv +201 -0
  19. cbps/data/simulation_dgp2_seed12345.csv +201 -0
  20. cbps/data/simulation_dgp3_seed12345.csv +201 -0
  21. cbps/data/simulation_dgp4_seed12345.csv +201 -0
  22. cbps/datasets/__init__.py +78 -0
  23. cbps/datasets/blackwell.py +112 -0
  24. cbps/datasets/continuous.py +223 -0
  25. cbps/datasets/lalonde.py +272 -0
  26. cbps/datasets/npcbps_sim.py +101 -0
  27. cbps/diagnostics/__init__.py +101 -0
  28. cbps/diagnostics/balance.py +760 -0
  29. cbps/diagnostics/balance_cbmsm_addon.py +162 -0
  30. cbps/diagnostics/continuous_diagnostics.py +259 -0
  31. cbps/diagnostics/normality.py +173 -0
  32. cbps/diagnostics/ocbps_conditions.py +197 -0
  33. cbps/diagnostics/overlap.py +198 -0
  34. cbps/diagnostics/plots.py +1193 -0
  35. cbps/diagnostics/weights_diag.py +205 -0
  36. cbps/highdim/__init__.py +84 -0
  37. cbps/highdim/gmm_loss.py +340 -0
  38. cbps/highdim/hdcbps.py +1078 -0
  39. cbps/highdim/lasso_utils.py +498 -0
  40. cbps/highdim/weight_funcs.py +298 -0
  41. cbps/inference/__init__.py +42 -0
  42. cbps/inference/asyvar.py +621 -0
  43. cbps/inference/vcov_outcome.py +217 -0
  44. cbps/iv/__init__.py +48 -0
  45. cbps/iv/cbiv.py +2603 -0
  46. cbps/logging_config.py +45 -0
  47. cbps/msm/__init__.py +45 -0
  48. cbps/msm/cbmsm.py +1871 -0
  49. cbps/msm/rank_diagnostics.py +112 -0
  50. cbps/nonparametric/__init__.py +58 -0
  51. cbps/nonparametric/cholesky_whitening.py +232 -0
  52. cbps/nonparametric/empirical_likelihood.py +339 -0
  53. cbps/nonparametric/npcbps.py +1036 -0
  54. cbps/nonparametric/taylor_approx.py +207 -0
  55. cbps/py.typed +0 -0
  56. cbps/sklearn/__init__.py +42 -0
  57. cbps/sklearn/estimator.py +378 -0
  58. cbps/utils/__init__.py +82 -0
  59. cbps/utils/formula.py +415 -0
  60. cbps/utils/helpers.py +378 -0
  61. cbps/utils/numerics.py +438 -0
  62. cbps/utils/r_compat.py +109 -0
  63. cbps/utils/validation.py +224 -0
  64. cbps/utils/variance_transform.py +483 -0
  65. cbps/utils/weights.py +586 -0
  66. cbps-0.2.0.dist-info/METADATA +1090 -0
  67. cbps-0.2.0.dist-info/RECORD +70 -0
  68. cbps-0.2.0.dist-info/WHEEL +5 -0
  69. cbps-0.2.0.dist-info/licenses/LICENSE +661 -0
  70. cbps-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,217 @@
1
+ """
2
+ Variance Adjustment for Weighted Outcome Regression
3
+ ====================================================
4
+
5
+ Sandwich variance estimator for weighted least squares regression using
6
+ CBPS weights from continuous treatment models. Adjusts standard errors to
7
+ account for estimation uncertainty in the generalized propensity score.
8
+
9
+ The methodology follows Section 3.2 of Fong, Hazlett, and Imai (2018),
10
+ which derives the asymptotic variance of the weighted least squares
11
+ estimator by viewing it as a method of moments estimator based on the
12
+ combined moment conditions for propensity score estimation and outcome
13
+ regression.
14
+
15
+ References
16
+ ----------
17
+ Fong, C., Hazlett, C., and Imai, K. (2018). Covariate balancing propensity
18
+ score for a continuous treatment. The Annals of Applied Statistics,
19
+ 12(1), 156-177. https://doi.org/10.1214/17-AOAS1101
20
+
21
+ Newey, W. K. and McFadden, D. (1994). Large sample estimation and
22
+ hypothesis testing. In Handbook of Econometrics, Vol. IV, 2111-2245.
23
+ """
24
+
25
+ from typing import Union
26
+ import numpy as np
27
+ from cbps.core.results import CBPSResults
28
+
29
+
30
def vcov_outcome(
    cbps_fit: "CBPSResults",
    Y: np.ndarray,
    Z: np.ndarray,
    delta: np.ndarray,
    tol: float = 1e-5,
    lambda_: float = 0.01
) -> np.ndarray:
    """
    Compute adjusted variance-covariance matrix for weighted outcome regression.

    Adjusts standard errors to account for uncertainty in CBPS weight
    estimation when using continuous treatment weights. Implements the
    asymptotic variance formula from Section 3.2 of Fong, Hazlett, and
    Imai (2018), treating the weighted regression as a GMM estimator.

    Parameters
    ----------
    cbps_fit : CBPSResults
        Fitted continuous treatment CBPS object with attributes: Ttilde
        (standardized treatment), Xtilde (whitened covariates), beta_tilde,
        sigmasq_tilde, and weights. Any object exposing these attributes
        is accepted. Ttilde, weights and beta_tilde are flattened to 1-D,
        so column vectors of shape (n, 1) / (K, 1) are fine; sigmasq_tilde
        may be a Python scalar or a one-element array.
    Y : array-like of shape (n,)
        Outcome variable.
    Z : array-like of shape (n, q)
        Outcome model design matrix (including treatment and intercept).
        A (q, n) matrix is transposed automatically when unambiguous.
    delta : array-like of shape (q,)
        WLS coefficients from the weighted outcome regression.
    tol : float, default=1e-5
        Condition number tolerance. If the smallest singular value of M
        divided by the largest is below tol, regularization is applied.
    lambda_ : float, default=0.01
        Ridge regularization constant added to diagonal of M when
        ill-conditioned.

    Returns
    -------
    V : ndarray of shape (q, q)
        Adjusted variance-covariance matrix for delta.

    Raises
    ------
    ValueError
        If cbps_fit lacks continuous treatment attributes, dimensions
        are incompatible, the input is empty, sigmasq_tilde is not a
        positive scalar, or tol / lambda_ are out of range.
    numpy.linalg.LinAlgError
        If M (after optional regularization) or the outcome Hessian
        Sdelta is exactly singular.

    See Also
    --------
    asy_var : Variance estimation for binary treatment ATE.

    Notes
    -----
    The variance formula accounts for estimation uncertainty in both the
    propensity score parameters (beta, sigma^2) and the outcome regression
    coefficients (delta). The sandwich estimator follows Newey and McFadden
    (1994, Theorem 6.1).

    All per-observation sums are evaluated as matrix products rather than
    Python loops; the result is identical up to floating-point rounding.

    Examples
    --------
    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> from cbps import CBPS, vcov_outcome
    >>> fit = CBPS('T ~ X1 + X2 + X3', data=df, att=False)
    >>> Z = sm.add_constant(df[['T', 'X1', 'X2']])
    >>> wls = sm.WLS(df['Y'], Z, weights=fit.weights).fit()
    >>> V_adj = vcov_outcome(fit, df['Y'], Z, wls.params)
    >>> se_adj = np.sqrt(np.diag(V_adj))
    """
    import warnings

    # --- Input validation: required attributes of the fitted object -------
    if getattr(cbps_fit, 'Ttilde', None) is None:
        raise ValueError(
            "cbps_fit must be a continuous treatment CBPS object with Ttilde "
            "attribute. For binary treatments, use asy_var() instead."
        )
    for attr_name in ('Xtilde', 'beta_tilde', 'sigmasq_tilde', 'weights'):
        if getattr(cbps_fit, attr_name, None) is None:
            raise ValueError(f"cbps_fit missing {attr_name} attribute")

    # --- Shape normalization ----------------------------------------------
    # Every vector is flattened to 1-D. Without this a (K, 1) beta_tilde
    # would make `Ttilde - Xtilde @ beta_tilde` broadcast to an (N, N) matrix.
    Y = np.asarray(Y, dtype=float).ravel()
    Z = np.asarray(Z, dtype=float)
    delta = np.asarray(delta, dtype=float).ravel()
    Ttilde = np.asarray(cbps_fit.Ttilde, dtype=float).ravel()
    w = np.asarray(cbps_fit.weights, dtype=float).ravel()
    Xtilde = np.asarray(cbps_fit.Xtilde, dtype=float)
    beta_tilde = np.asarray(cbps_fit.beta_tilde, dtype=float).ravel()
    sigmasq_arr = np.asarray(cbps_fit.sigmasq_tilde, dtype=float).ravel()

    N = Y.size
    if N == 0:
        raise ValueError("Y must contain at least one observation")
    if Xtilde.ndim != 2:
        raise ValueError("Xtilde must be a 2D matrix")
    if Z.ndim != 2:
        raise ValueError("Z must be a 2D matrix")

    # Accept transposed design matrices when the orientation is unambiguous.
    if Xtilde.shape[0] != N and Xtilde.shape[1] == N:
        Xtilde = Xtilde.T
    if Z.shape[0] != N and Z.shape[1] == N:
        Z = Z.T

    # --- Dimension validation ---------------------------------------------
    if len(Ttilde) != N:
        raise ValueError(f"Ttilde length ({len(Ttilde)}) does not match Y length ({N})")
    if Xtilde.shape[0] != N:
        raise ValueError(f"Xtilde row count ({Xtilde.shape[0]}) does not match Y length ({N})")
    if Z.shape[0] != N:
        raise ValueError(f"Z row count ({Z.shape[0]}) does not match Y length ({N})")
    if len(delta) != Z.shape[1]:
        raise ValueError(f"delta length ({len(delta)}) does not match Z column count ({Z.shape[1]})")
    if len(w) != N:
        raise ValueError(f"weights length ({len(w)}) does not match Y length ({N})")

    # K = number of columns in the propensity model design matrix.
    K = Xtilde.shape[1]
    if beta_tilde.size != K:
        raise ValueError(
            f"beta_tilde length ({beta_tilde.size}) does not match Xtilde column count ({K})"
        )
    if sigmasq_arr.size != 1:
        raise ValueError(
            f"sigmasq_tilde must be a scalar, got {sigmasq_arr.size} values"
        )
    sigmasq = float(sigmasq_arr[0])
    if not sigmasq > 0:
        # Also rejects NaN; every formula below divides by sigmasq.
        raise ValueError(f"sigmasq_tilde must be positive, got {sigmasq}")

    # --- Parameter validation ---------------------------------------------
    if tol <= 0:
        raise ValueError(f"tol must be positive, got {tol}")
    if tol > 1.0:
        warnings.warn(
            f"tol={tol} > 1 triggers regularization unconditionally",
            UserWarning
        )
    if lambda_ < 0:
        raise ValueError(f"lambda_ must be >= 0, got {lambda_}")

    # --- Residuals from propensity and outcome models ---------------------
    inv_s2 = 1.0 / sigmasq
    eps_beta = Ttilde - Xtilde @ beta_tilde
    eps_delta = Y - Z @ delta

    # Per-observation factor shared by the sigma^2 column of M and Stheta.
    sig_factor = 0.5 * inv_s2 * w * (1.0 - inv_s2 * eps_beta**2)

    # --- M-matrix: Jacobian of moment conditions (Section 3.2) ------------
    # Each block is a sample mean, written as a matrix product over i.
    M11 = (-2.0 * inv_s2) * (eps_beta @ Xtilde) / N                      # (K,)
    M12 = -(inv_s2**2) * np.mean(eps_beta**2)                            # scalar
    M21 = Xtilde.T @ ((-inv_s2 * w * Ttilde * eps_beta)[:, None] * Xtilde) / N  # (K, K)
    M22 = ((sig_factor * Ttilde) @ Xtilde) / N                           # (K,)

    # Derivative of the outcome moments w.r.t. delta: -E[w z z'].
    Sdelta = -(Z.T @ (w[:, None] * Z)) / N                               # (P, P)

    # Derivative of the outcome moments w.r.t. (beta, sigma^2).
    Stheta = np.hstack([
        Z.T @ ((-inv_s2 * w * eps_beta * eps_delta)[:, None] * Xtilde),
        (Z.T @ (sig_factor * eps_delta))[:, None],
    ]) / N                                                               # (P, K+1)

    M = np.vstack([
        np.hstack([M11, [M12]]),
        np.hstack([M21, M22[:, None]])
    ])                                                                   # (K+1, K+1)

    # --- Ridge regularization if M is ill-conditioned ---------------------
    # Equivalent to cond(M) > 1/tol, but written without a division so an
    # exactly singular M (smallest singular value 0) emits no RuntimeWarning.
    sv = np.linalg.svd(M, compute_uv=False)
    if sv[-1] < tol * sv[0]:
        M = M + lambda_ * np.eye(M.shape[0])

    # --- Sandwich variance estimator (Section 3.2) ------------------------
    # Outcome-model moment contributions, one row per observation.
    s = (w * eps_delta)[:, None] * Z                                     # (N, P)
    # Propensity-model moment contributions: sigma^2 moment, then balance.
    mtheta = np.column_stack([
        inv_s2 * eps_beta**2 - 1.0,
        (w * Ttilde)[:, None] * Xtilde,
    ])                                                                   # (N, K+1)

    # Row i of `adjusted` is s_i - Stheta @ M^{-1} @ mtheta_i.
    adjusted = s - mtheta @ (Stheta @ np.linalg.inv(M)).T
    inner = adjusted.T @ adjusted / N

    Sdelta_inv = np.linalg.inv(Sdelta)
    V = Sdelta_inv @ inner @ Sdelta_inv.T / N

    return V
217
+
cbps/iv/__init__.py ADDED
@@ -0,0 +1,48 @@
1
+ """
2
+ Covariate Balancing Propensity Score for Instrumental Variables (CBIV)
3
+ =======================================================================
4
+
5
+ This module implements the Covariate Balancing Propensity Score (CBPS) methodology
6
+ for instrumental variable (IV) settings with treatment noncompliance. CBIV estimates
7
+ compliance type probabilities using generalized method of moments (GMM), simultaneously
8
+ optimizing covariate balance among compliers and prediction of treatment assignment.
9
+
10
+ In IV settings with noncompliance, units can be classified into principal strata
11
+ based on their potential treatment status under different instrument values:
12
+
13
+ - **Compliers**: Units who take treatment when encouraged (Z=1) and do not
14
+ take treatment when not encouraged (Z=0).
15
+ - **Always-takers**: Units who take treatment regardless of encouragement.
16
+ - **Never-takers**: Units who do not take treatment regardless of encouragement.
17
+
18
+ The local average treatment effect (LATE) is identified among compliers. CBIV
19
+ provides weights (inverse of complier probability) that can be used for
20
+ downstream causal effect estimation.
21
+
22
+ Key Components
23
+ --------------
24
+ - ``CBIV``: Main function for estimating compliance type propensity scores
25
+ - ``CBIVResults``: Result container with fitted compliance probabilities and weights
26
+ - ``CBIVNumericalWarning``: Warning class for numerical stability issues
27
+
28
+ Noncompliance Models
29
+ --------------------
30
+ - **Two-sided noncompliance** (default): Models compliers, always-takers, and
31
+ never-takers using multinomial logistic regression with three compliance types.
32
+ - **One-sided noncompliance**: Models compliers and never-takers only (assumes
33
+ no always-takers), using binary logistic regression.
34
+
35
+ References
36
+ ----------
37
+ Imai, K. and Ratkovic, M. (2014). Covariate Balancing Propensity Score.
38
+ Journal of the Royal Statistical Society: Series B (Statistical Methodology),
39
+ 76(1), 243-263. https://doi.org/10.1111/rssb.12027
40
+
41
+ Angrist, J. D., Imbens, G. W., and Rubin, D. B. (1996). Identification of
42
+ Causal Effects Using Instrumental Variables. Journal of the American
43
+ Statistical Association, 91(434), 444-455. https://doi.org/10.1080/01621459.1996.10476902
44
+ """
45
+
46
+ from .cbiv import CBIV, CBIVResults, CBIVNumericalWarning
47
+
48
+ __all__ = ["CBIV", "CBIVResults", "CBIVNumericalWarning"]