cbps 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cbps/__init__.py +3462 -0
- cbps/constants.py +46 -0
- cbps/core/__init__.py +93 -0
- cbps/core/cbps_binary.py +1943 -0
- cbps/core/cbps_continuous.py +945 -0
- cbps/core/cbps_multitreat.py +1123 -0
- cbps/core/cbps_optimal.py +507 -0
- cbps/core/results.py +1447 -0
- cbps/data/Blackwell.csv +571 -0
- cbps/data/LaLonde.csv +3213 -0
- cbps/data/npcbps_continuous_sim.csv +501 -0
- cbps/data/nsw.csv +723 -0
- cbps/data/nsw_dw.csv +446 -0
- cbps/data/political_ads_urban_niebler.csv +16266 -0
- cbps/data/psid_controls.csv +2491 -0
- cbps/data/psid_controls2.csv +254 -0
- cbps/data/psid_controls3.csv +129 -0
- cbps/data/simulation_dgp1_seed12345.csv +201 -0
- cbps/data/simulation_dgp2_seed12345.csv +201 -0
- cbps/data/simulation_dgp3_seed12345.csv +201 -0
- cbps/data/simulation_dgp4_seed12345.csv +201 -0
- cbps/datasets/__init__.py +78 -0
- cbps/datasets/blackwell.py +112 -0
- cbps/datasets/continuous.py +223 -0
- cbps/datasets/lalonde.py +272 -0
- cbps/datasets/npcbps_sim.py +101 -0
- cbps/diagnostics/__init__.py +101 -0
- cbps/diagnostics/balance.py +760 -0
- cbps/diagnostics/balance_cbmsm_addon.py +162 -0
- cbps/diagnostics/continuous_diagnostics.py +259 -0
- cbps/diagnostics/normality.py +173 -0
- cbps/diagnostics/ocbps_conditions.py +197 -0
- cbps/diagnostics/overlap.py +198 -0
- cbps/diagnostics/plots.py +1193 -0
- cbps/diagnostics/weights_diag.py +205 -0
- cbps/highdim/__init__.py +84 -0
- cbps/highdim/gmm_loss.py +340 -0
- cbps/highdim/hdcbps.py +1078 -0
- cbps/highdim/lasso_utils.py +498 -0
- cbps/highdim/weight_funcs.py +298 -0
- cbps/inference/__init__.py +42 -0
- cbps/inference/asyvar.py +621 -0
- cbps/inference/vcov_outcome.py +217 -0
- cbps/iv/__init__.py +48 -0
- cbps/iv/cbiv.py +2603 -0
- cbps/logging_config.py +45 -0
- cbps/msm/__init__.py +45 -0
- cbps/msm/cbmsm.py +1871 -0
- cbps/msm/rank_diagnostics.py +112 -0
- cbps/nonparametric/__init__.py +58 -0
- cbps/nonparametric/cholesky_whitening.py +232 -0
- cbps/nonparametric/empirical_likelihood.py +339 -0
- cbps/nonparametric/npcbps.py +1036 -0
- cbps/nonparametric/taylor_approx.py +207 -0
- cbps/py.typed +0 -0
- cbps/sklearn/__init__.py +42 -0
- cbps/sklearn/estimator.py +378 -0
- cbps/utils/__init__.py +82 -0
- cbps/utils/formula.py +415 -0
- cbps/utils/helpers.py +378 -0
- cbps/utils/numerics.py +438 -0
- cbps/utils/r_compat.py +109 -0
- cbps/utils/validation.py +224 -0
- cbps/utils/variance_transform.py +483 -0
- cbps/utils/weights.py +586 -0
- cbps-0.2.0.dist-info/METADATA +1090 -0
- cbps-0.2.0.dist-info/RECORD +70 -0
- cbps-0.2.0.dist-info/WHEEL +5 -0
- cbps-0.2.0.dist-info/licenses/LICENSE +661 -0
- cbps-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Empirical Likelihood Optimization for Nonparametric CBPS.
|
|
3
|
+
|
|
4
|
+
This module implements the dual optimization approach for empirical
|
|
5
|
+
likelihood estimation described in Section 3.3.2 of Fong, Hazlett,
|
|
6
|
+
and Imai (2018).
|
|
7
|
+
|
|
8
|
+
The key insight is dimension reduction: instead of optimizing over
|
|
9
|
+
n weights directly, we optimize over (2K+1) Lagrange multipliers
|
|
10
|
+
:math:`\\gamma`, then recover weights via:
|
|
11
|
+
|
|
12
|
+
.. math::
|
|
13
|
+
|
|
14
|
+
w_i = \\frac{1}{1 - \\gamma^T g(X_i^*, T_i^*)}
|
|
15
|
+
|
|
16
|
+
Key Functions
|
|
17
|
+
-------------
|
|
18
|
+
- :func:`log_elgiven_eta`: Objective function for :math:`\\gamma`
|
|
19
|
+
optimization given the weighted correlation :math:`\\eta`.
|
|
20
|
+
- :func:`get_w`: Recover weights and check convergence.
|
|
21
|
+
- :func:`log_post`: Penalized likelihood for the outer :math:`\\alpha`
|
|
22
|
+
line search.
|
|
23
|
+
|
|
24
|
+
Mathematical Background
|
|
25
|
+
-----------------------
|
|
26
|
+
The Lagrangian for the constrained likelihood maximization (Section 3.3.2)
|
|
27
|
+
leads to the dual problem:
|
|
28
|
+
|
|
29
|
+
.. math::
|
|
30
|
+
|
|
31
|
+
\\underset{\\gamma}{\\text{argmax}} \\sum_{i=1}^n
|
|
32
|
+
\\log(1 - \\gamma^T(g_i - \\eta))
|
|
33
|
+
|
|
34
|
+
where :math:`g_i = (X_i^* T_i^*, X_i^*, T_i^*)^T` is the constraint vector
|
|
35
|
+
and :math:`\\eta` is the allowed finite-sample imbalance.
|
|
36
|
+
|
|
37
|
+
Note: The ordering of components in :math:`g_i` follows the implementation
|
|
38
|
+
rather than the paper's notation :math:`(X_i^*, T_i^*, X_i^* T_i^*)^T`.
|
|
39
|
+
|
|
40
|
+
References
|
|
41
|
+
----------
|
|
42
|
+
Fong, C., Hazlett, C., and Imai, K. (2018). Covariate balancing propensity
|
|
43
|
+
score for a continuous treatment: Application to the efficacy of political
|
|
44
|
+
advertisements. The Annals of Applied Statistics, 12(1), 156-177.
|
|
45
|
+
https://doi.org/10.1214/17-AOAS1101
|
|
46
|
+
|
|
47
|
+
Owen, A.B. (2001). Empirical Likelihood. Chapman & Hall/CRC.
|
|
48
|
+
"""
|
|
49
|
+
|
|
50
|
+
from typing import Dict, Union
|
|
51
|
+
import numpy as np
|
|
52
|
+
import scipy.optimize
|
|
53
|
+
|
|
54
|
+
from .taylor_approx import llog
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def log_elgiven_eta(
    gamma: np.ndarray,
    eta: np.ndarray,
    z: np.ndarray,
    eps: float,
    ncon_cor: int,
    n: int
) -> float:
    """
    Evaluate the dual empirical-likelihood objective at ``gamma``.

    For every observation the quantity

    .. math::

        a_i = n + \\gamma^T(\\eta_{long} - z_i)

    is formed, where :math:`\\eta_{long}` is ``eta`` padded with zeros up
    to the number of columns of ``z``. The function returns
    :math:`-\\sum_i \\text{llog}(a_i, \\text{eps})`, which the caller
    minimizes over ``gamma``.

    Parameters
    ----------
    gamma : np.ndarray of shape (ncon,)
        Lagrange multipliers, one per column of ``z``.
    eta : np.ndarray of shape (ncon_cor,)
        Allowed imbalance for the first ``ncon_cor`` constraints.
    z : np.ndarray of shape (n, ncon)
        Constraint matrix; rows are observations, columns are constraints.
    eps : float
        Passed unchanged as the second argument of :func:`llog`
        (described elsewhere in this module as the Taylor-approximation
        threshold, typically 1/n).
    ncon_cor : int
        Number of leading constraints that receive an entry of ``eta``;
        the remaining ``ncon - ncon_cor`` constraints are held at zero.
    n : int
        Sample size. Used both as the additive offset in :math:`a_i` and
        as the number of columns of the broadcast ``eta`` matrix.

    Returns
    -------
    float
        Negated sum of ``llog`` over the observations.

    References
    ----------
    Fong, C., Hazlett, C., and Imai, K. (2018). Equation 9, Section 3.3.3.
    """
    n_constraints = z.shape[1]

    # Only the first ncon_cor constraints carry an allowed imbalance;
    # the trailing ones are pinned to zero.
    zero_tail = np.zeros(n_constraints - ncon_cor)
    eta_full = np.concatenate([eta, zero_tail])

    # (ncon, n) view in which every column equals eta_full. A read-only
    # broadcast view has the same values and shape as an explicit outer
    # product with a row of ones, without allocating it.
    eta_grid = np.broadcast_to(eta_full[:, None], (n_constraints, n))

    # One argument per observation: n + gamma' (eta_full - z_i).
    per_obs_arg = n + gamma.T @ (eta_grid - z.T)

    return -np.sum(llog(per_obs_arg, eps))
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def get_w(
    eta: np.ndarray,
    z: np.ndarray,
    sumw_tol: float,
    eps: float,
    ncon_cor: int,
    n: int
) -> Dict[str, Union[np.ndarray, float]]:
    """
    Solve the inner problem for a fixed ``eta`` and recover the weights.

    Steps:

    1. Minimize :func:`log_elgiven_eta` over ``gamma`` with BFGS, starting
       from the zero vector.
    2. Form :math:`a_i = n + \\hat\\gamma^T(\\eta_{long} - z_i)` and set
       :math:`w_i = 1 / a_i` (plain reciprocal, not ``llog``).
    3. Compute ``-sum(log(w / sum(w)))`` and, when the weight sum is not
       within ``sumw_tol`` of 1, subtract ``10**4 * (1 + |1 - sum(w)|)``.

    The optimizer's success flag is not inspected; only its final
    parameter vector is used.

    Parameters
    ----------
    eta : np.ndarray of shape (ncon_cor,)
        Allowed imbalance for the first ``ncon_cor`` constraints.
    z : np.ndarray of shape (n, ncon)
        Constraint matrix.
    sumw_tol : float
        Largest accepted value of ``|1 - sum(w)|`` before the penalty
        term is applied.
    eps : float
        Forwarded to :func:`log_elgiven_eta`.
    ncon_cor : int
        Number of leading constraints covered by ``eta``.
    n : int
        Sample size.

    Returns
    -------
    dict
        Dictionary with keys:

        - **w** : np.ndarray of shape (n,)
            Raw weights ``1 / a_i`` (not rescaled).
        - **sumw** : float
            Sum of the raw weights.
        - **log_el** : float
            ``-sum(log(w / sumw))``, minus the penalty when the
            tolerance check fails.
        - **el_gamma** : np.ndarray of shape (ncon_cor,)
            First ``ncon_cor`` entries of the optimized multipliers.

    References
    ----------
    Owen, A.B. (2001). Empirical Likelihood. Chapman & Hall/CRC.
    """
    n_constraints = z.shape[1]

    # Inner optimization over gamma, started at the origin.
    fit = scipy.optimize.minimize(
        log_elgiven_eta,
        np.zeros(n_constraints),
        args=(eta, z, eps, ncon_cor, n),
        method='BFGS'
    )
    gamma_hat = fit.x

    # Rebuild the per-observation denominators at the optimum.
    eta_full = np.concatenate([eta, np.zeros(n_constraints - ncon_cor)])
    eta_grid = np.broadcast_to(eta_full[:, None], (n_constraints, n))
    denominators = n + gamma_hat.T @ (eta_grid - z.T)

    weights = 1 / denominators.flatten()
    total_weight = weights.sum()

    # The likelihood term always uses weights rescaled to sum to one.
    log_el = -np.sum(np.log(weights / total_weight))

    # Phrased as "not within tolerance" so that NaN comparisons fall into
    # the penalized branch.
    within_tol = abs(1 - total_weight) <= sumw_tol
    if not within_tol:
        log_el = log_el - 10**4 * (1 + abs(1 - total_weight))

    return {
        'w': weights,
        'sumw': total_weight,
        'log_el': log_el,
        'el_gamma': gamma_hat[:ncon_cor]
    }
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def log_post(
    par: float,
    eta_to_be_scaled: np.ndarray,
    eta_prior_sd: np.ndarray,
    z: np.ndarray,
    eps: float,
    sumw_tol: float,
    ncon_cor: int,
    n: int
) -> float:
    """
    Objective of the outer one-dimensional search over the scale ``par``.

    The candidate imbalance is ``eta = par * eta_to_be_scaled``. The value
    returned is the ``log_el`` entry produced by :func:`get_w` for that
    ``eta`` plus the log density of ``eta`` under independent zero-mean
    normal priors with standard deviations ``eta_prior_sd``:

    .. math::

        \\log f(\\eta) = \\sum_k \\left[
            -\\tfrac{1}{2}\\log(2\\pi\\sigma_k^2)
            - \\frac{\\eta_k^2}{2\\sigma_k^2} \\right]

    Parameters
    ----------
    par : float
        Scalar multiplier applied to ``eta_to_be_scaled``.
    eta_to_be_scaled : np.ndarray of shape (ncon_cor,)
        Base imbalance vector that ``par`` rescales.
    eta_prior_sd : np.ndarray of shape (ncon_cor,)
        Prior standard deviation(s) of ``eta``.
    z : np.ndarray of shape (n, ncon)
        Constraint matrix, forwarded to :func:`get_w`.
    eps : float
        Forwarded to :func:`get_w`.
    sumw_tol : float
        Weight-sum tolerance, forwarded to :func:`get_w`.
    ncon_cor : int
        Number of correlation constraints, forwarded to :func:`get_w`.
    n : int
        Sample size, forwarded to :func:`get_w`.

    Returns
    -------
    float
        ``log_el`` from :func:`get_w` plus the prior log density.

    References
    ----------
    Fong, C., Hazlett, C., and Imai, K. (2018). Section 3.3.3: A penalized
    imbalance approach. Equation 10.
    """
    scaled_eta = par * eta_to_be_scaled

    # Gaussian log density, summed over the components of eta.
    prior_var = eta_prior_sd**2
    log_prior = np.sum(
        -0.5 * np.log(2 * np.pi * prior_var)
        - scaled_eta**2 / (2 * prior_var)
    )

    # Inner solve for this eta; only the (possibly penalized) log_el is used.
    inner = get_w(scaled_eta, z, sumw_tol, eps, ncon_cor, n)

    # The prior enters with a fixed unit weight.
    prior_weight = 1
    return inner['log_el'] + prior_weight * log_prior
|