cbps 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. cbps/__init__.py +3462 -0
  2. cbps/constants.py +46 -0
  3. cbps/core/__init__.py +93 -0
  4. cbps/core/cbps_binary.py +1943 -0
  5. cbps/core/cbps_continuous.py +945 -0
  6. cbps/core/cbps_multitreat.py +1123 -0
  7. cbps/core/cbps_optimal.py +507 -0
  8. cbps/core/results.py +1447 -0
  9. cbps/data/Blackwell.csv +571 -0
  10. cbps/data/LaLonde.csv +3213 -0
  11. cbps/data/npcbps_continuous_sim.csv +501 -0
  12. cbps/data/nsw.csv +723 -0
  13. cbps/data/nsw_dw.csv +446 -0
  14. cbps/data/political_ads_urban_niebler.csv +16266 -0
  15. cbps/data/psid_controls.csv +2491 -0
  16. cbps/data/psid_controls2.csv +254 -0
  17. cbps/data/psid_controls3.csv +129 -0
  18. cbps/data/simulation_dgp1_seed12345.csv +201 -0
  19. cbps/data/simulation_dgp2_seed12345.csv +201 -0
  20. cbps/data/simulation_dgp3_seed12345.csv +201 -0
  21. cbps/data/simulation_dgp4_seed12345.csv +201 -0
  22. cbps/datasets/__init__.py +78 -0
  23. cbps/datasets/blackwell.py +112 -0
  24. cbps/datasets/continuous.py +223 -0
  25. cbps/datasets/lalonde.py +272 -0
  26. cbps/datasets/npcbps_sim.py +101 -0
  27. cbps/diagnostics/__init__.py +101 -0
  28. cbps/diagnostics/balance.py +760 -0
  29. cbps/diagnostics/balance_cbmsm_addon.py +162 -0
  30. cbps/diagnostics/continuous_diagnostics.py +259 -0
  31. cbps/diagnostics/normality.py +173 -0
  32. cbps/diagnostics/ocbps_conditions.py +197 -0
  33. cbps/diagnostics/overlap.py +198 -0
  34. cbps/diagnostics/plots.py +1193 -0
  35. cbps/diagnostics/weights_diag.py +205 -0
  36. cbps/highdim/__init__.py +84 -0
  37. cbps/highdim/gmm_loss.py +340 -0
  38. cbps/highdim/hdcbps.py +1078 -0
  39. cbps/highdim/lasso_utils.py +498 -0
  40. cbps/highdim/weight_funcs.py +298 -0
  41. cbps/inference/__init__.py +42 -0
  42. cbps/inference/asyvar.py +621 -0
  43. cbps/inference/vcov_outcome.py +217 -0
  44. cbps/iv/__init__.py +48 -0
  45. cbps/iv/cbiv.py +2603 -0
  46. cbps/logging_config.py +45 -0
  47. cbps/msm/__init__.py +45 -0
  48. cbps/msm/cbmsm.py +1871 -0
  49. cbps/msm/rank_diagnostics.py +112 -0
  50. cbps/nonparametric/__init__.py +58 -0
  51. cbps/nonparametric/cholesky_whitening.py +232 -0
  52. cbps/nonparametric/empirical_likelihood.py +339 -0
  53. cbps/nonparametric/npcbps.py +1036 -0
  54. cbps/nonparametric/taylor_approx.py +207 -0
  55. cbps/py.typed +0 -0
  56. cbps/sklearn/__init__.py +42 -0
  57. cbps/sklearn/estimator.py +378 -0
  58. cbps/utils/__init__.py +82 -0
  59. cbps/utils/formula.py +415 -0
  60. cbps/utils/helpers.py +378 -0
  61. cbps/utils/numerics.py +438 -0
  62. cbps/utils/r_compat.py +109 -0
  63. cbps/utils/validation.py +224 -0
  64. cbps/utils/variance_transform.py +483 -0
  65. cbps/utils/weights.py +586 -0
  66. cbps-0.2.0.dist-info/METADATA +1090 -0
  67. cbps-0.2.0.dist-info/RECORD +70 -0
  68. cbps-0.2.0.dist-info/WHEEL +5 -0
  69. cbps-0.2.0.dist-info/licenses/LICENSE +661 -0
  70. cbps-0.2.0.dist-info/top_level.txt +1 -0
cbps/iv/cbiv.py ADDED
@@ -0,0 +1,2603 @@
1
+ """
2
+ Covariate Balancing Propensity Score for Instrumental Variable Estimates (CBIV)
3
+
4
+ This module implements the Covariate Balancing Propensity Score (CBPS) methodology
5
+ for instrumental variable (IV) settings with treatment noncompliance. CBIV estimates
6
+ compliance type probabilities (complier, always-taker, never-taker) using the
7
+ generalized method of moments (GMM) framework that simultaneously:
8
+
9
+ 1. **Propensity score estimation**: Fits a multinomial logistic model to predict
10
+ the observed joint distribution of instrument (Z) and treatment (Tr).
11
+ 2. **Covariate balance optimization**: Ensures that weighted covariate means
12
+ for compliers approximate population means, reducing selection bias.
13
+
14
+ Principal Stratification Framework
15
+ ----------------------------------
16
+ In IV settings with noncompliance, units are classified into principal strata
17
+ based on potential treatment under different instrument values (Angrist et al., 1996):
18
+
19
+ - Compliers (C): Tr(Z=1)=1, Tr(Z=0)=0 - respond to encouragement
20
+ - Always-takers (A): Tr(Z=1)=1, Tr(Z=0)=1 - always treated
21
+ - Never-takers (N): Tr(Z=1)=0, Tr(Z=0)=0 - never treated
22
+ - Defiers: Tr(Z=1)=0, Tr(Z=0)=1 - excluded by monotonicity assumption
23
+
24
+ The local average treatment effect (LATE) is identified among compliers. CBIV
25
+ provides inverse probability weights (1/π_c) for downstream effect estimation.
26
+
27
+ Noncompliance Models
28
+ --------------------
29
+ - **Two-sided** (twosided=True): Models all three compliance types using
30
+ multinomial logistic regression. Appropriate when both always-takers and
31
+ never-takers are present.
32
+ - **One-sided** (twosided=False): Models only compliers and never-takers
33
+ using binary logistic regression. Appropriate when always-takers are absent
34
+ (e.g., encouragement designs where treatment access requires encouragement).
35
+
36
+ References
37
+ ----------
38
+ Imai, K. and Ratkovic, M. (2014). Covariate Balancing Propensity Score.
39
+ Journal of the Royal Statistical Society: Series B (Statistical Methodology),
40
+ 76(1), 243-263. https://doi.org/10.1111/rssb.12027
41
+
42
+ Angrist, J. D., Imbens, G. W., and Rubin, D. B. (1996). Identification of
43
+ Causal Effects Using Instrumental Variables. Journal of the American
44
+ Statistical Association, 91(434), 444-455. https://doi.org/10.1080/01621459.1996.10476902
45
+
46
+ Hansen, L. P. (1982). Large Sample Properties of Generalized Method of
47
+ Moments Estimators. Econometrica, 50(4), 1029-1054. https://doi.org/10.2307/1912775
48
+ """
49
+
50
+ import warnings
51
+ from typing import Optional, Tuple, Dict, Callable
52
+
53
+ import numpy as np
54
+ import pandas as pd
55
+ import scipy.optimize
56
+ import scipy.special
57
+ import statsmodels.api as sm
58
+ from statsmodels.genmod.families import Gaussian
59
+
60
+ from ..utils.numerics import r_ginv_like, pinv_symmetric_psd, symmetrize, numeric_rank
61
+
62
# Constants
# NOTE(review): CBIV()'s ``probs_min`` parameter also defaults to the literal
# 1e-6 -- confirm whether that default should reference PROBS_MIN instead.
PROBS_MIN = 1e-6  # Probability clipping threshold for numerical stability
64
+
65
+
66
class CBIVNumericalWarning(UserWarning):
    """
    Warning category for numerical-stability problems in CBIV estimation.

    Emitted when estimated compliance probabilities drift towards 0 or 1 and
    have to be clipped into ``[probs_min, 1 - probs_min]``. Heavy clipping is
    usually a symptom of one of the following:

    - Complete or quasi-complete separation in the data
    - A weak instrument (low correlation between Z and Tr)
    - Too few observations for the number of covariates
    - Extreme covariate values that overflow numerically

    Suggested follow-up when the warning is seen:

    1. Check instrument relevance (first-stage F-statistic > 10)
    2. Reduce the number of covariates or add regularization
    3. Inspect the data for outliers or extreme values
    4. Increase the sample size if possible

    Being a ``UserWarning`` subclass, it can be filtered selectively with
    the standard :mod:`warnings` machinery.
    """
87
+
88
+
89
class CBIVSummary:
    """Summary object for CBIVResults.

    Encapsulates formatted summary information of CBIV estimation results,
    providing a statsmodels-style summary() return type. Use ``str()`` or
    ``print()`` to obtain formatted text.

    Parameters
    ----------
    coefficients : np.ndarray
        Estimated coefficients.
    fitted_values : np.ndarray
        Fitted compliance type probabilities. May be ``None``, in which case
        the probability section is omitted from the text.
    weights : np.ndarray
        Inverse probability weights (1/π_c). May be ``None``, in which case
        the weight section is omitted from the text.
    deviance : float
        Model deviance.
    converged : bool
        Whether optimization converged.
    J : float
        Hansen J-statistic.
    df : int
        Effective degrees of freedom.
    bal : float
        Covariate balance loss.
    method : str
        Estimation method.
    two_sided : bool
        Whether two-sided noncompliance model was used.
    """

    def __init__(
        self,
        coefficients: np.ndarray,
        fitted_values: np.ndarray,
        weights: np.ndarray,
        deviance: float,
        converged: bool,
        J: float,
        df: int,
        bal: float,
        method: str,
        two_sided: bool,
    ):
        self.coefficients = coefficients
        self.fitted_values = fitted_values
        self.weights = weights
        self.deviance = deviance
        self.converged = converged
        self.J = J
        self.df = df
        self.bal = bal
        self.method = method
        self.two_sided = two_sided

    def _sample_size(self) -> Optional[int]:
        """Return the number of observations, or ``None`` if it is unknown.

        ``weights`` is preferred; ``fitted_values`` (one row per observation)
        is the fallback, so a missing weight vector no longer makes
        ``str()``/``repr()`` raise ``TypeError``.
        """
        for per_obs in (self.weights, self.fitted_values):
            if per_obs is not None:
                return len(per_obs)
        return None

    @staticmethod
    def _min_max_mean(values: np.ndarray) -> list:
        """Return the three ' Min/Max/Mean' report lines for ``values``."""
        return [
            f" Min: {values.min():.4f}",
            f" Max: {values.max():.4f}",
            f" Mean: {values.mean():.4f}",
        ]

    def __str__(self) -> str:
        """Return formatted CBIV estimation results summary (consistent with CBIVResults.__str__() output)."""
        rule = "=" * 60
        n = self._sample_size()
        k = self.coefficients.shape[0]

        # Each entry is one output line; an empty entry produces the blank
        # line that separates sections.
        lines = [
            "",
            "CBIV Estimation Results",
            rule,
            # Basic information
            f"Sample size: {n if n is not None else 'n/a'}",
            f"Coefficients: {k}",
            f"Method: {self.method}",
            f"Two-sided noncompliance: {'Yes' if self.two_sided else 'No'}",
            f"Converged: {'Yes' if self.converged else 'No'}",
            "",
            # Statistics
            "Model Statistics:",
            f" J-statistic: {self.J:.6f}",
            f" Deviance: {self.deviance:.6f}",
            f" Balance loss: {self.bal:.6f}",
            f" Degrees of freedom: {self.df}",
        ]

        # Fitted values information
        if self.fitted_values is not None:
            lines.append("")
            if self.two_sided:
                lines.append("Compliance Probabilities (π_c, π_a, π_n):")
                labels = ("Compliers (π_c)", "Always-takers (π_a)", "Never-takers (π_n)")
                for col, label in enumerate(labels):
                    probs = self.fitted_values[:, col]
                    lines.append(
                        f" {label}: min={probs.min():.4f}, "
                        f"max={probs.max():.4f}, mean={probs.mean():.4f}"
                    )
            else:
                lines.append("Complier Probabilities (π_c):")
                lines.extend(self._min_max_mean(self.fitted_values))

        # Weight information
        if self.weights is not None:
            lines.append("")
            lines.append("Complier Weights (1/π_c):")
            lines.extend(self._min_max_mean(self.weights))

        lines.append(rule)
        return "\n".join(lines) + "\n"

    def __repr__(self) -> str:
        return f"CBIVSummary(n={self._sample_size()}, converged={self.converged})"
192
+
193
+
194
class CBIVResults:
    """
    Container for CBIV estimation results.

    Stores all outputs from the CBIV fitting procedure, including compliance
    type coefficients, estimated probabilities, inverse probability weights,
    and diagnostic statistics for model assessment.

    Attributes
    ----------
    coefficients : np.ndarray
        Estimated coefficients for the compliance type model.

        - Two-sided noncompliance: shape (k, 2) matrix where column 0 contains
          complier coefficients (β_c) and column 1 contains always-taker
          coefficients (β_a). Never-taker probability is the reference category.
        - One-sided noncompliance: shape (k,) vector of complier coefficients.

    fitted_values : np.ndarray
        Estimated compliance type probabilities for each observation.

        - Two-sided: shape (n, 3) matrix with columns [π_c, π_a, π_n] representing
          complier, always-taker, and never-taker probabilities respectively.
          Each row sums to 1.
        - One-sided: shape (n, 1) matrix of complier probabilities π_c.

    weights : np.ndarray
        Inverse probability weights for compliers, computed as 1/π_c.
        Shape (n,). These weights can be used for downstream LATE estimation.

    deviance : float
        Model deviance, computed as -2 times the log-likelihood. Lower values
        indicate better fit to the observed (Z, Tr) distribution.

    converged : bool
        Whether the optimization algorithm converged successfully. If False,
        results should be interpreted with caution.

    J : float
        Hansen's J-statistic for the over-identification test. Under correct
        model specification, J ~ χ²(df) asymptotically. Large J values suggest
        model misspecification or violation of IV assumptions.

    df : int
        Effective degrees of freedom, equal to the rank of the covariate matrix.
        Used as the degrees of freedom for the J-statistic test.

    bal : float
        Covariate balance loss, computed as the GMM objective using only the
        balance moment conditions. Lower values indicate better balance.

    method : str
        Estimation method used: 'over' (over-identified), 'exact' (just-identified),
        or 'mle' (maximum likelihood only).

    two_sided : bool
        Whether two-sided noncompliance model was fitted.

    iterations : int or None
        Maximum number of optimization iterations specified.

    Examples
    --------
    >>> import numpy as np
    >>> from cbps import CBIV
    >>> # Simulate data
    >>> n = 500
    >>> np.random.seed(42)
    >>> X = np.random.randn(n, 2)
    >>> Z = np.random.binomial(1, 0.5, n)
    >>> # Compliance depends on X
    >>> p_comply = 1 / (1 + np.exp(-0.5 - 0.3 * X[:, 0]))
    >>> comply = np.random.binomial(1, p_comply, n)
    >>> Tr = Z * comply  # Simplified one-sided model
    >>> # Fit CBIV
    >>> fit = CBIV(Tr=Tr, Z=Z, X=X, method='over', twosided=False)
    >>> print(f"Converged: {fit.converged}")
    >>> print(f"J-statistic: {fit.J:.4f}")
    >>> print(f"Mean weight: {fit.weights.mean():.4f}")
    """

    def __init__(
        self,
        coefficients: np.ndarray,
        fitted_values: np.ndarray,
        weights: np.ndarray,
        deviance: float,
        converged: bool,
        J: float,
        df: int,
        bal: float,
        method: str,
        two_sided: bool,
        iterations: Optional[int] = None,
    ):
        """
        Initialize a CBIVResults instance.

        This constructor is typically not called directly. Use the :func:`CBIV`
        function to fit a model and obtain a CBIVResults object.

        Parameters
        ----------
        coefficients : np.ndarray
            Estimated model coefficients. Shape (k, 2) for two-sided or (k,)
            for one-sided noncompliance.
        fitted_values : np.ndarray
            Estimated compliance probabilities. Shape (n, 3) for two-sided
            with columns [π_c, π_a, π_n], or (n, 1) for one-sided.
        weights : np.ndarray
            Inverse probability weights (1/π_c), shape (n,).
        deviance : float
            Model deviance (-2 × log-likelihood).
        converged : bool
            Optimization convergence status.
        J : float
            Hansen's J-statistic for over-identification test.
        df : int
            Effective degrees of freedom (covariate matrix rank).
        bal : float
            Covariate balance loss from GMM objective.
        method : str
            Estimation method: 'over', 'exact', or 'mle'.
        two_sided : bool
            Whether two-sided noncompliance model was used.
        iterations : int, optional
            Maximum optimization iterations specified.
        """
        self.coefficients = coefficients
        self.fitted_values = fitted_values
        self.weights = weights
        self.deviance = deviance
        self.converged = converged
        self.J = J
        self.df = df
        self.bal = bal
        self.method = method
        self.two_sided = two_sided
        self.iterations = iterations

    def _sample_size(self) -> Optional[int]:
        """Return the number of observations, or ``None`` if it is unknown.

        ``weights`` is preferred; ``fitted_values`` (one row per observation)
        is the fallback, so ``repr()`` does not raise ``TypeError`` when the
        weight vector is missing.
        """
        for per_obs in (self.weights, self.fitted_values):
            if per_obs is not None:
                return len(per_obs)
        return None

    def __repr__(self) -> str:
        """Concise representation."""
        n = self._sample_size()
        k = self.coefficients.shape[0]
        return f"CBIVResults(n={n}, k={k}, J={self.J:.6f}, converged={self.converged})"

    def __str__(self) -> str:
        """Full string representation (for print).

        Delegates to :meth:`summary` so the printed text has a single
        implementation and cannot drift from ``str(fit.summary())``.
        """
        return str(self.summary())

    @property
    def p_complier(self) -> np.ndarray:
        """
        Complier probability vector.

        Provides a unified interface to access complier probabilities regardless
        of whether the model used two-sided or one-sided noncompliance.

        Returns
        -------
        np.ndarray
            Estimated complier probabilities π_c, shape (n,).

        Notes
        -----
        For two-sided models, extracts the first column of `fitted_values`.
        For one-sided models, flattens `fitted_values` to a 1D array.

        The inverse of these probabilities equals the `weights` attribute:
        ``weights = 1 / p_complier``.

        Examples
        --------
        >>> fit = CBIV(Tr=Tr, Z=Z, X=X, twosided=True)
        >>> p_c = fit.p_complier
        >>> np.allclose(fit.weights, 1.0 / p_c)  # True
        """
        if self.two_sided:
            return self.fitted_values[:, 0]
        return self.fitted_values.ravel()

    def vcov(self) -> np.ndarray:
        """
        Retrieve the variance-covariance matrix of estimated coefficients.

        The matrix is not computed here: it is read from the private
        ``_vcov_matrix`` attribute, which is expected to be attached to the
        instance by the fitting code.

        Returns
        -------
        np.ndarray
            Variance-covariance matrix of coefficients.

            - Two-sided: shape (2k, 2k), joint covariance of stacked vector [β_c; β_a]
            - One-sided: shape (k, k), covariance of β_c

        Raises
        ------
        AttributeError
            If the variance-covariance matrix was not computed during fitting.

        Notes
        -----
        The variance-covariance matrix is computed using the standard GMM
        sandwich formula:

        .. math::

            \\text{Var}(\\hat{\\beta}) = (G' V^{-1} G)^{-1} / n

        where G is the Jacobian of moment conditions with respect to parameters,
        V is the covariance matrix of moment conditions, and n is the sample size.

        The Jacobian G is computed via numerical differentiation. If optimization
        did not converge, the variance estimates may be unreliable.

        Standard errors can be obtained as the square root of the diagonal:
        ``se = np.sqrt(np.diag(fit.vcov()))``.

        References
        ----------
        Hansen, L. P. (1982). Large Sample Properties of Generalized Method of
        Moments Estimators. Econometrica, 50(4), 1029-1054.

        Examples
        --------
        >>> fit = CBIV(Tr=Tr, Z=Z, X=X, method='over', twosided=True)
        >>> V = fit.vcov()
        >>> se = np.sqrt(np.diag(V))
        >>> print(f"Standard errors: {se}")
        """
        if not hasattr(self, '_vcov_matrix'):
            raise AttributeError(
                "Variance-covariance matrix not available. "
                "This may occur if computation failed during fitting. "
                "Re-fit the model to attempt computation."
            )
        return self._vcov_matrix

    @property
    def var(self) -> np.ndarray:
        """
        Variance-covariance matrix of coefficients (property alias).

        This property provides convenient access to the variance-covariance
        matrix, equivalent to calling :meth:`vcov`.

        Returns
        -------
        np.ndarray
            Variance-covariance matrix, same as ``vcov()``.

        See Also
        --------
        vcov : Method returning the same variance-covariance matrix.
        """
        return self.vcov()

    def summary(self) -> 'CBIVSummary':
        """
        Generate a summary object for CBIV estimation results.

        Returns a ``CBIVSummary`` instance. Use ``str()`` or ``print()``
        to obtain formatted text summary.

        Returns
        -------
        CBIVSummary
            Object containing estimation result summary information.
            ``iterations`` is not part of the summary.

        Examples
        --------
        >>> fit = CBIV(Tr=Tr, Z=Z, X=X, method='over', twosided=False)
        >>> summary = fit.summary()
        >>> print(summary)  # Formatted output
        >>> type(summary)  # CBIVSummary
        """
        return CBIVSummary(
            coefficients=self.coefficients,
            fitted_values=self.fitted_values,
            weights=self.weights,
            deviance=self.deviance,
            converged=self.converged,
            J=self.J,
            df=self.df,
            bal=self.bal,
            method=self.method,
            two_sided=self.two_sided,
        )
524
+
525
+
526
+ def CBIV(
527
+ formula: str | None = None,
528
+ data: pd.DataFrame | None = None,
529
+ Tr: np.ndarray | pd.Series | None = None,
530
+ Z: np.ndarray | pd.DataFrame | None = None,
531
+ X: np.ndarray | pd.DataFrame | None = None,
532
+ iterations: int = 1000,
533
+ method: str = "over",
534
+ twostep: bool = True,
535
+ twosided: bool = True,
536
+ probs_min: float = 1e-6,
537
+ warn_clipping: bool = True,
538
+ clipping_warn_threshold: float = 0.05,
539
+ verbose: int = 0,
540
+ ) -> CBIVResults:
541
+ """
542
+ Covariate Balancing Propensity Score for Instrumental Variable Estimates.
543
+
544
+ Estimates compliance type propensity scores in an instrumental variable framework,
545
+ simultaneously optimizing covariate balance and propensity score prediction.
546
+
547
+ Parameters
548
+ ----------
549
+ formula : str, optional
550
+ IV formula string in format "treatment ~ covariates | instruments".
551
+ Example: "treat ~ x1 + x2 | z1 + z2"
552
+ - treatment: binary treatment variable (0/1)
553
+ - covariates: pre-treatment covariates (intercept added automatically)
554
+ - instruments: binary instrument variable (0/1)
555
+ data : pd.DataFrame, optional
556
+ DataFrame containing treatment, covariates, and instruments (required with formula).
557
+ Tr : np.ndarray or pd.Series, shape (n,), optional
558
+ Binary treatment variable (0/1) (matrix interface, mutually exclusive with formula).
559
+ Z : np.ndarray or pd.DataFrame, shape (n,) or (n, 1), optional
560
+ Binary instrument variable (0/1, encouragement) (matrix interface).
561
+ X : np.ndarray or pd.DataFrame, shape (n, p), optional
562
+ Pre-treatment covariate matrix (without intercept) (matrix interface).
563
+ iterations : int, default=1000
564
+ Maximum number of optimization iterations.
565
+ method : str, default="over"
566
+ Estimation method:
567
+ - "over": Over-identified model (propensity score + covariate balance conditions)
568
+ - "exact": Exactly-identified model (covariate balance conditions only)
569
+ - "mle": Maximum likelihood estimation (propensity score conditions only)
570
+ twostep : bool, default=True
571
+ GMM estimation mode:
572
+
573
+ - **True** (default): Two-step GMM estimator.
574
+ Pre-computes weight matrix invV and uses fixed invV during optimization.
575
+ Faster and numerically stable.
576
+
577
+ - **False**: Continuously updating GMM estimator (Hansen et al. 1996).
578
+ Re-computes invV at each iteration.
579
+ Better finite-sample properties in theory, but 5-10x slower.
580
+
581
+ twosided : bool, default=True
582
+ Whether to allow two-sided noncompliance:
583
+ - True: Two-sided noncompliance (with always-takers and never-takers)
584
+ - False: One-sided noncompliance (never-takers only, π_a=0)
585
+ probs_min : float, default=1e-6
586
+ Probability clipping boundary. Compliance probabilities are constrained
587
+ to [probs_min, 1-probs_min] interval.
588
+
589
+ - Default 1e-6 maintains numerical stability
590
+ - Lowering this value may cause numerical instability
591
+ - Raising this value increases bias but improves stability
592
+ warn_clipping : bool, default=True
593
+ Whether to warn when proportion of clipped compliance probabilities
594
+ exceeds threshold.
595
+
596
+ - True: Issue warning (recommended for transparency)
597
+ - False: Silent operation
598
+
599
+ This feature helps identify numerical issues such as complete
600
+ or quasi-complete separation.
601
+ clipping_warn_threshold : float, default=0.05
602
+ Minimum clipping proportion to trigger warning (between 0 and 1).
603
+
604
+ - Default 0.05 means warning when >5% of probabilities are clipped
605
+ - Set to 0.0 to warn on any clipping
606
+ - Set to 1.0 to never warn (equivalent to warn_clipping=False)
607
+ verbose : int, default=0
608
+ Controls output verbosity during optimization.
609
+
610
+ - 0: Silent mode, only warnings and errors
611
+ - 1: Basic optimization info (iterations, convergence status)
612
+ - 2: Detailed diagnostics (loss, gradients per iteration)
613
+
614
+ Returns
615
+ -------
616
+ CBIVResults
617
+ Result object containing coefficients, fitted values, weights, etc.
618
+
619
+ Notes
620
+ -----
621
+ **Principal Stratification (Three-State Model)**:
622
+
623
+ - Compliers: Accept treatment when Z=1, refuse when Z=0
624
+ - Always-takers: Accept treatment regardless of Z
625
+ - Never-takers: Refuse treatment regardless of Z
626
+
627
+ **Instrumental Variable (IV) Assumptions**:
628
+
629
+ CBIV assumes the following conditions are satisfied:
630
+
631
+ 1. **Relevance**: Instrument Z is correlated with treatment Tr.
632
+ Recommend verifying first-stage correlation before using CBIV:
633
+
634
+ >>> import numpy as np
635
+ >>> corr = np.corrcoef(Z, Tr)[0, 1]
636
+ >>> if abs(corr) < 0.1:
637
+ ... print("Warning: Weak instrument detected.")
638
+
639
+ Or run first-stage regression and check F-statistic (should be > 10,
640
+ see Stock & Yogo 2005).
641
+
642
+ 2. **Exclusion Restriction**: Z affects outcome Y only through Tr.
643
+
644
+ 3. **Monotonicity**: No defiers (individuals who refuse treatment when
645
+ Z=1 but accept when Z=0).
646
+
647
+ Weak instruments lead to finite-sample bias and inference failure.
648
+ See Staiger & Stock (1997) and Stock & Yogo (2005) for weak IV diagnostics.
649
+
650
+ **Optimization Behavior**:
651
+
652
+ - CBIV executes silently by default
653
+ - Uses multiple starting points internally to find optimal solution
654
+ - Falls back to random initialization with warning if GLM initialization fails
655
+
656
+ Examples
657
+ --------
658
+ **Basic usage with matrix interface:**
659
+
660
+ >>> import numpy as np
661
+ >>> from cbps import CBIV
662
+ >>> # Simulate IV data with noncompliance
663
+ >>> np.random.seed(42)
664
+ >>> n = 500
665
+ >>> X = np.random.randn(n, 2) # Covariates
666
+ >>> Z = np.random.binomial(1, 0.5, n) # Instrument (randomized)
667
+ >>> # Generate compliance: Pr(comply | X) depends on X
668
+ >>> p_comply = 1 / (1 + np.exp(-0.5 - 0.3 * X[:, 0]))
669
+ >>> comply = np.random.binomial(1, p_comply, n)
670
+ >>> Tr = Z * comply # Treatment = instrument * compliance
671
+ >>> # Fit CBIV model (one-sided noncompliance)
672
+ >>> fit = CBIV(Tr=Tr, Z=Z, X=X, method='over', twosided=False)
673
+ >>> print(f"Converged: {fit.converged}")
674
+ >>> print(f"J-statistic: {fit.J:.4f}")
675
+
676
+ **Formula interface (recommended for DataFrames):**
677
+
678
+ >>> import pandas as pd
679
+ >>> df = pd.DataFrame({
680
+ ... 'treat': Tr, 'z': Z, 'x1': X[:, 0], 'x2': X[:, 1]
681
+ ... })
682
+ >>> fit = CBIV(formula="treat ~ x1 + x2 | z", data=df,
683
+ ... method='over', twosided=False)
684
+
685
+ **Two-sided noncompliance model:**
686
+
687
+ >>> # When both always-takers and never-takers are present
688
+ >>> fit = CBIV(Tr=Tr, Z=Z, X=X, method='over', twosided=True)
689
+ >>> print(fit.fitted_values.shape) # (n, 3): [π_c, π_a, π_n]
690
+ >>> print(fit.coefficients.shape) # (k, 2): [β_c, β_a]
691
+
692
+ **Estimation methods:**
693
+
694
+ >>> # Over-identified (default): combines balance + score conditions
695
+ >>> fit_over = CBIV(Tr=Tr, Z=Z, X=X, method='over')
696
+ >>> # Exactly-identified: balance conditions only
697
+ >>> fit_exact = CBIV(Tr=Tr, Z=Z, X=X, method='exact')
698
+ >>> # Maximum likelihood: score conditions only
699
+ >>> fit_mle = CBIV(Tr=Tr, Z=Z, X=X, method='mle')
700
+
701
+ **Accessing results:**
702
+
703
+ >>> fit = CBIV(Tr=Tr, Z=Z, X=X, twosided=False)
704
+ >>> weights = fit.weights # IPW weights for LATE estimation
705
+ >>> p_c = fit.p_complier # Complier probabilities
706
+ >>> se = np.sqrt(np.diag(fit.vcov())) # Standard errors
707
+
708
+ **Input Interfaces**:
709
+
710
+ CBIV supports two input methods:
711
+
712
+ 1. **Formula interface**: ``CBIV(formula="treat ~ x1 + x2 | z", data=df)``
713
+ Uses patsy for formula parsing. Intercept is added automatically.
714
+
715
+ 2. **Matrix interface**: ``CBIV(Tr=Tr, Z=Z, X=X)``
716
+ Pass arrays directly. Note that an intercept column is added internally.
717
+
718
+ The interfaces are mutually exclusive; specify either formula+data or Tr+Z+X.
719
+
720
+ See Also
721
+ --------
722
+ CBIVResults : Result container returned by this function.
723
+
724
+ References
725
+ ----------
726
+ Imai, K. and Ratkovic, M. (2014). Covariate Balancing Propensity Score.
727
+ Journal of the Royal Statistical Society: Series B (Statistical Methodology),
728
+ 76(1), 243-263. https://doi.org/10.1111/rssb.12027
729
+
730
+ Angrist, J. D., Imbens, G. W., and Rubin, D. B. (1996). Identification of
731
+ Causal Effects Using Instrumental Variables. Journal of the American
732
+ Statistical Association, 91(434), 444-455.
733
+
734
+ Staiger, D. and Stock, J. H. (1997). Instrumental Variables Regression with
735
+ Weak Instruments. Econometrica, 65(3), 557-586.
736
+
737
+ Stock, J. H. and Yogo, M. (2005). Testing for Weak Instruments in Linear IV
738
+ Regression. In: Andrews, D. W. K. and Stock, J. H. (eds.) Identification and
739
+ Inference for Econometric Models: Essays in Honor of Thomas Rothenberg.
740
+ Cambridge University Press, pp. 80-108.
741
+ """
742
+ # ========== Step 1: Parameter Validation and Interface Selection ==========
743
+
744
+ # Import required modules
745
+ import pandas as pd
746
+
747
+ # Mutual exclusivity check: formula and matrix interface cannot be used together
748
+ if formula is not None and (Tr is not None or Z is not None or X is not None):
749
+ raise ValueError(
750
+ "Cannot specify both 'formula' and matrix parameters (Tr/Z/X). "
751
+ "Please use either:\n"
752
+ " 1. Formula interface: CBIV(formula='treat ~ x1 + x2 | z1 + z2', data=df)\n"
753
+ " 2. Matrix interface: CBIV(Tr=treat_array, Z=z_array, X=X_matrix)\n"
754
+ f"\nReceived:\n"
755
+ f" formula = {repr(formula)}\n"
756
+ f" Tr = {'<provided>' if Tr is not None else 'None'}\n"
757
+ f" Z = {'<provided>' if Z is not None else 'None'}\n"
758
+ f" X = {'<provided>' if X is not None else 'None'}"
759
+ )
760
+
761
+ # Parameter completeness check
762
+ if formula is None and (Tr is None or Z is None or X is None):
763
+ raise ValueError(
764
+ "Must provide either:\n"
765
+ " 1. Formula interface: formula + data\n"
766
+ " 2. Matrix interface: Tr + Z + X\n"
767
+ f"\nReceived:\n"
768
+ f" formula = {repr(formula)}\n"
769
+ f" Tr = {'<provided>' if Tr is not None else 'None'}\n"
770
+ f" Z = {'<provided>' if Z is not None else 'None'}\n"
771
+ f" X = {'<provided>' if X is not None else 'None'}"
772
+ )
773
+
774
+ # Formula interface: requires data parameter
775
+ if formula is not None and data is None:
776
+ raise ValueError(
777
+ "data parameter is required when using formula interface. "
778
+ "Please provide a pandas DataFrame containing the variables in your formula.\n"
779
+ f"Example: CBIV(formula='{formula}', data=your_dataframe)"
780
+ )
781
+
782
+ # Formula interface: validate data type
783
+ if formula is not None and not isinstance(data, pd.DataFrame):
784
+ raise TypeError(
785
+ f"data must be a pandas DataFrame when using formula interface. "
786
+ f"Got: {type(data).__name__}. "
787
+ f"If you have a dict, convert it: pd.DataFrame(your_dict)"
788
+ )
789
+
790
+ # Formula interface: validate formula type and format
791
+ if formula is not None:
792
+ if not isinstance(formula, str):
793
+ raise TypeError(
794
+ f"formula must be a string, got {type(formula).__name__}. "
795
+ f"Received: formula={formula}\n\n"
796
+ f"Example: 'treat ~ x1 + x2 | z1 + z2'"
797
+ )
798
+
799
+ # Validate formula contains '~' and '|'
800
+ if '~' not in formula:
801
+ raise ValueError(
802
+ f"Formula must contain '~' to separate treatment from covariates. "
803
+ f"Got: '{formula}'. "
804
+ f"Example: 'treat ~ x1 + x2 | z1 + z2'"
805
+ )
806
+
807
+ if '|' not in formula:
808
+ raise ValueError(
809
+ f"IV formula must contain '|' to separate covariates from instruments. "
810
+ f"Got: '{formula}'. "
811
+ f"Format: 'treatment ~ covariates | instruments'\n"
812
+ f"Example: 'treat ~ x1 + x2 | z1 + z2'"
813
+ )
814
+
815
+ # ========== Step 2: Formula Parsing (if using formula interface) ==========
816
+
817
+ if formula is not None:
818
+ # Parse IV formula: "treatment ~ covariates | instruments"
819
+ # Split into three parts
820
+ if '|' not in formula:
821
+ raise ValueError(
822
+ f"IV formula must contain '|' to separate covariates from instruments. "
823
+ f"Got: '{formula}'"
824
+ )
825
+
826
+ # Split formula
827
+ parts = formula.split('|')
828
+ if len(parts) != 2:
829
+ raise ValueError(
830
+ f"IV formula must have exactly one '|' separator. "
831
+ f"Got: '{formula}'\n"
832
+ f"Format: 'treatment ~ covariates | instruments'"
833
+ )
834
+
835
+ main_formula = parts[0].strip() # "treatment ~ covariates"
836
+ instrument_formula = parts[1].strip() # "instruments"
837
+
838
+ # Validate main_formula contains '~'
839
+ if '~' not in main_formula:
840
+ raise ValueError(
841
+ f"Main formula part must contain '~'. "
842
+ f"Got: '{main_formula}'\n"
843
+ f"Full formula: '{formula}'"
844
+ )
845
+
846
+ # Use patsy to parse main formula (treatment ~ covariates)
847
+ from patsy import dmatrices, PatsyError
848
+
849
+ try:
850
+ # Parse treatment and covariates
851
+ # dmatrices returns (y, X), where y is treatment, X is covariates (with intercept)
852
+ y_matrix, X_matrix = dmatrices(main_formula, data, return_type='dataframe')
853
+
854
+ # Extract treatment (y_matrix may have multiple columns, take first)
855
+ if y_matrix.shape[1] == 1:
856
+ Tr = y_matrix.iloc[:, 0].to_numpy()
857
+ else:
858
+ # If patsy one-hot encoded categorical treatment, needs reverse transform
859
+ # Simplified handling: take first column
860
+ warnings.warn(
861
+ f"Treatment variable was one-hot encoded by patsy. Using first column. "
862
+ f"Consider using matrix interface for more control.",
863
+ UserWarning
864
+ )
865
+ Tr = y_matrix.iloc[:, 0].to_numpy()
866
+
867
+ # Extract covariates (X_matrix already includes intercept)
868
+ X = X_matrix.to_numpy()
869
+
870
+ except PatsyError as e:
871
+ raise ValueError(
872
+ f"Failed to parse formula '{main_formula}': {e}\n"
873
+ f"Please check that all variables exist in the data."
874
+ )
875
+
876
+ # Parse instruments
877
+ # Construct a simple formula to extract instrument variables
878
+ # Use "0 + instrument_vars" to avoid adding intercept
879
+ instrument_formula_patsy = "0 + " + instrument_formula
880
+
881
+ try:
882
+ from patsy import dmatrix
883
+ Z_matrix = dmatrix(instrument_formula_patsy, data, return_type='dataframe')
884
+ Z = Z_matrix.to_numpy()
885
+
886
+ # CBIV only supports single instrument; if multiple columns, use first and warn
887
+ if Z.shape[1] > 1:
888
+ warnings.warn(
889
+ f"Formula specified {Z.shape[1]} instruments, but CBIV only uses the first one. "
890
+ f"Multiple instruments are not supported in the current CBIV implementation.",
891
+ UserWarning
892
+ )
893
+ Z = Z[:, 0:1] # Keep 2D shape
894
+
895
+ except PatsyError as e:
896
+ raise ValueError(
897
+ f"Failed to parse instruments '{instrument_formula}': {e}\n"
898
+ f"Please check that all instrument variables exist in the data."
899
+ )
900
+
901
+ # ========== Step 3: Matrix Interface Parameter Validation ==========
902
+
903
+ # Validate iterations parameter
904
+ if not isinstance(iterations, (int, np.integer)):
905
+ raise TypeError(
906
+ f"CBIV: iterations must be an integer, got {type(iterations).__name__}. "
907
+ f"Received: iterations={iterations}"
908
+ )
909
+ if iterations < 1:
910
+ raise ValueError(
911
+ f"CBIV: iterations must be ≥1 (at least one optimization step required). "
912
+ f"Received: iterations={iterations}"
913
+ )
914
+ if iterations > 100000:
915
+ warnings.warn(
916
+ f"CBIV: iterations={iterations} is very large and may take a long time. "
917
+ f"Consider using a smaller value (default is 1000).",
918
+ UserWarning
919
+ )
920
+
921
+ Tr = np.asarray(Tr).ravel()
922
+ Z = np.asarray(Z)
923
+ X = np.asarray(X)
924
+
925
+ # Ensure Z and X are 2D arrays
926
+ if Z.ndim == 1:
927
+ Z = Z.reshape(-1, 1)
928
+
929
+ if X.ndim == 1:
930
+ X = X.reshape(-1, 1)
931
+
932
+ n = len(Tr)
933
+ # Use shape[0] for dimension check
934
+ if Z.shape[0] != n or X.shape[0] != n:
935
+ raise ValueError(
936
+ f"Tr, Z, X must have same number of rows, got {n}, {Z.shape[0]}, {X.shape[0]}"
937
+ )
938
+
939
+ # CBIV only uses single instrument variable
940
+ # If Z has multiple columns, warn and use only the first
941
+ if Z.shape[1] > 1:
942
+ warnings.warn(
943
+ f"CBIV: Z has {Z.shape[1]} columns but CBIV only uses the first column as the instrument. "
944
+ f"Multiple instruments are not supported in the current CBIV implementation.",
945
+ UserWarning
946
+ )
947
+
948
+ # Extract first column as instrument variable (Z becomes (n,) vector)
949
+ Z = Z[:, 0]
950
+
951
+ if not np.all(np.isin(Tr, [0, 1])):
952
+ raise ValueError("Tr must be binary (0/1)")
953
+
954
+ if not np.all(np.isin(Z, [0, 1])):
955
+ raise ValueError("Z must be binary (0/1)")
956
+
957
+ if method not in ["over", "exact", "mle"]:
958
+ raise ValueError(f"method must be 'over', 'exact', or 'mle', got {method}")
959
+
960
+ # Validate additional parameters
961
+ if not isinstance(probs_min, (int, float)) or probs_min <= 0 or probs_min >= 0.5:
962
+ raise ValueError(
963
+ f"CBIV: probs_min must be in (0, 0.5), got {probs_min}. "
964
+ f"Recommended range: [1e-8, 1e-4]."
965
+ )
966
+
967
+ if not isinstance(warn_clipping, bool):
968
+ raise TypeError(f"CBIV: warn_clipping must be bool, got {type(warn_clipping).__name__}")
969
+
970
+ if not isinstance(clipping_warn_threshold, (int, float)) or \
971
+ clipping_warn_threshold < 0 or clipping_warn_threshold > 1:
972
+ raise ValueError(
973
+ f"CBIV: clipping_warn_threshold must be in [0, 1], got {clipping_warn_threshold}"
974
+ )
975
+
976
+ # Compute pZ (proportion with Z=1)
977
+ pZ = Z.mean()
978
+
979
+ # Validate pZ range to avoid division by zero
980
+ if pZ <= 0.0 or pZ >= 1.0:
981
+ if pZ == 0.0:
982
+ raise ValueError(
983
+ f"CBIV: Instrument Z has no variation. All observations have Z=0.\n"
984
+ f"CBIV requires variation in the instrument (0 < P(Z=1) < 1).\n"
985
+ f"This results in pZ=0.0, which causes division by zero in the weighting formula."
986
+ )
987
+ elif pZ == 1.0:
988
+ raise ValueError(
989
+ f"CBIV: Instrument Z has no variation. All observations have Z=1.\n"
990
+ f"CBIV requires variation in the instrument (0 < P(Z=1) < 1).\n"
991
+ f"This results in pZ=1.0, which causes division by zero in the weighting formula."
992
+ )
993
+ else:
994
+ # Should not reach here in theory, but for completeness
995
+ raise ValueError(
996
+ f"CBIV: Invalid pZ value: {pZ}. "
997
+ f"Expected 0 < pZ < 1, but got pZ={pZ}."
998
+ )
999
+
1000
+ # Set method flags
1001
+ score_only = method == "mle"
1002
+ bal_only = method == "exact"
1003
+
1004
+ # X matrix preprocessing
1005
+ # 1. Add intercept column, remove constant columns
1006
+ X = np.column_stack([np.ones(n), X[:, X.std(axis=0) > 0]])
1007
+
1008
+ # 2. Save original X and compute statistics
1009
+ X_orig = X.copy()
1010
+ x_sd = X[:, 1:].std(axis=0)
1011
+ x_mean = X[:, 1:].mean(axis=0)
1012
+
1013
+ # 3. Standardize non-intercept columns
1014
+ X[:, 1:] = (X[:, 1:] - x_mean) / x_sd
1015
+
1016
+ # 4. Compute effective degrees of freedom k (matrix rank)
1017
+ XtX = X.T @ X
1018
+ k = int(np.floor(np.trace(XtX @ r_ginv_like(XtX)) + 0.1))
1019
+
1020
+ # Detect covariate collinearity
1021
+ p = X.shape[1]
1022
+ if k < p:
1023
+ raise ValueError(
1024
+ f"CBIV: Covariate matrix X is rank-deficient after preprocessing.\n"
1025
+ f"Effective rank k={k}, Columns={p}. Perfect collinearity detected.\n"
1026
+ f"Please remove linearly dependent covariates before calling CBIV.\n"
1027
+ f"Hint: Check for duplicate columns, constant multiples, or linear combinations."
1028
+ )
1029
+
1030
+ # ========== Internal Helper Functions ==========
1031
+
1032
def _check_and_warn_clipping(
    probs_before_clip: np.ndarray,
    probs_after_clip: np.ndarray,
    probs_min: float,
    label: str,
    warn: bool,
    threshold: float,
) -> None:
    """
    Warn when too large a share of probabilities had to be clipped.

    Clipping is detected on the pre-clip values: an observation counts as
    clipped only when its raw probability lies strictly outside
    ``[probs_min, 1 - probs_min]``. The post-clip array is not suitable for
    detection because a caller may renormalize after clipping, which moves
    upper-clipped values below ``1 - probs_min`` so they would be missed.

    Parameters
    ----------
    probs_before_clip : np.ndarray
        Probabilities before clipping; used to detect clipped entries.
    probs_after_clip : np.ndarray
        Probabilities after clipping. Kept for interface compatibility;
        not used for detection.
    probs_min : float
        Probability clipping boundary.
    label : str
        Probability type label inserted into the warning text
        (e.g., "complier probability").
    warn : bool
        Whether to issue the warning at all.
    threshold : float
        Clipped proportion (0-1) that must be exceeded to trigger the warning.

    Warns
    -----
    CBIVNumericalWarning
        If the clipped proportion is strictly greater than ``threshold``.
    """
    if not warn:
        return

    raw = np.asarray(probs_before_clip)
    n = raw.size
    if n == 0:
        # Nothing to inspect; also avoids a 0/0 clipping rate.
        return

    # np.clip alters a value only when it is strictly outside the bounds.
    clipped_low = int(np.sum(raw < probs_min))
    clipped_high = int(np.sum(raw > 1 - probs_min))
    n_clipped = clipped_low + clipped_high
    clipping_rate = n_clipped / n

    if clipping_rate > threshold:
        warnings.warn(
            f"\nCBIV Numerical Warning: {n_clipped}/{n} ({clipping_rate:.1%}) {label} "
            f"clipped to bounds [{probs_min:.1e}, {1-probs_min:.1e}].\n"
            f" - Clipped to lower bound: {clipped_low} ({clipped_low/n:.1%})\n"
            f" - Clipped to upper bound: {clipped_high} ({clipped_high/n:.1%})\n\n"
            f"This may indicate complete or quasi-complete separation.\n\n"
            f"Recommendations:\n"
            f" (1) Check data quality and sample size\n"
            f" (2) Check instrument Z relevance (weak instrument?)\n"
            f" (3) Consider increasing sample size or reducing covariates\n"
            f" (4) To adjust tolerance: use probs_min parameter (current={probs_min:.1e})\n"
            f" (5) To disable this warning: use warn_clipping=False\n",
            CBIVNumericalWarning,
            # Kept from the original; presumably chosen so the warning points
            # at the caller of the enclosing estimator — TODO confirm.
            stacklevel=4
        )
1084
+
1085
def _compute_compliance_probs_twosided(
    beta_curr: np.ndarray,
    probs_min: float = PROBS_MIN,
    warn_clipping: bool = False,
    clipping_warn_threshold: float = 0.05
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Complier / always-taker / never-taker probabilities (two-sided case).

    Parameters
    ----------
    beta_curr : np.ndarray
        Stacked coefficients: the first ``k`` entries are β_c, the rest β_a.
    probs_min : float
        Clipping bound; each probability is clipped to
        ``[probs_min, 1 - probs_min]`` before renormalization.
    warn_clipping : bool
        If True, run the clipping check for each of the three probabilities.
    clipping_warn_threshold : float
        Clipped proportion forwarded to the clipping check.

    Returns
    -------
    probs_c, probs_a, probs_n : np.ndarray
        Complier, always-taker and never-taker probabilities, rescaled so
        that they sum to one for every observation.

    Notes
    -----
    The multinomial-logit denominator ``1 + exp(X@β_c) + exp(X@β_a)`` is
    evaluated in log space after subtracting the row-wise maximum of
    ``(0, X@β_c, X@β_a)`` so that the exponentials cannot overflow.
    """
    # Linear predictors of the two non-reference categories.
    lin_c = X @ beta_curr[:k]
    lin_a = X @ beta_curr[k:]

    # log(1 + e^lin_c + e^lin_a), shifted by the largest exponent.
    shift = np.maximum(np.maximum(0.0, lin_c), lin_a)
    log_denom = shift + np.log(
        np.exp(0.0 - shift) + np.exp(lin_c - shift) + np.exp(lin_a - shift)
    )

    # Unclipped softmax probabilities.
    raw_c = np.exp(lin_c - log_denom)
    raw_a = np.exp(lin_a - log_denom)
    raw_n = np.exp(0.0 - log_denom)

    # Keep every probability away from 0 and 1 ...
    bounded_c = np.clip(raw_c, probs_min, 1.0 - probs_min)
    bounded_a = np.clip(raw_a, probs_min, 1.0 - probs_min)
    bounded_n = np.clip(raw_n, probs_min, 1.0 - probs_min)

    # ... then rescale so the three still add up to one.
    total = bounded_c + bounded_a + bounded_n
    probs_c = bounded_c / total
    probs_a = bounded_a / total
    probs_n = bounded_n / total

    if warn_clipping:
        # A plain loop (not a comprehension) keeps the call depth unchanged.
        for raw, final, label in (
            (raw_c, probs_c, "complier probability (π_c)"),
            (raw_a, probs_a, "always-taker probability (π_a)"),
            (raw_n, probs_n, "never-taker probability (π_n)"),
        ):
            _check_and_warn_clipping(raw, final, probs_min,
                                     label, warn_clipping, clipping_warn_threshold)

    return probs_c, probs_a, probs_n
1163
+
1164
def _compute_compliance_probs_onesided(
    beta_curr: np.ndarray,
    probs_min: float = PROBS_MIN,
    warn_clipping: bool = False,
    clipping_warn_threshold: float = 0.05
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Complier and never-taker probabilities (one-sided case).

    Parameters
    ----------
    beta_curr : np.ndarray
        Coefficient vector β_c.
    probs_min : float
        Clipping bound; π_c is clipped to ``[probs_min, 1 - probs_min]``.
    warn_clipping : bool
        If True, run the clipping check on π_c.
    clipping_warn_threshold : float
        Clipped proportion forwarded to the clipping check.

    Returns
    -------
    probs_c : np.ndarray
        Clipped complier probabilities ``expit(X @ β_c)``.
    probs_n : np.ndarray
        Never-taker probabilities, computed as ``1 - probs_c``.

    Notes
    -----
    Only two compliance types are modelled here, so the complement of the
    clipped π_c is used directly and no renormalization is performed.
    """
    # Logistic link evaluated with scipy's expit.
    raw_c = scipy.special.expit(X @ beta_curr)
    probs_c = np.clip(raw_c, probs_min, 1.0 - probs_min)

    if warn_clipping:
        _check_and_warn_clipping(raw_c, probs_c, probs_min,
                                 "complier probability (π_c)", warn_clipping, clipping_warn_threshold)

    return probs_c, 1.0 - probs_c
1215
+
1216
def _gmm_func_twosided(
    beta_curr: np.ndarray, invV: Optional[np.ndarray] = None
) -> Dict[str, np.ndarray]:
    """
    GMM objective for two-sided noncompliance.

    Parameters
    ----------
    beta_curr : np.ndarray
        Stacked coefficients: first ``k`` entries β_c, remaining entries β_a.
    invV : np.ndarray, optional
        Weighting matrix (pseudo)inverse. When None it is built from the
        current probabilities and returned so the caller can reuse it.

    Returns
    -------
    dict
        ``'loss'``: the quadratic form ``gbar' @ invV @ gbar`` as a float;
        ``'invV'``: the weighting matrix that was used.

    Notes
    -----
    ``gbar`` stacks two score blocks and four balance blocks (six blocks of
    length ``k``); ``V`` is the matching 6×6 grid of ``X' diag(w) X / n``
    blocks. Probabilities come from the compliance helper with its default
    clipping, and no extra epsilon is added to the denominators.
    """
    pi_c, pi_a, pi_n = _compute_compliance_probs_twosided(beta_curr)

    # Probabilities of the pooled groups that share an observed (Z, Tr) cell.
    pi_ca = pi_c + pi_a  # compliers + always-takers
    pi_cn = pi_c + pi_n  # compliers + never-takers

    # ---------- Moment vector ----------
    # Score-type conditions for β_c and β_a.
    score_c = (
        Z * Tr / (1.0 - pi_n)
        + (1.0 - Z) * (1.0 - Tr) / (1.0 - pi_a)
        - 1.0
    ) * pi_c
    score_a = (
        Z * Tr / (1.0 - pi_n) + (1.0 - Z) * Tr / pi_a - 1.0
    ) * pi_a

    # Balance weights, one column per observed (Z, Tr) cell.
    balance = np.column_stack([
        Z * Tr / (pZ * pi_ca) - 1.0,
        (1.0 - Z) * Tr / ((1.0 - pZ) * pi_a) - 1.0,
        Z * (1.0 - Tr) / (pZ * pi_n) - 1.0,
        (1.0 - Z) * (1.0 - Tr) / ((1.0 - pZ) * pi_cn) - 1.0,
    ])

    # Column-major flattening keeps each balance condition contiguous.
    gbar = np.concatenate([
        (X.T @ score_c) / n,
        (X.T @ score_a) / n,
        ((X.T @ balance) / n).ravel(order="F"),
    ])

    # ---------- Weighting matrix ----------
    if invV is None:
        minus_one = -np.ones(n)

        # Observation weights of the upper-triangular blocks, keyed (row, col).
        upper = {
            (0, 0): (
                pZ / (1.0 - pi_n) + (1.0 - pZ) / (1.0 - pi_a) - 1.0
            ) * pi_c**2,
            (0, 1): (pZ / pi_ca - 1.0) * pi_a * pi_c,
            (0, 2): pi_c * (1.0 / pi_ca - 1.0),
            (0, 3): -pi_c,
            (0, 4): -pi_c,
            (0, 5): pi_c * (1.0 / pi_cn - 1.0),
            (1, 1): (
                pZ / (1.0 - pi_n) + (1.0 - pZ) / pi_a - 1.0
            ) * pi_a**2,
            (1, 2): pi_a * (1.0 / pi_ca - 1.0),
            (1, 3): pi_a * (1.0 / pi_a - 1.0),
            (1, 4): -pi_a,
            (1, 5): -pi_a,
            (2, 2): 1.0 / (pZ * pi_ca) - 1.0,
            (2, 3): minus_one,
            (2, 4): minus_one,
            (2, 5): minus_one,
            (3, 3): 1.0 / ((1.0 - pZ) * pi_a) - 1.0,
            (3, 4): minus_one,
            (3, 5): minus_one,
            (4, 4): 1.0 / (pZ * pi_n) - 1.0,
            (4, 5): minus_one,
            (5, 5): 1.0 / ((1.0 - pZ) * pi_cn) - 1.0,
        }

        # Each block is X' diag(w) X / n.
        blocks = {ij: X.T @ (X * w[:, None]) / n for ij, w in upper.items()}

        # Lower triangle is filled with transposes of the upper blocks.
        V = np.block([
            [blocks[(i, j)] if i <= j else blocks[(j, i)].T for j in range(6)]
            for i in range(6)
        ])
        # Remove floating-point asymmetry before inverting.
        V = symmetrize(V)

        # Rank is only inspected when the weighting matrix is first built.
        r = numeric_rank(V)
        if r < V.shape[0]:
            warnings.warn(
                f"GMM weighting matrix V is rank-deficient (rank={r} < {V.shape[0]}). Using pseudoinverse.",
                RuntimeWarning,
            )
        invV = pinv_symmetric_psd(V)

    return {"loss": float(gbar @ invV @ gbar), "invV": invV}
1369
+
1370
def _gmm_func_onesided(
    beta_curr: np.ndarray, invV: Optional[np.ndarray] = None
) -> Dict[str, np.ndarray]:
    """
    GMM objective for one-sided noncompliance.

    Parameters
    ----------
    beta_curr : np.ndarray
        Coefficient vector β_c.
    invV : np.ndarray, optional
        Weighting matrix (pseudo)inverse. When None it is built from the
        current probabilities and returned so the caller can reuse it.

    Returns
    -------
    dict
        ``'loss'``: the quadratic form ``gbar' @ invV @ gbar`` as a float;
        ``'invV'``: the weighting matrix that was used.

    Notes
    -----
    ``gbar`` stacks one score block and two balance blocks (three blocks of
    length ``k``); ``V`` is the matching 3×3 grid of ``X' diag(w) X / n``
    blocks.
    """
    pi_c, pi_n = _compute_compliance_probs_onesided(beta_curr)

    # ---------- Moment vector ----------
    # Score-type condition; only Z=1 observations contribute.
    score = (X.T @ (Tr * Z * (1.0 - pi_c) - Z * (1.0 - Tr) * pi_c)) / n

    # Balance weights for the (Z=1, Tr=1) and (Z=1, Tr=0) cells.
    balance = np.column_stack([
        Z * Tr / (pZ * pi_c) - 1.0,
        Z * (1.0 - Tr) / (pZ * pi_n) - 1.0,
    ])

    # Column-major flattening keeps each balance condition contiguous.
    gbar = np.concatenate([score, ((X.T @ balance) / n).ravel(order="F")])

    # ---------- Weighting matrix ----------
    if invV is None:
        # Observation weights of the upper-triangular blocks, keyed (row, col).
        upper = {
            (0, 0): pZ * pi_c * (1.0 - pi_c),
            (0, 1): 1.0 - pi_c,
            (0, 2): -pi_c,
            (1, 1): 1.0 / (pZ * pi_c) - 1.0,
            (1, 2): -np.ones(n),
            (2, 2): 1.0 / (pZ * pi_n) - 1.0,
        }

        # Each block is X' diag(w) X / n.
        blocks = {ij: X.T @ (X * w[:, None]) / n for ij, w in upper.items()}

        # Lower triangle is filled with transposes of the upper blocks.
        V = np.block([
            [blocks[(i, j)] if i <= j else blocks[(j, i)].T for j in range(3)]
            for i in range(3)
        ])
        # Remove floating-point asymmetry before inverting.
        V = symmetrize(V)

        # Rank is only inspected when the weighting matrix is first built.
        r = numeric_rank(V)
        if r < V.shape[0]:
            warnings.warn(
                f"GMM weighting matrix V is rank-deficient (rank={r} < {V.shape[0]}). Using pseudoinverse.",
                RuntimeWarning,
            )
        invV = pinv_symmetric_psd(V)

    return {"loss": float(gbar @ invV @ gbar), "invV": invV}
1456
+
1457
def _mle_gradient_twosided(beta_curr: np.ndarray) -> np.ndarray:
    """
    MLE gradient for two-sided noncompliance.

    Parameters
    ----------
    beta_curr : np.ndarray
        Stacked coefficients: first ``k`` entries β_c, remaining entries β_a.

    Returns
    -------
    np.ndarray
        Gradient with the β_c part first and the β_a part second.

    Notes
    -----
    The probabilities are obtained from the shared compliance helper with
    its default arguments: overflow-safe log-sum-exp, clipping at
    ``PROBS_MIN`` and renormalization. Because of the clipping, no epsilon
    is added to the denominators below.
    """
    pi_c, pi_a, pi_n = _compute_compliance_probs_twosided(beta_curr)

    # Term shared by both score vectors: Z=1, Tr=1 cell.
    treated_encouraged = Z * Tr / (pi_c + pi_a)

    score_c = (treated_encouraged + (1 - Z) * (1 - Tr) / (1 - pi_a) - 1) * pi_c
    score_a = (treated_encouraged + (1 - Z) * Tr / pi_a - 1) * pi_a

    return np.concatenate([-X.T @ score_c, -X.T @ score_a])
1504
+
1505
def _mle_gradient_onesided(beta_curr: np.ndarray) -> np.ndarray:
    """
    MLE gradient for one-sided noncompliance.

    Parameters
    ----------
    beta_curr : np.ndarray
        Coefficient vector β_c.

    Returns
    -------
    np.ndarray
        Gradient vector with one entry per column of ``X``.
    """
    # Complier probability, clipped so both pi and 1 - pi are safe divisors.
    pi = np.clip(scipy.special.expit(X @ beta_curr), PROBS_MIN, 1 - PROBS_MIN)
    pi_comp = 1 - pi

    # Per-observation score; only Z=1 observations contribute.
    score = (Z * Tr / pi - Z * (1 - Tr) / pi_comp) * pi * pi_comp

    return -X.T @ score
1521
+
1522
def _bal_gradient_twosided(beta_curr: np.ndarray, invV: np.ndarray) -> np.ndarray:
    """
    Balance-loss gradient for two-sided noncompliance.

    Parameters
    ----------
    beta_curr : np.ndarray
        Stacked coefficients: first ``k`` entries β_c, remaining entries β_a.
    invV : np.ndarray
        Full weighting matrix; only the trailing block starting at row and
        column ``2*k`` (the balance conditions) is used.

    Returns
    -------
    np.ndarray
        Gradient with the β_c part first and the β_a part second.

    Notes
    -----
    Probabilities come from the shared compliance helper with its default
    arguments (overflow-safe log-sum-exp, clipping at ``PROBS_MIN``,
    renormalization).
    """
    pi_c, pi_a, pi_n = _compute_compliance_probs_twosided(beta_curr)

    pi_ca = pi_c + pi_a
    not_a = 1 - pi_a

    # Derivative factors of the four balance weights, listed in the same
    # cell order as the weight columns below.
    factors_c = (
        -pi_c * pi_n / (pi_ca ** 2),
        pi_c / pi_a,
        pi_c / pi_n,
        -pi_c * pi_a / (not_a ** 2),
    )
    factors_a = (
        -pi_a * pi_n / (pi_ca ** 2),
        -not_a / pi_a,
        pi_a / pi_n,
        pi_a / not_a,
    )

    # Cell indicators scaled by the instrument share of that cell.
    cells = (
        Z * Tr / pZ,
        (1 - Z) * Tr / (1 - pZ),
        Z * (1 - Tr) / pZ,
        (1 - Z) * (1 - Tr) / (1 - pZ),
    )

    # Current balance weights and their averaged moments (column-major).
    w_curr = np.column_stack([
        Z * Tr / (pZ * pi_ca) - 1,
        (1 - Z) * Tr / ((1 - pZ) * pi_a) - 1,
        Z * (1 - Tr) / (pZ * pi_n) - 1,
        (1 - Z) * (1 - Tr) / ((1 - pZ) * (pi_c + pi_n)) - 1
    ])
    wbar = ((1.0 / n) * X.T @ w_curr).ravel(order='F')

    def _stacked_jacobian(factors):
        # Side-by-side X' diag(cell * factor) X blocks, one per cell.
        return (1.0 / n) * np.column_stack([
            (X * (cell * fac)[:, None]).T @ X
            for cell, fac in zip(cells, factors)
        ])

    # Weighting matrix restricted to the balance conditions.
    invV_bal = invV[2 * k:, 2 * k:]

    grad_c = 2 * _stacked_jacobian(factors_c) @ invV_bal @ wbar
    grad_a = 2 * _stacked_jacobian(factors_a) @ invV_bal @ wbar

    return np.concatenate([grad_c, grad_a])
1605
+
1606
def _bal_gradient_onesided(beta_curr: np.ndarray, invV: np.ndarray) -> np.ndarray:
    """
    Balance-loss gradient for one-sided noncompliance.

    Parameters
    ----------
    beta_curr : np.ndarray
        Coefficient vector β_c.
    invV : np.ndarray
        Full weighting matrix; only the trailing block starting at row and
        column ``k`` (the balance conditions) is used.

    Returns
    -------
    np.ndarray
        Gradient vector with one entry per column of ``X``.
    """
    # Complier probability, clipped away from 0 and 1.
    pi = np.clip(scipy.special.expit(X @ beta_curr), PROBS_MIN, 1 - PROBS_MIN)

    # Current balance weights and their averaged moments (column-major).
    w_curr = np.column_stack([
        Z * Tr / (pZ * pi) - 1,
        Z * (1 - Tr) / (pZ * (1 - pi)) - 1
    ])
    wbar = ((1.0 / n) * X.T @ w_curr).ravel(order='F')

    # Per-observation derivative factors of the two weights w.r.t. X @ beta.
    d_treated = -Z * Tr * (1 - pi) / (pZ * pi)
    d_untreated = Z * (1 - Tr) * pi / (pZ * (1 - pi))

    # Side-by-side X' diag(d) X blocks.
    jacobian = (1.0 / n) * np.column_stack([
        (X * d_treated[:, None]).T @ X,
        (X * d_untreated[:, None]).T @ X
    ])

    # Weighting matrix restricted to the balance conditions.
    invV_bal = invV[k:, k:]

    return 2 * jacobian @ invV_bal @ wbar
1638
+
1639
def _gmm_gradient_twosided(beta_curr: np.ndarray, invV: np.ndarray) -> np.ndarray:
    """
    Analytic gradient of the two-sided GMM loss ``gbar' invV gbar``.

    ``beta_curr`` stacks the complier coefficients (first ``k`` entries) and
    the always-taker coefficients (remaining entries); the never-taker class
    is the reference category with logit 0.  ``gbar`` stacks two score-type
    blocks followed by the four balance blocks in column-major order.

    Returns a 2K-length gradient vector, ``2 * dgbar @ invV @ gbar``.
    """

    def _xwx(weight):
        # X' diag(weight) X
        return (X * weight[:, None]).T @ X

    eta_c = X @ beta_curr[:k]
    eta_a = X @ beta_curr[k:]

    # Stable log(1 + exp(eta_c) + exp(eta_a)): factor out the largest exponent
    shift = np.maximum(np.maximum(eta_c, eta_a), 0.0)
    log_denom = shift + np.log(
        np.exp(-shift) + np.exp(eta_c - shift) + np.exp(eta_a - shift)
    )

    # Class probabilities, clipped away from 0/1 and rescaled to sum to one
    probs_c = np.clip(np.exp(eta_c - log_denom), PROBS_MIN, 1 - PROBS_MIN)
    probs_a = np.clip(np.exp(eta_a - log_denom), PROBS_MIN, 1 - PROBS_MIN)
    probs_n = np.clip(np.exp(-log_denom), PROBS_MIN, 1 - PROBS_MIN)
    total = probs_c + probs_a + probs_n
    probs_c, probs_a, probs_n = probs_c / total, probs_a / total, probs_n / total

    s_ca = probs_c + probs_a
    s_cn = probs_c + probs_n

    # Indicators of the four (Z, Tr) cells
    z1t1 = Z * Tr
    z0t1 = (1 - Z) * Tr
    z1t0 = Z * (1 - Tr)
    z0t0 = (1 - Z) * (1 - Tr)

    # Balance weights, one column per cell
    w_curr = np.column_stack([
        z1t1 / (pZ * s_ca) - 1,
        z0t1 / ((1 - pZ) * probs_a) - 1,
        z1t0 / (pZ * probs_n) - 1,
        z0t0 / ((1 - pZ) * s_cn) - 1,
    ])
    w_curr_del = (1.0 / n) * X.T @ w_curr

    # Moment vector: two score-type blocks, then the balance moments
    gbar = np.concatenate([
        (1.0 / n) * X.T @ ((z1t1 / (1 - probs_n) + z0t0 / (1 - probs_a) - 1) * probs_c),
        (1.0 / n) * X.T @ ((z1t1 / (1 - probs_n) + z0t1 / probs_a - 1) * probs_a),
        w_curr_del.ravel('F'),  # column-major, matching the objective function
    ])

    # Derivatives of the inverse-probability factors w.r.t. the linear predictors.
    # *c: w.r.t. the complier predictor; *a: w.r.t. the always-taker predictor.
    # A -> 1/s_ca, B -> 1/probs_a, C -> 1/(1 - probs_a), D -> 1/probs_n
    Ac = -probs_c * probs_n / (s_ca ** 2)
    Bc = probs_c / probs_a
    Cc = -probs_c * probs_a / ((1 - probs_a) ** 2)
    Dc = probs_c / probs_n
    Aa = -probs_a * probs_n / (s_ca ** 2)
    Ba = -(1 - probs_a) / probs_a
    Ca = probs_a / (1 - probs_a)
    Da = probs_a / probs_n

    # Bracketed factors of the two score-type moments
    resid_c = z1t1 / s_ca + z0t0 / (1 - probs_a) - 1
    resid_a = z1t1 / s_ca + z0t1 / probs_a - 1

    # Row block 1: derivative of every moment block w.r.t. beta_c
    dgbar_c = np.column_stack([
        _xwx(probs_c * (z1t1 * Ac + z0t0 * Cc + resid_c * (1 - probs_c))),
        _xwx(probs_a * (z1t1 * Ac + z0t1 * Bc - resid_a * probs_c)),
        _xwx(z1t1 / pZ * Ac),
        _xwx(z0t1 / (1 - pZ) * Bc),
        _xwx(z1t0 / pZ * Dc),
        _xwx(z0t0 / (1 - pZ) * Cc),
    ])

    # Row block 2: derivative of every moment block w.r.t. beta_a
    dgbar_a = np.column_stack([
        _xwx(probs_c * (z1t1 * Aa + z0t0 * Ca - resid_c * probs_a)),
        _xwx(probs_a * (z1t1 * Aa + z0t1 * Ba + resid_a * (1 - probs_a))),
        _xwx(z1t1 / pZ * Aa),
        _xwx(z0t1 / (1 - pZ) * Ba),
        _xwx(z1t0 / pZ * Da),
        _xwx(z0t0 / (1 - pZ) * Ca),
    ])

    dgbar = (1.0 / n) * np.vstack([dgbar_c, dgbar_a])

    # Gradient of the quadratic form gbar' invV gbar
    return 2 * dgbar @ invV @ gbar
1743
+
1744
def _gmm_gradient_onesided(beta_curr: np.ndarray, invV: np.ndarray) -> np.ndarray:
    """
    Analytic gradient of the one-sided GMM loss ``gbar' invV gbar``.

    ``gbar`` has three K-length blocks: the score-type moment
    ``X'(Z * (Tr - p)) / n`` followed by the two balance moments in
    column-major order.

    Returns a K-length gradient vector.
    """
    # Complier probability, clipped so that 1/p and 1/(1 - p) stay finite
    pi_c = np.clip(scipy.special.expit(X @ beta_curr), PROBS_MIN, 1 - PROBS_MIN)

    # Balance weights for the (Z=1, Tr=1) and (Z=1, Tr=0) cells
    w_curr = np.column_stack([
        Z * Tr / (pZ * pi_c) - 1,
        Z * (1 - Tr) / (pZ * (1 - pi_c)) - 1,
    ])
    balance = (1.0 / n) * X.T @ w_curr

    gbar = np.concatenate([
        (1.0 / n) * X.T @ (Z * (Tr - pi_c)),
        balance.ravel('F'),  # column-major stacking
    ])

    # Per-observation slopes of each moment block w.r.t. the linear predictor.
    # The score slope collapses to -Z * p * (1 - p) because
    # -Z*Tr - Z*(1 - Tr) = -Z.
    slopes = (
        -Z * pi_c * (1 - pi_c),
        -Z * Tr * (1 - pi_c) / (pZ * pi_c),
        Z * (1 - Tr) * pi_c / (pZ * (1 - pi_c)),
    )

    # (k, 3k) Jacobian: three (k, k) blocks placed side by side
    dgbar = (1.0 / n) * np.column_stack(
        [(X * slope[:, None]).T @ X for slope in slopes]
    )

    return 2 * dgbar @ invV @ gbar
1782
+
1783
def _mle_loss_twosided(beta_curr: np.ndarray) -> float:
    """
    Negative log-likelihood of the two-sided noncompliance model.

    Parameters
    ----------
    beta_curr : np.ndarray, shape (2*k,)
        Complier coefficients followed by always-taker coefficients.

    Returns
    -------
    float
        Negative log-likelihood.
    """
    eta_c = X @ beta_curr[:k]
    eta_a = X @ beta_curr[k:]

    # Stable log(1 + exp(eta_c) + exp(eta_a)) via the log-sum-exp shift
    shift = np.maximum(np.maximum(eta_c, eta_a), 0.0)
    log_denom = shift + np.log(
        np.exp(-shift) + np.exp(eta_c - shift) + np.exp(eta_a - shift)
    )

    pi_c = np.exp(eta_c - log_denom)
    pi_a = np.exp(eta_a - log_denom)
    # The never-taker probability is taken as the remainder, not exp(-log_denom)
    pi_n = 1.0 - pi_c - pi_a

    # Clip each probability, then rescale so the three sum to one again
    lower, upper = PROBS_MIN, 1.0 - PROBS_MIN
    pi_c = np.clip(pi_c, lower, upper)
    pi_a = np.clip(pi_a, lower, upper)
    pi_n = np.clip(pi_n, lower, upper)
    total = pi_c + pi_a + pi_n
    pi_c, pi_a, pi_n = pi_c / total, pi_a / total, pi_n / total

    # One log-likelihood term per observed (Z, Tr) cell; the probabilities are
    # bounded away from 0 and 1, so no extra epsilon is added inside the logs.
    log_lik = np.sum(
        Z * Tr * np.log(pi_c + pi_a)
        + Z * (1.0 - Tr) * np.log(pi_n)
        + (1.0 - Z) * Tr * np.log(pi_a)
        + (1.0 - Z) * (1.0 - Tr) * np.log(1.0 - pi_a)
    )

    return float(-log_lik)
1844
+
1845
def _mle_loss_onesided(beta_curr: np.ndarray) -> float:
    """
    Negative log-likelihood of the one-sided noncompliance model.

    Only units with ``Z == 1`` contribute: both likelihood terms are
    multiplied by ``Z``.

    Parameters
    ----------
    beta_curr : np.ndarray, shape (k,)
        Parameter vector.

    Returns
    -------
    float
        Negative log-likelihood.
    """
    # Logistic complier probability, clipped so both logs are finite
    pi_c = np.clip(scipy.special.expit(X @ beta_curr), PROBS_MIN, 1.0 - PROBS_MIN)

    log_lik = np.sum(
        Z * Tr * np.log(pi_c)
        + Z * (1.0 - Tr) * np.log(1.0 - pi_c)
    )

    return float(-log_lik)
1872
+
1873
def _bal_loss_twosided(beta_curr: np.ndarray, invV: np.ndarray) -> float:
    """
    Covariate-balance loss for two-sided noncompliance.

    Parameters
    ----------
    beta_curr : np.ndarray, shape (2*k,)
        Parameter vector.
    invV : np.ndarray
        Inverse of V matrix (6K×6K).

    Returns
    -------
    float
        Quadratic form ``wbar' invV_sub wbar``.

    Notes
    -----
    Only the last 4K×4K block of ``invV`` (``invV[2*k:, 2*k:]``) enters
    the loss.
    """
    probs_c, probs_a, probs_n = _compute_compliance_probs_twosided(beta_curr)

    # One balance weight per (Z, Tr) cell.  No epsilon is added to the
    # denominators; the helper is presumed to return clipped probabilities.
    cell_weights = np.column_stack([
        Z * Tr / (pZ * (probs_c + probs_a)) - 1.0,
        (1.0 - Z) * Tr / ((1.0 - pZ) * probs_a) - 1.0,
        Z * (1.0 - Tr) / (pZ * probs_n) - 1.0,
        (1.0 - Z) * (1.0 - Tr) / ((1.0 - pZ) * (probs_c + probs_n)) - 1.0,
    ])

    # (k, 4) balance moments flattened column by column into a 4K vector
    wbar = ((X.T @ cell_weights) / n).ravel(order="F")

    balance_block = invV[2 * k:, 2 * k:]
    return float(wbar @ balance_block @ wbar)
1918
+
1919
def _bal_loss_onesided(beta_curr: np.ndarray, invV: np.ndarray) -> float:
    """
    Covariate-balance loss for one-sided noncompliance.

    Parameters
    ----------
    beta_curr : np.ndarray, shape (k,)
        Parameter vector.
    invV : np.ndarray
        Inverse of V matrix (3K×3K).

    Returns
    -------
    float
        Quadratic form ``wbar' invV_sub wbar``.

    Notes
    -----
    Only the last 2K×2K block of ``invV`` (``invV[k:, k:]``) enters the loss.
    """
    probs_c, probs_n = _compute_compliance_probs_onesided(beta_curr)

    # Balance weights for the (Z=1, Tr=1) and (Z=1, Tr=0) cells.  No epsilon
    # is added; the helper is presumed to return clipped probabilities.
    cell_weights = np.column_stack([
        Z * Tr / (pZ * probs_c) - 1.0,
        Z * (1.0 - Tr) / (pZ * probs_n) - 1.0,
    ])

    # (k, 2) balance moments flattened column by column into a 2K vector
    wbar = ((X.T @ cell_weights) / n).ravel(order="F")

    balance_block = invV[k:, k:]
    return float(wbar @ balance_block @ wbar)
1958
+
1959
# ========== Initialization ==========

# Detect perfect or near-perfect compliance.  When Tr has no variation inside
# a Z subset, regressing it on X in that subset carries no information, so the
# corresponding fit is skipped and a fallback start is used.
Z1_mask = Z == 1
Z0_mask = Z == 0

# Variance of Tr in each instrument arm (0.0 when the arm has at most one unit)
Tr_Z1_var = np.var(Tr[Z1_mask]) if Z1_mask.sum() > 1 else 0.0
Tr_Z0_var = np.var(Tr[Z0_mask]) if Z0_mask.sum() > 1 else 0.0

perfect_compliance_Z1 = Tr_Z1_var < 1e-10  # Tr has no variation when Z=1
perfect_compliance_Z0 = Tr_Z0_var < 1e-10  # Tr has no variation when Z=0

if perfect_compliance_Z1 and perfect_compliance_Z0:
    # Tr is constant within both instrument arms
    warnings.warn(
        "Perfect compliance detected: Tr is completely determined by Z. "
        "All units are compliers (π_c≈1, π_a≈0, π_n≈0). "
        "Using fallback initialization with small random perturbation.",
        UserWarning
    )
    # Start close to perfect compliance.  Only the intercept (coefficient 0,
    # the entry the final back-transformation also treats as the intercept)
    # is shifted; the slopes get a small random perturbation.  Shifting every
    # coefficient would make the starting logit depend strongly on the
    # covariates and miss the intended probabilities.
    p = X.shape[1]
    if twosided:
        # Never-takers are the reference class (logit 0), so pi_c ≈ 1 needs a
        # large positive complier intercept and a large negative always-taker
        # intercept: pi_c ≈ 0.993, pi_n ≈ 0.007, pi_a ≈ 0.
        beta_c_init = np.random.randn(p) * 0.01
        beta_c_init[0] += 5.0
        beta_a_init = np.random.randn(p) * 0.01
        beta_a_init[0] -= 5.0
        beta_init = np.concatenate([beta_c_init, beta_a_init])
    else:
        # One-sided: logit(pi_c) ≈ 5, i.e. pi_c ≈ 0.993
        beta_init = np.random.randn(p) * 0.01
        beta_init[0] += 5.0
elif twosided:
    # Two-sided noncompliance initialization.
    # The whole sequence is guarded so a failed fit falls back to a random start.
    try:
        # Step 1: never-taker start from a Gaussian-family GLM of (1 - Tr)
        # on X in the Z=1 subset
        if not perfect_compliance_Z1:
            glm_n = sm.GLM(1 - Tr[Z1_mask], X[Z1_mask], family=Gaussian()).fit()
            beta_n0 = glm_n.params
        else:
            # Tr is constant when Z=1: nothing to fit, use a zero vector
            beta_n0 = np.zeros(X.shape[1])

        # Step 2: always-taker start from a Gaussian-family GLM of Tr on X
        # in the Z=0 subset
        if not perfect_compliance_Z0:
            glm_a = sm.GLM(Tr[Z0_mask], X[Z0_mask], family=Gaussian()).fit()
            beta_a0 = glm_a.params
        else:
            # Tr is constant when Z=0: nothing to fit, use a zero vector
            beta_a0 = np.zeros(X.shape[1])

        # Step 3: initial compliance probabilities from a three-class softmax,
        # evaluated with the numerically stable log-sum-exp shift
        logit_a0 = X @ beta_a0
        logit_n0 = X @ beta_n0

        # log(1 + exp(logit_a0) + exp(logit_n0))
        max_logit = np.maximum(np.maximum(logit_a0, logit_n0), 0.0)
        log_sum = max_logit + np.log(
            np.exp(-max_logit) + np.exp(logit_a0 - max_logit) + np.exp(logit_n0 - max_logit)
        )

        # In this initial softmax the complier class is the reference
        # category (logit 0); always-takers and never-takers use the two
        # fitted linear predictors.
        p_hat_a0 = np.exp(logit_a0 - log_sum)
        p_hat_n0 = np.exp(logit_n0 - log_sum)
        p_hat_c0 = np.exp(-log_sum)

        # Clip probabilities away from 0 and 1
        eps = PROBS_MIN
        p_hat_a0 = np.clip(p_hat_a0, eps, 1.0 - eps)
        p_hat_n0 = np.clip(p_hat_n0, eps, 1.0 - eps)
        p_hat_c0 = np.clip(p_hat_c0, eps, 1.0 - eps)

        # Renormalize so the three probabilities sum to one
        sums = p_hat_c0 + p_hat_a0 + p_hat_n0
        p_hat_c0 = p_hat_c0 / sums
        p_hat_a0 = p_hat_a0 / sums
        p_hat_n0 = p_hat_n0 / sums

        # Step 4: initial beta via least squares of log(p / (1 - p)) on X.
        # The probabilities are bounded away from 0 and 1, so no extra eps
        # is needed inside the logs.
        logit_c = np.log(p_hat_c0 / (1.0 - p_hat_c0))
        logit_a = np.log(p_hat_a0 / (1.0 - p_hat_a0))

        beta_c_init = np.linalg.lstsq(X, logit_c, rcond=None)[0]
        beta_a_init = np.linalg.lstsq(X, logit_a, rcond=None)[0]

        beta_init = np.concatenate([beta_c_init, beta_a_init])
    except (ValueError, np.linalg.LinAlgError) as e:
        # A fit failed: fall back to a small random start
        warnings.warn(
            f"GLM initialization failed ({e}). Using fallback initialization.",
            UserWarning
        )
        p = X.shape[1]
        beta_c_init = np.random.randn(p) * 0.1
        beta_a_init = np.random.randn(p) * 0.1
        beta_init = np.concatenate([beta_c_init, beta_a_init])
else:
    # One-sided noncompliance initialization:
    # Gaussian-family GLM of Tr on X in the Z=1 subset
    if not perfect_compliance_Z1:
        try:
            glm_c = sm.GLM(Tr[Z1_mask], X[Z1_mask], family=Gaussian()).fit()
            beta_init = glm_c.params
        except (ValueError, np.linalg.LinAlgError) as e:
            # The fit failed: fall back to a small random start
            warnings.warn(
                f"GLM initialization failed ({e}). Using fallback initialization.",
                UserWarning
            )
            beta_init = np.random.randn(X.shape[1]) * 0.1
    else:
        # Tr is all 1 or all 0 when Z=1, so there is nothing to fit;
        # use small random values instead
        warnings.warn(
            "No variation in Tr when Z=1. Using fallback initialization.",
            UserWarning
        )
        beta_init = np.random.randn(X.shape[1]) * 0.1
2084
+
2085
# ========== Optimization Loop ==========

def _run_bfgs(objective, start, gradient):
    # All optimization stages share one BFGS configuration; gradient=None
    # lets scipy fall back to numerical differentiation.
    return scipy.optimize.minimize(
        objective, start, method="BFGS", jac=gradient,
        options={"maxiter": iterations, "gtol": 1e-8, "disp": verbose >= 2}
    )

# Pick the function family that matches the compliance structure
if twosided:
    mle_loss_func, mle_grad_func = _mle_loss_twosided, _mle_gradient_twosided
    _bal_loss_impl, _bal_grad_impl = _bal_loss_twosided, _bal_gradient_twosided
    _gmm_impl, _gmm_grad_impl = _gmm_func_twosided, _gmm_gradient_twosided
else:
    mle_loss_func, mle_grad_func = _mle_loss_onesided, _mle_gradient_onesided
    _bal_loss_impl, _bal_grad_impl = _bal_loss_onesided, _bal_gradient_onesided
    _gmm_impl, _gmm_grad_impl = _gmm_func_onesided, _gmm_gradient_onesided

# Step 1: MLE optimization
if verbose >= 1:
    print(f"\n[CBIV] Starting MLE optimization (iterations={iterations})...")

mle_result = _run_bfgs(mle_loss_func, beta_init, mle_grad_func)
beta_mle = mle_result.x
mle_converged = mle_result.success

if verbose >= 1:
    print(f"[CBIV] MLE optimization: converged={mle_converged}, nit={mle_result.nit}, loss={mle_result.fun:.6f}")

# Step 2: inverse weight matrix evaluated at the MLE solution
gmm_result = _gmm_impl(beta_mle, invV=None)
this_invV = gmm_result["invV"]

# ========== Continuously Updating GMM vs Two-Step GMM Branch ==========
#
# Two-step GMM (twostep=True):
#   - every objective uses the fixed this_invV computed above
#   - analytic gradients are supplied to BFGS
#
# Continuously updating GMM (twostep=False):
#   - the GMM loss recomputes invV on every call (invV=None)
#   - the balance loss still uses the fixed this_invV, because the balance
#     loss functions require an explicit invV
#   - no analytic gradients are supplied

# The balance loss is the same in both modes
bal_loss_func = lambda b: _bal_loss_impl(b, this_invV)

if twostep:
    bal_grad_func = lambda b: _bal_grad_impl(b, this_invV)
    gmm_loss_func = lambda b: _gmm_impl(b, this_invV)["loss"]
    gmm_grad_func = lambda b: _gmm_grad_impl(b, this_invV)
else:
    warnings.warn(
        "Using Continuous Updating GMM (twostep=False). "
        "This may be 5-10x slower for CBIV but has better finite sample properties. "
        "See Hansen et al. (1996) for theory.",
        UserWarning
    )
    bal_grad_func = None
    gmm_loss_func = lambda b: _gmm_impl(b, invV=None)["loss"]
    gmm_grad_func = None

# Step 3: final optimization, depending on the requested method
if score_only:
    # method="mle": the MLE solution is the final answer
    gmm_opt = mle_result
    beta_opt = beta_mle
    if verbose >= 1:
        print(f"[CBIV] Using method='mle', no additional optimization needed")
elif bal_only:
    # method="exact": minimize the balance loss from two starting points
    if verbose >= 1:
        print(f"[CBIV] method='exact': Optimizing balance conditions...")

    bal_init_result = _run_bfgs(bal_loss_func, beta_init, bal_grad_func)
    bal_mle_result = _run_bfgs(bal_loss_func, beta_mle, bal_grad_func)

    # Keep the run with the smaller loss; ties go to the beta_init run
    if bal_init_result.fun > bal_mle_result.fun:
        gmm_opt = bal_mle_result
        beta_opt = bal_mle_result.x
        if verbose >= 1:
            print(f"[CBIV] Selected MLE starting point: loss={bal_mle_result.fun:.6f}")
    else:
        gmm_opt = bal_init_result
        beta_opt = bal_init_result.x
        if verbose >= 1:
            print(f"[CBIV] Selected init starting point: loss={bal_init_result.fun:.6f}")
else:
    # method="over": balance optimization first, then the full GMM loss
    if verbose >= 1:
        print(f"[CBIV] method='over': Step 1 - Balance optimization...")

    # Step 1: beta_bal is the better of two balance-loss runs
    bal_init_result = _run_bfgs(bal_loss_func, beta_init, bal_grad_func)
    bal_mle_result = _run_bfgs(bal_loss_func, beta_mle, bal_grad_func)

    if bal_init_result.fun > bal_mle_result.fun:
        beta_bal = bal_mle_result.x
        if verbose >= 1:
            print(f"[CBIV] Selected MLE start: loss={bal_mle_result.fun:.6f}")
    else:
        beta_bal = bal_init_result.x
        if verbose >= 1:
            print(f"[CBIV] Selected init start: loss={bal_init_result.fun:.6f}")

    if verbose >= 1:
        print(f"[CBIV] method='over': Step 2 - GMM optimization...")

    # Step 2: GMM loss from the MLE start and from the balance start
    gmm_mle_result = _run_bfgs(gmm_loss_func, beta_mle, gmm_grad_func)
    gmm_bal_result = _run_bfgs(gmm_loss_func, beta_bal, gmm_grad_func)

    # Keep the run with the smaller loss; ties go to the MLE-start run
    if gmm_mle_result.fun > gmm_bal_result.fun:
        gmm_opt = gmm_bal_result
        beta_opt = gmm_bal_result.x
        if verbose >= 1:
            print(f"[CBIV] Selected balance start: converged={gmm_bal_result.success}, loss={gmm_bal_result.fun:.6f}")
    else:
        gmm_opt = gmm_mle_result
        beta_opt = gmm_mle_result.x
        if verbose >= 1:
            print(f"[CBIV] Selected MLE start: converged={gmm_mle_result.success}, loss={gmm_mle_result.fun:.6f}")
2256
+
2257
# ========== Compute Final Statistics ==========

# Reshape beta to a (k, n_equations) matrix.  reshape() on the 1-D optimizer
# output returns a view, so take a copy: the coefficient back-transformation
# below works in place and must not overwrite beta_opt (and the optimizer
# result it aliases), which stays on the scale of the X used for fitting.
if twosided:
    beta_opt_matrix = beta_opt.reshape(k, 2, order="F").copy()  # (k, 2)
else:
    beta_opt_matrix = beta_opt.reshape(k, 1, order="F").copy()  # (k, 1)

# J-statistic and balance loss at the optimum, both with the fixed weight matrix
if twosided:
    J_opt = _gmm_func_twosided(beta_opt, this_invV)["loss"]
    bal_loss_opt = _bal_loss_twosided(beta_opt, this_invV)
else:
    J_opt = _gmm_func_onesided(beta_opt, this_invV)["loss"]
    bal_loss_opt = _bal_loss_onesided(beta_opt, this_invV)

# ========== Coefficient Inverse Transform and Final Probability Computation ==========

if twosided:
    # Two-sided noncompliance
    # Step 1: raw class probabilities (computed BEFORE the coefficients are
    # back-transformed, because X is still on the fitting scale)
    logit_c_opt = X @ beta_opt_matrix[:, 0]
    logit_a_opt = X @ beta_opt_matrix[:, 1]

    # log(1 + exp(logit_c) + exp(logit_a)) via the log-sum-exp shift
    max_logit = np.maximum(np.maximum(logit_c_opt, logit_a_opt), 0.0)
    log_sum = max_logit + np.log(
        np.exp(-max_logit) + np.exp(logit_c_opt - max_logit) + np.exp(logit_a_opt - max_logit)
    )

    # Probability = exp(logit) / (1 + exp(logit_c) + exp(logit_a))
    pi_c_opt_raw = np.exp(logit_c_opt - log_sum)
    pi_a_opt_raw = np.exp(logit_a_opt - log_sum)
    pi_n_opt_raw = np.exp(-log_sum)

    # Step 2: clip probabilities to avoid numerical issues
    pi_c_opt = np.clip(pi_c_opt_raw, probs_min, 1.0 - probs_min)
    pi_a_opt = np.clip(pi_a_opt_raw, probs_min, 1.0 - probs_min)
    pi_n_opt = np.clip(pi_n_opt_raw, probs_min, 1.0 - probs_min)

    # Step 3: renormalize so the three probabilities sum to 1
    sums = pi_c_opt + pi_a_opt + pi_n_opt
    fitted_values = np.column_stack(
        [pi_c_opt / sums, pi_a_opt / sums, pi_n_opt / sums]
    )

    # Detect and warn about clipping of the final fitted values
    _check_and_warn_clipping(pi_c_opt_raw, fitted_values[:, 0], probs_min,
                             "final complier probability (π_c)", warn_clipping, clipping_warn_threshold)
    _check_and_warn_clipping(pi_a_opt_raw, fitted_values[:, 1], probs_min,
                             "final always-taker probability (π_a)", warn_clipping, clipping_warn_threshold)
    _check_and_warn_clipping(pi_n_opt_raw, fitted_values[:, 2], probs_min,
                             "final never-taker probability (π_n)", warn_clipping, clipping_warn_threshold)

    # Step 4: coefficient inverse transform.
    # Slopes are divided by the covariate standard deviations ...
    beta_opt_matrix[1:, :] = beta_opt_matrix[1:, :] / x_sd[:, None]

    # ... and the intercept absorbs the covariate means.  atleast_1d lets one
    # expression cover any number of covariates (including a scalar x_mean).
    beta_opt_matrix[0, :] = beta_opt_matrix[0, :] - (
        np.atleast_1d(x_mean) @ beta_opt_matrix[1:, :]
    )

    # Deviance (-2 * log-likelihood).  No epsilon inside the logs: the fitted
    # values are already bounded away from 0 and 1.
    deviance = -2.0 * np.sum(
        Z * Tr * np.log(fitted_values[:, 0] + fitted_values[:, 1])
        + Z * (1.0 - Tr) * np.log(fitted_values[:, 2])
        + (1.0 - Z) * Tr * np.log(fitted_values[:, 1])
        + (1.0 - Z) * (1.0 - Tr) * np.log(1.0 - fitted_values[:, 1])
    )

    # Weights: 1/π_c
    weights = 1.0 / fitted_values[:, 0]
else:
    # One-sided noncompliance
    # Step 1: final complier probability via the stable sigmoid, then clipped
    fitted_values_c_raw = scipy.special.expit(X @ beta_opt_matrix[:, 0])
    fitted_values_c = np.clip(fitted_values_c_raw, probs_min, 1.0 - probs_min)

    # One-sided fitted values are stored as an (n, 1) column
    # (the two-sided branch stores an (n, 3) matrix)
    fitted_values = fitted_values_c[:, None]  # shape (n, 1)

    # Detect and warn about clipping.  Pass the 1-D clipped vector so raw and
    # clipped values have the same shape, as in the two-sided branch.
    _check_and_warn_clipping(fitted_values_c_raw, fitted_values_c, probs_min,
                             "final complier probability (π_c)", warn_clipping, clipping_warn_threshold)

    # Step 2: deviance
    # -2 * sum(Z*Tr*log(p) + Z*(1-Tr)*log(1-p)), using the flattened (n,) vector
    fitted_values_flat = fitted_values[:, 0]
    deviance = -2.0 * np.sum(
        Z * Tr * np.log(fitted_values_flat)
        + Z * (1.0 - Tr) * np.log(1.0 - fitted_values_flat)
    )

    # Step 3: coefficient inverse transform (slopes first, then intercept)
    beta_opt_matrix[1:, 0] = beta_opt_matrix[1:, 0] / x_sd
    beta_opt_matrix[0, 0] = beta_opt_matrix[0, 0] - np.sum(
        x_mean * beta_opt_matrix[1:, 0]
    )

    # One-sided coefficients are also exposed as a flat (k,) vector
    beta_opt_vector = beta_opt_matrix.ravel()

    # Weights: 1/π_c, flattened to an (n,) vector even though
    # fitted_values is (n, 1)
    weights = (1.0 / fitted_values).ravel()
2382
+
2383
+ # ========== Compute variance-covariance matrix ==========
2384
+ # GMM variance formula: Var(β) = (G' invV G)^{-1} / n
2385
+ # where G is the Jacobian of moment conditions
2386
+
2387
+ def _compute_vcov_matrix(beta_opt: np.ndarray, invV: np.ndarray) -> np.ndarray:
2388
+ """
2389
+ Compute variance-covariance matrix for GMM estimator
2390
+
2391
+ Parameters
2392
+ ----------
2393
+ beta_opt : np.ndarray
2394
+ Optimal parameter vector
2395
+ invV : np.ndarray
2396
+ Weight matrix (pseudoinverse of V)
2397
+
2398
+ Returns
2399
+ -------
2400
+ np.ndarray
2401
+ Variance-covariance matrix
2402
+
2403
+ Notes
2404
+ -----
2405
+ Use numerical differentiation to compute Jacobian G = ∂gbar/∂β
2406
+ Then apply GMM variance formula: Var(β) = (G' invV G)^{-1} / n
2407
+ """
2408
+ # Parameter dimension
2409
+ p = len(beta_opt)
2410
+
2411
+ # Compute Jacobian using numerical differentiation
2412
+ eps = 1e-7
2413
+
2414
+ # Compute moment conditions at current parameters
2415
+ if twosided:
2416
+ gbar_current = _gmm_func_twosided(beta_opt, invV)
2417
+ # Extract gbar (recompute to get gbar vector)
2418
+ probs_c, probs_a, probs_n = _compute_compliance_probs_twosided(beta_opt)
2419
+ s_ca = probs_c + probs_a
2420
+ s_cn = probs_c + probs_n
2421
+
2422
+ t1 = (Z * Tr / (1.0 - probs_n) + (1.0 - Z) * (1.0 - Tr) / (1.0 - probs_a) - 1.0) * probs_c
2423
+ t2 = (Z * Tr / (1.0 - probs_n) + (1.0 - Z) * Tr / probs_a - 1.0) * probs_a
2424
+ g1 = (X.T @ t1) / n
2425
+ g2 = (X.T @ t2) / n
2426
+
2427
+ w1 = Z * Tr / (pZ * s_ca) - 1.0
2428
+ w2 = (1.0 - Z) * Tr / ((1.0 - pZ) * probs_a) - 1.0
2429
+ w3 = Z * (1.0 - Tr) / (pZ * probs_n) - 1.0
2430
+ w4 = (1.0 - Z) * (1.0 - Tr) / ((1.0 - pZ) * s_cn) - 1.0
2431
+ W = np.column_stack([w1, w2, w3, w4])
2432
+ w_del = (X.T @ W) / n
2433
+
2434
+ gbar = np.concatenate([g1, g2, w_del.ravel(order="F")])
2435
+ else:
2436
+ gbar_current = _gmm_func_onesided(beta_opt, invV)
2437
+ # Extract gbar
2438
+ probs_c, probs_n = _compute_compliance_probs_onesided(beta_opt)
2439
+ g1 = (X.T @ (Tr * Z * (1.0 - probs_c) - Z * (1.0 - Tr) * probs_c)) / n
2440
+
2441
+ w1 = Z * Tr / (pZ * probs_c) - 1.0
2442
+ w2 = Z * (1.0 - Tr) / (pZ * probs_n) - 1.0
2443
+ W = np.column_stack([w1, w2])
2444
+ w_del = (X.T @ W) / n
2445
+
2446
+ gbar = np.concatenate([g1, w_del.ravel(order="F")])
2447
+
2448
+ # Moment condition dimension
2449
+ m = len(gbar)
2450
+
2451
+ # Initialize Jacobian matrix G (m × p)
2452
+ G = np.zeros((m, p))
2453
+
2454
+ # Compute numerical derivative for each parameter
2455
+ for j in range(p):
2456
+ beta_plus = beta_opt.copy()
2457
+ beta_plus[j] += eps
2458
+ beta_minus = beta_opt.copy()
2459
+ beta_minus[j] -= eps
2460
+
2461
+ # Compute moment conditions after perturbation
2462
+ if twosided:
2463
+ probs_c_p, probs_a_p, probs_n_p = _compute_compliance_probs_twosided(beta_plus)
2464
+ s_ca_p = probs_c_p + probs_a_p
2465
+ s_cn_p = probs_c_p + probs_n_p
2466
+
2467
+ t1_p = (Z * Tr / (1.0 - probs_n_p) + (1.0 - Z) * (1.0 - Tr) / (1.0 - probs_a_p) - 1.0) * probs_c_p
2468
+ t2_p = (Z * Tr / (1.0 - probs_n_p) + (1.0 - Z) * Tr / probs_a_p - 1.0) * probs_a_p
2469
+ g1_p = (X.T @ t1_p) / n
2470
+ g2_p = (X.T @ t2_p) / n
2471
+
2472
+ w1_p = Z * Tr / (pZ * s_ca_p) - 1.0
2473
+ w2_p = (1.0 - Z) * Tr / ((1.0 - pZ) * probs_a_p) - 1.0
2474
+ w3_p = Z * (1.0 - Tr) / (pZ * probs_n_p) - 1.0
2475
+ w4_p = (1.0 - Z) * (1.0 - Tr) / ((1.0 - pZ) * s_cn_p) - 1.0
2476
+ W_p = np.column_stack([w1_p, w2_p, w3_p, w4_p])
2477
+ w_del_p = (X.T @ W_p) / n
2478
+
2479
+ gbar_plus = np.concatenate([g1_p, g2_p, w_del_p.ravel(order="F")])
2480
+
2481
+ probs_c_m, probs_a_m, probs_n_m = _compute_compliance_probs_twosided(beta_minus)
2482
+ s_ca_m = probs_c_m + probs_a_m
2483
+ s_cn_m = probs_c_m + probs_n_m
2484
+
2485
+ t1_m = (Z * Tr / (1.0 - probs_n_m) + (1.0 - Z) * (1.0 - Tr) / (1.0 - probs_a_m) - 1.0) * probs_c_m
2486
+ t2_m = (Z * Tr / (1.0 - probs_n_m) + (1.0 - Z) * Tr / probs_a_m - 1.0) * probs_a_m
2487
+ g1_m = (X.T @ t1_m) / n
2488
+ g2_m = (X.T @ t2_m) / n
2489
+
2490
+ w1_m = Z * Tr / (pZ * s_ca_m) - 1.0
2491
+ w2_m = (1.0 - Z) * Tr / ((1.0 - pZ) * probs_a_m) - 1.0
2492
+ w3_m = Z * (1.0 - Tr) / (pZ * probs_n_m) - 1.0
2493
+ w4_m = (1.0 - Z) * (1.0 - Tr) / ((1.0 - pZ) * s_cn_m) - 1.0
2494
+ W_m = np.column_stack([w1_m, w2_m, w3_m, w4_m])
2495
+ w_del_m = (X.T @ W_m) / n
2496
+
2497
+ gbar_minus = np.concatenate([g1_m, g2_m, w_del_m.ravel(order="F")])
2498
+ else:
2499
+ probs_c_p, probs_n_p = _compute_compliance_probs_onesided(beta_plus)
2500
+ g1_p = (X.T @ (Tr * Z * (1.0 - probs_c_p) - Z * (1.0 - Tr) * probs_c_p)) / n
2501
+
2502
+ w1_p = Z * Tr / (pZ * probs_c_p) - 1.0
2503
+ w2_p = Z * (1.0 - Tr) / (pZ * probs_n_p) - 1.0
2504
+ W_p = np.column_stack([w1_p, w2_p])
2505
+ w_del_p = (X.T @ W_p) / n
2506
+
2507
+ gbar_plus = np.concatenate([g1_p, w_del_p.ravel(order="F")])
2508
+
2509
+ probs_c_m, probs_n_m = _compute_compliance_probs_onesided(beta_minus)
2510
+ g1_m = (X.T @ (Tr * Z * (1.0 - probs_c_m) - Z * (1.0 - Tr) * probs_c_m)) / n
2511
+
2512
+ w1_m = Z * Tr / (pZ * probs_c_m) - 1.0
2513
+ w2_m = Z * (1.0 - Tr) / (pZ * probs_n_m) - 1.0
2514
+ W_m = np.column_stack([w1_m, w2_m])
2515
+ w_del_m = (X.T @ W_m) / n
2516
+
2517
+ gbar_minus = np.concatenate([g1_m, w_del_m.ravel(order="F")])
2518
+
2519
+ # Numerical derivative: ∂gbar/∂β_j
2520
+ G[:, j] = (gbar_plus - gbar_minus) / (2 * eps)
2521
+
2522
+ # Compute the variance-covariance matrix: Var(β) = (G' invV G)^{-1} / n
2523
+ # Note: invV is already the pseudoinverse of V
2524
+ GtinvVG = G.T @ invV @ G
2525
+
2526
+ try:
2527
+ # Try the specialized pseudoinverse for symmetric PSD matrices first
2528
+ vcov_matrix = pinv_symmetric_psd(GtinvVG) / n
2529
+ except (np.linalg.LinAlgError, ValueError):
2530
+ # If failed, use general pseudoinverse
2531
+ vcov_matrix = np.linalg.pinv(GtinvVG) / n
2532
+
2533
+ # Ensure symmetry
2534
+ vcov_matrix = symmetrize(vcov_matrix)
2535
+
2536
+ return vcov_matrix
2537
+
2538
+ # Compute vcov matrix
2539
+ try:
2540
+ vcov_matrix = _compute_vcov_matrix(beta_opt, this_invV)
2541
+ except Exception as e:
2542
+ # If vcov computation fails, warn but do not interrupt
2543
+ warnings.warn(
2544
+ f"Failed to compute variance-covariance matrix: {e}. "
2545
+ f"vcov() method will not be available for this fit.",
2546
+ RuntimeWarning
2547
+ )
2548
+ vcov_matrix = None
2549
+
2550
+ # ========== Return value encapsulation ==========
2551
+ # Verbose final summary
2552
+ if verbose >= 1:
2553
+ print(f"\n[CBIV] Optimization complete:")
2554
+ print(f" - Method: {method}")
2555
+ print(f" - Two-sided: {twosided}")
2556
+ print(f" - Converged: {gmm_opt.success}")
2557
+ print(f" - J statistic: {J_opt:.6f}")
2558
+ print(f" - Balance loss: {bal_loss_opt:.6f}")
2559
+ if twosided:
2560
+ print(f" - Mean p_complier: {fitted_values[:, 0].mean():.4f}")
2561
+ print(f" - Mean p_always: {fitted_values[:, 1].mean():.4f}")
2562
+ print(f" - Mean p_never: {fitted_values[:, 2].mean():.4f}")
2563
+ else:
2564
+ print(f" - Mean p_complier: {fitted_values.mean():.4f}")
2565
+
2566
+ # Build the result object; method and two_sided are passed as constructor fields
2567
+
2568
+ if twosided:
2569
+ result = CBIVResults(
2570
+ coefficients=beta_opt_matrix,
2571
+ fitted_values=fitted_values,
2572
+ weights=weights,
2573
+ deviance=float(deviance),
2574
+ converged=bool(gmm_opt.success),
2575
+ J=float(J_opt),
2576
+ df=k,
2577
+ bal=float(bal_loss_opt),
2578
+ method=method,
2579
+ two_sided=True,
2580
+ iterations=iterations,
2581
+ )
2582
+ # Add vcov matrix (if computed successfully)
2583
+ if vcov_matrix is not None:
2584
+ result._vcov_matrix = vcov_matrix
2585
+ return result
2586
+ else:
2587
+ result = CBIVResults(
2588
+ coefficients=beta_opt_vector, # Returns (k,) vector for one-sided
2589
+ fitted_values=fitted_values, # Returns (n, 1) matrix for one-sided
2590
+ weights=weights,
2591
+ deviance=float(deviance),
2592
+ converged=bool(gmm_opt.success),
2593
+ J=float(J_opt),
2594
+ df=k,
2595
+ bal=float(bal_loss_opt),
2596
+ method=method,
2597
+ two_sided=False,
2598
+ iterations=iterations,
2599
+ )
2600
+ # Add vcov matrix (if computed successfully)
2601
+ if vcov_matrix is not None:
2602
+ result._vcov_matrix = vcov_matrix
2603
+ return result