cbps 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cbps/__init__.py +3462 -0
- cbps/constants.py +46 -0
- cbps/core/__init__.py +93 -0
- cbps/core/cbps_binary.py +1943 -0
- cbps/core/cbps_continuous.py +945 -0
- cbps/core/cbps_multitreat.py +1123 -0
- cbps/core/cbps_optimal.py +507 -0
- cbps/core/results.py +1447 -0
- cbps/data/Blackwell.csv +571 -0
- cbps/data/LaLonde.csv +3213 -0
- cbps/data/npcbps_continuous_sim.csv +501 -0
- cbps/data/nsw.csv +723 -0
- cbps/data/nsw_dw.csv +446 -0
- cbps/data/political_ads_urban_niebler.csv +16266 -0
- cbps/data/psid_controls.csv +2491 -0
- cbps/data/psid_controls2.csv +254 -0
- cbps/data/psid_controls3.csv +129 -0
- cbps/data/simulation_dgp1_seed12345.csv +201 -0
- cbps/data/simulation_dgp2_seed12345.csv +201 -0
- cbps/data/simulation_dgp3_seed12345.csv +201 -0
- cbps/data/simulation_dgp4_seed12345.csv +201 -0
- cbps/datasets/__init__.py +78 -0
- cbps/datasets/blackwell.py +112 -0
- cbps/datasets/continuous.py +223 -0
- cbps/datasets/lalonde.py +272 -0
- cbps/datasets/npcbps_sim.py +101 -0
- cbps/diagnostics/__init__.py +101 -0
- cbps/diagnostics/balance.py +760 -0
- cbps/diagnostics/balance_cbmsm_addon.py +162 -0
- cbps/diagnostics/continuous_diagnostics.py +259 -0
- cbps/diagnostics/normality.py +173 -0
- cbps/diagnostics/ocbps_conditions.py +197 -0
- cbps/diagnostics/overlap.py +198 -0
- cbps/diagnostics/plots.py +1193 -0
- cbps/diagnostics/weights_diag.py +205 -0
- cbps/highdim/__init__.py +84 -0
- cbps/highdim/gmm_loss.py +340 -0
- cbps/highdim/hdcbps.py +1078 -0
- cbps/highdim/lasso_utils.py +498 -0
- cbps/highdim/weight_funcs.py +298 -0
- cbps/inference/__init__.py +42 -0
- cbps/inference/asyvar.py +621 -0
- cbps/inference/vcov_outcome.py +217 -0
- cbps/iv/__init__.py +48 -0
- cbps/iv/cbiv.py +2603 -0
- cbps/logging_config.py +45 -0
- cbps/msm/__init__.py +45 -0
- cbps/msm/cbmsm.py +1871 -0
- cbps/msm/rank_diagnostics.py +112 -0
- cbps/nonparametric/__init__.py +58 -0
- cbps/nonparametric/cholesky_whitening.py +232 -0
- cbps/nonparametric/empirical_likelihood.py +339 -0
- cbps/nonparametric/npcbps.py +1036 -0
- cbps/nonparametric/taylor_approx.py +207 -0
- cbps/py.typed +0 -0
- cbps/sklearn/__init__.py +42 -0
- cbps/sklearn/estimator.py +378 -0
- cbps/utils/__init__.py +82 -0
- cbps/utils/formula.py +415 -0
- cbps/utils/helpers.py +378 -0
- cbps/utils/numerics.py +438 -0
- cbps/utils/r_compat.py +109 -0
- cbps/utils/validation.py +224 -0
- cbps/utils/variance_transform.py +483 -0
- cbps/utils/weights.py +586 -0
- cbps-0.2.0.dist-info/METADATA +1090 -0
- cbps-0.2.0.dist-info/RECORD +70 -0
- cbps-0.2.0.dist-info/WHEEL +5 -0
- cbps-0.2.0.dist-info/licenses/LICENSE +661 -0
- cbps-0.2.0.dist-info/top_level.txt +1 -0
cbps/utils/validation.py
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Input Validation Utilities
|
|
3
|
+
|
|
4
|
+
This module provides centralized input validation for CBPS estimators,
|
|
5
|
+
ensuring consistent error handling and user-friendly error messages
|
|
6
|
+
across all model classes.
|
|
7
|
+
|
|
8
|
+
The validation functions check for common issues such as:
|
|
9
|
+
|
|
10
|
+
- Empty or insufficient sample sizes
|
|
11
|
+
- Dimension mismatches between treatment and covariates
|
|
12
|
+
- Missing or infinite values
|
|
13
|
+
- Zero-variance treatment variables
|
|
14
|
+
- Improperly shaped covariate matrices
|
|
15
|
+
|
|
16
|
+
All validation errors include descriptive messages with the calling
|
|
17
|
+
module name, making it easy to identify the source of issues.
|
|
18
|
+
|
|
19
|
+
Functions
|
|
20
|
+
---------
|
|
21
|
+
validate_cbps_input
|
|
22
|
+
Comprehensive input validation for CBPS estimators.
|
|
23
|
+
|
|
24
|
+
References
|
|
25
|
+
----------
|
|
26
|
+
Imai, K. and Ratkovic, M. (2014). Covariate balancing propensity score.
|
|
27
|
+
Journal of the Royal Statistical Society, Series B 76(1), 243-263.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
import numpy as np
|
|
31
|
+
import warnings
|
|
32
|
+
from typing import Optional
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def ensure_dense(X):
    """Return ``X`` as a dense ``numpy.ndarray``, densifying sparse input.

    For standard CBPS (k < 1000), dense operations are faster.
    Sparse support is primarily for hdCBPS preprocessing.

    Parameters
    ----------
    X : array-like or scipy.sparse matrix
        Input covariate matrix. Any object exposing a ``toarray`` method is
        treated as sparse; everything else is passed to ``np.asarray``.

    Returns
    -------
    np.ndarray
        Dense numpy array.

    Warns
    -----
    UserWarning
        If a sparse input with more than 1000 columns is densified.
    """
    # Column count above which densifying is discouraged in favour of hdCBPS.
    dense_k_limit = 1000

    if hasattr(X, 'toarray'):  # scipy.sparse (duck-typed)
        shape = X.shape
        # Guard the column lookup so a 1-D sparse container does not raise
        # IndexError before it can be converted.
        n_cols = shape[1] if len(shape) > 1 else 1
        if n_cols > dense_k_limit:
            warnings.warn(
                f"Converting sparse matrix ({shape}) to dense. "
                f"For k>{dense_k_limit}, consider using hdCBPS with built-in "
                f"LASSO variable selection instead.",
                UserWarning,
                stacklevel=2,  # attribute the warning to the caller
            )
        # Call the method that was actually probed for above; ``toarray``
        # yields a plain ndarray rather than a ``np.matrix``.
        return np.asarray(X.toarray())
    return np.asarray(X)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def validate_cbps_input(
    treat: np.ndarray,
    X: np.ndarray,
    min_observations: int = 2,
    module_name: str = "CBPS",
    check_treatment_variance: bool = True
) -> None:
    """
    Validate treatment and covariate arrays for CBPS estimation.

    Performs comprehensive validation of input arrays before CBPS fitting,
    providing informative error messages that identify the specific issue
    and suggest remediation steps.

    Parameters
    ----------
    treat : np.ndarray
        Treatment variable, shape (n,).
    X : np.ndarray
        Covariate matrix, shape (n, k).
    min_observations : int, default=2
        Minimum required sample size.
    module_name : str, default="CBPS"
        Name of calling module for error message prefixes.
    check_treatment_variance : bool, default=True
        If True, verify treatment has non-zero variance.
        Set to False for binary treatments where variance check
        is handled separately.

    Raises
    ------
    ValueError
        If any validation check fails. The error message includes:

        - The module name prefix for easy identification
        - A description of the specific issue
        - The actual values that caused the error
        - Suggested remediation steps

    Notes
    -----
    The following checks are performed in order:

    1. Treatment is not a scalar and is non-empty
    2. Covariate matrix is 2-dimensional
    3. Sample size >= min_observations
    4. Treatment and covariate row counts match
    5. Covariate matrix has >= 1 column
    6. No NaN or Inf values in treatment
    7. No NaN or Inf values in covariates
    8. Treatment has non-zero variance (if check_treatment_variance=True)

    The zero-variance check counts distinct values instead of testing
    ``std == 0``: for a constant float array the computed mean can differ
    from the elements by rounding error, which makes the sample standard
    deviation a tiny non-zero number.

    Examples
    --------
    >>> import numpy as np
    >>> from cbps.utils.validation import validate_cbps_input
    >>>
    >>> # Valid input passes silently
    >>> treat = np.array([0, 1, 0, 1])
    >>> X = np.array([[1, 2], [1, 3], [1, 4], [1, 5]])
    >>> validate_cbps_input(treat, X)
    >>>
    >>> # Dimension mismatch raises informative error
    >>> try:
    ...     validate_cbps_input(np.array([0, 1]), X)
    ... except ValueError as e:
    ...     print("Validation failed")
    Validation failed
    """
    # Coerce array-likes; existing ndarrays (and subclasses) are left untouched.
    if not isinstance(treat, np.ndarray):
        treat = np.asarray(treat)
    if not isinstance(X, np.ndarray):
        X = np.asarray(X)

    # Check 1: scalar / empty treatment (before anything indexes the array).
    # A 0-d array has no length, so report it as a validation error rather
    # than letting len() raise a bare TypeError.
    if treat.ndim == 0:
        raise ValueError(
            f"{module_name}: Treatment input is a scalar. "
            f"Expected a 1-dimensional array with shape (n_observations,)."
        )

    n_treat = treat.shape[0]

    if n_treat == 0:
        raise ValueError(
            f"{module_name}: Treatment array is empty (n=0). "
            f"At least {min_observations} observation(s) are required to estimate "
            f"the propensity score."
        )

    # Check 2: Covariate matrix dimensions (check before accessing shape[0])
    if X.ndim == 0:
        raise ValueError(
            f"{module_name}: Covariate input is a scalar. "
            f"Expected a 2-dimensional array with shape (n_observations, n_covariates)."
        )

    if X.ndim == 1:
        raise ValueError(
            f"{module_name}: Covariate matrix X is 1-dimensional with shape {X.shape}. "
            f"Expected a 2-dimensional array with shape (n_observations, n_covariates). "
            f"If you have a single covariate, use X.reshape(-1, 1)."
        )

    if X.ndim > 2:
        raise ValueError(
            f"{module_name}: Covariate matrix X has {X.ndim} dimensions with shape {X.shape}. "
            f"Expected a 2-dimensional array with shape (n_observations, n_covariates)."
        )

    # Now safe to access X.shape[0]
    n_X = X.shape[0]

    # Check 3: Insufficient sample size
    if n_treat < min_observations:
        raise ValueError(
            f"{module_name}: Treatment array has only {n_treat} observation(s). "
            f"At least {min_observations} observations are required to estimate "
            f"the propensity score and its variance."
        )

    # Check 4: Sample size mismatch
    if n_X != n_treat:
        raise ValueError(
            f"{module_name}: Sample size mismatch between treatment and covariates. "
            f"Treatment has {n_treat} observations, but covariates have {n_X} rows. "
            f"Both arrays must have the same number of observations."
        )

    # Check 5: Covariate column count
    if X.shape[1] == 0:
        raise ValueError(
            f"{module_name}: Covariate matrix has 0 columns. "
            f"At least 1 column (e.g., intercept) is required."
        )

    # Check 6: NaN/Inf in treatment variable
    if np.any(~np.isfinite(treat)):
        n_nan = np.sum(np.isnan(treat))
        n_inf = np.sum(np.isinf(treat))
        raise ValueError(
            f"{module_name}: Treatment contains {n_nan} NaN value(s) and {n_inf} Inf value(s). "
            f"Please remove or impute missing/infinite values before calling CBPS. "
            f"Consider using data.dropna() or df[np.isfinite(df).all(axis=1)]."
        )

    # Check 7: NaN/Inf in covariates
    if np.any(~np.isfinite(X)):
        n_nan = np.sum(np.isnan(X))
        n_inf = np.sum(np.isinf(X))
        raise ValueError(
            f"{module_name}: Covariates contain {n_nan} NaN value(s) and {n_inf} Inf value(s). "
            f"Please remove or impute missing/infinite values before calling CBPS. "
            f"Consider using data.dropna() or df[np.isfinite(df).all(axis=1)]."
        )

    # Check 8: Treatment variance (only for continuous treatments)
    if check_treatment_variance:
        # Exact constancy test: immune to the rounding residue np.std can
        # leave behind for identical floating-point values.
        treat_unique = np.unique(treat)
        if treat_unique.size < 2:
            raise ValueError(
                f"{module_name}: Treatment variable has zero variance (all values are identical). "
                f"Found only 1 unique value: {treat_unique}. "
                f"CBPS requires variation in the treatment to estimate propensity scores. "
                f"Please check your data or consider using a different treatment definition."
            )

        # Still reject treatments whose spread underflows to 0 or overflows
        # to a non-finite value; floating-point warnings are silenced because
        # the condition is reported through the exception below.
        with np.errstate(over="ignore", invalid="ignore"):
            treat_std = np.std(treat, ddof=1)
        if treat_std == 0 or not np.isfinite(treat_std):
            raise ValueError(
                f"{module_name}: Treatment variable has a degenerate sample standard "
                f"deviation ({treat_std}). "
                f"CBPS requires finite, non-zero variation in the treatment to estimate "
                f"propensity scores. "
                f"Please check your data or consider rescaling the treatment."
            )
|
|
224
|
+
|
|
@@ -0,0 +1,483 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Variance Matrix Transformation Utilities
|
|
3
|
+
|
|
4
|
+
This module provides functions to transform variance-covariance matrices
|
|
5
|
+
from SVD-orthogonalized parameter space back to the original covariate space.
|
|
6
|
+
This transformation is essential for valid statistical inference after SVD
|
|
7
|
+
preprocessing of the design matrix.
|
|
8
|
+
|
|
9
|
+
When SVD is applied to the design matrix X for numerical stability, the
|
|
10
|
+
estimated coefficients live in a transformed space. To obtain standard errors
|
|
11
|
+
in the original covariate space, the variance matrix must be back-transformed
|
|
12
|
+
using the inverse of the SVD transformation.
|
|
13
|
+
|
|
14
|
+
The transformation formula for a variance matrix V in SVD space is:
|
|
15
|
+
|
|
16
|
+
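V_orig = D_x^{-1} @ (X'X)^{-} @ X' @ X_svd @ V @ D^{-1} @ Var_svd @ D^{-1} @
|
|
17
|
+
V' @ X_svd' @ X @ (X'X)^{-} @ D_x^{-1}
|
|
18
|
+
|
|
19
|
+
where D_x^{-1} = diag(1, sd(x_1), sd(x_2), ...) undoes the column standardization, (X'X)^{-} is a generalized inverse, V holds the right singular vectors, D^{-1} is the diagonal matrix of inverse singular values, and Var_svd is the variance matrix in SVD space.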
|
|
20
|
+
|
|
21
|
+
Functions
|
|
22
|
+
---------
|
|
23
|
+
transform_variance_binary
|
|
24
|
+
Transform variance for binary treatment models.
|
|
25
|
+
transform_variance_3treat
|
|
26
|
+
Transform variance for 3-level treatment models.
|
|
27
|
+
transform_variance_4treat
|
|
28
|
+
Transform variance for 4-level treatment models.
|
|
29
|
+
transform_variance_continuous
|
|
30
|
+
Transform variance for continuous treatment models.
|
|
31
|
+
apply_variance_svd_inverse_transform
|
|
32
|
+
Dispatch function selecting appropriate transform based on treatment type.
|
|
33
|
+
|
|
34
|
+
References
|
|
35
|
+
----------
|
|
36
|
+
Imai, K. and Ratkovic, M. (2014). Covariate balancing propensity score.
|
|
37
|
+
Journal of the Royal Statistical Society, Series B 76(1), 243-263.
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
import numpy as np
|
|
41
|
+
from typing import Dict, Any, Optional
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _r_ginv(A: np.ndarray, tol: Optional[float] = None) -> np.ndarray:
|
|
45
|
+
"""
|
|
46
|
+
Compute Moore-Penrose generalized inverse via SVD.
|
|
47
|
+
|
|
48
|
+
Internal function used for variance matrix transformations.
|
|
49
|
+
Singular values below the tolerance threshold are set to zero
|
|
50
|
+
in the inversion to ensure numerical stability.
|
|
51
|
+
|
|
52
|
+
Parameters
|
|
53
|
+
----------
|
|
54
|
+
A : np.ndarray
|
|
55
|
+
Input matrix, shape (m, n).
|
|
56
|
+
tol : float, optional
|
|
57
|
+
Relative singular value truncation threshold.
|
|
58
|
+
If None, defaults to sqrt(machine_epsilon).
|
|
59
|
+
|
|
60
|
+
Returns
|
|
61
|
+
-------
|
|
62
|
+
np.ndarray
|
|
63
|
+
Generalized inverse, shape (n, m).
|
|
64
|
+
"""
|
|
65
|
+
if tol is None:
|
|
66
|
+
tol = np.sqrt(np.finfo(float).eps) # R: sqrt(.Machine$double.eps)
|
|
67
|
+
|
|
68
|
+
U, s, Vt = np.linalg.svd(A, full_matrices=False)
|
|
69
|
+
if len(s) == 0:
|
|
70
|
+
return np.zeros((A.shape[1], A.shape[0]))
|
|
71
|
+
|
|
72
|
+
threshold = max(tol * s[0], 0.0)
|
|
73
|
+
positive = s > threshold
|
|
74
|
+
|
|
75
|
+
if np.all(positive):
|
|
76
|
+
s_inv = 1.0 / s
|
|
77
|
+
return Vt.T @ np.diag(s_inv) @ U.T
|
|
78
|
+
if not np.any(positive):
|
|
79
|
+
return np.zeros((A.shape[1], A.shape[0]))
|
|
80
|
+
|
|
81
|
+
s_pos = s[positive]
|
|
82
|
+
U_pos = U[:, positive]
|
|
83
|
+
V_pos = Vt.T[:, positive]
|
|
84
|
+
return V_pos @ np.diag(1.0 / s_pos) @ U_pos.T
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def transform_variance_binary(
    variance_svd: np.ndarray,
    Dx_inv: np.ndarray,
    X_orig: np.ndarray,
    X_svd: np.ndarray,
    V: np.ndarray,
    d_inv: np.ndarray
) -> np.ndarray:
    """
    Transform variance matrix from SVD space for binary treatment models.

    Applies the inverse SVD transformation to convert the variance-covariance
    matrix from the orthogonalized parameter space back to the original
    covariate space, enabling proper inference on the original coefficients.

    The result is the sandwich ``L @ variance_svd @ R`` with

    - ``L = Dx_inv @ ginv(X'X) @ X' @ X_svd @ V @ D^-1``
    - ``R = D^-1 @ V' @ X_svd' @ X @ ginv(X'X) @ Dx_inv``

    where ``X`` is ``X_orig`` and ``D^-1 = diag(d_inv)``.

    Parameters
    ----------
    variance_svd : np.ndarray
        Variance matrix in SVD space, shape (k, k).
    Dx_inv : np.ndarray
        Inverse standardization diagonal matrix, shape (k, k).
        Contains [1, sd(x_1), sd(x_2), ...] on the diagonal.
    X_orig : np.ndarray
        Original design matrix with intercept, shape (n, k).
    X_svd : np.ndarray
        SVD-transformed design matrix, shape (n, k).
    V : np.ndarray
        Right singular vectors from X's SVD, shape (k, k).
    d_inv : np.ndarray
        Inverse singular values with small values zeroed, shape (k,).

    Returns
    -------
    np.ndarray
        Variance matrix in original covariate space, shape (k, k).

    See Also
    --------
    apply_variance_svd_inverse_transform : High-level dispatch function.
    """
    # Generalized inverse of the Gram matrix X_orig' @ X_orig
    XorigT_Xorig_inv = _r_ginv(X_orig.T @ X_orig)

    # Diagonal matrix of inverse singular values
    D_inv = np.diag(d_inv)

    # Left sandwich factor: Dx.inv @ ginv(X'X) @ X' @ Xsvd @ V @ D^-1
    left = Dx_inv @ XorigT_Xorig_inv @ X_orig.T @ X_svd @ V @ D_inv

    # Continue the product strictly left-to-right, which is the same
    # evaluation order as writing the whole chain in one expression:
    # ... @ Var @ D^-1 @ V' @ Xsvd' @ X @ ginv(X'X) @ Dx.inv
    return (
        left @ variance_svd
        @ D_inv @ V.T @ X_svd.T @ X_orig @ XorigT_Xorig_inv @ Dx_inv
    )
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def transform_variance_3treat(
    variance_svd: np.ndarray,
    Dx_inv: np.ndarray,
    X_orig: np.ndarray,
    X_svd: np.ndarray,
    V: np.ndarray,
    d_inv: np.ndarray
) -> np.ndarray:
    """
    Transform variance matrix from SVD space for 3-level treatment models.

    For multinomial treatment with 3 levels, the variance matrix has a
    2x2 block structure corresponding to the (J-1) = 2 treatment contrasts.
    Each k x k block is transformed independently using the same formula
    as binary treatment, i.e. ``L @ block @ R`` with

    - ``L = Dx_inv @ ginv(X'X) @ X' @ X_svd @ V @ D^-1``
    - ``R = D^-1 @ V' @ X_svd' @ X @ ginv(X'X) @ Dx_inv``

    ``L`` and ``R`` do not depend on the block, so they are computed once
    and reused for all four blocks.

    Parameters
    ----------
    variance_svd : np.ndarray
        Variance matrix in SVD space, shape (2k, 2k).
    Dx_inv : np.ndarray
        Inverse standardization diagonal matrix, shape (k, k).
    X_orig : np.ndarray
        Original design matrix with intercept, shape (n, k).
    X_svd : np.ndarray
        SVD-transformed design matrix, shape (n, k).
    V : np.ndarray
        Right singular vectors from X's SVD, shape (k, k).
    d_inv : np.ndarray
        Inverse singular values with small values zeroed, shape (k,).

    Returns
    -------
    np.ndarray
        Variance matrix in original covariate space, shape (2k, 2k).

    See Also
    --------
    transform_variance_binary : Single-block transformation formula.
    """
    n_contrasts = 2  # J - 1 contrasts for a 3-level treatment
    k = X_orig.shape[1]

    # Compute common transformation matrices
    XorigT_Xorig_inv = _r_ginv(X_orig.T @ X_orig)
    D_inv = np.diag(d_inv)

    # Block-independent sandwich factors, built once instead of per block.
    left = Dx_inv @ XorigT_Xorig_inv @ X_orig.T @ X_svd @ V @ D_inv
    right = D_inv @ V.T @ X_svd.T @ X_orig @ XorigT_Xorig_inv @ Dx_inv

    # Transform block (r, c) = rows r*k:(r+1)*k, columns c*k:(c+1)*k.
    transformed_blocks = [
        [
            left @ variance_svd[r * k:(r + 1) * k, c * k:(c + 1) * k] @ right
            for c in range(n_contrasts)
        ]
        for r in range(n_contrasts)
    ]

    # Reassemble full variance matrix
    return np.block(transformed_blocks)
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def transform_variance_4treat(
    variance_svd: np.ndarray,
    Dx_inv: np.ndarray,
    X_orig: np.ndarray,
    X_svd: np.ndarray,
    V: np.ndarray,
    d_inv: np.ndarray
) -> np.ndarray:
    """
    Transform variance matrix from SVD space for 4-level treatment models.

    For multinomial treatment with 4 levels, the variance matrix has a
    3x3 block structure corresponding to the (J-1) = 3 treatment contrasts.
    Each k x k block is transformed independently using the same formula
    as binary treatment, i.e. ``L @ block @ R`` with

    - ``L = Dx_inv @ ginv(X'X) @ X' @ X_svd @ V @ D^-1``
    - ``R = D^-1 @ V' @ X_svd' @ X @ ginv(X'X) @ Dx_inv``

    ``L`` and ``R`` do not depend on the block, so they are computed once
    and reused for all nine blocks.

    Parameters
    ----------
    variance_svd : np.ndarray
        Variance matrix in SVD space, shape (3k, 3k).
    Dx_inv : np.ndarray
        Inverse standardization diagonal matrix, shape (k, k).
    X_orig : np.ndarray
        Original design matrix with intercept, shape (n, k).
    X_svd : np.ndarray
        SVD-transformed design matrix, shape (n, k).
    V : np.ndarray
        Right singular vectors from X's SVD, shape (k, k).
    d_inv : np.ndarray
        Inverse singular values with small values zeroed, shape (k,).

    Returns
    -------
    np.ndarray
        Variance matrix in original covariate space, shape (3k, 3k).

    See Also
    --------
    transform_variance_binary : Single-block transformation formula.
    """
    n_contrasts = 3  # J - 1 contrasts for a 4-level treatment
    k = X_orig.shape[1]

    # Compute common transformation matrices
    XorigT_Xorig_inv = _r_ginv(X_orig.T @ X_orig)
    D_inv = np.diag(d_inv)

    # Block-independent sandwich factors, built once instead of per block.
    left = Dx_inv @ XorigT_Xorig_inv @ X_orig.T @ X_svd @ V @ D_inv
    right = D_inv @ V.T @ X_svd.T @ X_orig @ XorigT_Xorig_inv @ Dx_inv

    # Transform block (r, c) = rows r*k:(r+1)*k, columns c*k:(c+1)*k.
    transformed_blocks = [
        [
            left @ variance_svd[r * k:(r + 1) * k, c * k:(c + 1) * k] @ right
            for c in range(n_contrasts)
        ]
        for r in range(n_contrasts)
    ]

    # Reassemble full variance matrix
    return np.block(transformed_blocks)
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def transform_variance_continuous(
    variance_svd: np.ndarray,
    Dx_inv: np.ndarray,
    X_orig: np.ndarray,
    X_svd: np.ndarray,
    V: np.ndarray,
    d_inv: np.ndarray
) -> np.ndarray:
    """
    Back-transform a continuous-treatment variance matrix out of SVD space.

    The coefficient vector of a continuous-treatment model has k entries,
    exactly as in the binary case, so this function simply delegates to
    the single-block binary transformation.

    Parameters
    ----------
    variance_svd : np.ndarray
        Variance matrix in SVD space, shape (k, k).
    Dx_inv : np.ndarray
        Inverse standardization diagonal matrix, shape (k, k).
    X_orig : np.ndarray
        Original design matrix with intercept, shape (n, k).
    X_svd : np.ndarray
        SVD-transformed design matrix, shape (n, k).
    V : np.ndarray
        Right singular vectors from X's SVD, shape (k, k).
    d_inv : np.ndarray
        Inverse singular values with small values zeroed, shape (k,).

    Returns
    -------
    np.ndarray
        Variance matrix in original covariate space, shape (k, k).

    See Also
    --------
    transform_variance_binary : Underlying transformation implementation.
    """
    # Identical inverse transform to the binary case; forward every argument.
    var_original_space = transform_variance_binary(
        variance_svd=variance_svd,
        Dx_inv=Dx_inv,
        X_orig=X_orig,
        X_svd=X_svd,
        V=V,
        d_inv=d_inv,
    )
    return var_original_space
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def apply_variance_svd_inverse_transform(
    variance_svd: np.ndarray,
    svd_info: Dict[str, Any],
    X_orig: np.ndarray,
    X_svd: np.ndarray,
    is_factor: bool,
    no_treats: int
) -> np.ndarray:
    """
    Apply SVD inverse transformation to variance matrix.

    High-level dispatch function that selects the appropriate inverse
    transformation method based on the treatment type and applies it
    to convert the variance matrix from SVD space to original space.

    Parameters
    ----------
    variance_svd : np.ndarray
        Variance matrix in SVD space.
        Shape depends on treatment type: (k, k) for binary/continuous,
        (2k, 2k) for 3-level, (3k, 3k) for 4-level.
    svd_info : dict
        SVD preprocessing information containing:

        - ``'d'`` : Singular values from SVD, shape (k,)
        - ``'V'`` : Right singular vectors, shape (k, k)
        - ``'x_mean'`` : Column means of original X (excluding intercept)
        - ``'x_sd'`` : Column standard deviations of original X

        Only ``'d'``, ``'V'`` and ``'x_sd'`` are read by this function.
    X_orig : np.ndarray
        Original design matrix with intercept, shape (n, k).
    X_svd : np.ndarray
        SVD-transformed design matrix, shape (n, k).
    is_factor : bool
        True for discrete (factor) treatment, False for continuous.
    no_treats : int
        Number of treatment levels.
        2, 3, or 4 for discrete treatments; ignored for continuous.

    Returns
    -------
    np.ndarray
        Variance matrix in original covariate space.

    Notes
    -----
    The transformation handles different treatment types:

    - **Binary** (no_treats=2): Single k x k block transformation
    - **3-level** (no_treats=3): Four k x k blocks forming 2k x 2k matrix
    - **4-level** (no_treats=4): Nine k x k blocks forming 3k x 3k matrix
    - **Continuous**: Same as binary (single k x k block)

    Any combination not listed above (including a factor treatment with a
    number of levels other than 2, 3 or 4) falls through to the continuous
    transformation.

    Singular values ``<= 1e-5`` are treated as zero. The cutoff is applied
    to the singular values themselves, before inversion, so the reciprocal
    of a large singular value (``> 1e5``) is kept rather than being zeroed
    because it is numerically small.

    Examples
    --------
    This function is typically called internally by CBPS variance methods:

    >>> import numpy as np
    >>> # svd_info, X_orig, X_svd are obtained from CBPS fitting
    >>> # var_orig = apply_variance_svd_inverse_transform(
    >>> #     var_svd, svd_info, X_orig, X_svd, is_factor=True, no_treats=2
    >>> # )
    """
    # Construct Dx_inv: diag([1, x_sd[0], x_sd[1], ...])
    # NOTE(review): the leading 1 presumably belongs to the intercept column,
    # so x_sd is expected to hold k - 1 entries -- confirm against the caller.
    x_sd = svd_info['x_sd']
    Dx_inv = np.diag(np.concatenate([[1.0], x_sd]))

    # Construct d_inv: inverse of singular values (values <= 1e-5 set to 0).
    # The mask is taken from the original singular values exactly once;
    # re-testing the array after inverting in place would also wipe out the
    # reciprocals of singular values larger than 1e5. The float cast keeps
    # the reciprocals from being truncated if 'd' has an integer dtype.
    singular_values = np.asarray(svd_info['d'], dtype=float)
    invertible = singular_values > 1e-5
    d_inv = np.zeros_like(singular_values)
    d_inv[invertible] = 1.0 / singular_values[invertible]

    V = svd_info['V']

    # Select inverse transform method based on treatment type
    if is_factor and no_treats == 2:
        # Binary treatment
        var_transformed = transform_variance_binary(
            variance_svd, Dx_inv, X_orig, X_svd, V, d_inv
        )
    elif is_factor and no_treats == 3:
        # 3-level treatment
        var_transformed = transform_variance_3treat(
            variance_svd, Dx_inv, X_orig, X_svd, V, d_inv
        )
    elif is_factor and no_treats == 4:
        # 4-level treatment
        var_transformed = transform_variance_4treat(
            variance_svd, Dx_inv, X_orig, X_svd, V, d_inv
        )
    else:
        # Continuous treatment (also the fall-through for any other input)
        var_transformed = transform_variance_continuous(
            variance_svd, Dx_inv, X_orig, X_svd, V, d_inv
        )

    return var_transformed
|