cbps 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. cbps/__init__.py +3462 -0
  2. cbps/constants.py +46 -0
  3. cbps/core/__init__.py +93 -0
  4. cbps/core/cbps_binary.py +1943 -0
  5. cbps/core/cbps_continuous.py +945 -0
  6. cbps/core/cbps_multitreat.py +1123 -0
  7. cbps/core/cbps_optimal.py +507 -0
  8. cbps/core/results.py +1447 -0
  9. cbps/data/Blackwell.csv +571 -0
  10. cbps/data/LaLonde.csv +3213 -0
  11. cbps/data/npcbps_continuous_sim.csv +501 -0
  12. cbps/data/nsw.csv +723 -0
  13. cbps/data/nsw_dw.csv +446 -0
  14. cbps/data/political_ads_urban_niebler.csv +16266 -0
  15. cbps/data/psid_controls.csv +2491 -0
  16. cbps/data/psid_controls2.csv +254 -0
  17. cbps/data/psid_controls3.csv +129 -0
  18. cbps/data/simulation_dgp1_seed12345.csv +201 -0
  19. cbps/data/simulation_dgp2_seed12345.csv +201 -0
  20. cbps/data/simulation_dgp3_seed12345.csv +201 -0
  21. cbps/data/simulation_dgp4_seed12345.csv +201 -0
  22. cbps/datasets/__init__.py +78 -0
  23. cbps/datasets/blackwell.py +112 -0
  24. cbps/datasets/continuous.py +223 -0
  25. cbps/datasets/lalonde.py +272 -0
  26. cbps/datasets/npcbps_sim.py +101 -0
  27. cbps/diagnostics/__init__.py +101 -0
  28. cbps/diagnostics/balance.py +760 -0
  29. cbps/diagnostics/balance_cbmsm_addon.py +162 -0
  30. cbps/diagnostics/continuous_diagnostics.py +259 -0
  31. cbps/diagnostics/normality.py +173 -0
  32. cbps/diagnostics/ocbps_conditions.py +197 -0
  33. cbps/diagnostics/overlap.py +198 -0
  34. cbps/diagnostics/plots.py +1193 -0
  35. cbps/diagnostics/weights_diag.py +205 -0
  36. cbps/highdim/__init__.py +84 -0
  37. cbps/highdim/gmm_loss.py +340 -0
  38. cbps/highdim/hdcbps.py +1078 -0
  39. cbps/highdim/lasso_utils.py +498 -0
  40. cbps/highdim/weight_funcs.py +298 -0
  41. cbps/inference/__init__.py +42 -0
  42. cbps/inference/asyvar.py +621 -0
  43. cbps/inference/vcov_outcome.py +217 -0
  44. cbps/iv/__init__.py +48 -0
  45. cbps/iv/cbiv.py +2603 -0
  46. cbps/logging_config.py +45 -0
  47. cbps/msm/__init__.py +45 -0
  48. cbps/msm/cbmsm.py +1871 -0
  49. cbps/msm/rank_diagnostics.py +112 -0
  50. cbps/nonparametric/__init__.py +58 -0
  51. cbps/nonparametric/cholesky_whitening.py +232 -0
  52. cbps/nonparametric/empirical_likelihood.py +339 -0
  53. cbps/nonparametric/npcbps.py +1036 -0
  54. cbps/nonparametric/taylor_approx.py +207 -0
  55. cbps/py.typed +0 -0
  56. cbps/sklearn/__init__.py +42 -0
  57. cbps/sklearn/estimator.py +378 -0
  58. cbps/utils/__init__.py +82 -0
  59. cbps/utils/formula.py +415 -0
  60. cbps/utils/helpers.py +378 -0
  61. cbps/utils/numerics.py +438 -0
  62. cbps/utils/r_compat.py +109 -0
  63. cbps/utils/validation.py +224 -0
  64. cbps/utils/variance_transform.py +483 -0
  65. cbps/utils/weights.py +586 -0
  66. cbps-0.2.0.dist-info/METADATA +1090 -0
  67. cbps-0.2.0.dist-info/RECORD +70 -0
  68. cbps-0.2.0.dist-info/WHEEL +5 -0
  69. cbps-0.2.0.dist-info/licenses/LICENSE +661 -0
  70. cbps-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,224 @@
1
+ """
2
+ Input Validation Utilities
3
+
4
+ This module provides centralized input validation for CBPS estimators,
5
+ ensuring consistent error handling and user-friendly error messages
6
+ across all model classes.
7
+
8
+ The validation functions check for common issues such as:
9
+
10
+ - Empty or insufficient sample sizes
11
+ - Dimension mismatches between treatment and covariates
12
+ - Missing or infinite values
13
+ - Zero-variance treatment variables
14
+ - Improperly shaped covariate matrices
15
+
16
+ All validation errors include descriptive messages with the calling
17
+ module name, making it easy to identify the source of issues.
18
+
19
+ Functions
20
+ ---------
21
+ validate_cbps_input
22
+ Comprehensive input validation for CBPS estimators.
23
+
24
+ References
25
+ ----------
26
+ Imai, K. and Ratkovic, M. (2014). Covariate balancing propensity score.
27
+ Journal of the Royal Statistical Society, Series B 76(1), 243-263.
28
+ """
29
+
30
+ import numpy as np
31
+ import warnings
32
+ from typing import Optional
33
+
34
+
35
def ensure_dense(X):
    """Convert a sparse matrix to a dense array if needed.

    For standard CBPS (k < 1000), dense operations are faster.
    Sparse support is primarily for hdCBPS preprocessing.

    Parameters
    ----------
    X : array-like or scipy.sparse matrix
        Input covariate matrix. Any object exposing a ``toarray`` method is
        treated as sparse; everything else is passed to ``np.asarray``.

    Returns
    -------
    np.ndarray
        Dense numpy array.

    Warns
    -----
    UserWarning
        If a sparse input with more than 1000 columns is densified.
    """
    if hasattr(X, 'toarray'):  # duck-typed sparse container
        shape = X.shape
        # Guard the column lookup so 1-D sparse containers do not raise.
        if len(shape) > 1 and shape[1] > 1000:
            warnings.warn(
                f"Converting sparse matrix ({X.shape}) to dense. "
                f"For k>{X.shape[1]}, consider using hdCBPS with built-in "
                f"LASSO variable selection instead.",
                UserWarning,
                stacklevel=2,  # attribute the warning to the caller
            )
        # Call the method that was actually feature-tested above; the
        # original called ``todense()``, which a ``toarray``-only container
        # does not provide.
        return np.asarray(X.toarray())
    return np.asarray(X)
61
+
62
+
63
def validate_cbps_input(
    treat: np.ndarray,
    X: np.ndarray,
    min_observations: int = 2,
    module_name: str = "CBPS",
    check_treatment_variance: bool = True
) -> None:
    """
    Validate treatment and covariate arrays for CBPS estimation.

    Performs comprehensive validation of input arrays before CBPS fitting,
    providing informative error messages that identify the specific issue
    and suggest remediation steps.

    Parameters
    ----------
    treat : np.ndarray
        Treatment variable, shape (n,).
    X : np.ndarray
        Covariate matrix, shape (n, k).
    min_observations : int, default=2
        Minimum required sample size.
    module_name : str, default="CBPS"
        Name of calling module for error message prefixes.
    check_treatment_variance : bool, default=True
        If True, verify that the treatment takes at least two distinct
        values. Set to False for binary treatments where the variance
        check is handled separately.

    Raises
    ------
    ValueError
        If any validation check fails. The error message includes:

        - The module name prefix for easy identification
        - A description of the specific issue
        - The actual values that caused the error
        - Suggested remediation steps

    Notes
    -----
    The following checks are performed in order:

    1. Treatment is not a scalar and is non-empty
    2. Covariate matrix is 2-dimensional
    3. Sample size >= min_observations
    4. Treatment and covariate row counts match
    5. Covariate matrix has >= 1 column
    6. No NaN or Inf values in treatment
    7. No NaN or Inf values in covariates
    8. Treatment is not constant (if check_treatment_variance=True)

    Examples
    --------
    >>> import numpy as np
    >>> from cbps.utils.validation import validate_cbps_input
    >>>
    >>> # Valid input passes silently
    >>> treat = np.array([0, 1, 0, 1])
    >>> X = np.array([[1, 2], [1, 3], [1, 4], [1, 5]])
    >>> validate_cbps_input(treat, X)
    >>>
    >>> # Dimension mismatch raises informative error
    >>> try:
    ...     validate_cbps_input(np.array([0, 1]), X)
    ... except ValueError as e:
    ...     print("Validation failed")
    Validation failed
    """
    if not isinstance(treat, np.ndarray):
        treat = np.asarray(treat)
    if not isinstance(X, np.ndarray):
        X = np.asarray(X)

    # Check 1: Scalar / empty treatment. A 0-d array has no len(), so it
    # must be rejected first to keep the failure a ValueError.
    if treat.ndim == 0:
        raise ValueError(
            f"{module_name}: Treatment input is a scalar. "
            f"Expected a 1-dimensional array with shape (n_observations,)."
        )

    n_treat = len(treat)

    if n_treat == 0:
        raise ValueError(
            f"{module_name}: Treatment array is empty (n=0). "
            f"At least {min_observations} observation(s) are required to estimate "
            f"the propensity score."
        )

    # Check 2: Covariate matrix dimensions (check before accessing shape[0])
    if X.ndim == 0:
        raise ValueError(
            f"{module_name}: Covariate input is a scalar. "
            f"Expected a 2-dimensional array with shape (n_observations, n_covariates)."
        )

    if X.ndim == 1:
        raise ValueError(
            f"{module_name}: Covariate matrix X is 1-dimensional with shape {X.shape}. "
            f"Expected a 2-dimensional array with shape (n_observations, n_covariates). "
            f"If you have a single covariate, use X.reshape(-1, 1)."
        )

    if X.ndim > 2:
        raise ValueError(
            f"{module_name}: Covariate matrix X has {X.ndim} dimensions with shape {X.shape}. "
            f"Expected a 2-dimensional array with shape (n_observations, n_covariates)."
        )

    # Now safe to access X.shape[0]
    n_X = X.shape[0]

    # Check 3: Insufficient sample size
    if n_treat < min_observations:
        raise ValueError(
            f"{module_name}: Treatment array has only {n_treat} observation(s). "
            f"At least {min_observations} observations are required to estimate "
            f"the propensity score and its variance."
        )

    # Check 4: Sample size mismatch
    if n_X != n_treat:
        raise ValueError(
            f"{module_name}: Sample size mismatch between treatment and covariates. "
            f"Treatment has {n_treat} observations, but covariates have {n_X} rows. "
            f"Both arrays must have the same number of observations."
        )

    # Check 5: Covariate column count
    if X.shape[1] == 0:
        raise ValueError(
            f"{module_name}: Covariate matrix has 0 columns. "
            f"At least 1 column (e.g., intercept) is required."
        )

    # Check 6: NaN/Inf in treatment variable
    if np.any(~np.isfinite(treat)):
        n_nan = np.sum(np.isnan(treat))
        n_inf = np.sum(np.isinf(treat))
        raise ValueError(
            f"{module_name}: Treatment contains {n_nan} NaN value(s) and {n_inf} Inf value(s). "
            f"Please remove or impute missing/infinite values before calling CBPS. "
            f"Consider using data.dropna() or df[np.isfinite(df).all(axis=1)]."
        )

    # Check 7: NaN/Inf in covariates
    if np.any(~np.isfinite(X)):
        n_nan = np.sum(np.isnan(X))
        n_inf = np.sum(np.isinf(X))
        raise ValueError(
            f"{module_name}: Covariates contain {n_nan} NaN value(s) and {n_inf} Inf value(s). "
            f"Please remove or impute missing/infinite values before calling CBPS. "
            f"Consider using data.dropna() or df[np.isfinite(df).all(axis=1)]."
        )

    # Check 8: Constant treatment.
    # Compare min and max exactly rather than testing ``std == 0``: the
    # standard deviation of a constant float vector such as
    # [0.1, 0.1, 0.1] is ~1e-17 (rounding in the mean), so the std-based
    # test lets constant treatments through. The values are known to be
    # finite and non-empty at this point.
    if check_treatment_variance:
        if np.min(treat) == np.max(treat):
            treat_unique = np.unique(treat)
            raise ValueError(
                f"{module_name}: Treatment variable has zero variance (all values are identical). "
                f"Found only 1 unique value: {treat_unique}. "
                f"CBPS requires variation in the treatment to estimate propensity scores. "
                f"Please check your data or consider using a different treatment definition."
            )
224
+
@@ -0,0 +1,483 @@
1
+ """
2
+ Variance Matrix Transformation Utilities
3
+
4
+ This module provides functions to transform variance-covariance matrices
5
+ from SVD-orthogonalized parameter space back to the original covariate space.
6
+ This transformation is essential for valid statistical inference after SVD
7
+ preprocessing of the design matrix.
8
+
9
+ When SVD is applied to the design matrix X for numerical stability, the
10
+ estimated coefficients live in a transformed space. To obtain standard errors
11
+ in the original covariate space, the variance matrix must be back-transformed
12
+ using the inverse of the SVD transformation.
13
+
14
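+ The transformation formula for a variance matrix V_svd in SVD space is:
15
+
16
+ V_orig = D_x^{-1} @ (X'X)^+ @ X' @ X_svd @ V @ D^{-1} @ V_svd @ D^{-1} @
17
+ V' @ X_svd' @ X @ (X'X)^+ @ D_x^{-1}
18
+
19
+ where D_x^{-1} = diag(1, sd(x_1), sd(x_2), ...) undoes the column standardization, (X'X)^+ is the generalized inverse of X'X, V holds the right singular vectors, and D^{-1} is the diagonal matrix of inverse singular values (near-zero singular values are mapped to 0).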
20
+
21
+ Functions
22
+ ---------
23
+ transform_variance_binary
24
+ Transform variance for binary treatment models.
25
+ transform_variance_3treat
26
+ Transform variance for 3-level treatment models.
27
+ transform_variance_4treat
28
+ Transform variance for 4-level treatment models.
29
+ transform_variance_continuous
30
+ Transform variance for continuous treatment models.
31
+ apply_variance_svd_inverse_transform
32
+ Dispatch function selecting appropriate transform based on treatment type.
33
+
34
+ References
35
+ ----------
36
+ Imai, K. and Ratkovic, M. (2014). Covariate balancing propensity score.
37
+ Journal of the Royal Statistical Society, Series B 76(1), 243-263.
38
+ """
39
+
40
+ import numpy as np
41
+ from typing import Dict, Any, Optional
42
+
43
+
44
+ def _r_ginv(A: np.ndarray, tol: Optional[float] = None) -> np.ndarray:
45
+ """
46
+ Compute Moore-Penrose generalized inverse via SVD.
47
+
48
+ Internal function used for variance matrix transformations.
49
+ Singular values below the tolerance threshold are set to zero
50
+ in the inversion to ensure numerical stability.
51
+
52
+ Parameters
53
+ ----------
54
+ A : np.ndarray
55
+ Input matrix, shape (m, n).
56
+ tol : float, optional
57
+ Relative singular value truncation threshold.
58
+ If None, defaults to sqrt(machine_epsilon).
59
+
60
+ Returns
61
+ -------
62
+ np.ndarray
63
+ Generalized inverse, shape (n, m).
64
+ """
65
+ if tol is None:
66
+ tol = np.sqrt(np.finfo(float).eps) # R: sqrt(.Machine$double.eps)
67
+
68
+ U, s, Vt = np.linalg.svd(A, full_matrices=False)
69
+ if len(s) == 0:
70
+ return np.zeros((A.shape[1], A.shape[0]))
71
+
72
+ threshold = max(tol * s[0], 0.0)
73
+ positive = s > threshold
74
+
75
+ if np.all(positive):
76
+ s_inv = 1.0 / s
77
+ return Vt.T @ np.diag(s_inv) @ U.T
78
+ if not np.any(positive):
79
+ return np.zeros((A.shape[1], A.shape[0]))
80
+
81
+ s_pos = s[positive]
82
+ U_pos = U[:, positive]
83
+ V_pos = Vt.T[:, positive]
84
+ return V_pos @ np.diag(1.0 / s_pos) @ U_pos.T
85
+
86
+
87
def transform_variance_binary(
    variance_svd: np.ndarray,
    Dx_inv: np.ndarray,
    X_orig: np.ndarray,
    X_svd: np.ndarray,
    V: np.ndarray,
    d_inv: np.ndarray
) -> np.ndarray:
    """
    Transform variance matrix from SVD space for binary treatment models.

    Applies the inverse SVD transformation to convert the variance-covariance
    matrix from the orthogonalized parameter space back to the original
    covariate space, enabling proper inference on the original coefficients.

    The result is ``left @ variance_svd @ right`` with

    - ``left  = Dx_inv @ ginv(X'X) @ X' @ X_svd @ V @ diag(d_inv)``
    - ``right = diag(d_inv) @ V' @ X_svd' @ X @ ginv(X'X) @ Dx_inv``

    Parameters
    ----------
    variance_svd : np.ndarray
        Variance matrix in SVD space, shape (k, k).
    Dx_inv : np.ndarray
        Inverse standardization diagonal matrix, shape (k, k).
        Contains [1, sd(x_1), sd(x_2), ...] on the diagonal.
    X_orig : np.ndarray
        Original design matrix with intercept, shape (n, k).
    X_svd : np.ndarray
        SVD-transformed design matrix, shape (n, k).
    V : np.ndarray
        Right singular vectors from X's SVD, shape (k, k).
    d_inv : np.ndarray
        Inverse singular values with small values zeroed, shape (k,).

    Returns
    -------
    np.ndarray
        Variance matrix in original covariate space, shape (k, k).

    See Also
    --------
    apply_variance_svd_inverse_transform : High-level dispatch function.
    """
    # Generalized inverse of X_orig' @ X_orig
    gram_pinv = _r_ginv(X_orig.T @ X_orig)

    # Diagonal matrix of inverse singular values
    D_inv = np.diag(d_inv)

    # Map from SVD-space coefficients to original-space coefficients and
    # the matching right-hand factor of the sandwich.
    left = Dx_inv @ gram_pinv @ X_orig.T @ X_svd @ V @ D_inv
    right = D_inv @ V.T @ X_svd.T @ X_orig @ gram_pinv @ Dx_inv

    return left @ variance_svd @ right
154
+
155
+
156
def transform_variance_3treat(
    variance_svd: np.ndarray,
    Dx_inv: np.ndarray,
    X_orig: np.ndarray,
    X_svd: np.ndarray,
    V: np.ndarray,
    d_inv: np.ndarray
) -> np.ndarray:
    """
    Transform variance matrix from SVD space for 3-level treatment models.

    For multinomial treatment with 3 levels, the variance matrix has a
    2x2 block structure corresponding to the (J-1) = 2 treatment contrasts.
    Each k x k block ``B`` is mapped to ``left @ B @ right``, the same
    sandwich used for binary treatment, where

    - ``left  = Dx_inv @ ginv(X'X) @ X' @ X_svd @ V @ diag(d_inv)``
    - ``right = diag(d_inv) @ V' @ X_svd' @ X @ ginv(X'X) @ Dx_inv``

    Parameters
    ----------
    variance_svd : np.ndarray
        Variance matrix in SVD space, shape (2k, 2k).
    Dx_inv : np.ndarray
        Inverse standardization diagonal matrix, shape (k, k).
    X_orig : np.ndarray
        Original design matrix with intercept, shape (n, k).
    X_svd : np.ndarray
        SVD-transformed design matrix, shape (n, k).
    V : np.ndarray
        Right singular vectors from X's SVD, shape (k, k).
    d_inv : np.ndarray
        Inverse singular values with small values zeroed, shape (k,).

    Returns
    -------
    np.ndarray
        Variance matrix in original covariate space, shape (2k, 2k).

    See Also
    --------
    transform_variance_binary : Single-block transformation formula.
    """
    k = X_orig.shape[1]
    n_contrasts = 2  # J - 1 for a 3-level treatment

    # The two outer factors do not depend on the block, so build them once
    # instead of repeating the full matrix chain for every block.
    gram_pinv = _r_ginv(X_orig.T @ X_orig)
    D_inv = np.diag(d_inv)
    left = Dx_inv @ gram_pinv @ X_orig.T @ X_svd @ V @ D_inv
    right = D_inv @ V.T @ X_svd.T @ X_orig @ gram_pinv @ Dx_inv

    # (start, stop) index pairs delimiting each k-wide block.
    edges = [(i * k, (i + 1) * k) for i in range(n_contrasts)]

    # Transform each block independently and reassemble the full matrix.
    return np.block([
        [left @ variance_svd[r0:r1, c0:c1] @ right for (c0, c1) in edges]
        for (r0, r1) in edges
    ])
233
+
234
+
235
def transform_variance_4treat(
    variance_svd: np.ndarray,
    Dx_inv: np.ndarray,
    X_orig: np.ndarray,
    X_svd: np.ndarray,
    V: np.ndarray,
    d_inv: np.ndarray
) -> np.ndarray:
    """
    Transform variance matrix from SVD space for 4-level treatment models.

    For multinomial treatment with 4 levels, the variance matrix has a
    3x3 block structure corresponding to the (J-1) = 3 treatment contrasts.
    Each k x k block ``B`` is mapped to ``left @ B @ right``, the same
    sandwich used for binary treatment, where

    - ``left  = Dx_inv @ ginv(X'X) @ X' @ X_svd @ V @ diag(d_inv)``
    - ``right = diag(d_inv) @ V' @ X_svd' @ X @ ginv(X'X) @ Dx_inv``

    Parameters
    ----------
    variance_svd : np.ndarray
        Variance matrix in SVD space, shape (3k, 3k).
    Dx_inv : np.ndarray
        Inverse standardization diagonal matrix, shape (k, k).
    X_orig : np.ndarray
        Original design matrix with intercept, shape (n, k).
    X_svd : np.ndarray
        SVD-transformed design matrix, shape (n, k).
    V : np.ndarray
        Right singular vectors from X's SVD, shape (k, k).
    d_inv : np.ndarray
        Inverse singular values with small values zeroed, shape (k,).

    Returns
    -------
    np.ndarray
        Variance matrix in original covariate space, shape (3k, 3k).

    See Also
    --------
    transform_variance_binary : Single-block transformation formula.
    """
    k = X_orig.shape[1]
    n_contrasts = 3  # J - 1 for a 4-level treatment

    # The two outer factors do not depend on the block, so build them once
    # instead of repeating the full matrix chain for each of the 9 blocks.
    gram_pinv = _r_ginv(X_orig.T @ X_orig)
    D_inv = np.diag(d_inv)
    left = Dx_inv @ gram_pinv @ X_orig.T @ X_svd @ V @ D_inv
    right = D_inv @ V.T @ X_svd.T @ X_orig @ gram_pinv @ Dx_inv

    # (start, stop) index pairs delimiting each k-wide block.
    edges = [(i * k, (i + 1) * k) for i in range(n_contrasts)]

    # Transform each block independently and reassemble the full matrix.
    return np.block([
        [left @ variance_svd[r0:r1, c0:c1] @ right for (c0, c1) in edges]
        for (r0, r1) in edges
    ])
338
+
339
+
340
def transform_variance_continuous(
    variance_svd: np.ndarray,
    Dx_inv: np.ndarray,
    X_orig: np.ndarray,
    X_svd: np.ndarray,
    V: np.ndarray,
    d_inv: np.ndarray
) -> np.ndarray:
    """
    Back-transform an SVD-space variance matrix for continuous treatments.

    This is a thin wrapper: all arguments are forwarded unchanged to
    ``transform_variance_binary`` and its result is returned as-is.

    Parameters
    ----------
    variance_svd : np.ndarray
        Variance matrix in SVD space, shape (k, k).
    Dx_inv : np.ndarray
        Inverse standardization diagonal matrix, shape (k, k).
    X_orig : np.ndarray
        Original design matrix with intercept, shape (n, k).
    X_svd : np.ndarray
        SVD-transformed design matrix, shape (n, k).
    V : np.ndarray
        Right singular vectors from X's SVD, shape (k, k).
    d_inv : np.ndarray
        Inverse singular values with small values zeroed, shape (k,).

    Returns
    -------
    np.ndarray
        Variance matrix in original covariate space, shape (k, k).

    See Also
    --------
    transform_variance_binary : Underlying transformation implementation.
    """
    # Delegate to the single-block (binary) transform.
    result = transform_variance_binary(
        variance_svd=variance_svd,
        Dx_inv=Dx_inv,
        X_orig=X_orig,
        X_svd=X_svd,
        V=V,
        d_inv=d_inv,
    )
    return result
383
+
384
+
385
def apply_variance_svd_inverse_transform(
    variance_svd: np.ndarray,
    svd_info: Dict[str, Any],
    X_orig: np.ndarray,
    X_svd: np.ndarray,
    is_factor: bool,
    no_treats: int
) -> np.ndarray:
    """
    Apply SVD inverse transformation to variance matrix.

    High-level dispatch function that selects the appropriate inverse
    transformation method based on the treatment type and applies it
    to convert the variance matrix from SVD space to original space.

    Parameters
    ----------
    variance_svd : np.ndarray
        Variance matrix in SVD space.
        Shape depends on treatment type: (k, k) for binary/continuous,
        (2k, 2k) for 3-level, (3k, 3k) for 4-level.
    svd_info : dict
        SVD preprocessing information. The keys read by this function are:

        - ``'d'`` : Singular values from SVD, shape (k,)
        - ``'V'`` : Right singular vectors, shape (k, k)
        - ``'x_sd'`` : Column standard deviations of original X

    X_orig : np.ndarray
        Original design matrix with intercept, shape (n, k).
    X_svd : np.ndarray
        SVD-transformed design matrix, shape (n, k).
    is_factor : bool
        True for discrete (factor) treatment, False for continuous.
    no_treats : int
        Number of treatment levels.
        2, 3, or 4 for discrete treatments; ignored for continuous.

    Returns
    -------
    np.ndarray
        Variance matrix in original covariate space.

    Notes
    -----
    The transformation handles different treatment types:

    - **Binary** (no_treats=2): Single k x k block transformation
    - **3-level** (no_treats=3): Four k x k blocks forming 2k x 2k matrix
    - **4-level** (no_treats=4): Nine k x k blocks forming 3k x 3k matrix
    - **Continuous**: Same as binary (single k x k block)

    Any other combination of ``is_factor`` / ``no_treats`` falls through
    to the continuous (single-block) transform.

    Examples
    --------
    This function is typically called internally by CBPS variance methods:

    >>> import numpy as np
    >>> # svd_info, X_orig, X_svd are obtained from CBPS fitting
    >>> # var_orig = apply_variance_svd_inverse_transform(
    >>> #     var_svd, svd_info, X_orig, X_svd, is_factor=True, no_treats=2
    >>> # )
    """
    # Construct Dx_inv: diag([1, x_sd[0], x_sd[1], ...])
    x_sd = svd_info['x_sd']
    Dx_inv = np.diag(np.concatenate([[1.0], x_sd]))

    # Construct d_inv: inverse of singular values (values <= 1e-5 set to 0).
    # Both masks are evaluated on the ORIGINAL singular values. Testing the
    # partially inverted array instead would zero every singular value
    # larger than 1e5, because its reciprocal falls below the 1e-5 cutoff.
    d = np.asarray(svd_info['d'], dtype=float)
    d_inv = d.copy()
    invertible = d > 1e-5
    d_inv[invertible] = 1.0 / d[invertible]
    d_inv[d <= 1e-5] = 0.0

    V = svd_info['V']

    # Select inverse transform method based on treatment type
    if is_factor and no_treats == 2:
        # Binary treatment
        var_transformed = transform_variance_binary(
            variance_svd, Dx_inv, X_orig, X_svd, V, d_inv
        )
    elif is_factor and no_treats == 3:
        # 3-level treatment
        var_transformed = transform_variance_3treat(
            variance_svd, Dx_inv, X_orig, X_svd, V, d_inv
        )
    elif is_factor and no_treats == 4:
        # 4-level treatment
        var_transformed = transform_variance_4treat(
            variance_svd, Dx_inv, X_orig, X_svd, V, d_inv
        )
    else:
        # Continuous treatment
        var_transformed = transform_variance_continuous(
            variance_svd, Dx_inv, X_orig, X_svd, V, d_inv
        )

    return var_transformed