cbps 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cbps/__init__.py +3462 -0
- cbps/constants.py +46 -0
- cbps/core/__init__.py +93 -0
- cbps/core/cbps_binary.py +1943 -0
- cbps/core/cbps_continuous.py +945 -0
- cbps/core/cbps_multitreat.py +1123 -0
- cbps/core/cbps_optimal.py +507 -0
- cbps/core/results.py +1447 -0
- cbps/data/Blackwell.csv +571 -0
- cbps/data/LaLonde.csv +3213 -0
- cbps/data/npcbps_continuous_sim.csv +501 -0
- cbps/data/nsw.csv +723 -0
- cbps/data/nsw_dw.csv +446 -0
- cbps/data/political_ads_urban_niebler.csv +16266 -0
- cbps/data/psid_controls.csv +2491 -0
- cbps/data/psid_controls2.csv +254 -0
- cbps/data/psid_controls3.csv +129 -0
- cbps/data/simulation_dgp1_seed12345.csv +201 -0
- cbps/data/simulation_dgp2_seed12345.csv +201 -0
- cbps/data/simulation_dgp3_seed12345.csv +201 -0
- cbps/data/simulation_dgp4_seed12345.csv +201 -0
- cbps/datasets/__init__.py +78 -0
- cbps/datasets/blackwell.py +112 -0
- cbps/datasets/continuous.py +223 -0
- cbps/datasets/lalonde.py +272 -0
- cbps/datasets/npcbps_sim.py +101 -0
- cbps/diagnostics/__init__.py +101 -0
- cbps/diagnostics/balance.py +760 -0
- cbps/diagnostics/balance_cbmsm_addon.py +162 -0
- cbps/diagnostics/continuous_diagnostics.py +259 -0
- cbps/diagnostics/normality.py +173 -0
- cbps/diagnostics/ocbps_conditions.py +197 -0
- cbps/diagnostics/overlap.py +198 -0
- cbps/diagnostics/plots.py +1193 -0
- cbps/diagnostics/weights_diag.py +205 -0
- cbps/highdim/__init__.py +84 -0
- cbps/highdim/gmm_loss.py +340 -0
- cbps/highdim/hdcbps.py +1078 -0
- cbps/highdim/lasso_utils.py +498 -0
- cbps/highdim/weight_funcs.py +298 -0
- cbps/inference/__init__.py +42 -0
- cbps/inference/asyvar.py +621 -0
- cbps/inference/vcov_outcome.py +217 -0
- cbps/iv/__init__.py +48 -0
- cbps/iv/cbiv.py +2603 -0
- cbps/logging_config.py +45 -0
- cbps/msm/__init__.py +45 -0
- cbps/msm/cbmsm.py +1871 -0
- cbps/msm/rank_diagnostics.py +112 -0
- cbps/nonparametric/__init__.py +58 -0
- cbps/nonparametric/cholesky_whitening.py +232 -0
- cbps/nonparametric/empirical_likelihood.py +339 -0
- cbps/nonparametric/npcbps.py +1036 -0
- cbps/nonparametric/taylor_approx.py +207 -0
- cbps/py.typed +0 -0
- cbps/sklearn/__init__.py +42 -0
- cbps/sklearn/estimator.py +378 -0
- cbps/utils/__init__.py +82 -0
- cbps/utils/formula.py +415 -0
- cbps/utils/helpers.py +378 -0
- cbps/utils/numerics.py +438 -0
- cbps/utils/r_compat.py +109 -0
- cbps/utils/validation.py +224 -0
- cbps/utils/variance_transform.py +483 -0
- cbps/utils/weights.py +586 -0
- cbps-0.2.0.dist-info/METADATA +1090 -0
- cbps-0.2.0.dist-info/RECORD +70 -0
- cbps-0.2.0.dist-info/WHEEL +5 -0
- cbps-0.2.0.dist-info/licenses/LICENSE +661 -0
- cbps-0.2.0.dist-info/top_level.txt +1 -0
cbps/constants.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Central numerical stability constants for CBPS package.
|
|
2
|
+
|
|
3
|
+
All defaults are aligned with R CBPS v0.23 where applicable.
|
|
4
|
+
"""
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass(frozen=True)
class NumericalConfig:
    """Frozen bundle of numerical-stability settings shared by CBPS routines.

    The fields fall into four groups: propensity score clipping,
    constant-column detection, optimizer tolerances, and matrix / exponent
    safeguards.

    References
    ----------
    R CBPS v0.23: probs.min = 1e-6 (CBPSBinary.R line 4)
    R optim(): ndeps default = 1e-3
    """

    # --- Propensity score clipping ---
    # Lower bound applied to P(T|X) [R: probs.min].
    probs_min: float = 1e-6
    # Probability trim threshold used by CBMSM.
    probs_trim_msm: float = 1e-4

    # --- Column detection ---
    # A column whose standard deviation falls below this counts as constant.
    const_col_threshold: float = 1e-10

    # --- Optimization ---
    # Finite-difference step [R: optim default].
    ndeps: float = 1e-3
    # Convergence tolerance for GLM IRLS.
    glm_tol: float = 1e-8
    # Convergence tolerance on the parameters.
    optim_xtol: float = 1e-12

    # --- SVD / matrix ---
    # Singular value cutoff for MSM.
    svd_threshold_msm: float = 1e-4
    # Values with |x| > 50 are clipped before exp(x).
    log_clip_range: float = 50.0

    @property
    def probs_max(self) -> float:
        """Return the clipping ceiling, i.e. ``1 - probs_min``."""
        floor = self.probs_min
        return 1.0 - floor


# Module-wide default configuration instance
DEFAULT_CONFIG = NumericalConfig()
|
cbps/core/__init__.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core CBPS Algorithm Implementation
|
|
3
|
+
|
|
4
|
+
This module implements the fundamental covariate balancing propensity score (CBPS)
|
|
5
|
+
algorithms for various treatment modalities. The CBPS methodology extends
|
|
6
|
+
traditional propensity score estimation by directly incorporating covariate balance
|
|
7
|
+
conditions into the estimation framework through generalized method of moments
|
|
8
|
+
(GMM) optimization.
|
|
9
|
+
|
|
10
|
+
Algorithm Components
|
|
11
|
+
--------------------
|
|
12
|
+
|
|
13
|
+
The module provides implementations for the following treatment types:
|
|
14
|
+
|
|
15
|
+
* **Binary treatments** (`cbps_binary_fit`): Standard CBPS for estimating average
|
|
16
|
+
treatment effects (ATE) and average treatment effects on the treated (ATT)
|
|
17
|
+
using logistic regression models within the GMM framework
|
|
18
|
+
|
|
19
|
+
* **Continuous treatments** (`cbps_continuous_fit`): Generalized propensity score
|
|
20
|
+
(GPS) estimation for continuous treatment variables, extending the CBPS
|
|
21
|
+
methodology to parametric continuous treatment models
|
|
22
|
+
|
|
23
|
+
* **Multi-valued treatments** (`cbps_3treat_fit`, `cbps_4treat_fit`): Extension
|
|
24
|
+
to categorical treatments with three or four levels using multinomial logistic
|
|
25
|
+
regression models
|
|
26
|
+
|
|
27
|
+
* **Optimal CBPS** (`cbps_optimal_2treat`): Doubly robust estimation that
|
|
28
|
+
incorporates outcome model information to improve efficiency while maintaining
|
|
29
|
+
robustness to model misspecification
|
|
30
|
+
|
|
31
|
+
Statistical Framework
|
|
32
|
+
---------------------
|
|
33
|
+
|
|
34
|
+
For binary treatment assignment :math:`T \\in \\{0,1\\}` and covariates
|
|
35
|
+
:math:`X`, the CBPS estimator solves the following GMM optimization problem:
|
|
36
|
+
|
|
37
|
+
.. math::
|
|
38
|
+
\\hat{\\beta} = \\arg\\min_{\\beta} \\, \\frac{1}{2} g_n(\\beta)' W_n g_n(\\beta)
|
|
39
|
+
|
|
40
|
+
where the moment conditions :math:`g_n(\\beta)` combine:
|
|
41
|
+
|
|
42
|
+
1. **Score condition** (logistic model): :math:`E[X_i(T_i - e(X_i, \\beta))] = 0`
|
|
43
|
+
2. **Balance conditions** (ATE): :math:`E\\left[\\frac{T_i - e(X_i, \\beta)}{e(X_i, \\beta)(1 - e(X_i, \\beta))} X_i\\right] = 0`
|
|
44
|
+
|
|
45
|
+
The weight matrix :math:`W_n` is chosen optimally to achieve the
|
|
46
|
+
Hansen (1982) efficiency bound within the class of GMM estimators.
|
|
47
|
+
|
|
48
|
+
Computational Methods
|
|
49
|
+
---------------------
|
|
50
|
+
|
|
51
|
+
The implementations employ numerical optimization algorithms suitable for
|
|
52
|
+
the non-convex objective functions that arise in CBPS estimation:
|
|
53
|
+
|
|
54
|
+
* Two-step GMM estimator for computational efficiency
|
|
55
|
+
* Continuous-updating GMM for improved finite-sample properties
|
|
56
|
+
* Newton-Raphson and BFGS algorithms with analytical gradients
|
|
57
|
+
|
|
58
|
+
References
|
|
59
|
+
----------
|
|
60
|
+
.. [1] Imai, K. and Ratkovic, M. (2014). Covariate balancing propensity score.
|
|
61
|
+
Journal of the Royal Statistical Society, Series B 76(1), 243-263.
|
|
62
|
+
https://doi.org/10.1111/rssb.12027
|
|
63
|
+
|
|
64
|
+
.. [2] Hansen, L. P. (1982). Large sample properties of generalized method
|
|
65
|
+
of moments estimators. Econometrica 50(4), 1029-1054.
|
|
66
|
+
"""
|
|
67
|
+
|
|
68
|
+
# Binary treatment CBPS implementation
|
|
69
|
+
from .cbps_binary import cbps_binary_fit
|
|
70
|
+
|
|
71
|
+
# Continuous treatment CBPS (Generalized Propensity Score)
|
|
72
|
+
from .cbps_continuous import cbps_continuous_fit
|
|
73
|
+
|
|
74
|
+
# Multi-valued treatment CBPS for categorical treatments
|
|
75
|
+
from .cbps_multitreat import cbps_3treat_fit, cbps_4treat_fit
|
|
76
|
+
|
|
77
|
+
# Optimal CBPS with dual balancing conditions
|
|
78
|
+
from .cbps_optimal import cbps_optimal_2treat
|
|
79
|
+
|
|
80
|
+
# Result classes and summary statistics
|
|
81
|
+
from .results import CBPSResults, CBPSSummary, j_test_pvalue
|
|
82
|
+
|
|
83
|
+
# Names re-exported by this package: fitting routines first, result helpers after.
__all__ = [
    # Estimators
    "cbps_binary_fit",
    "cbps_continuous_fit",
    "cbps_3treat_fit",
    "cbps_4treat_fit",
    "cbps_optimal_2treat",
    # Results and summary statistics
    "CBPSResults",
    "CBPSSummary",
    "j_test_pvalue",
]
|
|
93
|
+
|