cbps 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. cbps/__init__.py +3462 -0
  2. cbps/constants.py +46 -0
  3. cbps/core/__init__.py +93 -0
  4. cbps/core/cbps_binary.py +1943 -0
  5. cbps/core/cbps_continuous.py +945 -0
  6. cbps/core/cbps_multitreat.py +1123 -0
  7. cbps/core/cbps_optimal.py +507 -0
  8. cbps/core/results.py +1447 -0
  9. cbps/data/Blackwell.csv +571 -0
  10. cbps/data/LaLonde.csv +3213 -0
  11. cbps/data/npcbps_continuous_sim.csv +501 -0
  12. cbps/data/nsw.csv +723 -0
  13. cbps/data/nsw_dw.csv +446 -0
  14. cbps/data/political_ads_urban_niebler.csv +16266 -0
  15. cbps/data/psid_controls.csv +2491 -0
  16. cbps/data/psid_controls2.csv +254 -0
  17. cbps/data/psid_controls3.csv +129 -0
  18. cbps/data/simulation_dgp1_seed12345.csv +201 -0
  19. cbps/data/simulation_dgp2_seed12345.csv +201 -0
  20. cbps/data/simulation_dgp3_seed12345.csv +201 -0
  21. cbps/data/simulation_dgp4_seed12345.csv +201 -0
  22. cbps/datasets/__init__.py +78 -0
  23. cbps/datasets/blackwell.py +112 -0
  24. cbps/datasets/continuous.py +223 -0
  25. cbps/datasets/lalonde.py +272 -0
  26. cbps/datasets/npcbps_sim.py +101 -0
  27. cbps/diagnostics/__init__.py +101 -0
  28. cbps/diagnostics/balance.py +760 -0
  29. cbps/diagnostics/balance_cbmsm_addon.py +162 -0
  30. cbps/diagnostics/continuous_diagnostics.py +259 -0
  31. cbps/diagnostics/normality.py +173 -0
  32. cbps/diagnostics/ocbps_conditions.py +197 -0
  33. cbps/diagnostics/overlap.py +198 -0
  34. cbps/diagnostics/plots.py +1193 -0
  35. cbps/diagnostics/weights_diag.py +205 -0
  36. cbps/highdim/__init__.py +84 -0
  37. cbps/highdim/gmm_loss.py +340 -0
  38. cbps/highdim/hdcbps.py +1078 -0
  39. cbps/highdim/lasso_utils.py +498 -0
  40. cbps/highdim/weight_funcs.py +298 -0
  41. cbps/inference/__init__.py +42 -0
  42. cbps/inference/asyvar.py +621 -0
  43. cbps/inference/vcov_outcome.py +217 -0
  44. cbps/iv/__init__.py +48 -0
  45. cbps/iv/cbiv.py +2603 -0
  46. cbps/logging_config.py +45 -0
  47. cbps/msm/__init__.py +45 -0
  48. cbps/msm/cbmsm.py +1871 -0
  49. cbps/msm/rank_diagnostics.py +112 -0
  50. cbps/nonparametric/__init__.py +58 -0
  51. cbps/nonparametric/cholesky_whitening.py +232 -0
  52. cbps/nonparametric/empirical_likelihood.py +339 -0
  53. cbps/nonparametric/npcbps.py +1036 -0
  54. cbps/nonparametric/taylor_approx.py +207 -0
  55. cbps/py.typed +0 -0
  56. cbps/sklearn/__init__.py +42 -0
  57. cbps/sklearn/estimator.py +378 -0
  58. cbps/utils/__init__.py +82 -0
  59. cbps/utils/formula.py +415 -0
  60. cbps/utils/helpers.py +378 -0
  61. cbps/utils/numerics.py +438 -0
  62. cbps/utils/r_compat.py +109 -0
  63. cbps/utils/validation.py +224 -0
  64. cbps/utils/variance_transform.py +483 -0
  65. cbps/utils/weights.py +586 -0
  66. cbps-0.2.0.dist-info/METADATA +1090 -0
  67. cbps-0.2.0.dist-info/RECORD +70 -0
  68. cbps-0.2.0.dist-info/WHEEL +5 -0
  69. cbps-0.2.0.dist-info/licenses/LICENSE +661 -0
  70. cbps-0.2.0.dist-info/top_level.txt +1 -0
cbps/constants.py ADDED
@@ -0,0 +1,46 @@
1
+ """Central numerical stability constants for CBPS package.
2
+
3
+ All defaults are aligned with R CBPS v0.23 where applicable.
4
+ """
5
+ from dataclasses import dataclass
6
+
7
+
8
@dataclass(frozen=True)
class NumericalConfig:
    """Frozen bundle of numerical-stability settings used by CBPS routines.

    Instances are immutable (``frozen=True``): assigning to a field after
    construction raises ``dataclasses.FrozenInstanceError``.

    Attributes
    ----------
    probs_min
        Lower clipping bound for P(T|X) (R: ``probs.min``).
    probs_trim_msm
        Probability trim threshold for CBMSM.
    const_col_threshold
        A column whose standard deviation is below this value is treated
        as constant.
    ndeps
        Finite-difference step (R ``optim()`` default).
    glm_tol
        Convergence tolerance for GLM IRLS.
    optim_xtol
        Convergence tolerance on the parameters.
    svd_threshold_msm
        Singular-value cutoff for MSM.
    log_clip_range
        Values with ``|x| > log_clip_range`` are clipped before ``exp(x)``.

    References
    ----------
    R CBPS v0.23: probs.min = 1e-6 (CBPSBinary.R line 4)
    R optim(): ndeps default = 1e-3
    """

    # --- propensity score clipping ---
    probs_min: float = 1e-6
    probs_trim_msm: float = 1e-4

    # --- constant-column detection ---
    const_col_threshold: float = 1e-10

    # --- optimization ---
    ndeps: float = 1e-3
    glm_tol: float = 1e-8
    optim_xtol: float = 1e-12

    # --- SVD / matrix computations ---
    svd_threshold_msm: float = 1e-4
    log_clip_range: float = 50.0

    @property
    def probs_max(self) -> float:
        """Return the upper clipping bound, the mirror image of ``probs_min``."""
        lower_bound = self.probs_min
        return 1.0 - lower_bound


# Package-wide default settings (every field left at its declared default).
DEFAULT_CONFIG = NumericalConfig()
cbps/core/__init__.py ADDED
@@ -0,0 +1,93 @@
1
+ """
2
+ Core CBPS Algorithm Implementation
3
+
4
+ This module implements the fundamental covariate balancing propensity score (CBPS)
5
+ algorithms for various treatment modalities. The CBPS methodology extends
6
+ traditional propensity score estimation by directly incorporating covariate balance
7
+ conditions into the estimation framework through generalized method of moments
8
+ (GMM) optimization.
9
+
10
+ Algorithm Components
11
+ --------------------
12
+
13
+ The module provides implementations for the following treatment types:
14
+
15
+ * **Binary treatments** (`cbps_binary_fit`): Standard CBPS for estimating average
16
+ treatment effects (ATE) and average treatment effects on the treated (ATT)
17
+ using logistic regression models within the GMM framework
18
+
19
+ * **Continuous treatments** (`cbps_continuous_fit`): Generalized propensity score
20
+ (GPS) estimation for continuous treatment variables, extending the CBPS
21
+ methodology to parametric continuous treatment models
22
+
23
+ * **Multi-valued treatments** (`cbps_3treat_fit`, `cbps_4treat_fit`): Extension
24
+ to categorical treatments with three or four levels using multinomial logistic
25
+ regression models
26
+
27
+ * **Optimal CBPS** (`cbps_optimal_2treat`): Doubly robust estimation that
28
+ incorporates outcome model information to improve efficiency while maintaining
29
+ robustness to model misspecification
30
+
31
+ Statistical Framework
32
+ ---------------------
33
+
34
+ For binary treatment assignment :math:`T \\in \\{0,1\\}` and covariates
35
+ :math:`X`, the CBPS estimator solves the following GMM optimization problem:
36
+
37
+ .. math::
38
+ \\hat{\\beta} = \\arg\\min_{\\beta} \\, \\frac{1}{2} g_n(\\beta)' W_n g_n(\\beta)
39
+
40
+ where the moment conditions :math:`g_n(\\beta)` combine:
41
+
42
+ 1. **Score condition** (logistic model): :math:`E[X_i (T_i - e(X_i, \\beta))] = 0`
43
+ 2. **Balance conditions** (ATE): :math:`E[X_i (T_i - e(X_i, \\beta)) / (e(X_i, \\beta)(1 - e(X_i, \\beta)))] = 0`
44
+
45
+ The weight matrix :math:`W_n` is chosen optimally to achieve the
46
+ Hansen (1982) efficiency bound within the class of GMM estimators.
47
+
48
+ Computational Methods
49
+ ---------------------
50
+
51
+ The implementations employ numerical optimization algorithms suitable for
52
+ the non-convex objective functions that arise in CBPS estimation:
53
+
54
+ * Two-step GMM estimator for computational efficiency
55
+ * Continuous-updating GMM for improved finite-sample properties
56
+ * Newton-Raphson and BFGS algorithms with analytical gradients
57
+
58
+ References
59
+ ----------
60
+ .. [1] Imai, K. and Ratkovic, M. (2014). Covariate balancing propensity score.
61
+ Journal of the Royal Statistical Society, Series B 76(1), 243-263.
62
+ https://doi.org/10.1111/rssb.12027
63
+
64
+ .. [2] Hansen, L. P. (1982). Large sample properties of generalized method
65
+ of moments estimators. Econometrica 50(4), 1029-1054.
66
+ """
67
+
68
+ # Binary treatment CBPS implementation
69
+ from .cbps_binary import cbps_binary_fit
70
+
71
+ # Continuous treatment CBPS (Generalized Propensity Score)
72
+ from .cbps_continuous import cbps_continuous_fit
73
+
74
+ # Multi-valued treatment CBPS for categorical treatments
75
+ from .cbps_multitreat import cbps_3treat_fit, cbps_4treat_fit
76
+
77
+ # Optimal CBPS with dual balancing conditions
78
+ from .cbps_optimal import cbps_optimal_2treat
79
+
80
+ # Result classes and summary statistics
81
+ from .results import CBPSResults, CBPSSummary, j_test_pvalue
82
+
83
+ __all__ = [
84
+ 'cbps_binary_fit',
85
+ 'cbps_continuous_fit',
86
+ 'cbps_3treat_fit',
87
+ 'cbps_4treat_fit',
88
+ 'cbps_optimal_2treat',
89
+ 'CBPSResults',
90
+ 'CBPSSummary',
91
+ 'j_test_pvalue',
92
+ ]
93
+