cbps 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176) hide show
  1. cbps-0.2.0/CHANGELOG.md +97 -0
  2. cbps-0.2.0/CITATION.cff +111 -0
  3. cbps-0.2.0/CODE_OF_CONDUCT.md +135 -0
  4. cbps-0.2.0/CONTRIBUTING.md +285 -0
  5. cbps-0.2.0/LICENSE +661 -0
  6. cbps-0.2.0/MANIFEST.in +58 -0
  7. cbps-0.2.0/PKG-INFO +1090 -0
  8. cbps-0.2.0/README.md +1021 -0
  9. cbps-0.2.0/SECURITY.md +32 -0
  10. cbps-0.2.0/cbps/__init__.py +3462 -0
  11. cbps-0.2.0/cbps/constants.py +46 -0
  12. cbps-0.2.0/cbps/core/__init__.py +93 -0
  13. cbps-0.2.0/cbps/core/cbps_binary.py +1943 -0
  14. cbps-0.2.0/cbps/core/cbps_continuous.py +945 -0
  15. cbps-0.2.0/cbps/core/cbps_multitreat.py +1123 -0
  16. cbps-0.2.0/cbps/core/cbps_optimal.py +507 -0
  17. cbps-0.2.0/cbps/core/results.py +1447 -0
  18. cbps-0.2.0/cbps/data/Blackwell.csv +571 -0
  19. cbps-0.2.0/cbps/data/LaLonde.csv +3213 -0
  20. cbps-0.2.0/cbps/data/npcbps_continuous_sim.csv +501 -0
  21. cbps-0.2.0/cbps/data/nsw.csv +723 -0
  22. cbps-0.2.0/cbps/data/nsw_dw.csv +446 -0
  23. cbps-0.2.0/cbps/data/political_ads_urban_niebler.csv +16266 -0
  24. cbps-0.2.0/cbps/data/psid_controls.csv +2491 -0
  25. cbps-0.2.0/cbps/data/psid_controls2.csv +254 -0
  26. cbps-0.2.0/cbps/data/psid_controls3.csv +129 -0
  27. cbps-0.2.0/cbps/data/simulation_dgp1_seed12345.csv +201 -0
  28. cbps-0.2.0/cbps/data/simulation_dgp2_seed12345.csv +201 -0
  29. cbps-0.2.0/cbps/data/simulation_dgp3_seed12345.csv +201 -0
  30. cbps-0.2.0/cbps/data/simulation_dgp4_seed12345.csv +201 -0
  31. cbps-0.2.0/cbps/datasets/__init__.py +78 -0
  32. cbps-0.2.0/cbps/datasets/blackwell.py +112 -0
  33. cbps-0.2.0/cbps/datasets/continuous.py +223 -0
  34. cbps-0.2.0/cbps/datasets/lalonde.py +272 -0
  35. cbps-0.2.0/cbps/datasets/npcbps_sim.py +101 -0
  36. cbps-0.2.0/cbps/diagnostics/__init__.py +101 -0
  37. cbps-0.2.0/cbps/diagnostics/balance.py +760 -0
  38. cbps-0.2.0/cbps/diagnostics/balance_cbmsm_addon.py +162 -0
  39. cbps-0.2.0/cbps/diagnostics/continuous_diagnostics.py +259 -0
  40. cbps-0.2.0/cbps/diagnostics/normality.py +173 -0
  41. cbps-0.2.0/cbps/diagnostics/ocbps_conditions.py +197 -0
  42. cbps-0.2.0/cbps/diagnostics/overlap.py +198 -0
  43. cbps-0.2.0/cbps/diagnostics/plots.py +1193 -0
  44. cbps-0.2.0/cbps/diagnostics/weights_diag.py +205 -0
  45. cbps-0.2.0/cbps/highdim/__init__.py +84 -0
  46. cbps-0.2.0/cbps/highdim/gmm_loss.py +340 -0
  47. cbps-0.2.0/cbps/highdim/hdcbps.py +1078 -0
  48. cbps-0.2.0/cbps/highdim/lasso_utils.py +498 -0
  49. cbps-0.2.0/cbps/highdim/weight_funcs.py +298 -0
  50. cbps-0.2.0/cbps/inference/__init__.py +42 -0
  51. cbps-0.2.0/cbps/inference/asyvar.py +621 -0
  52. cbps-0.2.0/cbps/inference/vcov_outcome.py +217 -0
  53. cbps-0.2.0/cbps/iv/__init__.py +48 -0
  54. cbps-0.2.0/cbps/iv/cbiv.py +2603 -0
  55. cbps-0.2.0/cbps/logging_config.py +45 -0
  56. cbps-0.2.0/cbps/msm/__init__.py +45 -0
  57. cbps-0.2.0/cbps/msm/cbmsm.py +1871 -0
  58. cbps-0.2.0/cbps/msm/rank_diagnostics.py +112 -0
  59. cbps-0.2.0/cbps/nonparametric/__init__.py +58 -0
  60. cbps-0.2.0/cbps/nonparametric/cholesky_whitening.py +232 -0
  61. cbps-0.2.0/cbps/nonparametric/empirical_likelihood.py +339 -0
  62. cbps-0.2.0/cbps/nonparametric/npcbps.py +1036 -0
  63. cbps-0.2.0/cbps/nonparametric/taylor_approx.py +207 -0
  64. cbps-0.2.0/cbps/py.typed +0 -0
  65. cbps-0.2.0/cbps/sklearn/__init__.py +42 -0
  66. cbps-0.2.0/cbps/sklearn/estimator.py +378 -0
  67. cbps-0.2.0/cbps/utils/__init__.py +82 -0
  68. cbps-0.2.0/cbps/utils/formula.py +415 -0
  69. cbps-0.2.0/cbps/utils/helpers.py +378 -0
  70. cbps-0.2.0/cbps/utils/numerics.py +438 -0
  71. cbps-0.2.0/cbps/utils/r_compat.py +109 -0
  72. cbps-0.2.0/cbps/utils/validation.py +224 -0
  73. cbps-0.2.0/cbps/utils/variance_transform.py +483 -0
  74. cbps-0.2.0/cbps/utils/weights.py +586 -0
  75. cbps-0.2.0/cbps.egg-info/PKG-INFO +1090 -0
  76. cbps-0.2.0/cbps.egg-info/SOURCES.txt +174 -0
  77. cbps-0.2.0/cbps.egg-info/dependency_links.txt +1 -0
  78. cbps-0.2.0/cbps.egg-info/requires.txt +39 -0
  79. cbps-0.2.0/cbps.egg-info/top_level.txt +1 -0
  80. cbps-0.2.0/docs/Makefile +21 -0
  81. cbps-0.2.0/docs/advanced_usage.rst +517 -0
  82. cbps-0.2.0/docs/api/config.rst +85 -0
  83. cbps-0.2.0/docs/api/core.rst +359 -0
  84. cbps-0.2.0/docs/api/datasets.rst +337 -0
  85. cbps-0.2.0/docs/api/diagnostics.rst +513 -0
  86. cbps-0.2.0/docs/api/highdim.rst +266 -0
  87. cbps-0.2.0/docs/api/index.rst +279 -0
  88. cbps-0.2.0/docs/api/inference.rst +309 -0
  89. cbps-0.2.0/docs/api/iv.rst +321 -0
  90. cbps-0.2.0/docs/api/msm.rst +347 -0
  91. cbps-0.2.0/docs/api/nonparametric.rst +229 -0
  92. cbps-0.2.0/docs/conf.py +184 -0
  93. cbps-0.2.0/docs/implementation_notes.rst +133 -0
  94. cbps-0.2.0/docs/index.rst +172 -0
  95. cbps-0.2.0/docs/installation.rst +189 -0
  96. cbps-0.2.0/docs/make.bat +36 -0
  97. cbps-0.2.0/docs/quickstart.rst +333 -0
  98. cbps-0.2.0/docs/references.rst +236 -0
  99. cbps-0.2.0/docs/theory.rst +228 -0
  100. cbps-0.2.0/docs/tutorials/index.rst +153 -0
  101. cbps-0.2.0/examples/README.md +212 -0
  102. cbps-0.2.0/examples/compare_with_r.py +126 -0
  103. cbps-0.2.0/examples/replicate_fong_hazlett_imai_2018.ipynb +586 -0
  104. cbps-0.2.0/examples/replicate_fong_hazlett_imai_2018.py +323 -0
  105. cbps-0.2.0/examples/replicate_imai_ratkovic_2014.ipynb +709 -0
  106. cbps-0.2.0/examples/replicate_imai_ratkovic_2014.py +530 -0
  107. cbps-0.2.0/examples/replicate_imai_ratkovic_2015.ipynb +406 -0
  108. cbps-0.2.0/examples/replicate_imai_ratkovic_2015.py +237 -0
  109. cbps-0.2.0/examples/run_replication.py +273 -0
  110. cbps-0.2.0/examples/test_table2_quick.py +34 -0
  111. cbps-0.2.0/examples/test_vmmin_vs_r.py +179 -0
  112. cbps-0.2.0/pyproject.toml +159 -0
  113. cbps-0.2.0/requirements.txt +9 -0
  114. cbps-0.2.0/setup.cfg +4 -0
  115. cbps-0.2.0/tests/__init__.py +63 -0
  116. cbps-0.2.0/tests/binary/__init__.py +19 -0
  117. cbps-0.2.0/tests/binary/test_att_gradient.py +254 -0
  118. cbps-0.2.0/tests/binary/test_edge_cases.py +664 -0
  119. cbps-0.2.0/tests/binary/test_edge_cases_p2.py +172 -0
  120. cbps-0.2.0/tests/binary/test_integration.py +1121 -0
  121. cbps-0.2.0/tests/binary/test_separation_detection.py +478 -0
  122. cbps-0.2.0/tests/binary/test_unit.py +8273 -0
  123. cbps-0.2.0/tests/conftest.py +482 -0
  124. cbps-0.2.0/tests/continuous/__init__.py +27 -0
  125. cbps-0.2.0/tests/continuous/test_continuous.py +682 -0
  126. cbps-0.2.0/tests/core/__init__.py +29 -0
  127. cbps-0.2.0/tests/core/test_core.py +1361 -0
  128. cbps-0.2.0/tests/datasets/__init__.py +27 -0
  129. cbps-0.2.0/tests/datasets/test_datasets.py +523 -0
  130. cbps-0.2.0/tests/diagnostics/__init__.py +28 -0
  131. cbps-0.2.0/tests/diagnostics/test_diagnostics.py +2708 -0
  132. cbps-0.2.0/tests/diagnostics/test_j_test_pvalue.py +114 -0
  133. cbps-0.2.0/tests/diagnostics/test_normality.py +188 -0
  134. cbps-0.2.0/tests/diagnostics/test_ocbps_conditions.py +174 -0
  135. cbps-0.2.0/tests/diagnostics/test_omnibus_balance.py +309 -0
  136. cbps-0.2.0/tests/diagnostics/test_overlap.py +197 -0
  137. cbps-0.2.0/tests/diagnostics/test_plots.py +445 -0
  138. cbps-0.2.0/tests/diagnostics/test_weight_diagnostics.py +173 -0
  139. cbps-0.2.0/tests/highdim/__init__.py +35 -0
  140. cbps-0.2.0/tests/highdim/test_hdcbps.py +4354 -0
  141. cbps-0.2.0/tests/inference/__init__.py +24 -0
  142. cbps-0.2.0/tests/inference/test_inference.py +2213 -0
  143. cbps-0.2.0/tests/integration/__init__.py +27 -0
  144. cbps-0.2.0/tests/integration/test_pipeline.py +677 -0
  145. cbps-0.2.0/tests/iv/__init__.py +28 -0
  146. cbps-0.2.0/tests/iv/test_cbiv.py +1235 -0
  147. cbps-0.2.0/tests/monte_carlo/__init__.py +94 -0
  148. cbps-0.2.0/tests/monte_carlo/conftest.py +2835 -0
  149. cbps-0.2.0/tests/monte_carlo/paper_constants.py +1321 -0
  150. cbps-0.2.0/tests/monte_carlo/test_fan2022.py +1528 -0
  151. cbps-0.2.0/tests/monte_carlo/test_fong2018.py +1661 -0
  152. cbps-0.2.0/tests/monte_carlo/test_imai2014.py +2171 -0
  153. cbps-0.2.0/tests/monte_carlo/test_ir2015.py +1382 -0
  154. cbps-0.2.0/tests/monte_carlo/test_ning2020.py +1680 -0
  155. cbps-0.2.0/tests/msm/__init__.py +31 -0
  156. cbps-0.2.0/tests/msm/test_cbmsm.py +822 -0
  157. cbps-0.2.0/tests/msm/test_rank_diagnostics.py +121 -0
  158. cbps-0.2.0/tests/multitreat/__init__.py +30 -0
  159. cbps-0.2.0/tests/multitreat/test_multitreat.py +350 -0
  160. cbps-0.2.0/tests/nonparametric/__init__.py +30 -0
  161. cbps-0.2.0/tests/nonparametric/test_npcbps.py +4715 -0
  162. cbps-0.2.0/tests/optimal/__init__.py +30 -0
  163. cbps-0.2.0/tests/optimal/test_ocbps.py +994 -0
  164. cbps-0.2.0/tests/sklearn/__init__.py +34 -0
  165. cbps-0.2.0/tests/sklearn/test_estimator.py +380 -0
  166. cbps-0.2.0/tests/test_api.py +883 -0
  167. cbps-0.2.0/tests/test_api_improvements.py +269 -0
  168. cbps-0.2.0/tests/test_bugfix_audit.py +226 -0
  169. cbps-0.2.0/tests/test_constants.py +125 -0
  170. cbps-0.2.0/tests/test_imports.py +724 -0
  171. cbps-0.2.0/tests/test_infrastructure.py +184 -0
  172. cbps-0.2.0/tests/test_ux_polish.py +1461 -0
  173. cbps-0.2.0/tests/utils/__init__.py +27 -0
  174. cbps-0.2.0/tests/utils/test_matrix_diagnostics.py +277 -0
  175. cbps-0.2.0/tests/utils/test_utils.py +3047 -0
  176. cbps-0.2.0/tests/utils/test_weight_normalizer.py +529 -0
@@ -0,0 +1,97 @@
1
+ # Changelog
2
+
3
+ All notable changes to the CBPS Python package will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ### Changed
11
+ - Minimum Python version raised to 3.10 (Python 3.9 reached end-of-life October 2025)
12
+ - Added Python 3.13 support
13
+ - `[all]` extra no longer includes `dev` and `docs` dependencies (user-facing only)
14
+ - Replaced invalid PyPI classifier `Topic :: Scientific/Engineering :: Statistics` with `Topic :: Scientific/Engineering :: Information Analysis`
15
+ - Removed unused `setuptools_scm` from build dependencies
16
+ - Separated `[test]` optional dependencies from `[dev]` for cleaner CI installs
17
+
18
+ ### Added
19
+ - Documentation build check in CI pipeline
20
+ - mypy hook in pre-commit configuration
21
+ - `numpydoc` added to docs dependencies for NumPy-style docstring support
22
+ - `pre-commit` added to dev dependencies
23
+ - Package build verification step in CI workflow
24
+ - Bandit security scanning in pre-commit hooks
25
+ - McCabe complexity checking in flake8 configuration (max-complexity=15)
26
+ - CodeQL security scanning workflow for automated vulnerability detection
27
+ - `Framework :: Pytest` PyPI classifier
28
+ - `sphinx-copybutton` extension for documentation code blocks
29
+ - `numpydoc` and `sphinx.ext.todo` Sphinx extensions for improved documentation rendering
30
+ - Version tag validation step in publish workflow
31
+ - `timeout-minutes` for all CI jobs to prevent hanging builds
32
+ - LaTeX packages in ReadTheDocs configuration for reliable PDF builds
33
+ - `statistics` and `count` options in flake8 configuration for summary output
34
+
35
+ ### Fixed
36
+ - Codecov action token configuration updated to recommended `with: token:` syntax
37
+ - Removed E501 from flake8 `extend-ignore` so that flake8 enforces the same line-length limit that black formats to
38
+ - Fixed truncated `[tool.black]` include regex pattern in pyproject.toml
39
+ - Set `nbsphinx_allow_errors = False` in Sphinx configuration for production quality
40
+ - Removed redundant `Download` URL from pyproject.toml project URLs
41
+ - Updated copyright year range in documentation configuration
42
+
43
+ ## [0.1.0] - 2025-12-04
44
+
45
+ ### Added
46
+
47
+ #### Core Estimation Algorithms
48
+ - **CBPS** — Main function for binary/multi-valued/continuous treatments
49
+ - Binary treatments (0/1): ATT and ATE estimation with GMM and over-identified GMM
50
+ - Multi-valued treatments (3–4 levels): Multinomial logit propensity scores
51
+ - Continuous treatments: Generalized propensity score (GPS)
52
+ - Automatic treatment type detection for integer arrays (≤4 unique values)
53
+ - Formula interface with automatic intercept handling
54
+ - SVD preprocessing for numerical stability
55
+ - Over-identified GMM with J-statistic
56
+ - **Optimal CBPS (oCBPS)** — Doubly-robust estimation (Fan et al. 2022)
57
+ - **CBMSM** — Marginal structural models for longitudinal data
58
+ - **npCBPS** — Nonparametric CBPS using empirical likelihood
59
+ - **hdCBPS** — High-dimensional CBPS with LASSO variable selection
60
+ - **CBIV** — CBPS for instrumental variables
61
+
62
+ #### Inference Methods
63
+ - **AsyVar** — Sandwich variance estimator for CBPS coefficients
64
+ - **vcov_outcome** — Robust variance estimation for weighted outcome regression
65
+
66
+ #### Diagnostic and Visualization Tools
67
+ - **balance** — Covariate balance assessment (SMD for discrete, correlation for continuous)
68
+ - **summary** — Statistical summary with coefficient table and J-statistic
69
+ - **plot** — Balance plots, weight distribution plots for binary and continuous treatments
70
+
71
+ #### Data and Examples
72
+ - LaLonde NSW dataset, Blackwell dataset, continuous treatment simulation data
73
+ - 13 Python example scripts, 4 Jupyter notebook tutorials
74
+
75
+ #### scikit-learn Integration
76
+ - **CBPSEstimator** — sklearn-compatible wrapper with `fit()`, `predict_proba()`, `predict()`, `get_weights()`
77
+
78
+ #### Development Infrastructure
79
+ - Testing framework with pytest (400+ tests, >80% coverage)
80
+ - Code quality tools: black, isort, flake8, mypy
81
+ - CI/CD: GitHub Actions with multi-OS, multi-Python testing
82
+ - Documentation: Sphinx + ReadTheDocs with PDF/ePub output
83
+
84
+ ### Numerical Accuracy
85
+ - Core algorithms validated to ±1e-6 precision against R CBPS package v0.23
86
+ - Benchmark tests using LaLonde and Blackwell datasets
87
+
88
+ ### References
89
+
90
+ 1. Imai, K., & Ratkovic, M. (2014). Covariate balancing propensity score. *JRSS-B*, 76(1), 243–263.
91
+ 2. Fan, J., et al. (2022). Optimal covariate balancing conditions in propensity score estimation. *JBES*, 41(1), 97–110.
92
+ 3. Imai, K., & Ratkovic, M. (2015). Robust estimation of inverse probability weights for marginal structural models. *JASA*, 110(511), 1013–1023.
93
+ 4. Fong, C., Hazlett, C., & Imai, K. (2018). Covariate balancing propensity score for a continuous treatment. *AOAS*, 12(1), 156–177.
94
+ 5. Ning, Y., Peng, S., & Imai, K. (2020). Robust estimation of causal effects via a high-dimensional covariate balancing propensity score. *Biometrika*, 107(3), 533–554.
95
+
96
+ [Unreleased]: https://github.com/gorgeousfish/CBPS-py/compare/v0.1.0...HEAD
97
+ [0.1.0]: https://github.com/gorgeousfish/CBPS-py/releases/tag/v0.1.0
@@ -0,0 +1,111 @@
1
+ cff-version: 1.2.0
2
+ title: "cbps: Covariate Balancing Propensity Score for Python"
3
+ message: >-
4
+ If you use this software, please cite it using the metadata from this file,
5
+ as well as the relevant methodology paper(s) listed below.
6
+ type: software
7
+ authors:
8
+ - given-names: Xuanyu
9
+ family-names: Cai
10
+ email: xuanyuCAI@outlook.com
11
+ affiliation: City University of Macau
12
+ - given-names: Wenli
13
+ family-names: Xu
14
+ email: wlxu@cityu.edu.mo
15
+ affiliation: City University of Macau
16
+ repository-code: "https://github.com/gorgeousfish/CBPS-py"
17
+ url: "https://cbps.readthedocs.io"
18
+ license: AGPL-3.0
19
+ version: 0.2.0
20
+ date-released: "2026-02-16"
21
+ keywords:
22
+ - causal-inference
23
+ - propensity-score
24
+ - covariate-balancing
25
+ - treatment-effects
26
+ - observational-studies
27
+ - inverse-probability-weighting
28
+ - generalized-method-of-moments
29
+ references:
30
+ - type: article
31
+ title: "Covariate balancing propensity score"
32
+ authors:
33
+ - given-names: Kosuke
34
+ family-names: Imai
35
+ - given-names: Marc
36
+ family-names: Ratkovic
37
+ journal: "Journal of the Royal Statistical Society: Series B (Statistical Methodology)"
38
+ year: 2014
39
+ volume: 76
40
+ issue: 1
41
+ start: 243
42
+ end: 263
43
+ doi: "10.1111/rssb.12027"
44
+ - type: article
45
+ title: "Robust estimation of inverse probability weights for marginal structural models"
46
+ authors:
47
+ - given-names: Kosuke
48
+ family-names: Imai
49
+ - given-names: Marc
50
+ family-names: Ratkovic
51
+ journal: "Journal of the American Statistical Association"
52
+ year: 2015
53
+ volume: 110
54
+ issue: 511
55
+ start: 1013
56
+ end: 1023
57
+ doi: "10.1080/01621459.2014.956872"
58
+ - type: article
59
+ title: "Covariate balancing propensity score for a continuous treatment: Application to the efficacy of political advertisements"
60
+ authors:
61
+ - given-names: Christian
62
+ family-names: Fong
63
+ - given-names: Chad
64
+ family-names: Hazlett
65
+ - given-names: Kosuke
66
+ family-names: Imai
67
+ journal: "The Annals of Applied Statistics"
68
+ year: 2018
69
+ volume: 12
70
+ issue: 1
71
+ start: 156
72
+ end: 177
73
+ doi: "10.1214/17-AOAS1101"
74
+ - type: article
75
+ title: "Robust estimation of causal effects via a high-dimensional covariate balancing propensity score"
76
+ authors:
77
+ - given-names: Yang
78
+ family-names: Ning
79
+ - given-names: Sida
80
+ family-names: Peng
81
+ - given-names: Kosuke
82
+ family-names: Imai
83
+ journal: "Biometrika"
84
+ year: 2020
85
+ volume: 107
86
+ issue: 3
87
+ start: 533
88
+ end: 554
89
+ doi: "10.1093/biomet/asaa020"
90
+ - type: article
91
+ title: "Optimal covariate balancing conditions in propensity score estimation"
92
+ authors:
93
+ - given-names: Jianqing
94
+ family-names: Fan
95
+ - given-names: Kosuke
96
+ family-names: Imai
97
+ - given-names: Inbeom
98
+ family-names: Lee
99
+ - given-names: Han
100
+ family-names: Liu
101
+ - given-names: Yang
102
+ family-names: Ning
103
+ - given-names: Xiaolin
104
+ family-names: Yang
105
+ journal: "Journal of Business & Economic Statistics"
106
+ year: 2022
107
+ volume: 41
108
+ issue: 1
109
+ start: 97
110
+ end: 110
111
+ doi: "10.1080/07350015.2021.2002159"
@@ -0,0 +1,135 @@
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ We as members, contributors, and leaders pledge to make participation in our
6
+ community a harassment-free experience for everyone, regardless of age, body
7
+ size, visible or invisible disability, ethnicity, sex characteristics, gender
8
+ identity and expression, level of experience, education, socio-economic status,
9
+ nationality, personal appearance, race, caste, color, religion, or sexual
10
+ identity and orientation.
11
+
12
+ We pledge to act and interact in ways that contribute to an open, welcoming,
13
+ diverse, inclusive, and healthy community.
14
+
15
+ ## Our Standards
16
+
17
+ Examples of behavior that contributes to a positive environment for our
18
+ community include:
19
+
20
+ * Demonstrating empathy and kindness toward other people
21
+ * Being respectful of differing opinions, viewpoints, and experiences
22
+ * Giving and gracefully accepting constructive feedback
23
+ * Accepting responsibility and apologizing to those affected by our mistakes,
24
+ and learning from the experience
25
+ * Focusing on what is best not just for us as individuals, but for the overall
26
+ community
27
+
28
+ Examples of unacceptable behavior include:
29
+
30
+ * The use of sexualized language or imagery, and sexual attention or advances of
31
+ any kind
32
+ * Trolling, insulting or derogatory comments, and personal or political attacks
33
+ * Public or private harassment
34
+ * Publishing others' private information, such as a physical or email address,
35
+ without their explicit permission
36
+ * Other conduct which could reasonably be considered inappropriate in a
37
+ professional setting
38
+
39
+ ## Enforcement Responsibilities
40
+
41
+ Community leaders are responsible for clarifying and enforcing our standards of
42
+ acceptable behavior and will take appropriate and fair corrective action in
43
+ response to any behavior that they deem inappropriate, threatening, offensive,
44
+ or harmful.
45
+
46
+ Community leaders have the right and responsibility to remove, edit, or reject
47
+ comments, commits, code, wiki edits, issues, and other contributions that are
48
+ not aligned to this Code of Conduct, and will communicate reasons for moderation
49
+ decisions when appropriate.
50
+
51
+ ## Scope
52
+
53
+ This Code of Conduct applies within all community spaces, and also applies when
54
+ an individual is officially representing the community in public spaces.
55
+ Examples of representing our community include using an official email address,
56
+ posting via an official social media account, or acting as an appointed
57
+ representative at an online or offline event.
58
+
59
+ ## Enforcement
60
+
61
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
62
+ reported to the community leaders responsible for enforcement at:
63
+
64
+ * **Cai Xuanyu**: xuanyuCAI@outlook.com
65
+ * **Xu Wenli**: wlxu@cityu.edu.mo
66
+
67
+ All complaints will be reviewed and investigated promptly and fairly.
68
+
69
+ All community leaders are obligated to respect the privacy and security of the
70
+ reporter of any incident.
71
+
72
+ ## Enforcement Guidelines
73
+
74
+ Community leaders will follow these Community Impact Guidelines in determining
75
+ the consequences for any action they deem in violation of this Code of Conduct:
76
+
77
+ ### 1. Correction
78
+
79
+ **Community Impact**: Use of inappropriate language or other behavior deemed
80
+ unprofessional or unwelcome in the community.
81
+
82
+ **Consequence**: A private, written warning from community leaders, providing
83
+ clarity around the nature of the violation and an explanation of why the
84
+ behavior was inappropriate. A public apology may be requested.
85
+
86
+ ### 2. Warning
87
+
88
+ **Community Impact**: A violation through a single incident or series of
89
+ actions.
90
+
91
+ **Consequence**: A warning with consequences for continued behavior. No
92
+ interaction with the people involved, including unsolicited interaction with
93
+ those enforcing the Code of Conduct, for a specified period of time. This
94
+ includes avoiding interactions in community spaces as well as external channels
95
+ like social media. Violating these terms may lead to a temporary or permanent
96
+ ban.
97
+
98
+ ### 3. Temporary Ban
99
+
100
+ **Community Impact**: A serious violation of community standards, including
101
+ sustained inappropriate behavior.
102
+
103
+ **Consequence**: A temporary ban from any sort of interaction or public
104
+ communication with the community for a specified period of time. No public or
105
+ private interaction with the people involved, including unsolicited interaction
106
+ with those enforcing the Code of Conduct, is allowed during this period.
107
+ Violating these terms may lead to a permanent ban.
108
+
109
+ ### 4. Permanent Ban
110
+
111
+ **Community Impact**: Demonstrating a pattern of violation of community
112
+ standards, including sustained inappropriate behavior, harassment of an
113
+ individual, or aggression toward or disparagement of classes of individuals.
114
+
115
+ **Consequence**: A permanent ban from any sort of public interaction within the
116
+ community.
117
+
118
+ ## Attribution
119
+
120
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
121
+ version 2.1, available at
122
+ [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
123
+
124
+ Community Impact Guidelines were inspired by
125
+ [Mozilla's code of conduct enforcement ladder][Mozilla CoC].
126
+
127
+ For answers to common questions about this code of conduct, see the FAQ at
128
+ [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
129
+ [https://www.contributor-covenant.org/translations][translations].
130
+
131
+ [homepage]: https://www.contributor-covenant.org
132
+ [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
133
+ [Mozilla CoC]: https://github.com/mozilla/diversity
134
+ [FAQ]: https://www.contributor-covenant.org/faq
135
+ [translations]: https://www.contributor-covenant.org/translations
@@ -0,0 +1,285 @@
1
+ # Contributing to cbps
2
+
3
+ Thank you for your interest in contributing to the cbps Python package! We welcome and appreciate contributions of every kind from everyone: bug reports, feature suggestions, documentation improvements, and code.
4
+
5
+ ## Code of Conduct
6
+
7
+ By participating in this project, you agree to abide by our [Code of Conduct](CODE_OF_CONDUCT.md). Please read it before contributing.
8
+
9
+ ## First-Time Contributors
10
+
11
+ New to open source? We're happy to help you get started! Here are some ways to make your first contribution:
12
+
13
+ - **Fix a typo** in the documentation or docstrings
14
+ - **Improve an example** or add a new one
15
+ - **Report a bug** you've encountered
16
+ - **Add a test case** for an existing function
17
+
18
+ Look for issues labeled [`good first issue`](https://github.com/gorgeousfish/CBPS-py/labels/good%20first%20issue) for beginner-friendly tasks.
19
+
20
+ ## How to Contribute
21
+
22
+ ### Reporting Bugs
23
+
24
+ If you find a bug, please open an issue on our [GitHub Issues](https://github.com/gorgeousfish/CBPS-py/issues) page with:
25
+
26
+ 1. A clear, descriptive title
27
+ 2. A detailed description of the problem
28
+ 3. Steps to reproduce the issue
29
+ 4. Expected behavior vs. actual behavior
30
+ 5. Your environment (Python version, OS, package version)
31
+ 6. If possible, a minimal code example that reproduces the issue
32
+
33
+ ### Suggesting Features
34
+
35
+ We welcome feature suggestions! Please open an issue with:
36
+
37
+ 1. A clear description of the feature
38
+ 2. The motivation/use case for the feature
39
+ 3. Any relevant references (papers, other implementations, etc.)
40
+
41
+ ### Contributing Code
42
+
43
+ 1. **Open an issue first**: Before submitting a pull request with new features or significant changes, please open an issue to discuss your proposed changes. This helps avoid duplicate work and ensures your contribution aligns with the project's direction.
44
+
45
+ 2. **Fork the repository**: Create your own fork of the project.
46
+
47
+ 3. **Create a feature branch**: Use a descriptive branch name.
48
+ ```bash
49
+ git checkout -b feature/your-feature-name
50
+ ```
51
+
52
+ 4. **Make your changes**: Follow the code style guidelines below.
53
+
54
+ 5. **Write tests**: All new code should include appropriate tests.
55
+
56
+ 6. **Submit a pull request**: Include a clear description of the changes and reference any related issues.
57
+
58
+ ## Development Environment Setup
59
+
60
+ ### Prerequisites
61
+
62
+ - Python 3.10 or higher
63
+ - Git
64
+ - Virtual environment tool (venv, conda, etc.)
65
+
66
+ ### Installation
67
+
68
+ ```bash
69
+ # Clone the repository
70
+ git clone https://github.com/gorgeousfish/CBPS-py.git
71
+ cd CBPS-py
72
+
73
+ # Create and activate virtual environment
74
+ python -m venv venv
75
+ source venv/bin/activate # On Windows: venv\Scripts\activate
76
+
77
+ # Install in development (editable) mode with the development dependencies
78
+ pip install -e ".[dev]"
79
+
80
+ # Install pre-commit hooks
81
+ pre-commit install
82
+ ```
83
+
84
+ ## Code Style Guidelines
85
+
86
+ ### Formatting
87
+
88
+ We use automated tools to ensure consistent code style:
89
+
90
+ - **Black** for code formatting (88 characters per line)
91
+ - **isort** for import sorting (with black profile)
92
+ - **flake8** for linting
93
+ - **mypy** for type checking
94
+
95
+ ```bash
96
+ # Format code
97
+ black cbps/
98
+ isort cbps/
99
+
100
+ # Check code style
101
+ black --check cbps/
102
+ isort --check cbps/
103
+ flake8 cbps/
104
+ mypy cbps/
105
+ ```
106
+
107
+ ### Type Annotations
108
+
109
+ All functions must have complete type annotations. Use `# type: ignore` sparingly and only when necessary (e.g., for third-party libraries without type stubs).
110
+
111
+ ### Documentation
112
+
113
+ - All public functions, classes, and methods must have docstrings
114
+ - Use NumPy-style docstrings
115
+ - Include type information in docstrings for complex types
116
+ - Add examples where appropriate
117
+
118
+ Example:
119
+
120
+ ```python
121
+ def compute_weights(
122
+ probs: np.ndarray,
123
+ treatment: np.ndarray,
124
+ estimand: str = "ATE"
125
+ ) -> np.ndarray:
126
+ """Compute inverse probability weights.
127
+
128
+ Parameters
129
+ ----------
130
+ probs : np.ndarray
131
+ Estimated propensity scores, shape (n,).
132
+ treatment : np.ndarray
133
+ Binary treatment indicator, shape (n,).
134
+ estimand : str, optional
135
+ Target estimand, either "ATE" or "ATT". Default is "ATE".
136
+
137
+ Returns
138
+ -------
139
+ np.ndarray
140
+ Computed weights, shape (n,).
141
+
142
+ Examples
143
+ --------
144
+ >>> probs = np.array([0.3, 0.7, 0.5])
145
+ >>> treatment = np.array([0, 1, 1])
146
+ >>> weights = compute_weights(probs, treatment, estimand="ATE")
147
+ """
148
+ ```
149
+
150
+ ## Numerical Precision Requirements
151
+
152
+ **Critical**: This package maintains high numerical precision (±1e-6) for all core algorithms. This is essential for reproducing results from the original R CBPS package.
153
+
154
+ ### Key Constraints
155
+
156
+ 1. **Float64 only**: All floating-point operations must use `numpy.float64`
157
+ ```python
158
+ X = np.array(data, dtype=np.float64)
159
+ ```
160
+
161
+ 2. **Generalized inverse**: Use `scipy.linalg.pinv(V, rcond=None)` for numerical stability
162
+ ```python
163
+ invV = scipy.linalg.pinv(V, rcond=None)
164
+ ```
165
+
166
+ 3. **GLM initialization**: Use `statsmodels.GLM` for propensity score estimation
167
+ ```python
168
+ glm_fit = sm.GLM(y, X, family=sm.families.Binomial()).fit(tol=1e-8, maxiter=25)
169
+ ```
170
+
171
+ 4. **Probability clipping**: Use `np.clip(probs, 1e-6, 1-1e-6)`
172
+ ```python
173
+ probs = np.clip(probs, 1e-6, 1-1e-6)
174
+ ```
175
+
176
+ 5. **Sample weights normalization**: First step in all modules
177
+ ```python
178
+ sw = sw / sw.mean() # Ensures sw.sum() = n
179
+ ```
180
+
181
+ ## Testing Guidelines
182
+
183
+ ### Running Tests
184
+
185
+ ```bash
186
+ # Run all tests
187
+ pytest
188
+
189
+ # Run with coverage
190
+ pytest --cov=cbps --cov-report=html
191
+
192
+ # Run specific test file
193
+ pytest tests/test_api.py
194
+
195
+ # Run tests with specific markers
196
+ pytest -m "not slow" # Skip slow tests
197
+ pytest -m "r_benchmark" # Only R benchmark tests
198
+ ```
199
+
200
+ ### Writing Tests
201
+
202
+ - All new code should have tests with coverage ≥90%
203
+ - Use `numpy.testing.assert_allclose(atol=1e-6, rtol=0)` for numerical comparisons
204
+ - Set random seeds for reproducibility: `np.random.seed(12345)`
205
+ - Use pytest markers appropriately:
206
+ - `@pytest.mark.slow` for tests taking >10 seconds
207
+ - `@pytest.mark.r_benchmark` for R comparison tests
208
+ - `@pytest.mark.integration` for end-to-end tests
209
+
210
+ Example test:
211
+
212
+ ```python
213
+ import pytest
214
+ import numpy as np
215
+ from numpy.testing import assert_allclose
216
+
217
+ def test_cbps_lalonde():
218
+ """Test CBPS on LaLonde data."""
219
+ from cbps.datasets import load_lalonde
220
+ import cbps
221
+
222
+ data = load_lalonde(dehejia_wahba_only=True)
223
+ fit = cbps.CBPS(formula="treat ~ age + educ", data=data, att=1)
224
+
225
+ # Verify convergence and basic properties
226
+ assert fit.converged
227
+ assert len(fit.coefficients) == 3 # intercept + 2 covariates
228
+
229
+ @pytest.mark.r_benchmark
230
+ def test_cbps_matches_r():
231
+ """Test that Python results match R CBPS package."""
232
+ # ... comparison with R results
233
+ assert_allclose(python_coef, r_coef, atol=1e-6, rtol=0)
234
+ ```
235
+
236
+ ## Pull Request Process
237
+
238
+ 1. **Ensure all checks pass**:
239
+ ```bash
240
+ black cbps/
241
+ isort cbps/
242
+ flake8 cbps/
243
+ mypy cbps/
244
+ pytest
245
+ ```
246
+
247
+ 2. **Update documentation** if you've changed APIs or added features.
248
+
249
+ 3. **Add changelog entry** in `CHANGELOG.md` under "Unreleased" section.
250
+
251
+ 4. **Commit with clear messages** following conventional commits:
252
+ ```bash
253
+ git commit -m "feat(cbps): add support for clustered standard errors"
254
+ git commit -m "fix(multitreat): correct weight normalization"
255
+ git commit -m "docs: update installation instructions"
256
+ ```
257
+
258
+ 5. **Push and create pull request**:
259
+ ```bash
260
+ git push origin feature/your-feature-name
261
+ ```
262
+
263
+ 6. **PR Requirements**:
264
+ - All CI checks pass (GitHub Actions)
265
+ - Code coverage does not decrease
266
+ - No new linter warnings
267
+ - Numerical tests pass (±1e-6 precision)
268
+ - Documentation updated if needed
269
+
270
+ ## Getting Help
271
+
272
+ - **Questions**: Open a [GitHub Discussion](https://github.com/gorgeousfish/CBPS-py/discussions) or Issue
273
+ - **Bug Reports**: Use [GitHub Issues](https://github.com/gorgeousfish/CBPS-py/issues)
274
+ - **Documentation**: See [https://cbps.readthedocs.io](https://cbps.readthedocs.io)
275
+
276
+ ## Maintainers
277
+
278
+ - **Cai Xuanyu** - xuanyuCAI@outlook.com
279
+ - **Xu Wenli** - wlxu@cityu.edu.mo
280
+
281
+ ## Attribution
282
+
283
+ Contributors will be acknowledged in the project's documentation. We use the all-contributors specification to recognize all types of contributions.
284
+
285
+ Thank you for contributing to cbps!