cbps 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cbps-0.2.0/CHANGELOG.md +97 -0
- cbps-0.2.0/CITATION.cff +111 -0
- cbps-0.2.0/CODE_OF_CONDUCT.md +135 -0
- cbps-0.2.0/CONTRIBUTING.md +285 -0
- cbps-0.2.0/LICENSE +661 -0
- cbps-0.2.0/MANIFEST.in +58 -0
- cbps-0.2.0/PKG-INFO +1090 -0
- cbps-0.2.0/README.md +1021 -0
- cbps-0.2.0/SECURITY.md +32 -0
- cbps-0.2.0/cbps/__init__.py +3462 -0
- cbps-0.2.0/cbps/constants.py +46 -0
- cbps-0.2.0/cbps/core/__init__.py +93 -0
- cbps-0.2.0/cbps/core/cbps_binary.py +1943 -0
- cbps-0.2.0/cbps/core/cbps_continuous.py +945 -0
- cbps-0.2.0/cbps/core/cbps_multitreat.py +1123 -0
- cbps-0.2.0/cbps/core/cbps_optimal.py +507 -0
- cbps-0.2.0/cbps/core/results.py +1447 -0
- cbps-0.2.0/cbps/data/Blackwell.csv +571 -0
- cbps-0.2.0/cbps/data/LaLonde.csv +3213 -0
- cbps-0.2.0/cbps/data/npcbps_continuous_sim.csv +501 -0
- cbps-0.2.0/cbps/data/nsw.csv +723 -0
- cbps-0.2.0/cbps/data/nsw_dw.csv +446 -0
- cbps-0.2.0/cbps/data/political_ads_urban_niebler.csv +16266 -0
- cbps-0.2.0/cbps/data/psid_controls.csv +2491 -0
- cbps-0.2.0/cbps/data/psid_controls2.csv +254 -0
- cbps-0.2.0/cbps/data/psid_controls3.csv +129 -0
- cbps-0.2.0/cbps/data/simulation_dgp1_seed12345.csv +201 -0
- cbps-0.2.0/cbps/data/simulation_dgp2_seed12345.csv +201 -0
- cbps-0.2.0/cbps/data/simulation_dgp3_seed12345.csv +201 -0
- cbps-0.2.0/cbps/data/simulation_dgp4_seed12345.csv +201 -0
- cbps-0.2.0/cbps/datasets/__init__.py +78 -0
- cbps-0.2.0/cbps/datasets/blackwell.py +112 -0
- cbps-0.2.0/cbps/datasets/continuous.py +223 -0
- cbps-0.2.0/cbps/datasets/lalonde.py +272 -0
- cbps-0.2.0/cbps/datasets/npcbps_sim.py +101 -0
- cbps-0.2.0/cbps/diagnostics/__init__.py +101 -0
- cbps-0.2.0/cbps/diagnostics/balance.py +760 -0
- cbps-0.2.0/cbps/diagnostics/balance_cbmsm_addon.py +162 -0
- cbps-0.2.0/cbps/diagnostics/continuous_diagnostics.py +259 -0
- cbps-0.2.0/cbps/diagnostics/normality.py +173 -0
- cbps-0.2.0/cbps/diagnostics/ocbps_conditions.py +197 -0
- cbps-0.2.0/cbps/diagnostics/overlap.py +198 -0
- cbps-0.2.0/cbps/diagnostics/plots.py +1193 -0
- cbps-0.2.0/cbps/diagnostics/weights_diag.py +205 -0
- cbps-0.2.0/cbps/highdim/__init__.py +84 -0
- cbps-0.2.0/cbps/highdim/gmm_loss.py +340 -0
- cbps-0.2.0/cbps/highdim/hdcbps.py +1078 -0
- cbps-0.2.0/cbps/highdim/lasso_utils.py +498 -0
- cbps-0.2.0/cbps/highdim/weight_funcs.py +298 -0
- cbps-0.2.0/cbps/inference/__init__.py +42 -0
- cbps-0.2.0/cbps/inference/asyvar.py +621 -0
- cbps-0.2.0/cbps/inference/vcov_outcome.py +217 -0
- cbps-0.2.0/cbps/iv/__init__.py +48 -0
- cbps-0.2.0/cbps/iv/cbiv.py +2603 -0
- cbps-0.2.0/cbps/logging_config.py +45 -0
- cbps-0.2.0/cbps/msm/__init__.py +45 -0
- cbps-0.2.0/cbps/msm/cbmsm.py +1871 -0
- cbps-0.2.0/cbps/msm/rank_diagnostics.py +112 -0
- cbps-0.2.0/cbps/nonparametric/__init__.py +58 -0
- cbps-0.2.0/cbps/nonparametric/cholesky_whitening.py +232 -0
- cbps-0.2.0/cbps/nonparametric/empirical_likelihood.py +339 -0
- cbps-0.2.0/cbps/nonparametric/npcbps.py +1036 -0
- cbps-0.2.0/cbps/nonparametric/taylor_approx.py +207 -0
- cbps-0.2.0/cbps/py.typed +0 -0
- cbps-0.2.0/cbps/sklearn/__init__.py +42 -0
- cbps-0.2.0/cbps/sklearn/estimator.py +378 -0
- cbps-0.2.0/cbps/utils/__init__.py +82 -0
- cbps-0.2.0/cbps/utils/formula.py +415 -0
- cbps-0.2.0/cbps/utils/helpers.py +378 -0
- cbps-0.2.0/cbps/utils/numerics.py +438 -0
- cbps-0.2.0/cbps/utils/r_compat.py +109 -0
- cbps-0.2.0/cbps/utils/validation.py +224 -0
- cbps-0.2.0/cbps/utils/variance_transform.py +483 -0
- cbps-0.2.0/cbps/utils/weights.py +586 -0
- cbps-0.2.0/cbps.egg-info/PKG-INFO +1090 -0
- cbps-0.2.0/cbps.egg-info/SOURCES.txt +174 -0
- cbps-0.2.0/cbps.egg-info/dependency_links.txt +1 -0
- cbps-0.2.0/cbps.egg-info/requires.txt +39 -0
- cbps-0.2.0/cbps.egg-info/top_level.txt +1 -0
- cbps-0.2.0/docs/Makefile +21 -0
- cbps-0.2.0/docs/advanced_usage.rst +517 -0
- cbps-0.2.0/docs/api/config.rst +85 -0
- cbps-0.2.0/docs/api/core.rst +359 -0
- cbps-0.2.0/docs/api/datasets.rst +337 -0
- cbps-0.2.0/docs/api/diagnostics.rst +513 -0
- cbps-0.2.0/docs/api/highdim.rst +266 -0
- cbps-0.2.0/docs/api/index.rst +279 -0
- cbps-0.2.0/docs/api/inference.rst +309 -0
- cbps-0.2.0/docs/api/iv.rst +321 -0
- cbps-0.2.0/docs/api/msm.rst +347 -0
- cbps-0.2.0/docs/api/nonparametric.rst +229 -0
- cbps-0.2.0/docs/conf.py +184 -0
- cbps-0.2.0/docs/implementation_notes.rst +133 -0
- cbps-0.2.0/docs/index.rst +172 -0
- cbps-0.2.0/docs/installation.rst +189 -0
- cbps-0.2.0/docs/make.bat +36 -0
- cbps-0.2.0/docs/quickstart.rst +333 -0
- cbps-0.2.0/docs/references.rst +236 -0
- cbps-0.2.0/docs/theory.rst +228 -0
- cbps-0.2.0/docs/tutorials/index.rst +153 -0
- cbps-0.2.0/examples/README.md +212 -0
- cbps-0.2.0/examples/compare_with_r.py +126 -0
- cbps-0.2.0/examples/replicate_fong_hazlett_imai_2018.ipynb +586 -0
- cbps-0.2.0/examples/replicate_fong_hazlett_imai_2018.py +323 -0
- cbps-0.2.0/examples/replicate_imai_ratkovic_2014.ipynb +709 -0
- cbps-0.2.0/examples/replicate_imai_ratkovic_2014.py +530 -0
- cbps-0.2.0/examples/replicate_imai_ratkovic_2015.ipynb +406 -0
- cbps-0.2.0/examples/replicate_imai_ratkovic_2015.py +237 -0
- cbps-0.2.0/examples/run_replication.py +273 -0
- cbps-0.2.0/examples/test_table2_quick.py +34 -0
- cbps-0.2.0/examples/test_vmmin_vs_r.py +179 -0
- cbps-0.2.0/pyproject.toml +159 -0
- cbps-0.2.0/requirements.txt +9 -0
- cbps-0.2.0/setup.cfg +4 -0
- cbps-0.2.0/tests/__init__.py +63 -0
- cbps-0.2.0/tests/binary/__init__.py +19 -0
- cbps-0.2.0/tests/binary/test_att_gradient.py +254 -0
- cbps-0.2.0/tests/binary/test_edge_cases.py +664 -0
- cbps-0.2.0/tests/binary/test_edge_cases_p2.py +172 -0
- cbps-0.2.0/tests/binary/test_integration.py +1121 -0
- cbps-0.2.0/tests/binary/test_separation_detection.py +478 -0
- cbps-0.2.0/tests/binary/test_unit.py +8273 -0
- cbps-0.2.0/tests/conftest.py +482 -0
- cbps-0.2.0/tests/continuous/__init__.py +27 -0
- cbps-0.2.0/tests/continuous/test_continuous.py +682 -0
- cbps-0.2.0/tests/core/__init__.py +29 -0
- cbps-0.2.0/tests/core/test_core.py +1361 -0
- cbps-0.2.0/tests/datasets/__init__.py +27 -0
- cbps-0.2.0/tests/datasets/test_datasets.py +523 -0
- cbps-0.2.0/tests/diagnostics/__init__.py +28 -0
- cbps-0.2.0/tests/diagnostics/test_diagnostics.py +2708 -0
- cbps-0.2.0/tests/diagnostics/test_j_test_pvalue.py +114 -0
- cbps-0.2.0/tests/diagnostics/test_normality.py +188 -0
- cbps-0.2.0/tests/diagnostics/test_ocbps_conditions.py +174 -0
- cbps-0.2.0/tests/diagnostics/test_omnibus_balance.py +309 -0
- cbps-0.2.0/tests/diagnostics/test_overlap.py +197 -0
- cbps-0.2.0/tests/diagnostics/test_plots.py +445 -0
- cbps-0.2.0/tests/diagnostics/test_weight_diagnostics.py +173 -0
- cbps-0.2.0/tests/highdim/__init__.py +35 -0
- cbps-0.2.0/tests/highdim/test_hdcbps.py +4354 -0
- cbps-0.2.0/tests/inference/__init__.py +24 -0
- cbps-0.2.0/tests/inference/test_inference.py +2213 -0
- cbps-0.2.0/tests/integration/__init__.py +27 -0
- cbps-0.2.0/tests/integration/test_pipeline.py +677 -0
- cbps-0.2.0/tests/iv/__init__.py +28 -0
- cbps-0.2.0/tests/iv/test_cbiv.py +1235 -0
- cbps-0.2.0/tests/monte_carlo/__init__.py +94 -0
- cbps-0.2.0/tests/monte_carlo/conftest.py +2835 -0
- cbps-0.2.0/tests/monte_carlo/paper_constants.py +1321 -0
- cbps-0.2.0/tests/monte_carlo/test_fan2022.py +1528 -0
- cbps-0.2.0/tests/monte_carlo/test_fong2018.py +1661 -0
- cbps-0.2.0/tests/monte_carlo/test_imai2014.py +2171 -0
- cbps-0.2.0/tests/monte_carlo/test_ir2015.py +1382 -0
- cbps-0.2.0/tests/monte_carlo/test_ning2020.py +1680 -0
- cbps-0.2.0/tests/msm/__init__.py +31 -0
- cbps-0.2.0/tests/msm/test_cbmsm.py +822 -0
- cbps-0.2.0/tests/msm/test_rank_diagnostics.py +121 -0
- cbps-0.2.0/tests/multitreat/__init__.py +30 -0
- cbps-0.2.0/tests/multitreat/test_multitreat.py +350 -0
- cbps-0.2.0/tests/nonparametric/__init__.py +30 -0
- cbps-0.2.0/tests/nonparametric/test_npcbps.py +4715 -0
- cbps-0.2.0/tests/optimal/__init__.py +30 -0
- cbps-0.2.0/tests/optimal/test_ocbps.py +994 -0
- cbps-0.2.0/tests/sklearn/__init__.py +34 -0
- cbps-0.2.0/tests/sklearn/test_estimator.py +380 -0
- cbps-0.2.0/tests/test_api.py +883 -0
- cbps-0.2.0/tests/test_api_improvements.py +269 -0
- cbps-0.2.0/tests/test_bugfix_audit.py +226 -0
- cbps-0.2.0/tests/test_constants.py +125 -0
- cbps-0.2.0/tests/test_imports.py +724 -0
- cbps-0.2.0/tests/test_infrastructure.py +184 -0
- cbps-0.2.0/tests/test_ux_polish.py +1461 -0
- cbps-0.2.0/tests/utils/__init__.py +27 -0
- cbps-0.2.0/tests/utils/test_matrix_diagnostics.py +277 -0
- cbps-0.2.0/tests/utils/test_utils.py +3047 -0
- cbps-0.2.0/tests/utils/test_weight_normalizer.py +529 -0
cbps-0.2.0/CHANGELOG.md
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to the CBPS Python package will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
### Changed
|
|
11
|
+
- Minimum Python version raised to 3.10 (Python 3.9 reached end-of-life October 2025)
|
|
12
|
+
- Added Python 3.13 support
|
|
13
|
+
- `[all]` extra no longer includes `dev` and `docs` dependencies; it now installs user-facing optional dependencies only
|
|
14
|
+
- Replaced invalid PyPI classifier `Topic :: Scientific/Engineering :: Statistics` with `Topic :: Scientific/Engineering :: Information Analysis`
|
|
15
|
+
- Removed unused `setuptools_scm` from build dependencies
|
|
16
|
+
- Separated `[test]` optional dependencies from `[dev]` for cleaner CI installs
|
|
17
|
+
|
|
18
|
+
### Added
|
|
19
|
+
- Documentation build check in CI pipeline
|
|
20
|
+
- mypy hook in pre-commit configuration
|
|
21
|
+
- `numpydoc` added to docs dependencies for NumPy-style docstring support
|
|
22
|
+
- `pre-commit` added to dev dependencies
|
|
23
|
+
- Package build verification step in CI workflow
|
|
24
|
+
- Bandit security scanning in pre-commit hooks
|
|
25
|
+
- McCabe complexity checking in flake8 configuration (max-complexity=15)
|
|
26
|
+
- CodeQL security scanning workflow for automated vulnerability detection
|
|
27
|
+
- `Framework :: Pytest` PyPI classifier
|
|
28
|
+
- `sphinx-copybutton` extension for documentation code blocks
|
|
29
|
+
- `numpydoc` and `sphinx.ext.todo` Sphinx extensions for improved documentation rendering
|
|
30
|
+
- Version tag validation step in publish workflow
|
|
31
|
+
- `timeout-minutes` for all CI jobs to prevent hanging builds
|
|
32
|
+
- LaTeX packages in ReadTheDocs configuration for reliable PDF builds
|
|
33
|
+
- `statistics` and `count` options in flake8 configuration for summary output
|
|
34
|
+
|
|
35
|
+
### Fixed
|
|
36
|
+
- Codecov action token configuration updated to recommended `with: token:` syntax
|
|
37
|
+
- Removed E501 from flake8 extend-ignore so that flake8 enforces the same line-length limit that black formats to
|
|
38
|
+
- Fixed truncated `[tool.black]` include regex pattern in pyproject.toml
|
|
39
|
+
- Set `nbsphinx_allow_errors = False` in Sphinx configuration for production quality
|
|
40
|
+
- Removed redundant `Download` URL from pyproject.toml project URLs
|
|
41
|
+
- Updated copyright year range in documentation configuration
|
|
42
|
+
|
|
43
|
+
## [0.1.0] - 2025-12-04
|
|
44
|
+
|
|
45
|
+
### Added
|
|
46
|
+
|
|
47
|
+
#### Core Estimation Algorithms
|
|
48
|
+
- **CBPS** — Main function for binary/multi-valued/continuous treatments
|
|
49
|
+
- Binary treatments (0/1): ATT and ATE estimation with GMM and over-identified GMM
|
|
50
|
+
- Multi-valued treatments (3–4 levels): Multinomial logit propensity scores
|
|
51
|
+
- Continuous treatments: Generalized propensity score (GPS)
|
|
52
|
+
- Automatic treatment type detection for integer arrays (≤4 unique values)
|
|
53
|
+
- Formula interface with automatic intercept handling
|
|
54
|
+
- SVD preprocessing for numerical stability
|
|
55
|
+
- Over-identified GMM with J-statistic
|
|
56
|
+
- **Optimal CBPS (oCBPS)** — Doubly-robust estimation (Fan et al. 2022)
|
|
57
|
+
- **CBMSM** — Marginal structural models for longitudinal data
|
|
58
|
+
- **npCBPS** — Nonparametric CBPS using empirical likelihood
|
|
59
|
+
- **hdCBPS** — High-dimensional CBPS with LASSO variable selection
|
|
60
|
+
- **CBIV** — CBPS for instrumental variables
|
|
61
|
+
|
|
62
|
+
#### Inference Methods
|
|
63
|
+
- **AsyVar** — Sandwich variance estimator for CBPS coefficients
|
|
64
|
+
- **vcov_outcome** — Robust variance estimation for weighted outcome regression
|
|
65
|
+
|
|
66
|
+
#### Diagnostic and Visualization Tools
|
|
67
|
+
- **balance** — Covariate balance assessment (standardized mean differences (SMD) for discrete treatments, correlations for continuous treatments)
|
|
68
|
+
- **summary** — Statistical summary with coefficient table and J-statistic
|
|
69
|
+
- **plot** — Balance plots, weight distribution plots for binary and continuous treatments
|
|
70
|
+
|
|
71
|
+
#### Data and Examples
|
|
72
|
+
- LaLonde NSW dataset, Blackwell dataset, continuous treatment simulation data
|
|
73
|
+
- 13 Python example scripts, 4 Jupyter notebook tutorials
|
|
74
|
+
|
|
75
|
+
#### scikit-learn Integration
|
|
76
|
+
- **CBPSEstimator** — sklearn-compatible wrapper with `fit()`, `predict_proba()`, `predict()`, `get_weights()`
|
|
77
|
+
|
|
78
|
+
#### Development Infrastructure
|
|
79
|
+
- Testing framework with pytest (400+ tests, >80% coverage)
|
|
80
|
+
- Code quality tools: black, isort, flake8, mypy
|
|
81
|
+
- CI/CD: GitHub Actions with multi-OS, multi-Python testing
|
|
82
|
+
- Documentation: Sphinx + ReadTheDocs with PDF/ePub output
|
|
83
|
+
|
|
84
|
+
### Numerical Accuracy
|
|
85
|
+
- Core algorithms validated to ±1e-6 precision against R CBPS package v0.23
|
|
86
|
+
- Benchmark tests using LaLonde and Blackwell datasets
|
|
87
|
+
|
|
88
|
+
### References
|
|
89
|
+
|
|
90
|
+
1. Imai, K., & Ratkovic, M. (2014). Covariate balancing propensity score. *JRSS-B*, 76(1), 243–263.
|
|
91
|
+
2. Fan, J., et al. (2022). Optimal covariate balancing conditions in propensity score estimation. *JBES*, 41(1), 97–110.
|
|
92
|
+
3. Imai, K., & Ratkovic, M. (2015). Robust estimation of inverse probability weights for marginal structural models. *JASA*, 110(511), 1013–1023.
|
|
93
|
+
4. Fong, C., Hazlett, C., & Imai, K. (2018). Covariate balancing propensity score for a continuous treatment. *AOAS*, 12(1), 156–177.
|
|
94
|
+
5. Ning, Y., Peng, S., & Imai, K. (2020). Robust estimation of causal effects via a high-dimensional covariate balancing propensity score. *Biometrika*, 107(3), 533–554.
|
|
95
|
+
|
|
96
|
+
[Unreleased]: https://github.com/gorgeousfish/CBPS-py/compare/v0.1.0...HEAD
|
|
97
|
+
[0.1.0]: https://github.com/gorgeousfish/CBPS-py/releases/tag/v0.1.0
|
cbps-0.2.0/CITATION.cff
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
cff-version: 1.2.0
|
|
2
|
+
title: "cbps: Covariate Balancing Propensity Score for Python"
|
|
3
|
+
message: >-
|
|
4
|
+
If you use this software, please cite it using the metadata from this file,
|
|
5
|
+
as well as the relevant methodology paper(s) listed below.
|
|
6
|
+
type: software
|
|
7
|
+
authors:
|
|
8
|
+
- given-names: Xuanyu
|
|
9
|
+
family-names: Cai
|
|
10
|
+
email: xuanyuCAI@outlook.com
|
|
11
|
+
affiliation: City University of Macau
|
|
12
|
+
- given-names: Wenli
|
|
13
|
+
family-names: Xu
|
|
14
|
+
email: wlxu@cityu.edu.mo
|
|
15
|
+
affiliation: City University of Macau
|
|
16
|
+
repository-code: "https://github.com/gorgeousfish/CBPS-py"
|
|
17
|
+
url: "https://cbps.readthedocs.io"
|
|
18
|
+
license: AGPL-3.0
|
|
19
|
+
version: 0.2.0
|
|
20
|
+
date-released: "2026-02-16"
|
|
21
|
+
keywords:
|
|
22
|
+
- causal-inference
|
|
23
|
+
- propensity-score
|
|
24
|
+
- covariate-balancing
|
|
25
|
+
- treatment-effects
|
|
26
|
+
- observational-studies
|
|
27
|
+
- inverse-probability-weighting
|
|
28
|
+
- generalized-method-of-moments
|
|
29
|
+
references:
|
|
30
|
+
- type: article
|
|
31
|
+
title: "Covariate balancing propensity score"
|
|
32
|
+
authors:
|
|
33
|
+
- given-names: Kosuke
|
|
34
|
+
family-names: Imai
|
|
35
|
+
- given-names: Marc
|
|
36
|
+
family-names: Ratkovic
|
|
37
|
+
journal: "Journal of the Royal Statistical Society: Series B (Statistical Methodology)"
|
|
38
|
+
year: 2014
|
|
39
|
+
volume: 76
|
|
40
|
+
issue: 1
|
|
41
|
+
start: 243
|
|
42
|
+
end: 263
|
|
43
|
+
doi: "10.1111/rssb.12027"
|
|
44
|
+
- type: article
|
|
45
|
+
title: "Robust estimation of inverse probability weights for marginal structural models"
|
|
46
|
+
authors:
|
|
47
|
+
- given-names: Kosuke
|
|
48
|
+
family-names: Imai
|
|
49
|
+
- given-names: Marc
|
|
50
|
+
family-names: Ratkovic
|
|
51
|
+
journal: "Journal of the American Statistical Association"
|
|
52
|
+
year: 2015
|
|
53
|
+
volume: 110
|
|
54
|
+
issue: 511
|
|
55
|
+
start: 1013
|
|
56
|
+
end: 1023
|
|
57
|
+
doi: "10.1080/01621459.2014.956872"
|
|
58
|
+
- type: article
|
|
59
|
+
title: "Covariate balancing propensity score for a continuous treatment: Application to the efficacy of political advertisements"
|
|
60
|
+
authors:
|
|
61
|
+
- given-names: Christian
|
|
62
|
+
family-names: Fong
|
|
63
|
+
- given-names: Chad
|
|
64
|
+
family-names: Hazlett
|
|
65
|
+
- given-names: Kosuke
|
|
66
|
+
family-names: Imai
|
|
67
|
+
journal: "The Annals of Applied Statistics"
|
|
68
|
+
year: 2018
|
|
69
|
+
volume: 12
|
|
70
|
+
issue: 1
|
|
71
|
+
start: 156
|
|
72
|
+
end: 177
|
|
73
|
+
doi: "10.1214/17-AOAS1101"
|
|
74
|
+
- type: article
|
|
75
|
+
title: "Robust estimation of causal effects via a high-dimensional covariate balancing propensity score"
|
|
76
|
+
authors:
|
|
77
|
+
- given-names: Yang
|
|
78
|
+
family-names: Ning
|
|
79
|
+
- given-names: Sida
|
|
80
|
+
family-names: Peng
|
|
81
|
+
- given-names: Kosuke
|
|
82
|
+
family-names: Imai
|
|
83
|
+
journal: "Biometrika"
|
|
84
|
+
year: 2020
|
|
85
|
+
volume: 107
|
|
86
|
+
issue: 3
|
|
87
|
+
start: 533
|
|
88
|
+
end: 554
|
|
89
|
+
doi: "10.1093/biomet/asaa020"
|
|
90
|
+
- type: article
|
|
91
|
+
title: "Optimal covariate balancing conditions in propensity score estimation"
|
|
92
|
+
authors:
|
|
93
|
+
- given-names: Jianqing
|
|
94
|
+
family-names: Fan
|
|
95
|
+
- given-names: Kosuke
|
|
96
|
+
family-names: Imai
|
|
97
|
+
- given-names: Inbeom
|
|
98
|
+
family-names: Lee
|
|
99
|
+
- given-names: Han
|
|
100
|
+
family-names: Liu
|
|
101
|
+
- given-names: Yang
|
|
102
|
+
family-names: Ning
|
|
103
|
+
- given-names: Xiaolin
|
|
104
|
+
family-names: Yang
|
|
105
|
+
journal: "Journal of Business & Economic Statistics"
|
|
106
|
+
year: 2022
|
|
107
|
+
volume: 41
|
|
108
|
+
issue: 1
|
|
109
|
+
start: 97
|
|
110
|
+
end: 110
|
|
111
|
+
doi: "10.1080/07350015.2021.2002159"
|
cbps-0.2.0/CODE_OF_CONDUCT.md
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# Contributor Covenant Code of Conduct
|
|
2
|
+
|
|
3
|
+
## Our Pledge
|
|
4
|
+
|
|
5
|
+
We as members, contributors, and leaders pledge to make participation in our
|
|
6
|
+
community a harassment-free experience for everyone, regardless of age, body
|
|
7
|
+
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
|
8
|
+
identity and expression, level of experience, education, socio-economic status,
|
|
9
|
+
nationality, personal appearance, race, caste, color, religion, or sexual
|
|
10
|
+
identity and orientation.
|
|
11
|
+
|
|
12
|
+
We pledge to act and interact in ways that contribute to an open, welcoming,
|
|
13
|
+
diverse, inclusive, and healthy community.
|
|
14
|
+
|
|
15
|
+
## Our Standards
|
|
16
|
+
|
|
17
|
+
Examples of behavior that contributes to a positive environment for our
|
|
18
|
+
community include:
|
|
19
|
+
|
|
20
|
+
* Demonstrating empathy and kindness toward other people
|
|
21
|
+
* Being respectful of differing opinions, viewpoints, and experiences
|
|
22
|
+
* Giving and gracefully accepting constructive feedback
|
|
23
|
+
* Accepting responsibility and apologizing to those affected by our mistakes,
|
|
24
|
+
and learning from the experience
|
|
25
|
+
* Focusing on what is best not just for us as individuals, but for the overall
|
|
26
|
+
community
|
|
27
|
+
|
|
28
|
+
Examples of unacceptable behavior include:
|
|
29
|
+
|
|
30
|
+
* The use of sexualized language or imagery, and sexual attention or advances of
|
|
31
|
+
any kind
|
|
32
|
+
* Trolling, insulting or derogatory comments, and personal or political attacks
|
|
33
|
+
* Public or private harassment
|
|
34
|
+
* Publishing others' private information, such as a physical or email address,
|
|
35
|
+
without their explicit permission
|
|
36
|
+
* Other conduct which could reasonably be considered inappropriate in a
|
|
37
|
+
professional setting
|
|
38
|
+
|
|
39
|
+
## Enforcement Responsibilities
|
|
40
|
+
|
|
41
|
+
Community leaders are responsible for clarifying and enforcing our standards of
|
|
42
|
+
acceptable behavior and will take appropriate and fair corrective action in
|
|
43
|
+
response to any behavior that they deem inappropriate, threatening, offensive,
|
|
44
|
+
or harmful.
|
|
45
|
+
|
|
46
|
+
Community leaders have the right and responsibility to remove, edit, or reject
|
|
47
|
+
comments, commits, code, wiki edits, issues, and other contributions that are
|
|
48
|
+
not aligned to this Code of Conduct, and will communicate reasons for moderation
|
|
49
|
+
decisions when appropriate.
|
|
50
|
+
|
|
51
|
+
## Scope
|
|
52
|
+
|
|
53
|
+
This Code of Conduct applies within all community spaces, and also applies when
|
|
54
|
+
an individual is officially representing the community in public spaces.
|
|
55
|
+
Examples of representing our community include using an official email address,
|
|
56
|
+
posting via an official social media account, or acting as an appointed
|
|
57
|
+
representative at an online or offline event.
|
|
58
|
+
|
|
59
|
+
## Enforcement
|
|
60
|
+
|
|
61
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
|
62
|
+
reported to the community leaders responsible for enforcement at:
|
|
63
|
+
|
|
64
|
+
* **Cai Xuanyu**: xuanyuCAI@outlook.com
|
|
65
|
+
* **Xu Wenli**: wlxu@cityu.edu.mo
|
|
66
|
+
|
|
67
|
+
All complaints will be reviewed and investigated promptly and fairly.
|
|
68
|
+
|
|
69
|
+
All community leaders are obligated to respect the privacy and security of the
|
|
70
|
+
reporter of any incident.
|
|
71
|
+
|
|
72
|
+
## Enforcement Guidelines
|
|
73
|
+
|
|
74
|
+
Community leaders will follow these Community Impact Guidelines in determining
|
|
75
|
+
the consequences for any action they deem in violation of this Code of Conduct:
|
|
76
|
+
|
|
77
|
+
### 1. Correction
|
|
78
|
+
|
|
79
|
+
**Community Impact**: Use of inappropriate language or other behavior deemed
|
|
80
|
+
unprofessional or unwelcome in the community.
|
|
81
|
+
|
|
82
|
+
**Consequence**: A private, written warning from community leaders, providing
|
|
83
|
+
clarity around the nature of the violation and an explanation of why the
|
|
84
|
+
behavior was inappropriate. A public apology may be requested.
|
|
85
|
+
|
|
86
|
+
### 2. Warning
|
|
87
|
+
|
|
88
|
+
**Community Impact**: A violation through a single incident or series of
|
|
89
|
+
actions.
|
|
90
|
+
|
|
91
|
+
**Consequence**: A warning with consequences for continued behavior. No
|
|
92
|
+
interaction with the people involved, including unsolicited interaction with
|
|
93
|
+
those enforcing the Code of Conduct, for a specified period of time. This
|
|
94
|
+
includes avoiding interactions in community spaces as well as external channels
|
|
95
|
+
like social media. Violating these terms may lead to a temporary or permanent
|
|
96
|
+
ban.
|
|
97
|
+
|
|
98
|
+
### 3. Temporary Ban
|
|
99
|
+
|
|
100
|
+
**Community Impact**: A serious violation of community standards, including
|
|
101
|
+
sustained inappropriate behavior.
|
|
102
|
+
|
|
103
|
+
**Consequence**: A temporary ban from any sort of interaction or public
|
|
104
|
+
communication with the community for a specified period of time. No public or
|
|
105
|
+
private interaction with the people involved, including unsolicited interaction
|
|
106
|
+
with those enforcing the Code of Conduct, is allowed during this period.
|
|
107
|
+
Violating these terms may lead to a permanent ban.
|
|
108
|
+
|
|
109
|
+
### 4. Permanent Ban
|
|
110
|
+
|
|
111
|
+
**Community Impact**: Demonstrating a pattern of violation of community
|
|
112
|
+
standards, including sustained inappropriate behavior, harassment of an
|
|
113
|
+
individual, or aggression toward or disparagement of classes of individuals.
|
|
114
|
+
|
|
115
|
+
**Consequence**: A permanent ban from any sort of public interaction within the
|
|
116
|
+
community.
|
|
117
|
+
|
|
118
|
+
## Attribution
|
|
119
|
+
|
|
120
|
+
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
|
121
|
+
version 2.1, available at
|
|
122
|
+
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
|
|
123
|
+
|
|
124
|
+
Community Impact Guidelines were inspired by
|
|
125
|
+
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
|
|
126
|
+
|
|
127
|
+
For answers to common questions about this code of conduct, see the FAQ at
|
|
128
|
+
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
|
|
129
|
+
[https://www.contributor-covenant.org/translations][translations].
|
|
130
|
+
|
|
131
|
+
[homepage]: https://www.contributor-covenant.org
|
|
132
|
+
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
|
|
133
|
+
[Mozilla CoC]: https://github.com/mozilla/diversity
|
|
134
|
+
[FAQ]: https://www.contributor-covenant.org/faq
|
|
135
|
+
[translations]: https://www.contributor-covenant.org/translations
|
cbps-0.2.0/CONTRIBUTING.md
ADDED
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
# Contributing to cbps
|
|
2
|
+
|
|
3
|
+
Thank you for your interest in contributing to the cbps Python package! We welcome contributions from everyone and are grateful for every contribution, whether it's a bug report, feature suggestion, documentation improvement, or code contribution.
|
|
4
|
+
|
|
5
|
+
## Code of Conduct
|
|
6
|
+
|
|
7
|
+
By participating in this project, you agree to abide by our [Code of Conduct](CODE_OF_CONDUCT.md). Please read it before contributing.
|
|
8
|
+
|
|
9
|
+
## First-Time Contributors
|
|
10
|
+
|
|
11
|
+
New to open source? We're happy to help you get started! Here are some ways to make your first contribution:
|
|
12
|
+
|
|
13
|
+
- **Fix a typo** in the documentation or docstrings
|
|
14
|
+
- **Improve an example** or add a new one
|
|
15
|
+
- **Report a bug** you've encountered
|
|
16
|
+
- **Add a test case** for an existing function
|
|
17
|
+
|
|
18
|
+
Look for issues labeled [`good first issue`](https://github.com/gorgeousfish/CBPS-py/labels/good%20first%20issue) for beginner-friendly tasks.
|
|
19
|
+
|
|
20
|
+
## How to Contribute
|
|
21
|
+
|
|
22
|
+
### Reporting Bugs
|
|
23
|
+
|
|
24
|
+
If you find a bug, please open an issue on our [GitHub Issues](https://github.com/gorgeousfish/CBPS-py/issues) page with:
|
|
25
|
+
|
|
26
|
+
1. A clear, descriptive title
|
|
27
|
+
2. A detailed description of the problem
|
|
28
|
+
3. Steps to reproduce the issue
|
|
29
|
+
4. Expected behavior vs. actual behavior
|
|
30
|
+
5. Your environment (Python version, OS, package version)
|
|
31
|
+
6. If possible, a minimal code example that reproduces the issue
|
|
32
|
+
|
|
33
|
+
### Suggesting Features
|
|
34
|
+
|
|
35
|
+
We welcome feature suggestions! Please open an issue with:
|
|
36
|
+
|
|
37
|
+
1. A clear description of the feature
|
|
38
|
+
2. The motivation/use case for the feature
|
|
39
|
+
3. Any relevant references (papers, other implementations, etc.)
|
|
40
|
+
|
|
41
|
+
### Contributing Code
|
|
42
|
+
|
|
43
|
+
1. **Open an issue first**: Before submitting a pull request with new features or significant changes, please open an issue to discuss your proposed changes. This helps avoid duplicate work and ensures your contribution aligns with the project's direction.
|
|
44
|
+
|
|
45
|
+
2. **Fork the repository**: Create your own fork of the project.
|
|
46
|
+
|
|
47
|
+
3. **Create a feature branch**: Use a descriptive branch name.
|
|
48
|
+
```bash
|
|
49
|
+
git checkout -b feature/your-feature-name
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
4. **Make your changes**: Follow the code style guidelines below.
|
|
53
|
+
|
|
54
|
+
5. **Write tests**: All new code should include appropriate tests.
|
|
55
|
+
|
|
56
|
+
6. **Submit a pull request**: Include a clear description of the changes and reference any related issues.
|
|
57
|
+
|
|
58
|
+
## Development Environment Setup
|
|
59
|
+
|
|
60
|
+
### Prerequisites
|
|
61
|
+
|
|
62
|
+
- Python 3.10 or higher
|
|
63
|
+
- Git
|
|
64
|
+
- Virtual environment tool (venv, conda, etc.)
|
|
65
|
+
|
|
66
|
+
### Installation
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
# Clone the repository
|
|
70
|
+
git clone https://github.com/gorgeousfish/CBPS-py.git
|
|
71
|
+
cd CBPS-py
|
|
72
|
+
|
|
73
|
+
# Create and activate virtual environment
|
|
74
|
+
python -m venv venv
|
|
75
|
+
source venv/bin/activate # On Windows: venv\Scripts\activate
|
|
76
|
+
|
|
77
|
+
# Install in editable (development) mode with the development dependencies
|
|
78
|
+
pip install -e ".[dev]"
|
|
79
|
+
|
|
80
|
+
# Install pre-commit hooks
|
|
81
|
+
pre-commit install
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Code Style Guidelines
|
|
85
|
+
|
|
86
|
+
### Formatting
|
|
87
|
+
|
|
88
|
+
We use automated tools to ensure consistent code style:
|
|
89
|
+
|
|
90
|
+
- **Black** for code formatting (88 characters per line)
|
|
91
|
+
- **isort** for import sorting (with black profile)
|
|
92
|
+
- **flake8** for linting
|
|
93
|
+
- **mypy** for type checking
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
# Format code
|
|
97
|
+
black cbps/
|
|
98
|
+
isort cbps/
|
|
99
|
+
|
|
100
|
+
# Check code style
|
|
101
|
+
black --check cbps/
|
|
102
|
+
isort --check cbps/
|
|
103
|
+
flake8 cbps/
|
|
104
|
+
mypy cbps/
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Type Annotations
|
|
108
|
+
|
|
109
|
+
All functions must have complete type annotations. Use `# type: ignore` sparingly and only when necessary (e.g., for third-party libraries without type stubs).
|
|
110
|
+
|
|
111
|
+
### Documentation
|
|
112
|
+
|
|
113
|
+
- All public functions, classes, and methods must have docstrings
|
|
114
|
+
- Use NumPy-style docstrings
|
|
115
|
+
- Include type information in docstrings for complex types
|
|
116
|
+
- Add examples where appropriate
|
|
117
|
+
|
|
118
|
+
Example:
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
def compute_weights(
|
|
122
|
+
probs: np.ndarray,
|
|
123
|
+
treatment: np.ndarray,
|
|
124
|
+
estimand: str = "ATE"
|
|
125
|
+
) -> np.ndarray:
|
|
126
|
+
"""Compute inverse probability weights.
|
|
127
|
+
|
|
128
|
+
Parameters
|
|
129
|
+
----------
|
|
130
|
+
probs : np.ndarray
|
|
131
|
+
Estimated propensity scores, shape (n,).
|
|
132
|
+
treatment : np.ndarray
|
|
133
|
+
Binary treatment indicator, shape (n,).
|
|
134
|
+
estimand : str, optional
|
|
135
|
+
Target estimand, either "ATE" or "ATT". Default is "ATE".
|
|
136
|
+
|
|
137
|
+
Returns
|
|
138
|
+
-------
|
|
139
|
+
np.ndarray
|
|
140
|
+
Computed weights, shape (n,).
|
|
141
|
+
|
|
142
|
+
Examples
|
|
143
|
+
--------
|
|
144
|
+
>>> probs = np.array([0.3, 0.7, 0.5])
|
|
145
|
+
>>> treatment = np.array([0, 1, 1])
|
|
146
|
+
>>> weights = compute_weights(probs, treatment, estimand="ATE")
|
|
147
|
+
"""
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## Numerical Precision Requirements
|
|
151
|
+
|
|
152
|
+
**Critical**: This package maintains high numerical precision (±1e-6) for all core algorithms. This is essential for reproducing results from the original R CBPS package.
|
|
153
|
+
|
|
154
|
+
### Key Constraints
|
|
155
|
+
|
|
156
|
+
1. **Float64 only**: All floating-point operations must use `numpy.float64`
|
|
157
|
+
```python
|
|
158
|
+
X = np.array(data, dtype=np.float64)
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
2. **Generalized inverse**: Use `scipy.linalg.pinv(V, rcond=None)` for numerical stability
|
|
162
|
+
```python
|
|
163
|
+
invV = scipy.linalg.pinv(V, rcond=None)
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
3. **GLM initialization**: Use `statsmodels.api.GLM` (imported as `sm`) for propensity score estimation
|
|
167
|
+
```python
|
|
168
|
+
glm_fit = sm.GLM(y, X, family=sm.families.Binomial()).fit(tol=1e-8, maxiter=25)
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
4. **Probability clipping**: Use `np.clip(probs, 1e-6, 1-1e-6)`
|
|
172
|
+
```python
|
|
173
|
+
probs = np.clip(probs, 1e-6, 1-1e-6)
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
5. **Sample weights normalization**: Normalize sample weights as the first step in every module
|
|
177
|
+
```python
|
|
178
|
+
sw = sw / sw.mean() # Ensures sw.sum() = n
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
## Testing Guidelines
|
|
182
|
+
|
|
183
|
+
### Running Tests
|
|
184
|
+
|
|
185
|
+
```bash
|
|
186
|
+
# Run all tests
|
|
187
|
+
pytest
|
|
188
|
+
|
|
189
|
+
# Run with coverage
|
|
190
|
+
pytest --cov=cbps --cov-report=html
|
|
191
|
+
|
|
192
|
+
# Run specific test file
|
|
193
|
+
pytest tests/test_cbps.py
|
|
194
|
+
|
|
195
|
+
# Run tests with specific markers
|
|
196
|
+
pytest -m "not slow" # Skip slow tests
|
|
197
|
+
pytest -m "r_benchmark" # Only R benchmark tests
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
### Writing Tests
|
|
201
|
+
|
|
202
|
+
- All new code should have tests with coverage ≥90%
|
|
203
|
+
- Use `numpy.testing.assert_allclose(atol=1e-6, rtol=0)` for numerical comparisons
|
|
204
|
+
- Set random seeds for reproducibility: `np.random.seed(12345)`
|
|
205
|
+
- Use pytest markers appropriately:
|
|
206
|
+
- `@pytest.mark.slow` for tests taking >10 seconds
|
|
207
|
+
- `@pytest.mark.r_benchmark` for R comparison tests
|
|
208
|
+
- `@pytest.mark.integration` for end-to-end tests
|
|
209
|
+
|
|
210
|
+
Example test:
|
|
211
|
+
|
|
212
|
+
```python
|
|
213
|
+
import pytest
|
|
214
|
+
import numpy as np
|
|
215
|
+
from numpy.testing import assert_allclose
|
|
216
|
+
|
|
217
|
+
def test_cbps_lalonde():
|
|
218
|
+
"""Test CBPS on LaLonde data."""
|
|
219
|
+
from cbps.datasets import load_lalonde
|
|
220
|
+
import cbps
|
|
221
|
+
|
|
222
|
+
data = load_lalonde(dehejia_wahba_only=True)
|
|
223
|
+
fit = cbps.CBPS(formula="treat ~ age + educ", data=data, att=1)
|
|
224
|
+
|
|
225
|
+
# Verify convergence and basic properties
|
|
226
|
+
assert fit.converged
|
|
227
|
+
assert len(fit.coefficients) == 3 # intercept + 2 covariates
|
|
228
|
+
|
|
229
|
+
@pytest.mark.r_benchmark
|
|
230
|
+
def test_cbps_matches_r():
|
|
231
|
+
"""Test that Python results match R CBPS package."""
|
|
232
|
+
# ... comparison with R results
|
|
233
|
+
assert_allclose(python_coef, r_coef, atol=1e-6, rtol=0)
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
## Pull Request Process
|
|
237
|
+
|
|
238
|
+
1. **Ensure all checks pass**:
|
|
239
|
+
```bash
|
|
240
|
+
black cbps/
|
|
241
|
+
isort cbps/
|
|
242
|
+
flake8 cbps/
|
|
243
|
+
mypy cbps/
|
|
244
|
+
pytest
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
2. **Update documentation** if you've changed APIs or added features.
|
|
248
|
+
|
|
249
|
+
3. **Add a changelog entry** in `CHANGELOG.md` under the "Unreleased" section.
|
|
250
|
+
|
|
251
|
+
4. **Commit with clear messages** following conventional commits:
|
|
252
|
+
```bash
|
|
253
|
+
git commit -m "feat(cbps): add support for clustered standard errors"
|
|
254
|
+
git commit -m "fix(multitreat): correct weight normalization"
|
|
255
|
+
git commit -m "docs: update installation instructions"
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
5. **Push and create pull request**:
|
|
259
|
+
```bash
|
|
260
|
+
git push origin feature/your-feature-name
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
6. **PR Requirements**:
|
|
264
|
+
- All CI checks pass (GitHub Actions)
|
|
265
|
+
- Code coverage does not decrease
|
|
266
|
+
- No new linter warnings
|
|
267
|
+
- Numerical tests pass (±1e-6 precision)
|
|
268
|
+
- Documentation updated if needed
|
|
269
|
+
|
|
270
|
+
## Getting Help
|
|
271
|
+
|
|
272
|
+
- **Questions**: Open a [GitHub Discussion](https://github.com/gorgeousfish/CBPS-py/discussions) or Issue
|
|
273
|
+
- **Bug Reports**: Use [GitHub Issues](https://github.com/gorgeousfish/CBPS-py/issues)
|
|
274
|
+
- **Documentation**: See [https://cbps.readthedocs.io](https://cbps.readthedocs.io)
|
|
275
|
+
|
|
276
|
+
## Maintainers
|
|
277
|
+
|
|
278
|
+
- **Cai Xuanyu** - xuanyuCAI@outlook.com
|
|
279
|
+
- **Xu Wenli** - wlxu@cityu.edu.mo
|
|
280
|
+
|
|
281
|
+
## Attribution
|
|
282
|
+
|
|
283
|
+
Contributors will be acknowledged in the project's documentation. We use the all-contributors specification to recognize all types of contributions.
|
|
284
|
+
|
|
285
|
+
Thank you for contributing to cbps!
|