pycausal-inference-joshlim 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Josh Lim
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,80 @@
1
+ Metadata-Version: 2.4
2
+ Name: pycausal-inference-joshlim
3
+ Version: 0.1.0
4
+ Summary: A Python package for causal inference methods including ATE estimation, propensity score methods, and meta-learners
5
+ Author-email: Josh Lim <j.lim703@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/jhl126/pycausal-inference-joshlim
8
+ Project-URL: Documentation, https://github.com/jhl126/pycausal-inference-joshlim#readme
9
+ Project-URL: Repository, https://github.com/jhl126/pycausal-inference-joshlim
10
+ Project-URL: Bug Tracker, https://github.com/jhl126/pycausal-inference-joshlim/issues
11
+ Keywords: causal inference,statistics,machine learning,treatment effects
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Topic :: Scientific/Engineering :: Mathematics
21
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
22
+ Requires-Python: >=3.8
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: pandas>=1.3.0
26
+ Requires-Dist: numpy>=1.21.0
27
+ Requires-Dist: scipy>=1.7.0
28
+ Requires-Dist: scikit-learn>=1.0.0
29
+ Requires-Dist: lightgbm>=3.3.0
30
+ Requires-Dist: patsy>=0.5.0
31
+ Provides-Extra: dev
32
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
33
+ Requires-Dist: pytest-cov>=3.0.0; extra == "dev"
34
+ Requires-Dist: black>=22.0.0; extra == "dev"
35
+ Requires-Dist: pylint>=2.12.0; extra == "dev"
36
+ Requires-Dist: mypy>=0.950; extra == "dev"
37
+ Dynamic: license-file
38
+
39
+ [![Tests](https://github.com/jhl126/pycausal-inference-joshlim/workflows/Tests/badge.svg)](https://github.com/jhl126/pycausal-inference-joshlim/actions)
40
+
41
+ # Causal Inference Python Package - Josh Lim
42
+
43
+ This package provides key causal inference methods. These methods include ATE estimation from randomized experiments, propensity score methods, and meta-learners.
44
+
45
+ ## Installation
46
+
47
+ ```bash
48
+ # Clone the repository
49
+ git clone https://github.com/jhl126/pycausal-inference-joshlim.git
50
+ cd pycausal-inference-joshlim
51
+
52
+ # Install in editable mode
53
+ uv pip install -e .
54
+ ```
55
+
56
+ ## Usage
57
+
58
+ Import functions with the following code:
59
+
60
+ ```python
61
+ from pycausal_inference_joshlim import calculate_ate_ci, calculate_ate_pvalue
62
+ from pycausal_inference_joshlim import ipw, doubly_robust
63
+ from pycausal_inference_joshlim import s_learner_discrete, t_learner_discrete, x_learner_discrete, double_ml_cate
64
+ ```
65
+
66
+ ## API Documentation
67
+
68
+ ### RCT Module
69
+ - `calculate_ate_ci(data)` - Calculates the average treatment effect (ATE) and confidence interval from randomized experiment data
70
+ - `calculate_ate_pvalue(data)` - Calculates the p-value for the ATE estimate
71
+
72
+ ### Propensity Score Module
73
+ - `ipw(data)` - Estimates the ATE using inverse probability weighting
74
+ - `doubly_robust(data)` - Estimates the ATE using the doubly robust estimator
75
+
76
+ ### Meta-Learners Module
77
+ - `s_learner_discrete(data)` - Estimates heterogeneous treatment effects using the S-Learner approach
78
+ - `t_learner_discrete(data)` - Estimates heterogeneous treatment effects using the T-Learner approach
79
+ - `x_learner_discrete(data)` - Estimates heterogeneous treatment effects using the X-Learner approach
80
+ - `double_ml_cate(data)` - Estimates heterogeneous treatment effects using Double ML
@@ -0,0 +1,42 @@
1
+ [![Tests](https://github.com/jhl126/pycausal-inference-joshlim/workflows/Tests/badge.svg)](https://github.com/jhl126/pycausal-inference-joshlim/actions)
2
+
3
+ # Causal Inference Python Package - Josh Lim
4
+
5
+ This package provides key causal inference methods. These methods include ATE estimation from randomized experiments, propensity score methods, and meta-learners.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ # Clone the repository
11
+ git clone https://github.com/jhl126/pycausal-inference-joshlim.git
12
+ cd pycausal-inference-joshlim
13
+
14
+ # Install in editable mode
15
+ uv pip install -e .
16
+ ```
17
+
18
+ ## Usage
19
+
20
+ Import functions with the following code:
21
+
22
+ ```python
23
+ from pycausal_inference_joshlim import calculate_ate_ci, calculate_ate_pvalue
24
+ from pycausal_inference_joshlim import ipw, doubly_robust
25
+ from pycausal_inference_joshlim import s_learner_discrete, t_learner_discrete, x_learner_discrete, double_ml_cate
26
+ ```
27
+
28
+ ## API Documentation
29
+
30
+ ### RCT Module
31
+ - `calculate_ate_ci(data)` - Calculates the average treatment effect (ATE) and confidence interval from randomized experiment data
32
+ - `calculate_ate_pvalue(data)` - Calculates the p-value for the ATE estimate
33
+
34
+ ### Propensity Score Module
35
+ - `ipw(df, ps_formula, T, Y)` - Estimates the ATE using inverse probability weighting
36
+ - `doubly_robust(df, formula, T, Y)` - Estimates the ATE using the doubly robust estimator
37
+
38
+ ### Meta-Learners Module
39
+ - `s_learner_discrete(data)` - Estimates heterogeneous treatment effects using the S-Learner approach
40
+ - `t_learner_discrete(data)` - Estimates heterogeneous treatment effects using the T-Learner approach
41
+ - `x_learner_discrete(data)` - Estimates heterogeneous treatment effects using the X-Learner approach
42
+ - `double_ml_cate(data)` - Estimates heterogeneous treatment effects using Double ML
@@ -0,0 +1,24 @@
1
"""pycausal_inference_joshlim - Causal Inference Toolkit"""

# Package version; keep in sync with pyproject.toml.
__version__ = "0.1.0"

# Import key functions for easy access
from .rct import calculate_ate_ci, calculate_ate_pvalue
from .propensity import ipw, doubly_robust
from .meta_learners import (
    s_learner_discrete,
    t_learner_discrete,
    x_learner_discrete,
    double_ml_cate
)

# Explicit public API, used by "from pycausal_inference_joshlim import *".
__all__ = [
    "calculate_ate_ci",
    "calculate_ate_pvalue",
    "ipw",
    "doubly_robust",
    "s_learner_discrete",
    "t_learner_discrete",
    "x_learner_discrete",
    "double_ml_cate",
]
@@ -0,0 +1,163 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from lightgbm import LGBMRegressor
4
+ from sklearn.linear_model import LogisticRegression
5
+
6
+
7
def simple_data(self=None):
    """Generate simple synthetic data with a known constant treatment effect.

    NOTE(review): the ``self`` parameter is vestigial -- this is a
    module-level function, apparently a leftover pytest-fixture method.
    It is kept as an optional parameter for backward compatibility and
    is ignored.

    Returns:
        Tuple[pd.DataFrame, pd.DataFrame]: (train, test) frames with columns
        'x1', 'x2', 't', 'y' -- 800 training rows and 200 test rows.
    """
    np.random.seed(42)
    n = 1000

    # Covariates
    x1 = np.random.normal(0, 1, n)
    x2 = np.random.normal(0, 1, n)

    # Treatment assignment (confounded: probability depends on x1, x2)
    prob_t = 1 / (1 + np.exp(-(0.5 * x1 + 0.3 * x2)))
    t = np.random.binomial(1, prob_t, n)

    # Outcome with constant treatment effect = 2.0
    y = 2.0 * t + x1 + 0.5 * x2 + np.random.normal(0, 0.5, n)

    df = pd.DataFrame({'x1': x1, 'x2': x2, 't': t, 'y': y})

    # Split into train/test
    train = df.iloc[:800].copy()
    test = df.iloc[800:].copy()

    return train, test
30
+
31
def heterogeneous_data(self=None):
    """Generate data with a heterogeneous treatment effect CATE(x1) = 1 + 0.5*x1.

    NOTE(review): the ``self`` parameter is vestigial -- this is a
    module-level function, apparently a leftover pytest-fixture method.
    It is kept as an optional parameter for backward compatibility and
    is ignored.

    Returns:
        Tuple[pd.DataFrame, pd.DataFrame]: (train, test) frames with columns
        'x1', 'x2', 't', 'y' -- 1200 training rows and 300 test rows.
    """
    np.random.seed(123)
    n = 1500

    # Covariates
    x1 = np.random.normal(0, 1, n)
    x2 = np.random.normal(0, 1, n)

    # Treatment assignment (confounded through x1 only)
    prob_t = 1 / (1 + np.exp(-(0.4 * x1)))
    t = np.random.binomial(1, prob_t, n)

    # Outcome with heterogeneous effect: effect depends on x1
    # CATE(x1) = 1 + 0.5*x1
    te = 1.0 + 0.5 * x1
    y = te * t + x1 + 0.3 * x2 + np.random.normal(0, 0.5, n)

    df = pd.DataFrame({'x1': x1, 'x2': x2, 't': t, 'y': y})

    train = df.iloc[:1200].copy()
    test = df.iloc[1200:].copy()

    return train, test
55
+
56
def continuous_treatment_data(self=None):
    """Generate data with a continuous treatment and linear unit effect.

    NOTE(review): the ``self`` parameter is vestigial -- this is a
    module-level function, apparently a leftover pytest-fixture method.
    It is kept as an optional parameter for backward compatibility and
    is ignored.

    Returns:
        Tuple[pd.DataFrame, pd.DataFrame]: (train, test) frames with columns
        'x1', 'x2', 't', 'y' -- 800 training rows and 200 test rows; 't'
        is continuous rather than binary.
    """
    np.random.seed(789)
    n = 1000

    # Covariates
    x1 = np.random.normal(0, 1, n)
    x2 = np.random.normal(0, 1, n)

    # Continuous treatment, centered around 10 and shifted by the covariates
    t = 10 + x1 + 2*x2 + np.random.normal(0, 1, n)

    # Outcome: linear effect of treatment (true effect = 1 per unit of t)
    y = t + x1 + 0.5*x2 + np.random.normal(0, 0.5, n)

    df = pd.DataFrame({'x1': x1, 'x2': x2, 't': t, 'y': y})

    train = df.iloc[:800].copy()
    test = df.iloc[800:].copy()

    return train, test
77
+
78
def s_learner_discrete(train, test, X, T, y) -> pd.DataFrame:
    """S-Learner CATE estimation for a binary treatment.

    Fits a single model on the covariates plus the treatment indicator,
    then scores the test set under t=1 and t=0; the difference is the
    estimated conditional average treatment effect.

    Returns a copy of ``test`` with an added 'cate' column.
    """
    features = X + [T]

    learner = LGBMRegressor()
    learner.fit(train[features], train[y])

    # Counterfactual copies of the test set: everyone untreated / treated.
    untreated = test.copy()
    treated = test.copy()
    untreated[T] = 0
    treated[T] = 1

    effect = learner.predict(treated[features]) - learner.predict(untreated[features])

    result = test.copy()
    result['cate'] = effect

    return result
94
+
95
def t_learner_discrete(train, test, X, T, y) -> pd.DataFrame:
    """T-Learner CATE estimation for a binary treatment.

    Fits separate outcome models on the control and treated subsets;
    CATE is the treated-model prediction minus the control-model
    prediction on the test covariates.

    Returns a copy of ``test`` with an added 'cate' column.
    """
    control = train.loc[train[T] == 0]
    treated = train.loc[train[T] == 1]

    m_control = LGBMRegressor()
    m_treated = LGBMRegressor()

    m_control.fit(control[X], control[y])
    m_treated.fit(treated[X], treated[y])

    effect = m_treated.predict(test[X]) - m_control.predict(test[X])

    result = test.copy()
    result['cate'] = effect

    return result
111
+
112
def x_learner_discrete(train, test, X, T, y) -> pd.DataFrame:
    """X-Learner CATE estimation for a binary treatment.

    Stage 1 fits per-arm outcome models; stage 2 regresses imputed
    individual treatment effects (pseudo-outcomes) on the covariates;
    the two stage-2 estimates are combined with propensity-score weights.

    Returns a copy of ``test`` with an added 'cate' column.
    """
    control = train.loc[train[T] == 0]
    treated = train.loc[train[T] == 1]

    # Stage 1: outcome model per treatment arm.
    m0 = LGBMRegressor()
    m1 = LGBMRegressor()
    m0.fit(control[X], control[y])
    m1.fit(treated[X], treated[y])

    # Imputed effects: cross-predict with the opposite arm's model.
    pseudo_control = m1.predict(control[X]) - control[y]
    pseudo_treated = treated[y] - m0.predict(treated[X])

    # Stage 2: regress the pseudo-outcomes on covariates.
    tau0 = LGBMRegressor()
    tau1 = LGBMRegressor()
    tau0.fit(control[X], pseudo_control)
    tau1.fit(treated[X], pseudo_treated)

    # Propensity model supplies the mixing weights on the test set.
    propensity = LogisticRegression(penalty=None)
    propensity.fit(train[X], train[T])
    e = propensity.predict_proba(test[X])[:, 1]

    effect = e * tau0.predict(test[X]) + (1 - e) * tau1.predict(test[X])

    result = test.copy()
    result['cate'] = effect

    return result
142
+
143
def double_ml_cate(train, test, X, T, y) -> pd.DataFrame:
    """Double/debiased ML CATE estimation (works for continuous treatment).

    Residualizes both treatment and outcome on the covariates, then fits a
    weighted regression of the transformed outcome (Y residual / T residual)
    with weights equal to the squared treatment residuals.

    NOTE(review): the division by the treatment residual can blow up when a
    residual is near zero -- behavior kept as-is; confirm whether clipping
    is wanted.

    Returns a copy of ``test`` with an added 'cate' column.
    """
    # Residualize the treatment: T - E[T|X].
    t_hat = LGBMRegressor()
    t_hat.fit(train[X], train[T])
    t_resid = train[T] - t_hat.predict(train[X])

    # Residualize the outcome: Y - E[Y|X].
    y_hat = LGBMRegressor()
    y_hat.fit(train[X], train[y])
    y_resid = train[y] - y_hat.predict(train[X])

    # Transformed outcome and weights; the weighted fit targets tau(x).
    transformed = y_resid / t_resid
    weights = t_resid ** 2

    final_model = LGBMRegressor()
    final_model.fit(train[X], transformed, sample_weight=weights)

    result = test.copy()
    result['cate'] = final_model.predict(test[X])

    return result
@@ -0,0 +1,60 @@
1
# Workaround (originally suggested by Claude) for a packaging build error:
# make sure the "packaging" module is importable. Only shell out to pip when
# the import would actually fail, so merely importing this module does not
# spawn a pip process on every load.
import importlib.util
import subprocess
import sys

if importlib.util.find_spec("packaging") is None:
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "packaging"],
        capture_output=True,
        check=False,  # best-effort: failure here should not block the import
    )
6
+
7
+ # Add imports
8
+ import numpy as np
9
+ import pandas as pd
10
+ from patsy import dmatrices, dmatrix
11
+ from sklearn.linear_model import LinearRegression, LogisticRegression
12
+
13
# NOTE(review): everything below runs at import time and looks like leftover
# test-fixture code -- the second snippet rebinds n, prob_t, t, y, and df, so
# only the categorical-covariate frame survives. Consider moving these into
# functions so importing the module has no side effects.

"""Test 1: IPW with simple positive treatment effect"""
np.random.seed(42)
n = 1000
# Generate data with known ATE = 2
x = np.random.normal(0, 1, n)
prob_t = 1 / (1 + np.exp(-(0.5 * x)))
t = np.random.binomial(1, prob_t, n)
y = 2 * t + x + np.random.normal(0, 0.5, n)
df = pd.DataFrame({'x': x, 't': t, 'y': y})

"""Test 5: IPW with categorical covariate"""
np.random.seed(101)
n = 1000
# Generate data with categorical confounder
group = np.random.choice(['A', 'B', 'C'], n)
group_effect = {'A': 0, 'B': 1, 'C': 2}
x_numeric = np.array([group_effect[g] for g in group])
prob_t = 1 / (1 + np.exp(-(0.5 * x_numeric)))
t = np.random.binomial(1, prob_t, n)
y = 2.0 * t + x_numeric + np.random.normal(0, 0.5, n)
df = pd.DataFrame({'group': group, 't': t, 'y': y})
34
+
35
def ipw(df: pd.DataFrame, ps_formula: str, T: str, Y: str) -> float:
    """Estimate the ATE via inverse probability weighting.

    The propensity score is fit with an unpenalized logistic regression on
    the patsy design matrix built from ``ps_formula``.

    Args:
        df: Data containing the treatment column, outcome column, and
            every covariate referenced by ``ps_formula``.
        ps_formula: patsy formula (right-hand side) for the propensity model.
        T: Name of the binary treatment column.
        Y: Name of the outcome column.

    Returns:
        The estimated average treatment effect.
    """
    design = dmatrix(ps_formula, df)
    ps_model = LogisticRegression(penalty=None, max_iter=1000).fit(design, df[T])
    e = ps_model.predict_proba(design)[:, 1]

    # Signed IPW weights: (T - e) / (e * (1 - e)) collapses to 1/e for the
    # treated and -1/(1 - e) for the controls.
    signed_weights = (df[T] - e) / (e * (1 - e))
    return np.mean(signed_weights * df[Y])
40
+
41
def doubly_robust(df: pd.DataFrame, formula: str, T: str, Y: str) -> float:
    """Estimate the ATE with the doubly robust (AIPW) estimator.

    Combines a logistic propensity model with a linear outcome model; the
    estimate is consistent if either model is correctly specified.

    Args:
        df: Data containing the treatment column, outcome column, and every
            covariate referenced by ``formula``.
        formula: patsy formula (right-hand side) for the covariates.
        T: Name of the binary treatment column.
        Y: Name of the outcome column.

    Returns:
        The estimated average treatment effect.
    """
    # Propensity scores from an unpenalized logistic regression.
    X = dmatrix(formula, df)
    model = LogisticRegression(penalty=None, max_iter=1000).fit(X,df[T])
    ps = model.predict_proba(X)[:,1]

    # Outcome model: linear regression of Y on T plus the covariates.
    Y_mat, X_out = dmatrices(f"{Y} ~ {T} + {formula}", df)
    outcome_model = LinearRegression().fit(X_out, np.array(Y_mat).flatten())

    # Counterfactual design matrices with everyone treated / untreated.
    # NOTE(review): assumes dmatrix(f"{T} + {formula}") yields the same
    # column order as the RHS of dmatrices above -- confirm with patsy.
    mu1_df = df.copy()
    mu0_df = df.copy()
    mu1_df[T] = 1
    mu0_df[T] = 0
    mu1x = dmatrix(f"{T} + {formula}", mu1_df)
    mu0x = dmatrix(f"{T} + {formula}", mu0_df)

    # Predicted potential outcomes mu1 = E[Y|T=1,X], mu0 = E[Y|T=0,X].
    mu1 = outcome_model.predict(mu1x).flatten()
    mu0 = outcome_model.predict(mu0x).flatten()

    # AIPW: IPW-corrected residuals added to the regression predictions.
    ate = np.mean(df[T] * (df[Y] - mu1) / ps + mu1) - np.mean((1-df[T]) * (df[Y] - mu0) / (1-ps) + mu0)
    return ate
@@ -0,0 +1,61 @@
1
+ from typing import Tuple
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ from scipy.stats import norm
6
+
7
# NOTE(review): these frames are built at import time as shared example /
# fixture data; importing this module therefore reseeds numpy's global RNG.

# Positive effect data: treated mean 10 vs control mean 8 (true ATE = 2).
np.random.seed(42)
n = 1000
positive_effect_data = pd.DataFrame({
    'I': range(n),
    'T': np.random.binomial(1, 0.5, n),
})
positive_effect_data['Y'] = np.where(
    positive_effect_data['T'] == 1,
    np.random.normal(10, 2, n),
    np.random.normal(8, 2, n)
)

# No effect data: outcome independent of treatment (true ATE = 0).
np.random.seed(123)
n = 500
no_effect_data = pd.DataFrame({
    'I': range(n),
    'T': np.random.binomial(1, 0.5, n),
})
no_effect_data['Y'] = np.random.normal(5, 3, n)
28
+
29
def calculate_ate_ci(data: pd.DataFrame, alpha: float = 0.05) -> Tuple[float, float, float]:
    """Difference-in-means ATE with a normal-approximation confidence interval.

    Args:
        data: Frame with a binary treatment column 'T' and outcome column 'Y'.
        alpha: Significance level; the interval has (1 - alpha) coverage.

    Returns:
        Tuple of (ate, ci_lower, ci_upper).
    """
    treated_y = data.loc[data['T'] == 1]['Y']
    control_y = data.loc[data['T'] == 0]['Y']

    ate = treated_y.mean() - control_y.mean()

    # Neyman standard error of the difference in means (sample variances, ddof=1).
    se = np.sqrt(treated_y.var() / len(treated_y) + control_y.var() / len(control_y))

    # Two-sided normal critical value.
    z_crit = norm.ppf(1 - alpha / 2)

    return (ate, ate - z_crit * se, ate + z_crit * se)
46
+
47
def calculate_ate_pvalue(data: pd.DataFrame) -> Tuple[float, float, float]:
    """Difference-in-means ATE with a two-sided z-test against zero effect.

    Args:
        data: Frame with a binary treatment column 'T' and outcome column 'Y'.

    Returns:
        Tuple of (ate, t_statistic, p_value) using the normal approximation.
    """
    treated_y = data.loc[data['T'] == 1]['Y']
    control_y = data.loc[data['T'] == 0]['Y']

    ate = treated_y.mean() - control_y.mean()

    # Neyman standard error of the difference in means (sample variances, ddof=1).
    se = np.sqrt(treated_y.var() / len(treated_y) + control_y.var() / len(control_y))

    stat = ate / se
    p_value = 2 * (1 - norm.cdf(abs(stat)))

    return (ate, stat, p_value)
@@ -0,0 +1,80 @@
1
+ Metadata-Version: 2.4
2
+ Name: pycausal-inference-joshlim
3
+ Version: 0.1.0
4
+ Summary: A Python package for causal inference methods including ATE estimation, propensity score methods, and meta-learners
5
+ Author-email: Josh Lim <j.lim703@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/jhl126/pycausal-inference-joshlim
8
+ Project-URL: Documentation, https://github.com/jhl126/pycausal-inference-joshlim#readme
9
+ Project-URL: Repository, https://github.com/jhl126/pycausal-inference-joshlim
10
+ Project-URL: Bug Tracker, https://github.com/jhl126/pycausal-inference-joshlim/issues
11
+ Keywords: causal inference,statistics,machine learning,treatment effects
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Topic :: Scientific/Engineering :: Mathematics
21
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
22
+ Requires-Python: >=3.8
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: pandas>=1.3.0
26
+ Requires-Dist: numpy>=1.21.0
27
+ Requires-Dist: scipy>=1.7.0
28
+ Requires-Dist: scikit-learn>=1.0.0
29
+ Requires-Dist: lightgbm>=3.3.0
30
+ Requires-Dist: patsy>=0.5.0
31
+ Provides-Extra: dev
32
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
33
+ Requires-Dist: pytest-cov>=3.0.0; extra == "dev"
34
+ Requires-Dist: black>=22.0.0; extra == "dev"
35
+ Requires-Dist: pylint>=2.12.0; extra == "dev"
36
+ Requires-Dist: mypy>=0.950; extra == "dev"
37
+ Dynamic: license-file
38
+
39
+ [![Tests](https://github.com/jhl126/pycausal-inference-joshlim/workflows/Tests/badge.svg)](https://github.com/jhl126/pycausal-inference-joshlim/actions)
40
+
41
+ # Causal Inference Python Package - Josh Lim
42
+
43
+ This package provides key causal inference methods. These methods include ATE estimation from randomized experiments, propensity score methods, and meta-learners.
44
+
45
+ ## Installation
46
+
47
+ ```bash
48
+ # Clone the repository
49
+ git clone https://github.com/jhl126/pycausal-inference-joshlim.git
50
+ cd pycausal-inference-joshlim
51
+
52
+ # Install in editable mode
53
+ uv pip install -e .
54
+ ```
55
+
56
+ ## Usage
57
+
58
+ Import functions with the following code:
59
+
60
+ ```python
61
+ from pycausal_inference_joshlim import calculate_ate_ci, calculate_ate_pvalue
62
+ from pycausal_inference_joshlim import ipw, doubly_robust
63
+ from pycausal_inference_joshlim import s_learner_discrete, t_learner_discrete, x_learner_discrete, double_ml_cate
64
+ ```
65
+
66
+ ## API Documentation
67
+
68
+ ### RCT Module
69
+ - `calculate_ate_ci(data)` - Calculates the average treatment effect (ATE) and confidence interval from randomized experiment data
70
+ - `calculate_ate_pvalue(data)` - Calculates the p-value for the ATE estimate
71
+
72
+ ### Propensity Score Module
73
+ - `ipw(data)` - Estimates the ATE using inverse probability weighting
74
+ - `doubly_robust(data)` - Estimates the ATE using the doubly robust estimator
75
+
76
+ ### Meta-Learners Module
77
+ - `s_learner_discrete(data)` - Estimates heterogeneous treatment effects using the S-Learner approach
78
+ - `t_learner_discrete(data)` - Estimates heterogeneous treatment effects using the T-Learner approach
79
+ - `x_learner_discrete(data)` - Estimates heterogeneous treatment effects using the X-Learner approach
80
+ - `double_ml_cate(data)` - Estimates heterogeneous treatment effects using Double ML
@@ -0,0 +1,13 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ pycausal_inference_joshlim/__init__.py
5
+ pycausal_inference_joshlim/meta_learners.py
6
+ pycausal_inference_joshlim/propensity.py
7
+ pycausal_inference_joshlim/rct.py
8
+ pycausal_inference_joshlim.egg-info/PKG-INFO
9
+ pycausal_inference_joshlim.egg-info/SOURCES.txt
10
+ pycausal_inference_joshlim.egg-info/dependency_links.txt
11
+ pycausal_inference_joshlim.egg-info/requires.txt
12
+ pycausal_inference_joshlim.egg-info/top_level.txt
13
+ tests/test_meta_learners.py
@@ -0,0 +1,13 @@
1
+ pandas>=1.3.0
2
+ numpy>=1.21.0
3
+ scipy>=1.7.0
4
+ scikit-learn>=1.0.0
5
+ lightgbm>=3.3.0
6
+ patsy>=0.5.0
7
+
8
+ [dev]
9
+ pytest>=7.0.0
10
+ pytest-cov>=3.0.0
11
+ black>=22.0.0
12
+ pylint>=2.12.0
13
+ mypy>=0.950
@@ -0,0 +1 @@
1
+ pycausal_inference_joshlim
@@ -0,0 +1,69 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "pycausal-inference-joshlim"
7
+ version = "0.1.0"
8
+ description = "A Python package for causal inference methods including ATE estimation, propensity score methods, and meta-learners"
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ license = {text = "MIT"}
12
+ authors = [
13
+ {name = "Josh Lim", email = "j.lim703@gmail.com"}
14
+ ]
15
+ keywords = ["causal inference", "statistics", "machine learning", "treatment effects"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Science/Research",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.8",
22
+ "Programming Language :: Python :: 3.9",
23
+ "Programming Language :: Python :: 3.10",
24
+ "Programming Language :: Python :: 3.11",
25
+ "Topic :: Scientific/Engineering :: Mathematics",
26
+ "Topic :: Scientific/Engineering :: Information Analysis",
27
+ ]
28
+
29
+ dependencies = [
30
+ "pandas>=1.3.0",
31
+ "numpy>=1.21.0",
32
+ "scipy>=1.7.0",
33
+ "scikit-learn>=1.0.0",
34
+ "lightgbm>=3.3.0",
35
+ "patsy>=0.5.0",
36
+ ]
37
+
38
+ [project.optional-dependencies]
39
+ dev = [
40
+ "pytest>=7.0.0",
41
+ "pytest-cov>=3.0.0",
42
+ "black>=22.0.0",
43
+ "pylint>=2.12.0",
44
+ "mypy>=0.950",
45
+ ]
46
+
47
+ [project.urls]
48
+ Homepage = "https://github.com/jhl126/pycausal-inference-joshlim"
49
+ Documentation = "https://github.com/jhl126/pycausal-inference-joshlim#readme"
50
+ Repository = "https://github.com/jhl126/pycausal-inference-joshlim"
51
+ "Bug Tracker" = "https://github.com/jhl126/pycausal-inference-joshlim/issues"
52
+
53
+ [tool.pytest.ini_options]
54
+ testpaths = ["tests"]
55
+ python_files = ["test_*.py"]
56
+ python_classes = ["Test*"]
57
+ python_functions = ["test_*"]
58
+ addopts = "-v --cov=pycausal_inference_joshlim --cov-report=html --cov-report=term"
59
+
60
+ [tool.black]
61
+ line-length = 88
62
+ target-version = ['py38', 'py39', 'py310', 'py311']
63
+ include = '\.pyi?$'
64
+
65
+ [tool.mypy]
66
+ python_version = "3.8"
67
+ warn_return_any = true
68
+ warn_unused_configs = true
69
+ disallow_untyped_defs = false
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,96 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+ from pycausal_inference_joshlim import s_learner_discrete, t_learner_discrete, x_learner_discrete, double_ml_cate
5
+
6
def simple_data():
    """Synthetic confounded dataset with a constant treatment effect of 2.0.

    Returns:
        (train, test): an 800-row / 200-row split with columns
        'x1', 'x2', 't', 'y'.
    """
    np.random.seed(42)
    size = 1000

    # Two standard-normal covariates.
    x1 = np.random.normal(0, 1, size)
    x2 = np.random.normal(0, 1, size)

    # Confounded treatment: assignment probability depends on the covariates.
    logits = 0.5 * x1 + 0.3 * x2
    t = np.random.binomial(1, 1 / (1 + np.exp(-logits)), size)

    # Outcome: constant effect of 2.0 plus covariate signal and noise.
    y = 2.0 * t + x1 + 0.5 * x2 + np.random.normal(0, 0.5, size)

    frame = pd.DataFrame({'x1': x1, 'x2': x2, 't': t, 'y': y})

    # Deterministic train/test split.
    return frame.iloc[:800].copy(), frame.iloc[800:].copy()
29
+
30
def continuous_treatment_data():
    """Synthetic dataset with a continuous treatment and unit linear effect.

    Returns:
        (train, test): an 800-row / 200-row split with columns
        'x1', 'x2', 't', 'y'; 't' is continuous.
    """
    np.random.seed(789)
    size = 1000

    # Two standard-normal covariates.
    x1 = np.random.normal(0, 1, size)
    x2 = np.random.normal(0, 1, size)

    # Continuous treatment centered near 10, shifted by the covariates.
    t = 10 + x1 + 2*x2 + np.random.normal(0, 1, size)

    # Outcome: effect of exactly 1 per unit of treatment, plus noise.
    y = t + x1 + 0.5*x2 + np.random.normal(0, 0.5, size)

    frame = pd.DataFrame({'x1': x1, 'x2': x2, 't': t, 'y': y})

    # Deterministic train/test split.
    return frame.iloc[:800].copy(), frame.iloc[800:].copy()
51
+
52
def test_s_learner_returns_dataframe():
    """s_learner_discrete should hand back a pandas DataFrame."""
    train, test = simple_data()
    out = s_learner_discrete(train, test, ['x1', 'x2'], 't', 'y')
    assert isinstance(out, pd.DataFrame)

def test_s_learner_has_cate_column():
    """The result must carry the estimated effects in a 'cate' column."""
    train, test = simple_data()
    out = s_learner_discrete(train, test, ['x1', 'x2'], 't', 'y')
    assert 'cate' in out.columns

def test_s_learner_constant_effect():
    """Mean CATE should recover the true constant effect of 2.0."""
    train, test = simple_data()
    out = s_learner_discrete(train, test, ['x1', 'x2'], 't', 'y')
    assert abs(out['cate'].mean() - 2.0) < 0.5

def test_s_learner_return_numeric_cate():
    """Estimated effects must be numeric."""
    train, test = simple_data()
    out = s_learner_discrete(train, test, ['x1', 'x2'], 't', 'y')
    assert pd.api.types.is_numeric_dtype(out['cate'])

def test_s_learner_no_nan_values():
    """No missing values are allowed among the estimated effects."""
    train, test = simple_data()
    out = s_learner_discrete(train, test, ['x1', 'x2'], 't', 'y')
    assert not out['cate'].isna().any()
76
+
77
def test_t_learner_returns_dataframe():
    """t_learner_discrete should hand back a pandas DataFrame."""
    train, test = simple_data()
    out = t_learner_discrete(train, test, ['x1', 'x2'], 't', 'y')
    assert isinstance(out, pd.DataFrame)

def test_x_learner_returns_dataframe():
    """x_learner_discrete should hand back a pandas DataFrame."""
    train, test = simple_data()
    out = x_learner_discrete(train, test, ['x1', 'x2'], 't', 'y')
    assert isinstance(out, pd.DataFrame)
86
+
87
def test_double_ml_returns_dataframe():
    """double_ml_cate should hand back a pandas DataFrame."""
    train, test = continuous_treatment_data()
    out = double_ml_cate(train, test, ['x1', 'x2'], 't', 'y')
    assert isinstance(out, pd.DataFrame)

def test_double_ml_continuous_treatment():
    """With an outcome linear in t (slope 1), mean CATE should be near 1."""
    train, test = continuous_treatment_data()
    out = double_ml_cate(train, test, ['x1', 'x2'], 't', 'y')
    assert abs(out['cate'].mean() - 1) < 0.5