mini-causal 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mini_causal/__init__.py +1 -0
- mini_causal/model_selection.py +1 -0
- mini_causal/utils.py +157 -0
- mini_causal-0.1.3.dist-info/METADATA +21 -0
- mini_causal-0.1.3.dist-info/RECORD +8 -0
- mini_causal-0.1.3.dist-info/WHEEL +5 -0
- mini_causal-0.1.3.dist-info/licenses/LICENSE +21 -0
- mini_causal-0.1.3.dist-info/top_level.txt +1 -0
mini_causal/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from mini_causal.utils import treatment_control_selection
|
mini_causal/utils.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from sklearn import clone
|
|
4
|
+
|
|
5
|
+
def clone_model(model):
    """
    Return a copy of the given estimator produced by ``sklearn.clone``.

    Args:
        model:
            the estimator to duplicate

    Returns:
        the object returned by ``sklearn.clone(model)``
    """
    duplicate = clone(model)
    return duplicate
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def treatment_control_selection(df, control_size=0.2, random_seed=None):
    """
    Randomly split dataframe into treatment and control groups.

    Args:
        df (pd.DataFrame):
            the input dataframe

        control_size (float):
            Fraction assigned to control group, between 0 and 1 inclusive

        random_seed (int or None):
            for reproducibility; when None the global NumPy random state
            is used as-is

    Returns:
        treatment,control

    Raises:
        ValueError:
            if control_size is not within [0, 1]
    """
    # Written as a chained comparison so that NaN is rejected as well.
    if not 0 <= control_size <= 1:
        raise ValueError(
            f"control_size must be between 0 and 1, got {control_size!r}"
        )

    # A private RandomState yields the same permutation that
    # np.random.seed(random_seed) would, without reseeding the global
    # generator that the rest of the caller's program may rely on.
    if random_seed is not None:
        rng = np.random.RandomState(random_seed)
    else:
        rng = np.random

    n_samples = len(df)

    # Shuffle row positions
    indices = rng.permutation(n_samples)

    # Split point: the first part is treatment, the remainder is control
    split_idx = int(n_samples * (1 - control_size))

    treat_idx = indices[:split_idx]
    control_idx = indices[split_idx:]

    treatment = df.iloc[treat_idx]
    control = df.iloc[control_idx]

    return treatment, control
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def make_treatment_control_df(treatment,control)->pd.DataFrame:
    """
    Stack the treatment and control frames into one frame ordered by index.

    Args:
        treatment:
            the treatment rows

        control:
            the control rows

    Returns:
        pd.DataFrame:
            treatment followed by control, concatenated and then sorted
            by index
    """
    frames = [treatment, control]
    stacked = pd.concat(frames)
    ordered = stacked.sort_index()
    return ordered
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def summary_stats(treatment:np.ndarray,control:np.ndarray)->pd.DataFrame:
    """
    Build a two-row summary table for the treatment and control values.

    Args:
        treatment (np.ndarray):
            values for the treatment group

        control (np.ndarray):
            values for the control group

    Returns:
        pd.DataFrame:
            one row per group (treatment first, then control) with the
            columns type, n, mean and sample_variance; the variance is
            computed with ddof=1
    """
    sizes = [len(treatment), len(control)]

    control_mean = float(np.mean(control))
    treatment_mean = float(np.mean(treatment))

    treatment_var = float(np.var(treatment, ddof=1))
    control_var = float(np.var(control, ddof=1))

    table = {
        "type": ["treatment", "control"],
        "n": sizes,
        "mean": [treatment_mean, control_mean],
        "sample_variance": [treatment_var, control_var],
    }
    return pd.DataFrame(table)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def causal_effects_stratum(X:pd.DataFrame,causal_effects:np.ndarray,strata_feature:str):
    """
    Summarise the causal effects within each stratum.

    Args:
        X (pd.DataFrame):
            dataframe holding the strata_feature column; it is not modified.

        causal_effects(np.ndarray):
            the causal effects values, one per row of X

        strata_feature(str):
            the name of the feature that will be used for strata

    Returns:
        pd.DataFrame:
            - a dataframe with the following columns: strata_feature,mean,size and weight_rss
            - mean is the average causal effect of the stratum, size is its number of rows
              and weight_rss is the share of rows in the stratum (size / len(X))
    """
    N=len(X)

    # Build a separate frame instead of writing into X, so the caller's
    # dataframe does not gain a "causal_effects" column as a side effect.
    working=X[[strata_feature]].assign(causal_effects=causal_effects)

    results=working.groupby(strata_feature)["causal_effects"].agg(["mean","size"]).reset_index()

    # A stratum's weight is its share of all rows, hence a division by N.
    results["weight_rss"]=results["size"] / N

    return results
|
|
101
|
+
|
|
102
|
+
def stratum_weight_rss(X:pd.DataFrame,
                       treatment_model,
                       control_model,
                       feature:str,
                       strata_feature:str,
                       type:str="preds")->pd.DataFrame:
    """
    The causal effect based on the stratum to validate the effect per stratum of the treatment.

    Args:
        X (pd.DataFrame):
            X values stored in a dataframe that will be used for the prediction.

        treatment_model:
            the trained treatment estimator; it receives X without the
            strata_feature column

        control_model:
            the trained control estimator; it receives X without the
            feature and strata_feature columns

        feature (str):
            the name of the column that only the treatment model sees
            (presumably the treatment variable - confirm with the caller)

        strata_feature (str):
            the name of the feature that will be used for strata

        type (str): {preds,probs}
            return the stratum values based on the predicted probabilities or predicted values of the target


    Returns:
        pd.DataFrame:
            - the dataframe returned by causal_effects_stratum for the
              per-row difference between the treatment and control outputs
            - with type "preds" the difference is taken between the models'
              predict outputs, with "probs" between column 1 of their
              predict_proba outputs

    Raises:
        ValueError:
            if type is neither "preds" nor "probs"
    """
    # Reject an unknown type before running any model.
    if type not in ("preds","probs"):
        raise ValueError("Type invalid")

    X_control=X.drop([feature,strata_feature],axis=1)
    X_treatment=X.drop(strata_feature,axis=1)

    # Only the requested kind of output is computed, so models without
    # predict_proba can still be used with type="preds".
    if type=="probs":
        treatment_scores=treatment_model.predict_proba(X_treatment)[:,1]
        control_scores=control_model.predict_proba(X_control)[:,1]
    else:
        treatment_scores=treatment_model.predict(X_treatment)
        control_scores=control_model.predict(X_control)

    causal_effects=treatment_scores-control_scores

    return causal_effects_stratum(X=X,causal_effects=causal_effects,
                                  strata_feature=strata_feature)
|
|
157
|
+
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mini-causal
|
|
3
|
+
Version: 0.1.3
|
|
4
|
+
Summary: A mini package for causal inference
|
|
5
|
+
Author-email: Masemene Matlakana Benny <bennymasemene46@gmail.com>
|
|
6
|
+
Project-URL: Homepage, https://github.com/MasemeneMatlakanaBenny/Mini_Causal
|
|
7
|
+
Project-URL: Issues, https://github.com/MasemeneMatlakanaBenny/Mini_Causal/issues
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.12
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Requires-Dist: pandas>=3.0.2
|
|
15
|
+
Requires-Dist: numpy>=2.4.4
|
|
16
|
+
Requires-Dist: scipy>=1.17.1
|
|
17
|
+
Dynamic: license-file
|
|
18
|
+
|
|
19
|
+
# MiniCausal
|
|
20
|
+
|
|
21
|
+

|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
mini_causal/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
2
|
+
mini_causal/model_selection.py,sha256=MHdg8SmFI_GUbUaawktnZNv86lFzyC_Z4DKQi8gCYTU,58
|
|
3
|
+
mini_causal/utils.py,sha256=iObTWTvvsj_Ak8gNFrZ7UCpdWPwhBudL9BOSBlf42Xs,4686
|
|
4
|
+
mini_causal-0.1.3.dist-info/licenses/LICENSE,sha256=inTwgDJnDsOeOMXHNRzKEsjfs9A3l2Ib70mJgz9RmhE,1079
|
|
5
|
+
mini_causal-0.1.3.dist-info/METADATA,sha256=ml6-QQh1rm63frHwTfXwghMrfaq4FrHPjWWnYwDEDaU,710
|
|
6
|
+
mini_causal-0.1.3.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
7
|
+
mini_causal-0.1.3.dist-info/top_level.txt,sha256=z_s2J0gL5M0LfBt99Z-i3uJaw08pbJd0zkhRdV-HNak,12
|
|
8
|
+
mini_causal-0.1.3.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 MasemeneMatlakanaBenny
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
mini_causal
|