pyAgrum-nightly 2.3.1.9.dev202512261765915415__cp310-abi3-macosx_10_15_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyagrum/__init__.py +165 -0
- pyagrum/_pyagrum.so +0 -0
- pyagrum/bnmixture/BNMInference.py +268 -0
- pyagrum/bnmixture/BNMLearning.py +376 -0
- pyagrum/bnmixture/BNMixture.py +464 -0
- pyagrum/bnmixture/__init__.py +60 -0
- pyagrum/bnmixture/notebook.py +1058 -0
- pyagrum/causal/_CausalFormula.py +280 -0
- pyagrum/causal/_CausalModel.py +436 -0
- pyagrum/causal/__init__.py +81 -0
- pyagrum/causal/_causalImpact.py +356 -0
- pyagrum/causal/_dSeparation.py +598 -0
- pyagrum/causal/_doAST.py +761 -0
- pyagrum/causal/_doCalculus.py +361 -0
- pyagrum/causal/_doorCriteria.py +374 -0
- pyagrum/causal/_exceptions.py +95 -0
- pyagrum/causal/_types.py +61 -0
- pyagrum/causal/causalEffectEstimation/_CausalEffectEstimation.py +1175 -0
- pyagrum/causal/causalEffectEstimation/_IVEstimators.py +718 -0
- pyagrum/causal/causalEffectEstimation/_RCTEstimators.py +132 -0
- pyagrum/causal/causalEffectEstimation/__init__.py +46 -0
- pyagrum/causal/causalEffectEstimation/_backdoorEstimators.py +774 -0
- pyagrum/causal/causalEffectEstimation/_causalBNEstimator.py +324 -0
- pyagrum/causal/causalEffectEstimation/_frontdoorEstimators.py +396 -0
- pyagrum/causal/causalEffectEstimation/_learners.py +118 -0
- pyagrum/causal/causalEffectEstimation/_utils.py +466 -0
- pyagrum/causal/notebook.py +172 -0
- pyagrum/clg/CLG.py +658 -0
- pyagrum/clg/GaussianVariable.py +111 -0
- pyagrum/clg/SEM.py +312 -0
- pyagrum/clg/__init__.py +63 -0
- pyagrum/clg/canonicalForm.py +408 -0
- pyagrum/clg/constants.py +54 -0
- pyagrum/clg/forwardSampling.py +202 -0
- pyagrum/clg/learning.py +776 -0
- pyagrum/clg/notebook.py +480 -0
- pyagrum/clg/variableElimination.py +271 -0
- pyagrum/common.py +60 -0
- pyagrum/config.py +319 -0
- pyagrum/ctbn/CIM.py +513 -0
- pyagrum/ctbn/CTBN.py +573 -0
- pyagrum/ctbn/CTBNGenerator.py +216 -0
- pyagrum/ctbn/CTBNInference.py +459 -0
- pyagrum/ctbn/CTBNLearner.py +161 -0
- pyagrum/ctbn/SamplesStats.py +671 -0
- pyagrum/ctbn/StatsIndepTest.py +355 -0
- pyagrum/ctbn/__init__.py +79 -0
- pyagrum/ctbn/constants.py +54 -0
- pyagrum/ctbn/notebook.py +264 -0
- pyagrum/defaults.ini +199 -0
- pyagrum/deprecated.py +95 -0
- pyagrum/explain/_ComputationCausal.py +75 -0
- pyagrum/explain/_ComputationConditional.py +48 -0
- pyagrum/explain/_ComputationMarginal.py +48 -0
- pyagrum/explain/_CustomShapleyCache.py +110 -0
- pyagrum/explain/_Explainer.py +176 -0
- pyagrum/explain/_Explanation.py +70 -0
- pyagrum/explain/_FIFOCache.py +54 -0
- pyagrum/explain/_ShallCausalValues.py +204 -0
- pyagrum/explain/_ShallConditionalValues.py +155 -0
- pyagrum/explain/_ShallMarginalValues.py +155 -0
- pyagrum/explain/_ShallValues.py +296 -0
- pyagrum/explain/_ShapCausalValues.py +208 -0
- pyagrum/explain/_ShapConditionalValues.py +126 -0
- pyagrum/explain/_ShapMarginalValues.py +191 -0
- pyagrum/explain/_ShapleyValues.py +298 -0
- pyagrum/explain/__init__.py +81 -0
- pyagrum/explain/_explGeneralizedMarkovBlanket.py +152 -0
- pyagrum/explain/_explIndependenceListForPairs.py +146 -0
- pyagrum/explain/_explInformationGraph.py +264 -0
- pyagrum/explain/notebook/__init__.py +54 -0
- pyagrum/explain/notebook/_bar.py +142 -0
- pyagrum/explain/notebook/_beeswarm.py +174 -0
- pyagrum/explain/notebook/_showShapValues.py +97 -0
- pyagrum/explain/notebook/_waterfall.py +220 -0
- pyagrum/explain/shapley.py +225 -0
- pyagrum/lib/__init__.py +46 -0
- pyagrum/lib/_colors.py +390 -0
- pyagrum/lib/bn2graph.py +299 -0
- pyagrum/lib/bn2roc.py +1026 -0
- pyagrum/lib/bn2scores.py +217 -0
- pyagrum/lib/bn_vs_bn.py +605 -0
- pyagrum/lib/cn2graph.py +305 -0
- pyagrum/lib/discreteTypeProcessor.py +1102 -0
- pyagrum/lib/discretizer.py +58 -0
- pyagrum/lib/dynamicBN.py +390 -0
- pyagrum/lib/explain.py +57 -0
- pyagrum/lib/export.py +84 -0
- pyagrum/lib/id2graph.py +258 -0
- pyagrum/lib/image.py +387 -0
- pyagrum/lib/ipython.py +307 -0
- pyagrum/lib/mrf2graph.py +471 -0
- pyagrum/lib/notebook.py +1821 -0
- pyagrum/lib/proba_histogram.py +552 -0
- pyagrum/lib/utils.py +138 -0
- pyagrum/pyagrum.py +31495 -0
- pyagrum/skbn/_MBCalcul.py +242 -0
- pyagrum/skbn/__init__.py +49 -0
- pyagrum/skbn/_learningMethods.py +282 -0
- pyagrum/skbn/_utils.py +297 -0
- pyagrum/skbn/bnclassifier.py +1014 -0
- pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/LICENSE.md +12 -0
- pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/LICENSES/LGPL-3.0-or-later.txt +304 -0
- pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/LICENSES/MIT.txt +18 -0
- pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/METADATA +145 -0
- pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/RECORD +107 -0
- pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,774 @@
|
|
|
1
|
+
############################################################################
|
|
2
|
+
# This file is part of the aGrUM/pyAgrum library. #
|
|
3
|
+
# #
|
|
4
|
+
# Copyright (c) 2005-2025 by #
|
|
5
|
+
# - Pierre-Henri WUILLEMIN(_at_LIP6) #
|
|
6
|
+
# - Christophe GONZALES(_at_AMU) #
|
|
7
|
+
# #
|
|
8
|
+
# The aGrUM/pyAgrum library is free software; you can redistribute it #
|
|
9
|
+
# and/or modify it under the terms of either : #
|
|
10
|
+
# #
|
|
11
|
+
# - the GNU Lesser General Public License as published by #
|
|
12
|
+
# the Free Software Foundation, either version 3 of the License, #
|
|
13
|
+
# or (at your option) any later version, #
|
|
14
|
+
# - the MIT license (MIT), #
|
|
15
|
+
# - or both in dual license, as here. #
|
|
16
|
+
# #
|
|
17
|
+
# (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) #
|
|
18
|
+
# #
|
|
19
|
+
# This aGrUM/pyAgrum library is distributed in the hope that it will be #
|
|
20
|
+
# useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, #
|
|
21
|
+
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS #
|
|
22
|
+
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
|
|
23
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
|
|
24
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, #
|
|
25
|
+
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR #
|
|
26
|
+
# OTHER DEALINGS IN THE SOFTWARE. #
|
|
27
|
+
# #
|
|
28
|
+
# See LICENCES for more details. #
|
|
29
|
+
# #
|
|
30
|
+
# SPDX-FileCopyrightText: Copyright 2005-2025 #
|
|
31
|
+
# - Pierre-Henri WUILLEMIN(_at_LIP6) #
|
|
32
|
+
# - Christophe GONZALES(_at_AMU) #
|
|
33
|
+
# SPDX-License-Identifier: LGPL-3.0-or-later OR MIT #
|
|
34
|
+
# #
|
|
35
|
+
# Contact : info_at_agrum_dot_org #
|
|
36
|
+
# homepage : http://agrum.gitlab.io #
|
|
37
|
+
# gitlab : https://gitlab.com/agrumery/agrum #
|
|
38
|
+
# #
|
|
39
|
+
############################################################################
|
|
40
|
+
|
|
41
|
+
import pandas as pd
|
|
42
|
+
import numpy as np
|
|
43
|
+
|
|
44
|
+
from typing import Any
|
|
45
|
+
|
|
46
|
+
from sklearn.base import clone
|
|
47
|
+
|
|
48
|
+
from ._learners import learnerFromString
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class SLearner:
|
|
52
|
+
"""
|
|
53
|
+
A basic implementation of the S-learner based on Kunzel et al. (2018)
|
|
54
|
+
(see https://arxiv.org/abs/1706.03461).
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
def __init__(self, learner: str | Any | None = None) -> None:
|
|
58
|
+
"""
|
|
59
|
+
Initialize an S-learner.
|
|
60
|
+
|
|
61
|
+
Parameters
|
|
62
|
+
----------
|
|
63
|
+
learner: str or Any, optional
|
|
64
|
+
Base estimator for all learners.
|
|
65
|
+
If not provided, defaults to LinearRegression.
|
|
66
|
+
"""
|
|
67
|
+
|
|
68
|
+
if isinstance(learner, str):
|
|
69
|
+
self.learner = learnerFromString(learner)
|
|
70
|
+
elif learner is None:
|
|
71
|
+
self.learner = learnerFromString("LinearRegression")
|
|
72
|
+
else:
|
|
73
|
+
self.learner = clone(learner)
|
|
74
|
+
|
|
75
|
+
def fit(
|
|
76
|
+
self,
|
|
77
|
+
X: np.matrix | np.ndarray | pd.DataFrame,
|
|
78
|
+
treatment: np.ndarray | pd.Series,
|
|
79
|
+
y: np.ndarray | pd.Series,
|
|
80
|
+
) -> None:
|
|
81
|
+
"""
|
|
82
|
+
Fit the inference model.
|
|
83
|
+
|
|
84
|
+
Parameters
|
|
85
|
+
----------
|
|
86
|
+
X: np.matrix or np.ndarray or pd.DataFrame
|
|
87
|
+
The covariate matrix.
|
|
88
|
+
treatment: np.ndarray or pd.Series
|
|
89
|
+
The treatment assignment vector.
|
|
90
|
+
y: np.ndarray or pd.Series,
|
|
91
|
+
The outcome vector.
|
|
92
|
+
"""
|
|
93
|
+
|
|
94
|
+
self.learner.fit(X=pd.concat([pd.DataFrame(X), pd.DataFrame(treatment)], axis=1), y=np.array(y))
|
|
95
|
+
|
|
96
|
+
def predict(
|
|
97
|
+
self,
|
|
98
|
+
X: np.matrix | np.ndarray | pd.DataFrame,
|
|
99
|
+
treatment: np.ndarray | pd.Series | None = None,
|
|
100
|
+
y: np.ndarray | pd.Series | None = None,
|
|
101
|
+
) -> np.ndarray:
|
|
102
|
+
"""
|
|
103
|
+
Predict the Idividual Causal Effect (ICE),
|
|
104
|
+
also referd to as the Individual Treatment Effect (ITE).
|
|
105
|
+
|
|
106
|
+
Note: For an S-learner, the ICE is constant and corresponds to the
|
|
107
|
+
Average Causal Effect (ACE) of the fitted groups, due to the
|
|
108
|
+
use of a single linear model.
|
|
109
|
+
|
|
110
|
+
Parameters
|
|
111
|
+
----------
|
|
112
|
+
X: np.matrix or np.ndarray or pd.DataFrame
|
|
113
|
+
The matrix of covariates.
|
|
114
|
+
treatment: np.ndarray or pd.Series or None, optional
|
|
115
|
+
The vector of treatment assignments.
|
|
116
|
+
y: np.ndarray or pd.Series, optional
|
|
117
|
+
The vector of outcomes.
|
|
118
|
+
|
|
119
|
+
Returns
|
|
120
|
+
-------
|
|
121
|
+
np.ndarray
|
|
122
|
+
An array containing the predicted ICE.
|
|
123
|
+
"""
|
|
124
|
+
|
|
125
|
+
X_control = pd.concat(
|
|
126
|
+
[
|
|
127
|
+
pd.DataFrame(X),
|
|
128
|
+
pd.DataFrame({self.learner.feature_names_in_[-1]: np.zeros(len(X))}, index=pd.DataFrame(X).index),
|
|
129
|
+
],
|
|
130
|
+
axis=1,
|
|
131
|
+
)
|
|
132
|
+
|
|
133
|
+
X_treatment = pd.concat(
|
|
134
|
+
[
|
|
135
|
+
pd.DataFrame(X),
|
|
136
|
+
pd.DataFrame({self.learner.feature_names_in_[-1]: np.ones(len(X))}, index=pd.DataFrame(X).index),
|
|
137
|
+
],
|
|
138
|
+
axis=1,
|
|
139
|
+
)
|
|
140
|
+
|
|
141
|
+
mu0 = self.learner.predict(X=X_control)
|
|
142
|
+
mu1 = self.learner.predict(X=X_treatment)
|
|
143
|
+
|
|
144
|
+
return mu1 - mu0
|
|
145
|
+
|
|
146
|
+
def estimate_ate(
|
|
147
|
+
self,
|
|
148
|
+
X: np.matrix | np.ndarray | pd.DataFrame,
|
|
149
|
+
treatment: np.ndarray | pd.Series | None = None,
|
|
150
|
+
y: np.ndarray | pd.Series | None = None,
|
|
151
|
+
pretrain: bool = True,
|
|
152
|
+
) -> float:
|
|
153
|
+
"""
|
|
154
|
+
Predicts the Average Causal Effect (ACE),
|
|
155
|
+
also refered to as the Average Treatment Effect (ATE).
|
|
156
|
+
(The term ATE is used in the method name for compatibility purposes.)
|
|
157
|
+
|
|
158
|
+
Parameters
|
|
159
|
+
----------
|
|
160
|
+
X: np.matrix or np.ndarray or pd.DataFrame
|
|
161
|
+
The matrix of covariates.
|
|
162
|
+
treatment: np.ndarray or pd.Series or None, optional
|
|
163
|
+
The vector of treatment assignments.
|
|
164
|
+
y: np.ndarray or pd.Series, optional
|
|
165
|
+
The vector of outcomes.
|
|
166
|
+
|
|
167
|
+
Returns
|
|
168
|
+
-------
|
|
169
|
+
float
|
|
170
|
+
The value of the ACE.
|
|
171
|
+
"""
|
|
172
|
+
|
|
173
|
+
return self.predict(X, treatment, y).mean()
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
class TLearner:
|
|
177
|
+
"""
|
|
178
|
+
A basic implementation of the T-learner based on Kunzel et al. (2018)
|
|
179
|
+
(see https://arxiv.org/abs/1706.03461).
|
|
180
|
+
"""
|
|
181
|
+
|
|
182
|
+
def __init__(
|
|
183
|
+
self,
|
|
184
|
+
learner: str | Any | None = None,
|
|
185
|
+
control_learner: str | Any | None = None,
|
|
186
|
+
treatment_learner: str | Any | None = None,
|
|
187
|
+
) -> None:
|
|
188
|
+
"""
|
|
189
|
+
Initialize an T-learner.
|
|
190
|
+
|
|
191
|
+
Parameters
|
|
192
|
+
----------
|
|
193
|
+
learner: str or Any, optional
|
|
194
|
+
Base estimator for all learners.
|
|
195
|
+
If not provided, defaults to LinearRegression.
|
|
196
|
+
control_learner: str or Any, optional
|
|
197
|
+
Estimator for control group outcome.
|
|
198
|
+
Overrides `learner` if specified.
|
|
199
|
+
treatment_learner: str or Any, optional
|
|
200
|
+
Estimator for treatment group outcome.
|
|
201
|
+
Overrides `learner` if specified.
|
|
202
|
+
"""
|
|
203
|
+
|
|
204
|
+
if learner is None:
|
|
205
|
+
self.control_learner = learnerFromString("LinearRegression")
|
|
206
|
+
self.treatment_learner = learnerFromString("LinearRegression")
|
|
207
|
+
elif isinstance(learner, str):
|
|
208
|
+
self.control_learner = learnerFromString(learner)
|
|
209
|
+
self.treatment_learner = learnerFromString(learner)
|
|
210
|
+
else:
|
|
211
|
+
self.treatment_learner = clone(learner)
|
|
212
|
+
self.control_learner = clone(learner)
|
|
213
|
+
|
|
214
|
+
if isinstance(control_learner, str):
|
|
215
|
+
self.control_learner = learnerFromString(control_learner)
|
|
216
|
+
elif control_learner is not None:
|
|
217
|
+
self.control_learner = clone(control_learner)
|
|
218
|
+
|
|
219
|
+
if isinstance(treatment_learner, str):
|
|
220
|
+
self.treatment_learner = learnerFromString(treatment_learner)
|
|
221
|
+
elif treatment_learner is not None:
|
|
222
|
+
self.treatment_learner = clone(treatment_learner)
|
|
223
|
+
|
|
224
|
+
def fit(
|
|
225
|
+
self,
|
|
226
|
+
X: np.matrix | np.ndarray | pd.DataFrame,
|
|
227
|
+
treatment: np.ndarray | pd.Series,
|
|
228
|
+
y: np.ndarray | pd.Series,
|
|
229
|
+
) -> None:
|
|
230
|
+
"""
|
|
231
|
+
Fit the inference model.
|
|
232
|
+
|
|
233
|
+
Parameters
|
|
234
|
+
----------
|
|
235
|
+
X: np.matrix or np.ndarray or pd.DataFrame
|
|
236
|
+
The matrix of covariates.
|
|
237
|
+
treatment: np.ndarray or pd.Series or None, optional
|
|
238
|
+
The vector of treatment assignments.
|
|
239
|
+
y: np.ndarray or pd.Series, optional
|
|
240
|
+
The vector of outcomes.
|
|
241
|
+
"""
|
|
242
|
+
|
|
243
|
+
self.control_learner.fit(X=X[treatment == 0], y=y[treatment == 0])
|
|
244
|
+
self.treatment_learner.fit(X=X[treatment == 1], y=y[treatment == 1])
|
|
245
|
+
|
|
246
|
+
def predict(
|
|
247
|
+
self,
|
|
248
|
+
X: np.matrix | np.ndarray | pd.DataFrame,
|
|
249
|
+
treatment: np.ndarray | pd.Series | None = None,
|
|
250
|
+
y: np.ndarray | pd.Series | None = None,
|
|
251
|
+
) -> np.ndarray:
|
|
252
|
+
"""
|
|
253
|
+
Predict the Idividual Causal Effect (ICE),
|
|
254
|
+
also referd to as the Individual Treatment Effect (ITE).
|
|
255
|
+
|
|
256
|
+
Parameters
|
|
257
|
+
----------
|
|
258
|
+
X: np.matrix or np.ndarray or pd.DataFrame
|
|
259
|
+
The matrix of covariates.
|
|
260
|
+
treatment: np.ndarray or pd.Series or None, optional
|
|
261
|
+
The vector of treatment assignments.
|
|
262
|
+
y: np.ndarray or pd.Series, optional
|
|
263
|
+
The vector of outcomes.
|
|
264
|
+
|
|
265
|
+
Returns
|
|
266
|
+
-------
|
|
267
|
+
np.ndarray
|
|
268
|
+
An array containing the predicted ICE.
|
|
269
|
+
"""
|
|
270
|
+
|
|
271
|
+
mu0 = self.control_learner.predict(X=X)
|
|
272
|
+
mu1 = self.treatment_learner.predict(X=X)
|
|
273
|
+
|
|
274
|
+
return mu1 - mu0
|
|
275
|
+
|
|
276
|
+
def estimate_ate(
|
|
277
|
+
self,
|
|
278
|
+
X: np.matrix | np.ndarray | pd.DataFrame,
|
|
279
|
+
treatment: np.ndarray | pd.Series | None = None,
|
|
280
|
+
y: np.ndarray | pd.Series | None = None,
|
|
281
|
+
pretrain: bool = True,
|
|
282
|
+
) -> float:
|
|
283
|
+
"""
|
|
284
|
+
Predicts the Average Causal Effect (ACE),
|
|
285
|
+
also refered to as the Average Treatment Effect (ATE).
|
|
286
|
+
(The term ATE is used in the method name for compatibility purposes.)
|
|
287
|
+
|
|
288
|
+
Parameters
|
|
289
|
+
----------
|
|
290
|
+
X: np.matrix or np.ndarray or pd.DataFrame
|
|
291
|
+
The matrix of covariates.
|
|
292
|
+
treatment: np.ndarray or pd.Series or None, optional
|
|
293
|
+
The vector of treatment assignments.
|
|
294
|
+
y: np.ndarray or pd.Series, optional
|
|
295
|
+
The vector of outcomes.
|
|
296
|
+
|
|
297
|
+
Returns
|
|
298
|
+
-------
|
|
299
|
+
float
|
|
300
|
+
The value of the ACE.
|
|
301
|
+
"""
|
|
302
|
+
|
|
303
|
+
return self.predict(X, treatment, y).mean()
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
class XLearner:
|
|
307
|
+
"""
|
|
308
|
+
A basic implementation of the X-learner based on Kunzel et al. (2018)
|
|
309
|
+
(see https://arxiv.org/abs/1706.03461).
|
|
310
|
+
"""
|
|
311
|
+
|
|
312
|
+
def __init__(
|
|
313
|
+
self,
|
|
314
|
+
learner: str | Any | None = None,
|
|
315
|
+
control_outcome_learner: str | Any | None = None,
|
|
316
|
+
treatment_outcome_learner: str | Any | None = None,
|
|
317
|
+
control_effect_learner: str | Any | None = None,
|
|
318
|
+
treatment_effect_learner: str | Any | None = None,
|
|
319
|
+
propensity_score_learner: str | Any | None = None,
|
|
320
|
+
) -> None:
|
|
321
|
+
"""
|
|
322
|
+
Initialize an X-learner.
|
|
323
|
+
|
|
324
|
+
Parameters
|
|
325
|
+
----------
|
|
326
|
+
learner: str or Any, optional
|
|
327
|
+
Base estimator for all learners.
|
|
328
|
+
If not provided, defaults to LinearRegression.
|
|
329
|
+
control_outcome_learner: str or Any, optional
|
|
330
|
+
Estimator for control group outcome.
|
|
331
|
+
Overrides `learner` if specified.
|
|
332
|
+
treatment_outcome_learner: str or Any, optional
|
|
333
|
+
Estimator for treatment group outcome.
|
|
334
|
+
Overrides `learner` if specified.
|
|
335
|
+
control_effect_learner: str or Any, optional
|
|
336
|
+
Estimator for control group effect.
|
|
337
|
+
Overrides `learner` if specified.
|
|
338
|
+
treatment_effect_learner: str or Any, optional
|
|
339
|
+
Estimator for treatment group effect.
|
|
340
|
+
Overrides `learner` if specified.
|
|
341
|
+
propensity_score_learner: str or Any, optional
|
|
342
|
+
Estimator for propensity score.
|
|
343
|
+
If not provided, defaults to LogisticRegression.
|
|
344
|
+
|
|
345
|
+
"""
|
|
346
|
+
|
|
347
|
+
if learner is None:
|
|
348
|
+
self.control_outcome_learner = learnerFromString("LinearRegression")
|
|
349
|
+
self.treatment_outcome_learner = learnerFromString("LinearRegression")
|
|
350
|
+
self.control_effect_learner = learnerFromString("LinearRegression")
|
|
351
|
+
self.treatment_effect_learner = learnerFromString("LinearRegression")
|
|
352
|
+
elif isinstance(learner, str):
|
|
353
|
+
self.control_outcome_learner = learnerFromString(learner)
|
|
354
|
+
self.treatment_outcome_learner = learnerFromString(learner)
|
|
355
|
+
self.control_effect_learner = learnerFromString(learner)
|
|
356
|
+
self.treatment_effect_learner = learnerFromString(learner)
|
|
357
|
+
else:
|
|
358
|
+
self.control_outcome_learner = clone(learner)
|
|
359
|
+
self.treatment_outcome_learner = clone(learner)
|
|
360
|
+
self.control_effect_learner = clone(learner)
|
|
361
|
+
self.treatment_effect_learner = clone(learner)
|
|
362
|
+
|
|
363
|
+
if isinstance(control_outcome_learner, str):
|
|
364
|
+
self.control_outcome_learner = learnerFromString(control_outcome_learner)
|
|
365
|
+
elif control_outcome_learner is not None:
|
|
366
|
+
self.control_outcome_learner = clone(control_outcome_learner)
|
|
367
|
+
|
|
368
|
+
if isinstance(treatment_outcome_learner, str):
|
|
369
|
+
self.treatment_outcome_learner = learnerFromString(treatment_outcome_learner)
|
|
370
|
+
elif treatment_outcome_learner is not None:
|
|
371
|
+
self.treatment_outcome_learner = clone(treatment_outcome_learner)
|
|
372
|
+
|
|
373
|
+
if isinstance(control_effect_learner, str):
|
|
374
|
+
self.control_effect_learner = learnerFromString(control_effect_learner)
|
|
375
|
+
elif control_effect_learner is not None:
|
|
376
|
+
self.control_effect_learner = clone(control_effect_learner)
|
|
377
|
+
|
|
378
|
+
if isinstance(treatment_effect_learner, str):
|
|
379
|
+
self.treatment_effect_learner = learnerFromString(treatment_effect_learner)
|
|
380
|
+
elif treatment_effect_learner is not None:
|
|
381
|
+
self.treatment_effect_learner = clone(treatment_effect_learner)
|
|
382
|
+
|
|
383
|
+
if propensity_score_learner is None:
|
|
384
|
+
self.propensity_score_learner = learnerFromString("LogisticRegression")
|
|
385
|
+
elif isinstance(propensity_score_learner, str):
|
|
386
|
+
self.propensity_score_learner = learnerFromString(propensity_score_learner)
|
|
387
|
+
else:
|
|
388
|
+
self.propensity_score_learner = clone(propensity_score_learner)
|
|
389
|
+
|
|
390
|
+
def fit(
|
|
391
|
+
self,
|
|
392
|
+
X: np.matrix | np.ndarray | pd.DataFrame,
|
|
393
|
+
treatment: np.ndarray | pd.Series,
|
|
394
|
+
y: np.ndarray | pd.Series,
|
|
395
|
+
) -> None:
|
|
396
|
+
"""
|
|
397
|
+
Fit the inference model.
|
|
398
|
+
|
|
399
|
+
Parameters
|
|
400
|
+
----------
|
|
401
|
+
X: np.matrix | np.ndarray | pd.DataFrame
|
|
402
|
+
The covariate matrix.
|
|
403
|
+
treatment: np.ndarray | pd.Series
|
|
404
|
+
The treatment assignment vector.
|
|
405
|
+
y: np.ndarray | pd.Series,
|
|
406
|
+
The outcome vector.
|
|
407
|
+
"""
|
|
408
|
+
|
|
409
|
+
self.control_outcome_learner.fit(X=X[treatment == 0], y=y[treatment == 0])
|
|
410
|
+
self.treatment_outcome_learner.fit(X=X[treatment == 1], y=y[treatment == 1])
|
|
411
|
+
|
|
412
|
+
Delta0 = self.treatment_outcome_learner.predict(X=X[treatment == 0]) - y[treatment == 0]
|
|
413
|
+
Delta1 = y[treatment == 1] - self.control_outcome_learner.predict(X=X[treatment == 1])
|
|
414
|
+
|
|
415
|
+
self.control_effect_learner.fit(X=X[treatment == 0], y=Delta0)
|
|
416
|
+
self.treatment_effect_learner.fit(X=X[treatment == 1], y=Delta1)
|
|
417
|
+
|
|
418
|
+
self.propensity_score_learner.fit(X=X, y=treatment)
|
|
419
|
+
|
|
420
|
+
def predict(
|
|
421
|
+
self,
|
|
422
|
+
X: np.matrix | np.ndarray | pd.DataFrame,
|
|
423
|
+
treatment: np.ndarray | pd.Series | None = None,
|
|
424
|
+
y: np.ndarray | pd.Series | None = None,
|
|
425
|
+
) -> np.ndarray:
|
|
426
|
+
"""
|
|
427
|
+
Predict the Idividual Causal Effect (ICE),
|
|
428
|
+
also referd to as the Individual Treatment Effect (ITE).
|
|
429
|
+
|
|
430
|
+
Parameters
|
|
431
|
+
----------
|
|
432
|
+
X: np.matrix | np.ndarray | pd.DataFrame
|
|
433
|
+
The matrix of covariates.
|
|
434
|
+
treatment (optional): np.ndarray | pd.Series | None
|
|
435
|
+
The vector of treatment assignments.
|
|
436
|
+
y (optional): np.ndarray | pd.Series | None
|
|
437
|
+
The vector of outcomes.
|
|
438
|
+
|
|
439
|
+
Returns
|
|
440
|
+
-------
|
|
441
|
+
np.ndarray
|
|
442
|
+
An array containing the predicted ICE.
|
|
443
|
+
"""
|
|
444
|
+
|
|
445
|
+
tau0 = self.control_effect_learner.predict(X)
|
|
446
|
+
tau1 = self.treatment_effect_learner.predict(X)
|
|
447
|
+
e = self.propensity_score_learner.predict_proba(X)
|
|
448
|
+
|
|
449
|
+
v_func = np.vectorize(lambda e0, e1, t0, t1: e0 * t0 + e1 * t1)
|
|
450
|
+
|
|
451
|
+
return v_func(e[:, 0], e[:, 1], tau0, tau1)
|
|
452
|
+
|
|
453
|
+
def estimate_ate(
|
|
454
|
+
self,
|
|
455
|
+
X: np.matrix | np.ndarray | pd.DataFrame,
|
|
456
|
+
treatment: np.ndarray | pd.Series | None = None,
|
|
457
|
+
y: np.ndarray | pd.Series | None = None,
|
|
458
|
+
pretrain: bool = True,
|
|
459
|
+
) -> float:
|
|
460
|
+
"""
|
|
461
|
+
Predicts the Average Causal Effect (ACE),
|
|
462
|
+
also refered to as the Average Treatment Effect (ATE).
|
|
463
|
+
(The term ATE is used in the method name for compatibility purposes.)
|
|
464
|
+
|
|
465
|
+
Parameters
|
|
466
|
+
----------
|
|
467
|
+
X: np.matrix or np.ndarray or pd.DataFrame
|
|
468
|
+
The matrix of covariates.
|
|
469
|
+
treatment: np.ndarray or pd.Series or None, optional
|
|
470
|
+
The vector of treatment assignments.
|
|
471
|
+
y: np.ndarray or pd.Series, optional
|
|
472
|
+
The vector of outcomes.
|
|
473
|
+
|
|
474
|
+
Returns
|
|
475
|
+
-------
|
|
476
|
+
float
|
|
477
|
+
The value of the ACE.
|
|
478
|
+
"""
|
|
479
|
+
|
|
480
|
+
return self.predict(X, treatment, y).mean()
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
class PStratification:
|
|
484
|
+
"""
|
|
485
|
+
A basic implementation of Propensity Stratification estimator
|
|
486
|
+
based on Lunceford et al. (2004)
|
|
487
|
+
(see https://onlinelibrary.wiley.com/doi/abs/10.1002/sim.1903).
|
|
488
|
+
"""
|
|
489
|
+
|
|
490
|
+
def __init__(self, propensity_score_learner: str | Any | None = None, num_strata: int | None = None) -> None:
|
|
491
|
+
"""
|
|
492
|
+
Initialize an P-Stratification estimator.
|
|
493
|
+
|
|
494
|
+
Parameters
|
|
495
|
+
----------
|
|
496
|
+
propensity_score_learner: str or Any, optional
|
|
497
|
+
Estimator for propensity score.
|
|
498
|
+
If not provided, defaults to LogisticRegression.
|
|
499
|
+
num_strata: int, optional
|
|
500
|
+
The number of strata.
|
|
501
|
+
"""
|
|
502
|
+
|
|
503
|
+
if propensity_score_learner is None:
|
|
504
|
+
self.propensity_score_learner = learnerFromString("LogisticRegression")
|
|
505
|
+
elif isinstance(propensity_score_learner, str):
|
|
506
|
+
self.propensity_score_learner = learnerFromString(propensity_score_learner)
|
|
507
|
+
else:
|
|
508
|
+
self.propensity_score_learner = clone(propensity_score_learner)
|
|
509
|
+
|
|
510
|
+
self.num_strata = num_strata
|
|
511
|
+
|
|
512
|
+
def fit(
|
|
513
|
+
self,
|
|
514
|
+
X: np.matrix | np.ndarray | pd.DataFrame,
|
|
515
|
+
treatment: np.ndarray | pd.Series,
|
|
516
|
+
y: np.ndarray | pd.Series,
|
|
517
|
+
num_strata: int | None = None,
|
|
518
|
+
) -> None:
|
|
519
|
+
"""
|
|
520
|
+
Fit the inference model.
|
|
521
|
+
|
|
522
|
+
Parameters
|
|
523
|
+
----------
|
|
524
|
+
X: np.matrix or np.ndarray or pd.DataFrame
|
|
525
|
+
The matrix of covariates.
|
|
526
|
+
treatment: np.ndarray or pd.Series or None, optional
|
|
527
|
+
The vector of treatment assignments.
|
|
528
|
+
y: np.ndarray or pd.Series, optional
|
|
529
|
+
The vector of outcomes.
|
|
530
|
+
num_strata: int, optional
|
|
531
|
+
The number of strata.
|
|
532
|
+
"""
|
|
533
|
+
|
|
534
|
+
self.propensity_score_learner.fit(X=X, y=treatment)
|
|
535
|
+
|
|
536
|
+
if num_strata is not None:
|
|
537
|
+
self.num_strata = num_strata
|
|
538
|
+
elif self.num_strata is None:
|
|
539
|
+
self.num_strata = 100
|
|
540
|
+
|
|
541
|
+
def predict(
|
|
542
|
+
self,
|
|
543
|
+
X: np.matrix | np.ndarray | pd.DataFrame,
|
|
544
|
+
treatment: np.ndarray | pd.Series | None = None,
|
|
545
|
+
y: np.ndarray | pd.Series | None = None,
|
|
546
|
+
num_strata: int | None = None,
|
|
547
|
+
) -> np.ndarray:
|
|
548
|
+
"""
|
|
549
|
+
Predict the Idividual Causal Effect (ICE),
|
|
550
|
+
also referd to as the Individual Treatment Effect (ITE).
|
|
551
|
+
|
|
552
|
+
Parameters
|
|
553
|
+
----------
|
|
554
|
+
X: np.matrix or np.ndarray or pd.DataFrame
|
|
555
|
+
The matrix of covariates.
|
|
556
|
+
treatment: np.ndarray or pd.Series or None, optional
|
|
557
|
+
The vector of treatment assignments.
|
|
558
|
+
y: np.ndarray or pd.Series, optional
|
|
559
|
+
The vector of outcomes.
|
|
560
|
+
num_strata: int, optional
|
|
561
|
+
The number of strata.
|
|
562
|
+
Default is the lenght of X over 100.
|
|
563
|
+
|
|
564
|
+
Returns
|
|
565
|
+
-------
|
|
566
|
+
np.ndarray
|
|
567
|
+
An array containing the predicted ICE.
|
|
568
|
+
"""
|
|
569
|
+
|
|
570
|
+
if num_strata is not None:
|
|
571
|
+
self.num_strata = num_strata
|
|
572
|
+
elif self.num_strata is None:
|
|
573
|
+
self.num_strata = 100
|
|
574
|
+
|
|
575
|
+
e = self.propensity_score_learner.predict_proba(X)[:, 1]
|
|
576
|
+
e = pd.DataFrame({"e": e}).sort_values("e")
|
|
577
|
+
|
|
578
|
+
indices_strata = np.array_split(e.index, self.num_strata, axis=0)
|
|
579
|
+
|
|
580
|
+
def _tauStratum(indices_stratum):
|
|
581
|
+
y_strat = y[indices_stratum]
|
|
582
|
+
T_strat = treatment[indices_stratum]
|
|
583
|
+
return np.full(len(indices_stratum), y_strat[T_strat == 1].mean() - y_strat[T_strat == 0].mean())
|
|
584
|
+
|
|
585
|
+
tau_list = np.hstack([_tauStratum(id_stratum) for id_stratum in indices_strata])
|
|
586
|
+
|
|
587
|
+
if np.isnan(tau_list[e.index]).sum() != 0:
|
|
588
|
+
print(
|
|
589
|
+
"Warning: The Positivity assumption is not satisfied across "
|
|
590
|
+
"all strata.\nStrata lacking treatment or control have been "
|
|
591
|
+
"filled with numpy.nan values. Consider reducing the number "
|
|
592
|
+
"of strata [num_strata]. "
|
|
593
|
+
)
|
|
594
|
+
|
|
595
|
+
return tau_list[e.index]
|
|
596
|
+
|
|
597
|
+
# ecrire do-calculus not identifiable try catch
|
|
598
|
+
|
|
599
|
+
def estimate_ate(
|
|
600
|
+
self,
|
|
601
|
+
X: np.matrix | np.ndarray | pd.DataFrame,
|
|
602
|
+
treatment: np.ndarray | pd.Series | None = None,
|
|
603
|
+
y: np.ndarray | pd.Series | None = None,
|
|
604
|
+
pretrain: bool = True,
|
|
605
|
+
) -> float:
|
|
606
|
+
"""
|
|
607
|
+
Predicts the Average Causal Effect (ACE),
|
|
608
|
+
also refered to as the Average Treatment Effect (ATE).
|
|
609
|
+
(The term ATE is used in the method name for compatibility purposes.)
|
|
610
|
+
|
|
611
|
+
Parameters
|
|
612
|
+
----------
|
|
613
|
+
X: np.matrix or np.ndarray or pd.DataFrame
|
|
614
|
+
The matrix of covariates.
|
|
615
|
+
treatment: np.ndarray or pd.Series or None, optional
|
|
616
|
+
The vector of treatment assignments.
|
|
617
|
+
y: np.ndarray or pd.Series, optional
|
|
618
|
+
The vector of outcomes.
|
|
619
|
+
|
|
620
|
+
Returns
|
|
621
|
+
-------
|
|
622
|
+
float
|
|
623
|
+
The value of the ACE.
|
|
624
|
+
"""
|
|
625
|
+
|
|
626
|
+
tau = self.predict(X, treatment, y)
|
|
627
|
+
|
|
628
|
+
if np.isnan(tau).sum() != 0:
|
|
629
|
+
print("The ACE is calculated after excluding the NaN values.")
|
|
630
|
+
return tau[~np.isnan(tau)].mean()
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
class IPW:
|
|
634
|
+
"""
|
|
635
|
+
A basic implementation of the Inverse Propensity Score Weighting (IPW)
|
|
636
|
+
estimator based on Lunceford et al. (2004)
|
|
637
|
+
(see https://onlinelibrary.wiley.com/doi/abs/10.1002/sim.1903).
|
|
638
|
+
"""
|
|
639
|
+
|
|
640
|
+
def __init__(
|
|
641
|
+
self,
|
|
642
|
+
propensity_score_learner: str | Any | None = None,
|
|
643
|
+
) -> None:
|
|
644
|
+
"""
|
|
645
|
+
Initialize an IPW estimator.
|
|
646
|
+
|
|
647
|
+
Parameters
|
|
648
|
+
----------
|
|
649
|
+
propensity_score_learner: str or Any, optional
|
|
650
|
+
Estimator for propensity score.
|
|
651
|
+
If not provided, defaults to LogisticRegression.
|
|
652
|
+
"""
|
|
653
|
+
|
|
654
|
+
if propensity_score_learner is None:
|
|
655
|
+
self.propensity_score_learner = learnerFromString("LogisticRegression")
|
|
656
|
+
elif isinstance(propensity_score_learner, str):
|
|
657
|
+
self.propensity_score_learner = learnerFromString(propensity_score_learner)
|
|
658
|
+
else:
|
|
659
|
+
self.propensity_score_learner = clone(propensity_score_learner)
|
|
660
|
+
|
|
661
|
+
def fit(
|
|
662
|
+
self,
|
|
663
|
+
X: np.matrix | np.ndarray | pd.DataFrame,
|
|
664
|
+
treatment: np.ndarray | pd.Series,
|
|
665
|
+
y: np.ndarray | pd.Series,
|
|
666
|
+
) -> None:
|
|
667
|
+
"""
|
|
668
|
+
Fit the inference model.
|
|
669
|
+
|
|
670
|
+
Parameters
|
|
671
|
+
----------
|
|
672
|
+
X: np.matrix or np.ndarray or pd.DataFrame
|
|
673
|
+
The matrix of covariates.
|
|
674
|
+
treatment: np.ndarray or pd.Series or None, optional
|
|
675
|
+
The vector of treatment assignments.
|
|
676
|
+
y: np.ndarray or pd.Series, optional
|
|
677
|
+
The vector of outcomes.
|
|
678
|
+
"""
|
|
679
|
+
|
|
680
|
+
self.propensity_score_learner.fit(X=X, y=treatment)
|
|
681
|
+
|
|
682
|
+
def predict(
|
|
683
|
+
self,
|
|
684
|
+
X: np.matrix | np.ndarray | pd.DataFrame,
|
|
685
|
+
treatment: np.ndarray | pd.Series | None = None,
|
|
686
|
+
y: np.ndarray | pd.Series | None = None,
|
|
687
|
+
) -> np.ndarray:
|
|
688
|
+
"""
|
|
689
|
+
Predict the Idividual Causal Effect (ICE),
|
|
690
|
+
also referd to as the Individual Treatment Effect (ITE).
|
|
691
|
+
|
|
692
|
+
Parameters
|
|
693
|
+
----------
|
|
694
|
+
X: np.matrix or np.ndarray or pd.DataFrame
|
|
695
|
+
The matrix of covariates.
|
|
696
|
+
treatment: np.ndarray or pd.Series or None, optional
|
|
697
|
+
The vector of treatment assignments.
|
|
698
|
+
y: np.ndarray or pd.Series, optional
|
|
699
|
+
The vector of outcomes.
|
|
700
|
+
|
|
701
|
+
Returns
|
|
702
|
+
-------
|
|
703
|
+
np.ndarray
|
|
704
|
+
An array containing the predicted ICE.
|
|
705
|
+
"""
|
|
706
|
+
|
|
707
|
+
e = self.propensity_score_learner.predict_proba(X)[:, 1]
|
|
708
|
+
v_func = np.vectorize(lambda e, t, y: (t / e - (1 - t) / (1 - e)) * y)
|
|
709
|
+
|
|
710
|
+
return v_func(e, treatment, y)
|
|
711
|
+
|
|
712
|
+
def estimate_ate(
|
|
713
|
+
self,
|
|
714
|
+
X: np.matrix | np.ndarray | pd.DataFrame,
|
|
715
|
+
treatment: np.ndarray | pd.Series | None = None,
|
|
716
|
+
y: np.ndarray | pd.Series | None = None,
|
|
717
|
+
pretrain: bool = True,
|
|
718
|
+
) -> float:
|
|
719
|
+
"""
|
|
720
|
+
Predicts the Average Causal Effect (ACE),
|
|
721
|
+
also refered to as the Average Treatment Effect (ATE).
|
|
722
|
+
(The term ATE is used in the method name for compatibility purposes.)
|
|
723
|
+
|
|
724
|
+
Parameters
|
|
725
|
+
----------
|
|
726
|
+
X: np.matrix or np.ndarray or pd.DataFrame
|
|
727
|
+
The matrix of covariates.
|
|
728
|
+
treatment: np.ndarray or pd.Series or None, optional
|
|
729
|
+
The vector of treatment assignments.
|
|
730
|
+
y: np.ndarray or pd.Series, optional
|
|
731
|
+
The vector of outcomes.
|
|
732
|
+
|
|
733
|
+
Returns
|
|
734
|
+
-------
|
|
735
|
+
float
|
|
736
|
+
The value of the ACE.
|
|
737
|
+
"""
|
|
738
|
+
|
|
739
|
+
return self.predict(X, treatment, y).mean()
|
|
740
|
+
|
|
741
|
+
|
|
742
|
+
# WORK IN PROGRESS
|
|
743
|
+
|
|
744
|
+
|
|
745
|
+
def _AIPW(self, learner: Any):
  """
  Augmented Inverse Propensity Weighting (AIPW, "doubly robust") estimate
  of the average causal effect.
  (see https://scholar.harvard.edu/files/aglynn/files/AIPW.pdf)

  NOTE(review): work in progress — this function is written as a method but
  relies on attributes of an enclosing object (`self.df`, `self.T`, `self.Y`,
  `self.X`, `self.e`, `self.cond`, `self.propensityScoreFunc`) that are not
  defined in this module; the intended host class should be confirmed.

  Parameters
  ----------
  learner: Any
    Estimator template; cloned for the two outcome models mu0 and mu1.

  Returns
  -------
  The mean of the per-sample AIPW pseudo-outcomes (a numpy scalar).
  """

  # Two outcome models, fitted separately on control (T == 0) and
  # treated (T == 1) rows. Cloning keeps the caller's template unfitted.
  mu0 = clone(learner)
  mu1 = clone(learner)

  df0 = self.df[self.df[self.T] == 0]
  df1 = self.df[self.df[self.T] == 1]

  mu0.fit(X=df0[[*self.X]], y=df0[self.Y])
  mu1.fit(X=df1[[*self.X]], y=df1[self.Y])

  # Reuse a precomputed propensity model when available.
  e = self.e if self.e is not None else self.propensityScoreFunc()

  # NOTE(review): should be `self.cond is None`; left as-is in this
  # documentation-only pass.
  if self.cond == None:
    df = self.df[[*self.X]]
  else:
    # Single-row frame holding the conditioning values.
    df = pd.DataFrame(columns=[*self.X], index=[0], data=self.cond)

  e_pred = e.predict_proba(df)[:, 1]

  mu0_pred = mu0.predict(df)
  mu1_pred = mu1.predict(df)

  # AIPW pseudo-outcome: the IPW term augmented with outcome-model
  # corrections weighted by the propensity residual (t - e).
  v_func = np.vectorize(lambda e, t, y, mu0, mu1: (t * y - (t - e) * mu1) / e - ((1 - t) * y - (t - e) * mu0) / (1 - e))
  tau_list = v_func(e_pred, self.df[self.T], self.df[self.Y], mu0_pred, mu1_pred)

  return tau_list.mean()
|