pyAgrum-nightly 2.3.0.9.dev202512061764412981__cp310-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyagrum/__init__.py +165 -0
- pyagrum/_pyagrum.so +0 -0
- pyagrum/bnmixture/BNMInference.py +268 -0
- pyagrum/bnmixture/BNMLearning.py +376 -0
- pyagrum/bnmixture/BNMixture.py +464 -0
- pyagrum/bnmixture/__init__.py +60 -0
- pyagrum/bnmixture/notebook.py +1058 -0
- pyagrum/causal/_CausalFormula.py +280 -0
- pyagrum/causal/_CausalModel.py +436 -0
- pyagrum/causal/__init__.py +81 -0
- pyagrum/causal/_causalImpact.py +356 -0
- pyagrum/causal/_dSeparation.py +598 -0
- pyagrum/causal/_doAST.py +761 -0
- pyagrum/causal/_doCalculus.py +361 -0
- pyagrum/causal/_doorCriteria.py +374 -0
- pyagrum/causal/_exceptions.py +95 -0
- pyagrum/causal/_types.py +61 -0
- pyagrum/causal/causalEffectEstimation/_CausalEffectEstimation.py +1175 -0
- pyagrum/causal/causalEffectEstimation/_IVEstimators.py +718 -0
- pyagrum/causal/causalEffectEstimation/_RCTEstimators.py +132 -0
- pyagrum/causal/causalEffectEstimation/__init__.py +46 -0
- pyagrum/causal/causalEffectEstimation/_backdoorEstimators.py +774 -0
- pyagrum/causal/causalEffectEstimation/_causalBNEstimator.py +324 -0
- pyagrum/causal/causalEffectEstimation/_frontdoorEstimators.py +396 -0
- pyagrum/causal/causalEffectEstimation/_learners.py +118 -0
- pyagrum/causal/causalEffectEstimation/_utils.py +466 -0
- pyagrum/causal/notebook.py +171 -0
- pyagrum/clg/CLG.py +658 -0
- pyagrum/clg/GaussianVariable.py +111 -0
- pyagrum/clg/SEM.py +312 -0
- pyagrum/clg/__init__.py +63 -0
- pyagrum/clg/canonicalForm.py +408 -0
- pyagrum/clg/constants.py +54 -0
- pyagrum/clg/forwardSampling.py +202 -0
- pyagrum/clg/learning.py +776 -0
- pyagrum/clg/notebook.py +480 -0
- pyagrum/clg/variableElimination.py +271 -0
- pyagrum/common.py +60 -0
- pyagrum/config.py +319 -0
- pyagrum/ctbn/CIM.py +513 -0
- pyagrum/ctbn/CTBN.py +573 -0
- pyagrum/ctbn/CTBNGenerator.py +216 -0
- pyagrum/ctbn/CTBNInference.py +459 -0
- pyagrum/ctbn/CTBNLearner.py +161 -0
- pyagrum/ctbn/SamplesStats.py +671 -0
- pyagrum/ctbn/StatsIndepTest.py +355 -0
- pyagrum/ctbn/__init__.py +79 -0
- pyagrum/ctbn/constants.py +54 -0
- pyagrum/ctbn/notebook.py +264 -0
- pyagrum/defaults.ini +199 -0
- pyagrum/deprecated.py +95 -0
- pyagrum/explain/_ComputationCausal.py +75 -0
- pyagrum/explain/_ComputationConditional.py +48 -0
- pyagrum/explain/_ComputationMarginal.py +48 -0
- pyagrum/explain/_CustomShapleyCache.py +110 -0
- pyagrum/explain/_Explainer.py +176 -0
- pyagrum/explain/_Explanation.py +70 -0
- pyagrum/explain/_FIFOCache.py +54 -0
- pyagrum/explain/_ShallCausalValues.py +204 -0
- pyagrum/explain/_ShallConditionalValues.py +155 -0
- pyagrum/explain/_ShallMarginalValues.py +155 -0
- pyagrum/explain/_ShallValues.py +296 -0
- pyagrum/explain/_ShapCausalValues.py +208 -0
- pyagrum/explain/_ShapConditionalValues.py +126 -0
- pyagrum/explain/_ShapMarginalValues.py +191 -0
- pyagrum/explain/_ShapleyValues.py +298 -0
- pyagrum/explain/__init__.py +81 -0
- pyagrum/explain/_explGeneralizedMarkovBlanket.py +152 -0
- pyagrum/explain/_explIndependenceListForPairs.py +146 -0
- pyagrum/explain/_explInformationGraph.py +264 -0
- pyagrum/explain/notebook/__init__.py +54 -0
- pyagrum/explain/notebook/_bar.py +142 -0
- pyagrum/explain/notebook/_beeswarm.py +174 -0
- pyagrum/explain/notebook/_showShapValues.py +97 -0
- pyagrum/explain/notebook/_waterfall.py +220 -0
- pyagrum/explain/shapley.py +225 -0
- pyagrum/lib/__init__.py +46 -0
- pyagrum/lib/_colors.py +390 -0
- pyagrum/lib/bn2graph.py +299 -0
- pyagrum/lib/bn2roc.py +1026 -0
- pyagrum/lib/bn2scores.py +217 -0
- pyagrum/lib/bn_vs_bn.py +605 -0
- pyagrum/lib/cn2graph.py +305 -0
- pyagrum/lib/discreteTypeProcessor.py +1102 -0
- pyagrum/lib/discretizer.py +58 -0
- pyagrum/lib/dynamicBN.py +390 -0
- pyagrum/lib/explain.py +57 -0
- pyagrum/lib/export.py +84 -0
- pyagrum/lib/id2graph.py +258 -0
- pyagrum/lib/image.py +387 -0
- pyagrum/lib/ipython.py +307 -0
- pyagrum/lib/mrf2graph.py +471 -0
- pyagrum/lib/notebook.py +1821 -0
- pyagrum/lib/proba_histogram.py +552 -0
- pyagrum/lib/utils.py +138 -0
- pyagrum/pyagrum.py +31495 -0
- pyagrum/skbn/_MBCalcul.py +242 -0
- pyagrum/skbn/__init__.py +49 -0
- pyagrum/skbn/_learningMethods.py +282 -0
- pyagrum/skbn/_utils.py +297 -0
- pyagrum/skbn/bnclassifier.py +1014 -0
- pyagrum_nightly-2.3.0.9.dev202512061764412981.dist-info/LICENSE.md +12 -0
- pyagrum_nightly-2.3.0.9.dev202512061764412981.dist-info/LICENSES/LGPL-3.0-or-later.txt +304 -0
- pyagrum_nightly-2.3.0.9.dev202512061764412981.dist-info/LICENSES/MIT.txt +18 -0
- pyagrum_nightly-2.3.0.9.dev202512061764412981.dist-info/METADATA +145 -0
- pyagrum_nightly-2.3.0.9.dev202512061764412981.dist-info/RECORD +107 -0
- pyagrum_nightly-2.3.0.9.dev202512061764412981.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
############################################################################
|
|
2
|
+
# This file is part of the aGrUM/pyAgrum library. #
|
|
3
|
+
# #
|
|
4
|
+
# Copyright (c) 2005-2025 by #
|
|
5
|
+
# - Pierre-Henri WUILLEMIN(_at_LIP6) #
|
|
6
|
+
# - Christophe GONZALES(_at_AMU) #
|
|
7
|
+
# #
|
|
8
|
+
# The aGrUM/pyAgrum library is free software; you can redistribute it #
|
|
9
|
+
# and/or modify it under the terms of either : #
|
|
10
|
+
# #
|
|
11
|
+
# - the GNU Lesser General Public License as published by #
|
|
12
|
+
# the Free Software Foundation, either version 3 of the License, #
|
|
13
|
+
# or (at your option) any later version, #
|
|
14
|
+
# - the MIT license (MIT), #
|
|
15
|
+
# - or both in dual license, as here. #
|
|
16
|
+
# #
|
|
17
|
+
# (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) #
|
|
18
|
+
# #
|
|
19
|
+
# This aGrUM/pyAgrum library is distributed in the hope that it will be #
|
|
20
|
+
# useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, #
|
|
21
|
+
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS #
|
|
22
|
+
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
|
|
23
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
|
|
24
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, #
|
|
25
|
+
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR #
|
|
26
|
+
# OTHER DEALINGS IN THE SOFTWARE. #
|
|
27
|
+
# #
|
|
28
|
+
# See LICENCES for more details. #
|
|
29
|
+
# #
|
|
30
|
+
# SPDX-FileCopyrightText: Copyright 2005-2025 #
|
|
31
|
+
# - Pierre-Henri WUILLEMIN(_at_LIP6) #
|
|
32
|
+
# - Christophe GONZALES(_at_AMU) #
|
|
33
|
+
# SPDX-License-Identifier: LGPL-3.0-or-later OR MIT #
|
|
34
|
+
# #
|
|
35
|
+
# Contact : info_at_agrum_dot_org #
|
|
36
|
+
# homepage : http://agrum.gitlab.io #
|
|
37
|
+
# gitlab : https://gitlab.com/agrumery/agrum #
|
|
38
|
+
# #
|
|
39
|
+
############################################################################
|
|
40
|
+
|
|
41
|
+
import pyagrum as gum
|
|
42
|
+
from pyagrum.explain._ShapleyValues import ShapleyValues
|
|
43
|
+
from pyagrum.explain._ComputationCausal import CausalComputation
|
|
44
|
+
from pyagrum.explain._CustomShapleyCache import CustomShapleyCache
|
|
45
|
+
from pyagrum.explain._FIFOCache import FIFOCache
|
|
46
|
+
|
|
47
|
+
# Calculus
|
|
48
|
+
import numpy as np
|
|
49
|
+
import pandas as pd
|
|
50
|
+
|
|
51
|
+
# GL
|
|
52
|
+
import warnings
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class CausalShapValues(ShapleyValues, CausalComputation):
    """
    The CausalShapValues class computes the Causal Shapley values for a given target node in a Bayesian Network.

    The background data is reduced to its distinct rows (`self._data`) together with their
    multiplicities (`self.counts`); the baseline is computed once in the constructor and
    reused as the value of the empty coalition.
    """

    def __init__(self, bn, target, background: tuple | None, sample_size=1000, logit=True):
        """
        Parameters:
        ------
        bn : pyagrum.BayesNet
            The Bayesian Network.
        target : int | str
            The node id (or node name) of the target.
        background : Tuple(pandas.DataFrame, bool) | None
            A tuple containing a pandas DataFrame and a boolean indicating whether the DataFrame contains labels or positions.
            If None, a sample of size `sample_size` is generated from `bn`.
        sample_size : int
            The size of the background sample to generate if `background` is None.
        logit : bool
            If True, applies the logit transformation to the probabilities.

        Raises:
        ------
        TypeError : If `background` is neither None nor a tuple, if its first element is not a pandas DataFrame,
            or if `background` is None and `sample_size` is not an integer.
            (Validation of `bn` and `target` is presumably done by the parent initializer, which is not visible here.)
        ValueError : If `background` is None and `sample_size` is not greater than 1, if the background DataFrame is
            empty, or if its number of columns differs from the number of variables of the Bayesian network.
        """
        super().__init__(bn, target, logit)
        # Every node but the target is treated as a feature (column index).
        features = [i for i in range(self.M) if i != self.target]

        # Processing background data
        if background is None:
            if not isinstance(sample_size, int):
                raise TypeError(
                    "When `background` is None, `sample_size` must be an integer, but got {}".format(
                        type(sample_size)
                    )
                )
            if sample_size <= 1:
                raise ValueError("`sample_size` must be greater than 1, but got {}".format(sample_size))
            sample = gum.generateSample(self.bn, sample_size, with_labels=False)[0]
            data = sample.reindex(columns=self.feat_names).to_numpy()
        else:
            if not isinstance(background, tuple):
                raise TypeError("`background` must be a tuple (pd.DataFrame, bool).")
            data, with_labels = background
            if not isinstance(with_labels, bool):
                # Only a warning: any truthy/falsy value is still used as the flag below.
                warnings.warn(
                    f"The second element of `background` should be a boolean, but got {type(with_labels)}. Unexpected calculations may occur."
                )
            if not isinstance(data, pd.DataFrame):
                raise TypeError(
                    "The first element of `background` must be a pandas DataFrame, but got {}".format(type(data))
                )
            if data.shape[0] == 0:
                # Reject an empty background up front instead of emitting the misleading
                # "single row" warning and computing a baseline from no data.
                raise ValueError("Background data can't be empty.")
            if data.shape[0] < 2:
                warnings.warn(
                    "You are giving a single row as a background data, which will lead to biased Shapley values."
                )
            if data.shape[1] != self.M:
                raise ValueError(
                    "The number of columns in the background data must match the number of variables in the Bayesian network. Although values outside the Markov blanket, including the target, are unused, they are required for indexing purposes."
                )
            # Put the columns in the order of `self.feat_names` before dropping to numpy.
            data = data.reindex(columns=self.feat_names).to_numpy()
            if with_labels:
                data = self._labelToPos_df(data, features)

        # Keep only distinct rows and remember how many times each one occurred.
        self._data, self.counts = np.unique(data, return_counts=True, axis=0)
        self._N = len(self._data)

        # Calculating the baseline
        self.baseline = self.func(
            self._value(
                data=self._data,
                counts=self.counts,
                elements=features,
                sigma=self._mb,
                cache=FIFOCache(100),
                func1=self._posterior,
                params1={},
                func2=self._weight,
                params2={"doLazy": gum.LazyPropagation(self.bn)},
            )
        )

    def _interventional_value(self, doNet, tau, alpha, sigma, elements, markovImpact):
        """
        Computes the (transformed) value of coalition `tau` instantiated to `alpha`.

        Private helper shared by `_shap_1dim` and `_shap_ndim`; it performs, in the same order,
        the steps both methods previously inlined.

        Parameters:
        ------
        doNet : the network returned by `self._doCalculus` for `tau`; it is handed to `self._chgCpt`,
            which (per the original comments) changes its conditional probability tables.
        tau : the coalition (node ids).
        alpha : the values taken by the nodes of `tau` for the explained instance.
        sigma : the nodes outside the coalition, as returned by `self._outOfCoalition`.
        elements : the features being explained.
        markovImpact : FIFOCache
            Cache forwarded to `self._value`.

        Returns:
        ------
        The result of `self.func` applied to `self._value(...)`.
        """
        self._chgCpt(doNet, tau, alpha)  # Changes the conditional probability tables to perform do-calculus.
        # A fresh inference engine is built on the modified network for each instantiation.
        doLazy = gum.LazyPropagation(doNet)
        doLazy.addTarget(self.target)
        # Selection of background rows for (tau, alpha), as decided by `self._extract`.
        idx = self._extract(self._data, tau, alpha)
        return self.func(
            self._value(
                data=self._data[idx],
                counts=self.counts[idx],
                elements=elements,
                sigma=sigma,
                cache=markovImpact,
                func1=self._posterior,
                params1={},
                func2=self._weight,
                params2={"doLazy": doLazy},
            )
        )

    def _shap_1dim(self, x, elements):
        """
        Computes the Shapley values for a 1-dimensional input x (local explanation).

        Parameters:
        ------
        x : the instance to explain; indexed as `x[tau]` (presumably a 1-D numpy array of positions).
        elements : the features for which coalitions are enumerated.

        Returns:
        ------
        numpy.ndarray of shape (self.M, domain size of the target).
        """
        contributions = np.zeros((self.M, self.bn.variable(self.target).domainSize()))  # Initializes contributions array.
        cache = CustomShapleyCache(5000)
        markovImpact = FIFOCache(1000)
        cache.set(0, (), self.baseline)  # Sets the baseline probability in the cache.
        coalitions = self._coalitions(elements)  # Compute the coalitions
        n_elements = len(elements)

        for tau in coalitions:
            self.ie.eraseAllEvidence()  # Clears all evidence from the inference engine.
            doNet = self._doCalculus(self.bn, tau)  # Creates a new Bayesian Network to perform do-calculus.
            sigma = self._outOfCoalition(tau, range(self.M))  # Extracts the nodes outside the coalition tau.
            alpha = x[tau]  # Instantiation of tau
            posterior_with = self._interventional_value(doNet, tau, alpha, sigma, elements, markovImpact)

            cache.set(0, tuple(tau), posterior_with)
            # Contribution of each feature
            for t in tau:
                # The coalition without `t`; the empty key () maps to the baseline stored above.
                key = tuple(f for f in tau if f != t)
                posterior_without = cache.get(0, key)
                contributions[t] += self._shap_term(posterior_with, posterior_without, n_elements, len(tau) - 1)
        return contributions

    def _shap_ndim(self, x, elements):
        """
        Computes the Shapley values for every row of a 2-dimensional input x.

        Parameters:
        ------
        x : the instances to explain; indexed as `x[i, tau]` (presumably a 2-D numpy array of positions).
        elements : the features for which coalitions are enumerated.

        Returns:
        ------
        numpy.ndarray of shape (self.M, len(x), domain size of the target).
        """
        contributions = np.zeros(
            (self.M, len(x), self.bn.variable(self.target).domainSize())
        )  # Initializes contributions array.
        cache = CustomShapleyCache(5000)  # Initializes the custom cache.
        markovImpact = FIFOCache(1000)
        cache.set(0, (), self.baseline)  # Sets the baseline probability in the cache.
        coalitions = self._coalitions(elements)  # Compute the coalitions
        n_elements = len(elements)

        for tau in coalitions:
            self.ie.eraseAllEvidence()  # Clears all evidence from the inference engine.
            doNet = self._doCalculus(self.bn, tau)  # Creates a new Bayesian Network to perform do-calculus.
            sigma = self._outOfCoalition(tau, range(self.M))  # Extracts the nodes outside the coalition tau.

            for i in range(len(x)):  # Iterates over each example in x
                alpha = x[i, tau]  # Instantiation of tau
                posterior_with = self._interventional_value(doNet, tau, alpha, sigma, elements, markovImpact)

                cache.set(i, tuple(tau), posterior_with)
                # Contribution of each feature
                for t in tau:
                    key = tuple(f for f in tau if f != t)
                    # The baseline is stored only once, under (0, ()), whatever the example index.
                    posterior_without = cache.get(i, key) if key else cache.get(0, ())
                    contributions[t, i] += self._shap_term(
                        posterior_with, posterior_without, n_elements, len(tau) - 1
                    )
        return contributions
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
############################################################################
|
|
2
|
+
# This file is part of the aGrUM/pyAgrum library. #
|
|
3
|
+
# #
|
|
4
|
+
# Copyright (c) 2005-2025 by #
|
|
5
|
+
# - Pierre-Henri WUILLEMIN(_at_LIP6) #
|
|
6
|
+
# - Christophe GONZALES(_at_AMU) #
|
|
7
|
+
# #
|
|
8
|
+
# The aGrUM/pyAgrum library is free software; you can redistribute it #
|
|
9
|
+
# and/or modify it under the terms of either : #
|
|
10
|
+
# #
|
|
11
|
+
# - the GNU Lesser General Public License as published by #
|
|
12
|
+
# the Free Software Foundation, either version 3 of the License, #
|
|
13
|
+
# or (at your option) any later version, #
|
|
14
|
+
# - the MIT license (MIT), #
|
|
15
|
+
# - or both in dual license, as here. #
|
|
16
|
+
# #
|
|
17
|
+
# (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) #
|
|
18
|
+
# #
|
|
19
|
+
# This aGrUM/pyAgrum library is distributed in the hope that it will be #
|
|
20
|
+
# useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, #
|
|
21
|
+
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS #
|
|
22
|
+
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
|
|
23
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
|
|
24
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, #
|
|
25
|
+
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR #
|
|
26
|
+
# OTHER DEALINGS IN THE SOFTWARE. #
|
|
27
|
+
# #
|
|
28
|
+
# See LICENCES for more details. #
|
|
29
|
+
# #
|
|
30
|
+
# SPDX-FileCopyrightText: Copyright 2005-2025 #
|
|
31
|
+
# - Pierre-Henri WUILLEMIN(_at_LIP6) #
|
|
32
|
+
# - Christophe GONZALES(_at_AMU) #
|
|
33
|
+
# SPDX-License-Identifier: LGPL-3.0-or-later OR MIT #
|
|
34
|
+
# #
|
|
35
|
+
# Contact : info_at_agrum_dot_org #
|
|
36
|
+
# homepage : http://agrum.gitlab.io #
|
|
37
|
+
# gitlab : https://gitlab.com/agrumery/agrum #
|
|
38
|
+
# #
|
|
39
|
+
############################################################################
|
|
40
|
+
|
|
41
|
+
from pyagrum.explain._ShapleyValues import ShapleyValues
|
|
42
|
+
from pyagrum.explain._CustomShapleyCache import CustomShapleyCache
|
|
43
|
+
|
|
44
|
+
# Calculations
|
|
45
|
+
import numpy as np
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class ConditionalShapValues(ShapleyValues):
    """
    The ConditionalShapValues class computes the conditional Shapley values for a given target node in a Bayesian Network.

    Coalition values are posteriors of the target obtained from the inference engine `self.ie`
    after setting the coalition's nodes as evidence; no background data is involved.
    """

    def __init__(self, bn, target, logit=True):
        """
        Parameters:
        ------
        bn : pyagrum.BayesNet
            The Bayesian Network.
        target : int | str
            The node id (or node name) of the target.
        logit : bool
            If True, applies the logit transformation to the probabilities.

        Raises:
        ------
        TypeError : If bn is not a gum.BayesNet or target is not an integer or string.
        ValueError : If target is not a valid node id in the Bayesian Network.
        (Both are presumably raised by the parent initializer, which is not visible here.)
        """
        super().__init__(bn, target, logit)  # Initializes the ShapleyValues class.
        # The baseline is the transformed posterior of the target as returned right after initialization.
        prior = self.ie.posterior(self.target).toarray()
        self.baseline = self.func(prior)

    def _coalition_contribution(self, k, ex, _M, feature, nodes_id, nodes_vals, cache):
        """
        Returns the Shapley term of `feature` within the coalition `nodes_id` for example `ex`.

        The value of the coalition is stored in `cache` only on the first call for that
        coalition (k == 0); the following calls (k > 0) read it back.

        Parameters:
        ------
        k : position of `feature` inside `nodes_id`.
        ex : index of the example, used as the first component of the cache key.
        _M : number of features, forwarded to `self._shap_term`.
        feature : node id of the feature whose contribution is computed.
        nodes_id : the coalition (node ids).
        nodes_vals : values of the coalition's nodes for this example.
        cache : CustomShapleyCache holding the coalition values; the baseline is expected under (-1, "").
        """
        with_key, without_key, minimal_key = cache.generate_keys(self.bn, self.target, feature, nodes_id)
        has_several = len(with_key) > 1

        if k == 0:
            self.ie.eraseAllEvidence()
            if len(with_key) != len(minimal_key):
                # Some nodes of the coalition do not impact the target: the value has
                # already been calculated and is simply copied.
                if has_several:
                    coalition_value = cache.get(ex, minimal_key)
                else:
                    coalition_value = cache.get(-1, "")
            else:
                # All nodes impact the target: the posterior has to be calculated.
                evidence = {node: int(val) for node, val in zip(nodes_id, nodes_vals)}
                self.ie.updateEvidence(evidence)  # Updates the evidence in the inference engine.
                coalition_value = self.func(self.ie.posterior(self.target).toarray())
            cache.set(ex, with_key, coalition_value)

        value_with = cache.get(ex, with_key)
        if has_several:
            value_without = cache.get(ex, without_key)
        else:
            # Removing the only member of the coalition leaves the baseline.
            value_without = cache.get(-1, "")
        return self._shap_term(value_with, value_without, _M, len(nodes_id) - 1)

    def _shap_1dim(self, x, elements) -> np.ndarray:
        """
        Computes the conditional Shapley values of a single instance `x`.

        Returns an array of shape (self.M, domain size of the target).
        """
        n_labels = self.bn.variable(self.target).domainSize()
        contributions = np.zeros((self.M, n_labels))  # One row per node, one column per target label.
        self.ie.eraseAllEvidence()  # Start from an inference engine without evidence.
        cache = CustomShapleyCache(5000)
        cache.set(-1, "", self.baseline)  # The baseline is the value of the empty coalition.
        n_elements = len(elements)

        for nodes_id in self._coalitions(elements):
            nodes_vals = x[nodes_id]  # Values of the coalition's nodes.
            for k, feature in enumerate(nodes_id):
                term = self._coalition_contribution(k, 0, n_elements, int(feature), nodes_id, nodes_vals, cache)
                contributions[feature] += term
        return contributions

    def _shap_ndim(self, x, elements) -> np.ndarray:
        """
        Computes the conditional Shapley values of every row of `x`.

        Returns an array of shape (self.M, len(x), domain size of the target).
        """
        n_labels = self.bn.variable(self.target).domainSize()
        contributions = np.zeros((self.M, len(x), n_labels))
        self.ie.eraseAllEvidence()  # Start from an inference engine without evidence.
        cache = CustomShapleyCache(5000)
        cache.set(-1, "", self.baseline)  # The baseline is the value of the empty coalition.
        n_elements = len(elements)

        for nodes_id in self._coalitions(elements):
            coalition_columns = x[:, nodes_id]  # Values of the coalition's nodes, one row per example.
            for ex, nodes_vals in enumerate(coalition_columns):
                for k, feature in enumerate(nodes_id):
                    term = self._coalition_contribution(
                        k, ex, n_elements, int(feature), nodes_id, nodes_vals, cache
                    )
                    contributions[feature, ex] += term
        return contributions
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
############################################################################
|
|
2
|
+
# This file is part of the aGrUM/pyAgrum library. #
|
|
3
|
+
# #
|
|
4
|
+
# Copyright (c) 2005-2025 by #
|
|
5
|
+
# - Pierre-Henri WUILLEMIN(_at_LIP6) #
|
|
6
|
+
# - Christophe GONZALES(_at_AMU) #
|
|
7
|
+
# #
|
|
8
|
+
# The aGrUM/pyAgrum library is free software; you can redistribute it #
|
|
9
|
+
# and/or modify it under the terms of either : #
|
|
10
|
+
# #
|
|
11
|
+
# - the GNU Lesser General Public License as published by #
|
|
12
|
+
# the Free Software Foundation, either version 3 of the License, #
|
|
13
|
+
# or (at your option) any later version, #
|
|
14
|
+
# - the MIT license (MIT), #
|
|
15
|
+
# - or both in dual license, as here. #
|
|
16
|
+
# #
|
|
17
|
+
# (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) #
|
|
18
|
+
# #
|
|
19
|
+
# This aGrUM/pyAgrum library is distributed in the hope that it will be #
|
|
20
|
+
# useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, #
|
|
21
|
+
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS #
|
|
22
|
+
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
|
|
23
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
|
|
24
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, #
|
|
25
|
+
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR #
|
|
26
|
+
# OTHER DEALINGS IN THE SOFTWARE. #
|
|
27
|
+
# #
|
|
28
|
+
# See LICENCES for more details. #
|
|
29
|
+
# #
|
|
30
|
+
# SPDX-FileCopyrightText: Copyright 2005-2025 #
|
|
31
|
+
# - Pierre-Henri WUILLEMIN(_at_LIP6) #
|
|
32
|
+
# - Christophe GONZALES(_at_AMU) #
|
|
33
|
+
# SPDX-License-Identifier: LGPL-3.0-or-later OR MIT #
|
|
34
|
+
# #
|
|
35
|
+
# Contact : info_at_agrum_dot_org #
|
|
36
|
+
# homepage : http://agrum.gitlab.io #
|
|
37
|
+
# gitlab : https://gitlab.com/agrumery/agrum #
|
|
38
|
+
# #
|
|
39
|
+
############################################################################
|
|
40
|
+
|
|
41
|
+
from pyagrum.explain._ShapleyValues import ShapleyValues
|
|
42
|
+
from pyagrum.explain._ComputationMarginal import MarginalComputation
|
|
43
|
+
from pyagrum.explain._CustomShapleyCache import CustomShapleyCache
|
|
44
|
+
from pyagrum.explain._FIFOCache import FIFOCache
|
|
45
|
+
|
|
46
|
+
# Calculations
|
|
47
|
+
import pandas as pd
|
|
48
|
+
import numpy as np
|
|
49
|
+
|
|
50
|
+
# aGrUM
|
|
51
|
+
import pyagrum as gum
|
|
52
|
+
|
|
53
|
+
# GL
|
|
54
|
+
import warnings
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class MarginalShapValues(ShapleyValues, MarginalComputation):
    """
    The MarginalShapValues class computes the Marginal Shapley values for a given target node in a Bayesian Network.

    The background data is reduced to its distinct rows (`self._data`) together with their
    multiplicities (`self.counts`); a coalition is evaluated by overwriting the coalition's
    columns of that background with the values of the explained instance.
    """

    def __init__(self, bn: gum.BayesNet, target: int | str, background: tuple | None, sample_size=1000, logit=True):
        """
        Parameters:
        ------
        bn : pyagrum.BayesNet
            The Bayesian Network.
        target : int | str
            The node id (or node name) of the target.
        background : Tuple(pandas.DataFrame, bool) | None
            A tuple containing a pandas DataFrame and a boolean indicating whether the DataFrame contains labels or positions.
            If None, a sample of size `sample_size` is generated from `bn`.
        sample_size : int
            The size of the background sample to generate if `background` is None.
            A warning is emitted when it is smaller than 10.
        logit : bool
            If True, applies the logit transformation to the probabilities.

        Raises:
        ------
        TypeError : If `background` is neither None nor a tuple, if its first element is not a pandas DataFrame,
            or if `background` is None and `sample_size` is not an integer.
            (Validation of `bn` and `target` is presumably done by the parent initializer, which is not visible here.)
        ValueError : If `background` is None and `sample_size` is smaller than 1, if the background data is empty,
            or if its number of columns differs from the number of variables of the Bayesian network.
        """
        super().__init__(bn, target, logit)
        self._mb = self._markov_blanket()
        # Every node but the target is treated as a feature (column index).
        features = [i for i in range(self.M) if i != self.target]

        # Processing background data
        if background is None:
            if not isinstance(sample_size, int):
                raise TypeError(
                    "When `background` is None, `sample_size` must be an integer, but got {}".format(
                        type(sample_size)
                    )
                )
            if sample_size < 1:
                # The message states the bound that is actually enforced (1 is accepted).
                raise ValueError("`sample_size` must be at least 1, but got {}".format(sample_size))
            if sample_size < 10:
                warnings.warn("The sample size is small, which may lead to biased Shapley values.")
            sample = gum.generateSample(self.bn, sample_size, with_labels=False)[0]
            data = sample.reindex(columns=self.feat_names).to_numpy()
        else:
            if not isinstance(background, tuple):
                raise TypeError("`background` must be a tuple (pd.DataFrame, bool).")
            data, with_labels = background
            if not isinstance(with_labels, bool):
                # Only a warning: any truthy/falsy value is still used as the flag below.
                warnings.warn(
                    f"The second element of `background` should be a boolean, but got {type(with_labels)}. Unexpected calculations may occur."
                )
            if not isinstance(data, pd.DataFrame):
                raise TypeError(
                    "The first element of `background` must be a pandas DataFrame, but got {}".format(type(data))
                )
            if data.shape[0] < 2:
                warnings.warn(
                    "You are giving a single row as a background data, which will lead to biased Shapley values."
                )
            if data.shape[1] != self.M:
                raise ValueError(
                    "The number of columns in the background data must match the number of variables in the Bayesian network. Although values outside the Markov blanket, including the target, are unused, they are required for indexing purposes."
                )
            # Put the columns in the order of `self.feat_names` before dropping to numpy.
            data = data.reindex(columns=self.feat_names).to_numpy()
            if with_labels:
                data = self._labelToPos_df(data, features)

        # Keep only distinct rows and remember how many times each one occurred.
        self._data, self.counts = np.unique(data, axis=0, return_counts=True)
        self._N = int(np.sum(self.counts))  # Total number of background rows, duplicates included.
        if self._N == 0:
            raise ValueError("Background data can't be empty.")

        self.baseline = self.func(
            self._value(
                data=self._data,
                counts=self.counts,
                elements=features,
                sigma=[],
                cache=FIFOCache(100),
                func1=self._posterior,
                params1={},
                func2=self._weight,
                params2={},
            )
        )

    def _coalition_contribution(self, k, ex, elements, feature, markovImpact, nodes_id, nodes_vals, cache):
        """
        Computes the contribution of a coalition to the Shapley value.

        The value of the coalition is computed and stored in `cache` only on the first call
        for that coalition (k == 0); the following calls (k > 0) read it back.

        Parameters:
        ------
        k : position of `feature` inside `nodes_id`.
        ex : index of the example, used as the first component of the cache key.
        elements : the features being explained.
        feature : node id of the feature whose contribution is computed.
        markovImpact : FIFOCache forwarded to `self._value`.
        nodes_id : the coalition (node ids).
        nodes_vals : values of the coalition's nodes for this example.
        cache : CustomShapleyCache holding the coalition values; the baseline is expected under (-1, ()).
        """
        key1, key2, _ = cache.generate_keys(self.bn, self.target, feature, nodes_id)
        if k == 0:
            # Work on a copy so that the stored background is left untouched.
            interv = self._data.copy()
            interv[:, nodes_id] = nodes_vals  # Every background row takes the instance's values on the coalition.
            coalition_value = self.func(
                self._value(
                    data=interv,
                    counts=self.counts,
                    elements=elements,
                    sigma=[],
                    cache=markovImpact,
                    func1=self._posterior,
                    params1={},
                    func2=self._weight,
                    params2={},
                )
            )
            cache.set(ex, key1, coalition_value)

        posterior_prob_with = cache.get(ex, key1)
        # Removing the only member of the coalition leaves the baseline.
        posterior_prob_without = cache.get(ex, key2) if len(key1) > 1 else cache.get(-1, ())
        return self._shap_term(posterior_prob_with, posterior_prob_without, len(elements), len(nodes_id) - 1)

    def _shap_1dim(self, x, elements):
        """
        Computes the marginal Shapley values of a single instance `x`.

        Returns an array of shape (self.M, domain size of the target).
        """
        contributions = np.zeros((self.M, self.bn.variable(self.target).domainSize()))  # Initializes contributions array.
        self.ie.eraseAllEvidence()  # Clears all evidence from the inference engine.
        markovImpact = FIFOCache(2000)
        cache = CustomShapleyCache(5000)  # Initializes the custom cache.
        cache.set(-1, (), self.baseline)  # Sets the baseline probability in the cache.
        coalitions = self._coalitions(elements)
        for nodes_id in coalitions:
            nodes_vals = x[nodes_id]  # Gets the values of the nodes in the coalition.
            for k, feature in enumerate(nodes_id):
                # Accumulates the contribution for each feature.
                contributions[feature] += self._coalition_contribution(
                    k, 0, elements, int(feature), markovImpact, nodes_id, nodes_vals, cache
                )
        return contributions

    def _shap_ndim(self, x, elements):
        """
        Computes the marginal Shapley values of every row of `x`.

        Returns an array of shape (self.M, len(x), domain size of the target).
        """
        contributions = np.zeros(
            (self.M, len(x), self.bn.variable(self.target).domainSize())
        )  # Initializes contributions array.
        self.ie.eraseAllEvidence()  # Clears all evidence from the inference engine.
        markovImpact = FIFOCache(2000)
        cache = CustomShapleyCache(5000)  # Initializes the custom cache.
        cache.set(-1, (), self.baseline)  # Sets the baseline probability in the cache.
        coalitions = self._coalitions(elements)  # Generates coalitions.
        for nodes_id in coalitions:
            for ex, nodes_values in enumerate(x[:, nodes_id]):
                for k, feature in enumerate(nodes_id):
                    contributions[feature, ex] += self._coalition_contribution(
                        k, ex, elements, int(feature), markovImpact, nodes_id, nodes_values, cache
                    )
        return contributions
|