pyAgrum-nightly 2.2.1.9.dev202510271761405498__cp310-abi3-win_amd64.whl → 2.3.0.9.dev202510291761586496__cp310-abi3-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyAgrum-nightly might be problematic. Click here for more details.
- pyagrum/_pyagrum.pyd +0 -0
- pyagrum/common.py +1 -1
- pyagrum/config.py +1 -0
- pyagrum/explain/_ComputationCausal.py +75 -0
- pyagrum/explain/_ComputationConditional.py +48 -0
- pyagrum/explain/_ComputationMarginal.py +48 -0
- pyagrum/explain/_CustomShapleyCache.py +110 -0
- pyagrum/explain/_Explainer.py +176 -0
- pyagrum/explain/_Explanation.py +70 -0
- pyagrum/explain/_FIFOCache.py +54 -0
- pyagrum/explain/_ShallCausalValues.py +204 -0
- pyagrum/explain/_ShallConditionalValues.py +155 -0
- pyagrum/explain/_ShallMarginalValues.py +155 -0
- pyagrum/explain/_ShallValues.py +296 -0
- pyagrum/explain/_ShapCausalValues.py +208 -0
- pyagrum/explain/_ShapConditionalValues.py +126 -0
- pyagrum/explain/_ShapMarginalValues.py +191 -0
- pyagrum/explain/_ShapleyValues.py +298 -0
- pyagrum/explain/__init__.py +81 -0
- pyagrum/explain/_explGeneralizedMarkovBlanket.py +152 -0
- pyagrum/explain/_explIndependenceListForPairs.py +146 -0
- pyagrum/explain/_explInformationGraph.py +264 -0
- pyagrum/explain/notebook/__init__.py +54 -0
- pyagrum/explain/notebook/_bar.py +142 -0
- pyagrum/explain/notebook/_beeswarm.py +174 -0
- pyagrum/explain/notebook/_showShapValues.py +97 -0
- pyagrum/explain/notebook/_waterfall.py +220 -0
- pyagrum/explain/shapley.py +225 -0
- pyagrum/lib/explain.py +11 -490
- pyagrum/pyagrum.py +17 -10
- {pyagrum_nightly-2.2.1.9.dev202510271761405498.dist-info → pyagrum_nightly-2.3.0.9.dev202510291761586496.dist-info}/METADATA +1 -1
- {pyagrum_nightly-2.2.1.9.dev202510271761405498.dist-info → pyagrum_nightly-2.3.0.9.dev202510291761586496.dist-info}/RECORD +36 -12
- pyagrum/lib/shapley.py +0 -661
- {pyagrum_nightly-2.2.1.9.dev202510271761405498.dist-info → pyagrum_nightly-2.3.0.9.dev202510291761586496.dist-info}/LICENSE.md +0 -0
- {pyagrum_nightly-2.2.1.9.dev202510271761405498.dist-info → pyagrum_nightly-2.3.0.9.dev202510291761586496.dist-info}/LICENSES/LGPL-3.0-or-later.txt +0 -0
- {pyagrum_nightly-2.2.1.9.dev202510271761405498.dist-info → pyagrum_nightly-2.3.0.9.dev202510291761586496.dist-info}/LICENSES/MIT.txt +0 -0
- {pyagrum_nightly-2.2.1.9.dev202510271761405498.dist-info → pyagrum_nightly-2.3.0.9.dev202510291761586496.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
############################################################################
|
|
2
|
+
# This file is part of the aGrUM/pyAgrum library. #
|
|
3
|
+
# #
|
|
4
|
+
# Copyright (c) 2005-2025 by #
|
|
5
|
+
# - Pierre-Henri WUILLEMIN(_at_LIP6) #
|
|
6
|
+
# - Christophe GONZALES(_at_AMU) #
|
|
7
|
+
# #
|
|
8
|
+
# The aGrUM/pyAgrum library is free software; you can redistribute it #
|
|
9
|
+
# and/or modify it under the terms of either : #
|
|
10
|
+
# #
|
|
11
|
+
# - the GNU Lesser General Public License as published by #
|
|
12
|
+
# the Free Software Foundation, either version 3 of the License, #
|
|
13
|
+
# or (at your option) any later version, #
|
|
14
|
+
# - the MIT license (MIT), #
|
|
15
|
+
# - or both in dual license, as here. #
|
|
16
|
+
# #
|
|
17
|
+
# (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) #
|
|
18
|
+
# #
|
|
19
|
+
# This aGrUM/pyAgrum library is distributed in the hope that it will be #
|
|
20
|
+
# useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, #
|
|
21
|
+
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS #
|
|
22
|
+
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
|
|
23
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
|
|
24
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, #
|
|
25
|
+
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR #
|
|
26
|
+
# OTHER DEALINGS IN THE SOFTWARE. #
|
|
27
|
+
# #
|
|
28
|
+
# See LICENCES for more details. #
|
|
29
|
+
# #
|
|
30
|
+
# SPDX-FileCopyrightText: Copyright 2005-2025 #
|
|
31
|
+
# - Pierre-Henri WUILLEMIN(_at_LIP6) #
|
|
32
|
+
# - Christophe GONZALES(_at_AMU) #
|
|
33
|
+
# SPDX-License-Identifier: LGPL-3.0-or-later OR MIT #
|
|
34
|
+
# #
|
|
35
|
+
# Contact : info_at_agrum_dot_org #
|
|
36
|
+
# homepage : http://agrum.gitlab.io #
|
|
37
|
+
# gitlab : https://gitlab.com/agrumery/agrum #
|
|
38
|
+
# #
|
|
39
|
+
############################################################################
|
|
40
|
+
|
|
41
|
+
from pyagrum.explain._ShapleyValues import ShapleyValues
|
|
42
|
+
from pyagrum.explain._ComputationMarginal import MarginalComputation
|
|
43
|
+
from pyagrum.explain._CustomShapleyCache import CustomShapleyCache
|
|
44
|
+
from pyagrum.explain._FIFOCache import FIFOCache
|
|
45
|
+
|
|
46
|
+
# Calculations
|
|
47
|
+
import pandas as pd
|
|
48
|
+
import numpy as np
|
|
49
|
+
|
|
50
|
+
# aGrUM
|
|
51
|
+
import pyagrum as gum
|
|
52
|
+
|
|
53
|
+
# GL
|
|
54
|
+
import warnings
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class MarginalShapValues(ShapleyValues, MarginalComputation):
    """
    The MarginalShapValues class computes the Marginal Shapley values for a given target node in a Bayesian Network.

    The background data is either supplied by the caller or sampled from the network. Duplicate background rows are
    collapsed with `numpy.unique`; the unique rows are kept in `self._data` and their multiplicities in `self.counts`.
    """

    def __init__(self, bn: gum.BayesNet, target: int, background: tuple | None, sample_size=1000, logit=True):
        """
        Parameters:
        ------
        bn : pyagrum.BayesNet
            The Bayesian Network.
        target : int | str
            The node id (or node name) of the target.
        background : Tuple(pandas.DataFrame, bool) | None
            A tuple containing a pandas DataFrame and a boolean indicating whether the DataFrame contains labels (True)
            or positions (False). If None, a background sample of `sample_size` rows is generated from `bn`.
        sample_size : int
            The size of the background sample to generate if `background` is None.
        logit : bool
            If True, applies the logit transformation to the probabilities.

        Raises:
        ------
        TypeError : If `background` is neither None nor a tuple, if its first element is not a pandas DataFrame, or if
            `sample_size` is not an integer when `background` is None. The base initializer also rejects a target that
            is neither an integer nor a string.
        ValueError : If target is not a valid node in the Bayesian Network, if `sample_size` is smaller than 1, if the
            background data is empty, or if its number of columns differs from the number of variables.
        """
        super().__init__(bn, target, logit)
        self._mb = self._markov_blanket()

        # Processing background data
        if background is None:
            if not isinstance(sample_size, int):
                raise TypeError(
                    "When `background`=None, `sample_size` must be an integer, but got {}".format(type(sample_size))
                )
            if sample_size < 1:
                raise ValueError("`sample_size` must be at least 1, but got {}".format(sample_size))
            if sample_size < 10:
                warnings.warn("The sample size is small, which may lead to biased Shapley values.")
            # Sample positions (not labels) and put the columns in the order of `self.feat_names`.
            data = (
                gum.generateSample(self.bn, sample_size, with_labels=False)[0]
                .reindex(columns=self.feat_names)
                .to_numpy()
            )
        else:
            if not isinstance(background, tuple):
                raise TypeError("`background` must be a tuple (pd.DataFrame, bool).")
            data, with_labels = background
            if not isinstance(with_labels, bool):
                warnings.warn(
                    f"The second element of `background` should be a boolean, but got {type(with_labels)}. Unexpected calculations may occur."
                )
            if not isinstance(data, pd.DataFrame):
                raise TypeError(
                    "The first element of `background` must be a pandas DataFrame, but got {}".format(type(data))
                )
            # Reject an empty background up front, before the "single row" warning and the label conversion.
            if data.shape[0] == 0:
                raise ValueError("Background data can't be empty.")
            if data.shape[0] < 2:
                warnings.warn(
                    "You are giving a single row as a background data, which will lead to biased Shapley values."
                )
            if data.shape[1] != self.M:
                raise ValueError(
                    "The number of columns in the background data must match the number of variables in the Bayesian network. Although values outside the Markov blanket, including the target, are unused, they are required for indexing purposes."
                )
            data = data.reindex(columns=self.feat_names).to_numpy()
            if with_labels:
                # Convert the labels of every non-target column to positions.
                data = self._labelToPos_df(data, [i for i in range(self.M) if i != self.target])

        # Collapse duplicate rows and remember how many times each unique row occurred.
        self._data, self.counts = np.unique(data, axis=0, return_counts=True)
        self._N = int(np.sum(self.counts))
        if self._N == 0:
            raise ValueError("Background data can't be empty.")

        # Baseline: value of the empty coalition (sigma=[]) over the whole background, passed through `self.func`.
        self.baseline = self.func(
            self._value(
                data=self._data,
                counts=self.counts,
                elements=[i for i in range(self.M) if i != self.target],
                sigma=[],
                cache=FIFOCache(100),
                func1=self._posterior,
                params1={},
                func2=self._weight,
                params2={},
            )
        )

    def _coalition_contribution(self, k, ex, elements, feature, markovImpact, nodes_id, nodes_vals, cache):
        """
        Computes the contribution of a coalition to the Shapley value of `feature`.

        Parameters:
        ------
        k : int
            Position of `feature` inside `nodes_id`. The coalition value is computed and cached only when k == 0.
        ex : int
            Index of the explained instance, used as the first part of the cache key.
        elements : list[int]
            Ids of the features taking part in the explanation.
        feature : int
            Id of the feature whose contribution is returned.
        markovImpact : FIFOCache
            Cache handed to `_value`.
        nodes_id : sequence of int
            Ids of the nodes forming the coalition.
        nodes_vals : array-like
            Values of the explained instance for the nodes of the coalition.
        cache : CustomShapleyCache
            Cache of coalition values, also holding the baseline under the key (-1, ()).

        Returns:
        ------
        The term returned by `_shap_term` for this coalition and feature.
        """
        key1, key2, _ = cache.generate_keys(self.bn, self.target, feature, nodes_id)
        if k == 0:
            # Overwrite the coalition's columns of the background with the explained values.
            interv = self._data.copy()
            interv[:, nodes_id] = nodes_vals
            cache.set(
                ex,
                key1,
                self.func(
                    self._value(
                        data=interv,
                        counts=self.counts,
                        elements=elements,
                        sigma=[],
                        cache=markovImpact,
                        func1=self._posterior,
                        params1={},
                        func2=self._weight,
                        params2={},
                    )
                ),
            )

        posterior_prob_with = cache.get(ex, key1)
        # A single-node coalition is compared with the baseline stored under (-1, ()).
        posterior_prob_without = cache.get(ex, key2) if len(key1) > 1 else cache.get(-1, ())
        return self._shap_term(posterior_prob_with, posterior_prob_without, len(elements), len(nodes_id) - 1)

    def _shap_1dim(self, x, elements):
        """
        Computes the Shapley values for a single instance.

        Parameters:
        ------
        x : numpy.ndarray
            The instance, indexed by node id.
        elements : list[int]
            Ids of the features taking part in the explanation.

        Returns:
        ------
        numpy.ndarray
            Array of shape (M, domain size of the target) holding the contributions.
        """
        contributions = np.zeros((self.M, self.bn.variable(self.target).domainSize()))
        self.ie.eraseAllEvidence()  # Clears all evidence from the inference engine.
        markovImpact = FIFOCache(2000)
        cache = CustomShapleyCache(5000)
        cache.set(-1, (), self.baseline)  # Sets the baseline probability in the cache.
        coalitions = self._coalitions(elements)
        for nodes_id in coalitions:
            nodes_vals = x[nodes_id]  # Gets the values of the nodes in the coalition.
            for k, feature in enumerate(nodes_id):
                # Accumulates the contribution for each feature (a single instance uses example index 0).
                contributions[feature] += self._coalition_contribution(
                    k, 0, elements, int(feature), markovImpact, nodes_id, nodes_vals, cache
                )
        return contributions

    def _shap_ndim(self, x, elements):
        """
        Computes the Shapley values for several instances.

        Parameters:
        ------
        x : numpy.ndarray
            The instances, one per row, with columns indexed by node id.
        elements : list[int]
            Ids of the features taking part in the explanation.

        Returns:
        ------
        numpy.ndarray
            Array of shape (M, number of instances, domain size of the target) holding the contributions.
        """
        contributions = np.zeros((self.M, len(x), self.bn.variable(self.target).domainSize()))
        self.ie.eraseAllEvidence()  # Clears all evidence from the inference engine.
        markovImpact = FIFOCache(2000)
        cache = CustomShapleyCache(5000)
        cache.set(-1, (), self.baseline)  # Sets the baseline probability in the cache.
        coalitions = self._coalitions(elements)
        for nodes_id in coalitions:
            for ex, nodes_values in enumerate(x[:, nodes_id]):
                for k, feature in enumerate(nodes_id):
                    contributions[feature, ex] += self._coalition_contribution(
                        k, ex, elements, int(feature), markovImpact, nodes_id, nodes_values, cache
                    )
        return contributions
|
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
############################################################################
|
|
2
|
+
# This file is part of the aGrUM/pyAgrum library. #
|
|
3
|
+
# #
|
|
4
|
+
# Copyright (c) 2005-2025 by #
|
|
5
|
+
# - Pierre-Henri WUILLEMIN(_at_LIP6) #
|
|
6
|
+
# - Christophe GONZALES(_at_AMU) #
|
|
7
|
+
# #
|
|
8
|
+
# The aGrUM/pyAgrum library is free software; you can redistribute it #
|
|
9
|
+
# and/or modify it under the terms of either : #
|
|
10
|
+
# #
|
|
11
|
+
# - the GNU Lesser General Public License as published by #
|
|
12
|
+
# the Free Software Foundation, either version 3 of the License, #
|
|
13
|
+
# or (at your option) any later version, #
|
|
14
|
+
# - the MIT license (MIT), #
|
|
15
|
+
# - or both in dual license, as here. #
|
|
16
|
+
# #
|
|
17
|
+
# (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) #
|
|
18
|
+
# #
|
|
19
|
+
# This aGrUM/pyAgrum library is distributed in the hope that it will be #
|
|
20
|
+
# useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, #
|
|
21
|
+
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS #
|
|
22
|
+
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
|
|
23
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
|
|
24
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, #
|
|
25
|
+
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR #
|
|
26
|
+
# OTHER DEALINGS IN THE SOFTWARE. #
|
|
27
|
+
# #
|
|
28
|
+
# See LICENCES for more details. #
|
|
29
|
+
# #
|
|
30
|
+
# SPDX-FileCopyrightText: Copyright 2005-2025 #
|
|
31
|
+
# - Pierre-Henri WUILLEMIN(_at_LIP6) #
|
|
32
|
+
# - Christophe GONZALES(_at_AMU) #
|
|
33
|
+
# SPDX-License-Identifier: LGPL-3.0-or-later OR MIT #
|
|
34
|
+
# #
|
|
35
|
+
# Contact : info_at_agrum_dot_org #
|
|
36
|
+
# homepage : http://agrum.gitlab.io #
|
|
37
|
+
# gitlab : https://gitlab.com/agrumery/agrum #
|
|
38
|
+
# #
|
|
39
|
+
############################################################################
|
|
40
|
+
|
|
41
|
+
from pyagrum.explain._Explainer import Explainer
|
|
42
|
+
from abc import abstractmethod
|
|
43
|
+
from pyagrum.explain._Explanation import Explanation
|
|
44
|
+
|
|
45
|
+
# Calculations
|
|
46
|
+
import pandas as pd
|
|
47
|
+
import numpy as np
|
|
48
|
+
|
|
49
|
+
# aGrUM
|
|
50
|
+
import pyagrum as gum
|
|
51
|
+
|
|
52
|
+
# GL
|
|
53
|
+
import warnings
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class ShapleyValues(Explainer):
    """
    The ShapleyValues class is an abstract base class for computing Shapley values in a Bayesian Network.

    Subclasses implement `_shap_1dim` and `_shap_ndim`. `compute` also reads `self.baseline`, which this class does
    not set itself.
    """

    def __init__(self, bn, target, logit):
        """
        Parameters:
        ------
        bn : pyagrum.BayesNet
            The Bayesian Network.
        target : int | str
            The node id (or node name) of the target.
        logit : bool
            If True, applies the logit transformation to the probabilities.

        Raises:
        ------
        TypeError : If target is not an integer or string.
        ValueError : If target is not a valid node id or node name in the Bayesian Network.
        """
        super().__init__(bn)
        if isinstance(target, str):
            if target not in bn.names():
                raise ValueError("Target node name '{}' not found in the Bayesian Network.".format(target))
            target = bn.idFromName(target)  # Convert node name to ID.
        elif isinstance(target, int):
            if target not in bn.nodes():
                raise ValueError("Target node ID {} not found in the Bayesian Network.".format(target))
        else:
            raise TypeError("Target must be a node ID (int) or a node name (str), but got {}".format(type(target)))
        if not isinstance(logit, bool):
            warnings.warn("logit should be a boolean, unexpected calculation may occur.", UserWarning)

        # Class attributes.
        self.target = target  # ID of the target node.
        self.target_name = self.feat_names[self.target]
        self._mb = self._markov_blanket()
        self.ie = gum.LazyPropagation(self.bn)  # Inference engine for the Bayesian Network.
        self.ie.addTarget(self.target)  # Setting the target for inference.
        self.func = self._logit if logit else self._identity  # Function to apply to the probabilities.

    def _markov_blanket(self):
        """Returns the sorted node ids of the Markov blanket of the target, the target itself excluded."""
        mb = gum.MarkovBlanket(self.bn, self.target).nodes()
        mb.remove(self.target)
        return sorted(mb)

    def _posterior(self, evidces: dict[int, int]):
        """Returns the posterior probability of the target, as an array, given the evidence `evidces`."""
        self.ie.updateEvidence(evidces)
        return self.ie.posterior(self.target).toarray()

    @abstractmethod
    def _shap_1dim(self, x, elements):
        # Computes the Shapley values for a single instance.
        # This method should be implemented in subclasses.
        raise NotImplementedError("This method should be implemented in subclasses.")

    @abstractmethod
    def _shap_ndim(self, x, elements):
        # Computes the Shapley values for multiple instances.
        # This method should be implemented in subclasses.
        raise NotImplementedError("This method should be implemented in subclasses.")

    def _single_instance_array(self, items, dtype):
        """Fills one row, indexed by node id, from (feature name, value) pairs; returns (row, non-target ids)."""
        x = np.empty(self.M, dtype=dtype)
        elements = []
        for feat, value in items:
            node_id = self.bn.idFromName(feat)
            x[node_id] = value
            if node_id != self.target:
                elements.append(node_id)
        return x, elements

    def _batch_array(self, columns, n_rows, dtype):
        """Fills an (n_rows, M) array from (feature name, column values) pairs; returns (array, non-target ids)."""
        x = np.empty((n_rows, self.M), dtype=dtype)
        elements = []
        for feat, values in columns:
            node_id = self.bn.idFromName(feat)
            x[:, node_id] = values
            if node_id != self.target:
                elements.append(node_id)
        return x, elements

    def _unique_rows(self, x, elements):
        """Keeps one row per distinct combination of the `elements` columns and sets every other column to 0."""
        kept = set(elements)
        mask_cols = [i for i in range(self.M) if i not in kept]
        _, idx = np.unique(x[:, elements], axis=0, return_index=True)
        x = x[idx, :]
        x[:, mask_cols] = 0
        return x

    def _dict_as_batch(self, df, dtype):
        """
        Interprets a dictionary as several instances when its first value is a list or a numpy array.

        Returns (array, non-target ids) in that case, and None when the dictionary describes a single instance.
        Columns containing None or NaN are left out.
        """
        first = list(df.values())[0]
        try:
            # len() raises TypeError for scalars without a length; strings have one, hence the explicit type check.
            n_rows = len(first)
            if not isinstance(first, (list, np.ndarray)):
                raise TypeError("Each value in the dictionary must be a list or a numpy array.")
            complete_columns = (
                (feat, column)
                for feat, column in df.items()
                if all(v is not None and not (isinstance(v, float) and np.isnan(v)) for v in column)
            )
            return self._batch_array(complete_columns, n_rows, dtype)
        except TypeError:
            # Only the shape detection and the filling are guarded: a TypeError raised later, during the actual
            # computation, must reach the caller instead of silently switching to the single-instance path.
            return None

    def compute(self, data: tuple | None, N=100):
        """
        Computes the Shapley values for the target node based on the provided data.

        Parameters:
        ----------
        data : tuple | None
            A tuple containing a pandas DataFrame, Series or a dictionary and a boolean indicating whether data are
            provided with labels. If None, a random sample of size N is generated.
        N : int
            The number of samples to generate if data is None.

        Returns:
        -------
        Explanation
            An Explanation object containing the Shapley values and importances for the target node.

        Raises:
        ------
        TypeError : If data is neither None nor a tuple, if the first element of data is not a pd.DataFrame, pd.Series
            or dict, or if N is not an integer when data is None.
        ValueError : If N is less than 2 when data is None, or if the first element of data is an empty dictionary.
        """
        if data is None:
            if not isinstance(N, int):
                raise TypeError("Since `data` is None, N must be an integer, but got {}".format(type(N)))
            if N < 2:
                raise ValueError("N must be greater than 1, but got {}".format(N))
            y = gum.generateSample(self.bn, N, with_labels=False)[0].reindex(columns=self.feat_names).to_numpy()
            elements = [i for i in range(self.M) if i != self.target]
            # Remove duplicate rows and unused columns.
            y = self._unique_rows(y, elements)
            contributions = self._shap_ndim(y, sorted(elements))

        else:
            if not isinstance(data, tuple):
                raise TypeError("`data` must be a tuple (pd.DataFrame, bool).")
            df, with_labels = data
            if not isinstance(with_labels, bool):
                warnings.warn(
                    f"The second element of `data` should be a boolean, but got {type(with_labels)}. Unexpected calculations may occur."
                )
            dtype = "U50" if with_labels else int

            single = None  # (row, ids) when the input describes one instance
            batch = None  # (rows, ids) when the input describes several instances

            if isinstance(df, pd.Series):
                # Here we are sure that df is a single instance (a Series).
                s = df.dropna()
                single = self._single_instance_array(((feat, s[feat]) for feat in s.index), dtype)

            elif isinstance(df, pd.DataFrame):
                df_clean = df.dropna(axis=1)
                if len(df_clean) == 1:
                    # Here we are sure that df is a single instance (a DataFrame with one row).
                    single = self._single_instance_array(
                        ((feat, df_clean[feat].values[0]) for feat in df_clean.columns), dtype
                    )
                else:
                    batch = self._batch_array(
                        ((feat, df_clean[feat].values) for feat in df_clean.columns), len(df_clean), dtype
                    )

            elif isinstance(df, dict):
                if not df:
                    raise ValueError("The dictionary given as first element of `data` must not be empty.")
                batch = self._dict_as_batch(df, dtype)
                if batch is None:
                    # Here we are sure that df is a single instance (a dictionary with one row).
                    single = self._single_instance_array(
                        ((feat, value) for feat, value in df.items() if value is not None), dtype
                    )

            else:
                raise TypeError(
                    "The first element of `data` must be a pandas DataFrame, Series or a dictionary, but got {}".format(
                        type(df)
                    )
                )

            if single is not None:
                x, elements = single
                y = self._labelToPos_row(x, elements) if with_labels else x
                contributions = self._shap_1dim(y, sorted(elements))
            else:
                x, elements = batch
                # Remove duplicate rows in x and unused columns.
                x = self._unique_rows(x, elements)
                y = self._labelToPos_df(x, elements) if with_labels else x
                contributions = self._shap_ndim(y, sorted(elements))

        ordered = sorted(elements)
        feature_names = list(self.feat_names[ordered])

        if contributions.ndim == 2:
            # Single instance: contributions is indexed as [feature id, target state].
            values = {
                z: {self.feat_names[i]: float(contributions[i, z]) for i in elements}
                for z in range(contributions.shape[1])
            }
            importances = {
                z: {self.feat_names[i]: abs(float(contributions[i, z])) for i in elements}
                for z in range(contributions.shape[1])
            }
            # NOTE(review): the raw input row `x` is passed here (labels when `with_labels` is True), whereas the
            # multi-instance branch passes the converted array `y`. Kept as in the original; confirm intended.
            explained = x[ordered]
        else:
            # Several instances: contributions is indexed as [feature id, instance, target state].
            values = {
                z: {self.feat_names[i]: [float(v) for v in contributions[i, :, z]] for i in elements}
                for z in range(contributions.shape[2])
            }
            # Importance of a feature: mean absolute contribution over the instances.
            mean_abs = np.mean(np.abs(contributions), axis=1)
            importances = {
                z: {self.feat_names[i]: float(mean_abs[i, z]) for i in elements} for z in range(contributions.shape[2])
            }
            explained = y[:, ordered]

        return Explanation(
            values,
            importances,
            feature_names,
            explained,
            self.baseline,
            self.func.__name__,
            "SHAP",
        )
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
############################################################################
|
|
2
|
+
# This file is part of the aGrUM/pyAgrum library. #
|
|
3
|
+
# #
|
|
4
|
+
# Copyright (c) 2005-2025 by #
|
|
5
|
+
# - Pierre-Henri WUILLEMIN(_at_LIP6) #
|
|
6
|
+
# - Christophe GONZALES(_at_AMU) #
|
|
7
|
+
# #
|
|
8
|
+
# The aGrUM/pyAgrum library is free software; you can redistribute it #
|
|
9
|
+
# and/or modify it under the terms of either : #
|
|
10
|
+
# #
|
|
11
|
+
# - the GNU Lesser General Public License as published by #
|
|
12
|
+
# the Free Software Foundation, either version 3 of the License, #
|
|
13
|
+
# or (at your option) any later version, #
|
|
14
|
+
# - the MIT license (MIT), #
|
|
15
|
+
# - or both in dual license, as here. #
|
|
16
|
+
# #
|
|
17
|
+
# (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) #
|
|
18
|
+
# #
|
|
19
|
+
# This aGrUM/pyAgrum library is distributed in the hope that it will be #
|
|
20
|
+
# useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, #
|
|
21
|
+
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS #
|
|
22
|
+
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
|
|
23
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
|
|
24
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, #
|
|
25
|
+
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR #
|
|
26
|
+
# OTHER DEALINGS IN THE SOFTWARE. #
|
|
27
|
+
# #
|
|
28
|
+
# See LICENCES for more details. #
|
|
29
|
+
# #
|
|
30
|
+
# SPDX-FileCopyrightText: Copyright 2005-2025 #
|
|
31
|
+
# - Pierre-Henri WUILLEMIN(_at_LIP6) #
|
|
32
|
+
# - Christophe GONZALES(_at_AMU) #
|
|
33
|
+
# SPDX-License-Identifier: LGPL-3.0-or-later OR MIT #
|
|
34
|
+
# #
|
|
35
|
+
# Contact : info_at_agrum_dot_org #
|
|
36
|
+
# homepage : http://agrum.gitlab.io #
|
|
37
|
+
# gitlab : https://gitlab.com/agrumery/agrum #
|
|
38
|
+
# #
|
|
39
|
+
############################################################################
|
|
40
|
+
|
|
41
|
+
"""
|
|
42
|
+
This module provides tools for explaining predictions and other computations made by Bayesian networks.
|
|
43
|
+
"""
|
|
44
|
+
|
|
45
|
+
__author__ = "Pierre-Henri Wuillemin"
|
|
46
|
+
__copyright__ = "(c) 2019-2025 PARIS"
|
|
47
|
+
|
|
48
|
+
# Shapley Values
|
|
49
|
+
from ._ShapConditionalValues import ConditionalShapValues
|
|
50
|
+
from ._ShapMarginalValues import MarginalShapValues
|
|
51
|
+
from ._ShapCausalValues import CausalShapValues
|
|
52
|
+
from ._Explanation import Explanation
|
|
53
|
+
|
|
54
|
+
# Shall Values
|
|
55
|
+
from ._ShallConditionalValues import ConditionalShallValues
|
|
56
|
+
from ._ShallMarginalValues import MarginalShallValues
|
|
57
|
+
from ._ShallCausalValues import CausalShallValues
|
|
58
|
+
|
|
59
|
+
# Independence List For Pairs
|
|
60
|
+
from ._explIndependenceListForPairs import independenceListForPairs
|
|
61
|
+
|
|
62
|
+
# Generalized Markov Blanket
|
|
63
|
+
from ._explGeneralizedMarkovBlanket import generalizedMarkovBlanket
|
|
64
|
+
|
|
65
|
+
# Entropy and Mutual Information
|
|
66
|
+
from ._explInformationGraph import getInformationGraph, getInformation, showInformation
|
|
67
|
+
|
|
68
|
+
# Public names of the package, in the same order as before.
__all__ = [
    # Shapley values
    "ConditionalShapValues",
    "MarginalShapValues",
    "CausalShapValues",
    # Shall values
    "ConditionalShallValues",
    "MarginalShallValues",
    "CausalShallValues",
    # Result container
    "Explanation",
    # Independence list for pairs
    "independenceListForPairs",
    # Generalized Markov blanket
    "generalizedMarkovBlanket",
    # Entropy and mutual information
    "getInformationGraph",
    "getInformation",
    "showInformation",
]
|