pyAgrum-nightly 2.1.1.9.dev202506061747485979-cp310-abi3-manylinux2014_aarch64.whl → 2.3.1.9.dev202601031765915415-cp310-abi3-manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyagrum/__init__.py +6 -2
- pyagrum/_pyagrum.so +0 -0
- pyagrum/bnmixture/BNMInference.py +6 -2
- pyagrum/bnmixture/BNMLearning.py +12 -2
- pyagrum/bnmixture/BNMixture.py +6 -2
- pyagrum/bnmixture/__init__.py +6 -2
- pyagrum/bnmixture/notebook.py +6 -2
- pyagrum/causal/_CausalFormula.py +6 -2
- pyagrum/causal/_CausalModel.py +6 -2
- pyagrum/causal/__init__.py +6 -2
- pyagrum/causal/_causalImpact.py +6 -2
- pyagrum/causal/_dSeparation.py +6 -2
- pyagrum/causal/_doAST.py +6 -2
- pyagrum/causal/_doCalculus.py +6 -2
- pyagrum/causal/_doorCriteria.py +6 -2
- pyagrum/causal/_exceptions.py +6 -2
- pyagrum/causal/_types.py +6 -2
- pyagrum/causal/causalEffectEstimation/_CausalEffectEstimation.py +6 -2
- pyagrum/causal/causalEffectEstimation/_IVEstimators.py +6 -2
- pyagrum/causal/causalEffectEstimation/_RCTEstimators.py +6 -2
- pyagrum/causal/causalEffectEstimation/__init__.py +6 -2
- pyagrum/causal/causalEffectEstimation/_backdoorEstimators.py +6 -2
- pyagrum/causal/causalEffectEstimation/_causalBNEstimator.py +6 -2
- pyagrum/causal/causalEffectEstimation/_frontdoorEstimators.py +6 -2
- pyagrum/causal/causalEffectEstimation/_learners.py +6 -2
- pyagrum/causal/causalEffectEstimation/_utils.py +6 -2
- pyagrum/causal/notebook.py +8 -3
- pyagrum/clg/CLG.py +6 -2
- pyagrum/clg/GaussianVariable.py +6 -2
- pyagrum/clg/SEM.py +6 -2
- pyagrum/clg/__init__.py +6 -2
- pyagrum/clg/canonicalForm.py +6 -2
- pyagrum/clg/constants.py +6 -2
- pyagrum/clg/forwardSampling.py +6 -2
- pyagrum/clg/learning.py +6 -2
- pyagrum/clg/notebook.py +6 -2
- pyagrum/clg/variableElimination.py +6 -2
- pyagrum/common.py +7 -3
- pyagrum/config.py +7 -2
- pyagrum/ctbn/CIM.py +6 -2
- pyagrum/ctbn/CTBN.py +6 -2
- pyagrum/ctbn/CTBNGenerator.py +6 -2
- pyagrum/ctbn/CTBNInference.py +6 -2
- pyagrum/ctbn/CTBNLearner.py +6 -2
- pyagrum/ctbn/SamplesStats.py +6 -2
- pyagrum/ctbn/StatsIndepTest.py +6 -2
- pyagrum/ctbn/__init__.py +6 -2
- pyagrum/ctbn/constants.py +6 -2
- pyagrum/ctbn/notebook.py +6 -2
- pyagrum/deprecated.py +6 -2
- pyagrum/explain/_ComputationCausal.py +75 -0
- pyagrum/explain/_ComputationConditional.py +48 -0
- pyagrum/explain/_ComputationMarginal.py +48 -0
- pyagrum/explain/_CustomShapleyCache.py +110 -0
- pyagrum/explain/_Explainer.py +176 -0
- pyagrum/explain/_Explanation.py +70 -0
- pyagrum/explain/_FIFOCache.py +54 -0
- pyagrum/explain/_ShallCausalValues.py +204 -0
- pyagrum/explain/_ShallConditionalValues.py +155 -0
- pyagrum/explain/_ShallMarginalValues.py +155 -0
- pyagrum/explain/_ShallValues.py +296 -0
- pyagrum/explain/_ShapCausalValues.py +208 -0
- pyagrum/explain/_ShapConditionalValues.py +126 -0
- pyagrum/explain/_ShapMarginalValues.py +191 -0
- pyagrum/explain/_ShapleyValues.py +298 -0
- pyagrum/explain/__init__.py +81 -0
- pyagrum/explain/_explGeneralizedMarkovBlanket.py +152 -0
- pyagrum/explain/_explIndependenceListForPairs.py +146 -0
- pyagrum/explain/_explInformationGraph.py +264 -0
- pyagrum/explain/notebook/__init__.py +54 -0
- pyagrum/explain/notebook/_bar.py +142 -0
- pyagrum/explain/notebook/_beeswarm.py +174 -0
- pyagrum/explain/notebook/_showShapValues.py +97 -0
- pyagrum/explain/notebook/_waterfall.py +220 -0
- pyagrum/explain/shapley.py +225 -0
- pyagrum/lib/__init__.py +6 -2
- pyagrum/lib/_colors.py +6 -2
- pyagrum/lib/bn2graph.py +6 -2
- pyagrum/lib/bn2roc.py +6 -2
- pyagrum/lib/bn2scores.py +6 -2
- pyagrum/lib/bn_vs_bn.py +6 -2
- pyagrum/lib/cn2graph.py +6 -2
- pyagrum/lib/discreteTypeProcessor.py +99 -81
- pyagrum/lib/discretizer.py +6 -2
- pyagrum/lib/dynamicBN.py +6 -2
- pyagrum/lib/explain.py +17 -492
- pyagrum/lib/export.py +6 -2
- pyagrum/lib/id2graph.py +6 -2
- pyagrum/lib/image.py +6 -2
- pyagrum/lib/ipython.py +6 -2
- pyagrum/lib/mrf2graph.py +6 -2
- pyagrum/lib/notebook.py +6 -2
- pyagrum/lib/proba_histogram.py +6 -2
- pyagrum/lib/utils.py +6 -2
- pyagrum/pyagrum.py +976 -126
- pyagrum/skbn/_MBCalcul.py +6 -2
- pyagrum/skbn/__init__.py +6 -2
- pyagrum/skbn/_learningMethods.py +6 -2
- pyagrum/skbn/_utils.py +6 -2
- pyagrum/skbn/bnclassifier.py +6 -2
- pyagrum_nightly-2.1.1.9.dev202506061747485979.dist-info/LICENSE → pyagrum_nightly-2.3.1.9.dev202601031765915415.dist-info/LICENSE.md +3 -1
- pyagrum_nightly-2.3.1.9.dev202601031765915415.dist-info/LICENSES/LGPL-3.0-or-later.txt +304 -0
- pyagrum_nightly-2.3.1.9.dev202601031765915415.dist-info/LICENSES/MIT.txt +18 -0
- {pyagrum_nightly-2.1.1.9.dev202506061747485979.dist-info → pyagrum_nightly-2.3.1.9.dev202601031765915415.dist-info}/METADATA +3 -1
- pyagrum_nightly-2.3.1.9.dev202601031765915415.dist-info/RECORD +107 -0
- {pyagrum_nightly-2.1.1.9.dev202506061747485979.dist-info → pyagrum_nightly-2.3.1.9.dev202601031765915415.dist-info}/WHEEL +1 -1
- pyagrum/lib/shapley.py +0 -657
- pyagrum_nightly-2.1.1.9.dev202506061747485979.dist-info/LICENSE.LGPL +0 -165
- pyagrum_nightly-2.1.1.9.dev202506061747485979.dist-info/LICENSE.MIT +0 -17
- pyagrum_nightly-2.1.1.9.dev202506061747485979.dist-info/RECORD +0 -83
--- /dev/null
+++ pyagrum/explain/_ShallMarginalValues.py
@@ -0,0 +1,155 @@
+############################################################################
+# This file is part of the aGrUM/pyAgrum library. #
+# #
+# Copyright (c) 2005-2025 by #
+# - Pierre-Henri WUILLEMIN(_at_LIP6) #
+# - Christophe GONZALES(_at_AMU) #
+# #
+# The aGrUM/pyAgrum library is free software; you can redistribute it #
+# and/or modify it under the terms of either : #
+# #
+# - the GNU Lesser General Public License as published by #
+# the Free Software Foundation, either version 3 of the License, #
+# or (at your option) any later version, #
+# - the MIT license (MIT), #
+# - or both in dual license, as here. #
+# #
+# (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) #
+# #
+# This aGrUM/pyAgrum library is distributed in the hope that it will be #
+# useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, #
+# INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS #
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, #
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR #
+# OTHER DEALINGS IN THE SOFTWARE. #
+# #
+# See LICENCES for more details. #
+# #
+# SPDX-FileCopyrightText: Copyright 2005-2025 #
+# - Pierre-Henri WUILLEMIN(_at_LIP6) #
+# - Christophe GONZALES(_at_AMU) #
+# SPDX-License-Identifier: LGPL-3.0-or-later OR MIT #
+# #
+# Contact : info_at_agrum_dot_org #
+# homepage : http://agrum.gitlab.io #
+# gitlab : https://gitlab.com/agrumery/agrum #
+# #
+############################################################################
+
+import pyagrum as gum
+from pyagrum.explain._ShallValues import ShallValues
+from pyagrum.explain._ComputationMarginal import MarginalComputation
+from pyagrum.explain._CustomShapleyCache import CustomShapleyCache
+from pyagrum.explain._FIFOCache import FIFOCache
+
+
+import numpy as np
+
+
+class MarginalShallValues(ShallValues, MarginalComputation):
+  """
+  The MarginalShallValues class computes the Marginal SHALL values in a Bayesian Network.
+  """
+
+  def __init__(self, bn: gum.BayesNet, background: tuple | None, sample_size: int = 1000, log: bool = True):
+    """
+    Note: All rows in the background data that contain NaN values in columns corresponding to variables in the Bayesian Network will be dropped.
+
+    Parameters
+    ----------
+    bn : pyagrum.BayesNet
+      The Bayesian Network.
+    background : tuple[pandas.DataFrame, bool] | None
+      A tuple containing a pandas DataFrame and a boolean indicating whether the DataFrame contains labels or positional values.
+    sample_size : int
+      The size of the background sample to generate if `background` is None.
+    log : bool
+      If True, applies a logarithmic transformation to the probabilities.
+
+    Raises
+    ------
+    TypeError : If bn is not a gum.BayesNet instance or background is not a tuple.
+    ValueError : If background data does not contain all variables present in the Bayesian Network or if
+      background data is empty after rows with NaNs were dropped.
+    """
+
+    super().__init__(bn, background, sample_size, log)
+
+    self.baseline = self._value(
+      data=self._data,
+      counts=self.counts,
+      elements=self.vars_ids,
+      sigma=[],
+      cache=FIFOCache(100),
+      func1=self._joint,
+      params1={},
+      func2=self._weight,
+      params2={},
+    )
+
+  def _coalition_contribution(self, k, ex, feature, fifo_cache, nodes_id, nodes_vals, cache):
+    key1, key2, _ = cache.generate_keys(self.bn, None, feature, nodes_id)
+    if k == 0:
+      interv = self._data.copy()
+      interv[:, nodes_id] = nodes_vals
+      cache.set(
+        ex,
+        key1,
+        self._value(
+          data=interv,
+          counts=self.counts,
+          elements=self.vars_ids,
+          sigma=[],
+          cache=fifo_cache,
+          func1=self._joint,
+          params1={},
+          func2=self._weight,
+          params2={},
+        ),
+      )
+
+    joint_prob_with = cache.get(ex, key1)
+    joint_prob_without = cache.get(ex, key2) if len(key1) > 1 else cache.get(-1, ())
+    return (joint_prob_with - joint_prob_without) / self._invcoeff_shap(len(self.vars_ids), len(nodes_id) - 1)
+
+  def _shall_1dim(self, x):
+    # Result initialisation.
+    contributions = np.zeros(self.M)
+    # Cache management.
+    fifo_cache = FIFOCache(2000)
+    custom_cache = CustomShapleyCache(5000)
+    # Sets the baseline probability in the cache.
+    custom_cache.set(-1, (), self.baseline)
+    coalitions = self._coalitions(self.vars_ids)
+    for nodes_id in coalitions:
+      nodes_vals = x[nodes_id]
+      for k, feature in enumerate(nodes_id):
+        contributions[feature] += self._coalition_contribution(
+          k, 0, int(feature), fifo_cache, nodes_id, nodes_vals, custom_cache
+        )
+    return contributions
+
+  def _shall_ndim(self, x):
+    # Result initialisation.
+    contributions = np.zeros((self.M, len(x)))
+    # Cache management.
+    fifo_cache = FIFOCache(2000)
+    custom_cache = CustomShapleyCache(5000)
+    # Sets the baseline probability in the cache.
+    custom_cache.set(-1, (), self.baseline)
+    coalitions = self._coalitions(self.vars_ids)
+    for nodes_id in coalitions:
+      for ex, nodes_values in enumerate(x[:, nodes_id]):
+        for k, feature in enumerate(nodes_id):
+          contributions[feature, ex] += self._coalition_contribution(
+            k, ex, int(feature), fifo_cache, nodes_id, nodes_values, custom_cache
+          )
+    return contributions
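
The explainers added in this diff share one construction pattern: a gum.BayesNet, an optional (DataFrame, with_labels) background tuple, and a compute() call on the rows to explain. A minimal usage sketch, assuming the private module path shown above (the public re-exports in pyagrum/explain/__init__.py are not visible in this diff):

import pyagrum as gum
from pyagrum.explain._ShallMarginalValues import MarginalShallValues

# Small example network and a sampled background dataset (with labels).
bn = gum.fastBN("A->B->C")
background_df = gum.generateSample(bn, 500, with_labels=True)[0]

# `background` is a (DataFrame, with_labels) tuple, as required by ShallValues.__init__.
explainer = MarginalShallValues(bn, background=(background_df, True), log=True)

# compute(None, N) samples N rows from the network itself and returns an Explanation,
# as the ShallValues.compute hunk below shows.
explanation = explainer.compute(data=None, N=50)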
--- /dev/null
+++ pyagrum/explain/_ShallValues.py
@@ -0,0 +1,296 @@
+from abc import abstractmethod
+
+############################################################################
+# This file is part of the aGrUM/pyAgrum library. #
+# #
+# Copyright (c) 2005-2025 by #
+# - Pierre-Henri WUILLEMIN(_at_LIP6) #
+# - Christophe GONZALES(_at_AMU) #
+# #
+# The aGrUM/pyAgrum library is free software; you can redistribute it #
+# and/or modify it under the terms of either : #
+# #
+# - the GNU Lesser General Public License as published by #
+# the Free Software Foundation, either version 3 of the License, #
+# or (at your option) any later version, #
+# - the MIT license (MIT), #
+# - or both in dual license, as here. #
+# #
+# (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) #
+# #
+# This aGrUM/pyAgrum library is distributed in the hope that it will be #
+# useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, #
+# INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS #
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, #
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR #
+# OTHER DEALINGS IN THE SOFTWARE. #
+# #
+# See LICENCES for more details. #
+# #
+# SPDX-FileCopyrightText: Copyright 2005-2025 #
+# - Pierre-Henri WUILLEMIN(_at_LIP6) #
+# - Christophe GONZALES(_at_AMU) #
+# SPDX-License-Identifier: LGPL-3.0-or-later OR MIT #
+# #
+# Contact : info_at_agrum_dot_org #
+# homepage : http://agrum.gitlab.io #
+# gitlab : https://gitlab.com/agrumery/agrum #
+# #
+############################################################################
+
+import pyagrum as gum
+from pyagrum.explain._Explainer import Explainer
+from pyagrum.explain._Explanation import Explanation
+
+import pandas as pd
+import numpy as np
+import warnings
+
+
+class ShallValues(Explainer):
+  """
+  The ShallValues class is an abstract base class for computing SHALL values in a Bayesian Network.
+  """
+
+  def __init__(self, bn: gum.BayesNet, background: tuple | None, sample_size: int = 1000, log: bool = True):
+    """
+    Note: All rows in the background data that contain NaN values in columns corresponding to variables in the Bayesian Network will be dropped.
+
+    Parameters
+    ----------
+    bn : pyagrum.BayesNet
+      The Bayesian Network.
+    background : tuple[pandas.DataFrame, bool] | None
+      A tuple containing a pandas DataFrame and a boolean indicating whether the DataFrame contains labels or positional values.
+    sample_size : int
+      The size of the background sample to generate if `background` is None.
+    log : bool
+      If True, applies a logarithmic transformation to the probabilities.
+
+    Raises
+    ------
+    TypeError : If bn is not a gum.BayesNet instance or background is not a tuple.
+    ValueError : If background data does not contain all variables present in the Bayesian Network or if
+      background data is empty after rows with NaNs were dropped.
+    """
+
+    super().__init__(bn)
+    self.vars_ids = sorted(bn.ids(self.feat_names))
+
+    # Processing background data
+    if background is None:
+      if not isinstance(sample_size, int):
+        raise TypeError("When `background` is None, `sample_size` must be an integer, but got {}".format(type(sample_size)))
+      else:
+        if sample_size < 1:
+          raise ValueError("`sample_size` must be at least 1, but got {}".format(sample_size))
+        elif sample_size < 10:
+          warnings.warn("The sample size is small, which may lead to biased Shapley values.")
+      data = gum.generateSample(self.bn, sample_size, with_labels=False)[0].reindex(columns=self.feat_names).to_numpy()
+    else:
+      if not isinstance(background, tuple):
+        raise TypeError("`background` must be a tuple (pd.DataFrame, bool).")
+      data, with_labels = background
+      if not isinstance(with_labels, bool):
+        warnings.warn(
+          f"The second element of `background` should be a boolean, but got {type(with_labels)}. Unexpected calculations may occur."
+        )
+      if not isinstance(data, pd.DataFrame):
+        raise TypeError("The first element of `background` must be a pandas DataFrame, but got {}".format(type(data)))
+      if data.shape[0] < 2:
+        warnings.warn("You are giving a single row as background data, which will lead to biased Shapley values.")
+      if data.shape[1] != self.M:
+        raise ValueError(
+          "The number of columns in the background data must match the number of variables in the Bayesian network. Although values outside the Markov blanket, including the target, are unused, they are required for indexing purposes."
+        )
+      data = data.reindex(columns=self.feat_names).dropna(axis=0).to_numpy()
+      if with_labels:
+        data = self._labelToPos_df(data, self.vars_ids)
+
+    self._N = len(data)
+    if self._N == 0:
+      raise ValueError("Background data can't be empty.")
+
+    self._data, self.counts = np.unique(data, axis=0, return_counts=True)
+    self.func = self._log if log else self._identity
+
+    # For jointProbability
+    self.inst = gum.Instantiation()
+    for var in self.bn.ids(self.feat_names):
+      self.inst.add(self.bn.variable(var))
+
+  # Note: We use BayesNet.jointProbability instead of LazyPropagation.evidenceProbability because the joint probability is much faster.
+  def _joint(self, row_values):
+    self.inst.fromdict(row_values)
+    return self.func(self.bn.jointProbability(self.inst))
+
+  @abstractmethod
+  def _shall_1dim(self, x, elements):
+    # Computes the SHALL values for a single instance.
+    # This method should be implemented in subclasses.
+    raise NotImplementedError("This method should be implemented in subclasses.")
+
+  @abstractmethod
+  def _shall_ndim(self, x, elements):
+    # Computes the SHALL values for multiple instances.
+    # This method should be implemented in subclasses.
+    raise NotImplementedError("This method should be implemented in subclasses.")
+
+  def compute(self, data: tuple | None, N: int = 100):
+    """
+    Computes the SHALL values for all rows in the provided data.
+
+    Note 1: Since partial explanation is not supported, all rows in `data` must contain all variables present in the initialized Bayesian Network.
+    Note 2: All rows containing NaN values in columns corresponding to variables in the Bayesian Network will be dropped.
+
+    Parameters
+    ----------
+    data : tuple | None
+      A tuple containing either a pandas DataFrame, Series, or dictionary, and a boolean indicating whether labels are provided.
+      If None, a random sample of size N is generated.
+    N : int
+      The number of samples to generate if data is None.
+
+    Returns
+    -------
+    Explanation
+      An Explanation object containing the SHALL values and variable importances for each row in the data, after rows with NaN values have been dropped.
+
+    Raises
+    ------
+    TypeError : If the first element of data is not a pd.DataFrame, pd.Series or dict, or if N is not an integer when data is None.
+    ValueError : If N is less than 2 when data is None, or if the provided data does not contain all variables present in the initialized Bayesian Network.
+    """
+
+    # Note: `elements` (as in ShapValues) is no longer needed, since partial explanation is impossible here.
+    if data is None:
+      if not isinstance(N, int):
+        raise TypeError("Since `data` is None, N must be an integer, but got {}".format(type(N)))
+      if N < 2:
+        raise ValueError("N must be greater than 1, but got {}".format(N))
+      y = gum.generateSample(self.bn, N, with_labels=False)[0].reindex(columns=self.feat_names).to_numpy()
+      # Remove duplicate rows in generated data
+      _, idx = np.unique(y, axis=0, return_index=True)
+      y = y[idx, :]
+      contributions = self._shall_ndim(y)
+    else:
+      if not isinstance(data, tuple):
+        raise TypeError("`data` must be a tuple (pd.DataFrame, bool).")
+      df, with_labels = data
+      if not isinstance(with_labels, bool):
+        warnings.warn(
+          f"The second element of `data` should be a boolean, but got {type(with_labels)}. Unexpected calculations may occur."
+        )
+      dtype = object if with_labels else int
+
+      if isinstance(df, pd.Series):
+        # Here we are sure that df is a single instance (a Series).
+        if np.setdiff1d(self.feat_names, df.index).size != 0:
+          raise ValueError(
+            "For SHALL values, you must provide all variables used in the Bayesian Network; passing only a subset is not allowed."
+          )
+
+        x = df.reindex(self.feat_names).dropna().to_numpy()
+        if x.size == 0:
+          raise ValueError("Input data is empty after dropping NaN values.")
+
+        y = self._labelToPos_row(x, self.vars_ids) if with_labels else x
+        contributions = self._shall_1dim(y)
+
+      elif isinstance(df, pd.DataFrame):
+        if np.setdiff1d(self.feat_names, df.columns).size != 0:
+          raise ValueError(
+            "For SHALL values, you must provide all variables used in the Bayesian Network; passing only a subset is not allowed."
+          )
+
+        df_clean = df.dropna(axis=0, subset=self.feat_names)
+        if len(df_clean) == 1:
+          # Here we are sure that df is a single instance (a DataFrame with one row).
+          x = df_clean.reindex(columns=self.feat_names).to_numpy()[0]
+          if x.size == 0:
+            raise ValueError("Input data is empty after dropping NaN values.")
+          y = self._labelToPos_row(x, self.vars_ids) if with_labels else x
+          contributions = self._shall_1dim(y)
+
+        else:
+          x = df_clean.reindex(columns=self.feat_names).to_numpy()
+          if x.size == 0:
+            raise ValueError("Input data is empty after dropping NaN values.")
+          y = self._labelToPos_df(x, self.vars_ids) if with_labels else x
+          _, idx = np.unique(y, axis=0, return_index=True)
+          y = y[idx, :]
+          contributions = self._shall_ndim(y)
+
+      elif isinstance(df, dict):
+        if len(set(self.feat_names) - set(df.keys())) != 0:
+          raise ValueError(
+            "For SHALL values, you must provide all variables used in the Bayesian Network; passing only a subset is not allowed."
+          )
+
+        try:
+          N = len(list(df.values())[0])
+          if not isinstance(list(df.values())[0], (list, np.ndarray)):
+            raise TypeError("Each value in the dictionary must be a list or a numpy array.")
+
+          x = np.empty((N, self.M), dtype=dtype)
+          for feat in df.keys():
+            id = self.bn.idFromName(feat)
+            x[:, id] = df[feat]
+          mask = [
+            all(val is not None and not (isinstance(val, float) and np.isnan(val)) for val in row) for row in x
+          ]  # drop rows containing None or np.nan
+          x = x[mask]
+          if x.size == 0:
+            raise ValueError("Input data is empty after dropping NaN values.")
+          # Remove duplicate rows in x and unused columns.
+          y = self._labelToPos_df(x, self.vars_ids) if with_labels else x
+          _, idx = np.unique(y, axis=0, return_index=True)
+          y = y[idx, :]
+          contributions = self._shall_ndim(y)
+
+        except TypeError:
+          # Here we are sure that df is a single instance (a dictionary with one row).
+          x = np.empty(self.M, dtype=dtype)
+          for feat in df.keys():
+            if df[feat] is not None:
+              id = self.bn.idFromName(feat)
+              x[id] = df[feat]
+          if x.size == 0:
+            raise ValueError("Input data is empty after dropping NaN values.")
+          y = self._labelToPos_row(x, self.vars_ids) if with_labels else x
+          contributions = self._shall_1dim(y)
+
+      else:
+        raise TypeError(
+          "The first element of `data` must be a pandas DataFrame, Series or a dictionary, but got {}".format(type(df))
+        )
+
+    if contributions.ndim == 1:
+      values = {self.feat_names[i]: float(contributions[i]) for i in self.vars_ids}
+      importances = {self.feat_names[i]: abs(float(contributions[i])) for i in self.vars_ids}
+
+      explanation = Explanation(
+        values,
+        importances,
+        list(self.feat_names[self.vars_ids]),
+        x[self.vars_ids],
+        self.baseline,
+        self.func.__name__,
+        "SHALL",
+      )
+    else:
+      values = {self.feat_names[i]: [float(v) for v in contributions[i, :]] for i in self.vars_ids}
+      mean_abs = np.mean(np.abs(contributions), axis=1)
+      importances = {self.feat_names[i]: abs(float(mean_abs[i])) for i in self.vars_ids}
+      explanation = Explanation(
+        values,
+        importances,
+        list(self.feat_names[self.vars_ids]),
+        y[:, self.vars_ids],
+        self.baseline,
+        self.func.__name__,
+        "SHALL",
+      )
+    return explanation
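
The _joint helper above is built on two pyAgrum primitives this diff relies on: Instantiation.fromdict to set one value per variable, and BayesNet.jointProbability to read the probability of a full instantiation straight from the CPTs, with no inference engine. A standalone sketch of that primitive (network and values are illustrative):

import pyagrum as gum

bn = gum.fastBN("A->B")
inst = gum.Instantiation()
for node in bn.nodes():
  inst.add(bn.variable(node))  # register every variable of the network

inst.fromdict({"A": 0, "B": 1})  # positional values, as _joint uses them
p = bn.jointProbability(inst)    # P(A=0, B=1) read directly from the CPTs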
--- /dev/null
+++ pyagrum/explain/_ShapCausalValues.py
@@ -0,0 +1,208 @@
+############################################################################
+# This file is part of the aGrUM/pyAgrum library. #
+# #
+# Copyright (c) 2005-2025 by #
+# - Pierre-Henri WUILLEMIN(_at_LIP6) #
+# - Christophe GONZALES(_at_AMU) #
+# #
+# The aGrUM/pyAgrum library is free software; you can redistribute it #
+# and/or modify it under the terms of either : #
+# #
+# - the GNU Lesser General Public License as published by #
+# the Free Software Foundation, either version 3 of the License, #
+# or (at your option) any later version, #
+# - the MIT license (MIT), #
+# - or both in dual license, as here. #
+# #
+# (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) #
+# #
+# This aGrUM/pyAgrum library is distributed in the hope that it will be #
+# useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, #
+# INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS #
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, #
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR #
+# OTHER DEALINGS IN THE SOFTWARE. #
+# #
+# See LICENCES for more details. #
+# #
+# SPDX-FileCopyrightText: Copyright 2005-2025 #
+# - Pierre-Henri WUILLEMIN(_at_LIP6) #
+# - Christophe GONZALES(_at_AMU) #
+# SPDX-License-Identifier: LGPL-3.0-or-later OR MIT #
+# #
+# Contact : info_at_agrum_dot_org #
+# homepage : http://agrum.gitlab.io #
+# gitlab : https://gitlab.com/agrumery/agrum #
+# #
+############################################################################
+
+import pyagrum as gum
+from pyagrum.explain._ShapleyValues import ShapleyValues
+from pyagrum.explain._ComputationCausal import CausalComputation
+from pyagrum.explain._CustomShapleyCache import CustomShapleyCache
+from pyagrum.explain._FIFOCache import FIFOCache
+
+# Calculus
+import numpy as np
+import pandas as pd
+
+# GL
+import warnings
+
+
+class CausalShapValues(ShapleyValues, CausalComputation):
+  """
+  The CausalShapValues class computes the Causal Shapley values for a given target node in a Bayesian Network.
+  """
+
+  def __init__(self, bn, target, background: tuple | None, sample_size=1000, logit=True):
+    """
+    Parameters
+    ----------
+    bn : pyagrum.BayesNet
+      The Bayesian Network.
+    target : int | str
+      The node id (or node name) of the target.
+    background : tuple[pandas.DataFrame, bool] | None
+      A tuple containing a pandas DataFrame and a boolean indicating whether the DataFrame contains labels or positions.
+    sample_size : int
+      The size of the background sample to generate if `background` is None.
+    logit : bool
+      If True, applies the logit transformation to the probabilities.
+
+    Raises
+    ------
+    TypeError : If bn is not a gum.BayesNet instance, background is not a tuple, or target is not an integer or string.
+    ValueError : If target is not a valid node id in the Bayesian Network or if sample_size is not a positive integer.
+    """
+    super().__init__(bn, target, logit)
+    # Processing background data
+    if background is None:
+      if not isinstance(sample_size, int):
+        raise TypeError("When `background` is None, `sample_size` must be an integer, but got {}".format(type(sample_size)))
+      else:
+        if sample_size <= 1:
+          raise ValueError("`sample_size` must be greater than 1, but got {}".format(sample_size))
+      data = gum.generateSample(self.bn, sample_size, with_labels=False)[0].reindex(columns=self.feat_names).to_numpy()
+    else:
+      if not isinstance(background, tuple):
+        raise TypeError("`background` must be a tuple (pd.DataFrame, bool).")
+      data, with_labels = background
+      if not isinstance(with_labels, bool):
+        warnings.warn(
+          f"The second element of `background` should be a boolean, but got {type(with_labels)}. Unexpected calculations may occur."
+        )
+      if not isinstance(data, pd.DataFrame):
+        raise TypeError("The first element of `background` must be a pandas DataFrame, but got {}".format(type(data)))
+      if data.shape[0] < 2:
+        warnings.warn("You are giving a single row as background data, which will lead to biased Shapley values.")
+      if data.shape[1] != self.M:
+        raise ValueError(
+          "The number of columns in the background data must match the number of variables in the Bayesian network. Although values outside the Markov blanket, including the target, are unused, they are required for indexing purposes."
+        )
+      data = data.reindex(columns=self.feat_names).to_numpy()
+      if with_labels:
+        data = self._labelToPos_df(data, [i for i in range(self.M) if i != self.target])
+    self._data, self.counts = np.unique(data, return_counts=True, axis=0)
+    self._N = len(self._data)
+    # Calculating the baseline
+    self.baseline = self.func(
+      self._value(
+        data=self._data,
+        counts=self.counts,
+        elements=[i for i in range(self.M) if i != self.target],
+        sigma=self._mb,
+        cache=FIFOCache(100),
+        func1=self._posterior,
+        params1={},
+        func2=self._weight,
+        params2={"doLazy": gum.LazyPropagation(self.bn)},
+      )
+    )
+
+  def _shap_1dim(self, x, elements):
+    # Computes the Shapley values for a 1-dimensional input x (local explanation).
+    contributions = np.zeros((self.M, self.bn.variable(self.target).domainSize()))  # Initializes contributions array.
+    cache = CustomShapleyCache(5000)
+    markovImpact = FIFOCache(1000)
+    cache.set(0, (), self.baseline)  # Sets the baseline probability in the cache.
+    coalitions = self._coalitions(elements)  # Computes the coalitions.
+
+    for tau in coalitions:
+      self.ie.eraseAllEvidence()  # Clears all evidence from the inference engine.
+      doNet = self._doCalculus(self.bn, tau)  # Creates a new Bayesian Network to perform do-calculus.
+      sigma = self._outOfCoalition(tau, range(self.M))  # Extracts the nodes outside the coalition tau.
+      alpha = x[tau]  # Instantiation of tau.
+      self._chgCpt(doNet, tau, alpha)  # Changes the conditional probability tables to perform do-calculus.
+      doLazy = gum.LazyPropagation(
+        doNet
+      )  # Creates a lazy propagation inference engine to compute partial joint probabilities.
+      doLazy.addTarget(self.target)
+      idx = self._extract(self._data, tau, alpha)
+      posterior_with = self.func(
+        self._value(
+          data=self._data[idx],
+          counts=self.counts[idx],
+          elements=elements,
+          sigma=sigma,
+          cache=markovImpact,
+          func1=self._posterior,
+          params1={},
+          func2=self._weight,
+          params2={"doLazy": doLazy},
+        )
+      )
+
+      cache.set(0, tuple(tau), posterior_with)
+      # Contribution of each feature
+      for t in tau:
+        key = tuple(f for f in tau if f != t)
+        posterior_without = cache.get(0, key)
+        contributions[t] += self._shap_term(posterior_with, posterior_without, len(elements), len(tau) - 1)
+    return contributions
+
+  def _shap_ndim(self, x, elements):
+    contributions = np.zeros(
+      (self.M, len(x), self.bn.variable(self.target).domainSize())
+    )  # Initializes contributions array.
+    cache = CustomShapleyCache(5000)  # Initializes the custom cache.
+    markovImpact = FIFOCache(1000)
+    cache.set(0, (), self.baseline)  # Sets the baseline probability in the cache.
+    coalitions = self._coalitions(elements)  # Computes the coalitions.
+
+    for tau in coalitions:
+      self.ie.eraseAllEvidence()  # Clears all evidence from the inference engine.
+      doNet = self._doCalculus(self.bn, tau)  # Creates a new Bayesian Network to perform do-calculus.
+      sigma = self._outOfCoalition(tau, range(self.M))  # Extracts the nodes outside the coalition tau.
+
+      for i in range(len(x)):  # Iterates over each example in x.
+        alpha = x[i, tau]  # Instantiation of tau.
+        self._chgCpt(doNet, tau, alpha)  # Changes the conditional probability tables to perform do-calculus.
+        doLazy = gum.LazyPropagation(
+          doNet
+        )  # Creates a lazy propagation inference engine to compute partial joint probabilities.
+        doLazy.addTarget(self.target)
+        idx = self._extract(self._data, tau, alpha)
+        posterior_with = self.func(
+          self._value(
+            data=self._data[idx],
+            counts=self.counts[idx],
+            elements=elements,
+            sigma=sigma,
+            cache=markovImpact,
+            func1=self._posterior,
+            params1={},
+            func2=self._weight,
+            params2={"doLazy": doLazy},
+          )
+        )
+
+        cache.set(i, tuple(tau), posterior_with)
+        # Contribution of each feature
+        for t in tau:
+          key = tuple(f for f in tau if f != t)
+          posterior_without = cache.get(i, key) if len(key) > 0 else cache.get(0, ())
+          contributions[t, i] += self._shap_term(posterior_with, posterior_without, len(elements), len(tau) - 1)
+    return contributions
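
_doCalculus and _chgCpt are imported from _ComputationCausal, which is listed in this diff but not shown. The causal step they name is the standard network mutilation for do(τ): cut the arcs into the intervened nodes, replace their CPTs with point masses on the chosen values, and run inference on the mutilated network. A sketch of that idea with plain pyAgrum calls, assuming binary variables; the actual helpers may do more than this:

import pyagrum as gum

bn = gum.fastBN("Z->X->Y")
do_net = gum.BayesNet(bn)  # copy, then mutilate for do(X=1)
x_id = do_net.idFromName("X")
for parent in list(do_net.parents(x_id)):
  do_net.eraseArc(parent, x_id)  # X no longer listens to its parents
do_net.cpt(x_id).fillWith([0.0, 1.0])  # point mass on X=1

ie = gum.LazyPropagation(do_net)
ie.addTarget("Y")
ie.makeInference()
print(ie.posterior("Y"))  # P(Y | do(X=1))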