pyAgrum-nightly 2.2.1.9.dev202510271761405498__cp310-abi3-win_amd64.whl → 2.3.0.9.dev202510281761586496__cp310-abi3-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyAgrum-nightly might be problematic. Click here for more details.
- pyagrum/_pyagrum.pyd +0 -0
- pyagrum/common.py +1 -1
- pyagrum/config.py +1 -0
- pyagrum/explain/_ComputationCausal.py +75 -0
- pyagrum/explain/_ComputationConditional.py +48 -0
- pyagrum/explain/_ComputationMarginal.py +48 -0
- pyagrum/explain/_CustomShapleyCache.py +110 -0
- pyagrum/explain/_Explainer.py +176 -0
- pyagrum/explain/_Explanation.py +70 -0
- pyagrum/explain/_FIFOCache.py +54 -0
- pyagrum/explain/_ShallCausalValues.py +204 -0
- pyagrum/explain/_ShallConditionalValues.py +155 -0
- pyagrum/explain/_ShallMarginalValues.py +155 -0
- pyagrum/explain/_ShallValues.py +296 -0
- pyagrum/explain/_ShapCausalValues.py +208 -0
- pyagrum/explain/_ShapConditionalValues.py +126 -0
- pyagrum/explain/_ShapMarginalValues.py +191 -0
- pyagrum/explain/_ShapleyValues.py +298 -0
- pyagrum/explain/__init__.py +81 -0
- pyagrum/explain/_explGeneralizedMarkovBlanket.py +152 -0
- pyagrum/explain/_explIndependenceListForPairs.py +146 -0
- pyagrum/explain/_explInformationGraph.py +264 -0
- pyagrum/explain/notebook/__init__.py +54 -0
- pyagrum/explain/notebook/_bar.py +142 -0
- pyagrum/explain/notebook/_beeswarm.py +174 -0
- pyagrum/explain/notebook/_showShapValues.py +97 -0
- pyagrum/explain/notebook/_waterfall.py +220 -0
- pyagrum/explain/shapley.py +225 -0
- pyagrum/lib/explain.py +11 -490
- pyagrum/pyagrum.py +17 -10
- {pyagrum_nightly-2.2.1.9.dev202510271761405498.dist-info → pyagrum_nightly-2.3.0.9.dev202510281761586496.dist-info}/METADATA +1 -1
- {pyagrum_nightly-2.2.1.9.dev202510271761405498.dist-info → pyagrum_nightly-2.3.0.9.dev202510281761586496.dist-info}/RECORD +36 -12
- pyagrum/lib/shapley.py +0 -661
- {pyagrum_nightly-2.2.1.9.dev202510271761405498.dist-info → pyagrum_nightly-2.3.0.9.dev202510281761586496.dist-info}/LICENSE.md +0 -0
- {pyagrum_nightly-2.2.1.9.dev202510271761405498.dist-info → pyagrum_nightly-2.3.0.9.dev202510281761586496.dist-info}/LICENSES/LGPL-3.0-or-later.txt +0 -0
- {pyagrum_nightly-2.2.1.9.dev202510271761405498.dist-info → pyagrum_nightly-2.3.0.9.dev202510281761586496.dist-info}/LICENSES/MIT.txt +0 -0
- {pyagrum_nightly-2.2.1.9.dev202510271761405498.dist-info → pyagrum_nightly-2.3.0.9.dev202510281761586496.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
############################################################################
|
|
2
|
+
# This file is part of the aGrUM/pyAgrum library. #
|
|
3
|
+
# #
|
|
4
|
+
# Copyright (c) 2005-2025 by #
|
|
5
|
+
# - Pierre-Henri WUILLEMIN(_at_LIP6) #
|
|
6
|
+
# - Christophe GONZALES(_at_AMU) #
|
|
7
|
+
# #
|
|
8
|
+
# The aGrUM/pyAgrum library is free software; you can redistribute it #
|
|
9
|
+
# and/or modify it under the terms of either : #
|
|
10
|
+
# #
|
|
11
|
+
# - the GNU Lesser General Public License as published by #
|
|
12
|
+
# the Free Software Foundation, either version 3 of the License, #
|
|
13
|
+
# or (at your option) any later version, #
|
|
14
|
+
# - the MIT license (MIT), #
|
|
15
|
+
# - or both in dual license, as here. #
|
|
16
|
+
# #
|
|
17
|
+
# (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) #
|
|
18
|
+
# #
|
|
19
|
+
# This aGrUM/pyAgrum library is distributed in the hope that it will be #
|
|
20
|
+
# useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, #
|
|
21
|
+
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY OR FITNESS #
|
|
22
|
+
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
|
|
23
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
|
|
24
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, #
|
|
25
|
+
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR #
|
|
26
|
+
# OTHER DEALINGS IN THE SOFTWARE. #
|
|
27
|
+
# #
|
|
28
|
+
# See LICENSES for more details. #
|
|
29
|
+
# #
|
|
30
|
+
# SPDX-FileCopyrightText: Copyright 2005-2025 #
|
|
31
|
+
# - Pierre-Henri WUILLEMIN(_at_LIP6) #
|
|
32
|
+
# - Christophe GONZALES(_at_AMU) #
|
|
33
|
+
# SPDX-License-Identifier: LGPL-3.0-or-later OR MIT #
|
|
34
|
+
# #
|
|
35
|
+
# Contact : info_at_agrum_dot_org #
|
|
36
|
+
# homepage : http://agrum.gitlab.io #
|
|
37
|
+
# gitlab : https://gitlab.com/agrumery/agrum #
|
|
38
|
+
# #
|
|
39
|
+
############################################################################
|
|
40
|
+
|
|
41
|
+
import warnings
|
|
42
|
+
|
|
43
|
+
# ShapValues
|
|
44
|
+
from pyagrum.explain import CausalShapValues, ConditionalShapValues, Explanation, MarginalShapValues
|
|
45
|
+
|
|
46
|
+
# Calculations
|
|
47
|
+
import pyagrum as gum
|
|
48
|
+
|
|
49
|
+
# Plots
|
|
50
|
+
import matplotlib.pyplot as plt
|
|
51
|
+
from pyagrum.explain.notebook import bar, beeswarm, waterfall
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class ShapValues:
    """
    Class to compute Shapley values for a target variable in a Bayesian network.

    The three public methods (`conditional`, `marginal`, `causal`) each build the
    matching explainer from `pyagrum.explain`, compute an explanation for the
    given data, optionally plot it, and return the importances for class `y`.
    """

    def __init__(self, bn, target, logit=True):
        """
        Parameters:
        ------
        bn : pyagrum.BayesNet
            The Bayesian Network.
        target : int | str
            The node id (or node name) of the target.
        logit : bool
            If True, applies the logit transformation to the probabilities.

        Raises:
        ------
        TypeError : If bn is not a gum.BayesNet instance or target is not an integer or string.
        ValueError : If target is not a valid node id or node name in the Bayesian Network.

        Warns:
        ------
        UserWarning : If logit is not a boolean (the value is stored unchanged).
        """
        if not isinstance(bn, gum.BayesNet):
            raise TypeError(f"bn must be a gum.BayesNet instance, but got {type(bn)}")

        if isinstance(target, str):
            if target not in bn.names():
                raise ValueError(f"Target node name '{target}' not found in the Bayesian Network.")
            target = bn.idFromName(target)  # Convert node name to ID.
        elif isinstance(target, int):
            if target not in bn.nodes():
                raise ValueError(f"Target node ID {target} not found in the Bayesian Network.")
        else:
            raise TypeError(f"Target must be a node ID (int) or a node name (str), but got {type(target)}")

        if not isinstance(logit, bool):
            # stacklevel=2 makes the warning point at the caller's line, not at this module.
            warnings.warn(
                "logit should be a boolean, unexpected calculation may occur.",
                UserWarning,
                stacklevel=2,
            )

        # Instance attributes.
        self.bn = bn
        self.target = target  # always stored as a node id
        self.logit = logit

    @staticmethod
    def _plot(explanation: "Explanation", y: int, plot: bool, plot_importance: bool, percentage: bool, filename: str):
        """
        Draw the requested plots for an explanation, side by side on one figure.

        Parameters:
        ----------
        explanation : Explanation
            The explanation to plot; `explanation.data.ndim` selects the local plot.
        y : int
            The target class forwarded to the plotting functions.
        plot : bool
            If True, draws a waterfall plot when `explanation.data.ndim == 1`,
            a beeswarm plot otherwise.
        plot_importance : bool
            If True, draws the bar chart of feature importance.
        percentage : bool
            Forwarded to the bar chart.
        filename : str | None
            If None the figure is shown, otherwise it is saved to `filename` and closed.

        Nothing is drawn, shown or saved when both `plot` and `plot_importance` are falsy.
        """
        # Normalise the flags so a truthy non-bool (e.g. plot=2) still counts as one panel.
        show_local = bool(plot)
        show_importance = bool(plot_importance)
        n_figures = show_local + show_importance
        if n_figures == 0:
            return

        fig, axs = plt.subplots(1, n_figures, figsize=(n_figures * 6, 5))
        if n_figures == 1:
            axs = [axs]  # plt.subplots returns a bare Axes for a 1x1 grid
        panels = iter(axs)

        if show_local:
            draw_local = waterfall if explanation.data.ndim == 1 else beeswarm
            draw_local(explanation=explanation, y=y, ax=next(panels))
        if show_importance:
            bar(explanation=explanation, y=y, ax=next(panels), percentage=percentage)

        # Work on the figure created above rather than on pyplot's implicit current figure.
        fig.tight_layout()
        if filename is None:
            plt.show()
        else:
            fig.savefig(filename)
            plt.close(fig)

    def _explain(self, explainer, df, y, plot, plot_importance, percentage, filename):
        """
        Shared tail of `conditional`, `marginal` and `causal`.

        Calls `explainer.compute((df, True))`, plots the result as requested and
        returns `explanation.importances[y]`.
        """
        # NOTE(review): the True flag presumably marks df as holding labels rather
        # than positions - confirm against the explainers' `compute` documentation.
        explanation = explainer.compute((df, True))
        self._plot(explanation, y, plot, plot_importance, percentage, filename)
        return explanation.importances[y]

    def conditional(
        self,
        df,
        y: int = 1,
        plot: bool = False,
        plot_importance: bool = False,
        percentage: bool = False,
        filename: str = None,
    ):
        """
        Computes the conditional Shapley values for each variable.

        Parameters:
        ----------
        df : pandas DataFrame
            The input data for which to compute the Shapley values.
        y : int, optional
            The target class for which to compute the Shapley values (default is 1).
        plot : bool, optional
            If True, plots the waterfall or beeswarm plot depending on the number of rows in df (default is False).
        plot_importance : bool, optional
            If True, plots the bar chart of feature importance (default is False).
        percentage: bool
            if True, the importance plot is shown in percent.
        filename : str, optional
            If provided, saves the plots to the specified filename instead of displaying them.

        Returns:
        -------
        Dict[str, float]
            A dictionary containing the importances of each variable in the input data.
        """
        explainer = ConditionalShapValues(self.bn, self.target, self.logit)
        return self._explain(explainer, df, y, plot, plot_importance, percentage, filename)

    def marginal(
        self, df, y=1, sample_size=200, plot=False, plot_importance=False, percentage: bool = False, filename: str = None
    ):
        """
        Computes the marginal Shapley values for each variable.

        Parameters:
        ----------
        df : pandas DataFrame
            The input data for which to compute the Shapley values.
        y : int, optional
            The target class for which to compute the Shapley values (default is 1).
        sample_size : int, optional
            The number of samples to use for the background data (default is 200).
        plot : bool, optional
            If True, plots the waterfall or beeswarm plot depending on the number of rows in df (default is False).
        plot_importance : bool, optional
            If True, plots the bar chart of feature importance (default is False).
        percentage: bool
            if True, the importance plot is shown in percent.
        filename : str, optional
            If provided, saves the plots to the specified filename instead of displaying them.

        Returns:
        -------
        Dict[str, float]
            A dictionary containing the importances of each variable in the input data.
        """
        # The None argument presumably means "no explicit background data" - TODO confirm.
        explainer = MarginalShapValues(self.bn, self.target, None, sample_size, self.logit)
        return self._explain(explainer, df, y, plot, plot_importance, percentage, filename)

    def causal(
        self, df, y=1, sample_size=200, plot=False, plot_importance=False, percentage: bool = False, filename: str = None
    ):
        """
        Computes the causal Shapley values for each variable.

        Parameters:
        ----------
        df : pandas DataFrame
            The input data for which to compute the Shapley values.
        y : int, optional
            The target class for which to compute the Shapley values (default is 1).
        sample_size : int, optional
            The number of samples to use for the background data (default is 200).
        plot : bool, optional
            If True, plots the waterfall or beeswarm plot depending on the number of rows in df (default is False).
        plot_importance : bool, optional
            If True, plots the bar chart of feature importance (default is False).
        percentage: bool
            if True, the importance plot is shown in percent.
        filename : str, optional
            If provided, saves the plots to the specified filename instead of displaying them.

        Returns:
        -------
        Dict[str, float]
            A dictionary containing the importances of each variable in the input data.
        """
        # The None argument presumably means "no explicit background data" - TODO confirm.
        explainer = CausalShapValues(self.bn, self.target, None, sample_size, self.logit)
        return self._explain(explainer, df, y, plot, plot_importance, percentage, filename)
|