pyAgrum-nightly 2.3.1.9.dev202512261765915415-cp310-abi3-macosx_10_15_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. pyagrum/__init__.py +165 -0
  2. pyagrum/_pyagrum.so +0 -0
  3. pyagrum/bnmixture/BNMInference.py +268 -0
  4. pyagrum/bnmixture/BNMLearning.py +376 -0
  5. pyagrum/bnmixture/BNMixture.py +464 -0
  6. pyagrum/bnmixture/__init__.py +60 -0
  7. pyagrum/bnmixture/notebook.py +1058 -0
  8. pyagrum/causal/_CausalFormula.py +280 -0
  9. pyagrum/causal/_CausalModel.py +436 -0
  10. pyagrum/causal/__init__.py +81 -0
  11. pyagrum/causal/_causalImpact.py +356 -0
  12. pyagrum/causal/_dSeparation.py +598 -0
  13. pyagrum/causal/_doAST.py +761 -0
  14. pyagrum/causal/_doCalculus.py +361 -0
  15. pyagrum/causal/_doorCriteria.py +374 -0
  16. pyagrum/causal/_exceptions.py +95 -0
  17. pyagrum/causal/_types.py +61 -0
  18. pyagrum/causal/causalEffectEstimation/_CausalEffectEstimation.py +1175 -0
  19. pyagrum/causal/causalEffectEstimation/_IVEstimators.py +718 -0
  20. pyagrum/causal/causalEffectEstimation/_RCTEstimators.py +132 -0
  21. pyagrum/causal/causalEffectEstimation/__init__.py +46 -0
  22. pyagrum/causal/causalEffectEstimation/_backdoorEstimators.py +774 -0
  23. pyagrum/causal/causalEffectEstimation/_causalBNEstimator.py +324 -0
  24. pyagrum/causal/causalEffectEstimation/_frontdoorEstimators.py +396 -0
  25. pyagrum/causal/causalEffectEstimation/_learners.py +118 -0
  26. pyagrum/causal/causalEffectEstimation/_utils.py +466 -0
  27. pyagrum/causal/notebook.py +172 -0
  28. pyagrum/clg/CLG.py +658 -0
  29. pyagrum/clg/GaussianVariable.py +111 -0
  30. pyagrum/clg/SEM.py +312 -0
  31. pyagrum/clg/__init__.py +63 -0
  32. pyagrum/clg/canonicalForm.py +408 -0
  33. pyagrum/clg/constants.py +54 -0
  34. pyagrum/clg/forwardSampling.py +202 -0
  35. pyagrum/clg/learning.py +776 -0
  36. pyagrum/clg/notebook.py +480 -0
  37. pyagrum/clg/variableElimination.py +271 -0
  38. pyagrum/common.py +60 -0
  39. pyagrum/config.py +319 -0
  40. pyagrum/ctbn/CIM.py +513 -0
  41. pyagrum/ctbn/CTBN.py +573 -0
  42. pyagrum/ctbn/CTBNGenerator.py +216 -0
  43. pyagrum/ctbn/CTBNInference.py +459 -0
  44. pyagrum/ctbn/CTBNLearner.py +161 -0
  45. pyagrum/ctbn/SamplesStats.py +671 -0
  46. pyagrum/ctbn/StatsIndepTest.py +355 -0
  47. pyagrum/ctbn/__init__.py +79 -0
  48. pyagrum/ctbn/constants.py +54 -0
  49. pyagrum/ctbn/notebook.py +264 -0
  50. pyagrum/defaults.ini +199 -0
  51. pyagrum/deprecated.py +95 -0
  52. pyagrum/explain/_ComputationCausal.py +75 -0
  53. pyagrum/explain/_ComputationConditional.py +48 -0
  54. pyagrum/explain/_ComputationMarginal.py +48 -0
  55. pyagrum/explain/_CustomShapleyCache.py +110 -0
  56. pyagrum/explain/_Explainer.py +176 -0
  57. pyagrum/explain/_Explanation.py +70 -0
  58. pyagrum/explain/_FIFOCache.py +54 -0
  59. pyagrum/explain/_ShallCausalValues.py +204 -0
  60. pyagrum/explain/_ShallConditionalValues.py +155 -0
  61. pyagrum/explain/_ShallMarginalValues.py +155 -0
  62. pyagrum/explain/_ShallValues.py +296 -0
  63. pyagrum/explain/_ShapCausalValues.py +208 -0
  64. pyagrum/explain/_ShapConditionalValues.py +126 -0
  65. pyagrum/explain/_ShapMarginalValues.py +191 -0
  66. pyagrum/explain/_ShapleyValues.py +298 -0
  67. pyagrum/explain/__init__.py +81 -0
  68. pyagrum/explain/_explGeneralizedMarkovBlanket.py +152 -0
  69. pyagrum/explain/_explIndependenceListForPairs.py +146 -0
  70. pyagrum/explain/_explInformationGraph.py +264 -0
  71. pyagrum/explain/notebook/__init__.py +54 -0
  72. pyagrum/explain/notebook/_bar.py +142 -0
  73. pyagrum/explain/notebook/_beeswarm.py +174 -0
  74. pyagrum/explain/notebook/_showShapValues.py +97 -0
  75. pyagrum/explain/notebook/_waterfall.py +220 -0
  76. pyagrum/explain/shapley.py +225 -0
  77. pyagrum/lib/__init__.py +46 -0
  78. pyagrum/lib/_colors.py +390 -0
  79. pyagrum/lib/bn2graph.py +299 -0
  80. pyagrum/lib/bn2roc.py +1026 -0
  81. pyagrum/lib/bn2scores.py +217 -0
  82. pyagrum/lib/bn_vs_bn.py +605 -0
  83. pyagrum/lib/cn2graph.py +305 -0
  84. pyagrum/lib/discreteTypeProcessor.py +1102 -0
  85. pyagrum/lib/discretizer.py +58 -0
  86. pyagrum/lib/dynamicBN.py +390 -0
  87. pyagrum/lib/explain.py +57 -0
  88. pyagrum/lib/export.py +84 -0
  89. pyagrum/lib/id2graph.py +258 -0
  90. pyagrum/lib/image.py +387 -0
  91. pyagrum/lib/ipython.py +307 -0
  92. pyagrum/lib/mrf2graph.py +471 -0
  93. pyagrum/lib/notebook.py +1821 -0
  94. pyagrum/lib/proba_histogram.py +552 -0
  95. pyagrum/lib/utils.py +138 -0
  96. pyagrum/pyagrum.py +31495 -0
  97. pyagrum/skbn/_MBCalcul.py +242 -0
  98. pyagrum/skbn/__init__.py +49 -0
  99. pyagrum/skbn/_learningMethods.py +282 -0
  100. pyagrum/skbn/_utils.py +297 -0
  101. pyagrum/skbn/bnclassifier.py +1014 -0
  102. pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/LICENSE.md +12 -0
  103. pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/LICENSES/LGPL-3.0-or-later.txt +304 -0
  104. pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/LICENSES/MIT.txt +18 -0
  105. pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/METADATA +145 -0
  106. pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/RECORD +107 -0
  107. pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/WHEEL +4 -0
pyagrum/explain/_ShallConditionalValues.py
@@ -0,0 +1,155 @@
+ ############################################################################
+ # This file is part of the aGrUM/pyAgrum library. #
+ # #
+ # Copyright (c) 2005-2025 by #
+ # - Pierre-Henri WUILLEMIN(_at_LIP6) #
+ # - Christophe GONZALES(_at_AMU) #
+ # #
+ # The aGrUM/pyAgrum library is free software; you can redistribute it #
+ # and/or modify it under the terms of either : #
+ # #
+ # - the GNU Lesser General Public License as published by #
+ # the Free Software Foundation, either version 3 of the License, #
+ # or (at your option) any later version, #
+ # - the MIT license (MIT), #
+ # - or both in dual license, as here. #
+ # #
+ # (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) #
+ # #
+ # This aGrUM/pyAgrum library is distributed in the hope that it will be #
+ # useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, #
+ # INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS #
+ # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, #
+ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR #
+ # OTHER DEALINGS IN THE SOFTWARE. #
+ # #
+ # See LICENCES for more details. #
+ # #
+ # SPDX-FileCopyrightText: Copyright 2005-2025 #
+ # - Pierre-Henri WUILLEMIN(_at_LIP6) #
+ # - Christophe GONZALES(_at_AMU) #
+ # SPDX-License-Identifier: LGPL-3.0-or-later OR MIT #
+ # #
+ # Contact : info_at_agrum_dot_org #
+ # homepage : http://agrum.gitlab.io #
+ # gitlab : https://gitlab.com/agrumery/agrum #
+ # #
+ ############################################################################
+
+ import pyagrum as gum
+
+ from pyagrum.explain._ShallValues import ShallValues
+ from pyagrum.explain._CustomShapleyCache import CustomShapleyCache
+ from pyagrum.explain._FIFOCache import FIFOCache
+ from pyagrum.explain._ComputationConditional import ConditionalComputation
+
+ import numpy as np
+ from warnings import warn
+
+
+ class ConditionalShallValues(ShallValues, ConditionalComputation):
+   """
+   The ConditionalShallValues class computes the conditional Shall values in a Bayesian network.
+   """
+
+   def __init__(self, bn: gum.BayesNet, background: tuple | None, sample_size: int = 1000, log: bool = True):
+     """
+     Note 1: All rows in the background data that contain NaN values in columns corresponding to variables in the Bayesian network will be dropped.
+     Note 2: For small databases, SHALL values can be incorrect.
+
+     Parameters
+     ----------
+     bn : pyagrum.BayesNet
+       The Bayesian network.
+     background : tuple[pandas.DataFrame, bool] | None
+       A tuple containing a pandas DataFrame and a boolean indicating whether the DataFrame contains labels (True) or positional values (False).
+     sample_size : int
+       The size of the background sample to generate if `background` is None.
+     log : bool
+       If True, applies a logarithmic transformation to the probabilities.
+
+     Raises
+     ------
+     TypeError : If `bn` is not a gum.BayesNet instance or `background` is not a tuple.
+     ValueError : If the background data does not contain all variables present in the Bayesian network, or if
+       the background data is empty after rows with NaNs were dropped.
+     """
+
+     super().__init__(bn, background, sample_size, log)  # Initializes the ShallValues base class.
+     self.baseline = self._value(
+       data=self._data,
+       counts=self.counts,
+       elements=self.vars_ids,
+       sigma=[],
+       cache=FIFOCache(100),
+       func1=self._joint,
+       params1={},
+       func2=self._weight,
+       params2={},
+     )
+
+   def _coalition_contribution(self, k, ex, feature, nodes_id, nodes_vals, cache, fifo_cache):
+     # key2 is always set since subsets are sorted by length.
+     key1, key2, _ = cache.generate_keys(self.bn, None, feature, nodes_id)
+     # key1: nodes_id; key2: nodes_id without feature.
+     if k == 0:
+       idx = self._extract(self._data, nodes_id, nodes_vals)
+       # warn(f"Extracted database is empty ({self.feat_names[nodes_id]} = {nodes_vals}). Conditional SHALL values may be incorrect.")
+       cache.set(
+         ex,
+         key1,
+         self._value(
+           data=self._data[idx],
+           counts=self.counts[idx],
+           elements=self.vars_ids,
+           sigma=[],
+           cache=fifo_cache,
+           func1=self._joint,
+           params1={},
+           func2=self._weight,
+           params2={},
+         ),
+       )
+
+     joint_prob_with = cache.get(ex, key1)  # With feature.
+     joint_prob_without = cache.get(ex, key2) if len(key1) > 1 else cache.get(-1, "")  # Without feature.
+     return (joint_prob_with - joint_prob_without) / self._invcoeff_shap(len(self.vars_ids), len(nodes_id) - 1)
+
+   def _shall_1dim(self, x) -> np.ndarray:
+     contributions = np.zeros(self.M)  # Initializes the contributions array.
+     fifo_cache = FIFOCache(2000)
+     cache = CustomShapleyCache(5000)  # Initializes the custom cache.
+     cache.set(-1, "", self.baseline)  # Sets the baseline probability in the cache.
+     coalitions = self._coalitions(self.vars_ids)  # Generates coalitions.
+     for nodes_id in coalitions:
+       nodes_vals = x[nodes_id]  # Gets the values of the nodes in the coalition.
+       for k, feature in enumerate(nodes_id):
+         # Accumulates the contribution for each feature.
+         contributions[feature] += self._coalition_contribution(
+           k, 0, int(feature), nodes_id, nodes_vals, cache, fifo_cache
+         )
+     return contributions
+
+   def _shall_ndim(self, x) -> np.ndarray:
+     contributions = np.zeros((self.M, len(x)))  # Initializes the contributions array.
+     fifo_cache = FIFOCache(2000)
+     cache = CustomShapleyCache(5000)  # Initializes the custom cache.
+     cache.set(-1, "", self.baseline)  # Sets the baseline probability in the cache.
+
+     coalitions = self._coalitions(self.vars_ids)  # Generates coalitions.
+     for nodes_id in coalitions:
+       data_vals = x[:, nodes_id]  # Gets the values of the nodes in the coalition.
+       for ex, nodes_vals in enumerate(data_vals):
+         for k, feature in enumerate(nodes_id):
+           contributions[feature, ex] += self._coalition_contribution(
+             k, ex, int(feature), nodes_id, nodes_vals, cache, fifo_cache
+           )
+     return contributions
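For orientation between hunks, here is a minimal usage sketch for the class above. It is not part of the diff: it assumes ConditionalShallValues is re-exported by pyagrum/explain/__init__.py (also added in this release), and the network and values are purely illustrative.

import pyagrum as gum
from pyagrum.explain import ConditionalShallValues  # assumed re-export

# A small illustrative network; any discrete BN works.
bn = gum.fastBN("A->B->C")

# background=None: a background sample of `sample_size` rows is drawn from the BN itself.
expl = ConditionalShallValues(bn, background=None, sample_size=1000, log=True)

# A dict of scalars is treated as a single instance; False means positional values, not labels.
explanation = expl.compute(({"A": 0, "B": 1, "C": 0}, False))
print(explanation)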
pyagrum/explain/_ShallMarginalValues.py
@@ -0,0 +1,155 @@
+ ############################################################################
+ # This file is part of the aGrUM/pyAgrum library. #
+ # #
+ # Copyright (c) 2005-2025 by #
+ # - Pierre-Henri WUILLEMIN(_at_LIP6) #
+ # - Christophe GONZALES(_at_AMU) #
+ # #
+ # The aGrUM/pyAgrum library is free software; you can redistribute it #
+ # and/or modify it under the terms of either : #
+ # #
+ # - the GNU Lesser General Public License as published by #
+ # the Free Software Foundation, either version 3 of the License, #
+ # or (at your option) any later version, #
+ # - the MIT license (MIT), #
+ # - or both in dual license, as here. #
+ # #
+ # (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) #
+ # #
+ # This aGrUM/pyAgrum library is distributed in the hope that it will be #
+ # useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, #
+ # INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS #
+ # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, #
+ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR #
+ # OTHER DEALINGS IN THE SOFTWARE. #
+ # #
+ # See LICENCES for more details. #
+ # #
+ # SPDX-FileCopyrightText: Copyright 2005-2025 #
+ # - Pierre-Henri WUILLEMIN(_at_LIP6) #
+ # - Christophe GONZALES(_at_AMU) #
+ # SPDX-License-Identifier: LGPL-3.0-or-later OR MIT #
+ # #
+ # Contact : info_at_agrum_dot_org #
+ # homepage : http://agrum.gitlab.io #
+ # gitlab : https://gitlab.com/agrumery/agrum #
+ # #
+ ############################################################################
+
+ import pyagrum as gum
+ from pyagrum.explain._ShallValues import ShallValues
+ from pyagrum.explain._ComputationMarginal import MarginalComputation
+ from pyagrum.explain._CustomShapleyCache import CustomShapleyCache
+ from pyagrum.explain._FIFOCache import FIFOCache
+
+
+ import numpy as np
+
+
+ class MarginalShallValues(ShallValues, MarginalComputation):
+   """
+   The MarginalShallValues class computes the marginal Shall values in a Bayesian network.
+   """
+
+   def __init__(self, bn: gum.BayesNet, background: tuple | None, sample_size: int = 1000, log: bool = True):
+     """
+     Note: All rows in the background data that contain NaN values in columns corresponding to variables in the Bayesian network will be dropped.
+
+     Parameters
+     ----------
+     bn : pyagrum.BayesNet
+       The Bayesian network.
+     background : tuple[pandas.DataFrame, bool] | None
+       A tuple containing a pandas DataFrame and a boolean indicating whether the DataFrame contains labels (True) or positional values (False).
+     sample_size : int
+       The size of the background sample to generate if `background` is None.
+     log : bool
+       If True, applies a logarithmic transformation to the probabilities.
+
+     Raises
+     ------
+     TypeError : If `bn` is not a gum.BayesNet instance or `background` is not a tuple.
+     ValueError : If the background data does not contain all variables present in the Bayesian network, or if
+       the background data is empty after rows with NaNs were dropped.
+     """
+
+     super().__init__(bn, background, sample_size, log)
+
+     self.baseline = self._value(
+       data=self._data,
+       counts=self.counts,
+       elements=self.vars_ids,
+       sigma=[],
+       cache=FIFOCache(100),
+       func1=self._joint,
+       params1={},
+       func2=self._weight,
+       params2={},
+     )
+
+   def _coalition_contribution(self, k, ex, feature, fifo_cache, nodes_id, nodes_vals, cache):
+     key1, key2, _ = cache.generate_keys(self.bn, None, feature, nodes_id)
+     if k == 0:
+       interv = self._data.copy()
+       interv[:, nodes_id] = nodes_vals
+       cache.set(
+         ex,
+         key1,
+         self._value(
+           data=interv,
+           counts=self.counts,
+           elements=self.vars_ids,
+           sigma=[],
+           cache=fifo_cache,
+           func1=self._joint,
+           params1={},
+           func2=self._weight,
+           params2={},
+         ),
+       )
+
+     joint_prob_with = cache.get(ex, key1)
+     joint_prob_without = cache.get(ex, key2) if len(key1) > 1 else cache.get(-1, ())
+     return (joint_prob_with - joint_prob_without) / self._invcoeff_shap(len(self.vars_ids), len(nodes_id) - 1)
+
+   def _shall_1dim(self, x):
+     # Result initialisation.
+     contributions = np.zeros(self.M)
+     # Cache management.
+     fifo_cache = FIFOCache(2000)
+     custom_cache = CustomShapleyCache(5000)
+     # Sets the baseline probability in the cache.
+     custom_cache.set(-1, (), self.baseline)
+     coalitions = self._coalitions(self.vars_ids)
+     for nodes_id in coalitions:
+       nodes_vals = x[nodes_id]
+       for k, feature in enumerate(nodes_id):
+         contributions[feature] += self._coalition_contribution(
+           k, 0, int(feature), fifo_cache, nodes_id, nodes_vals, custom_cache
+         )
+     return contributions
+
+   def _shall_ndim(self, x):
+     # Result initialisation.
+     contributions = np.zeros((self.M, len(x)))
+     # Cache management.
+     fifo_cache = FIFOCache(2000)
+     custom_cache = CustomShapleyCache(5000)
+     # Sets the baseline probability in the cache.
+     custom_cache.set(-1, (), self.baseline)
+     coalitions = self._coalitions(self.vars_ids)
+     for nodes_id in coalitions:
+       for ex, nodes_values in enumerate(x[:, nodes_id]):
+         for k, feature in enumerate(nodes_id):
+           contributions[feature, ex] += self._coalition_contribution(
+             k, ex, int(feature), fifo_cache, nodes_id, nodes_values, custom_cache
+           )
+     return contributions
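Both classes divide each coalition's marginal contribution, v(S) - v(S \ {feature}), by _invcoeff_shap(M, |S| - 1). That helper is not shown in this diff, so the following sketch is an assumption about its standard reading (the inverse of the classical Shapley weight), not the library's confirmed implementation:

from math import comb

def invcoeff_shap(M: int, s: int) -> int:
  # Inverse of the Shapley weight s! * (M - s - 1)! / M!, which equals
  # M * C(M - 1, s): with M features and a coalition of size s + 1 that
  # contains the feature, there are M * C(M - 1, s) equally weighted ways
  # of choosing the other s members across all orderings.
  return M * comb(M - 1, s)

# Sanity check: the weights over all coalition sizes containing a fixed
# feature sum to 1, as required for an exact Shapley decomposition.
M = 5
assert abs(sum(comb(M - 1, s) / invcoeff_shap(M, s) for s in range(M)) - 1.0) < 1e-12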
pyagrum/explain/_ShallValues.py
@@ -0,0 +1,296 @@
+ ############################################################################
+ # This file is part of the aGrUM/pyAgrum library. #
+ # #
+ # Copyright (c) 2005-2025 by #
+ # - Pierre-Henri WUILLEMIN(_at_LIP6) #
+ # - Christophe GONZALES(_at_AMU) #
+ # #
+ # The aGrUM/pyAgrum library is free software; you can redistribute it #
+ # and/or modify it under the terms of either : #
+ # #
+ # - the GNU Lesser General Public License as published by #
+ # the Free Software Foundation, either version 3 of the License, #
+ # or (at your option) any later version, #
+ # - the MIT license (MIT), #
+ # - or both in dual license, as here. #
+ # #
+ # (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) #
+ # #
+ # This aGrUM/pyAgrum library is distributed in the hope that it will be #
+ # useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, #
+ # INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS #
+ # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, #
+ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR #
+ # OTHER DEALINGS IN THE SOFTWARE. #
+ # #
+ # See LICENCES for more details. #
+ # #
+ # SPDX-FileCopyrightText: Copyright 2005-2025 #
+ # - Pierre-Henri WUILLEMIN(_at_LIP6) #
+ # - Christophe GONZALES(_at_AMU) #
+ # SPDX-License-Identifier: LGPL-3.0-or-later OR MIT #
+ # #
+ # Contact : info_at_agrum_dot_org #
+ # homepage : http://agrum.gitlab.io #
+ # gitlab : https://gitlab.com/agrumery/agrum #
+ # #
+ ############################################################################
+
+ from abc import abstractmethod
+
+ import pyagrum as gum
+ from pyagrum.explain._Explainer import Explainer
+ from pyagrum.explain._Explanation import Explanation
+
+ import pandas as pd
+ import numpy as np
+ import warnings
+
+
+ class ShallValues(Explainer):
+   """
+   The ShallValues class is an abstract base class for computing Shall values in a Bayesian network.
+   """
+
+   def __init__(self, bn: gum.BayesNet, background: tuple | None, sample_size: int = 1000, log: bool = True):
+     """
+     Note: All rows in the background data that contain NaN values in columns corresponding to variables in the Bayesian network will be dropped.
+
+     Parameters
+     ----------
+     bn : pyagrum.BayesNet
+       The Bayesian network.
+     background : tuple[pandas.DataFrame, bool] | None
+       A tuple containing a pandas DataFrame and a boolean indicating whether the DataFrame contains labels (True) or positional values (False).
+     sample_size : int
+       The size of the background sample to generate if `background` is None.
+     log : bool
+       If True, applies a logarithmic transformation to the probabilities.
+
+     Raises
+     ------
+     TypeError : If `bn` is not a gum.BayesNet instance or `background` is not a tuple.
+     ValueError : If the background data does not contain all variables present in the Bayesian network, or if
+       the background data is empty after rows with NaNs were dropped.
+     """
+
+     super().__init__(bn)
+     self.vars_ids = sorted(bn.ids(self.feat_names))
+
+     # Processing background data.
+     if background is None:
+       if not isinstance(sample_size, int):
+         raise TypeError("When `background` is None, `sample_size` must be an integer, but got {}".format(type(sample_size)))
+       if sample_size < 1:
+         raise ValueError("`sample_size` must be at least 1, but got {}".format(sample_size))
+       if sample_size < 10:
+         warnings.warn("The sample size is small, which may lead to biased Shapley values.")
+       data = gum.generateSample(self.bn, sample_size, with_labels=False)[0].reindex(columns=self.feat_names).to_numpy()
+     else:
+       if not isinstance(background, tuple):
+         raise TypeError("`background` must be a tuple (pd.DataFrame, bool).")
+       data, with_labels = background
+       if not isinstance(with_labels, bool):
+         warnings.warn(
+           f"The second element of `background` should be a boolean, but got {type(with_labels)}. Unexpected calculations may occur."
+         )
+       if not isinstance(data, pd.DataFrame):
+         raise TypeError("The first element of `background` must be a pandas DataFrame, but got {}".format(type(data)))
+       if data.shape[0] < 2:
+         warnings.warn("You are giving a single row as background data, which will lead to biased Shapley values.")
+       if data.shape[1] != self.M:
+         raise ValueError(
+           "The number of columns in the background data must match the number of variables in the Bayesian network. "
+           "Although values outside the Markov blanket, including the target, are unused, they are required for indexing purposes."
+         )
+       data = data.reindex(columns=self.feat_names).dropna(axis=0).to_numpy()
+       if with_labels:
+         data = self._labelToPos_df(data, self.vars_ids)
+
+     self._N = len(data)
+     if self._N == 0:
+       raise ValueError("Background data can't be empty.")
+
+     self._data, self.counts = np.unique(data, axis=0, return_counts=True)
+     self.func = self._log if log else self._identity
+
+     # For jointProbability.
+     self.inst = gum.Instantiation()
+     for var in self.bn.ids(self.feat_names):
+       self.inst.add(self.bn.variable(var))
+
+   # Note: We use BayesNet.jointProbability instead of LazyPropagation.evidenceProbability because the joint probability is much faster.
+   def _joint(self, row_values):
+     self.inst.fromdict(row_values)
+     return self.func(self.bn.jointProbability(self.inst))
+
+   @abstractmethod
+   def _shall_1dim(self, x):
+     # Computes the Shall values for a single instance.
+     # This method must be implemented in subclasses.
+     raise NotImplementedError("This method should be implemented in subclasses.")
+
+   @abstractmethod
+   def _shall_ndim(self, x):
+     # Computes the Shall values for multiple instances.
+     # This method must be implemented in subclasses.
+     raise NotImplementedError("This method should be implemented in subclasses.")
+
+   def compute(self, data: tuple | None, N: int = 100):
+     """
+     Computes the SHALL values for all rows in the provided data.
+
+     Note 1: Since partial explanation is not supported, all rows in `data` must contain all variables present in the initialized Bayesian network.
+     Note 2: All rows containing NaN values in columns corresponding to variables in the Bayesian network will be dropped.
+
+     Parameters
+     ----------
+     data : tuple | None
+       A tuple containing either a pandas DataFrame, Series, or dictionary, and a boolean indicating whether labels are provided.
+       If None, a random sample of size N is generated.
+     N : int
+       The number of samples to generate if `data` is None.
+
+     Returns
+     -------
+     Explanation
+       An Explanation object containing the SHALL values and variable importances for each row in the data, after rows with NaN values have been dropped.
+
+     Raises
+     ------
+     TypeError : If the first element of `data` is not a pd.DataFrame, pd.Series or dict, or if N is not an integer when `data` is None.
+     ValueError : If N is less than 2 when `data` is None, or if the provided data does not contain all variables present in the initialized Bayesian network.
+     """
+
+     # Note: `elements` (as in ShapValues) is no longer needed, since partial explanation is impossible.
+     if data is None:
+       if not isinstance(N, int):
+         raise TypeError("Since `data` is None, N must be an integer, but got {}".format(type(N)))
+       if N < 2:
+         raise ValueError("N must be greater than 1, but got {}".format(N))
+       y = gum.generateSample(self.bn, N, with_labels=False)[0].reindex(columns=self.feat_names).to_numpy()
+       # Remove duplicate rows in the generated data.
+       _, idx = np.unique(y, axis=0, return_index=True)
+       y = y[idx, :]
+       contributions = self._shall_ndim(y)
+     else:
+       if not isinstance(data, tuple):
+         raise TypeError("`data` must be a tuple (pd.DataFrame, bool).")
+       df, with_labels = data
+       if not isinstance(with_labels, bool):
+         warnings.warn(
+           f"The second element of `data` should be a boolean, but got {type(with_labels)}. Unexpected calculations may occur."
+         )
+       dtype = object if with_labels else int
+
+       if isinstance(df, pd.Series):
+         # Here df is a single instance (a Series).
+         if np.setdiff1d(self.feat_names, df.index).size != 0:
+           raise ValueError(
+             "For SHALL values, you must provide all variables used in the Bayesian network; passing only a subset is not allowed."
+           )
+
+         x = df.reindex(self.feat_names).dropna().to_numpy()
+         if x.size == 0:
+           raise ValueError("DataFrame is empty")
+
+         y = self._labelToPos_row(x, self.vars_ids) if with_labels else x
+         contributions = self._shall_1dim(y)
+
+       elif isinstance(df, pd.DataFrame):
+         if np.setdiff1d(self.feat_names, df.columns).size != 0:
+           raise ValueError(
+             "For SHALL values, you must provide all variables used in the Bayesian network; passing only a subset is not allowed."
+           )
+
+         df_clean = df.dropna(axis=0, subset=self.feat_names)
+         if len(df_clean) == 1:
+           # Here df is a single instance (a DataFrame with one row).
+           x = df_clean.reindex(columns=self.feat_names).to_numpy()[0]
+           if x.size == 0:
+             raise ValueError("DataFrame is empty")
+           y = self._labelToPos_row(x, self.vars_ids) if with_labels else x
+           contributions = self._shall_1dim(y)
+
+         else:
+           x = df_clean.reindex(columns=self.feat_names).to_numpy()
+           if x.size == 0:
+             raise ValueError("DataFrame is empty")
+           y = self._labelToPos_df(x, self.vars_ids) if with_labels else x
+           # Remove duplicate rows.
+           _, idx = np.unique(y, axis=0, return_index=True)
+           y = y[idx, :]
+           contributions = self._shall_ndim(y)
+
+       elif isinstance(df, dict):
+         if len(set(self.feat_names) - set(df.keys())) != 0:
+           raise ValueError(
+             "For SHALL values, you must provide all variables used in the Bayesian network; passing only a subset is not allowed."
+           )
+
+         try:
+           N = len(list(df.values())[0])
+           if not isinstance(list(df.values())[0], (list, np.ndarray)):
+             raise TypeError("Each value in the dictionary must be a list or a numpy array.")
+
+           x = np.empty((N, self.M), dtype=dtype)
+           for feat in df.keys():
+             node_id = self.bn.idFromName(feat)
+             x[:, node_id] = df[feat]
+           # Drop rows containing None or np.nan.
+           mask = [all(val is not None and not (isinstance(val, float) and np.isnan(val)) for val in row) for row in x]
+           x = x[mask]
+           if x.size == 0:
+             raise ValueError("Data is empty")
+           # Remove duplicate rows.
+           y = self._labelToPos_df(x, self.vars_ids) if with_labels else x
+           _, idx = np.unique(y, axis=0, return_index=True)
+           y = y[idx, :]
+           contributions = self._shall_ndim(y)
+
+         except TypeError:
+           # Here df is a single instance (a dictionary of scalar values).
+           x = np.empty(self.M, dtype=dtype)
+           for feat in df.keys():
+             if df[feat] is not None:
+               node_id = self.bn.idFromName(feat)
+               x[node_id] = df[feat]
+           if x.size == 0:
+             raise ValueError("Data is empty")
+           y = self._labelToPos_row(x, self.vars_ids) if with_labels else x
+           contributions = self._shall_1dim(y)
+
+       else:
+         raise TypeError(
+           "The first element of `data` must be a pandas DataFrame, Series or a dictionary, but got {}".format(type(df))
+         )
+
+     if contributions.ndim == 1:
+       values = {self.feat_names[i]: float(contributions[i]) for i in self.vars_ids}
+       importances = {self.feat_names[i]: abs(float(contributions[i])) for i in self.vars_ids}
+
+       explanation = Explanation(
+         values,
+         importances,
+         list(self.feat_names[self.vars_ids]),
+         x[self.vars_ids],
+         self.baseline,
+         self.func.__name__,
+         "SHALL",
+       )
+     else:
+       values = {self.feat_names[i]: [float(v) for v in contributions[i, :]] for i in self.vars_ids}
+       mean_abs = np.mean(np.abs(contributions), axis=1)
+       importances = {self.feat_names[i]: abs(float(mean_abs[i])) for i in self.vars_ids}
+       explanation = Explanation(
+         values,
+         importances,
+         list(self.feat_names[self.vars_ids]),
+         y[:, self.vars_ids],
+         self.baseline,
+         self.func.__name__,
+         "SHALL",
+       )
+     return explanation
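To close, a sketch of the end-to-end API defined by this base class, using MarginalShallValues from the second hunk. As above, this is illustrative rather than part of the package: the re-export from pyagrum.explain is assumed, and generateSample is used exactly as the constructor itself uses it.

import pyagrum as gum
from pyagrum.explain import MarginalShallValues  # assumed re-export

bn = gum.fastBN("A->B<-C")

# Explicit background: a (DataFrame, with_labels) tuple whose columns must
# cover every variable of the BN; True means the frame holds labels.
df = gum.generateSample(bn, 500, with_labels=True)[0]
expl = MarginalShallValues(bn, background=(df, True), sample_size=1000, log=True)

# compute() takes the same (data, with_labels) convention; rows with NaNs
# are dropped and duplicates removed before the SHALL values are computed.
explanation = expl.compute((df.head(20), True))
print(explanation)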