pyagrum_nightly-2.3.0.9.dev202512061764412981-cp310-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyagrum/__init__.py +165 -0
- pyagrum/_pyagrum.so +0 -0
- pyagrum/bnmixture/BNMInference.py +268 -0
- pyagrum/bnmixture/BNMLearning.py +376 -0
- pyagrum/bnmixture/BNMixture.py +464 -0
- pyagrum/bnmixture/__init__.py +60 -0
- pyagrum/bnmixture/notebook.py +1058 -0
- pyagrum/causal/_CausalFormula.py +280 -0
- pyagrum/causal/_CausalModel.py +436 -0
- pyagrum/causal/__init__.py +81 -0
- pyagrum/causal/_causalImpact.py +356 -0
- pyagrum/causal/_dSeparation.py +598 -0
- pyagrum/causal/_doAST.py +761 -0
- pyagrum/causal/_doCalculus.py +361 -0
- pyagrum/causal/_doorCriteria.py +374 -0
- pyagrum/causal/_exceptions.py +95 -0
- pyagrum/causal/_types.py +61 -0
- pyagrum/causal/causalEffectEstimation/_CausalEffectEstimation.py +1175 -0
- pyagrum/causal/causalEffectEstimation/_IVEstimators.py +718 -0
- pyagrum/causal/causalEffectEstimation/_RCTEstimators.py +132 -0
- pyagrum/causal/causalEffectEstimation/__init__.py +46 -0
- pyagrum/causal/causalEffectEstimation/_backdoorEstimators.py +774 -0
- pyagrum/causal/causalEffectEstimation/_causalBNEstimator.py +324 -0
- pyagrum/causal/causalEffectEstimation/_frontdoorEstimators.py +396 -0
- pyagrum/causal/causalEffectEstimation/_learners.py +118 -0
- pyagrum/causal/causalEffectEstimation/_utils.py +466 -0
- pyagrum/causal/notebook.py +171 -0
- pyagrum/clg/CLG.py +658 -0
- pyagrum/clg/GaussianVariable.py +111 -0
- pyagrum/clg/SEM.py +312 -0
- pyagrum/clg/__init__.py +63 -0
- pyagrum/clg/canonicalForm.py +408 -0
- pyagrum/clg/constants.py +54 -0
- pyagrum/clg/forwardSampling.py +202 -0
- pyagrum/clg/learning.py +776 -0
- pyagrum/clg/notebook.py +480 -0
- pyagrum/clg/variableElimination.py +271 -0
- pyagrum/common.py +60 -0
- pyagrum/config.py +319 -0
- pyagrum/ctbn/CIM.py +513 -0
- pyagrum/ctbn/CTBN.py +573 -0
- pyagrum/ctbn/CTBNGenerator.py +216 -0
- pyagrum/ctbn/CTBNInference.py +459 -0
- pyagrum/ctbn/CTBNLearner.py +161 -0
- pyagrum/ctbn/SamplesStats.py +671 -0
- pyagrum/ctbn/StatsIndepTest.py +355 -0
- pyagrum/ctbn/__init__.py +79 -0
- pyagrum/ctbn/constants.py +54 -0
- pyagrum/ctbn/notebook.py +264 -0
- pyagrum/defaults.ini +199 -0
- pyagrum/deprecated.py +95 -0
- pyagrum/explain/_ComputationCausal.py +75 -0
- pyagrum/explain/_ComputationConditional.py +48 -0
- pyagrum/explain/_ComputationMarginal.py +48 -0
- pyagrum/explain/_CustomShapleyCache.py +110 -0
- pyagrum/explain/_Explainer.py +176 -0
- pyagrum/explain/_Explanation.py +70 -0
- pyagrum/explain/_FIFOCache.py +54 -0
- pyagrum/explain/_ShallCausalValues.py +204 -0
- pyagrum/explain/_ShallConditionalValues.py +155 -0
- pyagrum/explain/_ShallMarginalValues.py +155 -0
- pyagrum/explain/_ShallValues.py +296 -0
- pyagrum/explain/_ShapCausalValues.py +208 -0
- pyagrum/explain/_ShapConditionalValues.py +126 -0
- pyagrum/explain/_ShapMarginalValues.py +191 -0
- pyagrum/explain/_ShapleyValues.py +298 -0
- pyagrum/explain/__init__.py +81 -0
- pyagrum/explain/_explGeneralizedMarkovBlanket.py +152 -0
- pyagrum/explain/_explIndependenceListForPairs.py +146 -0
- pyagrum/explain/_explInformationGraph.py +264 -0
- pyagrum/explain/notebook/__init__.py +54 -0
- pyagrum/explain/notebook/_bar.py +142 -0
- pyagrum/explain/notebook/_beeswarm.py +174 -0
- pyagrum/explain/notebook/_showShapValues.py +97 -0
- pyagrum/explain/notebook/_waterfall.py +220 -0
- pyagrum/explain/shapley.py +225 -0
- pyagrum/lib/__init__.py +46 -0
- pyagrum/lib/_colors.py +390 -0
- pyagrum/lib/bn2graph.py +299 -0
- pyagrum/lib/bn2roc.py +1026 -0
- pyagrum/lib/bn2scores.py +217 -0
- pyagrum/lib/bn_vs_bn.py +605 -0
- pyagrum/lib/cn2graph.py +305 -0
- pyagrum/lib/discreteTypeProcessor.py +1102 -0
- pyagrum/lib/discretizer.py +58 -0
- pyagrum/lib/dynamicBN.py +390 -0
- pyagrum/lib/explain.py +57 -0
- pyagrum/lib/export.py +84 -0
- pyagrum/lib/id2graph.py +258 -0
- pyagrum/lib/image.py +387 -0
- pyagrum/lib/ipython.py +307 -0
- pyagrum/lib/mrf2graph.py +471 -0
- pyagrum/lib/notebook.py +1821 -0
- pyagrum/lib/proba_histogram.py +552 -0
- pyagrum/lib/utils.py +138 -0
- pyagrum/pyagrum.py +31495 -0
- pyagrum/skbn/_MBCalcul.py +242 -0
- pyagrum/skbn/__init__.py +49 -0
- pyagrum/skbn/_learningMethods.py +282 -0
- pyagrum/skbn/_utils.py +297 -0
- pyagrum/skbn/bnclassifier.py +1014 -0
- pyagrum_nightly-2.3.0.9.dev202512061764412981.dist-info/LICENSE.md +12 -0
- pyagrum_nightly-2.3.0.9.dev202512061764412981.dist-info/LICENSES/LGPL-3.0-or-later.txt +304 -0
- pyagrum_nightly-2.3.0.9.dev202512061764412981.dist-info/LICENSES/MIT.txt +18 -0
- pyagrum_nightly-2.3.0.9.dev202512061764412981.dist-info/METADATA +145 -0
- pyagrum_nightly-2.3.0.9.dev202512061764412981.dist-info/RECORD +107 -0
- pyagrum_nightly-2.3.0.9.dev202512061764412981.dist-info/WHEEL +4 -0
pyagrum/skbn/_MBCalcul.py
ADDED
@@ -0,0 +1,242 @@
############################################################################
#   This file is part of the aGrUM/pyAgrum library.
#
#   Copyright (c) 2005-2025 by
#       - Pierre-Henri WUILLEMIN(_at_LIP6)
#       - Christophe GONZALES(_at_AMU)
#
#   The aGrUM/pyAgrum library is free software; you can redistribute it
#   and/or modify it under the terms of either :
#
#    - the GNU Lesser General Public License as published by
#      the Free Software Foundation, either version 3 of the License,
#      or (at your option) any later version,
#    - the MIT license (MIT),
#    - or both in dual license, as here.
#
#   (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html)
#
#   This aGrUM/pyAgrum library is distributed in the hope that it will be
#   useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
#   INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS
#   FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
#   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
#   OTHER DEALINGS IN THE SOFTWARE.
#
#   See LICENCES for more details.
#
#   SPDX-FileCopyrightText: Copyright 2005-2025
#     - Pierre-Henri WUILLEMIN(_at_LIP6)
#     - Christophe GONZALES(_at_AMU)
#   SPDX-License-Identifier: LGPL-3.0-or-later OR MIT
#
#   Contact  : info_at_agrum_dot_org
#   homepage : http://agrum.gitlab.io
#   gitlab   : https://gitlab.com/agrumery/agrum
#
############################################################################

import pyagrum as gum
from ._utils import _listIdtoName as listIdtoName


def compileMarkovBlanket(bn, target):
  """
  Create a Bayesian network containing the target, its parents, its children and
  the parents of its children, i.e. the Markov blanket of target.

  Parameters
  ----------
  bn: pyagrum.BayesNet
    Bayesian network to work on
  target: str or int
    Name or id of the target

  Returns
  -------
  pyagrum.BayesNet
    the Markov blanket of target, extracted from bn
  """
  mb = gum.BayesNet("MarkovBlanket")

  # add target to the Markov blanket
  mb.add(bn.variable(target))

  # list of target's children
  children = listIdtoName(bn, list(bn.children(target)))

  # list of target's parents
  parents = listIdtoName(bn, list(bn.parents(target)))

  for c in children:
    # list of c's parents
    parents_child = listIdtoName(bn, list(bn.parents(c)))

    # if c is not already in the Markov blanket, add it
    if c not in mb.names():
      mb.add(bn.variable(c))

    # create an arc between target and its child c
    mb.addArc(target, c)

    # add c's parents to the Markov blanket
    for pc in parents_child:
      # if pc is already in the Markov blanket, only the arc may be missing
      if pc in mb.names():
        if pc != target:
          mb.addArc(pc, c)
        continue

      # add pc to the Markov blanket
      mb.add(bn.variable(pc))

      # if pc is not a child of target, its CPT doesn't matter (used for predict)
      if pc not in children:
        mb.cpt(pc).fillWith(1).normalize()
      else:
        mb.addArc(target, pc)

      # create an arc between c and its parent pc
      mb.addArc(pc, c)

  for p in parents:
    # if p is already in the Markov blanket, only add the arc to target
    if p in mb.names():
      mb.addArc(p, target)
      continue

    # add p to the Markov blanket
    mb.add(bn.variable(p))

    # the CPTs of parents don't matter
    mb.cpt(p).fillWith(1).normalize()

    # create an arc between target and its parent p
    mb.addArc(p, target)

  # copy the CPTs of target and of its children from bn
  mb.cpt(target).fillWith(bn.cpt(target))
  for i in children:
    mb.cpt(i).fillWith(bn.cpt(i))

  return mb

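A minimal usage sketch of compileMarkovBlanket (the fastBN string and variable names are illustrative):

import pyagrum as gum
from pyagrum.skbn import compileMarkovBlanket

bn = gum.fastBN("a->t;b->t;t->c;u->c;u->v")
mb = compileMarkovBlanket(bn, "t")
print(sorted(mb.names()))  # the blanket of t: ['a', 'b', 'c', 't', 'u'] -- v is dropped
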
def _calcul_proba_for_nary_class(row, local_inst, dictName, MarkovBlanket, target):
  """
  Calculate the posterior distribution of the variable target given its Markov blanket.

  Parameters
  ----------
  row: numpy.ndarray of shape (n_features,)
    test data
  local_inst: pyagrum.Instantiation
    Instantiation of the Markov blanket EXCEPT the target
  dictName: Dict[str, int]
    maps the name of each variable to its column index in the database
  MarkovBlanket: pyagrum.BayesNet
    Markov blanket to work on
  target: str
    Name of the target

  Returns
  -------
  pyagrum.Tensor
    the posterior distribution of target
  """
  # fill the instantiation with the values of the Markov blanket's variables
  for n in MarkovBlanket.names():
    if n == target:
      continue
    local_inst.chgVal(n, str(row[dictName.get(n)]))

  p = MarkovBlanket.cpt(target).extract(local_inst)
  for i in MarkovBlanket.children(target):
    p *= MarkovBlanket.cpt(i).extract(local_inst)
  p.normalize()

  return p

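The loop above evaluates the Markov-blanket factorization of the posterior: with all non-target blanket variables fixed to the values in local_inst,

    P(y | mb(y)) ∝ P(y | pa(y)) · ∏_{c ∈ ch(y)} P(c | pa(c))

where pa(·) denotes parents and ch(y) the children of the target y; normalize() then rescales the result into a distribution over y.
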
def _calcul_most_probable_for_nary_class(row, local_inst, dictName, MarkovBlanket, target):
  """
  Calculate the most probable class for the variable target.

  Parameters
  ----------
  row: numpy.ndarray of shape (n_features,)
    test data
  local_inst: pyagrum.Instantiation
    Instantiation of the Markov blanket EXCEPT the target
  dictName: Dict[str, int]
    maps the name of each variable to its column index in the database
  MarkovBlanket: pyagrum.BayesNet
    Markov blanket to work on
  target: str
    Name of the target

  Returns
  -------
  Tuple[int, float]
    the value and the probability of the most probable class
  """
  p = _calcul_proba_for_nary_class(row, local_inst, dictName, MarkovBlanket, target)
  return p.argmax(), p.max()

def _calcul_proba_for_binary_class(row, label1, labels, Inst, dictName, MarkovBlanket, target):
  """
  Calculate the probability that the binary variable y takes the value label1.

  Parameters
  ----------
  row: numpy.ndarray of shape (n_features,)
    test data
  label1:
    the True value of y
  labels:
    the False value(s) of y
  Inst: pyagrum.Instantiation
    Instantiation of the Markov blanket
  dictName: Dict[str, int]
    maps the name of each variable to its column index in the database
  MarkovBlanket: pyagrum.BayesNet
    Markov blanket to work on
  target: str
    Name of the target

  Returns
  -------
  float
    the probability that y takes the value label1
  """

  # fill the instantiation with the values of the Markov blanket's variables
  for n in MarkovBlanket.names():
    if n == target:
      continue
    Inst.chgVal(n, str(row[dictName.get(n)]))

  # probability of the positive value
  Inst.chgVal(target, str(label1))
  res1 = MarkovBlanket.cpt(target).get(Inst)

  # probability of the negative value(s)
  res2 = 0.0
  for label0 in labels:
    Inst.chgVal(target, str(label0))
    res2 += MarkovBlanket.cpt(target).get(Inst)

  # contribution of each of the target's children
  for n in MarkovBlanket.children(target):
    Inst.chgVal(target, str(label1))
    res1 = res1 * (MarkovBlanket.cpt(n).get(Inst))

    tmp = 0.0
    for label0 in labels:
      Inst.chgVal(target, str(label0))
      tmp += MarkovBlanket.cpt(n).get(Inst)
    res2 = res2 * tmp

  # normalize to obtain a probability
  return res1 / (res1 + res2)
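
This binary helper applies the same blanket factorization as the n-ary case, specialized to two buckets: res1 accumulates P(y = label1 | pa(y)) times one factor per child, while res2 accumulates the analogous product in which each factor is first summed over the negative labels; for instance res1 = 0.03 and res2 = 0.01 give 0.03 / (0.03 + 0.01) = 0.75.
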
pyagrum/skbn/__init__.py
ADDED
@@ -0,0 +1,49 @@
############################################################################
# (same aGrUM/pyAgrum dual-licence banner as in _MBCalcul.py above)
############################################################################

""" """
|
|
42
|
+
|
|
43
|
+
__author__ = "Pierre-Henri Wuillemin, Nikola Matevski, Louise Mouillé"
|
|
44
|
+
__copyright__ = "(c) 2020-2024 PARIS"
|
|
45
|
+
|
|
46
|
+
from .bnclassifier import BNClassifier
|
|
47
|
+
from ._MBCalcul import compileMarkovBlanket
|
|
48
|
+
|
|
49
|
+
__all__ = ["BNClassifier", "compileMarkovBlanket"]
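
The package's public entry point is BNClassifier (exported above), which follows the scikit-learn fit/predict protocol and is backed by the learning helpers in the next file. A minimal sketch (the toy data and the learningMethod value are illustrative):

import numpy
from pyagrum.skbn import BNClassifier

X = numpy.array([[1, 0], [0, 1], [1, 1], [0, 0]])
y = numpy.array([1, 0, 1, 0])

clf = BNClassifier(learningMethod="TAN")  # other values include "NaiveBayes", "MIIC", "GHC", "Tabu"
clf.fit(X, y)
print(clf.predict(X))
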
pyagrum/skbn/_learningMethods.py
ADDED
@@ -0,0 +1,282 @@
############################################################################
# (same aGrUM/pyAgrum dual-licence banner as in _MBCalcul.py above)
############################################################################

"""
|
|
42
|
+
Created on Thu Jun 11 14:08:14 2020
|
|
43
|
+
|
|
44
|
+
"""
|
|
45
|
+
|
|
46
|
+
import numpy
|
|
47
|
+
import math
|
|
48
|
+
import warnings
|
|
49
|
+
|
|
50
|
+
import pyagrum as gum
|
|
51
|
+
|
|
52
|
+
from ._utils import _ImplementConstraints as implementConstraints
|
|
53
|
+
from ._utils import _ImplementScore as implementScore
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _fitStandard(X, y, learner, learningMethod, possibleSkeleton, scoringType, constraints):
  """
  The default fit function, using MIIC, Greedy Hill Climbing or Tabu-list search depending on the chosen learning method.

  Parameters
  ----------
  X: {array-like, sparse matrix} of shape (n_samples, n_features)
    training data
  y: array-like of shape (n_samples,)
    Target values
  """

  implementConstraints(constraints, learner)

  if learningMethod == "MIIC":
    learner.useMIIC()
  elif learningMethod == "MIICMDL":
    learner.useMIIC()
    learner.useMDLCorrection()
  elif learningMethod == "MIICNML":
    learner.useMIIC()
    learner.useNMLCorrection()
  elif learningMethod == "GHC":  # default
    learner.useGreedyHillClimbing()
  elif learningMethod == "Tabu":
    learner.useLocalSearchWithTabuList()
  else:
    raise ValueError("Learning Method not recognized.")

  if possibleSkeleton is not None:
    learner.setPossibleSkeleton(possibleSkeleton)

  implementScore(scoringType, learner)
  bn = learner.learnBN()
  return bn

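For reference, the same dispatch can be reproduced directly on a pyagrum.BNLearner outside of the classifier; a minimal sketch (the CSV path is hypothetical):

import pyagrum as gum

learner = gum.BNLearner("train.csv")  # hypothetical training file
learner.useMIIC()                     # structure search, as for learningMethod="MIIC"
learner.useNMLCorrection()            # the "MIICNML" variant
bn = learner.learnBN()                # returns a pyagrum.BayesNet
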
def _fitTAN(X, y, bn, learner, variableList, target):
  """
  Uses Tree-Augmented Naive Bayes (TAN) to learn the network structure and its parameters.

  Parameters
  ----------
  X: {array-like, sparse matrix} of shape (n_samples, n_features)
    training data
  y: array-like of shape (n_samples,)
    Target values
  """

  # variableList is the list of all the variables in the Bayesian network, sorted by their index

  # the number of columns in the data
  d = X.shape[1]

  # if there is only one input column, TAN works exactly the same as Naive Bayes
  if d < 2:
    _fitNaiveBayes(X, y, bn, learner, variableList, target, None)
    return

  probabilityY = learner.pseudoCount([target]).normalize().tolist()
  mutualInformation = dict()
  undirectedGraph = gum.UndiGraph()

  # compute the conditional mutual information (given the target) of every pair of variables
  for i in range(d):
    undirectedGraph.addNodeWithId(i)
    for j in range(i):
      probabilityList = learner.pseudoCount([variableList[i], variableList[j], target]).normalize().tolist()
      probabilityXi = learner.pseudoCount([variableList[i], target]).normalize().tolist()
      probabilityXj = learner.pseudoCount([variableList[j], target]).normalize().tolist()
      temp = 0
      for yIndex in range(len(probabilityList)):
        for xjIndex in range(len(probabilityList[yIndex])):
          for xiIndex in range(len(probabilityList[yIndex][xjIndex])):
            if probabilityList[yIndex][xjIndex][xiIndex] > 0:
              temp = temp + probabilityList[yIndex][xjIndex][xiIndex] * math.log(
                probabilityList[yIndex][xjIndex][xiIndex]
                * probabilityY[yIndex]
                / (probabilityXi[yIndex][xiIndex] * probabilityXj[yIndex][xjIndex])
              )
      mutualInformation[(i, j)] = temp

  # if the mutual information between two variables is above this threshold, we add an edge between them
  threshold = 0
  for var in mutualInformation:
    threshold = threshold + mutualInformation[var]
  threshold = float(threshold) / (d * (d - 1))

  # sort the dictionary of mutual information in descending order of the values
  mutualInformation = {k: v for k, v in sorted(mutualInformation.items(), key=(lambda item: item[1]), reverse=True)}

  for var in mutualInformation:
    (i, j) = var
    # since the values are sorted in descending order, once a value falls under the threshold all following values are under it too
    if mutualInformation[var] < threshold:
      break
    # the mutual information between xi and xj is above the threshold, so we add an edge between the two nodes
    undirectedGraph.addEdge(i, j)

    # if the edge creates a cycle, we delete it and move on to the next pair of variables
    if undirectedGraph.hasUndirectedCycle():
      undirectedGraph.eraseEdge(i, j)

  # Dict[int, Set[int]]: maps one representative node of each connected component of the graph
  # to the set of all nodes belonging to that component
  connectedParts = undirectedGraph.connectedComponents()

  for node in connectedParts:
    # id of the node that will be used as the root to orient the undirected graph, initialised to 0
    root = 0
    # we choose as root the node with the largest mutual information with y; the largest value seen so far is kept here
    maxMutualInformation = -99999
    for x0 in connectedParts[node]:
      mutual = 0
      probabilityList = learner.pseudoCount([variableList[x0], target]).normalize().tolist()
      probabilityY = learner.pseudoCount([target]).normalize().tolist()
      probabilityX = learner.pseudoCount([variableList[x0]]).normalize().tolist()
      for yIndex in range(len(probabilityList)):
        for xIndex in range(len(probabilityList[yIndex])):
          if probabilityList[yIndex][xIndex] > 0:
            mutual = mutual + probabilityList[yIndex][xIndex] * math.log(
              probabilityList[yIndex][xIndex] / (probabilityY[yIndex] * probabilityX[xIndex])
            )
      if mutual > maxMutualInformation:
        maxMutualInformation = mutual
        root = x0
    ListOfNodes = [root]
    for tailId in ListOfNodes:
      # for every element of the list of nodes, we create an arc between this element and every neighbour
      # of the element in the undirected graph that is not already in the list of nodes.
      # Since the graph contains no cycle, if headId is already in the list then the arc (headId, tailId)
      # has already been added, so the arc (tailId, headId) must not be added
      neighbours = undirectedGraph.neighbours(tailId)
      for headId in neighbours:
        if headId not in ListOfNodes:
          bn.addArc(variableList[tailId], variableList[headId])
          ListOfNodes.append(headId)
  for i in range(d):
    bn.addArc(target, variableList[i])

  bn = learner.learnParameters(bn.dag())
  return bn

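The value accumulated in temp is the conditional mutual information of the pair given the class,

    I(Xi; Xj | Y) = Σ_{xi, xj, y} P(xi, xj, y) · log( P(xi, xj, y) · P(y) / ( P(xi, y) · P(xj, y) ) )

Keeping only the pairs whose I(Xi; Xj | Y) exceeds the average-based threshold while breaking cycles yields the tree skeleton, which is then oriented from the root with the highest mutual information with y — a thresholded variant of the classical TAN construction of Friedman et al., which builds a maximum-weight spanning tree over this score.
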
def _fitChowLiu(X, y, bn, learner, variableList, target):
  """
  Uses the Chow-Liu algorithm to learn the network structure and its parameters.

  Parameters
  ----------
  X: {array-like, sparse matrix} of shape (n_samples, n_features)
    training data
  y: array-like of shape (n_samples,)
    Target values
  """

  # since the Chow-Liu algorithm doesn't differentiate between input and output variables,
  # we construct a matrix that includes them both (column 0 is the target)
  dimension = y.shape
  yColumn = numpy.reshape(y, (dimension[0], 1))
  xAndY = numpy.concatenate((yColumn, X), axis=1)
  d = xAndY.shape[1]

  mutualInformation = dict()
  undirectedGraph = gum.UndiGraph()

  # compute the mutual information of every pair of variables
  for i in range(d):
    undirectedGraph.addNodeWithId(i)
    if i > 0:
      probabilityXi = learner.pseudoCount([variableList[i - 1]]).normalize().tolist()
      for j in range(i):
        if j > 0:
          probabilityList = learner.pseudoCount([variableList[i - 1], variableList[j - 1]]).normalize().tolist()
          probabilityXj = learner.pseudoCount([variableList[j - 1]]).normalize().tolist()
        else:
          probabilityList = learner.pseudoCount([variableList[i - 1], target]).normalize().tolist()
          probabilityXj = learner.pseudoCount([target]).normalize().tolist()
        mutual = 0
        for xjIndex in range(len(probabilityList)):
          for xiIndex in range(len(probabilityList[xjIndex])):
            if probabilityList[xjIndex][xiIndex] > 0:
              mutual = mutual + probabilityList[xjIndex][xiIndex] * math.log(
                probabilityList[xjIndex][xiIndex] / (probabilityXi[xiIndex] * probabilityXj[xjIndex])
              )
        mutualInformation[(i, j)] = mutual
  # sort the dictionary of mutual information in descending order of the values
  mutualInformation = {k: v for k, v in sorted(mutualInformation.items(), key=(lambda item: item[1]), reverse=True)}

  for i, j in mutualInformation:
    # try to add an edge between xi and xj, processing the pairs in descending order of mutual information
    undirectedGraph.addEdge(i, j)

    # if the edge creates a cycle, we delete it and move on to the next pair of variables
    if undirectedGraph.hasUndirectedCycle():
      undirectedGraph.eraseEdge(i, j)

  ListOfNodes = [0]
  for tailId in ListOfNodes:
    # for every element of the list of nodes, we create an arc between this element and every neighbour
    # of the element in the undirected graph that is not already in the list of nodes.
    # Since the graph contains no cycle, if headId is already in the list then the arc (headId, tailId)
    # has already been added, so the arc (tailId, headId) must not be added
    neighbours = undirectedGraph.neighbours(tailId)
    for headId in neighbours:
      if headId not in ListOfNodes:
        if tailId > 0:
          bn.addArc(variableList[tailId - 1], variableList[headId - 1])
        else:
          bn.addArc(target, variableList[headId - 1])
        ListOfNodes.append(headId)

  bn = learner.learnParameters(bn.dag())
  return bn

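Here the score is the plain pairwise mutual information,

    I(Xi; Xj) = Σ_{xi, xj} P(xi, xj) · log( P(xi, xj) / ( P(xi) · P(xj) ) )

and inserting edges in descending order of I(Xi; Xj) while discarding any edge that closes a cycle is exactly Kruskal's algorithm: the skeleton is a maximum-weight spanning tree, which Chow and Liu showed is the tree-structured distribution minimizing the KL divergence to the empirical one.
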
def _fitNaiveBayes(X, y, bn, learner, variableList, target, constraints):
  """
  Uses the Naive Bayes algorithm to learn the network parameters. The network structure isn't learned since it is fixed.

  Parameters
  ----------
  X: {array-like, sparse matrix} of shape (n_samples, n_features)
    training data
  y: array-like of shape (n_samples,)
    Target values
  """

  for variableName in variableList:
    bn.addArc(target, variableName)
  if constraints is not None:
    warnings.warn("The structure of Naive Bayes is fixed, so it is impossible to add any new constraints")

  bn = learner.learnParameters(bn.dag())
  return bn