pyagrum_nightly-2.3.1.9.dev202512261765915415-cp310-abi3-macosx_10_15_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. pyagrum/__init__.py +165 -0
  2. pyagrum/_pyagrum.so +0 -0
  3. pyagrum/bnmixture/BNMInference.py +268 -0
  4. pyagrum/bnmixture/BNMLearning.py +376 -0
  5. pyagrum/bnmixture/BNMixture.py +464 -0
  6. pyagrum/bnmixture/__init__.py +60 -0
  7. pyagrum/bnmixture/notebook.py +1058 -0
  8. pyagrum/causal/_CausalFormula.py +280 -0
  9. pyagrum/causal/_CausalModel.py +436 -0
  10. pyagrum/causal/__init__.py +81 -0
  11. pyagrum/causal/_causalImpact.py +356 -0
  12. pyagrum/causal/_dSeparation.py +598 -0
  13. pyagrum/causal/_doAST.py +761 -0
  14. pyagrum/causal/_doCalculus.py +361 -0
  15. pyagrum/causal/_doorCriteria.py +374 -0
  16. pyagrum/causal/_exceptions.py +95 -0
  17. pyagrum/causal/_types.py +61 -0
  18. pyagrum/causal/causalEffectEstimation/_CausalEffectEstimation.py +1175 -0
  19. pyagrum/causal/causalEffectEstimation/_IVEstimators.py +718 -0
  20. pyagrum/causal/causalEffectEstimation/_RCTEstimators.py +132 -0
  21. pyagrum/causal/causalEffectEstimation/__init__.py +46 -0
  22. pyagrum/causal/causalEffectEstimation/_backdoorEstimators.py +774 -0
  23. pyagrum/causal/causalEffectEstimation/_causalBNEstimator.py +324 -0
  24. pyagrum/causal/causalEffectEstimation/_frontdoorEstimators.py +396 -0
  25. pyagrum/causal/causalEffectEstimation/_learners.py +118 -0
  26. pyagrum/causal/causalEffectEstimation/_utils.py +466 -0
  27. pyagrum/causal/notebook.py +172 -0
  28. pyagrum/clg/CLG.py +658 -0
  29. pyagrum/clg/GaussianVariable.py +111 -0
  30. pyagrum/clg/SEM.py +312 -0
  31. pyagrum/clg/__init__.py +63 -0
  32. pyagrum/clg/canonicalForm.py +408 -0
  33. pyagrum/clg/constants.py +54 -0
  34. pyagrum/clg/forwardSampling.py +202 -0
  35. pyagrum/clg/learning.py +776 -0
  36. pyagrum/clg/notebook.py +480 -0
  37. pyagrum/clg/variableElimination.py +271 -0
  38. pyagrum/common.py +60 -0
  39. pyagrum/config.py +319 -0
  40. pyagrum/ctbn/CIM.py +513 -0
  41. pyagrum/ctbn/CTBN.py +573 -0
  42. pyagrum/ctbn/CTBNGenerator.py +216 -0
  43. pyagrum/ctbn/CTBNInference.py +459 -0
  44. pyagrum/ctbn/CTBNLearner.py +161 -0
  45. pyagrum/ctbn/SamplesStats.py +671 -0
  46. pyagrum/ctbn/StatsIndepTest.py +355 -0
  47. pyagrum/ctbn/__init__.py +79 -0
  48. pyagrum/ctbn/constants.py +54 -0
  49. pyagrum/ctbn/notebook.py +264 -0
  50. pyagrum/defaults.ini +199 -0
  51. pyagrum/deprecated.py +95 -0
  52. pyagrum/explain/_ComputationCausal.py +75 -0
  53. pyagrum/explain/_ComputationConditional.py +48 -0
  54. pyagrum/explain/_ComputationMarginal.py +48 -0
  55. pyagrum/explain/_CustomShapleyCache.py +110 -0
  56. pyagrum/explain/_Explainer.py +176 -0
  57. pyagrum/explain/_Explanation.py +70 -0
  58. pyagrum/explain/_FIFOCache.py +54 -0
  59. pyagrum/explain/_ShallCausalValues.py +204 -0
  60. pyagrum/explain/_ShallConditionalValues.py +155 -0
  61. pyagrum/explain/_ShallMarginalValues.py +155 -0
  62. pyagrum/explain/_ShallValues.py +296 -0
  63. pyagrum/explain/_ShapCausalValues.py +208 -0
  64. pyagrum/explain/_ShapConditionalValues.py +126 -0
  65. pyagrum/explain/_ShapMarginalValues.py +191 -0
  66. pyagrum/explain/_ShapleyValues.py +298 -0
  67. pyagrum/explain/__init__.py +81 -0
  68. pyagrum/explain/_explGeneralizedMarkovBlanket.py +152 -0
  69. pyagrum/explain/_explIndependenceListForPairs.py +146 -0
  70. pyagrum/explain/_explInformationGraph.py +264 -0
  71. pyagrum/explain/notebook/__init__.py +54 -0
  72. pyagrum/explain/notebook/_bar.py +142 -0
  73. pyagrum/explain/notebook/_beeswarm.py +174 -0
  74. pyagrum/explain/notebook/_showShapValues.py +97 -0
  75. pyagrum/explain/notebook/_waterfall.py +220 -0
  76. pyagrum/explain/shapley.py +225 -0
  77. pyagrum/lib/__init__.py +46 -0
  78. pyagrum/lib/_colors.py +390 -0
  79. pyagrum/lib/bn2graph.py +299 -0
  80. pyagrum/lib/bn2roc.py +1026 -0
  81. pyagrum/lib/bn2scores.py +217 -0
  82. pyagrum/lib/bn_vs_bn.py +605 -0
  83. pyagrum/lib/cn2graph.py +305 -0
  84. pyagrum/lib/discreteTypeProcessor.py +1102 -0
  85. pyagrum/lib/discretizer.py +58 -0
  86. pyagrum/lib/dynamicBN.py +390 -0
  87. pyagrum/lib/explain.py +57 -0
  88. pyagrum/lib/export.py +84 -0
  89. pyagrum/lib/id2graph.py +258 -0
  90. pyagrum/lib/image.py +387 -0
  91. pyagrum/lib/ipython.py +307 -0
  92. pyagrum/lib/mrf2graph.py +471 -0
  93. pyagrum/lib/notebook.py +1821 -0
  94. pyagrum/lib/proba_histogram.py +552 -0
  95. pyagrum/lib/utils.py +138 -0
  96. pyagrum/pyagrum.py +31495 -0
  97. pyagrum/skbn/_MBCalcul.py +242 -0
  98. pyagrum/skbn/__init__.py +49 -0
  99. pyagrum/skbn/_learningMethods.py +282 -0
  100. pyagrum/skbn/_utils.py +297 -0
  101. pyagrum/skbn/bnclassifier.py +1014 -0
  102. pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/LICENSE.md +12 -0
  103. pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/LICENSES/LGPL-3.0-or-later.txt +304 -0
  104. pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/LICENSES/MIT.txt +18 -0
  105. pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/METADATA +145 -0
  106. pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/RECORD +107 -0
  107. pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/WHEEL +4 -0
pyagrum/skbn/_MBCalcul.py
@@ -0,0 +1,242 @@
+ ############################################################################
+ # This file is part of the aGrUM/pyAgrum library. #
+ # #
+ # Copyright (c) 2005-2025 by #
+ # - Pierre-Henri WUILLEMIN(_at_LIP6) #
+ # - Christophe GONZALES(_at_AMU) #
+ # #
+ # The aGrUM/pyAgrum library is free software; you can redistribute it #
+ # and/or modify it under the terms of either : #
+ # #
+ # - the GNU Lesser General Public License as published by #
+ # the Free Software Foundation, either version 3 of the License, #
+ # or (at your option) any later version, #
+ # - the MIT license (MIT), #
+ # - or both in dual license, as here. #
+ # #
+ # (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) #
+ # #
+ # This aGrUM/pyAgrum library is distributed in the hope that it will be #
+ # useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, #
+ # INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS #
+ # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, #
+ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR #
+ # OTHER DEALINGS IN THE SOFTWARE. #
+ # #
+ # See LICENCES for more details. #
+ # #
+ # SPDX-FileCopyrightText: Copyright 2005-2025 #
+ # - Pierre-Henri WUILLEMIN(_at_LIP6) #
+ # - Christophe GONZALES(_at_AMU) #
+ # SPDX-License-Identifier: LGPL-3.0-or-later OR MIT #
+ # #
+ # Contact : info_at_agrum_dot_org #
+ # homepage : http://agrum.gitlab.io #
+ # gitlab : https://gitlab.com/agrumery/agrum #
+ # #
+ ############################################################################
+
+ import pyagrum as gum
+ from ._utils import _listIdtoName as listIdtoName
+
+
+ def compileMarkovBlanket(bn, target):
+   """
+   Create a Bayesian network restricted to the Markov blanket of the target node: its parents, its children and its children's other parents.
+
+   Parameters
+   ----------
+   bn: pyagrum.BayesNet
+     Bayesian network to work on
+   target: str or int
+     Name or id of the target
+
+   Returns
+   -------
+   pyagrum.BayesNet
+     the Markov blanket extracted from bn
+   """
+   mb = gum.BayesNet("MarkovBlanket")
+
+   # add target to the Markov blanket
+   mb.add(bn.variable(target))
+
+   # list of target's children
+   children = listIdtoName(bn, list(bn.children(target)))
+
+   # list of target's parents
+   parents = listIdtoName(bn, list(bn.parents(target)))
+
+   for c in children:
+     # list of c's parents
+     parents_child = listIdtoName(bn, list(bn.parents(c)))
+
+     # if c is not already in the Markov blanket, add it and the arc from target
+     if c not in mb.names():
+       mb.add(bn.variable(c))
+       mb.addArc(target, c)
+
+     # add c's parents to the Markov blanket
+     for pc in parents_child:
+       # if pc is already present (e.g. as a parent of target), only the arc is missing
+       if pc in mb.names():
+         if pc != target:
+           mb.addArc(pc, c)
+         continue
+
+       # add pc to the Markov blanket
+       mb.add(bn.variable(pc))
+
+       # if pc is not a child of target, its CPT doesn't matter (used for predict)
+       if pc not in children:
+         mb.cpt(pc).fillWith(1).normalize()
+       else:
+         mb.addArc(target, pc)
+
+       # create the arc between c and its parent pc
+       mb.addArc(pc, c)
+
+   for p in parents:
+     # if p is already in the Markov blanket, only the arc towards target is missing
+     if p in mb.names():
+       mb.addArc(p, target)
+       continue
+
+     # add p to the Markov blanket; its CPT doesn't matter
+     mb.add(bn.variable(p))
+     mb.cpt(p).fillWith(1).normalize()
+
+     # create the arc between target and its parent p
+     mb.addArc(p, target)
+
+   # copy the CPTs of target and of its children from bn
+   mb.cpt(target).fillWith(bn.cpt(target))
+   for i in children:
+     mb.cpt(i).fillWith(bn.cpt(i))
+
+   return mb
+
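A minimal usage sketch (an editorial illustration, not part of the diff; the network structure and node names are made up). compileMarkovBlanket is exported by pyagrum.skbn, as the __init__.py diff below shows:

import pyagrum as gum
from pyagrum.skbn import compileMarkovBlanket

# hypothetical network: "t" has parents a and d, child c, and spouse b
bn = gum.fastBN("a->t->c;b->c;d->t")
mb = compileMarkovBlanket(bn, "t")
print(mb.names())  # the five blanket variables: a, b, c, d, t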
+
+ def _calcul_proba_for_nary_class(row, local_inst, dictName, MarkovBlanket, target):
+   """
+   Compute the posterior distribution of the target variable given its Markov blanket.
+
+   Parameters
+   ----------
+   row: numpy array of shape (n_features,)
+     test data
+   local_inst: pyagrum.Instantiation
+     Instantiation of the Markov blanket EXCEPT the target
+   dictName: Dict[str, int]
+     maps a variable name to its column in the database
+   MarkovBlanket: pyagrum.BayesNet
+     Markov blanket to work on
+   target: str
+     Name of the target
+
+   Returns
+   -------
+   pyagrum.Tensor
+     the posterior probability distribution of target
+   """
+   # fill the Instantiation with the observed values of the Markov blanket's variables
+   for n in MarkovBlanket.names():
+     if n == target:
+       continue
+     local_inst.chgVal(n, str(row[dictName.get(n)]))
+
+   p = MarkovBlanket.cpt(target).extract(local_inst)
+   for i in MarkovBlanket.children(target):
+     p *= MarkovBlanket.cpt(i).extract(local_inst)
+   p.normalize()
+
+   return p
+
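In math form, the product computed above is the usual Markov-blanket factorization of the posterior, where pa(y) and ch(y) denote the parents and children of the target and every factor is a CPT of the Markov-blanket network restricted by local_inst:

P(y \mid \mathrm{mb}(y)) \propto P(y \mid \mathrm{pa}(y)) \prod_{c \in \mathrm{ch}(y)} P(c \mid \mathrm{pa}(c))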
+
+
+ def _calcul_most_probable_for_nary_class(row, local_inst, dictName, MarkovBlanket, target):
+   """
+   Compute the most probable class of the target variable.
+
+   Parameters
+   ----------
+   row: numpy array of shape (n_features,)
+     test data
+   local_inst: pyagrum.Instantiation
+     Instantiation of the Markov blanket EXCEPT the target
+   dictName: Dict[str, int]
+     maps a variable name to its column in the database
+   MarkovBlanket: pyagrum.BayesNet
+     Markov blanket to work on
+   target: str
+     Name of the target
+
+   Returns
+   -------
+   Tuple[int, float]
+     the value and the probability of the most probable class
+   """
+   p = _calcul_proba_for_nary_class(row, local_inst, dictName, MarkovBlanket, target)
+   return p.argmax(), p.max()
+
+
+ def _calcul_proba_for_binary_class(row, label1, labels, Inst, dictName, MarkovBlanket, target):
+   """
+   Compute the probability that the binary target variable takes the value label1.
+
+   Parameters
+   ----------
+   row: numpy array of shape (n_features,)
+     test data
+   label1:
+     the positive label of the target
+   labels:
+     the remaining (negative) labels of the target
+   Inst: pyagrum.Instantiation
+     Instantiation of the Markov blanket (including the target)
+   dictName: Dict[str, int]
+     maps a variable name to its column in the database
+   MarkovBlanket: pyagrum.BayesNet
+     Markov blanket to work on
+   target: str
+     Name of the target
+
+   Returns
+   -------
+   float
+     the probability that the target takes the value label1
+   """
+   # fill the Instantiation with the observed values of the Markov blanket's variables
+   for n in MarkovBlanket.names():
+     if n == target:
+       continue
+     Inst.chgVal(n, str(row[dictName.get(n)]))
+
+   # unnormalized weight of the positive value
+   Inst.chgVal(target, str(label1))
+   res1 = MarkovBlanket.cpt(target).get(Inst)
+
+   # unnormalized weight of the negative values
+   res2 = 0.0
+   for label0 in labels:
+     Inst.chgVal(target, str(label0))
+     res2 += MarkovBlanket.cpt(target).get(Inst)
+
+   # multiply in the contribution of each child
+   for n in MarkovBlanket.children(target):
+     Inst.chgVal(target, str(label1))
+     res1 = res1 * MarkovBlanket.cpt(n).get(Inst)
+
+     tmp = 0.0
+     for label0 in labels:
+       Inst.chgVal(target, str(label0))
+       tmp += MarkovBlanket.cpt(n).get(Inst)
+     res2 = res2 * tmp
+
+   # normalize to obtain a probability
+   return res1 / (res1 + res2)
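The unnormalized weights res1 and res2 follow the same Markov-blanket factorization as above, grouping all negative labels into res2. A hypothetical consistency sketch for a strictly binary target "y" (inst_no_t is an Instantiation of the blanket without y, inst_full includes y; all names are illustrative):

p = _calcul_proba_for_nary_class(row, inst_no_t, dictName, mb, "y")
q = _calcul_proba_for_binary_class(row, label1, labels, inst_full, dictName, mb, "y")
idx = mb.variable("y").index(str(label1))  # position of the positive label
assert abs(p.tolist()[idx] - q) < 1e-10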
pyagrum/skbn/__init__.py
@@ -0,0 +1,49 @@
+ ############################################################################
+ # This file is part of the aGrUM/pyAgrum library. #
+ # #
+ # Copyright (c) 2005-2025 by #
+ # - Pierre-Henri WUILLEMIN(_at_LIP6) #
+ # - Christophe GONZALES(_at_AMU) #
+ # #
+ # The aGrUM/pyAgrum library is free software; you can redistribute it #
+ # and/or modify it under the terms of either : #
+ # #
+ # - the GNU Lesser General Public License as published by #
+ # the Free Software Foundation, either version 3 of the License, #
+ # or (at your option) any later version, #
+ # - the MIT license (MIT), #
+ # - or both in dual license, as here. #
+ # #
+ # (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) #
+ # #
+ # This aGrUM/pyAgrum library is distributed in the hope that it will be #
+ # useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, #
+ # INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS #
+ # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, #
+ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR #
+ # OTHER DEALINGS IN THE SOFTWARE. #
+ # #
+ # See LICENCES for more details. #
+ # #
+ # SPDX-FileCopyrightText: Copyright 2005-2025 #
+ # - Pierre-Henri WUILLEMIN(_at_LIP6) #
+ # - Christophe GONZALES(_at_AMU) #
+ # SPDX-License-Identifier: LGPL-3.0-or-later OR MIT #
+ # #
+ # Contact : info_at_agrum_dot_org #
+ # homepage : http://agrum.gitlab.io #
+ # gitlab : https://gitlab.com/agrumery/agrum #
+ # #
+ ############################################################################
+
+ """ """
+
+ __author__ = "Pierre-Henri Wuillemin, Nikola Matevski, Louise Mouillé"
+ __copyright__ = "(c) 2020-2024 PARIS"
+
+ from .bnclassifier import BNClassifier
+ from ._MBCalcul import compileMarkovBlanket
+
+ __all__ = ["BNClassifier", "compileMarkovBlanket"]
pyagrum/skbn/_learningMethods.py
@@ -0,0 +1,282 @@
+ ############################################################################
+ # This file is part of the aGrUM/pyAgrum library. #
+ # #
+ # Copyright (c) 2005-2025 by #
+ # - Pierre-Henri WUILLEMIN(_at_LIP6) #
+ # - Christophe GONZALES(_at_AMU) #
+ # #
+ # The aGrUM/pyAgrum library is free software; you can redistribute it #
+ # and/or modify it under the terms of either : #
+ # #
+ # - the GNU Lesser General Public License as published by #
+ # the Free Software Foundation, either version 3 of the License, #
+ # or (at your option) any later version, #
+ # - the MIT license (MIT), #
+ # - or both in dual license, as here. #
+ # #
+ # (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) #
+ # #
+ # This aGrUM/pyAgrum library is distributed in the hope that it will be #
+ # useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, #
+ # INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS #
+ # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, #
+ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR #
+ # OTHER DEALINGS IN THE SOFTWARE. #
+ # #
+ # See LICENCES for more details. #
+ # #
+ # SPDX-FileCopyrightText: Copyright 2005-2025 #
+ # - Pierre-Henri WUILLEMIN(_at_LIP6) #
+ # - Christophe GONZALES(_at_AMU) #
+ # SPDX-License-Identifier: LGPL-3.0-or-later OR MIT #
+ # #
+ # Contact : info_at_agrum_dot_org #
+ # homepage : http://agrum.gitlab.io #
+ # gitlab : https://gitlab.com/agrumery/agrum #
+ # #
+ ############################################################################
+
+ """
+ Created on Thu Jun 11 14:08:14 2020
+ """
+
+ import numpy
+ import math
+ import warnings
+
+ import pyagrum as gum
+
+ from ._utils import _ImplementConstraints as implementConstraints
+ from ._utils import _ImplementScore as implementScore
+
+
+ def _fitStandard(X, y, learner, learningMethod, possibleSkeleton, scoringType, constraints):
+   """
+   The default fit function: uses MIIC, Greedy Hill Climbing or a local search with tabu list, depending on the chosen learning method.
+
+   Parameters
+   ----------
+   X: {array-like, sparse matrix} of shape (n_samples, n_features)
+     training data
+   y: array-like of shape (n_samples,)
+     target values
+   """
+   implementConstraints(constraints, learner)
+
+   if learningMethod == "MIIC":
+     learner.useMIIC()
+   elif learningMethod == "MIICMDL":
+     learner.useMIIC()
+     learner.useMDLCorrection()
+   elif learningMethod == "MIICNML":
+     learner.useMIIC()
+     learner.useNMLCorrection()
+   elif learningMethod == "GHC":  # default
+     learner.useGreedyHillClimbing()
+   elif learningMethod == "Tabu":
+     learner.useLocalSearchWithTabuList()
+   else:
+     raise ValueError("Learning Method not recognized.")
+
+   if possibleSkeleton is not None:
+     learner.setPossibleSkeleton(possibleSkeleton)
+
+   implementScore(scoringType, learner)
+   bn = learner.learnBN()
+   return bn
+
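For context, a minimal standalone sketch of the same method selection using pyagrum's BNLearner directly (the CSV file name is hypothetical):

import pyagrum as gum

learner = gum.BNLearner("data.csv")  # hypothetical dataset
learner.useMIIC()
learner.useNMLCorrection()           # i.e. the "MIICNML" branch above
bn = learner.learnBN()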
+
+
+ def _fitTAN(X, y, bn, learner, variableList, target):
+   """
+   Uses Tree-Augmented NaiveBayes to learn the network structure and its parameters.
+
+   Parameters
+   ----------
+   X: {array-like, sparse matrix} of shape (n_samples, n_features)
+     training data
+   y: array-like of shape (n_samples,)
+     target values
+   """
+   # variableList is the list of all the variables in the Bayesian network, sorted by index
+
+   # the number of columns in the data
+   d = X.shape[1]
+
+   # if there is only one input column, TAN behaves exactly like NaiveBayes
+   if d < 2:
+     return _fitNaiveBayes(X, y, bn, learner, variableList, target, None)
+
+   probabilityY = learner.pseudoCount([target]).normalize().tolist()
+   mutualInformation = dict()
+   undirectedGraph = gum.UndiGraph()
+
+   # compute the conditional mutual information of every pair of variables given the target
+   for i in range(d):
+     undirectedGraph.addNodeWithId(i)
+     for j in range(i):
+       probabilityList = learner.pseudoCount([variableList[i], variableList[j], target]).normalize().tolist()
+       probabilityXi = learner.pseudoCount([variableList[i], target]).normalize().tolist()
+       probabilityXj = learner.pseudoCount([variableList[j], target]).normalize().tolist()
+       temp = 0
+       for yIndex in range(len(probabilityList)):
+         for xjIndex in range(len(probabilityList[yIndex])):
+           for xiIndex in range(len(probabilityList[yIndex][xjIndex])):
+             if probabilityList[yIndex][xjIndex][xiIndex] > 0:
+               temp = temp + probabilityList[yIndex][xjIndex][xiIndex] * math.log(
+                 probabilityList[yIndex][xjIndex][xiIndex]
+                 * probabilityY[yIndex]
+                 / (probabilityXi[yIndex][xiIndex] * probabilityXj[yIndex][xjIndex])
+               )
+       mutualInformation[(i, j)] = temp
+
+   # if the mutual information of a pair is above this threshold, an edge is added between its nodes
+   threshold = 0
+   for var in mutualInformation:
+     threshold = threshold + mutualInformation[var]
+   threshold = float(threshold) / (d * (d - 1))
+
+   # sort the pairs by decreasing mutual information
+   mutualInformation = {k: v for k, v in sorted(mutualInformation.items(), key=(lambda item: item[1]), reverse=True)}
+
+   for var in mutualInformation:
+     (i, j) = var
+     # the dict is sorted in descending order, so once a value falls below the threshold all the following ones do too
+     if mutualInformation[var] < threshold:
+       break
+     # the mutual information between xi and xj is above the threshold, so we add an edge between the two nodes
+     undirectedGraph.addEdge(i, j)
+
+     # if the edge creates a cycle, we remove it and move on to the next pair of variables
+     if undirectedGraph.hasUndirectedCycle():
+       undirectedGraph.eraseEdge(i, j)
+
+   # dict(int: set(int)): one key per connected component of the graph; the associated set contains all the nodes of that component
+   connectedParts = undirectedGraph.connectedComponents()
+
+   for node in connectedParts:
+     # id of the node used as root to orient the undirected graph
+     root = 0
+     # the root is the node with the largest mutual information with y; keep track of the current maximum
+     maxMutualInformation = -99999
+     for x0 in connectedParts[node]:
+       mutual = 0
+       probabilityList = learner.pseudoCount([variableList[x0], target]).normalize().tolist()
+       probabilityY = learner.pseudoCount([target]).normalize().tolist()
+       probabilityX = learner.pseudoCount([variableList[x0]]).normalize().tolist()
+       for yIndex in range(len(probabilityList)):
+         for xIndex in range(len(probabilityList[yIndex])):
+           if probabilityList[yIndex][xIndex] > 0:
+             mutual = mutual + probabilityList[yIndex][xIndex] * math.log(
+               probabilityList[yIndex][xIndex] / (probabilityY[yIndex] * probabilityX[xIndex])
+             )
+       if mutual > maxMutualInformation:
+         maxMutualInformation = mutual
+         root = x0
+     ListOfNodes = [root]
+     for tailId in ListOfNodes:
+       # for every node in the list, create an arc towards each neighbour not already in the list.
+       # Since the graph is acyclic, if headId is already in the list then the arc (headId, tailId) was already added, so (tailId, headId) must not be
+       neighbours = undirectedGraph.neighbours(tailId)
+       for headId in neighbours:
+         if headId not in ListOfNodes:
+           bn.addArc(variableList[tailId], variableList[headId])
+           ListOfNodes.append(headId)
+   for i in range(d):
+     bn.addArc(target, variableList[i])
+
+   bn = learner.learnParameters(bn.dag())
+   return bn
+
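The triple loop above computes the conditional mutual information I(Xi; Xj | Y). A vectorized numpy sketch of the same quantity (a hypothetical helper, assuming a normalized joint array indexed p[y][xj][xi] as returned by pseudoCount(...).normalize().tolist()):

import numpy as np

def _cond_mutual_info(p):
  """I(Xi;Xj|Y) for a normalized joint array p of shape (|Y|, |Xj|, |Xi|)."""
  p = np.asarray(p, dtype=float)
  pY = p.sum(axis=(1, 2), keepdims=True)  # p(y)
  pXiY = p.sum(axis=1, keepdims=True)     # p(xi, y)
  pXjY = p.sum(axis=2, keepdims=True)     # p(xj, y)
  mask = p > 0                            # zero cells contribute nothing
  with np.errstate(divide="ignore", invalid="ignore"):
    ratio = p * pY / (pXiY * pXjY)
  return float(np.sum(p[mask] * np.log(ratio[mask])))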
+
+
+ def _fitChowLiu(X, y, bn, learner, variableList, target):
+   """
+   Uses the Chow-Liu algorithm to learn the network structure and its parameters.
+
+   Parameters
+   ----------
+   X: {array-like, sparse matrix} of shape (n_samples, n_features)
+     training data
+   y: array-like of shape (n_samples,)
+     target values
+   """
+   # the Chow-Liu algorithm doesn't differentiate between input and output variables, so we build a matrix that includes them both
+   dimension = y.shape
+   yColumn = numpy.reshape(y, (dimension[0], 1))
+   xAndY = numpy.concatenate((yColumn, X), axis=1)
+   d = xAndY.shape[1]
+
+   mutualInformation = dict()
+   undirectedGraph = gum.UndiGraph()
+
+   # compute the mutual information of every pair of variables (node 0 is the target)
+   for i in range(d):
+     undirectedGraph.addNodeWithId(i)
+     if i > 0:
+       probabilityXi = learner.pseudoCount([variableList[i - 1]]).normalize().tolist()
+     for j in range(i):
+       if j > 0:
+         probabilityList = learner.pseudoCount([variableList[i - 1], variableList[j - 1]]).normalize().tolist()
+         probabilityXj = learner.pseudoCount([variableList[j - 1]]).normalize().tolist()
+       else:
+         probabilityList = learner.pseudoCount([variableList[i - 1], target]).normalize().tolist()
+         probabilityXj = learner.pseudoCount([target]).normalize().tolist()
+       mutual = 0
+       for xjIndex in range(len(probabilityList)):
+         for xiIndex in range(len(probabilityList[xjIndex])):
+           if probabilityList[xjIndex][xiIndex] > 0:
+             mutual = mutual + probabilityList[xjIndex][xiIndex] * math.log(
+               probabilityList[xjIndex][xiIndex] / (probabilityXi[xiIndex] * probabilityXj[xjIndex])
+             )
+       mutualInformation[(i, j)] = mutual
+
+   # sort the mutualInformation dict in descending order of its values
+   mutualInformation = {k: v for k, v in sorted(mutualInformation.items(), key=(lambda item: item[1]), reverse=True)}
+
+   for i, j in mutualInformation:
+     # taking pairs by decreasing mutual information, add an edge between xi and xj
+     undirectedGraph.addEdge(i, j)
+
+     # if the edge creates a cycle, we remove it and move on to the next pair of variables
+     if undirectedGraph.hasUndirectedCycle():
+       undirectedGraph.eraseEdge(i, j)
+
+   ListOfNodes = [0]
+   for tailId in ListOfNodes:
+     # for every node in the list, create an arc towards each neighbour not already in the list.
+     # Since the graph is acyclic, if headId is already in the list then the arc (headId, tailId) was already added, so (tailId, headId) must not be
+     neighbours = undirectedGraph.neighbours(tailId)
+     for headId in neighbours:
+       if headId not in ListOfNodes:
+         if tailId > 0:
+           bn.addArc(variableList[tailId - 1], variableList[headId - 1])
+         else:
+           bn.addArc(target, variableList[headId - 1])
+         ListOfNodes.append(headId)
+
+   bn = learner.learnParameters(bn.dag())
+   return bn
+
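The add-edge/test-cycle/erase pattern used in both structure builders is Kruskal's algorithm for a maximum-weight spanning forest, with mutual information as the edge weight. A minimal sketch of the pattern (the pair list is made up and assumed already sorted by decreasing weight):

import pyagrum as gum

g = gum.UndiGraph()
for n in range(4):
  g.addNodeWithId(n)
for (i, j) in [(1, 0), (2, 1), (3, 2), (2, 0)]:  # hypothetical sorted pairs
  g.addEdge(i, j)
  if g.hasUndirectedCycle():  # (2, 0) would close a cycle: rejected
    g.eraseEdge(i, j)
print(g.edges())  # the spanning tree 0-1, 1-2, 2-3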
+
+ def _fitNaiveBayes(X, y, bn, learner, variableList, target, constraints):
+   """
+   Uses the Naive Bayes structure to learn the network parameters. The structure itself isn't learned, since it is fixed.
+
+   Parameters
+   ----------
+   X: {array-like, sparse matrix} of shape (n_samples, n_features)
+     training data
+   y: array-like of shape (n_samples,)
+     target values
+   """
+   for variableName in variableList:
+     bn.addArc(target, variableName)
+   if constraints is not None:
+     warnings.warn("The structure of Naive Bayes is fixed, so it is impossible to add any new constraints")
+
+   bn = learner.learnParameters(bn.dag())
+   return bn
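These private helpers are dispatched by BNClassifier.fit. A usage sketch (the exact set of learningMethod values accepted by BNClassifier is assumed from the branches above; X_train, y_train and X_test stand for a hypothetical discretized dataset):

from pyagrum.skbn import BNClassifier

clf = BNClassifier(learningMethod="TAN")  # e.g. "MIIC", "GHC", "Tabu", "Chow-Liu", "NaiveBayes"
clf.fit(X_train, y_train)
print(clf.predict(X_test))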