pyAgrum-nightly 2.3.1.9.dev202512261765915415__cp310-abi3-macosx_10_15_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyagrum/__init__.py +165 -0
- pyagrum/_pyagrum.so +0 -0
- pyagrum/bnmixture/BNMInference.py +268 -0
- pyagrum/bnmixture/BNMLearning.py +376 -0
- pyagrum/bnmixture/BNMixture.py +464 -0
- pyagrum/bnmixture/__init__.py +60 -0
- pyagrum/bnmixture/notebook.py +1058 -0
- pyagrum/causal/_CausalFormula.py +280 -0
- pyagrum/causal/_CausalModel.py +436 -0
- pyagrum/causal/__init__.py +81 -0
- pyagrum/causal/_causalImpact.py +356 -0
- pyagrum/causal/_dSeparation.py +598 -0
- pyagrum/causal/_doAST.py +761 -0
- pyagrum/causal/_doCalculus.py +361 -0
- pyagrum/causal/_doorCriteria.py +374 -0
- pyagrum/causal/_exceptions.py +95 -0
- pyagrum/causal/_types.py +61 -0
- pyagrum/causal/causalEffectEstimation/_CausalEffectEstimation.py +1175 -0
- pyagrum/causal/causalEffectEstimation/_IVEstimators.py +718 -0
- pyagrum/causal/causalEffectEstimation/_RCTEstimators.py +132 -0
- pyagrum/causal/causalEffectEstimation/__init__.py +46 -0
- pyagrum/causal/causalEffectEstimation/_backdoorEstimators.py +774 -0
- pyagrum/causal/causalEffectEstimation/_causalBNEstimator.py +324 -0
- pyagrum/causal/causalEffectEstimation/_frontdoorEstimators.py +396 -0
- pyagrum/causal/causalEffectEstimation/_learners.py +118 -0
- pyagrum/causal/causalEffectEstimation/_utils.py +466 -0
- pyagrum/causal/notebook.py +172 -0
- pyagrum/clg/CLG.py +658 -0
- pyagrum/clg/GaussianVariable.py +111 -0
- pyagrum/clg/SEM.py +312 -0
- pyagrum/clg/__init__.py +63 -0
- pyagrum/clg/canonicalForm.py +408 -0
- pyagrum/clg/constants.py +54 -0
- pyagrum/clg/forwardSampling.py +202 -0
- pyagrum/clg/learning.py +776 -0
- pyagrum/clg/notebook.py +480 -0
- pyagrum/clg/variableElimination.py +271 -0
- pyagrum/common.py +60 -0
- pyagrum/config.py +319 -0
- pyagrum/ctbn/CIM.py +513 -0
- pyagrum/ctbn/CTBN.py +573 -0
- pyagrum/ctbn/CTBNGenerator.py +216 -0
- pyagrum/ctbn/CTBNInference.py +459 -0
- pyagrum/ctbn/CTBNLearner.py +161 -0
- pyagrum/ctbn/SamplesStats.py +671 -0
- pyagrum/ctbn/StatsIndepTest.py +355 -0
- pyagrum/ctbn/__init__.py +79 -0
- pyagrum/ctbn/constants.py +54 -0
- pyagrum/ctbn/notebook.py +264 -0
- pyagrum/defaults.ini +199 -0
- pyagrum/deprecated.py +95 -0
- pyagrum/explain/_ComputationCausal.py +75 -0
- pyagrum/explain/_ComputationConditional.py +48 -0
- pyagrum/explain/_ComputationMarginal.py +48 -0
- pyagrum/explain/_CustomShapleyCache.py +110 -0
- pyagrum/explain/_Explainer.py +176 -0
- pyagrum/explain/_Explanation.py +70 -0
- pyagrum/explain/_FIFOCache.py +54 -0
- pyagrum/explain/_ShallCausalValues.py +204 -0
- pyagrum/explain/_ShallConditionalValues.py +155 -0
- pyagrum/explain/_ShallMarginalValues.py +155 -0
- pyagrum/explain/_ShallValues.py +296 -0
- pyagrum/explain/_ShapCausalValues.py +208 -0
- pyagrum/explain/_ShapConditionalValues.py +126 -0
- pyagrum/explain/_ShapMarginalValues.py +191 -0
- pyagrum/explain/_ShapleyValues.py +298 -0
- pyagrum/explain/__init__.py +81 -0
- pyagrum/explain/_explGeneralizedMarkovBlanket.py +152 -0
- pyagrum/explain/_explIndependenceListForPairs.py +146 -0
- pyagrum/explain/_explInformationGraph.py +264 -0
- pyagrum/explain/notebook/__init__.py +54 -0
- pyagrum/explain/notebook/_bar.py +142 -0
- pyagrum/explain/notebook/_beeswarm.py +174 -0
- pyagrum/explain/notebook/_showShapValues.py +97 -0
- pyagrum/explain/notebook/_waterfall.py +220 -0
- pyagrum/explain/shapley.py +225 -0
- pyagrum/lib/__init__.py +46 -0
- pyagrum/lib/_colors.py +390 -0
- pyagrum/lib/bn2graph.py +299 -0
- pyagrum/lib/bn2roc.py +1026 -0
- pyagrum/lib/bn2scores.py +217 -0
- pyagrum/lib/bn_vs_bn.py +605 -0
- pyagrum/lib/cn2graph.py +305 -0
- pyagrum/lib/discreteTypeProcessor.py +1102 -0
- pyagrum/lib/discretizer.py +58 -0
- pyagrum/lib/dynamicBN.py +390 -0
- pyagrum/lib/explain.py +57 -0
- pyagrum/lib/export.py +84 -0
- pyagrum/lib/id2graph.py +258 -0
- pyagrum/lib/image.py +387 -0
- pyagrum/lib/ipython.py +307 -0
- pyagrum/lib/mrf2graph.py +471 -0
- pyagrum/lib/notebook.py +1821 -0
- pyagrum/lib/proba_histogram.py +552 -0
- pyagrum/lib/utils.py +138 -0
- pyagrum/pyagrum.py +31495 -0
- pyagrum/skbn/_MBCalcul.py +242 -0
- pyagrum/skbn/__init__.py +49 -0
- pyagrum/skbn/_learningMethods.py +282 -0
- pyagrum/skbn/_utils.py +297 -0
- pyagrum/skbn/bnclassifier.py +1014 -0
- pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/LICENSE.md +12 -0
- pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/LICENSES/LGPL-3.0-or-later.txt +304 -0
- pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/LICENSES/MIT.txt +18 -0
- pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/METADATA +145 -0
- pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/RECORD +107 -0
- pyagrum_nightly-2.3.1.9.dev202512261765915415.dist-info/WHEEL +4 -0
pyagrum/skbn/_utils.py
ADDED
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
############################################################################
|
|
2
|
+
# This file is part of the aGrUM/pyAgrum library. #
|
|
3
|
+
# #
|
|
4
|
+
# Copyright (c) 2005-2025 by #
|
|
5
|
+
# - Pierre-Henri WUILLEMIN(_at_LIP6) #
|
|
6
|
+
# - Christophe GONZALES(_at_AMU) #
|
|
7
|
+
# #
|
|
8
|
+
# The aGrUM/pyAgrum library is free software; you can redistribute it #
|
|
9
|
+
# and/or modify it under the terms of either : #
|
|
10
|
+
# #
|
|
11
|
+
# - the GNU Lesser General Public License as published by #
|
|
12
|
+
# the Free Software Foundation, either version 3 of the License, #
|
|
13
|
+
# or (at your option) any later version, #
|
|
14
|
+
# - the MIT license (MIT), #
|
|
15
|
+
# - or both in dual license, as here. #
|
|
16
|
+
# #
|
|
17
|
+
# (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) #
|
|
18
|
+
# #
|
|
19
|
+
# This aGrUM/pyAgrum library is distributed in the hope that it will be #
|
|
20
|
+
# useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, #
|
|
21
|
+
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS #
|
|
22
|
+
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
|
|
23
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
|
|
24
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, #
|
|
25
|
+
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR #
|
|
26
|
+
# OTHER DEALINGS IN THE SOFTWARE. #
|
|
27
|
+
# #
|
|
28
|
+
# See LICENCES for more details. #
|
|
29
|
+
# #
|
|
30
|
+
# SPDX-FileCopyrightText: Copyright 2005-2025 #
|
|
31
|
+
# - Pierre-Henri WUILLEMIN(_at_LIP6) #
|
|
32
|
+
# - Christophe GONZALES(_at_AMU) #
|
|
33
|
+
# SPDX-License-Identifier: LGPL-3.0-or-later OR MIT #
|
|
34
|
+
# #
|
|
35
|
+
# Contact : info_at_agrum_dot_org #
|
|
36
|
+
# homepage : http://agrum.gitlab.io #
|
|
37
|
+
# gitlab : https://gitlab.com/agrumery/agrum #
|
|
38
|
+
# #
|
|
39
|
+
############################################################################
|
|
40
|
+
|
|
41
|
+
from typing import Any
|
|
42
|
+
|
|
43
|
+
import pandas
|
|
44
|
+
import sklearn
|
|
45
|
+
import pandas as pd
|
|
46
|
+
|
|
47
|
+
import pyagrum
|
|
48
|
+
import pyagrum.lib.bn2roc as bn2roc
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _CalculateThreshold(
    bn: pyagrum.BayesNet, targetName: str, csvfilename: str, usePR: bool, beta: float, significant_digits: int
):
    """
    Compute the decision threshold to apply to the predicted probability of the target.

    The Bayesian network gives the probability of the target knowing the values of the other variables.
    An input is classified as the target class when that probability is above the threshold.
    ``bn2roc.showROC_PR`` is called once (with every display option turned off) on the second label
    of the target, and either its ROC threshold or its precision-recall threshold is returned.

    Parameters
    ----------
    bn: gum.BayesNet
        Bayesian network to work on
    targetName: str
        Name of the target
    csvfilename: str
        Name of the csv file
    usePR: bool
        if True, return the threshold given by the Precision-Recall curve, otherwise the one given by the ROC curve.
        ROC curves should be used when there are roughly equal numbers of observations for each class.
        Precision-Recall curves should be used when there is a moderate to large class imbalance,
        especially for the target's class.
    beta: float
        the beta value to use when computing the F score. It is always forwarded to ``showROC_PR``
        but is only relevant to the precision-recall threshold.
    significant_digits: int
        number of significant digits when computing probabilities

    Returns
    -------
    float
        optimal threshold for predictions
    """
    # The label at index 1 of the target variable is the one the curves are computed for.
    positive_label = bn.variableFromName(targetName).labels()[1]

    # Second element: ROC threshold; fourth element: precision-recall threshold.
    _, roc_threshold, _, pr_threshold = bn2roc.showROC_PR(
        bn,
        csvfilename,
        targetName,
        positive_label,
        beta=beta,
        show_fig=False,
        show_ROC=False,
        show_PR=False,
        significant_digits=significant_digits,
        show_progress=False,
    )

    return pr_threshold if usePR else roc_threshold
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _ImplementScore(scoringType: str, learner: pyagrum.BNLearner):
    """
    Tells the learner which scoring type to use.

    Parameters
    ----------
    scoringType: str
        A string designating the scoring we want to use. Since scoring is used
        while constructing the network and not when learning its parameters,
        the scoring will be ignored if using a learning algorithm
        with a fixed network structure such as Chow-Liu, TAN or NaiveBayes.
        Possible values are: AIC, BD, BDeu, BIC, K2, Log2
        AIC means Akaike information criterion
        BD means Bayesian-Dirichlet scoring
        BDeu means Bayesian-Dirichlet equivalent uniform
        BIC means Bayesian Information criterion
        K2 means the K2 score
        Log2 means log2 likelihood ratio test
        If None, the learner is left untouched.
        NOTE(review): the original code flagged BDeu as "default" -- presumably the
        score BNLearner uses when none is selected; confirm against pyAgrum.
    learner:
        learner object from pyagrum to apply the score

    Raises
    ------
    ValueError
        if scoringType is neither None nor one of the possible values
    """
    if scoringType is None:
        return

    # (accepted value, name of the learner method selecting that score),
    # compared in the same order as the historical if/elif chain.
    score_setters = (
        ("AIC", "useScoreAIC"),
        ("BD", "useScoreBD"),
        ("BDeu", "useScoreBDeu"),
        ("BIC", "useScoreBIC"),
        ("K2", "useScoreK2"),
        ("Log2", "useScoreLog2Likelihood"),
    )
    for accepted, setter_name in score_setters:
        if scoringType == accepted:
            getattr(learner, setter_name)()
            return

    # Adjacent literals instead of a backslash inside the string: a backslash
    # continuation would embed the indentation of the next source line in the message.
    raise ValueError(
        "Invalid scoringType! Possible values are : "
        "AIC , BD , BDeu, BIC , K2 and Log2"
    )
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _ImplementPrior(prior: str, learner: pyagrum.BNLearner, priorWeight: float, DirichletSrc: str):
    """
    Tells the learner which prior to use.

    Parameters
    ----------
    prior: str
        A string designating the type of prior we want to use.
        Possible values are Smoothing, BDeu, Dirichlet and NoPrior.
        If None, a smoothing prior of weight 0.01 is used (priorWeight is ignored).
        Note: if using the Dirichlet prior, DirichletSrc cannot be None
    learner:
        learner object from pyagrum to apply the prior
    priorWeight: float
        The weight used for the prior (not used by NoPrior).
    DirichletSrc: str
        the source (csv file name or model) we want to use for the dirichlet prior.
        Will be ignored if prior is not set to Dirichlet.

    Raises
    ------
    ValueError
        if prior is not one of the possible values, or if prior is Dirichlet and DirichletSrc is None
    """
    if prior is None:
        # default : (small) Laplace's adjustment
        learner.useSmoothingPrior(0.01)
        return

    if prior == "Smoothing":
        learner.useSmoothingPrior(priorWeight)
        return

    if prior == "Dirichlet":
        if DirichletSrc is None:
            raise ValueError("A source (csv or model) must be specified for dirichlet prior")
        learner.useDirichletPrior(DirichletSrc, priorWeight)
        return

    if prior == "BDeu":
        learner.useBDeuPrior(priorWeight)
        return

    if prior == "NoPrior":
        learner.useNoPrior()
        return

    raise ValueError("Invalid prior! Possible values are : Smoothing , Dirichlet , BDeu and NoPrior")
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _ImplementConstraints(constraints: dict[str, Any], learner: pyagrum.BNLearner):
    """
    Tells the learner which constraints should be put on the structure of the network.

    Parameters
    ----------
    constraints: dict
        A dictionary designating the constraints that we want to put on the structure of the Bayesian network.
        Ignored if using a learning algorithm where the structure is fixed, such as TAN or NaiveBayes.
        The keys of the dictionary should be the strings "PossibleEdges", "MandatoryArcs" and "ForbiddenArcs".
        Each value should be an iterable of pairs (tail, head), each pair designating the arc from tail to
        head. For example, if we put the pair ("x0", "y") in MandatoryArcs, the network will surely have an arc
        going from x0 to y.
        Note: PossibleEdge between nodes x and y allows for either (x,y) or (y,x) (or none of them) to be added to
        the Bayesian network, while the others are not symmetric.
        If None, no constraint is added.
    learner:
        learner object from pyagrum to apply the constraints

    Raises
    ------
    ValueError
        if constraints is not a dictionary, or if one of its keys is not
        "MandatoryArcs", "ForbiddenArcs" or "PossibleEdges"
    """
    if constraints is None:  # default
        return
    # isinstance rather than an exact type comparison, so that dict subclasses
    # (OrderedDict, defaultdict, ...) are accepted as well.
    if not isinstance(constraints, dict):
        raise ValueError("Invalid syntax for constraints. Constraints should be passed as a dictionary")

    # accepted key -> name of the learner method registering one (tail, head) pair
    adders = {
        "MandatoryArcs": "addMandatoryArc",
        "ForbiddenArcs": "addForbiddenArc",
        "PossibleEdges": "addPossibleEdge",
    }

    # Check every key before touching the learner, so that an invalid dictionary
    # does not leave the learner with only part of the constraints applied.
    for key in constraints:
        if key not in adders:
            # Adjacent literals instead of a backslash inside the string: a backslash
            # continuation would embed the indentation of the next source line in the message.
            raise ValueError(
                "Invalid syntax: the only keys in the constraints dictionary should be "
                "MandatoryArcs, PossibleEdges and ForbiddenArcs"
            )

    for key, pairs in constraints.items():
        add_pair = getattr(learner, adders[key])
        for tail, head in pairs:
            add_pair(tail, head)
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def _DFNames(X: pandas.DataFrame) -> dict[str, int]:
    """
    Return a dictionary mapping each column name of a DataFrame to its position.

    Parameters
    ----------
    X: pandas.DataFrame
        DataFrame to read

    Returns
    -------
    Dict[str,int]
        Dictionary of the variables' names and their column indices (0-based, in column order).
        If a name appears several times, the index of its last occurrence is kept.
    """
    return {name: position for position, name in enumerate(X.columns.tolist())}
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def _listIdtoName(bn: pyagrum.BayesNet, liste: list[int]):
    """
    Translate a list of variable ids into the names of the corresponding variables.

    Parameters
    ----------
    bn: gum.BayesNet
        Bayesian network in which the ids are looked up
    liste: list[int]
        List of variable ids

    Returns
    -------
    List[str]
        Names of the variables, in the same order as the ids in liste
    """
    return [bn.variable(node_id).name() for node_id in liste]
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _createCSVfromNDArrays(X, y, target: str, variableNameIndexDictionary: dict[str, int], csvfilename: str):
    """
    Creates a csv file from the matrices passed as parameters.
    csvfilename is used by the fit function to learn the network structure and its parameters.

    The file starts with a header line (the target name followed by the feature names)
    and the target is written as the first column.

    Parameters
    ----------
    X: {array-like, sparse matrix} of shape (n_samples, n_features)
        training data
    y: array-like of shape (n_samples)
        Target values
    target: str
        Name of the target
    variableNameIndexDictionary: dict[str , int]
        dictionary mapping the name of each variable to the index of its column in X
    csvfilename: str
        name of the csv file to write
    """
    # Validates X and y (consistent number of samples, 2-D X, 1-D y) and converts them to arrays.
    X, y = sklearn.utils.check_X_y(X, y, dtype=None, accept_sparse=True)

    # With accept_sparse=True, a sparse X is returned as a SciPy sparse matrix, which the
    # pandas.DataFrame constructor cannot turn into one column per feature: densify it first.
    if hasattr(X, "toarray"):
        X = X.toarray()

    # Feature names ordered by their column index in X.
    feature_names = sorted(variableNameIndexDictionary, key=variableNameIndexDictionary.get)

    target_frame = pd.DataFrame(y, columns=[target])
    feature_frame = pd.DataFrame(X, columns=feature_names)

    # The column names are written as the first line of the csv file,
    # since it is needed by pyAgrum to function properly.
    training_file = pd.concat([target_frame, feature_frame], axis=1)
    training_file.to_csv(csvfilename, index=False)
|