compiled-knowledge 4.0.0a25__cp312-cp312-win32.whl → 4.1.0__cp312-cp312-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of compiled-knowledge might be problematic. Click here for more details.
- ck/circuit/_circuit_cy.c +1 -1
- ck/circuit/_circuit_cy.cp312-win32.pyd +0 -0
- ck/circuit_compiler/cython_vm_compiler/_compiler.c +152 -152
- ck/circuit_compiler/cython_vm_compiler/_compiler.cp312-win32.pyd +0 -0
- ck/circuit_compiler/interpret_compiler.py +2 -2
- ck/circuit_compiler/llvm_compiler.py +4 -4
- ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.c +1 -1
- ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.cp312-win32.pyd +0 -0
- ck/circuit_compiler/support/input_vars.py +4 -4
- ck/dataset/__init__.py +1 -0
- ck/dataset/cross_table.py +334 -0
- ck/dataset/dataset.py +682 -0
- ck/dataset/dataset_builder.py +519 -0
- ck/dataset/dataset_compute.py +140 -0
- ck/dataset/dataset_from_crosstable.py +64 -0
- ck/dataset/dataset_from_csv.py +151 -0
- ck/dataset/sampled_dataset.py +96 -0
- ck/learning/__init__.py +0 -0
- ck/learning/coalesce_cross_tables.py +403 -0
- ck/learning/model_from_cross_tables.py +296 -0
- ck/learning/parameters.py +117 -0
- ck/learning/train_generative_bn.py +198 -0
- ck/pgm.py +39 -35
- ck/pgm_circuit/marginals_program.py +5 -0
- ck/pgm_circuit/program_with_slotmap.py +23 -45
- ck/pgm_circuit/support/compile_circuit.py +2 -4
- ck/pgm_circuit/wmc_program.py +5 -0
- ck/pgm_compiler/support/circuit_table/_circuit_table_cy.c +1 -1
- ck/pgm_compiler/support/circuit_table/_circuit_table_cy.cp312-win32.pyd +0 -0
- ck/probability/cross_table_probability_space.py +53 -0
- ck/probability/divergence.py +226 -0
- ck/probability/empirical_probability_space.py +1 -0
- ck/probability/probability_space.py +43 -19
- ck_demos/dataset/__init__.py +0 -0
- ck_demos/dataset/demo_dataset_builder.py +37 -0
- ck_demos/dataset/demo_dataset_from_sampler.py +18 -0
- ck_demos/learning/__init__.py +0 -0
- ck_demos/learning/demo_bayesian_network_from_cross_tables.py +70 -0
- ck_demos/learning/demo_simple_learning.py +55 -0
- ck_demos/sampling/demo_wmc_direct_sampler.py +2 -2
- {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0.dist-info}/METADATA +2 -1
- {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0.dist-info}/RECORD +45 -24
- {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0.dist-info}/WHEEL +0 -0
- {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0.dist-info}/licenses/LICENSE.txt +0 -0
- {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""
|
|
2
|
+
General functions for setting the parameter values of a PGM.
|
|
3
|
+
"""
|
|
4
|
+
from typing import List, Tuple, TypeAlias
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
from ck.dataset.cross_table import CrossTable
|
|
9
|
+
from ck.pgm import PGM, CPTPotentialFunction, Instance, SparsePotentialFunction, DensePotentialFunction, Factor
|
|
10
|
+
from ck.utils.map_list import MapList
|
|
11
|
+
from ck.utils.np_extras import NDArrayFloat64
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
ParameterValues: TypeAlias = List[CrossTable]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def make_factors(pgm: PGM, parameter_values: List[CrossTable]) -> None:
    """
    Add one new factor to `pgm` for each given cross-table, then set the
    potential function of each new factor from its cross-table.

    Each factor is created with `pgm.new_factor(*cross_table.rvs)` and its
    potential function is chosen and populated by `set_potential_function`.

    Only the factors created by this call are paired with `parameter_values`;
    factors that `pgm` already held are left untouched.

    This function modifies `pgm` in-place.

    Args:
        pgm: the PGM to add factors to.
        parameter_values: a cross-table for each factor to create.
    """
    from itertools import islice

    # Remember how many factors existed before this call, so that the
    # cross-tables are not accidentally paired with pre-existing factors.
    first_new_factor: int = pgm.number_of_factors

    for cross_table in parameter_values:
        pgm.new_factor(*cross_table.rvs)

    # NOTE(review): assumes `pgm.factors` lists factors in creation order, with
    # new factors appended at the end (the original pairing relied on the same
    # ordering) - confirm against `PGM.new_factor`.
    new_factors = islice(pgm.factors, first_new_factor, None)
    for factor, cross_table in zip(new_factors, parameter_values):
        set_potential_function(factor, cross_table)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def set_potential_functions(pgm: PGM, parameter_values: List[CrossTable]) -> None:
    """
    Give each factor of `pgm` a heuristically chosen potential function whose
    parameter values are taken from the matching entry of `parameter_values`.

    The work for each factor is delegated to `set_potential_function`.
    Factors and cross-tables are paired positionally; pairing stops as soon
    as either sequence is exhausted.

    This function modifies `pgm` in-place.

    Args:
        pgm (PGM): the PGM to have its potential functions set.
        parameter_values: the parameter values, one cross-table per factor,
            co-indexed with `pgm.factors`.
    """
    factor_table_pairs = zip(pgm.factors, parameter_values)
    for pgm_factor, cross_table in factor_table_pairs:
        set_potential_function(pgm_factor, cross_table)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def set_potential_function(
        factor: Factor,
        parameter_values: CrossTable,
        *,
        small_table_size: int = 100,
        dense_fill_ratio: float = 0.9,
) -> None:
    """
    Set the potential function of the given factor to one heuristically chosen,
    using the given parameter values. Then set the parameter values of the potential
    function to those given by `parameter_values`.

    The heuristic is:
        * no parameter values: the factor is set to zero (`factor.set_zero()`),
        * fewer than `small_table_size` parameter values, or more than
          `dense_fill_ratio * factor.number_of_states` parameter values:
          a dense potential function (`factor.set_dense()`),
        * otherwise: a sparse potential function (`factor.set_sparse()`).

    This function modifies `factor` in-place.

    Args:
        factor: The factor to update.
        parameter_values: the parameter values, as a mapping-like object
            supporting `len(...)` and `.items()` yielding (instance, weight) pairs.
        small_table_size: tables with fewer parameter values than this are
            always stored densely. The default, 100, is the original fixed threshold.
        dense_fill_ratio: tables filling more than this fraction of the factor's
            state space are stored densely. The default, 0.9, is the original
            fixed threshold.
    """
    number_of_parameters: int = len(parameter_values)

    if number_of_parameters == 0:
        # Nothing to store: no potential function values need setting.
        factor.set_zero()
        return

    is_small: bool = number_of_parameters < small_table_size
    is_nearly_full: bool = number_of_parameters > factor.number_of_states * dense_fill_ratio

    # A single, un-annotated name avoids declaring `pot_function` twice
    # with two different types.
    if is_small or is_nearly_full:
        pot_function = factor.set_dense()
    else:
        pot_function = factor.set_sparse()

    for instance, weight in parameter_values.items():
        pot_function[instance] = weight
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def set_zero(pgm: PGM) -> None:
    """
    Reset every factor of the given PGM to zero, by calling
    `set_zero()` on each factor in `pgm.factors`.

    This function modifies `pgm` in-place.

    Args:
        pgm: the PGM whose factors are to be set to zero.
    """
    all_factors = pgm.factors
    for each_factor in all_factors:
        each_factor.set_zero()
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def set_dense(pgm: PGM, parameter_values: List[CrossTable]) -> None:
    """
    Give each factor of the PGM a DensePotentialFunction and copy the
    matching cross-table's (instance, weight) entries into it.

    Factors and cross-tables are paired positionally; pairing stops as soon
    as either sequence is exhausted.

    This function modifies `pgm` in-place.

    Args:
        pgm: the PGM to have its potential functions set.
        parameter_values: the parameter values, one cross-table per factor.
    """
    factor_table_pairs = zip(pgm.factors, parameter_values)
    for target_factor, table in factor_table_pairs:
        dense_function = target_factor.set_dense()
        for key, value in table.items():
            dense_function[key] = value
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def set_sparse(pgm: PGM, parameter_values: List[CrossTable]) -> None:
    """
    Give each factor of the PGM a SparsePotentialFunction and copy the
    matching cross-table's (instance, weight) entries into it.

    Factors and cross-tables are paired positionally; pairing stops as soon
    as either sequence is exhausted.

    This function modifies `pgm` in-place.

    Args:
        pgm: the PGM to have its potential functions set.
        parameter_values: the parameter values, one cross-table per factor.
    """
    factor_table_pairs = zip(pgm.factors, parameter_values)
    for target_factor, table in factor_table_pairs:
        sparse_function = target_factor.set_sparse()
        for key, value in table.items():
            sparse_function[key] = value
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def set_cpt(pgm: PGM, parameter_values: List[CrossTable], normalise_cpds: bool = True) -> None:
    """
    Set the potential function of each PGM factor to a CPTPotentialFunction,
    using the given parameter values.

    For each cross-table, the first random variable is treated as the child
    and the remaining random variables as the parents. The weights are grouped
    by parent instance into one conditional probability distribution (CPD)
    per parent instance, and each CPD is passed to `set_cpd`.

    Factors and cross-tables are paired positionally; pairing stops as soon
    as either sequence is exhausted.

    This function modifies `pgm` in-place.

    Args:
        pgm: the PGM to have its potential functions set.
        parameter_values: the parameter values, one cross-table per factor.
        normalise_cpds: if true, each CPD is divided by its sum before being set.
            A CPD whose weights sum to zero is left as all zeros rather than
            being divided (which would produce NaN values).
    """
    for factor, cpt in zip(pgm.factors, parameter_values):
        pot_function = factor.set_cpt()

        cpd_size: int = len(cpt.rvs[0])  # size of the child random variable

        # Group cpt values by parent instance, writing each weight directly
        # into the CPD array of its parent instance.
        cpds: dict = {}
        for instance, weight in cpt.items():
            parent_instance = instance[1:]
            cpd_array = cpds.get(parent_instance)
            if cpd_array is None:
                cpd_array = cpds[parent_instance] = np.zeros(cpd_size, dtype=np.float64)
            cpd_array[instance[0]] = weight

        # Set the CPDs
        for parent_instance, cpd_array in cpds.items():
            if normalise_cpds:
                total = cpd_array.sum()
                # Guard against 0/0, which would fill the CPD with NaN.
                if total != 0:
                    cpd_array /= total
            pot_function.set_cpd(parent_instance, cpd_array)
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import List, Mapping, Tuple
|
|
4
|
+
|
|
5
|
+
from ck.dataset import SoftDataset, HardDataset
|
|
6
|
+
from ck.dataset.cross_table import CrossTable, cross_table_from_dataset
|
|
7
|
+
from ck.learning.parameters import set_potential_functions, ParameterValues
|
|
8
|
+
from ck.pgm import PGM
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def train_generative_bn(
        pgm: PGM,
        dataset: HardDataset | SoftDataset,
        *,
        dirichlet_prior: float | Mapping[int, float | CrossTable] = 0,
        check_bayesian_network: bool = True,
) -> None:
    """
    Train a Bayesian network generatively, by maximum likelihood.

    CPTs are computed from the dataset with `get_cpts`, then installed into
    the PGM with `set_potential_functions`, replacing whatever potential
    functions the PGM had.

    Args:
        pgm: the probabilistic graphical model defining the model structure.
            Potential function values are ignored and need not be set.
        dataset: a dataset of random variable states.
        dirichlet_prior: provides a Dirichlet prior for each factor in `pgm`.
            This can be represented in multiple ways:
            (a) as a uniform prior that is the same for all factors, represented as a float value,
            (b) as a mapping from a factor index to a uniform prior, i.e., a float value,
            (c) as a mapping from a factor index to an arbitrary Dirichlet prior, i.e., a cross-table.
            If there is no entry in the mapping for a factor, then the value 0 will be used for that factor.
            If a cross-table is provided as a prior, then it must have the same random variables as
            the factor it pertains to.
            The default value for `dirichlet_prior` is 0.
            See `CrossTable` for more explanation.
        check_bayesian_network: if true and not `pgm.is_structure_bayesian` an exception will be raised.

    Raises:
        ValueError: if the given PGM does not have a Bayesian network structure, and check_bayesian_network is True.
    """
    # The structure is only inspected when the check was requested.
    structure_accepted = (not check_bayesian_network) or pgm.is_structure_bayesian
    if not structure_accepted:
        raise ValueError('the given PGM is not a Bayesian network')

    learned_cpts: List[CrossTable] = get_cpts(
        pgm=pgm,
        dataset=dataset,
        dirichlet_prior=dirichlet_prior,
    )
    set_potential_functions(pgm, learned_cpts)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def get_cpts(
        pgm: PGM,
        dataset: HardDataset | SoftDataset,
        *,
        dirichlet_prior: float | Mapping[int, float | CrossTable] = 0,
) -> ParameterValues:
    """
    Compute a CPT for each factor of the given PGM from the given dataset.

    A cross-table is obtained for each factor via `get_factor_cross_tables`,
    and each one is converted to a CPT with `cpt_from_crosstab`.
    The resulting parameter values can be used directly to update the parameters
    of the given PGM, so long as it has a Bayesian network structure.

    To update the given PGM from the resulting `cpts` use `set_potential_functions(pgm, cpts)`.

    Args:
        pgm: the probabilistic graphical model defining the model structure.
            Potential function values are ignored and need not be set.
        dataset: a dataset of random variable states.
        dirichlet_prior: provides a Dirichlet prior for each factor in `pgm`.
            This can be represented in multiple ways:
            (a) as a uniform prior that is the same for all factors, represented as a float value,
            (b) as a mapping from a factor index to a uniform prior, i.e., a float value,
            (c) as a mapping from a factor index to an arbitrary Dirichlet prior, i.e., a cross-table.
            If there is no entry in the mapping for a factor, then the value 0 will be used for that factor.
            If a cross-table is provided as a prior, then it must have the same random variables as
            the factor it pertains to.
            The default value for `dirichlet_prior` is 0.
            See `CrossTable` for more explanation.

    Returns:
        ParameterValues object, a CPT for each factor in the given PGM, as a list of cross-tables, co-indexed
        with the PGM factors.
    """
    factor_cross_tables: List[CrossTable] = get_factor_cross_tables(
        pgm=pgm,
        dataset=dataset,
        dirichlet_prior=dirichlet_prior,
    )
    return [cpt_from_crosstab(cross_table) for cross_table in factor_cross_tables]
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def get_factor_cross_tables(
        pgm: PGM,
        dataset: HardDataset | SoftDataset,
        *,
        dirichlet_prior: float | Mapping[int, float | CrossTable] = 0,
) -> ParameterValues:
    """
    Build one cross-table per factor of the given PGM, from the given dataset.

    Each cross-table is produced by `cross_table_from_dataset` over the random
    variables of the factor, using the Dirichlet prior that applies to that factor.

    Args:
        pgm: the probabilistic graphical model defining the model structure.
            Potential function values are ignored and need not be set.
        dataset: a dataset of random variable states.
        dirichlet_prior: provides a Dirichlet prior for each factor in `pgm`.
            This can be represented in multiple ways:
            (a) as a uniform prior that is the same for all factors, represented as a float value,
            (b) as a mapping from a factor index to a uniform prior, i.e., a float value,
            (c) as a mapping from a factor index to an arbitrary Dirichlet prior, i.e., a cross-table.
            If there is no entry in the mapping for a factor, then the value 0 will be used for that factor.
            If a cross-table is provided as a prior, then it must have the same random variables as
            the factor it pertains to.
            The default value for `dirichlet_prior` is 0.
            See `CrossTable` for more explanation.

    Returns:
        ParameterValues object, a crosstable for each factor in the given PGM, as
        per `cross_table_from_dataset`.

    Assumes:
        every random variable of the PGM is in the dataset.
    """
    # A plain number is a single prior shared by all factors; anything else
    # is treated as a per-factor mapping, with 0 for factors it does not mention.
    is_uniform_prior: bool = isinstance(dirichlet_prior, (float, int))
    per_factor_priors: Mapping[int, float | CrossTable] = {} if is_uniform_prior else dirichlet_prior
    fallback_prior: float = dirichlet_prior if is_uniform_prior else 0

    result: List[CrossTable] = []
    for factor in pgm.factors:
        factor_prior = per_factor_priors.get(factor.idx, fallback_prior)
        result.append(
            cross_table_from_dataset(
                dataset,
                factor.rvs,
                dirichlet_prior=factor_prior,
            )
        )
    return result
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def cpt_from_crosstab(crosstab: CrossTable) -> CrossTable:
    """
    Turn the given cross-table into a conditional probability table (CPT),
    treating its first random variable as the child and the remaining
    random variables as the parents.

    This is the first result of `cpt_and_parent_sums_from_crosstab`;
    the parent sums are discarded.

    Args:
        crosstab: a CrossTable representing the weight of unique instances.

    Returns:
        A cross-table that is a conditional probability table.

    Assumes:
        the first random variable in `crosstab.rvs` is the child random variable.
    """
    cpt, _parent_sums = cpt_and_parent_sums_from_crosstab(crosstab)
    return cpt
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def cpt_and_parent_sums_from_crosstab(crosstab: CrossTable) -> Tuple[CrossTable, CrossTable]:
    """
    Turn the given cross-table into a conditional probability table (CPT),
    treating its first random variable as the child and the remaining
    random variables as the parents.

    Each weight of `crosstab` is divided by the total weight recorded
    for its parent instance.

    Args:
        crosstab: a CrossTable representing the weight of unique instances.

    Returns:
        A cross-table that is a conditional probability table.
        A cross-table of the parent sums that were divided out of `crosstab`

    Assumes:
        the first random variable in `crosstab.rvs` is the child random variable.
    """
    all_rvs = crosstab.rvs

    # Weights keyed by parent instance only (child state dropped).
    # NOTE(review): presumably `CrossTable` accumulates repeated instances
    # passed via `update`, giving per-parent totals - confirm in `CrossTable`.
    parent_weights = (
        (full_instance[1:], w)
        for full_instance, w in crosstab.items()
    )
    parent_sums: CrossTable = CrossTable(rvs=all_rvs[1:], update=parent_weights)

    # Each original weight divided by the total of its parent instance, i.e., the CPT.
    normalised_weights = (
        (full_instance, w / parent_sums[full_instance[1:]])
        for full_instance, w in crosstab.items()
    )
    cpt = CrossTable(rvs=all_rvs, update=normalised_weights)

    return cpt, parent_sums
|
ck/pgm.py
CHANGED
|
@@ -19,16 +19,18 @@ State: TypeAlias = Union[int, str, bool, float, None]
|
|
|
19
19
|
The type for a possible state of a random variable.
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
|
-
Instance: TypeAlias =
|
|
22
|
+
Instance: TypeAlias = Tuple[int, ...]
|
|
23
23
|
"""
|
|
24
|
-
An instance (of a sequence of random variables) is a
|
|
24
|
+
An instance (of a sequence of random variables) is a tuple of integers
|
|
25
25
|
that are state indexes, co-indexed with a known sequence of random variables.
|
|
26
26
|
"""
|
|
27
27
|
|
|
28
|
-
Key: TypeAlias = Union[
|
|
28
|
+
Key: TypeAlias = Union[Sequence[int], int]
|
|
29
29
|
"""
|
|
30
|
-
A key identifies an instance, either as
|
|
31
|
-
single integer
|
|
30
|
+
A key identifies an instance, either as a sequence of integers or a
|
|
31
|
+
single integer. The integers are state indexes, co-indexed with a known
|
|
32
|
+
sequence of random variables. A single integer represents an instance with
|
|
33
|
+
one dimension.
|
|
32
34
|
"""
|
|
33
35
|
|
|
34
36
|
Shape: TypeAlias = Sequence[int]
|
|
@@ -594,9 +596,11 @@ class PGM:
|
|
|
594
596
|
|
|
595
597
|
# Factors form a DAG
|
|
596
598
|
states: NDArrayUInt8 = np.zeros(self.number_of_factors, dtype=np.uint8)
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
599
|
+
if any(
|
|
600
|
+
self._has_cycle(factor, child_to_factor, states)
|
|
601
|
+
for factor in self._factors
|
|
602
|
+
):
|
|
603
|
+
return False
|
|
600
604
|
|
|
601
605
|
# All tests passed
|
|
602
606
|
return True
|
|
@@ -776,7 +780,7 @@ class PGM:
|
|
|
776
780
|
next_prefix: str = prefix + indent
|
|
777
781
|
next_next_prefix: str = next_prefix + indent
|
|
778
782
|
|
|
779
|
-
print(f'{prefix}PGM id={id(self)}
|
|
783
|
+
print(f'{prefix}PGM id={id(self)}')
|
|
780
784
|
self.dump_synopsis(prefix=next_prefix, precision=precision, max_state_digits=max_state_digits)
|
|
781
785
|
|
|
782
786
|
print(f'{prefix}random variables ({self.number_of_rvs})')
|
|
@@ -790,16 +794,16 @@ class PGM:
|
|
|
790
794
|
|
|
791
795
|
print(f'{prefix}factors ({self.number_of_factors})')
|
|
792
796
|
for factor in self.factors:
|
|
793
|
-
|
|
797
|
+
factor_rvs = ', '.join(repr(rv.name) for rv in factor.rvs)
|
|
794
798
|
if factor.is_zero:
|
|
795
|
-
function_ref = '<
|
|
799
|
+
function_ref = '<ZeroPotentialFunction>'
|
|
796
800
|
else:
|
|
797
801
|
function = factor.function
|
|
798
802
|
function_ref = f'{id(function)}: {function.__class__.__name__}'
|
|
799
803
|
|
|
800
|
-
print(f'{next_prefix}{factor.idx:>3} rvs={
|
|
804
|
+
print(f'{next_prefix}{factor.idx:>3} rvs=({factor_rvs}) function={function_ref}')
|
|
801
805
|
|
|
802
|
-
print(f'{prefix}functions ({self.
|
|
806
|
+
print(f'{prefix}functions, excluding ZeroPotentialFunction ({sum(1 for _ in self.non_zero_functions)})')
|
|
803
807
|
for function in sorted(self.non_zero_functions, key=lambda f: id(f)):
|
|
804
808
|
print(f'{next_prefix}{id(function):>13}: {function.__class__.__name__}')
|
|
805
809
|
function.dump(prefix=next_next_prefix, show_function_values=show_function_values, show_id_class=False)
|
|
@@ -1871,7 +1875,7 @@ class PotentialFunction(ABC):
|
|
|
1871
1875
|
a hypothetical parameter index assuming that every valid key has a unique parameter
|
|
1872
1876
|
as per DensePotentialFunction.
|
|
1873
1877
|
"""
|
|
1874
|
-
return
|
|
1878
|
+
return natural_key_idx(self._shape, key)
|
|
1875
1879
|
|
|
1876
1880
|
def param_id(self, param_idx: int) -> ParamId:
|
|
1877
1881
|
"""
|
|
@@ -2029,7 +2033,7 @@ class ZeroPotentialFunction(PotentialFunction):
|
|
|
2029
2033
|
return 0
|
|
2030
2034
|
|
|
2031
2035
|
def param_idx(self, key: Key) -> int:
|
|
2032
|
-
return
|
|
2036
|
+
return natural_key_idx(self._shape, key)
|
|
2033
2037
|
|
|
2034
2038
|
def is_cpt(self, tolerance=DEFAULT_CPT_TOLERANCE) -> bool:
|
|
2035
2039
|
return True
|
|
@@ -3364,26 +3368,7 @@ def rv_instances_as_indicators(*rvs: RandomVariable, flip: bool = False) -> Iter
|
|
|
3364
3368
|
return _combos(rvs, flip=not flip)
|
|
3365
3369
|
|
|
3366
3370
|
|
|
3367
|
-
def
|
|
3368
|
-
"""
|
|
3369
|
-
Convert a key to an instance.
|
|
3370
|
-
|
|
3371
|
-
Args:
|
|
3372
|
-
key: a key into a state space.
|
|
3373
|
-
|
|
3374
|
-
Returns:
|
|
3375
|
-
A instance from the state space, as a tuple of state indexes, co-indexed with the given shape.
|
|
3376
|
-
|
|
3377
|
-
Assumes:
|
|
3378
|
-
The key is valid for the implied state space.
|
|
3379
|
-
"""
|
|
3380
|
-
if isinstance(key, int):
|
|
3381
|
-
return (key,)
|
|
3382
|
-
else:
|
|
3383
|
-
return tuple(key)
|
|
3384
|
-
|
|
3385
|
-
|
|
3386
|
-
def _natural_key_idx(shape: Shape, key: Key) -> int:
|
|
3371
|
+
def natural_key_idx(shape: Shape, key: Key) -> int:
|
|
3387
3372
|
"""
|
|
3388
3373
|
What is the natural index of the given key, assuming the given shape.
|
|
3389
3374
|
|
|
@@ -3409,6 +3394,25 @@ def _natural_key_idx(shape: Shape, key: Key) -> int:
|
|
|
3409
3394
|
return result
|
|
3410
3395
|
|
|
3411
3396
|
|
|
3397
|
+
def _key_to_instance(key: Key) -> Instance:
|
|
3398
|
+
"""
|
|
3399
|
+
Convert a key to an instance.
|
|
3400
|
+
|
|
3401
|
+
Args:
|
|
3402
|
+
key: a key into a state space.
|
|
3403
|
+
|
|
3404
|
+
Returns:
|
|
3405
|
+
A instance from the state space, as a tuple of state indexes, co-indexed with the given shape.
|
|
3406
|
+
|
|
3407
|
+
Assumes:
|
|
3408
|
+
The key is valid for the implied state space.
|
|
3409
|
+
"""
|
|
3410
|
+
if isinstance(key, int):
|
|
3411
|
+
return (key,)
|
|
3412
|
+
else:
|
|
3413
|
+
return tuple(key)
|
|
3414
|
+
|
|
3415
|
+
|
|
3412
3416
|
def _zero_space(shape: Shape) -> int:
|
|
3413
3417
|
"""
|
|
3414
3418
|
Return the size of the zero space of the given shape. This is the number
|
|
@@ -308,6 +308,11 @@ class MarginalsProgram(ProgramWithSlotmap, ProbabilitySpace):
|
|
|
308
308
|
The sampler will yield state lists, where the state
|
|
309
309
|
values are co-indexed with rvs, or self.rvs if rvs is None.
|
|
310
310
|
|
|
311
|
+
For more information about this sampler, see the publication:
|
|
312
|
+
Suresh, S., Drake, B. (2025). Sampling of Large Probabilistic Graphical Models
|
|
313
|
+
Using Arithmetic Circuits. AI 2024: Advances in Artificial Intelligence. AI 2024.
|
|
314
|
+
Lecture Notes in Computer Science, vol 15443. https://doi.org/10.1007/978-981-96-0351-0_13.
|
|
315
|
+
|
|
311
316
|
Args:
|
|
312
317
|
rvs: the list of random variables to sample; the
|
|
313
318
|
yielded state vectors are co-indexed with rvs; if None,
|
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
from typing import Tuple, Sequence, Dict
|
|
1
|
+
from typing import Tuple, Sequence, Dict
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
from ck.pgm import RandomVariable, Indicator, ParamId
|
|
4
6
|
from ck.pgm_circuit.slot_map import SlotMap, SlotKey
|
|
5
7
|
from ck.probability.probability_space import Condition, check_condition
|
|
6
8
|
from ck.program.program_buffer import ProgramBuffer
|
|
@@ -69,40 +71,6 @@ class ProgramWithSlotmap:
|
|
|
69
71
|
def slot_map(self) -> SlotMap:
|
|
70
72
|
return self._slot_map
|
|
71
73
|
|
|
72
|
-
def instances(self, flip: bool = False) -> Iterable[Instance]:
|
|
73
|
-
"""
|
|
74
|
-
Enumerate instances of the random variables.
|
|
75
|
-
|
|
76
|
-
Each instance is a tuples of state indexes, co-indexed with the given random variables.
|
|
77
|
-
|
|
78
|
-
The order is the natural index order (i.e., last random variable changing most quickly).
|
|
79
|
-
|
|
80
|
-
Args:
|
|
81
|
-
flip: if true, then first random variable changes most quickly.
|
|
82
|
-
|
|
83
|
-
Returns:
|
|
84
|
-
an iteration over tuples, each tuple holds state indexes
|
|
85
|
-
co-indexed with the given random variables.
|
|
86
|
-
"""
|
|
87
|
-
return rv_instances(*self._rvs, flip=flip)
|
|
88
|
-
|
|
89
|
-
def instances_as_indicators(self, flip: bool = False) -> Iterable[Sequence[Indicator]]:
|
|
90
|
-
"""
|
|
91
|
-
Enumerate instances of the random variables.
|
|
92
|
-
|
|
93
|
-
Each instance is a tuples of indicators, co-indexed with the given random variables.
|
|
94
|
-
|
|
95
|
-
The order is the natural index order (i.e., last random variable changing most quickly).
|
|
96
|
-
|
|
97
|
-
Args:
|
|
98
|
-
flip: if true, then first random variable changes most quickly.
|
|
99
|
-
|
|
100
|
-
Returns:
|
|
101
|
-
an iteration over tuples, each tuples holds random variable indicators
|
|
102
|
-
co-indexed with the given random variables.
|
|
103
|
-
"""
|
|
104
|
-
return rv_instances_as_indicators(*self._rvs, flip=flip)
|
|
105
|
-
|
|
106
74
|
def compute(self) -> NDArrayNumeric:
|
|
107
75
|
"""
|
|
108
76
|
Execute the program to compute and return the result. As per `ProgramBuffer.compute`.
|
|
@@ -146,29 +114,36 @@ class ProgramWithSlotmap:
|
|
|
146
114
|
"""
|
|
147
115
|
return self._program_buffer.vars
|
|
148
116
|
|
|
149
|
-
def __setitem__(self, item: int | slice | SlotKey |
|
|
117
|
+
def __setitem__(self, item: int | slice | SlotKey | RandomVariable, value: float) -> None:
|
|
150
118
|
"""
|
|
151
|
-
Set
|
|
119
|
+
Set input slot value/s.
|
|
152
120
|
"""
|
|
153
121
|
if isinstance(item, (int, slice)):
|
|
154
122
|
self._program_buffer[item] = value
|
|
155
123
|
elif isinstance(item, (Indicator, ParamId)):
|
|
156
124
|
self._program_buffer[self._slot_map[item]] = value
|
|
125
|
+
elif isinstance(item, RandomVariable):
|
|
126
|
+
for ind in item:
|
|
127
|
+
self._program_buffer[self._slot_map[ind]] = value
|
|
157
128
|
else:
|
|
158
|
-
|
|
159
|
-
for i in item:
|
|
160
|
-
self[i] = value
|
|
129
|
+
raise IndexError(f'unknown index type: {type(item)}')
|
|
161
130
|
|
|
162
|
-
def __getitem__(self, item: int | slice | SlotKey) -> NDArrayNumeric:
|
|
131
|
+
def __getitem__(self, item: int | slice | SlotKey | RandomVariable) -> NDArrayNumeric:
|
|
163
132
|
"""
|
|
164
|
-
Get
|
|
133
|
+
Get input slot value/s.
|
|
165
134
|
"""
|
|
166
135
|
if isinstance(item, (int, slice)):
|
|
167
136
|
return self._program_buffer[item]
|
|
168
137
|
elif isinstance(item, (Indicator, ParamId)):
|
|
169
138
|
return self._program_buffer[self._slot_map[item]]
|
|
139
|
+
elif isinstance(item, RandomVariable):
|
|
140
|
+
return np.fromiter(
|
|
141
|
+
(self._program_buffer[self._slot_map[ind]] for ind in item),
|
|
142
|
+
dtype=self._program_buffer.dtype,
|
|
143
|
+
count=len(item)
|
|
144
|
+
)
|
|
170
145
|
else:
|
|
171
|
-
raise IndexError('unknown index type')
|
|
146
|
+
raise IndexError(f'unknown index type: {type(item)}')
|
|
172
147
|
|
|
173
148
|
def set_condition(self, *condition: Condition) -> None:
|
|
174
149
|
"""
|
|
@@ -211,7 +186,10 @@ class ProgramWithSlotmap:
|
|
|
211
186
|
|
|
212
187
|
Args:
|
|
213
188
|
rv: a random variable whose indicators are in the slot map.
|
|
214
|
-
values: list of values
|
|
189
|
+
values: list of values
|
|
190
|
+
|
|
191
|
+
Assumes:
|
|
192
|
+
len(values) == len(rv).
|
|
215
193
|
"""
|
|
216
194
|
for i in range(len(rv)):
|
|
217
195
|
self[rv[i]] = values[i]
|
|
@@ -30,11 +30,9 @@ def compile_results(
|
|
|
30
30
|
a compiled RawProgram.
|
|
31
31
|
"""
|
|
32
32
|
circuit: Circuit = pgm_circuit.circuit_top.circuit
|
|
33
|
-
if const_parameters:
|
|
34
|
-
parameter_values = pgm_circuit.parameter_values
|
|
35
|
-
number_of_indicators = pgm_circuit.number_of_indicators
|
|
33
|
+
if const_parameters and len(pgm_circuit.parameter_values) > 0:
|
|
36
34
|
with TmpConst(circuit) as tmp:
|
|
37
|
-
for slot, value in enumerate(parameter_values, start=number_of_indicators):
|
|
35
|
+
for slot, value in enumerate(pgm_circuit.parameter_values, start=pgm_circuit.number_of_indicators):
|
|
38
36
|
tmp.set_const(slot, value)
|
|
39
37
|
raw_program: RawProgram = compiler(*results, circuit=circuit)
|
|
40
38
|
else:
|
ck/pgm_circuit/wmc_program.py
CHANGED
|
@@ -132,6 +132,11 @@ class WMCProgram(ProgramWithSlotmap, ProbabilitySpace):
|
|
|
132
132
|
* calls rand.random() once and rand.randrange(...) n times,
|
|
133
133
|
* calls self.program().compute_result() at least once and <= 1 + m.
|
|
134
134
|
|
|
135
|
+
For more information about this sampler, see the publication:
|
|
136
|
+
Suresh, S., Drake, B. (2025). Sampling of Large Probabilistic Graphical Models
|
|
137
|
+
Using Arithmetic Circuits. AI 2024: Advances in Artificial Intelligence. AI 2024.
|
|
138
|
+
Lecture Notes in Computer Science, vol 15443. https://doi.org/10.1007/978-981-96-0351-0_13.
|
|
139
|
+
|
|
135
140
|
Args:
|
|
136
141
|
rvs: the list of random variables to sample; the
|
|
137
142
|
yielded state vectors are co-indexed with rvs; if None,
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
"/O2"
|
|
14
14
|
],
|
|
15
15
|
"include_dirs": [
|
|
16
|
-
"C:\\Users\\runneradmin\\AppData\\Local\\Temp\\build-env-
|
|
16
|
+
"C:\\Users\\runneradmin\\AppData\\Local\\Temp\\build-env-gp1o6j1g\\Lib\\site-packages\\numpy\\_core\\include"
|
|
17
17
|
],
|
|
18
18
|
"name": "ck.pgm_compiler.support.circuit_table._circuit_table_cy",
|
|
19
19
|
"sources": [
|
|
Binary file
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from typing import Sequence, Tuple, Dict
|
|
2
|
+
|
|
3
|
+
from ck.dataset.cross_table import CrossTable, Instance
|
|
4
|
+
from ck.pgm import RandomVariable, Indicator
|
|
5
|
+
from ck.probability.probability_space import ProbabilitySpace, Condition, check_condition
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class CrossTableProbabilitySpace(ProbabilitySpace):
    """
    Adapter presenting a CrossTable as a ProbabilitySpace.
    """

    def __init__(self, cross_table: CrossTable):
        """
        Enable probabilistic queries over a sample from a sample space.
        Note that this is not necessarily an efficient approach to calculating probabilities and statistics.

        Args:
            cross_table: a CrossTable to adapt to a ProbabilitySpace.
        """
        self._cross_table: CrossTable = cross_table

        # Position of each random variable within an instance of the cross-table,
        # keyed by the random variable's `idx`.
        positions: Dict[int, int] = {}
        for position, rv in enumerate(cross_table.rvs):
            positions[rv.idx] = position
        self._rv_idx_to_sample_idx: Dict[int, int] = positions

    @property
    def rvs(self) -> Sequence[RandomVariable]:
        """
        The random variables of the wrapped cross-table.
        """
        return self._cross_table.rvs

    def wmc(self, *condition: Condition) -> float:
        """
        Sum the weights of the cross-table instances that satisfy the given condition.

        For each random variable mentioned in the condition, an instance must be in
        one of the states named by the condition's indicators for that random variable.
        Random variables not mentioned in the condition are unconstrained.

        Args:
            condition: the condition, as accepted by `check_condition`.

        Returns:
            the total weight of the satisfying instances.
        """
        indicators: Tuple[Indicator, ...] = check_condition(condition)
        table_rvs: Sequence[RandomVariable] = self._cross_table.rvs

        # Gather, per random variable, the state indexes permitted by the condition.
        permitted = [set() for _ in table_rvs]
        for indicator in indicators:
            position = self._rv_idx_to_sample_idx[indicator.rv_idx]
            permitted[position].add(indicator.state_idx)

        # Convert to the states that are ruled out: the complement of the permitted
        # states for constrained random variables, nothing for unconstrained ones.
        excluded = [
            set(range(len(rv))).difference(states) if len(states) > 0 else states
            for rv, states in zip(table_rvs, permitted)
        ]

        # An instance contributes its weight unless one of its states is ruled out.
        return sum(
            0 if any((state in banned) for state, banned in zip(instance, excluded)) else weight
            for instance, weight in self._cross_table.items()
        )

    @property
    def z(self) -> float:
        """
        The total weight of the wrapped cross-table.
        """
        return self._cross_table.total_weight()
|