compiled-knowledge 4.0.0a25__cp312-cp312-macosx_11_0_arm64.whl → 4.1.0__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of compiled-knowledge might be problematic. Click here for more details.

Files changed (45) hide show
  1. ck/circuit/_circuit_cy.c +1 -1
  2. ck/circuit/_circuit_cy.cpython-312-darwin.so +0 -0
  3. ck/circuit_compiler/cython_vm_compiler/_compiler.c +152 -152
  4. ck/circuit_compiler/cython_vm_compiler/_compiler.cpython-312-darwin.so +0 -0
  5. ck/circuit_compiler/interpret_compiler.py +2 -2
  6. ck/circuit_compiler/llvm_compiler.py +4 -4
  7. ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.c +1 -1
  8. ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.cpython-312-darwin.so +0 -0
  9. ck/circuit_compiler/support/input_vars.py +4 -4
  10. ck/dataset/__init__.py +1 -0
  11. ck/dataset/cross_table.py +334 -0
  12. ck/dataset/dataset.py +682 -0
  13. ck/dataset/dataset_builder.py +519 -0
  14. ck/dataset/dataset_compute.py +140 -0
  15. ck/dataset/dataset_from_crosstable.py +64 -0
  16. ck/dataset/dataset_from_csv.py +151 -0
  17. ck/dataset/sampled_dataset.py +96 -0
  18. ck/learning/__init__.py +0 -0
  19. ck/learning/coalesce_cross_tables.py +403 -0
  20. ck/learning/model_from_cross_tables.py +296 -0
  21. ck/learning/parameters.py +117 -0
  22. ck/learning/train_generative_bn.py +198 -0
  23. ck/pgm.py +39 -35
  24. ck/pgm_circuit/marginals_program.py +5 -0
  25. ck/pgm_circuit/program_with_slotmap.py +23 -45
  26. ck/pgm_circuit/support/compile_circuit.py +2 -4
  27. ck/pgm_circuit/wmc_program.py +5 -0
  28. ck/pgm_compiler/support/circuit_table/_circuit_table_cy.c +1 -1
  29. ck/pgm_compiler/support/circuit_table/_circuit_table_cy.cpython-312-darwin.so +0 -0
  30. ck/probability/cross_table_probability_space.py +53 -0
  31. ck/probability/divergence.py +226 -0
  32. ck/probability/empirical_probability_space.py +1 -0
  33. ck/probability/probability_space.py +43 -19
  34. ck_demos/dataset/__init__.py +0 -0
  35. ck_demos/dataset/demo_dataset_builder.py +37 -0
  36. ck_demos/dataset/demo_dataset_from_sampler.py +18 -0
  37. ck_demos/learning/__init__.py +0 -0
  38. ck_demos/learning/demo_bayesian_network_from_cross_tables.py +70 -0
  39. ck_demos/learning/demo_simple_learning.py +55 -0
  40. ck_demos/sampling/demo_wmc_direct_sampler.py +2 -2
  41. {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0.dist-info}/METADATA +2 -1
  42. {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0.dist-info}/RECORD +45 -24
  43. {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0.dist-info}/WHEEL +0 -0
  44. {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0.dist-info}/licenses/LICENSE.txt +0 -0
  45. {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,117 @@
1
+ """
2
+ General functions for setting the parameter values of a PGM.
3
+ """
4
+ from typing import List, Tuple, TypeAlias
5
+
6
+ import numpy as np
7
+
8
+ from ck.dataset.cross_table import CrossTable
9
+ from ck.pgm import PGM, CPTPotentialFunction, Instance, SparsePotentialFunction, DensePotentialFunction, Factor
10
+ from ck.utils.map_list import MapList
11
+ from ck.utils.np_extras import NDArrayFloat64
12
+
13
+
14
+ ParameterValues: TypeAlias = List[CrossTable]
15
+
16
+
17
def make_factors(pgm: PGM, parameter_values: List[CrossTable]) -> None:
    """
    Add a new factor to `pgm` for each given cross-table, then set the potential
    function of each new factor from its cross-table.

    Each new factor is created over the random variables of its cross-table
    (`cross_table.rvs`), in the order of `parameter_values`. The potential
    function of each new factor is then chosen and populated by
    `set_potential_function`.

    Only the factors added by this call are paired with `parameter_values`;
    factors that were already in `pgm` keep their potential functions.

    This function modifies `pgm` in-place.

    Args:
        pgm: the PGM to add factors to.
        parameter_values: a cross-table for each factor to create.

    Assumes:
        `pgm.new_factor` appends the new factor to the end of `pgm.factors`.
    """
    from itertools import islice

    # Remember where the new factors will start, so that factors already
    # in the PGM are not paired with the given parameter values.
    number_of_existing_factors: int = pgm.number_of_factors

    for cross_table in parameter_values:
        pgm.new_factor(*cross_table.rvs)

    new_factors = islice(pgm.factors, number_of_existing_factors, None)
    for factor, cross_table in zip(new_factors, parameter_values):
        set_potential_function(factor, cross_table)
21
+
22
+
23
def set_potential_functions(pgm: PGM, parameter_values: List[CrossTable]) -> None:
    """
    Choose and populate a potential function for every factor of a PGM.

    The factors of `pgm` are paired, in order, with the cross-tables in
    `parameter_values`. Each pair is passed to `set_potential_function`, which
    heuristically selects the kind of potential function for the factor and
    then copies the cross-table weights into it.

    This function modifies `pgm` in-place.

    Args:
        pgm (PGM): the PGM to have its potential functions set.
        parameter_values: the parameter values, one cross-table per factor,
            co-indexed with `pgm.factors`. Pairing stops at the end of the
            shorter of the two sequences.
    """
    factors_with_values = zip(pgm.factors, parameter_values)
    for pgm_factor, cross_table in factors_with_values:
        set_potential_function(pgm_factor, cross_table)
37
+
38
+
39
def set_potential_function(factor: Factor, parameter_values: CrossTable) -> None:
    """
    Give the factor a heuristically chosen potential function and fill it
    with the weights from `parameter_values`.

    The kind of potential function depends on the number of entries in
    `parameter_values`:
        * no entries: the factor is set to zero (`factor.set_zero()`);
        * fewer than 100 entries, or more than 90% of `factor.number_of_states`:
          a dense potential function (`factor.set_dense()`);
        * otherwise: a sparse potential function (`factor.set_sparse()`).

    This function modifies `factor` in-place.

    Args:
        factor: The factor to update.
        parameter_values: the parameter values, as a cross-table mapping
            instances to weights.
    """
    entry_count: int = len(parameter_values)

    # Nothing to store: a zero potential function needs no parameters.
    if entry_count == 0:
        factor.set_zero()
        return

    # Small tables and nearly-full tables are stored densely, everything else sparsely.
    prefer_dense: bool = entry_count < 100 or entry_count > factor.number_of_states * 0.9
    target: DensePotentialFunction | SparsePotentialFunction = (
        factor.set_dense() if prefer_dense else factor.set_sparse()
    )

    for key, value in parameter_values.items():
        target[key] = value
64
+
65
+
66
def set_zero(pgm: PGM) -> None:
    """
    Reset every factor of the PGM to the zero potential function.

    This function modifies `pgm` in-place, calling `set_zero()` on each of
    its factors.

    Args:
        pgm: the PGM whose factors are to be zeroed.
    """
    for pgm_factor in pgm.factors:
        pgm_factor.set_zero()
72
+
73
+
74
def set_dense(pgm: PGM, parameter_values: List[CrossTable]) -> None:
    """
    Give every factor of the PGM a DensePotentialFunction and fill it with
    the weights of the corresponding cross-table.

    This function modifies `pgm` in-place.

    Args:
        pgm: the PGM to have its potential functions set.
        parameter_values: one cross-table per factor, co-indexed with
            `pgm.factors`. Pairing stops at the end of the shorter sequence.
    """
    for pgm_factor, cross_table in zip(pgm.factors, parameter_values):
        dense_function: DensePotentialFunction = pgm_factor.set_dense()
        for key, value in cross_table.items():
            dense_function[key] = value
83
+
84
+
85
def set_sparse(pgm: PGM, parameter_values: List[CrossTable]) -> None:
    """
    Give every factor of the PGM a SparsePotentialFunction and fill it with
    the weights of the corresponding cross-table.

    This function modifies `pgm` in-place.

    Args:
        pgm: the PGM to have its potential functions set.
        parameter_values: one cross-table per factor, co-indexed with
            `pgm.factors`. Pairing stops at the end of the shorter sequence.
    """
    for pgm_factor, cross_table in zip(pgm.factors, parameter_values):
        sparse_function: SparsePotentialFunction = pgm_factor.set_sparse()
        for key, value in cross_table.items():
            sparse_function[key] = value
94
+
95
+
96
def set_cpt(pgm: PGM, parameter_values: List[CrossTable], normalise_cpds: bool = True) -> None:
    """
    Set the potential function of each PGM factor to a CPTPotentialFunction,
    using the given parameter values.

    For each factor, the cross-table entries are grouped by parent instance
    (all states but the first) and each group becomes one conditional
    probability distribution (CPD) over the child random variable.
    Child states with no cross-table entry get the value zero.

    This function modifies `pgm` in-place.

    Args:
        pgm: the PGM to have its potential functions set.
        parameter_values: one cross-table per factor, co-indexed with
            `pgm.factors`. Pairing stops at the end of the shorter sequence.
        normalise_cpds: if true, each CPD is divided by its sum before being
            stored. A CPD whose sum is zero is stored unchanged (all zeros),
            as it cannot be normalised.

    Assumes:
        the first random variable of each cross-table is the child random
        variable, and the remaining random variables are its parents.
    """
    for factor, cpt in zip(pgm.factors, parameter_values):
        pot_function: CPTPotentialFunction = factor.set_cpt()

        # Group cpt values by parent instance
        cpds: MapList[Instance, Tuple[int, float]] = MapList()
        for instance, weight in cpt.items():
            cpds.append(instance[1:], (instance[0], weight))

        # Set the CPDs
        cpd_size = len(cpt.rvs[0])  # size of the child random variable
        for parent_instance, cpd in cpds.items():
            cpd_array: NDArrayFloat64 = np.zeros(cpd_size, dtype=np.float64)
            for child_state_index, weight in cpd:
                cpd_array[child_state_index] = weight
            if normalise_cpds:
                total = cpd_array.sum()
                # Dividing by a zero total would fill the CPD with NaN values,
                # so an all-zero CPD is left as it is.
                if total != 0:
                    cpd_array /= total
            pot_function.set_cpd(parent_instance, cpd_array)
@@ -0,0 +1,198 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import List, Mapping, Tuple
4
+
5
+ from ck.dataset import SoftDataset, HardDataset
6
+ from ck.dataset.cross_table import CrossTable, cross_table_from_dataset
7
+ from ck.learning.parameters import set_potential_functions, ParameterValues
8
+ from ck.pgm import PGM
9
+
10
+
11
def train_generative_bn(
    pgm: PGM,
    dataset: HardDataset | SoftDataset,
    *,
    dirichlet_prior: float | Mapping[int, float | CrossTable] = 0,
    check_bayesian_network: bool = True,
) -> None:
    """
    Train a Bayesian network generatively, by maximum likelihood.

    CPTs are computed from the dataset with `get_cpts`, then written into the
    given PGM with `set_potential_functions`, replacing its potential functions.

    Args:
        pgm: the probabilistic graphical model defining the model structure.
            Potential function values are ignored and need not be set.
        dataset: a dataset of random variable states.
        dirichlet_prior: provides a Dirichlet prior for each factor in `pgm`.
            This can be represented in multiple ways:
            (a) as a uniform prior that is the same for all factors, represented as a float value,
            (b) as a mapping from a factor index to a uniform prior, i.e., a float value,
            (c) as a mapping from a factor index to an arbitrary Dirichlet prior, i.e., a cross-table.
            If there is no entry in the mapping for a factor, then the value 0 will be used for that factor.
            If a cross-table is provided as a prior, then it must have the same random variables as
            the factor it pertains to.
            The default value for `dirichlet_prior` is 0.
            See `CrossTable` for more explanation.
        check_bayesian_network: if true and not `pgm.is_structure_bayesian` an exception will be raised.

    Raises:
        ValueError: if the given PGM does not have a Bayesian network structure, and check_bayesian_network is True.
    """
    # The structure is only inspected when the caller asked for the check.
    if check_bayesian_network:
        if not pgm.is_structure_bayesian:
            raise ValueError('the given PGM is not a Bayesian network')

    learned_cpts: List[CrossTable] = get_cpts(
        pgm=pgm,
        dataset=dataset,
        dirichlet_prior=dirichlet_prior,
    )
    set_potential_functions(pgm, learned_cpts)
51
+
52
+
53
def get_cpts(
    pgm: PGM,
    dataset: HardDataset | SoftDataset,
    *,
    dirichlet_prior: float | Mapping[int, float | CrossTable] = 0,
) -> ParameterValues:
    """
    Compute a conditional probability table (CPT) for each factor of the given PGM.

    A cross-table is obtained for each factor from `get_factor_cross_tables`, and
    each is converted to a CPT by `cpt_from_crosstab`. The resulting parameter
    values can be used directly to update the parameters of the given PGM, so
    long as it has a Bayesian network structure.

    To update the given PGM from the resulting `cpts` use `set_potential_functions(pgm, cpts)`.

    Args:
        pgm: the probabilistic graphical model defining the model structure.
            Potential function values are ignored and need not be set.
        dataset: a dataset of random variable states.
        dirichlet_prior: provides a Dirichlet prior for each factor in `pgm`.
            This can be represented in multiple ways:
            (a) as a uniform prior that is the same for all factors, represented as a float value,
            (b) as a mapping from a factor index to a uniform prior, i.e., a float value,
            (c) as a mapping from a factor index to an arbitrary Dirichlet prior, i.e., a cross-table.
            If there is no entry in the mapping for a factor, then the value 0 will be used for that factor.
            If a cross-table is provided as a prior, then it must have the same random variables as
            the factor it pertains to.
            The default value for `dirichlet_prior` is 0.
            See `CrossTable` for more explanation.

    Returns:
        ParameterValues object, a CPT for each factor in the given PGM, as a list of cross-tables, co-indexed
        with the PGM factors.
    """
    factor_cross_tables: List[CrossTable] = get_factor_cross_tables(
        pgm=pgm,
        dataset=dataset,
        dirichlet_prior=dirichlet_prior,
    )
    return [cpt_from_crosstab(cross_table) for cross_table in factor_cross_tables]
92
+
93
+
94
def get_factor_cross_tables(
    pgm: PGM,
    dataset: HardDataset | SoftDataset,
    *,
    dirichlet_prior: float | Mapping[int, float | CrossTable] = 0,
) -> ParameterValues:
    """
    Build one cross-table per factor of the given PGM from the given dataset.

    Each cross-table is produced by `cross_table_from_dataset` over the random
    variables of the factor, using the Dirichlet prior selected for that factor.

    Args:
        pgm: the probabilistic graphical model defining the model structure.
            Potential function values are ignored and need not be set.
        dataset: a dataset of random variable states.
        dirichlet_prior: provides a Dirichlet prior for each factor in `pgm`.
            This can be represented in multiple ways:
            (a) as a uniform prior that is the same for all factors, represented as a float value,
            (b) as a mapping from a factor index to a uniform prior, i.e., a float value,
            (c) as a mapping from a factor index to an arbitrary Dirichlet prior, i.e., a cross-table.
            If there is no entry in the mapping for a factor, then the value 0 will be used for that factor.
            If a cross-table is provided as a prior, then it must have the same random variables as
            the factor it pertains to.
            The default value for `dirichlet_prior` is 0.
            See `CrossTable` for more explanation.

    Returns:
        ParameterValues object, a crosstable for each factor in the given PGM, as
        per `cross_table_from_dataset`.

    Assumes:
        every random variable of the PGM is in the dataset.
    """
    per_factor_priors: Mapping[int, float | CrossTable]
    fallback_prior: float

    # A plain number is a prior shared by all factors; anything else is
    # treated as a mapping from factor index to prior.
    if isinstance(dirichlet_prior, (float, int)):
        per_factor_priors, fallback_prior = {}, dirichlet_prior
    else:
        per_factor_priors, fallback_prior = dirichlet_prior, 0

    result: List[CrossTable] = []
    for factor in pgm.factors:
        factor_prior = per_factor_priors.get(factor.idx, fallback_prior)
        result.append(
            cross_table_from_dataset(
                dataset,
                factor.rvs,
                dirichlet_prior=factor_prior,
            )
        )
    return result
144
+
145
+
146
def cpt_from_crosstab(crosstab: CrossTable) -> CrossTable:
    """
    Normalise the given cross-table into a conditional probability table (CPT),
    treating its first random variable as the child and the remaining random
    variables as the parents.

    This is `cpt_and_parent_sums_from_crosstab` with the parent sums discarded.

    Args:
        crosstab: a CrossTable representing the weight of unique instances.

    Returns:
        A cross-table that is a conditional probability table.

    Assumes:
        the first random variable in `crosstab.rvs` is the child random variable.
    """
    cpt, _parent_sums = cpt_and_parent_sums_from_crosstab(crosstab)
    return cpt
162
+
163
+
164
def cpt_and_parent_sums_from_crosstab(crosstab: CrossTable) -> Tuple[CrossTable, CrossTable]:
    """
    Normalise the given cross-table into a conditional probability table (CPT),
    treating its first random variable as the child and the remaining random
    variables as the parents, and also return the normalising sums.

    Args:
        crosstab: a CrossTable representing the weight of unique instances.

    Returns:
        A cross-table that is a conditional probability table.
        A cross-table of the parent sums that were divided out of `crosstab`

    Assumes:
        the first random variable in `crosstab.rvs` is the child random variable.
    """
    # Total weight per parent instance: drop the child state from each instance.
    parent_rvs = crosstab.rvs[1:]
    weights_by_parent = (
        (child_and_parents[1:], instance_weight)
        for child_and_parents, instance_weight in crosstab.items()
    )
    parent_sums: CrossTable = CrossTable(rvs=parent_rvs, update=weights_by_parent)

    # Each weight divided by the total of its parent instance gives the CPT.
    all_rvs = crosstab.rvs
    normalised_weights = (
        (child_and_parents, instance_weight / parent_sums[child_and_parents[1:]])
        for child_and_parents, instance_weight in crosstab.items()
    )
    cpt = CrossTable(rvs=all_rvs, update=normalised_weights)

    return cpt, parent_sums
ck/pgm.py CHANGED
@@ -19,16 +19,18 @@ State: TypeAlias = Union[int, str, bool, float, None]
19
19
  The type for a possible state of a random variable.
20
20
  """
21
21
 
22
- Instance: TypeAlias = Sequence[int]
22
+ Instance: TypeAlias = Tuple[int, ...]
23
23
  """
24
- An instance (of a sequence of random variables) is a sequence of integers
24
+ An instance (of a sequence of random variables) is a tuple of integers
25
25
  that are state indexes, co-indexed with a known sequence of random variables.
26
26
  """
27
27
 
28
- Key: TypeAlias = Union[Instance, int]
28
+ Key: TypeAlias = Union[Sequence[int], int]
29
29
  """
30
- A key identifies an instance, either as an instance itself or a
31
- single integer, representing an instance with one dimension.
30
+ A key identifies an instance, either as a sequence of integers or a
31
+ single integer. The integers are state indexes, co-indexed with a known
32
+ sequence of random variables. A single integer represents an instance with
33
+ one dimension.
32
34
  """
33
35
 
34
36
  Shape: TypeAlias = Sequence[int]
@@ -594,9 +596,11 @@ class PGM:
594
596
 
595
597
  # Factors form a DAG
596
598
  states: NDArrayUInt8 = np.zeros(self.number_of_factors, dtype=np.uint8)
597
- for factor in self._factors:
598
- if self._has_cycle(factor, child_to_factor, states):
599
- return False
599
+ if any(
600
+ self._has_cycle(factor, child_to_factor, states)
601
+ for factor in self._factors
602
+ ):
603
+ return False
600
604
 
601
605
  # All tests passed
602
606
  return True
@@ -776,7 +780,7 @@ class PGM:
776
780
  next_prefix: str = prefix + indent
777
781
  next_next_prefix: str = next_prefix + indent
778
782
 
779
- print(f'{prefix}PGM id={id(self)} name={self.name!r}')
783
+ print(f'{prefix}PGM id={id(self)}')
780
784
  self.dump_synopsis(prefix=next_prefix, precision=precision, max_state_digits=max_state_digits)
781
785
 
782
786
  print(f'{prefix}random variables ({self.number_of_rvs})')
@@ -790,16 +794,16 @@ class PGM:
790
794
 
791
795
  print(f'{prefix}factors ({self.number_of_factors})')
792
796
  for factor in self.factors:
793
- rv_idxs = [rv.idx for rv in factor.rvs]
797
+ factor_rvs = ', '.join(repr(rv.name) for rv in factor.rvs)
794
798
  if factor.is_zero:
795
- function_ref = '<zero>'
799
+ function_ref = '<ZeroPotentialFunction>'
796
800
  else:
797
801
  function = factor.function
798
802
  function_ref = f'{id(function)}: {function.__class__.__name__}'
799
803
 
800
- print(f'{next_prefix}{factor.idx:>3} rvs={rv_idxs} function={function_ref}')
804
+ print(f'{next_prefix}{factor.idx:>3} rvs=({factor_rvs}) function={function_ref}')
801
805
 
802
- print(f'{prefix}functions ({self.number_of_functions})')
806
+ print(f'{prefix}functions, excluding ZeroPotentialFunction ({sum(1 for _ in self.non_zero_functions)})')
803
807
  for function in sorted(self.non_zero_functions, key=lambda f: id(f)):
804
808
  print(f'{next_prefix}{id(function):>13}: {function.__class__.__name__}')
805
809
  function.dump(prefix=next_next_prefix, show_function_values=show_function_values, show_id_class=False)
@@ -1871,7 +1875,7 @@ class PotentialFunction(ABC):
1871
1875
  a hypothetical parameter index assuming that every valid key has a unique parameter
1872
1876
  as per DensePotentialFunction.
1873
1877
  """
1874
- return _natural_key_idx(self._shape, key)
1878
+ return natural_key_idx(self._shape, key)
1875
1879
 
1876
1880
  def param_id(self, param_idx: int) -> ParamId:
1877
1881
  """
@@ -2029,7 +2033,7 @@ class ZeroPotentialFunction(PotentialFunction):
2029
2033
  return 0
2030
2034
 
2031
2035
  def param_idx(self, key: Key) -> int:
2032
- return _natural_key_idx(self._shape, key)
2036
+ return natural_key_idx(self._shape, key)
2033
2037
 
2034
2038
  def is_cpt(self, tolerance=DEFAULT_CPT_TOLERANCE) -> bool:
2035
2039
  return True
@@ -3364,26 +3368,7 @@ def rv_instances_as_indicators(*rvs: RandomVariable, flip: bool = False) -> Iter
3364
3368
  return _combos(rvs, flip=not flip)
3365
3369
 
3366
3370
 
3367
- def _key_to_instance(key: Key) -> Instance:
3368
- """
3369
- Convert a key to an instance.
3370
-
3371
- Args:
3372
- key: a key into a state space.
3373
-
3374
- Returns:
3375
- A instance from the state space, as a tuple of state indexes, co-indexed with the given shape.
3376
-
3377
- Assumes:
3378
- The key is valid for the implied state space.
3379
- """
3380
- if isinstance(key, int):
3381
- return (key,)
3382
- else:
3383
- return tuple(key)
3384
-
3385
-
3386
- def _natural_key_idx(shape: Shape, key: Key) -> int:
3371
+ def natural_key_idx(shape: Shape, key: Key) -> int:
3387
3372
  """
3388
3373
  What is the natural index of the given key, assuming the given shape.
3389
3374
 
@@ -3409,6 +3394,25 @@ def _natural_key_idx(shape: Shape, key: Key) -> int:
3409
3394
  return result
3410
3395
 
3411
3396
 
3397
+ def _key_to_instance(key: Key) -> Instance:
3398
+ """
3399
+ Convert a key to an instance.
3400
+
3401
+ Args:
3402
+ key: a key into a state space.
3403
+
3404
+ Returns:
3405
+ A instance from the state space, as a tuple of state indexes, co-indexed with the given shape.
3406
+
3407
+ Assumes:
3408
+ The key is valid for the implied state space.
3409
+ """
3410
+ if isinstance(key, int):
3411
+ return (key,)
3412
+ else:
3413
+ return tuple(key)
3414
+
3415
+
3412
3416
  def _zero_space(shape: Shape) -> int:
3413
3417
  """
3414
3418
  Return the size of the zero space of the given shape. This is the number
@@ -308,6 +308,11 @@ class MarginalsProgram(ProgramWithSlotmap, ProbabilitySpace):
308
308
  The sampler will yield state lists, where the state
309
309
  values are co-indexed with rvs, or self.rvs if rvs is None.
310
310
 
311
+ For more information about this sampler, see the publication:
312
+ Suresh, S., Drake, B. (2025). Sampling of Large Probabilistic Graphical Models
313
+ Using Arithmetic Circuits. AI 2024: Advances in Artificial Intelligence. AI 2024.
314
+ Lecture Notes in Computer Science, vol 15443. https://doi.org/10.1007/978-981-96-0351-0_13.
315
+
311
316
  Args:
312
317
  rvs: the list of random variables to sample; the
313
318
  yielded state vectors are co-indexed with rvs; if None,
@@ -1,6 +1,8 @@
1
- from typing import Tuple, Sequence, Dict, Iterable
1
+ from typing import Tuple, Sequence, Dict
2
2
 
3
- from ck.pgm import RandomVariable, rv_instances, Instance, rv_instances_as_indicators, Indicator, ParamId
3
+ import numpy as np
4
+
5
+ from ck.pgm import RandomVariable, Indicator, ParamId
4
6
  from ck.pgm_circuit.slot_map import SlotMap, SlotKey
5
7
  from ck.probability.probability_space import Condition, check_condition
6
8
  from ck.program.program_buffer import ProgramBuffer
@@ -69,40 +71,6 @@ class ProgramWithSlotmap:
69
71
  def slot_map(self) -> SlotMap:
70
72
  return self._slot_map
71
73
 
72
- def instances(self, flip: bool = False) -> Iterable[Instance]:
73
- """
74
- Enumerate instances of the random variables.
75
-
76
- Each instance is a tuples of state indexes, co-indexed with the given random variables.
77
-
78
- The order is the natural index order (i.e., last random variable changing most quickly).
79
-
80
- Args:
81
- flip: if true, then first random variable changes most quickly.
82
-
83
- Returns:
84
- an iteration over tuples, each tuple holds state indexes
85
- co-indexed with the given random variables.
86
- """
87
- return rv_instances(*self._rvs, flip=flip)
88
-
89
- def instances_as_indicators(self, flip: bool = False) -> Iterable[Sequence[Indicator]]:
90
- """
91
- Enumerate instances of the random variables.
92
-
93
- Each instance is a tuples of indicators, co-indexed with the given random variables.
94
-
95
- The order is the natural index order (i.e., last random variable changing most quickly).
96
-
97
- Args:
98
- flip: if true, then first random variable changes most quickly.
99
-
100
- Returns:
101
- an iteration over tuples, each tuples holds random variable indicators
102
- co-indexed with the given random variables.
103
- """
104
- return rv_instances_as_indicators(*self._rvs, flip=flip)
105
-
106
74
  def compute(self) -> NDArrayNumeric:
107
75
  """
108
76
  Execute the program to compute and return the result. As per `ProgramBuffer.compute`.
@@ -146,29 +114,36 @@ class ProgramWithSlotmap:
146
114
  """
147
115
  return self._program_buffer.vars
148
116
 
149
- def __setitem__(self, item: int | slice | SlotKey | Iterable[SlotKey], value: float) -> None:
117
+ def __setitem__(self, item: int | slice | SlotKey | RandomVariable, value: float) -> None:
150
118
  """
151
- Set one or more input slot values, identified by slot keys.
119
+ Set input slot value/s.
152
120
  """
153
121
  if isinstance(item, (int, slice)):
154
122
  self._program_buffer[item] = value
155
123
  elif isinstance(item, (Indicator, ParamId)):
156
124
  self._program_buffer[self._slot_map[item]] = value
125
+ elif isinstance(item, RandomVariable):
126
+ for ind in item:
127
+ self._program_buffer[self._slot_map[ind]] = value
157
128
  else:
158
- # Assume its iterable
159
- for i in item:
160
- self[i] = value
129
+ raise IndexError(f'unknown index type: {type(item)}')
161
130
 
162
- def __getitem__(self, item: int | slice | SlotKey) -> NDArrayNumeric:
131
+ def __getitem__(self, item: int | slice | SlotKey | RandomVariable) -> NDArrayNumeric:
163
132
  """
164
- Get an input slot value, identified by a slot key.
133
+ Get input slot value/s.
165
134
  """
166
135
  if isinstance(item, (int, slice)):
167
136
  return self._program_buffer[item]
168
137
  elif isinstance(item, (Indicator, ParamId)):
169
138
  return self._program_buffer[self._slot_map[item]]
139
+ elif isinstance(item, RandomVariable):
140
+ return np.fromiter(
141
+ (self._program_buffer[self._slot_map[ind]] for ind in item),
142
+ dtype=self._program_buffer.dtype,
143
+ count=len(item)
144
+ )
170
145
  else:
171
- raise IndexError('unknown index type')
146
+ raise IndexError(f'unknown index type: {type(item)}')
172
147
 
173
148
  def set_condition(self, *condition: Condition) -> None:
174
149
  """
@@ -211,7 +186,10 @@ class ProgramWithSlotmap:
211
186
 
212
187
  Args:
213
188
  rv: a random variable whose indicators are in the slot map.
214
- values: list of values, assumes len(values) == len(rv).
189
+ values: list of values
190
+
191
+ Assumes:
192
+ len(values) == len(rv).
215
193
  """
216
194
  for i in range(len(rv)):
217
195
  self[rv[i]] = values[i]
@@ -30,11 +30,9 @@ def compile_results(
30
30
  a compiled RawProgram.
31
31
  """
32
32
  circuit: Circuit = pgm_circuit.circuit_top.circuit
33
- if const_parameters:
34
- parameter_values = pgm_circuit.parameter_values
35
- number_of_indicators = pgm_circuit.number_of_indicators
33
+ if const_parameters and len(pgm_circuit.parameter_values) > 0:
36
34
  with TmpConst(circuit) as tmp:
37
- for slot, value in enumerate(parameter_values, start=number_of_indicators):
35
+ for slot, value in enumerate(pgm_circuit.parameter_values, start=pgm_circuit.number_of_indicators):
38
36
  tmp.set_const(slot, value)
39
37
  raw_program: RawProgram = compiler(*results, circuit=circuit)
40
38
  else:
@@ -132,6 +132,11 @@ class WMCProgram(ProgramWithSlotmap, ProbabilitySpace):
132
132
  * calls rand.random() once and rand.randrange(...) n times,
133
133
  * calls self.program().compute_result() at least once and <= 1 + m.
134
134
 
135
+ For more information about this sampler, see the publication:
136
+ Suresh, S., Drake, B. (2025). Sampling of Large Probabilistic Graphical Models
137
+ Using Arithmetic Circuits. AI 2024: Advances in Artificial Intelligence. AI 2024.
138
+ Lecture Notes in Computer Science, vol 15443. https://doi.org/10.1007/978-981-96-0351-0_13.
139
+
135
140
  Args:
136
141
  rvs: the list of random variables to sample; the
137
142
  yielded state vectors are co-indexed with rvs; if None,
@@ -15,7 +15,7 @@
15
15
  "-O3"
16
16
  ],
17
17
  "include_dirs": [
18
- "/private/var/folders/y6/nj790rtn62lfktb1sh__79hc0000gn/T/build-env-q2xes9a1/lib/python3.12/site-packages/numpy/_core/include"
18
+ "/private/var/folders/y6/nj790rtn62lfktb1sh__79hc0000gn/T/build-env-bjhshlet/lib/python3.12/site-packages/numpy/_core/include"
19
19
  ],
20
20
  "name": "ck.pgm_compiler.support.circuit_table._circuit_table_cy",
21
21
  "sources": [
@@ -0,0 +1,53 @@
1
+ from typing import Sequence, Tuple, Dict
2
+
3
+ from ck.dataset.cross_table import CrossTable, Instance
4
+ from ck.pgm import RandomVariable, Indicator
5
+ from ck.probability.probability_space import ProbabilitySpace, Condition, check_condition
6
+
7
+
8
class CrossTableProbabilitySpace(ProbabilitySpace):
    def __init__(self, cross_table: CrossTable):
        """
        Adapt a cross-table so that it can answer probabilistic queries.
        Note that this is not necessarily an efficient approach to calculating probabilities and statistics.

        Args:
            cross_table: a CrossTable to adapt to a ProbabilitySpace.
        """
        self._cross_table: CrossTable = cross_table

        # Position of each random variable within a cross-table instance,
        # keyed by the random variable's index.
        position_of_rv: Dict[int, int] = {}
        for position, rv in enumerate(cross_table.rvs):
            position_of_rv[rv.idx] = position
        self._rv_idx_to_sample_idx: Dict[int, int] = position_of_rv

    @property
    def rvs(self) -> Sequence[RandomVariable]:
        """
        The random variables of the underlying cross-table.
        """
        return self._cross_table.rvs

    def wmc(self, *condition: Condition) -> float:
        """
        Sum the weights of the cross-table instances that satisfy the condition.

        Indicators of the same random variable are treated as alternatives:
        an instance satisfies the condition if, for every random variable that
        is mentioned, its state is one of the mentioned states.

        Args:
            condition: zero or more conditions, as accepted by `check_condition`.

        Returns:
            the total weight of the satisfying instances.
        """
        indicators: Tuple[Indicator, ...] = check_condition(condition)
        table_rvs: Sequence[RandomVariable] = self._cross_table.rvs

        # Collect the permitted states of each random variable.
        permitted = [set() for _ in table_rvs]
        for indicator in indicators:
            permitted[self._rv_idx_to_sample_idx[indicator.rv_idx]].add(indicator.state_idx)

        # Turn permitted states into excluded states. An unmentioned random
        # variable keeps its empty set, i.e., nothing is excluded for it.
        excluded = [
            set(range(len(rv))) - states if states else states
            for rv, states in zip(table_rvs, permitted)
        ]

        # An instance contributes its weight unless any of its states is excluded.
        return sum(
            0 if any(state in banned for state, banned in zip(instance, excluded)) else weight
            for instance, weight in self._cross_table.items()
        )

    @property
    def z(self) -> float:
        """
        The total weight of the underlying cross-table.
        """
        return self._cross_table.total_weight()