compiled-knowledge 4.0.0a25__cp312-cp312-win32.whl → 4.1.0__cp312-cp312-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of compiled-knowledge might be problematic. Click here for more details.

Files changed (45) hide show
  1. ck/circuit/_circuit_cy.c +1 -1
  2. ck/circuit/_circuit_cy.cp312-win32.pyd +0 -0
  3. ck/circuit_compiler/cython_vm_compiler/_compiler.c +152 -152
  4. ck/circuit_compiler/cython_vm_compiler/_compiler.cp312-win32.pyd +0 -0
  5. ck/circuit_compiler/interpret_compiler.py +2 -2
  6. ck/circuit_compiler/llvm_compiler.py +4 -4
  7. ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.c +1 -1
  8. ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.cp312-win32.pyd +0 -0
  9. ck/circuit_compiler/support/input_vars.py +4 -4
  10. ck/dataset/__init__.py +1 -0
  11. ck/dataset/cross_table.py +334 -0
  12. ck/dataset/dataset.py +682 -0
  13. ck/dataset/dataset_builder.py +519 -0
  14. ck/dataset/dataset_compute.py +140 -0
  15. ck/dataset/dataset_from_crosstable.py +64 -0
  16. ck/dataset/dataset_from_csv.py +151 -0
  17. ck/dataset/sampled_dataset.py +96 -0
  18. ck/learning/__init__.py +0 -0
  19. ck/learning/coalesce_cross_tables.py +403 -0
  20. ck/learning/model_from_cross_tables.py +296 -0
  21. ck/learning/parameters.py +117 -0
  22. ck/learning/train_generative_bn.py +198 -0
  23. ck/pgm.py +39 -35
  24. ck/pgm_circuit/marginals_program.py +5 -0
  25. ck/pgm_circuit/program_with_slotmap.py +23 -45
  26. ck/pgm_circuit/support/compile_circuit.py +2 -4
  27. ck/pgm_circuit/wmc_program.py +5 -0
  28. ck/pgm_compiler/support/circuit_table/_circuit_table_cy.c +1 -1
  29. ck/pgm_compiler/support/circuit_table/_circuit_table_cy.cp312-win32.pyd +0 -0
  30. ck/probability/cross_table_probability_space.py +53 -0
  31. ck/probability/divergence.py +226 -0
  32. ck/probability/empirical_probability_space.py +1 -0
  33. ck/probability/probability_space.py +43 -19
  34. ck_demos/dataset/__init__.py +0 -0
  35. ck_demos/dataset/demo_dataset_builder.py +37 -0
  36. ck_demos/dataset/demo_dataset_from_sampler.py +18 -0
  37. ck_demos/learning/__init__.py +0 -0
  38. ck_demos/learning/demo_bayesian_network_from_cross_tables.py +70 -0
  39. ck_demos/learning/demo_simple_learning.py +55 -0
  40. ck_demos/sampling/demo_wmc_direct_sampler.py +2 -2
  41. {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0.dist-info}/METADATA +2 -1
  42. {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0.dist-info}/RECORD +45 -24
  43. {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0.dist-info}/WHEEL +0 -0
  44. {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0.dist-info}/licenses/LICENSE.txt +0 -0
  45. {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,226 @@
1
+ """
2
+ This module implements several divergences which measure the difference
3
+ between two distributions.
4
+ """
5
+ import math
6
+ from typing import Sequence
7
+
8
+ import numpy as np
9
+
10
+ from ck.pgm import RandomVariable, rv_instances_as_indicators, PGM
11
+ from ck.probability.probability_space import ProbabilitySpace
12
+
13
+ _NAN: float = np.nan # Not-a-number (i.e., the result of an invalid calculation).
14
+
15
+
16
+ def kl(p: ProbabilitySpace, q: ProbabilitySpace) -> float:
17
+ """
18
+ Compute the Kullback-Leibler divergence between p & q,
19
+ where p is the true distribution.
20
+
21
+ This implementation uses logarithms, base 2.
22
+
23
+ Args:
24
+ p: a probability space to compare to.
25
+ q: the other probability space.
26
+
27
+ Returns:
28
+ the Kullback–Leibler (KL) divergence of p & q, where p is
29
+ the true distribution, or NaN if any state has non-positive probability in `p` or `q`.
30
+
31
+ Raises:
32
+ ValueError: if `p` and `q` do not have compatible random variables. Compatibility requires:
33
+ * `len(p.rvs) == len(q.rvs)`
34
+ * `len(q.rvs[i]) == len(p.rvs[i])` for all `i`
35
+ * `q.rvs[i].idx == p.rvs[i].idx` for all `i`.
36
+
37
+ Warning:
38
+ this method will enumerate the whole probability space.
39
+ """
40
+ if not _compatible_rvs(p.rvs, q.rvs):
41
+ raise ValueError('incompatible random variables')
42
+
43
+ total = 0.0
44
+ for x in rv_instances_as_indicators(*p.rvs):
45
+ p_x = p.probability(*x)
46
+ q_x = q.probability(*x)
47
+ if p_x <= 0 or q_x <= 0:
48
+ return _NAN
49
+ total += p_x * math.log2(p_x / q_x)
50
+ return total
51
+
52
+
53
+ def pseudo_kl(p: ProbabilitySpace, q: ProbabilitySpace) -> float:
54
+ """
55
+ A kind of KL divergence, factored by the structure of `p`.
56
+ This is an experimental measure.
57
+
58
+ This implementation uses logarithms, base 2.
59
+
60
+ Args:
61
+ p: a probability space to compare to.
62
+ q: the other probability space.
63
+
64
+ Returns:
65
+ the pseudo KL divergence of p & q, factored by the structure of `p`.
66
+
67
+ Raises:
68
+ ValueError: if `p` and `q` do not have compatible random variables. Compatibility requires:
69
+ * `len(p.rvs) == len(q.rvs)`
70
+ * `len(q.rvs[i]) == len(p.rvs[i])` for all `i`
71
+ * `q.rvs[i].idx == p.rvs[i].idx` for all `i`.
72
+ ValueError: if not all random variables of `p` are from a single PGM, which must
73
+ have a Bayesian network structure.
74
+ """
75
+ p_rvs: Sequence[RandomVariable] = p.rvs
76
+ q_rvs: Sequence[RandomVariable] = q.rvs
77
+
78
+ if not _compatible_rvs(p_rvs, q_rvs):
79
+ raise ValueError('incompatible random variables')
80
+
81
+ if len(p_rvs) == 0:
82
+ return _NAN
83
+
84
+ pgm: PGM = p_rvs[0].pgm
85
+ if any(rv.pgm is not pgm for rv in p_rvs):
86
+ raise ValueError('p random variables are not from a single PGM.')
87
+ if not pgm.is_structure_bayesian:
88
+ raise ValueError('p does not have Bayesian network structure.')
89
+
90
+ # Across the two spaces, corresponding random variables are equivalent;
91
+ # i.e., same number of states and same `idx` values. Therefore,
92
+ # indicators from either one space can be used in both spaces.
93
+
94
+ total: float = 0
95
+ for factor in pgm.factors:
96
+ for x in rv_instances_as_indicators(*factor.rvs): # every possible state of factor rvs
97
+ p_x = p.probability(*x)
98
+ q_x = q.probability(*x)
99
+ if p_x <= 0 or q_x <= 0:
100
+ return _NAN
101
+ total += p_x * math.log2(p_x / q_x)
102
+ return total
103
+
104
+
105
+ def hi(p: ProbabilitySpace, q: ProbabilitySpace) -> float:
106
+ """
107
+ Compute the histogram intersection between the two given probability spaces.
108
+
109
+ The histogram intersection between two probability spaces P and Q,
110
+ with state spaces X, is defined as:
111
+ HI(P, Q) = sum(min(P(x), Q(x)) for x in X)
112
+
113
+ Args:
114
+ p: a probability space to compare to.
115
+ q: the other probability space.
116
+
117
+ Returns:
118
+ the histogram intersection between the two probability spaces.
119
+
120
+ Raises:
121
+ ValueError: if `p` and `q` do not have compatible random variables. Compatibility requires:
122
+ * `len(p.rvs) == len(q.rvs)`
123
+ * `len(q.rvs[i]) == len(p.rvs[i])` for all `i`
124
+ * `q.rvs[i].idx == p.rvs[i].idx` for all `i`.
125
+
126
+ Warning:
127
+ this method will enumerate the whole probability space.
128
+
129
+ """
130
+ p_rvs: Sequence[RandomVariable] = p.rvs
131
+ q_rvs: Sequence[RandomVariable] = q.rvs
132
+
133
+ if not _compatible_rvs(p_rvs, q_rvs):
134
+ raise ValueError('incompatible random variables')
135
+
136
+ # Across the two spaces, corresponding random variables are equivalent;
137
+ # i.e., same number of states and same `idx` values. Therefore,
138
+ # indicators from either one space can be used in both spaces.
139
+
140
+ return sum(
141
+ min(p.probability(*x), q.probability(*x))
142
+ for x in rv_instances_as_indicators(*p_rvs)
143
+ )
144
+
145
+
146
+ def fhi(p: ProbabilitySpace, q: ProbabilitySpace) -> float:
147
+ """
148
+ Compute the factored histogram intersection between the two given probability spaces.
149
+
150
+ The factored histogram intersection between two probability spaces P and Q,
151
+ with state spaces X and factorisation F, is defined as:
152
+ FHI(P, Q) = 1/n sum(P(Y=y) CHI(P, Q, X | Y=y) for each CPT row (X | Y=y) in F)
153
+ where:
154
+ CHI(P, Q, X | Y=y) = HI(P(X | Y=y), Q(X | Y=y))
155
+ HI(P, Q) = sum(min(P(X=x), Q(X=x)) for x in f)
156
+
157
+ The value of _n_ is the sum of P(Y=y) over all CPT rows. However,
158
+ this always equals the number of CPTs, i.e., the number of random
159
+ variables.
160
+
161
+ The factorisation F is taken from `p`.
162
+
163
+ For more information about factored histogram intersection, see the publication:
164
+ Suresh, S., Drake, B. (2025). Sampling of Large Probabilistic Graphical Models
165
+ Using Arithmetic Circuits. AI 2024: Advances in Artificial Intelligence. AI 2024.
166
+ Lecture Notes in Computer Science, vol 15443. https://doi.org/10.1007/978-981-96-0351-0_13.
167
+
168
+ Args:
169
+ p: a probability space to compare to.
170
+ q: the other probability space.
171
+
172
+ Returns:
173
+ the factored histogram intersection between the two probability spaces.
174
+
175
+ Raises:
176
+ ValueError: if `p` and `q` do not have compatible random variables. Compatibility requires:
177
+ * `len(p.rvs) == len(q.rvs)`
178
+ * `len(q.rvs[i]) == len(p.rvs[i])` for all `i`
179
+ * `q.rvs[i].idx == p.rvs[i].idx` for all `i`.
180
+ ValueError: if not all random variables of `p` are from a single PGM, which must
181
+ have a Bayesian network structure.
182
+ """
183
+ p_rvs: Sequence[RandomVariable] = p.rvs
184
+ q_rvs: Sequence[RandomVariable] = q.rvs
185
+
186
+ if not _compatible_rvs(p_rvs, q_rvs):
187
+ raise ValueError('incompatible random variables')
188
+
189
+ if len(p_rvs) == 0:
190
+ return 0
191
+
192
+ pgm: PGM = p_rvs[0].pgm
193
+ if any(rv.pgm is not pgm for rv in p_rvs):
194
+ raise ValueError('p random variables are not from a single PGM.')
195
+ if not pgm.is_structure_bayesian:
196
+ raise ValueError('p does not have Bayesian network structure.')
197
+
198
+ # Across the two spaces, corresponding random variables are equivalent;
199
+ # i.e., same number of states and same `idx` values. Therefore,
200
+ # indicators from either one space can be used in both spaces.
201
+
202
+ # Loop over all CPTs, accumulating the total
203
+ total: float = 0
204
+ for factor in pgm.factors:
205
+ child: RandomVariable = factor.rvs[0]
206
+ parents: Sequence[RandomVariable] = factor.rvs[1:]
207
+ # Loop over all rows of the CPT
208
+ for parent_indicators in rv_instances_as_indicators(*parents):
209
+ p_marginal = p.marginal_distribution(child, condition=parent_indicators)
210
+ q_marginal = q.marginal_distribution(child, condition=parent_indicators)
211
+ row_hi = np.minimum(p_marginal, q_marginal).sum().item()
212
+ pr_row = p.probability(*parent_indicators)
213
+ total += pr_row * row_hi
214
+
215
+ return total / len(p_rvs)
216
+
217
+
218
+ def _compatible_rvs(rvs1: Sequence[RandomVariable], rvs2: Sequence[RandomVariable]) -> bool:
219
+ """
220
+ The rvs are compatible if they have the same number of random variables
221
+ and the corresponding indicators are equal.
222
+ """
223
+ return (
224
+ len(rvs1) == len(rvs2)
225
+ and all(len(rv1) == len(rv2) and rv1.idx == rv2.idx for rv1, rv2 in zip(rvs1, rvs2))
226
+ )
@@ -11,6 +11,7 @@ class EmpiricalProbabilitySpace(ProbabilitySpace):
11
11
  Note that this is not necessarily an efficient approach to calculating probabilities and statistics.
12
12
 
13
13
  This probability space treats each of the samples as equally weighted.
14
+ For a probability space over unequally weighted samples, consider using `CrossTableProbabilitySpace`.
14
15
 
15
16
  Assumes:
16
17
  len(sample) == len(rvs), for each sample in samples.
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import math
2
4
  from abc import ABC, abstractmethod
3
5
  from itertools import chain
@@ -203,16 +205,19 @@ class ProbabilitySpace(ABC):
203
205
  loop_rvs.append([rv[i] for i in sorted(states)])
204
206
  reduced_space = True
205
207
 
208
+ best_probability = float('-inf')
209
+ best_states = None
210
+
206
211
  # If the random variables we are looping over does not have any conditions
207
212
  # then it is expected to be faster by using computed marginal probabilities.
208
213
  if not reduced_space:
209
214
  prs = self.marginal_distribution(*rvs, condition=condition)
210
- best_probability = float('-inf')
211
- best_states = None
212
215
  for probability, inst in zip(prs, rv_instances(*rvs)):
213
216
  if probability > best_probability:
214
217
  best_probability = probability
215
218
  best_states = inst
219
+ if best_states is None:
220
+ return _NAN, ()
216
221
  return best_probability, best_states
217
222
 
218
223
  else:
@@ -220,8 +225,6 @@ class ProbabilitySpace(ABC):
220
225
  new_conditions = tuple(ind for ind in condition if ind.rv_idx not in rv_indexes)
221
226
 
222
227
  # Loop over the state space of the 'loop' rvs
223
- best_probability = float('-inf')
224
- best_states = None
225
228
  indicators: Tuple[Indicator, ...]
226
229
  for indicators in _combos(loop_rvs):
227
230
  probability = self.wmc(*(indicators + new_conditions))
@@ -229,6 +232,8 @@ class ProbabilitySpace(ABC):
229
232
  best_probability = probability
230
233
  best_states = tuple(ind.state_idx for ind in indicators)
231
234
  condition_probability = self.wmc(*condition)
235
+ if best_states is None:
236
+ return _NAN, ()
232
237
  return best_probability / condition_probability, best_states
233
238
 
234
239
  def correlation(self, indicator1: Indicator, indicator2: Indicator, condition: Condition = ()) -> float:
@@ -245,6 +250,20 @@ class ProbabilitySpace(ABC):
245
250
  """
246
251
  condition = check_condition(condition)
247
252
 
253
+ if indicator1.rv_idx == indicator2.rv_idx:
254
+ # Special case - same random variable
255
+ condition_groups: MapSet[int, Indicator] = _group_indicators(condition)
256
+ rv_idx: int = indicator1.rv_idx
257
+ if indicator1 not in condition_groups.get(rv_idx, (indicator1,)):
258
+ return _NAN
259
+ if indicator1 == indicator2:
260
+ return 1
261
+ else:
262
+ if indicator2 not in condition_groups.get(rv_idx, (indicator2,)):
263
+ return _NAN
264
+ else:
265
+ return 0
266
+
248
267
  p1 = self.probability(indicator1, condition=condition)
249
268
  p2 = self.probability(indicator2, condition=condition)
250
269
  p12 = self._joint_probability(indicator1, indicator2, condition=condition)
@@ -267,12 +286,7 @@ class ProbabilitySpace(ABC):
267
286
  entropy of the given random variable.
268
287
  """
269
288
  condition = check_condition(condition)
270
- e = 0.0
271
- for ind in rv:
272
- p = self.probability(ind, condition=condition)
273
- if p > 0.0:
274
- e -= p * math.log2(p)
275
- return e
289
+ return -sum(plogp(self.probability(ind, condition=condition)) for ind in rv)
276
290
 
277
291
  def conditional_entropy(self, rv1: RandomVariable, rv2: RandomVariable, condition: Condition = ()) -> float:
278
292
  """
@@ -309,13 +323,11 @@ class ProbabilitySpace(ABC):
309
323
  joint entropy of the given random variables.
310
324
  """
311
325
  condition = check_condition(condition)
312
- e = 0.0
313
- for ind1 in rv1:
314
- for ind2 in rv2:
315
- p = self._joint_probability(ind1, ind2, condition=condition)
316
- if p > 0.0:
317
- e -= p * math.log2(p)
318
- return e
326
+ return -sum(
327
+ plogp(self._joint_probability(ind1, ind2, condition=condition))
328
+ for ind1 in rv1
329
+ for ind2 in rv2
330
+ )
319
331
 
320
332
  def mutual_information(self, rv1: RandomVariable, rv2: RandomVariable, condition: Condition = ()) -> float:
321
333
  """
@@ -419,8 +431,12 @@ class ProbabilitySpace(ABC):
419
431
  denominator = self.joint_entropy(rv1, rv2, condition=condition)
420
432
  return self._normalised_mutual_information(rv1, rv2, denominator, condition=condition)
421
433
 
422
- def covariant_normalised_mutual_information(self, rv1: RandomVariable, rv2: RandomVariable,
423
- condition: Condition = ()) -> float:
434
+ def covariant_normalised_mutual_information(
435
+ self,
436
+ rv1: RandomVariable,
437
+ rv2: RandomVariable,
438
+ condition: Condition = (),
439
+ ) -> float:
424
440
  """
425
441
  Calculate the covariant normalised mutual information
426
442
  = I(rv1; rv2) / sqrt(H(rv1) * H(rv2)).
@@ -549,6 +565,14 @@ class ProbabilitySpace(ABC):
549
565
  return wmc
550
566
 
551
567
 
568
+ def plogp(p: float) -> float:
569
+ """
570
+ Returns:
571
+ p * log2(p), or 0 when p <= 0 (so that 0 * log2(0) is treated as 0).
572
+ """
573
+ return p * math.log2(p) if p > 0 else 0
574
+
575
+
552
576
  def check_condition(condition: Condition) -> Tuple[Indicator, ...]:
553
577
  """
554
578
  Make the best effort to interpret the given condition.
File without changes
@@ -0,0 +1,37 @@
1
+ from ck.dataset import HardDataset, SoftDataset
2
+ from ck.dataset.dataset_builder import DatasetBuilder, soft_dataset_from_builder, hard_dataset_from_builder
3
+ from ck.pgm import PGM
4
+
5
+
6
+ def main() -> None:
7
+ pgm = PGM()
8
+ x = pgm.new_rv('x', (True, False))
9
+ y = pgm.new_rv('y', ('yes', 'no', 'maybe'))
10
+
11
+ builder = DatasetBuilder([x, y])
12
+ builder.append()
13
+ builder.append(1, 2).weight = 3
14
+ builder.append(None, [0.7, 0.1, 0.2])
15
+ builder.append().set_states(True, 'maybe')
16
+
17
+ print('DatasetBuilder dump')
18
+ builder.dump()
19
+ print()
20
+
21
+ print('DatasetBuilder dump, showing states and custom missing values')
22
+ builder.dump(as_states=True, missing='?')
23
+ print()
24
+
25
+ print('HardDataset dump')
26
+ dataset: HardDataset = hard_dataset_from_builder(builder, missing=99)
27
+ dataset.dump()
28
+ print()
29
+
30
+ print('SoftDataset dump')
31
+ dataset: SoftDataset = soft_dataset_from_builder(builder)
32
+ dataset.dump()
33
+ print()
34
+
35
+
36
+ if __name__ == '__main__':
37
+ main()
@@ -0,0 +1,18 @@
1
+ from ck import example
2
+ from ck.dataset.sampled_dataset import dataset_from_sampler
3
+ from ck.pgm import PGM
4
+ from ck.pgm_circuit.wmc_program import WMCProgram
5
+ from ck.pgm_compiler import DEFAULT_PGM_COMPILER
6
+ from ck.sampling.sampler import Sampler
7
+
8
+
9
+ def main() -> None:
10
+ pgm: PGM = example.Student()
11
+ sampler: Sampler = WMCProgram(DEFAULT_PGM_COMPILER(pgm)).sample_direct()
12
+ dataset = dataset_from_sampler(sampler, 10)
13
+
14
+ dataset.dump()
15
+
16
+
17
+ if __name__ == '__main__':
18
+ main()
File without changes
@@ -0,0 +1,70 @@
1
+ from typing import List, Set
2
+
3
+ from ck import example
4
+ from ck.dataset import HardDataset
5
+ from ck.dataset.cross_table import CrossTable, cross_table_from_hard_dataset
6
+ from ck.dataset.sampled_dataset import dataset_from_sampler
7
+ from ck.learning.model_from_cross_tables import model_from_cross_tables
8
+ from ck.pgm import PGM, RandomVariable
9
+ from ck.pgm_circuit.wmc_program import WMCProgram
10
+ from ck.pgm_compiler import DEFAULT_PGM_COMPILER
11
+ from ck.probability import divergence
12
+
13
+ EXCLUDE_UNNECESSARY_CROSS_TABLES = True
14
+
15
+
16
+ def main() -> None:
17
+ # Create a dataset based on model which is an example PGM
18
+ number_of_samples: int = 10000 # How many instances to make for the model dataset
19
+ model: PGM = example.Student()
20
+ model_dataset: HardDataset = dataset_from_sampler(
21
+ WMCProgram(DEFAULT_PGM_COMPILER(model)).sample_direct(),
22
+ number_of_samples,
23
+ )
24
+
25
+ # Clone the model, without factors, and transport the dataset to the new PGM
26
+ pgm = PGM()
27
+ dataset = HardDataset(weights=model_dataset.weights)
28
+ for model_rv in model.rvs:
29
+ rv = pgm.new_rv(model_rv.name, model_rv.states)
30
+ dataset.add_rv_from_state_idxs(rv, model_dataset.state_idxs(model_rv))
31
+
32
+ # Which model rvs have a child
33
+ model_rvs_with_children: Set[RandomVariable] = set()
34
+ for model_factor in model.factors:
35
+ for parent_rv in model_factor.rvs[1:]:
36
+ model_rvs_with_children.add(parent_rv)
37
+
38
+ # Construct cross-tables from the dataset
39
+ cross_tables: List[CrossTable] = []
40
+ for model_factor in model.factors:
41
+ if (
42
+ EXCLUDE_UNNECESSARY_CROSS_TABLES
43
+ and len(model_factor.rvs) == 1
44
+ and model_factor.rvs[0] in model_rvs_with_children
45
+ ):
46
+ # The factor relates to a single random variable (has
47
+ # no parents) but it does have children.
48
+ # No need to include a cross-table as it is inferable from
49
+ # cross-tables of its children.
50
+ continue
51
+
52
+ rvs = tuple(pgm.rvs[model_rv.idx] for model_rv in model_factor.rvs)
53
+ cross_tables.append(cross_table_from_hard_dataset(dataset, rvs))
54
+ print('cross-table:', *rvs)
55
+
56
+ # Train the PGM
57
+ model_from_cross_tables(pgm, cross_tables)
58
+
59
+ # Show results
60
+ print()
61
+ pgm.dump(show_function_values=True)
62
+ print()
63
+ model_space = WMCProgram(DEFAULT_PGM_COMPILER(model))
64
+ pgm_space = WMCProgram(DEFAULT_PGM_COMPILER(pgm))
65
+ print('HI', divergence.hi(model_space, pgm_space))
66
+ print('KL', divergence.kl(model_space, pgm_space))
67
+
68
+
69
+ if __name__ == '__main__':
70
+ main()
@@ -0,0 +1,55 @@
1
+ from ck.dataset.dataset_from_csv import hard_dataset_from_csv
2
+ from ck.learning.train_generative_bn import train_generative_bn
3
+ from ck.pgm import PGM
4
+
5
+
6
+ def main() -> None:
7
+ pgm = PGM('Student')
8
+
9
+ difficult = pgm.new_rv('difficult', ['y', 'n'])
10
+ intelligent = pgm.new_rv('intelligent', ['y', 'n'])
11
+ grade = pgm.new_rv('grade', ['low', 'medium', 'high'])
12
+ award = pgm.new_rv('award', ['y', 'n'])
13
+ letter = pgm.new_rv('letter', ['y', 'n'])
14
+
15
+ pgm.new_factor(difficult)
16
+ pgm.new_factor(intelligent)
17
+ pgm.new_factor(grade, intelligent, difficult)
18
+ pgm.new_factor(award, intelligent)
19
+ pgm.new_factor(letter, grade)
20
+
21
+ rvs = (difficult, intelligent, grade, award, letter)
22
+ csv = """
23
+ 0,1,2,0,1
24
+ 1,1,2,0,1
25
+ 1,1,2,0,1
26
+ 0,0,2,0,0
27
+ 0,1,1,1,0
28
+ 1,1,1,1,1
29
+ 1,1,0,0,0
30
+ 1,1,0,0,1
31
+ 1,0,0,0,0
32
+ """
33
+
34
+ dataset = hard_dataset_from_csv(rvs, csv.splitlines())
35
+
36
+ # Learn parameter values for `pgm` using the training data `dataset`.
37
+ # This updates the PGM's potential functions.
38
+ train_generative_bn(pgm, dataset)
39
+
40
+ show_pgm_factors(pgm)
41
+
42
+ print('Done.')
43
+
44
+
45
+ def show_pgm_factors(pgm: PGM) -> None:
46
+ for factor in pgm.factors:
47
+ potential_function = factor.function
48
+ print(f'Factor: {factor} {type(potential_function)}')
49
+ for instance, _, param_value in potential_function.keys_with_param:
50
+ print(f'Factor{instance} = {param_value}')
51
+ print()
52
+
53
+
54
+ if __name__ == '__main__':
55
+ main()
@@ -2,7 +2,7 @@ import random
2
2
 
3
3
  from ck import example
4
4
  from ck.pgm import PGM
5
- from ck.pgm_compiler import factor_elimination
5
+ from ck.pgm_compiler import DEFAULT_PGM_COMPILER
6
6
  from ck.pgm_circuit import PGMCircuit
7
7
  from ck.pgm_circuit.wmc_program import WMCProgram
8
8
  from ck.probability.empirical_probability_space import EmpiricalProbabilitySpace
@@ -18,7 +18,7 @@ def main():
18
18
 
19
19
  pgm: PGM = example.Rain()
20
20
 
21
- pgm_cct: PGMCircuit = factor_elimination.compile_pgm(pgm)
21
+ pgm_cct: PGMCircuit = DEFAULT_PGM_COMPILER(pgm)
22
22
  wmc = WMCProgram(pgm_cct)
23
23
  sampler = wmc.sample_direct()
24
24
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compiled-knowledge
3
- Version: 4.0.0a25
3
+ Version: 4.1.0
4
4
  Summary: A Python package for compiling and querying discrete probabilistic graphical models.
5
5
  Author-email: Barry Drake <barry@compiledknowledge.org>
6
6
  License-Expression: MIT
@@ -13,6 +13,7 @@ Description-Content-Type: text/markdown
13
13
  License-File: LICENSE.txt
14
14
  Requires-Dist: llvmlite
15
15
  Requires-Dist: numpy
16
+ Requires-Dist: scipy
16
17
  Dynamic: license-file
17
18
 
18
19
  Compiled Knowledge