compiled-knowledge 4.1.0a1__cp312-cp312-win_amd64.whl → 4.1.0a3__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of compiled-knowledge might be problematic. Click here for more details.

Files changed (36) hide show
  1. ck/circuit/_circuit_cy.c +1 -1
  2. ck/circuit/_circuit_cy.cp312-win_amd64.pyd +0 -0
  3. ck/circuit_compiler/cython_vm_compiler/_compiler.c +152 -152
  4. ck/circuit_compiler/cython_vm_compiler/_compiler.cp312-win_amd64.pyd +0 -0
  5. ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.c +1 -1
  6. ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.cp312-win_amd64.pyd +0 -0
  7. ck/dataset/cross_table.py +143 -79
  8. ck/dataset/dataset.py +143 -38
  9. ck/dataset/dataset_builder.py +519 -0
  10. ck/dataset/dataset_from_crosstable.py +21 -2
  11. ck/dataset/dataset_from_csv.py +5 -1
  12. ck/learning/coalesce_cross_tables.py +395 -0
  13. ck/learning/model_from_cross_tables.py +242 -0
  14. ck/learning/parameters.py +117 -0
  15. ck/learning/train_generative_bn.py +198 -0
  16. ck/pgm.py +10 -8
  17. ck/pgm_circuit/marginals_program.py +5 -0
  18. ck/pgm_circuit/wmc_program.py +5 -0
  19. ck/pgm_compiler/support/circuit_table/_circuit_table_cy.c +1 -1
  20. ck/pgm_compiler/support/circuit_table/_circuit_table_cy.cp312-win_amd64.pyd +0 -0
  21. ck/probability/divergence.py +226 -0
  22. ck/probability/probability_space.py +43 -19
  23. ck_demos/dataset/__init__.py +0 -0
  24. ck_demos/dataset/demo_dataset_builder.py +37 -0
  25. ck_demos/dataset/demo_dataset_from_sampler.py +18 -0
  26. ck_demos/learning/__init__.py +0 -0
  27. ck_demos/learning/demo_bayesian_network_from_cross_tables.py +71 -0
  28. ck_demos/learning/demo_simple_learning.py +55 -0
  29. ck_demos/sampling/demo_wmc_direct_sampler.py +2 -2
  30. {compiled_knowledge-4.1.0a1.dist-info → compiled_knowledge-4.1.0a3.dist-info}/METADATA +2 -1
  31. {compiled_knowledge-4.1.0a1.dist-info → compiled_knowledge-4.1.0a3.dist-info}/RECORD +35 -24
  32. ck/learning/train_generative.py +0 -149
  33. /ck/{dataset/cross_table_probabilities.py → probability/cross_table_probability_space.py} +0 -0
  34. {compiled_knowledge-4.1.0a1.dist-info → compiled_knowledge-4.1.0a3.dist-info}/WHEEL +0 -0
  35. {compiled_knowledge-4.1.0a1.dist-info → compiled_knowledge-4.1.0a3.dist-info}/licenses/LICENSE.txt +0 -0
  36. {compiled_knowledge-4.1.0a1.dist-info → compiled_knowledge-4.1.0a3.dist-info}/top_level.txt +0 -0
ck/dataset/dataset.py CHANGED
@@ -1,11 +1,12 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Sequence, Optional, Dict, Iterable, Tuple
3
+ from itertools import repeat
4
+ from typing import Sequence, Optional, Dict, Iterable, Tuple, List, Iterator
4
5
 
5
6
  import numpy as np
6
7
 
7
- from ck.pgm import RandomVariable, State
8
- from ck.utils.np_extras import NDArray, DTypeStates, dtype_for_number_of_states
8
+ from ck.pgm import RandomVariable, State, Instance
9
+ from ck.utils.np_extras import DTypeStates, dtype_for_number_of_states, NDArrayNumeric, NDArrayStates
9
10
 
10
11
 
11
12
  class Dataset:
@@ -17,7 +18,7 @@ class Dataset:
17
18
 
18
19
  def __init__(
19
20
  self,
20
- weights: Optional[NDArray | Sequence],
21
+ weights: Optional[NDArrayNumeric | Sequence],
21
22
  length: Optional[int],
22
23
  ):
23
24
  # Infer the length of the dataset.
@@ -30,14 +31,16 @@ class Dataset:
30
31
  self._rvs: Tuple[RandomVariable, ...] = ()
31
32
 
32
33
  # Set the weights array, and confirm its shape
33
- self._weights: NDArray
34
+ self._weights: NDArrayNumeric
34
35
  if weights is None:
35
36
  weights = np.ones(self._length)
36
37
  elif not isinstance(weights, np.ndarray):
37
- weights = np.array(weights)
38
+ weights = np.array(weights, dtype=np.float64)
38
39
  expected_shape = (self._length,)
39
40
  if weights.shape != expected_shape:
40
41
  raise ValueError(f'weights expected shape {expected_shape}, got {weights.shape}')
42
+ # if not isinstance(weights.dtype, NDArrayNumeric):
43
+ # raise ValueError('weights expected numeric dtype')
41
44
 
42
45
  self._weights = weights
43
46
 
@@ -55,7 +58,7 @@ class Dataset:
55
58
  return self._rvs
56
59
 
57
60
  @property
58
- def weights(self) -> NDArray:
61
+ def weights(self) -> NDArrayNumeric:
59
62
  """
60
63
  Get the instance weights.
61
64
  The notional weight of an instance is 1.
@@ -84,7 +87,7 @@ class Dataset:
84
87
  """
85
88
  rvs = self._rvs
86
89
  i: int = self._rvs.index(rv)
87
- self._rvs = rvs[:i] + rvs[i+1:]
90
+ self._rvs = rvs[:i] + rvs[i + 1:]
88
91
 
89
92
 
90
93
  class HardDataset(Dataset):
@@ -97,6 +100,7 @@ class HardDataset(Dataset):
97
100
  @staticmethod
98
101
  def from_soft_dataset(
99
102
  soft_dataset: SoftDataset,
103
+ *,
100
104
  adjust_instance_weights: bool = True,
101
105
  ) -> HardDataset:
102
106
  """
@@ -124,20 +128,24 @@ class HardDataset(Dataset):
124
128
 
125
129
  def __init__(
126
130
  self,
127
- data: Iterable[Tuple[RandomVariable, NDArray | Sequence[int]]] = (),
128
- weights: Optional[NDArray | Sequence] = None,
131
+ data: Iterable[Tuple[RandomVariable, NDArrayStates | Sequence[int]]] = (),
132
+ *,
133
+ weights: Optional[NDArrayNumeric | Sequence[float | int]] = None,
129
134
  length: Optional[int] = None,
130
135
  ):
131
136
  """
132
137
  Create a hard dataset.
133
138
 
139
+ When `weights` is a numpy array, then the dataset will directly reference the given array.
140
+ When `data` contains a numpy array, then the dataset will directly reference the given array.
141
+
134
142
  Args:
135
143
  data: optional iterable of (random variable, state idxs), passed
136
144
  to `self.add_rv_from_state_idxs`.
137
145
  weights: optional array of instance weights.
138
146
  length: optional length of the dataset, if omitted, the length is inferred.
139
147
  """
140
- self._data: Dict[RandomVariable, NDArray] = {}
148
+ self._data: Dict[RandomVariable, NDArrayStates] = {}
141
149
 
142
150
  # Initialise super by either weights, length or first data item.
143
151
  super_initialised: bool = False
@@ -154,9 +162,9 @@ class HardDataset(Dataset):
154
162
  if not super_initialised:
155
163
  super().__init__(weights, 0)
156
164
 
157
- def states(self, rv: RandomVariable) -> NDArray:
165
+ def state_idxs(self, rv: RandomVariable) -> NDArrayStates:
158
166
  """
159
- Get the state values (by state index) for one random variable.
167
+ Get the state indexes for one random variable.
160
168
  The index into the returned array is the instance index.
161
169
 
162
170
  Returns:
@@ -167,7 +175,7 @@ class HardDataset(Dataset):
167
175
  """
168
176
  return self._data[rv]
169
177
 
170
- def add_rv(self, rv: RandomVariable) -> NDArray:
178
+ def add_rv(self, rv: RandomVariable) -> NDArrayStates:
171
179
  """
172
180
  Add a random variable to the dataset, allocating and returning
173
181
  the state indices for the random variable.
@@ -198,11 +206,11 @@ class HardDataset(Dataset):
198
206
  del self._data[rv]
199
207
  self._remove_rv(rv)
200
208
 
201
- def add_rv_from_state_idxs(self, rv: RandomVariable, state_idxs: NDArray | Sequence[int]) -> NDArray:
209
+ def add_rv_from_state_idxs(self, rv: RandomVariable, state_idxs: NDArrayStates | Sequence[int]) -> NDArrayStates:
202
210
  """
203
211
  Add a random variable to the dataset.
204
212
 
205
- The dataset will directly reference the given `states` array.
213
+ When `state_idxs` is a numpy array, then the dataset will directly reference the given array.
206
214
 
207
215
  Args:
208
216
  rv: The random variable to add.
@@ -234,7 +242,7 @@ class HardDataset(Dataset):
234
242
  self._add_rv(rv)
235
243
  return rv_data
236
244
 
237
- def add_rv_from_states(self, rv: RandomVariable, states: Sequence[State]) -> NDArray:
245
+ def add_rv_from_states(self, rv: RandomVariable, states: Sequence[State]) -> NDArrayStates:
238
246
  """
239
247
  Add a random variable to the dataset.
240
248
 
@@ -266,9 +274,9 @@ class HardDataset(Dataset):
266
274
  def add_rv_from_state_weights(
267
275
  self,
268
276
  rv: RandomVariable,
269
- state_weights: NDArray,
277
+ state_weights: NDArrayNumeric,
270
278
  adjust_instance_weights: bool = True,
271
- ) -> NDArray:
279
+ ) -> NDArrayStates:
272
280
  """
273
281
  Add a random variable to the dataset.
274
282
 
@@ -306,12 +314,32 @@ class HardDataset(Dataset):
306
314
  )
307
315
 
308
316
  if adjust_instance_weights:
309
- row: NDArray
317
+ row: NDArrayNumeric
310
318
  for i, row in enumerate(state_weights):
311
319
  self._weights[i] *= row.sum()
312
320
 
313
321
  return self.add_rv_from_state_idxs(rv, rv_data)
314
322
 
323
+ def instances(self, rvs: Optional[Sequence[RandomVariable]] = None) -> Iterator[Tuple[Instance, float]]:
324
+ """
325
+ Iterate over weighted instances.
326
+
327
+ Args:
328
+ rvs: The random variables to include in iteration. Default is all dataset random variables.
329
+
330
+ Returns:
331
+ an iterator over (instance, weight) pairs, in the same order and number as the instances in this dataset.
332
+ An instance is a sequence of state indexes, co-indexed with `rvs` (or with `self.rvs` when `rvs` is omitted).
333
+ """
334
+ if rvs is None:
335
+ rvs = self._rvs
336
+ # Special case - no random variables
337
+ if len(rvs) == 0:
338
+ return zip(repeat(()), self.weights)
339
+ else:
340
+ cols = [self.state_idxs(rv) for rv in rvs]
341
+ return zip(zip(*cols), self.weights)
342
+
315
343
  def dump(self, *, show_rvs: bool = True, show_weights: bool = True, as_states: bool = False) -> None:
316
344
  """
317
345
  Dump the dataset in a human-readable format.
@@ -320,14 +348,13 @@ class HardDataset(Dataset):
320
348
  Args:
321
349
  show_rvs: If `True`, the random variables are dumped.
322
350
  show_weights: If `True`, the instance weights are dumped.
323
- as_states: If `True`, the states are dumped Instead of just state indexes.
351
+ as_states: If `True`, the states are dumped instead of just state indexes.
324
352
  """
325
353
  if show_rvs:
326
354
  rvs = ', '.join(str(rv) for rv in self.rvs)
327
355
  print(f'rvs: [{rvs}]')
328
356
  print(f'instances ({len(self)}, with total weight {self.total_weight()}):')
329
- cols = [self.states(rv) for rv in self.rvs]
330
- for instance, weight in zip(zip(*cols), self.weights):
357
+ for instance, weight in self.instances():
331
358
  if as_states:
332
359
  instance_str = ', '.join(repr(rv.states[idx]) for idx, rv in zip(instance, self.rvs))
333
360
  else:
@@ -367,25 +394,29 @@ class SoftDataset(Dataset):
367
394
  """
368
395
  dataset = SoftDataset(weights=hard_dataset.weights.copy())
369
396
  for rv in hard_dataset.rvs:
370
- dataset.add_rv_from_state_idxs(rv, hard_dataset.states(rv))
397
+ dataset.add_rv_from_state_idxs(rv, hard_dataset.state_idxs(rv))
371
398
  return dataset
372
399
 
373
400
  def __init__(
374
401
  self,
375
- data: Iterable[Tuple[RandomVariable, NDArray]] = (),
376
- weights: Optional[NDArray | Sequence] = None,
402
+ data: Iterable[Tuple[RandomVariable, NDArrayNumeric | Sequence[Sequence[float]]]] = (),
403
+ *,
404
+ weights: Optional[NDArrayNumeric | Sequence[float | int]] = None,
377
405
  length: Optional[int] = None,
378
406
  ):
379
407
  """
380
408
  Create a soft dataset.
381
409
 
410
+ When `weights` is a numpy array, then the dataset will directly reference the given array.
411
+ When `data` contains a numpy array, then the dataset will directly reference the given array.
412
+
382
413
  Args:
383
414
  data: optional iterable of (random variable, state weights), passed
384
415
  to `self.add_rv_from_state_weights`.
385
416
  weights: optional array of instance weights.
386
417
  length: optional length of the dataset, if omitted, the length is inferred.
387
418
  """
388
- self._data: Dict[RandomVariable, NDArray] = {}
419
+ self._data: Dict[RandomVariable, NDArrayNumeric] = {}
389
420
 
390
421
  # Initialise super by either weights, length or first data item.
391
422
  super_initialised: bool = False
@@ -423,10 +454,10 @@ class SoftDataset(Dataset):
423
454
  RuntimeError: if `check_negative_instance` is true and a negative
424
455
  instance weight is encountered.
425
456
  """
426
- state_weights: NDArray
457
+ state_weights: NDArrayNumeric
427
458
  i: int
428
459
 
429
- weights: NDArray = self.weights
460
+ weights: NDArrayNumeric = self.weights
430
461
  for i in range(self._length):
431
462
  for state_weights in self._data.values():
432
463
  weight_sum = state_weights[i].sum()
@@ -442,7 +473,7 @@ class SoftDataset(Dataset):
442
473
  elif check_negative_instance and instance_weight < 0:
443
474
  raise RuntimeError(f'negative instance weight: {i}')
444
475
 
445
- def state_weights(self, rv: RandomVariable) -> NDArray:
476
+ def state_weights(self, rv: RandomVariable) -> NDArrayNumeric:
446
477
  """
447
478
  Get the state weights for one random variable.
448
479
  The first index into the returned array is the instance index.
@@ -456,7 +487,7 @@ class SoftDataset(Dataset):
456
487
  """
457
488
  return self._data[rv]
458
489
 
459
- def add_rv(self, rv: RandomVariable) -> NDArray:
490
+ def add_rv(self, rv: RandomVariable) -> NDArrayNumeric:
460
491
  """
461
492
  Add a random variable to the dataset, allocating and returning
462
493
  the state weights for the random variable.
@@ -487,11 +518,15 @@ class SoftDataset(Dataset):
487
518
  del self._data[rv]
488
519
  self._remove_rv(rv)
489
520
 
490
- def add_rv_from_state_weights(self, rv: RandomVariable, state_weights: NDArray) -> NDArray:
521
+ def add_rv_from_state_weights(
522
+ self,
523
+ rv: RandomVariable,
524
+ state_weights: NDArrayNumeric | Sequence[Sequence[float]],
525
+ ) -> NDArrayNumeric:
491
526
  """
492
527
  Add a random variable to the dataset.
493
528
 
494
- The dataset will directly reference the given `states` array.
529
+ When `state_weights` is a numpy array, then the dataset will directly reference the given array.
495
530
 
496
531
  Args:
497
532
  rv: The random variable to add.
@@ -503,6 +538,9 @@ class SoftDataset(Dataset):
503
538
  if rv in self._data.keys():
504
539
  raise ValueError(f'data for {rv} already exists in the dataset')
505
540
 
541
+ if not isinstance(state_weights, np.ndarray):
542
+ state_weights = np.array(state_weights, dtype=np.float64)
543
+
506
544
  expected_shape = (self._length, len(rv))
507
545
  if state_weights.shape == expected_shape:
508
546
  rv_data = state_weights
@@ -513,7 +551,7 @@ class SoftDataset(Dataset):
513
551
  self._add_rv(rv)
514
552
  return rv_data
515
553
 
516
- def add_rv_from_state_idxs(self, rv: RandomVariable, state_idxs: NDArray | Sequence[int]) -> NDArray:
554
+ def add_rv_from_state_idxs(self, rv: RandomVariable, state_idxs: NDArrayStates | Sequence[int]) -> NDArrayNumeric:
517
555
  """
518
556
  Add a random variable to the dataset.
519
557
 
@@ -533,7 +571,7 @@ class SoftDataset(Dataset):
533
571
 
534
572
  return self.add_rv_from_state_weights(rv, rv_data)
535
573
 
536
- def add_rv_from_states(self, rv: RandomVariable, states: Sequence[State]) -> NDArray:
574
+ def add_rv_from_states(self, rv: RandomVariable, states: Sequence[State]) -> NDArrayNumeric:
537
575
  """
538
576
  Add a random variable to the dataset.
539
577
 
@@ -555,10 +593,55 @@ class SoftDataset(Dataset):
555
593
 
556
594
  return self.add_rv_from_state_weights(rv, rv_data)
557
595
 
596
+ def soft_instances(
597
+ self,
598
+ rvs: Optional[Sequence[RandomVariable]] = None,
599
+ ) -> Iterator[Tuple[Tuple[NDArrayNumeric], float]]:
600
+ """
601
+ Iterate over weighted instances of soft evidence.
602
+
603
+ Args:
604
+ rvs: The random variables to include in iteration. Default is all dataset random variables.
605
+
606
+ Returns:
607
+ an iterator over (instance, weight) pairs, in the same order and number as the instances in this dataset.
608
+ An instance is a sequence of state weight arrays, co-indexed with `rvs` (or with `self.rvs` when `rvs` is omitted).
609
+ """
610
+ if rvs is None:
611
+ rvs = self.rvs
612
+ # Special case - no random variables
613
+ if len(rvs) == 0:
614
+ return zip(repeat(()), self.weights)
615
+ else:
616
+ cols: List[NDArrayNumeric] = [self.state_weights(rv) for rv in rvs]
617
+ return zip(zip(*cols), self.weights)
618
+
619
+ def hard_instances(self, rvs: Optional[Sequence[RandomVariable]] = None) -> Iterator[Tuple[Instance, float]]:
620
+ """
621
+ Iterate over equivalent weighted hard instances.
622
+
623
+ Args:
624
+ rvs: The random variables to include in iteration. Default is all dataset random variables.
625
+
626
+ Returns:
627
+ an iterator over (instance, weight) pairs where the order and number of instances
628
+ is not guaranteed.
629
+ An instance is a sequence of state indexes, co-indexed with `rvs` (or with `self.rvs` when `rvs` is omitted).
630
+ """
631
+ if rvs is None:
632
+ rvs = self.rvs
633
+ # Special case - no random variables
634
+ if len(rvs) == 0:
635
+ yield (), self.total_weight()
636
+ else:
637
+ for instance_weights, weight in self.soft_instances(rvs):
638
+ if weight != 0:
639
+ for instance, instance_weight in _product_instance_weights(instance_weights):
640
+ yield instance, instance_weight * weight
641
+
558
642
  def dump(self, *, show_rvs: bool = True, show_weights: bool = True) -> None:
559
643
  """
560
644
  Dump the dataset in a human-readable format.
561
- If as_states is true, then instance states are dumped instead of just state indexes.
562
645
 
563
646
  Args:
564
647
  show_rvs: If `True`, the random variables are dumped.
@@ -568,10 +651,32 @@ class SoftDataset(Dataset):
568
651
  rvs = ', '.join(str(rv) for rv in self.rvs)
569
652
  print(f'rvs: [{rvs}]')
570
653
  print(f'instances ({len(self)}, with total weight {self.total_weight()}):')
571
- cols = [self.state_weights(rv) for rv in self.rvs]
572
- for instance, weight in zip(zip(*cols), self.weights):
654
+ for instance, weight in self.soft_instances():
573
655
  instance_str = ', '.join(str(state_weights) for state_weights in instance)
574
656
  if show_weights:
575
657
  print(f'({instance_str}) * {weight}')
576
658
  else:
577
659
  print(f'({instance_str})')
660
+
661
+
662
+ def _product_instance_weights(instance_weights: Sequence[NDArrayNumeric]) -> Iterator[Tuple[Tuple[int, ...], float]]:
663
+ """
664
+ Iterate over all possible hard instances for the given
665
+ instance weights, where the weight is not zero.
666
+
667
+ This is a support function for `SoftDataset.hard_instances`.
668
+ """
669
+
670
+ # Base case
671
+ if len(instance_weights) == 0:
672
+ yield (), 1
673
+
674
+ # Recursive case
675
+ else:
676
+ next_weights: NDArrayNumeric = instance_weights[-1]
677
+ pre_weights: Sequence[NDArrayNumeric] = instance_weights[:-1]
678
+ weight: float
679
+ for pre_instance, pre_weight in _product_instance_weights(pre_weights):
680
+ for i, weight in enumerate(next_weights):
681
+ if weight != 0:
682
+ yield pre_instance + (int(i),), pre_weight * weight