compiled-knowledge 4.0.0a25__cp313-cp313-macosx_11_0_arm64.whl → 4.1.0a2__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of compiled-knowledge might be problematic. Click here for more details.

Files changed (31) hide show
  1. ck/circuit/_circuit_cy.c +1 -1
  2. ck/circuit/_circuit_cy.cpython-313-darwin.so +0 -0
  3. ck/circuit_compiler/cython_vm_compiler/_compiler.c +152 -152
  4. ck/circuit_compiler/cython_vm_compiler/_compiler.cpython-313-darwin.so +0 -0
  5. ck/circuit_compiler/interpret_compiler.py +2 -2
  6. ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.c +1 -1
  7. ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.cpython-313-darwin.so +0 -0
  8. ck/dataset/__init__.py +1 -0
  9. ck/dataset/cross_table.py +270 -0
  10. ck/dataset/cross_table_probabilities.py +53 -0
  11. ck/dataset/dataset.py +594 -0
  12. ck/dataset/dataset_builder.py +512 -0
  13. ck/dataset/dataset_compute.py +140 -0
  14. ck/dataset/dataset_from_crosstable.py +45 -0
  15. ck/dataset/dataset_from_csv.py +151 -0
  16. ck/dataset/sampled_dataset.py +96 -0
  17. ck/learning/__init__.py +0 -0
  18. ck/learning/train_generative.py +149 -0
  19. ck/pgm.py +29 -27
  20. ck/pgm_circuit/program_with_slotmap.py +23 -45
  21. ck/pgm_circuit/support/compile_circuit.py +2 -4
  22. ck/pgm_compiler/support/circuit_table/_circuit_table_cy.c +1 -1
  23. ck/pgm_compiler/support/circuit_table/_circuit_table_cy.cpython-313-darwin.so +0 -0
  24. ck/probability/empirical_probability_space.py +1 -0
  25. ck_demos/dataset/__init__.py +0 -0
  26. ck_demos/dataset/demo_dataset_builder.py +37 -0
  27. {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0a2.dist-info}/METADATA +1 -1
  28. {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0a2.dist-info}/RECORD +31 -18
  29. {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0a2.dist-info}/WHEEL +0 -0
  30. {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0a2.dist-info}/licenses/LICENSE.txt +0 -0
  31. {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0a2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,151 @@
1
+ from typing import Iterable, List, Sequence, Optional
2
+
3
+ from ck.dataset import HardDataset
4
+ from ck.pgm import RandomVariable
5
+
6
+
7
def hard_dataset_from_csv(
        rvs: Iterable[RandomVariable],
        lines: Iterable[str],
        *,
        weights: Optional[int | str] = None,
        sep: Optional[str] = ',',
        comment: str = '#',
) -> HardDataset:
    """
    Interpret the given sequence of lines as CSV for a HardDataset.

    Each line is a list of state indexes (ints) separated by `sep`.

    Every line should have the same number of values.

    If the first non-blank line contains a non-numeric value, then that
    line will be interpreted as a header line.

    If there is no header line, then the values will be interpreted in the
    same order as `rvs` and the number of values on each line should be
    the same as the number of random variables in `rvs` (plus one if a
    weights column is given).

    If there is a header line, then it will be interpreted as the order
    of random variables. There must be a column name in the header to match
    each name of the given random variables. Additional columns will be ignored.

    Leading and trailing whitespace is ignored for each field, including header column names.
    Blank lines (after removing comments) are skipped.

    As text file (and StringIO) objects are iterable over lines, here is how to read a csv file:
    ```
    with open(csv_filename, 'r') as file:
        hard_dataset_from_csv(rvs, file)
    ```
    Here is an example to read from a csv string:
    ```
    hard_dataset_from_csv(rvs, csv_string.splitlines())
    ```

    Args:
        rvs: the random variables for the returned dataset.
        lines: the sequence of lines to interpret, each line is an instance in the dataset.
        weights: the column in the csv file holding instance weights. Can be either the
            column number (counting from zero, negative numbers count from the last column)
            or a column name (requires a header line).
        sep: the string to use to separate values in a line, default is a comma.
            If set to `None`, lines will be split on any consecutive run of whitespace characters.
        comment: text starting with this will be treated as a comment. Set to '' to disallow comments.

    Returns:
        a HardDataset.

    Raises:
        ValueError: if the lines do not conform to a CSV format, e.g., a data line
            has a different number of values to the first line, a required
            column is missing from the header, or `weights` is out of range.
    """
    rvs: Sequence[RandomVariable] = tuple(rvs)

    # Define `clean_line` being sensitive to comments.
    if len(comment) > 0:
        def clean_line(l: str) -> str:
            i = l.find(comment)
            if i >= 0:
                l = l[:i]
            return l.strip()
    else:
        def clean_line(l: str) -> str:
            return l.strip()

    # Get the first non-blank line, which may be a header line or a data line.
    # `it` is left positioned just after that line.
    it = iter(lines)
    line: Optional[str] = next((l for l in map(clean_line, it) if len(l) > 0), None)
    if line is None:
        # Empty dataset with the given random variables
        return HardDataset((rv, []) for rv in rvs)

    values: List[str] = [value.strip() for value in line.split(sep)]
    number_of_columns: int = len(values)
    series: List[List[int]]  # series[dataset-column] = list of values
    weight_series: Optional[List[float]] = None
    column_map: List[int]  # column_map[dataset-column] = input-column
    weight_column = weights  # input-column holding weights, resolved to a column number below

    if all(_is_number(value) for value in values):
        # First line is not a header line
        if weights is None:
            if number_of_columns != len(rvs):
                raise ValueError('number of columns does not match number of random variables')
            column_map = list(range(len(rvs)))
        else:
            if number_of_columns != len(rvs) + 1:
                raise ValueError('number of columns does not match number of random variables and weight column')
            if not isinstance(weights, int):
                raise ValueError('no header detected - `weights` must be a column number')
            if not (-number_of_columns <= weights < number_of_columns):
                raise ValueError('`weights` column number out of range')
            column_map = list(range(len(rvs) + 1))
            column_map.pop(weights)

        # Initialise series with the first line of data
        series = [[int(values[i])] for i in column_map]
        if weight_column is not None:
            weight_series = [float(values[weight_column])]

    else:
        # First line is a header line
        # Lookup each random variable to find its column
        column_map = [
            values.index(rv.name)  # will raise ValueError if not found
            for rv in rvs
        ]
        if isinstance(weights, str):
            # Convert weights column name to column number
            weight_column = values.index(weights)  # will raise ValueError if not found
        elif isinstance(weights, int) and not (-number_of_columns <= weights < number_of_columns):
            # The lower bound must be negated, otherwise no column number is ever accepted.
            raise ValueError('`weights` column number out of range')

        # Initialise each series as empty
        series = [[] for _ in rvs]
        if weight_column is not None:
            weight_series = []

    # Read remaining data lines
    for line in it:
        line = clean_line(line)
        if len(line) == 0:
            continue
        # Split before validating, so that the check applies to this line
        # and not to the previously processed line.
        values = line.split(sep)
        if len(values) != number_of_columns:
            raise ValueError('number of values does not match number of columns')
        for series_i, i in zip(series, column_map):
            series_i.append(int(values[i]))
        if weight_series is not None:
            weight_series.append(float(values[weight_column]))

    # Construct the dataset
    return HardDataset(zip(rvs, series), weights=weight_series)


def _is_number(s: str) -> bool:
    """
    Can the given string be parsed by `float`?

    Used to decide whether the first CSV line is data or a header.
    """
    try:
        float(s)
    except ValueError:
        return False
    return True
@@ -0,0 +1,96 @@
1
+ import random
2
+ from dataclasses import dataclass
3
+ from typing import Sequence, List, Iterator, Tuple, Dict
4
+
5
+ import numpy as np
6
+
7
+ from ck.dataset import HardDataset
8
+ from ck.dataset.cross_table import CrossTable
9
+ from ck.pgm import RandomVariable, Instance
10
+ from ck.sampling.sampler import Sampler
11
+ from ck.utils.np_extras import dtype_for_number_of_states, NDArray
12
+ from ck.utils.random_extras import Random
13
+
14
+
15
def dataset_from_sampler(sampler: Sampler, length: int) -> HardDataset:
    """
    Build a hard dataset by drawing samples from a sampler.

    One column is allocated per random variable of the sampler, then
    filled row by row from `sampler.take(length)`.

    Args:
        sampler: A sampler which defines the random variables and provides samples.
        length: The length of the dataset to create.

    Returns:
        A HardDataset of the given length.
    """
    rvs: Sequence[RandomVariable] = sampler.rvs

    # Pre-allocate one zeroed column per random variable; the dtype is
    # selected from the number of states of the random variable.
    columns: List[NDArray] = []
    for rv in rvs:
        state_dtype = dtype_for_number_of_states(len(rv))
        columns.append(np.zeros(length, dtype=state_dtype))

    # Each sampled instance provides one state per column.
    for row, instance in enumerate(sampler.take(length)):
        for column, state in zip(columns, instance):
            column[row] = state

    return HardDataset(zip(rvs, columns))
35
+
36
+
37
class CrossTableSampler(Sampler):
    """
    A sampler that draws instances from a cross-table, with probability
    proportional to the weight of each instance.
    """

    def __init__(self, crosstab: CrossTable, rand: Random = random):
        """
        Adapt a cross table to a sampler.

        Instances will be drawn from the sampler according to their
        weight in the given cross-table. If the given cross-table is
        modified after constructing the sampler, the sampler will not
        be affected.

        Args:
            crosstab: the cross-table providing instances and their weights.
            rand: source of random numbers; only its `random()` method is used.

        Raises:
            ValueError: if the cross-table has no instances.
        """
        if len(crosstab) == 0:
            raise ValueError('no instances to sample')

        super().__init__(rvs=crosstab.rvs, condition=())

        # Group instances by weight.
        # We do this in anticipation that it makes sampling more efficient.
        weight_groups: Dict[float, _WeightGroup] = {}
        for instance, weight in crosstab.items():
            weight_group = weight_groups.get(weight)
            if weight_group is None:
                weight_groups[weight] = _WeightGroup(weight, weight, [instance])
            else:
                weight_group.append(instance)

        self._weight_groups: List[_WeightGroup] = list(weight_groups.values())
        self._total_weight = sum(group.total for group in weight_groups.values())
        self._rand = rand

    def __iter__(self) -> Iterator[Instance]:
        """
        Yield an endless stream of instances, using inverse transform sampling.

        Raises:
            ValueError: (on first use of the iterator) if no group of
                instances has a positive total weight.
        """
        groups: List[_WeightGroup] = self._weight_groups

        # Floating point rounding can leave the random number at or just past
        # the accumulated total. Running off the end of the groups inside a
        # generator would surface as a RuntimeError (PEP 479), so keep a
        # fallback: the last group that can legitimately be sampled.
        fallback = next((group for group in reversed(groups) if group.total > 0), None)
        if fallback is None:
            raise ValueError('no instances with positive weight to sample')

        while True:
            r: float = self._rand.random() * self._total_weight

            # This does a serial search to find the weight group.
            # This is efficient for small numbers of groups, but this may be
            # improved for large numbers of groups.
            chosen = None
            for group in groups:
                if r < group.total:
                    chosen = group
                    break
                r -= group.total

            if chosen is None:
                # Rounding pushed `r` past every group.
                yield fallback.instances[-1]
            else:
                # Pick an instance in the group. `total` is accumulated by
                # repeated addition, so `r / weight` may round up to the
                # number of instances; clamp to the last valid index.
                i = min(int(r / chosen.weight), len(chosen.instances) - 1)
                yield chosen.instances[i]
83
+
84
+
85
@dataclass
class _WeightGroup:
    """
    Support for CrossTableSampler: a collection of instances that all
    share the same weight.
    """
    # The weight of each individual instance in the group.
    weight: float
    # Running total weight of the group; grows by `weight` on every `append`.
    total: float
    # The instances held by the group, in insertion order.
    instances: List[Tuple[int, ...]]

    def append(self, instance: Tuple[int, ...]) -> None:
        """
        Add an instance to the group and account for its weight in `total`.
        """
        self.total = self.total + self.weight
        self.instances.append(instance)
File without changes
@@ -0,0 +1,149 @@
1
+ from dataclasses import dataclass
2
+ from typing import Dict, Tuple, List
3
+
4
+ import numpy as np
5
+
6
+ from ck.dataset import SoftDataset, HardDataset
7
+ from ck.dataset.cross_table import CrossTable, cross_table_from_dataset
8
+ from ck.pgm import PGM, Instance, DensePotentialFunction, Shape, natural_key_idx, SparsePotentialFunction
9
+ from ck.utils.iter_extras import multiply
10
+ from ck.utils.np_extras import NDArrayFloat64
11
+
12
+
13
@dataclass
class ParameterValues:
    """
    Learned parameter values of a PGM, held as one CPT per factor.

    The `set_...` methods write the held values into the potential
    functions of the factors of `pgm`.
    """
    pgm: PGM
    """
    The PGM that the parameter values pertain to.
    """

    cpts: List[Dict[Instance, NDArrayFloat64]]
    """
    A list of CPTs co-indexed with `pgm.factors`. Each CPT is a dict
    mapping from instances of the parent random variables (of the factor)
    to the child conditional probability distribution (CPD).
    """

    def set_zero(self) -> None:
        """
        Call `set_zero` on every factor of the PGM.
        """
        for pgm_factor in self.pgm.factors:
            pgm_factor.set_zero()

    def set_cpt(self) -> None:
        """
        Call `set_cpt` on every factor of the PGM, and load the resulting
        potential function with the rows of the corresponding CPT.
        """
        for pgm_factor, cpt in zip(self.pgm.factors, self.cpts):
            cpt_function = pgm_factor.set_cpt()
            cpt_function.set(*cpt.items())

    def set_dense(self) -> None:
        """
        Call `set_dense` on every factor of the PGM, and load the resulting
        potential function with the values of the corresponding CPT.
        """
        for pgm_factor, cpt in zip(self.pgm.factors, self.cpts):
            dense: DensePotentialFunction = pgm_factor.set_dense()
            parent_shape: Shape = pgm_factor.shape[1:]

            if len(parent_shape) == 0:
                # No parents: the CPT has a single CPD, keyed by the empty instance.
                for child_state, value in enumerate(cpt[()]):
                    dense[child_state] = value
                continue

            # The child is the first dimension of the factor, so consecutive
            # child states are `stride` parameters apart for fixed parent states.
            stride: int = multiply(parent_shape)
            for parent_states, cpd in cpt.items():
                base: int = natural_key_idx(parent_shape, parent_states)
                for child_state, value in enumerate(cpd):
                    dense[base + child_state * stride] = value

    def set_sparse(self) -> None:
        """
        Call `set_sparse` on every factor of the PGM, and load the resulting
        potential function with the values of the corresponding CPT.
        """
        for pgm_factor, cpt in zip(self.pgm.factors, self.cpts):
            sparse: SparsePotentialFunction = pgm_factor.set_sparse()
            for parent_states, cpd in cpt.items():
                for child_state, value in enumerate(cpd):
                    # Keys are child state first, followed by the parent states.
                    full_key = (child_state,) + parent_states
                    sparse[full_key] = value
84
+
85
+
86
def train_generative_bn(
        pgm: PGM,
        dataset: HardDataset | SoftDataset,
        *,
        dirichlet_prior: float = 0,
        check_bayesian_network: bool = True,
) -> ParameterValues:
    """
    Maximum-likelihood, generative training for a Bayesian network.

    For each factor of the PGM, a cross-table of the dataset is made over
    the random variables of the factor, and is converted to a CPT.

    Args:
        pgm: the probabilistic graphical model defining the model structure.
            Potential function values are ignored and need not be set.
        dataset: a dataset of random variable states.
        dirichlet_prior: a real number >= 0. See `CrossTable` for an explanation.
        check_bayesian_network: if true and not pgm.is_structure_bayesian an exception will be raised.

    Returns:
        a ParameterValues object that can be used to update the parameters of the given PGM.

    Raises:
        ValueError: if the given PGM does not have a Bayesian network structure, and check_bayesian_network is True.
    """
    if check_bayesian_network and not pgm.is_structure_bayesian:
        raise ValueError('the given PGM is not a Bayesian network')

    # One CPT per factor, co-indexed with `pgm.factors`.
    cpts: List[Dict[Instance, NDArrayFloat64]] = []
    for factor in pgm.factors:
        crosstab = cross_table_from_dataset(dataset, factor.rvs, dirichlet_prior=dirichlet_prior)
        cpts.append(cpt_from_crosstab(crosstab))

    return ParameterValues(pgm, cpts)
116
+
117
+
118
def cpt_from_crosstab(crosstab: CrossTable) -> Dict[Instance, NDArrayFloat64]:
    """
    Make a conditional probability table (CPT) from a cross-table.

    The weight of each cross-table instance is accumulated into a vector
    (indexed by child state) for the parent states of the instance, then
    each vector is normalised to sum to one.

    Args:
        crosstab: a CrossTable representing the weight of unique instances.

    Returns:
        a mapping from instances of the parent random variables to the child
        conditional probability distribution (CPD). Only parent instances
        appearing in the cross-table are included.

    Assumes:
        the first random variable in `crosstab.rvs` is the child random variable.
    """
    # Number of states for the child random variable.
    child_size: int = len(crosstab.rvs[0])

    # Accumulate the weight of each child state, for each seen parent instance.
    cpt: Dict[Instance, NDArrayFloat64] = {}
    for state, weight in crosstab.items():
        child_state: int = state[0]
        parent_state: Tuple[int, ...] = state[1:]
        if parent_state not in cpt:
            cpt[parent_state] = np.zeros(child_size, dtype=np.float64)
        cpt[parent_state][child_state] += weight

    # Normalise each distribution, in place.
    for child_weights in cpt.values():
        child_weights /= child_weights.sum()

    return cpt
ck/pgm.py CHANGED
@@ -19,16 +19,18 @@ State: TypeAlias = Union[int, str, bool, float, None]
19
19
  The type for a possible state of a random variable.
20
20
  """
21
21
 
22
- Instance: TypeAlias = Sequence[int]
22
+ Instance: TypeAlias = Tuple[int, ...]
23
23
  """
24
- An instance (of a sequence of random variables) is a sequence of integers
24
+ An instance (of a sequence of random variables) is a tuple of integers
25
25
  that are state indexes, co-indexed with a known sequence of random variables.
26
26
  """
27
27
 
28
- Key: TypeAlias = Union[Instance, int]
28
+ Key: TypeAlias = Union[Sequence[int], int]
29
29
  """
30
- A key identifies an instance, either as an instance itself or a
31
- single integer, representing an instance with one dimension.
30
+ A key identifies an instance, either as a sequence of integers or a
31
+ single integer. The integers are state indexes, co-indexed with a known
32
+ sequence of random variables. A single integer represents an instance with
33
+ one dimension.
32
34
  """
33
35
 
34
36
  Shape: TypeAlias = Sequence[int]
@@ -1871,7 +1873,7 @@ class PotentialFunction(ABC):
1871
1873
  a hypothetical parameter index assuming that every valid key has a unique parameter
1872
1874
  as per DensePotentialFunction.
1873
1875
  """
1874
- return _natural_key_idx(self._shape, key)
1876
+ return natural_key_idx(self._shape, key)
1875
1877
 
1876
1878
  def param_id(self, param_idx: int) -> ParamId:
1877
1879
  """
@@ -2029,7 +2031,7 @@ class ZeroPotentialFunction(PotentialFunction):
2029
2031
  return 0
2030
2032
 
2031
2033
  def param_idx(self, key: Key) -> int:
2032
- return _natural_key_idx(self._shape, key)
2034
+ return natural_key_idx(self._shape, key)
2033
2035
 
2034
2036
  def is_cpt(self, tolerance=DEFAULT_CPT_TOLERANCE) -> bool:
2035
2037
  return True
@@ -3364,26 +3366,7 @@ def rv_instances_as_indicators(*rvs: RandomVariable, flip: bool = False) -> Iter
3364
3366
  return _combos(rvs, flip=not flip)
3365
3367
 
3366
3368
 
3367
- def _key_to_instance(key: Key) -> Instance:
3368
- """
3369
- Convert a key to an instance.
3370
-
3371
- Args:
3372
- key: a key into a state space.
3373
-
3374
- Returns:
3375
- A instance from the state space, as a tuple of state indexes, co-indexed with the given shape.
3376
-
3377
- Assumes:
3378
- The key is valid for the implied state space.
3379
- """
3380
- if isinstance(key, int):
3381
- return (key,)
3382
- else:
3383
- return tuple(key)
3384
-
3385
-
3386
- def _natural_key_idx(shape: Shape, key: Key) -> int:
3369
+ def natural_key_idx(shape: Shape, key: Key) -> int:
3387
3370
  """
3388
3371
  What is the natural index of the given key, assuming the given shape.
3389
3372
 
@@ -3409,6 +3392,25 @@ def _natural_key_idx(shape: Shape, key: Key) -> int:
3409
3392
  return result
3410
3393
 
3411
3394
 
3395
def _key_to_instance(key: Key) -> Instance:
    """
    Convert a key to an instance.

    Args:
        key: a key into a state space, either a single integer or a
            sequence of integers.

    Returns:
        An instance from the state space, as a tuple of state indexes.
        A single integer key becomes a one-element tuple.

    Assumes:
        The key is valid for the implied state space.
    """
    return (key,) if isinstance(key, int) else tuple(key)
3412
+
3413
+
3412
3414
  def _zero_space(shape: Shape) -> int:
3413
3415
  """
3414
3416
  Return the size of the zero space of the given shape. This is the number
@@ -1,6 +1,8 @@
1
- from typing import Tuple, Sequence, Dict, Iterable
1
+ from typing import Tuple, Sequence, Dict
2
2
 
3
- from ck.pgm import RandomVariable, rv_instances, Instance, rv_instances_as_indicators, Indicator, ParamId
3
+ import numpy as np
4
+
5
+ from ck.pgm import RandomVariable, Indicator, ParamId
4
6
  from ck.pgm_circuit.slot_map import SlotMap, SlotKey
5
7
  from ck.probability.probability_space import Condition, check_condition
6
8
  from ck.program.program_buffer import ProgramBuffer
@@ -69,40 +71,6 @@ class ProgramWithSlotmap:
69
71
  def slot_map(self) -> SlotMap:
70
72
  return self._slot_map
71
73
 
72
- def instances(self, flip: bool = False) -> Iterable[Instance]:
73
- """
74
- Enumerate instances of the random variables.
75
-
76
- Each instance is a tuples of state indexes, co-indexed with the given random variables.
77
-
78
- The order is the natural index order (i.e., last random variable changing most quickly).
79
-
80
- Args:
81
- flip: if true, then first random variable changes most quickly.
82
-
83
- Returns:
84
- an iteration over tuples, each tuple holds state indexes
85
- co-indexed with the given random variables.
86
- """
87
- return rv_instances(*self._rvs, flip=flip)
88
-
89
- def instances_as_indicators(self, flip: bool = False) -> Iterable[Sequence[Indicator]]:
90
- """
91
- Enumerate instances of the random variables.
92
-
93
- Each instance is a tuples of indicators, co-indexed with the given random variables.
94
-
95
- The order is the natural index order (i.e., last random variable changing most quickly).
96
-
97
- Args:
98
- flip: if true, then first random variable changes most quickly.
99
-
100
- Returns:
101
- an iteration over tuples, each tuples holds random variable indicators
102
- co-indexed with the given random variables.
103
- """
104
- return rv_instances_as_indicators(*self._rvs, flip=flip)
105
-
106
74
  def compute(self) -> NDArrayNumeric:
107
75
  """
108
76
  Execute the program to compute and return the result. As per `ProgramBuffer.compute`.
@@ -146,29 +114,36 @@ class ProgramWithSlotmap:
146
114
  """
147
115
  return self._program_buffer.vars
148
116
 
149
- def __setitem__(self, item: int | slice | SlotKey | Iterable[SlotKey], value: float) -> None:
117
def __setitem__(self, item: int | slice | SlotKey | RandomVariable, value: float) -> None:
    """
    Set input slot value/s.

    Args:
        item: identifies the slot/s to set:
            an int or slice indexes the program buffer directly,
            an Indicator or ParamId is looked up in the slot map,
            a RandomVariable sets the slot of every element it iterates over.
        value: the value to store in each identified slot.

    Raises:
        IndexError: if `item` is not one of the supported types.
    """
    if isinstance(item, (int, slice)):
        self._program_buffer[item] = value
        return

    if isinstance(item, (Indicator, ParamId)):
        slot = self._slot_map[item]
        self._program_buffer[slot] = value
        return

    if isinstance(item, RandomVariable):
        for ind in item:
            slot = self._slot_map[ind]
            self._program_buffer[slot] = value
        return

    raise IndexError(f'unknown index type: {type(item)}')
161
130
 
162
- def __getitem__(self, item: int | slice | SlotKey) -> NDArrayNumeric:
131
def __getitem__(self, item: int | slice | SlotKey | RandomVariable) -> NDArrayNumeric:
    """
    Get input slot value/s.

    Args:
        item: identifies the slot/s to get:
            an int or slice indexes the program buffer directly,
            an Indicator or ParamId is looked up in the slot map,
            a RandomVariable gets the slot of every element it iterates over.

    Returns:
        the program buffer value/s for the identified slot/s. For a
        RandomVariable, a new numpy array of length `len(item)` with the
        dtype of the program buffer.

    Raises:
        IndexError: if `item` is not one of the supported types.
    """
    if isinstance(item, (int, slice)):
        return self._program_buffer[item]

    if isinstance(item, (Indicator, ParamId)):
        slot = self._slot_map[item]
        return self._program_buffer[slot]

    if isinstance(item, RandomVariable):
        slot_values = (self._program_buffer[self._slot_map[ind]] for ind in item)
        return np.fromiter(
            slot_values,
            dtype=self._program_buffer.dtype,
            count=len(item)
        )

    raise IndexError(f'unknown index type: {type(item)}')
172
147
 
173
148
  def set_condition(self, *condition: Condition) -> None:
174
149
  """
@@ -211,7 +186,10 @@ class ProgramWithSlotmap:
211
186
 
212
187
  Args:
213
188
  rv: a random variable whose indicators are in the slot map.
214
- values: list of values, assumes len(values) == len(rv).
189
+ values: list of values
190
+
191
+ Assumes:
192
+ len(values) == len(rv).
215
193
  """
216
194
  for i in range(len(rv)):
217
195
  self[rv[i]] = values[i]
@@ -30,11 +30,9 @@ def compile_results(
30
30
  a compiled RawProgram.
31
31
  """
32
32
  circuit: Circuit = pgm_circuit.circuit_top.circuit
33
- if const_parameters:
34
- parameter_values = pgm_circuit.parameter_values
35
- number_of_indicators = pgm_circuit.number_of_indicators
33
+ if const_parameters and len(pgm_circuit.parameter_values) > 0:
36
34
  with TmpConst(circuit) as tmp:
37
- for slot, value in enumerate(parameter_values, start=number_of_indicators):
35
+ for slot, value in enumerate(pgm_circuit.parameter_values, start=pgm_circuit.number_of_indicators):
38
36
  tmp.set_const(slot, value)
39
37
  raw_program: RawProgram = compiler(*results, circuit=circuit)
40
38
  else:
@@ -15,7 +15,7 @@
15
15
  "-O3"
16
16
  ],
17
17
  "include_dirs": [
18
- "/private/var/folders/y6/nj790rtn62lfktb1sh__79hc0000gn/T/build-env-q2xes9a1/lib/python3.12/site-packages/numpy/_core/include"
18
+ "/private/var/folders/y6/nj790rtn62lfktb1sh__79hc0000gn/T/build-env-xq76d94c/lib/python3.12/site-packages/numpy/_core/include"
19
19
  ],
20
20
  "name": "ck.pgm_compiler.support.circuit_table._circuit_table_cy",
21
21
  "sources": [
@@ -11,6 +11,7 @@ class EmpiricalProbabilitySpace(ProbabilitySpace):
11
11
  Note that this is not necessarily an efficient approach to calculating probabilities and statistics.
12
12
 
13
13
  This probability space treats each of the samples as equally weighted.
14
+ For a probability space over unequally weighted samples, consider using `CrossTableProbabilitySpace`.
14
15
 
15
16
  Assumes:
16
17
  len(sample) == len(rvs), for each sample in samples.
File without changes
@@ -0,0 +1,37 @@
1
+ from ck.dataset import HardDataset, SoftDataset
2
+ from ck.dataset.dataset_builder import DatasetBuilder, soft_dataset_from_builder, hard_dataset_from_builder
3
+ from ck.pgm import PGM
4
+
5
+
6
def main() -> None:
    """
    Demonstrate `DatasetBuilder`.

    A builder is created over two random variables, four records are
    appended using different forms of `append`, then the builder and the
    hard and soft datasets derived from it are dumped.
    """
    pgm = PGM()
    rv_x = pgm.new_rv('x', (True, False))
    rv_y = pgm.new_rv('y', ('yes', 'no', 'maybe'))

    builder = DatasetBuilder([rv_x, rv_y])

    # Four records, each appended in a different way.
    builder.append()
    weighted_record = builder.append(1, 2)
    weighted_record.weight = 3
    builder.append(None, [0.7, 0.1, 0.2])
    builder.append().set_states(True, 'maybe')

    print('DatasetBuilder dump')
    builder.dump()
    print()

    print('DatasetBuilder dump, showing states and custom missing values')
    builder.dump(as_states=True, missing='?')
    print()

    print('HardDataset dump')
    hard_dataset: HardDataset = hard_dataset_from_builder(builder, missing=99)
    hard_dataset.dump()
    print()

    print('SoftDataset dump')
    soft_dataset: SoftDataset = soft_dataset_from_builder(builder)
    soft_dataset.dump()
    print()


if __name__ == '__main__':
    main()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compiled-knowledge
3
- Version: 4.0.0a25
3
+ Version: 4.1.0a2
4
4
  Summary: A Python package for compiling and querying discrete probabilistic graphical models.
5
5
  Author-email: Barry Drake <barry@compiledknowledge.org>
6
6
  License-Expression: MIT