compiled-knowledge 4.0.0a25__cp312-cp312-win_amd64.whl → 4.1.0a2__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of compiled-knowledge might be problematic.
- ck/circuit/_circuit_cy.c +1 -1
- ck/circuit/_circuit_cy.cp312-win_amd64.pyd +0 -0
- ck/circuit_compiler/cython_vm_compiler/_compiler.c +152 -152
- ck/circuit_compiler/cython_vm_compiler/_compiler.cp312-win_amd64.pyd +0 -0
- ck/circuit_compiler/interpret_compiler.py +2 -2
- ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.c +1 -1
- ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.cp312-win_amd64.pyd +0 -0
- ck/dataset/__init__.py +1 -0
- ck/dataset/cross_table.py +270 -0
- ck/dataset/cross_table_probabilities.py +53 -0
- ck/dataset/dataset.py +594 -0
- ck/dataset/dataset_builder.py +512 -0
- ck/dataset/dataset_compute.py +140 -0
- ck/dataset/dataset_from_crosstable.py +45 -0
- ck/dataset/dataset_from_csv.py +151 -0
- ck/dataset/sampled_dataset.py +96 -0
- ck/learning/__init__.py +0 -0
- ck/learning/train_generative.py +149 -0
- ck/pgm.py +29 -27
- ck/pgm_circuit/program_with_slotmap.py +23 -45
- ck/pgm_circuit/support/compile_circuit.py +2 -4
- ck/pgm_compiler/support/circuit_table/_circuit_table_cy.c +1 -1
- ck/pgm_compiler/support/circuit_table/_circuit_table_cy.cp312-win_amd64.pyd +0 -0
- ck/probability/empirical_probability_space.py +1 -0
- ck_demos/dataset/__init__.py +0 -0
- ck_demos/dataset/demo_dataset_builder.py +37 -0
- {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0a2.dist-info}/METADATA +1 -1
- {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0a2.dist-info}/RECORD +31 -18
- {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0a2.dist-info}/WHEEL +0 -0
- {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0a2.dist-info}/licenses/LICENSE.txt +0 -0
- {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0a2.dist-info}/top_level.txt +0 -0
ck/dataset/dataset_from_csv.py ADDED
@@ -0,0 +1,151 @@
+from typing import Iterable, List, Sequence, Optional
+
+from ck.dataset import HardDataset
+from ck.pgm import RandomVariable
+
+
+def hard_dataset_from_csv(
+        rvs: Iterable[RandomVariable],
+        lines: Iterable[str],
+        *,
+        weights: Optional[int | str] = None,
+        sep: Optional[str] = ',',
+        comment: str = '#',
+) -> HardDataset:
+    """
+    Interpret the given sequence of lines as CSV for a HardDataset.
+
+    Each line is a list of state indexes (ints) separated by `sep`.
+
+    Every line should have the same number of values.
+
+    If the first line contains a non-integer value, then the first
+    line will be interpreted as a header line.
+
+    If there is no header line, then the values will be interpreted in the
+    same order as `rvs` and the number of values on each line should be
+    the same as the number of random variables in `rvs`.
+
+    If there is a header line, then it will be interpreted as the order
+    of random variables. There must be a column name in the header to match
+    each name of the given random variables. Additional columns will be ignored.
+
+    Leading and trailing whitespace is ignored for each field, including header column names.
+
+    As text file (and StringIO) objects are iterable over lines, here is how to read a csv file:
+    ```
+    with open(csv_filename, 'r') as file:
+        hard_dataset_from_csv(rvs, file)
+    ```
+    Here is an example to read from a csv string:
+    ```
+    hard_dataset_from_csv(rvs, csv_string.splitlines())
+    ```
+
+    Args:
+        rvs: the random variables for the returned dataset.
+        lines: the sequence of lines to interpret, each line is an instance in the dataset.
+        weights: the column in the csv file holding instance weights. Can be either the
+            column number (counting from zero) or a column name (requires a header line).
+        sep: the string to use to separate values in a line, default is a comma.
+            If set to `None`, lines will be split on any consecutive run of whitespace characters
+            (including \n \r \t \f and spaces).
+        comment: text starting with this will be treated as a comment. Set to '' to disallow comments.
+
+    Returns:
+        a HardDataset.
+
+    Raises:
+        ValueError: if the lines do not conform to a CSV format.
+    """
+    rvs: Sequence[RandomVariable] = tuple(rvs)
+
+    # Define `clean_line` being sensitive to comments.
+    if len(comment) > 0:
+        def clean_line(l: str) -> str:
+            i = l.find(comment)
+            if i >= 0:
+                l = l[:i]
+            return l.strip()
+    else:
+        def clean_line(l: str) -> str:
+            return l.strip()
+
+    # Get the first line which may be a header line or data line
+    it = iter(lines)
+    try:
+        while True:
+            line = clean_line(next(it))
+            if len(line) > 0:
+                break
+    except StopIteration:
+        # Empty dataset with the given random variables
+        return HardDataset((rv, []) for rv in rvs)
+
+    values: List[str] = [value.strip() for value in line.split(sep)]
+    number_of_columns: int = len(values)
+    series: List[List[int]]  # series[dataset-column] = list of values
+    weight_series: Optional[List[float]] = None
+    column_map: List[int]  # column_map[dataset-column] = input-column
+    if all(_is_number(value) for value in values):
+        # First line is not a header line
+        if weights is None:
+            if number_of_columns != len(rvs):
+                raise ValueError('number of columns does not match number of random variables')
+            column_map = list(range(len(rvs)))
+        else:
+            if number_of_columns != len(rvs) + 1:
+                raise ValueError('number of columns does not match number of random variables and weight column')
+            if not isinstance(weights, int):
+                raise ValueError('no header detected - `weights` must be a column number')
+            if not (-number_of_columns <= weights < number_of_columns):
+                raise ValueError('`weights` column number out of range')
+            column_map = list(range(len(rvs) + 1))
+            column_map.pop(weights)
+
+        # Initialise series with the first line of data
+        series = [[int(values[i])] for i in column_map]
+        if weights is not None:
+            weight_series = [float(values[weights])]
+
+    else:
+        # First line is a header line
+        # Lookup each random variable to find its column
+        column_map = [
+            values.index(rv.name)  # will raise ValueError if not found
+            for rv in rvs
+        ]
+        if isinstance(weights, str):
+            # Convert weights column name to column number
+            weights: int = values.index(weights)  # will raise ValueError if not found
+        elif isinstance(weights, int) and not (number_of_columns <= weights < number_of_columns):
+            raise ValueError('`weights` column number out of range')
+
+        # Initialise each series as empty
+        series = [[] for _ in rvs]
+        if weights is not None:
+            weight_series = []
+
+    # Read remaining data lines
+    for line in it:
+        line = clean_line(line)
+        if len(line) == 0:
+            continue
+        if len(values) != number_of_columns:
+            raise ValueError('number of values does not match number of columns')
+        values = line.split(sep)
+        for series_i, i in zip(series, column_map):
+            series_i.append(int(values[i]))
+        if weights is not None:
+            weight_series.append(float(values[weights]))
+
+    # Construct the dataset
+    return HardDataset(zip(rvs, series), weights=weight_series)
+
+
+def _is_number(s: str) -> bool:
+    try:
+        float(s)
+        return True
+    except ValueError:
+        return False
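For orientation, here is a minimal usage sketch of the new `hard_dataset_from_csv` function, exercising the header-matching and weight-column behaviour described in its docstring. The PGM, variable names and CSV text are invented for illustration; only `PGM.new_rv` and `hard_dataset_from_csv` are taken from this release.

```
from ck.pgm import PGM
from ck.dataset.dataset_from_csv import hard_dataset_from_csv

pgm = PGM()
rain = pgm.new_rv('rain', ('no', 'yes'))
grass = pgm.new_rv('grass', ('dry', 'wet'))

# Header line names the columns; the 'n' column carries instance weights.
csv_text = """
# comments and blank lines are skipped
rain, grass, n
0, 0, 10
1, 1, 7
1, 0, 3
"""

dataset = hard_dataset_from_csv([rain, grass], csv_text.splitlines(), weights='n')
```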
ck/dataset/sampled_dataset.py ADDED
@@ -0,0 +1,96 @@
+import random
+from dataclasses import dataclass
+from typing import Sequence, List, Iterator, Tuple, Dict
+
+import numpy as np
+
+from ck.dataset import HardDataset
+from ck.dataset.cross_table import CrossTable
+from ck.pgm import RandomVariable, Instance
+from ck.sampling.sampler import Sampler
+from ck.utils.np_extras import dtype_for_number_of_states, NDArray
+from ck.utils.random_extras import Random
+
+
+def dataset_from_sampler(sampler: Sampler, length: int) -> HardDataset:
+    """
+    Create a hard dataset using samples from a sampler.
+
+    Args:
+        sampler: A sampler which defined the random variables and provides samples.
+        length: The length of the dataset to create.
+
+    Returns:
+        A HardDataset of the given length.
+    """
+    rvs: Sequence[RandomVariable] = sampler.rvs
+    columns: List[NDArray] = [
+        np.zeros(length, dtype=dtype_for_number_of_states(len(rv)))
+        for rv in rvs
+    ]
+    for i, instance in enumerate(sampler.take(length)):
+        for column, state in zip(columns, instance):
+            column[i] = state
+    return HardDataset(zip(rvs, columns))
+
+
+class CrossTableSampler(Sampler):
+    def __init__(self, crosstab: CrossTable, rand: Random = random):
+        """
+        Adapt a cross table to a sampler.
+
+        Instances will be drawn from the sampler according to their
+        weight in the given cross-table. If the given cross-table is
+        modified after constructing the sampler, the sampler will not
+        be affected.
+        """
+        if len(crosstab) == 0:
+            raise ValueError('no instances to sample')
+
+        super().__init__(rvs=crosstab.rvs, condition=())
+
+        # Group instances by weight.
+        # We do this in anticipation that it makes sampling more efficient.
+        weight_groups: Dict[float, _WeightGroup] = {}
+        for instance, weight in crosstab.items():
+            weight_group = weight_groups.get(weight)
+            if weight_group is None:
+                weight_groups[weight] = _WeightGroup(weight, weight, [instance])
+            else:
+                weight_group.append(instance)
+
+        self._weight_groups: List[_WeightGroup] = list(weight_groups.values())
+        self._total_weight = sum(group.total for group in weight_groups.values())
+        self._rand = rand
+
+    def __iter__(self) -> Iterator[Instance]:
+        while True:
+            # This code performs inverse transform sampling
+            r: float = self._rand.random() * self._total_weight
+
+            # This does a serial search to find the weight group.
+            # This is efficient for small numbers of groups, but this may be
+            # improved for large numbers of groups.
+            it = iter(self._weight_groups)
+            group = next(it)
+            while r >= group.total:
+                r -= group.total
+                group = next(it)
+
+            # Pick an instance in the group
+            i = int(r / group.weight)
+            yield group.instances[i]
+
+
+@dataclass
+class _WeightGroup:
+    """
+    Support for CrossTableSampler.
+    """
+    weight: float
+    total: float
+    instances: List[Tuple[int, ...]]
+
+    def append(self, instance: Tuple[int, ...]) -> None:
+        self.total += self.weight
+        self.instances.append(instance)
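The new sampler types compose with `cross_table_from_dataset` (defined in the new `ck/dataset/cross_table.py`, whose full diff is not reproduced here) to resample a dataset in proportion to its observed instance weights. A minimal sketch, assuming a `HardDataset` named `dataset` over the variables `rain` and `grass` from the CSV example above, and assuming `Sampler.take` draws from the iterator that `CrossTableSampler.__iter__` provides:

```
from ck.dataset.cross_table import cross_table_from_dataset
from ck.dataset.sampled_dataset import CrossTableSampler, dataset_from_sampler

# Count the weighted occurrences of each (rain, grass) instance.
crosstab = cross_table_from_dataset(dataset, (rain, grass))

# Draw 1000 new instances, distributed according to the cross-table weights.
sampler = CrossTableSampler(crosstab)
resampled = dataset_from_sampler(sampler, 1000)
```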
ck/learning/__init__.py ADDED
File without changes
ck/learning/train_generative.py ADDED
@@ -0,0 +1,149 @@
+from dataclasses import dataclass
+from typing import Dict, Tuple, List
+
+import numpy as np
+
+from ck.dataset import SoftDataset, HardDataset
+from ck.dataset.cross_table import CrossTable, cross_table_from_dataset
+from ck.pgm import PGM, Instance, DensePotentialFunction, Shape, natural_key_idx, SparsePotentialFunction
+from ck.utils.iter_extras import multiply
+from ck.utils.np_extras import NDArrayFloat64
+
+
+@dataclass
+class ParameterValues:
+    """
+    A ParameterValues object represents learned parameter values of a PGM.
+    """
+    pgm: PGM
+    """
+    The PGM that the parameter values pertains to.
+    """
+
+    cpts: List[Dict[Instance, NDArrayFloat64]]
+    """
+    A list of CPTs co-indexed with `pgm.factors`. Each CPT is a dict
+    mapping from instances of the parent random variables (of the factors)
+    to the child conditional probability distribution (CPD).
+    """
+
+    def set_zero(self) -> None:
+        """
+        Set the potential function of each PGM factor to zero.
+        """
+        for factor in self.pgm.factors:
+            factor.set_zero()
+
+    def set_cpt(self) -> None:
+        """
+        Set the potential function of each PGM factor to a CPTPotentialFunction,
+        using our parameter values.
+        """
+        for factor, cpt in zip(self.pgm.factors, self.cpts):
+            factor.set_cpt().set(*cpt.items())
+
+    def set_dense(self) -> None:
+        """
+        Set the potential function of each PGM factor to a DensePotentialFunction,
+        using our parameter values.
+        """
+        for factor, cpt in zip(self.pgm.factors, self.cpts):
+            pot_function: DensePotentialFunction = factor.set_dense()
+            parent_shape: Shape = factor.shape[1:]
+            child_state: int
+            value: float
+            if len(parent_shape) == 0:
+                cpd: NDArrayFloat64 = cpt[()]
+                for child_state, value in enumerate(cpd):
+                    pot_function[child_state] = value
+            else:
+                parent_space: int = multiply(parent_shape)
+                parent_states: Instance
+                cpd: NDArrayFloat64
+                for parent_states, cpd in cpt.items():
+                    idx: int = natural_key_idx(parent_shape, parent_states)
+                    for value in cpd:
+                        pot_function[idx] = value
+                        idx += parent_space
+
+    def set_sparse(self) -> None:
+        """
+        Set the potential function of each PGM factor to a SparsePotentialFunction,
+        using our parameter values.
+        """
+        for factor, cpt in zip(self.pgm.factors, self.cpts):
+            pot_function: SparsePotentialFunction = factor.set_sparse()
+            parent_states: Instance
+            child_state: int
+            cpd: NDArrayFloat64
+            value: float
+            for parent_states, cpd in cpt.items():
+                for child_state, value in enumerate(cpd):
+                    key = (child_state,) + parent_states
+                    pot_function[key] = value
+
+
+def train_generative_bn(
+        pgm: PGM,
+        dataset: HardDataset | SoftDataset,
+        *,
+        dirichlet_prior: float = 0,
+        check_bayesian_network: bool = True,
+) -> ParameterValues:
+    """
+    Maximum-likelihood, generative training for a Bayesian network.
+
+    Args:
+        pgm: the probabilistic graphical model defining the model structure.
+            Potential function values are ignored and need not be set.
+        dataset: a dataset of random variable states.
+        dirichlet_prior: a real number >= 0. See `CrossTable` for an explanation.
+        check_bayesian_network: if true and not pgm.is_structure_bayesian an exception will be raised.
+
+    Returns:
+        a ParameterValues object that can be used to update the parameters of the given PGM.
+
+    Raises:
+        ValueError: if the given PGM does not have a Bayesian network structure, and check_bayesian_network is True.
+    """
+    if check_bayesian_network and not pgm.is_structure_bayesian:
+        raise ValueError('the given PGM is not a Bayesian network')
+    cpts: List[Dict[Instance, NDArrayFloat64]] = [
+        cpt_from_crosstab(cross_table_from_dataset(dataset, factor.rvs, dirichlet_prior=dirichlet_prior))
+        for factor in pgm.factors
+    ]
+    return ParameterValues(pgm, cpts)
+
+
+def cpt_from_crosstab(crosstab: CrossTable) -> Dict[Instance, NDArrayFloat64]:
+    """
+    Make a conditional probability table (CPT) from a cross-table.
+
+    Args:
+        crosstab: a CrossTable representing the weight of unique instances.
+
+    Returns:
+        a mapping from instances of the parent random variables to the child
+        conditional probability distribution (CPD).
+
+    Assumes:
+        the first random variable in `crosstab.rvs` is the child random variable.
+    """
+    # Number of states for the child random variable.
+    child_size: int = len(crosstab.rvs[0])
+
+    # Get distribution over child states for seen parent states
+    parents_weights: Dict[Instance, NDArrayFloat64] = {}
+    for state, weight in crosstab.items():
+        parent_state: Tuple[int, ...] = state[1:]
+        child_state: int = state[0]
+        parent_weights = parents_weights.get(parent_state)
+        if parent_weights is None:
+            parents_weights[parent_state] = parent_weights = np.zeros(child_size, dtype=np.float64)
+        parent_weights[child_state] += weight
+
+    # Normalise
+    for parent_state, parent_weights in parents_weights.items():
+        parent_weights /= parent_weights.sum()
+
+    return parents_weights
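Putting the new learning module together: `train_generative_bn` builds one cross-table per factor, and `cpt_from_crosstab` normalises each parent instance's child-weight vector into a conditional distribution (for example, child weights (3, 1) for some parent instance become the CPD (0.75, 0.25)). A minimal training sketch, assuming `pgm` is a Bayesian-network-structured PGM and `dataset` is a `HardDataset` or `SoftDataset` over its variables:

```
from ck.learning.train_generative import train_generative_bn

params = train_generative_bn(pgm, dataset, dirichlet_prior=1.0)

# Write the learned CPTs back into the PGM, using whichever
# potential-function representation suits the model.
params.set_cpt()       # CPT potential functions
# params.set_dense()   # or dense potential functions
# params.set_sparse()  # or sparse potential functions
```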
ck/pgm.py CHANGED
@@ -19,16 +19,18 @@ State: TypeAlias = Union[int, str, bool, float, None]
 The type for a possible state of a random variable.
 """
 
-Instance: TypeAlias =
+Instance: TypeAlias = Tuple[int, ...]
 """
-An instance (of a sequence of random variables) is a
+An instance (of a sequence of random variables) is a tuple of integers
 that are state indexes, co-indexed with a known sequence of random variables.
 """
 
-Key: TypeAlias = Union[
+Key: TypeAlias = Union[Sequence[int], int]
 """
-A key identifies an instance, either as
-single integer
+A key identifies an instance, either as a sequence of integers or a
+single integer. The integers are state indexes, co-indexed with a known
+sequence of random variables. A single integer represents an instance with
+one dimension.
 """
 
 Shape: TypeAlias = Sequence[int]
@@ -1871,7 +1873,7 @@ class PotentialFunction(ABC):
         a hypothetical parameter index assuming that every valid key has a unique parameter
         as per DensePotentialFunction.
         """
-        return
+        return natural_key_idx(self._shape, key)
 
     def param_id(self, param_idx: int) -> ParamId:
         """
@@ -2029,7 +2031,7 @@ class ZeroPotentialFunction(PotentialFunction):
         return 0
 
     def param_idx(self, key: Key) -> int:
-        return
+        return natural_key_idx(self._shape, key)
 
     def is_cpt(self, tolerance=DEFAULT_CPT_TOLERANCE) -> bool:
         return True
@@ -3364,26 +3366,7 @@ def rv_instances_as_indicators(*rvs: RandomVariable, flip: bool = False) -> Iter
     return _combos(rvs, flip=not flip)
 
 
-def
-    """
-    Convert a key to an instance.
-
-    Args:
-        key: a key into a state space.
-
-    Returns:
-        A instance from the state space, as a tuple of state indexes, co-indexed with the given shape.
-
-    Assumes:
-        The key is valid for the implied state space.
-    """
-    if isinstance(key, int):
-        return (key,)
-    else:
-        return tuple(key)
-
-
-def _natural_key_idx(shape: Shape, key: Key) -> int:
+def natural_key_idx(shape: Shape, key: Key) -> int:
     """
     What is the natural index of the given key, assuming the given shape.
 
@@ -3409,6 +3392,25 @@ def _natural_key_idx(shape: Shape, key: Key) -> int:
     return result
 
 
+def _key_to_instance(key: Key) -> Instance:
+    """
+    Convert a key to an instance.
+
+    Args:
+        key: a key into a state space.
+
+    Returns:
+        A instance from the state space, as a tuple of state indexes, co-indexed with the given shape.
+
+    Assumes:
+        The key is valid for the implied state space.
+    """
+    if isinstance(key, int):
+        return (key,)
+    else:
+        return tuple(key)
+
+
 def _zero_space(shape: Shape) -> int:
     """
     Return the size of the zero space of the given shape. This is the number
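The `natural_key_idx` helper promoted to the public API above follows the natural index order referred to elsewhere in `pgm.py` (the last random variable changes most quickly). A small sanity sketch of that convention, assuming the row-major ordering it implies:

```
from ck.pgm import natural_key_idx

# For shape (2, 3) the natural order enumerates keys as
# (0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2), so:
assert natural_key_idx((2, 3), (1, 2)) == 1 * 3 + 2  # == 5

# A single-int key denotes an instance of a one-dimensional shape.
assert natural_key_idx((4,), 2) == 2
```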
ck/pgm_circuit/program_with_slotmap.py CHANGED
@@ -1,6 +1,8 @@
-from typing import Tuple, Sequence, Dict
+from typing import Tuple, Sequence, Dict
 
-
+import numpy as np
+
+from ck.pgm import RandomVariable, Indicator, ParamId
 from ck.pgm_circuit.slot_map import SlotMap, SlotKey
 from ck.probability.probability_space import Condition, check_condition
 from ck.program.program_buffer import ProgramBuffer
@@ -69,40 +71,6 @@ class ProgramWithSlotmap:
     def slot_map(self) -> SlotMap:
         return self._slot_map
 
-    def instances(self, flip: bool = False) -> Iterable[Instance]:
-        """
-        Enumerate instances of the random variables.
-
-        Each instance is a tuples of state indexes, co-indexed with the given random variables.
-
-        The order is the natural index order (i.e., last random variable changing most quickly).
-
-        Args:
-            flip: if true, then first random variable changes most quickly.
-
-        Returns:
-            an iteration over tuples, each tuple holds state indexes
-            co-indexed with the given random variables.
-        """
-        return rv_instances(*self._rvs, flip=flip)
-
-    def instances_as_indicators(self, flip: bool = False) -> Iterable[Sequence[Indicator]]:
-        """
-        Enumerate instances of the random variables.
-
-        Each instance is a tuples of indicators, co-indexed with the given random variables.
-
-        The order is the natural index order (i.e., last random variable changing most quickly).
-
-        Args:
-            flip: if true, then first random variable changes most quickly.
-
-        Returns:
-            an iteration over tuples, each tuples holds random variable indicators
-            co-indexed with the given random variables.
-        """
-        return rv_instances_as_indicators(*self._rvs, flip=flip)
-
     def compute(self) -> NDArrayNumeric:
         """
         Execute the program to compute and return the result. As per `ProgramBuffer.compute`.
@@ -146,29 +114,36 @@ class ProgramWithSlotmap:
         """
         return self._program_buffer.vars
 
-    def __setitem__(self, item: int | slice | SlotKey |
+    def __setitem__(self, item: int | slice | SlotKey | RandomVariable, value: float) -> None:
         """
-        Set
+        Set input slot value/s.
         """
         if isinstance(item, (int, slice)):
             self._program_buffer[item] = value
         elif isinstance(item, (Indicator, ParamId)):
             self._program_buffer[self._slot_map[item]] = value
+        elif isinstance(item, RandomVariable):
+            for ind in item:
+                self._program_buffer[self._slot_map[ind]] = value
         else:
-
-            for i in item:
-                self[i] = value
+            raise IndexError(f'unknown index type: {type(item)}')
 
-    def __getitem__(self, item: int | slice | SlotKey) -> NDArrayNumeric:
+    def __getitem__(self, item: int | slice | SlotKey | RandomVariable) -> NDArrayNumeric:
         """
-        Get
+        Get input slot value/s.
        """
         if isinstance(item, (int, slice)):
             return self._program_buffer[item]
         elif isinstance(item, (Indicator, ParamId)):
             return self._program_buffer[self._slot_map[item]]
+        elif isinstance(item, RandomVariable):
+            return np.fromiter(
+                (self._program_buffer[self._slot_map[ind]] for ind in item),
+                dtype=self._program_buffer.dtype,
+                count=len(item)
+            )
         else:
-            raise IndexError('unknown index type')
+            raise IndexError(f'unknown index type: {type(item)}')
 
     def set_condition(self, *condition: Condition) -> None:
         """
@@ -211,7 +186,10 @@ class ProgramWithSlotmap:
 
         Args:
             rv: a random variable whose indicators are in the slot map.
-            values: list of values
+            values: list of values
+
+        Assumes:
+            len(values) == len(rv).
         """
         for i in range(len(rv)):
             self[rv[i]] = values[i]
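The new `RandomVariable` overloads above make it possible to address all of a variable's indicator slots at once. A small usage sketch with hypothetical objects, where `pws` is a `ProgramWithSlotmap` and `rain` is a `RandomVariable` of the underlying PGM (indexing a random variable, as in `rain[1]`, yields one of its indicators):

```
pws[rain] = 1.0      # set every indicator slot of `rain` to 1.0
pws[rain[1]] = 0.0   # existing behaviour: set a single indicator slot
values = pws[rain]   # numpy array of indicator slot values, one per state
```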
ck/pgm_circuit/support/compile_circuit.py CHANGED
@@ -30,11 +30,9 @@ def compile_results(
         a compiled RawProgram.
     """
    circuit: Circuit = pgm_circuit.circuit_top.circuit
-    if const_parameters:
-        parameter_values = pgm_circuit.parameter_values
-        number_of_indicators = pgm_circuit.number_of_indicators
+    if const_parameters and len(pgm_circuit.parameter_values) > 0:
         with TmpConst(circuit) as tmp:
-            for slot, value in enumerate(parameter_values, start=number_of_indicators):
+            for slot, value in enumerate(pgm_circuit.parameter_values, start=pgm_circuit.number_of_indicators):
                 tmp.set_const(slot, value)
             raw_program: RawProgram = compiler(*results, circuit=circuit)
     else:
ck/pgm_compiler/support/circuit_table/_circuit_table_cy.c CHANGED
@@ -13,7 +13,7 @@
             "/O2"
         ],
         "include_dirs": [
-            "C:\\Users\\runneradmin\\AppData\\Local\\Temp\\build-env-
+            "C:\\Users\\runneradmin\\AppData\\Local\\Temp\\build-env-zvpv36cx\\Lib\\site-packages\\numpy\\_core\\include"
         ],
         "name": "ck.pgm_compiler.support.circuit_table._circuit_table_cy",
         "sources": [

ck/pgm_compiler/support/circuit_table/_circuit_table_cy.cp312-win_amd64.pyd CHANGED
Binary file
ck/probability/empirical_probability_space.py CHANGED
@@ -11,6 +11,7 @@ class EmpiricalProbabilitySpace(ProbabilitySpace):
     Note that this is not necessarily an efficient approach to calculating probabilities and statistics.
 
     This probability space treats each of the samples as equally weighted.
+    For a probability space over unequally weighted samples, consider using `CrossTableProbabilitySpace`.
 
     Assumes:
         len(sample) == len(rvs), for each sample in samples.
ck_demos/dataset/__init__.py ADDED
File without changes
ck_demos/dataset/demo_dataset_builder.py ADDED
@@ -0,0 +1,37 @@
+from ck.dataset import HardDataset, SoftDataset
+from ck.dataset.dataset_builder import DatasetBuilder, soft_dataset_from_builder, hard_dataset_from_builder
+from ck.pgm import PGM
+
+
+def main() -> None:
+    pgm = PGM()
+    x = pgm.new_rv('x', (True, False))
+    y = pgm.new_rv('y', ('yes', 'no', 'maybe'))
+
+    builder = DatasetBuilder([x, y])
+    builder.append()
+    builder.append(1, 2).weight = 3
+    builder.append(None, [0.7, 0.1, 0.2])
+    builder.append().set_states(True, 'maybe')
+
+    print('DatasetBuilder dump')
+    builder.dump()
+    print()
+
+    print('DatasetBuilder dump, showing states and custom missing values')
+    builder.dump(as_states=True, missing='?')
+    print()
+
+    print('HardDataset dump')
+    dataset: HardDataset = hard_dataset_from_builder(builder, missing=99)
+    dataset.dump()
+    print()
+
+    print('SoftDataset dump')
+    dataset: SoftDataset = soft_dataset_from_builder(builder)
+    dataset.dump()
+    print()
+
+
+if __name__ == '__main__':
+    main()