compiled-knowledge 4.0.0a24__cp312-cp312-win32.whl → 4.1.0a1__cp312-cp312-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of compiled-knowledge might be problematic. Click here for more details.
- ck/circuit/_circuit_cy.c +1 -1
- ck/circuit/_circuit_cy.cp312-win32.pyd +0 -0
- ck/circuit/tmp_const.py +5 -4
- ck/circuit_compiler/cython_vm_compiler/_compiler.c +152 -152
- ck/circuit_compiler/cython_vm_compiler/_compiler.cp312-win32.pyd +0 -0
- ck/circuit_compiler/interpret_compiler.py +2 -2
- ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.c +1 -1
- ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.cp312-win32.pyd +0 -0
- ck/circuit_compiler/support/llvm_ir_function.py +4 -4
- ck/dataset/__init__.py +1 -0
- ck/dataset/cross_table.py +270 -0
- ck/dataset/cross_table_probabilities.py +53 -0
- ck/dataset/dataset.py +577 -0
- ck/dataset/dataset_compute.py +140 -0
- ck/dataset/dataset_from_crosstable.py +45 -0
- ck/dataset/dataset_from_csv.py +147 -0
- ck/dataset/sampled_dataset.py +96 -0
- ck/example/diamond_square.py +3 -1
- ck/example/triangle_square.py +3 -1
- ck/example/truss.py +3 -1
- ck/in_out/parse_net.py +21 -19
- ck/in_out/parser_utils.py +7 -3
- ck/learning/__init__.py +0 -0
- ck/learning/train_generative.py +149 -0
- ck/pgm.py +95 -84
- ck/pgm_circuit/mpe_program.py +3 -4
- ck/pgm_circuit/pgm_circuit.py +27 -18
- ck/pgm_circuit/program_with_slotmap.py +27 -46
- ck/pgm_circuit/support/compile_circuit.py +2 -4
- ck/pgm_compiler/support/circuit_table/_circuit_table_cy.c +1 -1
- ck/pgm_compiler/support/circuit_table/_circuit_table_cy.cp312-win32.pyd +0 -0
- ck/probability/empirical_probability_space.py +1 -0
- ck/probability/probability_space.py +10 -11
- ck/program/raw_program.py +23 -16
- ck/sampling/sampler_support.py +5 -6
- ck/utils/iter_extras.py +3 -2
- ck/utils/local_config.py +16 -8
- {compiled_knowledge-4.0.0a24.dist-info → compiled_knowledge-4.1.0a1.dist-info}/METADATA +1 -1
- {compiled_knowledge-4.0.0a24.dist-info → compiled_knowledge-4.1.0a1.dist-info}/RECORD +42 -32
- {compiled_knowledge-4.0.0a24.dist-info → compiled_knowledge-4.1.0a1.dist-info}/WHEEL +0 -0
- {compiled_knowledge-4.0.0a24.dist-info → compiled_knowledge-4.1.0a1.dist-info}/licenses/LICENSE.txt +0 -0
- {compiled_knowledge-4.0.0a24.dist-info → compiled_knowledge-4.1.0a1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"""
|
|
2
|
+
A collection of functions to compute values over datasets using programs.
|
|
3
|
+
"""
|
|
4
|
+
import ctypes as ct
|
|
5
|
+
from typing import Optional, List, Dict
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
from ck.dataset import SoftDataset
|
|
10
|
+
from ck.pgm import Indicator, RandomVariable
|
|
11
|
+
from ck.pgm_circuit.slot_map import SlotMap
|
|
12
|
+
from ck.program import RawProgram
|
|
13
|
+
from ck.utils.np_extras import NDArray, NDArrayNumeric
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def accumulate_compute(
    program: RawProgram,
    slot_arrays: NDArray,
    *,
    weights: Optional[NDArray] = None,
    accumulator: Optional[NDArray] = None,
) -> NDArray:
    """
    Apply the given program to every instance in the dataset, summing all results over the instances.

    Args:
        program: the mathematical transformation to apply to the data.
        slot_arrays: a 2D numpy array of shape (number_of_instances, number_of_slots). Appropriate
            slot arrays can be constructed from a soft dataset using `get_slot_arrays`.
        weights: an optional 1D array of instance weights, of shape (number_of_instances, ), and
            co-indexed with slot_arrays. If provided, the program results for each instance are
            multiplied by the instance weight before being accumulated.
        accumulator: an optional array to perform the result accumulation, summing with the initial
            values of the provided accumulator. If provided, it is updated in place.

    Returns:
        the accumulator, an array of shape `(program.number_of_results, )`. This is the provided
        `accumulator` object if one was given, otherwise a newly allocated array.

    Raises:
        ValueError: if slot_arrays.shape is not `(..., program.number_of_vars)`.
        ValueError: if an accumulator is provided, but is not shape `(program.number_of_results, )`.
        ValueError: if the dtype of slot_arrays or of the accumulator is not `program.dtype`.
        ValueError: if weights provided, but is not shape `(slot_arrays.shape[0],)`.
    """
    number_of_results: int = program.number_of_results
    number_of_vars: int = program.number_of_vars

    if len(slot_arrays.shape) != 2 or slot_arrays.shape[1] != program.number_of_vars:
        raise ValueError(f'slot arrays expected shape (..., {number_of_vars}) but got {slot_arrays.shape}')

    if accumulator is None:
        accumulator = np.zeros(number_of_results, dtype=program.dtype)
    elif accumulator.shape != (number_of_results,):
        raise ValueError(f'accumulator shape {accumulator.shape} does not match number of results: {number_of_results}')

    if slot_arrays.dtype != program.dtype:
        raise ValueError(f'slot arrays dtype {slot_arrays.dtype} does not match program.dtype: {program.dtype}')
    if accumulator.dtype != program.dtype:
        # Report the accumulator's own dtype (not the slot arrays' dtype).
        raise ValueError(f'accumulator dtype {accumulator.dtype} does not match program.dtype: {program.dtype}')

    # Each row is handed to the program function as a raw pointer, so every row
    # must be laid out contiguously in memory. This is a no-op (no copy) when
    # `slot_arrays` is already C-contiguous.
    slot_arrays = np.ascontiguousarray(slot_arrays)

    ptr_type = ct.POINTER(np.ctypeslib.as_ctypes_type(program.dtype))

    # Create buffers for program function tmps and outputs
    # We do not need to create a buffer for program function inputs as that
    # will be provided by `slot_arrays`.
    array_outs: NDArrayNumeric = np.zeros(program.number_of_results, dtype=program.dtype)
    array_tmps: NDArrayNumeric = np.zeros(program.number_of_tmps, dtype=program.dtype)
    c_array_tmps = array_tmps.ctypes.data_as(ptr_type)
    c_array_outs = array_outs.ctypes.data_as(ptr_type)

    if weights is None:
        # This is the unweighted version
        for instance in slot_arrays:
            c_array_vars = instance.ctypes.data_as(ptr_type)
            program.function(c_array_vars, c_array_tmps, c_array_outs)
            accumulator += array_outs

    else:
        # This is the weighted version
        expected_shape = (slot_arrays.shape[0],)
        if weights.shape != expected_shape:
            raise ValueError(f'weight shape {weights.shape} is not as expected : {expected_shape}')

        for weight, instance in zip(weights, slot_arrays):
            c_array_vars = instance.ctypes.data_as(ptr_type)
            program.function(c_array_vars, c_array_tmps, c_array_outs)
            accumulator += array_outs * weight

    return accumulator
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def get_slot_arrays(
    dataset: SoftDataset,
    number_of_slots: int,
    slot_map: SlotMap,
) -> NDArray:
    """
    Build the matrix of slot values for a dataset: one row per instance
    and one column per slot, for slots 0 to number_of_slots - 1.

    Each column is taken from the dataset state weights of the indicator
    that the slot map assigns to that slot.

    The result is suitable as the `slot_arrays` argument of `accumulate_compute`.

    Args:
        dataset: the dataset providing state weights for its random variables.
        number_of_slots: the number of columns in the result.
        slot_map: maps indicators to slot numbers. Entries with a slot outside
            of `0 <= slot < number_of_slots`, or with a `None` key, are ignored.

    Returns:
        a 2D numpy array of shape (len(dataset), number_of_slots).

    Raises:
        ValueError: if multiple indicators for a slot in the slot map
        ValueError: if there are slots with no indicator in slot map
    """
    # With no slots there are no columns to stack, so construct
    # the empty result directly to guarantee its shape.
    if number_of_slots == 0:
        return np.empty(shape=(len(dataset), 0))

    # Invert the slot map: slot number -> indicator.
    slot_indicators: List[Optional[Indicator]] = [None for _ in range(number_of_slots)]
    for candidate, slot in slot_map.items():
        if not (0 <= slot < number_of_slots) or candidate is None:
            continue
        current = slot_indicators[slot]
        if current is not None and current != candidate:
            raise ValueError(f'multiple indicators for slot: {slot}')
        slot_indicators[slot] = candidate

    # Every slot must have been claimed by some indicator.
    unfilled_slots = [slot for slot, found in enumerate(slot_indicators) if found is None]
    if unfilled_slots:
        unfilled_slots_str = ', '.join(map(str, unfilled_slots))
        raise ValueError(f'slots with no indicator in slot map: {unfilled_slots_str}')

    # Look up the state weights of each dataset random variable, keyed by rv index.
    weights_by_rv_idx: Dict[int, NDArray] = {}
    for rv in dataset.rvs:
        weights_by_rv_idx[rv.idx] = dataset.state_weights(rv)

    # One column per slot: the weights of the indicator's state over all instances.
    slot_columns = []
    for slot_indicator in slot_indicators:
        rv_weights = weights_by_rv_idx[slot_indicator.rv_idx]
        slot_columns.append(rv_weights[:, slot_indicator.state_idx])

    return np.column_stack(slot_columns)
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from typing import Sequence
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
from ck.dataset import HardDataset
|
|
6
|
+
from ck.dataset.cross_table import CrossTable
|
|
7
|
+
from ck.pgm import RandomVariable
|
|
8
|
+
from ck.utils.np_extras import dtype_for_number_of_states
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def dataset_from_cross_table(cross_table: CrossTable) -> HardDataset:
    """
    Convert a cross-table into a weighted HardDataset.

    Every `(instance, weight)` entry of the cross-table becomes one
    instance of the dataset, carrying that weight.

    Args:
        cross_table: the cross-table to convert. Its `items()` are
            expected to yield `(instance, weight)` pairs, where an instance
            holds one state index per random variable of `cross_table.rvs`.

    Returns:
        A dataset over `cross_table.rvs` where instances and instance
        weights are those of the given cross-table. Weights are stored
        as a float64 array.

    Ensures:
        The sum of the dataset weights equals the sum of the cross-table weights.
        NOTE(review): the original documentation states that zero weighted
        instances are not counted - presumably the cross-table never holds
        them; confirm against `CrossTable`.
    """
    rvs: Sequence[RandomVariable] = cross_table.rvs

    # Transpose the cross-table entries into one list of states per
    # random variable, plus a parallel list of weights.
    state_columns = [[] for _ in rvs]
    instance_weights = []
    for instance, weight in cross_table.items():
        for column, state in zip(state_columns, instance):
            column.append(state)
        instance_weights.append(weight)

    weights_array = np.array(instance_weights, dtype=np.float64)

    # Each column uses the dtype appropriate for the number of states of its random variable.
    data = (
        (rv, np.array(column, dtype=dtype_for_number_of_states(len(rv))))
        for rv, column in zip(rvs, state_columns)
    )
    return HardDataset(data=data, weights=weights_array)
|
|
44
|
+
|
|
45
|
+
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
from typing import Iterable, List, Sequence, Optional
|
|
2
|
+
|
|
3
|
+
from ck.dataset import HardDataset
|
|
4
|
+
from ck.pgm import RandomVariable
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def hard_dataset_from_csv(
    rvs: Iterable[RandomVariable],
    lines: Iterable[str],
    *,
    weights: Optional[int | str] = None,
    sep: str = ',',
    comment: str = '#',
) -> HardDataset:
    """
    Interpret the given sequence of lines as CSV for a HardDataset.

    Each line is a list of state indexes (ints) separated by `sep`.

    Every line should have the same number of values.

    Blank lines (after removing comments and surrounding whitespace) are skipped.

    If the first line contains a value that cannot be parsed as a number,
    then the first line will be interpreted as a header line.

    If there is no header line, then the values will be interpreted in the
    same order as `rvs` and the number of values on each line should be
    the same as the number of random variables in `rvs` (plus one if a
    `weights` column is specified).

    If there is a header line, then it will be interpreted as the order
    of random variables. There must be a column name in the header to match
    each name of the given random variables. Additional columns will be ignored.

    As text file (and StringIO) objects are iterable over lines, here is how to read a csv file:
    ```
    with open(csv_filename, 'r') as file:
        hard_dataset_from_csv(rvs, file)
    ```
    Here is an example to read from a csv string:
    ```
    hard_dataset_from_csv(rvs, csv_string.splitlines())
    ```

    Args:
        rvs: the random variables for the returned dataset.
        lines: the sequence of lines to interpret, each line is an instance in the dataset.
        weights: the column in the csv file holding instance weights. Can be either the
            column number (counting from zero, negative numbers count from the last column)
            or a column name (requires a header line).
        sep: the string to use to separate values in a line, default is a comma.
        comment: text starting with this will be treated as a comment. Set to '' to disallow comments.

    Returns:
        a HardDataset.

    Raises:
        ValueError: if the lines do not conform to a CSV format.
    """
    rvs: Sequence[RandomVariable] = tuple(rvs)

    # Define `clean_line` being sensitive to comments.
    if len(comment) > 0:
        def clean_line(l: str) -> str:
            i = l.find(comment)
            if i >= 0:
                l = l[:i]
            return l.strip()
    else:
        def clean_line(l: str) -> str:
            return l.strip()

    # Get the first line which may be a header line or data line
    it = iter(lines)
    try:
        while True:
            line = clean_line(next(it))
            if len(line) > 0:
                break
    except StopIteration:
        # Empty dataset with the given random variables
        return HardDataset((rv, []) for rv in rvs)

    values: List[str] = [value.strip() for value in line.split(sep)]
    number_of_columns: int = len(values)
    series: List[List[int]]  # series[dataset-column] = list of values
    weight_series: Optional[List[float]] = None
    column_map: List[int]  # column_map[dataset-column] = input-column
    if all(_is_number(value) for value in values):
        # First line is not a header line
        if weights is None:
            if number_of_columns != len(rvs):
                raise ValueError('number of columns does not match number of random variables')
            column_map = list(range(len(rvs)))
        else:
            if number_of_columns != len(rvs) + 1:
                raise ValueError('number of columns does not match number of random variables and weight column')
            if not isinstance(weights, int):
                raise ValueError('no header detected - `weights` must be a column number')
            if not (-number_of_columns <= weights < number_of_columns):
                raise ValueError('`weights` column number out of range')
            # The random variables occupy every column except the weights column.
            column_map = list(range(len(rvs) + 1))
            column_map.pop(weights)

        # Initialise series with the first line of data
        series = [[int(values[i])] for i in column_map]
        if weights is not None:
            weight_series = [float(values[weights])]

    else:
        # First line is a header line
        # Lookup each random variable to find its column
        column_map = [
            values.index(rv.name)  # will raise ValueError if not found
            for rv in rvs
        ]
        if isinstance(weights, str):
            # Convert weights column name to column number
            weights: int = values.index(weights)  # will raise ValueError if not found
        elif isinstance(weights, int) and not (-number_of_columns <= weights < number_of_columns):
            # Same range rule as the no-header case: negative numbers index from the end.
            raise ValueError('`weights` column number out of range')

        # Initialise each series as empty
        series = [[] for _ in rvs]
        if weights is not None:
            weight_series = []

    # Read remaining data lines
    for line in it:
        line = clean_line(line)
        if len(line) == 0:
            continue
        # Split before validating, so the check applies to this line
        # rather than to the previously read line.
        values = line.split(sep)
        if len(values) != number_of_columns:
            raise ValueError('number of values does not match number of columns')
        for series_i, i in zip(series, column_map):
            series_i.append(int(values[i]))
        if weights is not None:
            weight_series.append(float(values[weights]))

    # Construct the dataset
    return HardDataset(zip(rvs, series), weights=weight_series)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _is_number(s: str) -> bool:
|
|
143
|
+
try:
|
|
144
|
+
float(s)
|
|
145
|
+
return True
|
|
146
|
+
except ValueError:
|
|
147
|
+
return False
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import random
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Sequence, List, Iterator, Tuple, Dict
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
from ck.dataset import HardDataset
|
|
8
|
+
from ck.dataset.cross_table import CrossTable
|
|
9
|
+
from ck.pgm import RandomVariable, Instance
|
|
10
|
+
from ck.sampling.sampler import Sampler
|
|
11
|
+
from ck.utils.np_extras import dtype_for_number_of_states, NDArray
|
|
12
|
+
from ck.utils.random_extras import Random
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def dataset_from_sampler(sampler: Sampler, length: int) -> HardDataset:
    """
    Build a hard dataset by drawing samples from a sampler.

    Args:
        sampler: the source of samples; its `rvs` become the random
            variables of the dataset.
        length: the number of instances to draw, i.e., the length of the dataset.

    Returns:
        A HardDataset of the given length, with one column of states per
        random variable of the sampler.
    """
    rvs: Sequence[RandomVariable] = sampler.rvs

    # Pre-allocate one state column per random variable, using the
    # dtype appropriate for the number of states of that random variable.
    columns: List[NDArray] = []
    for rv in rvs:
        columns.append(np.zeros(length, dtype=dtype_for_number_of_states(len(rv))))

    # Write each sampled instance into its row, across the columns.
    row = 0
    for instance in sampler.take(length):
        for column, state in zip(columns, instance):
            column[row] = state
        row += 1

    return HardDataset(zip(rvs, columns))
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class CrossTableSampler(Sampler):
    """
    A sampler that draws instances from a cross-table, with each instance
    drawn with probability proportional to its weight in the cross-table.
    """

    def __init__(self, crosstab: CrossTable, rand: Random = random):
        """
        Adapt a cross table to a sampler.

        Instances will be drawn from the sampler according to their
        weight in the given cross-table. If the given cross-table is
        modified after constructing the sampler, the sampler will not
        be affected.

        Args:
            crosstab: the cross-table providing instances and their weights.
            rand: the source of random numbers; only its `random()` method is used.

        Raises:
            ValueError: if the cross-table has no instances.
            ValueError: if the total weight of the cross-table is not positive.
        """
        if len(crosstab) == 0:
            raise ValueError('no instances to sample')

        super().__init__(rvs=crosstab.rvs, condition=())

        # Group instances by weight.
        # We do this in anticipation that it makes sampling more efficient.
        weight_groups: Dict[float, _WeightGroup] = {}
        for instance, weight in crosstab.items():
            weight_group = weight_groups.get(weight)
            if weight_group is None:
                weight_groups[weight] = _WeightGroup(weight, weight, [instance])
            else:
                weight_group.append(instance)

        self._weight_groups: List[_WeightGroup] = list(weight_groups.values())
        self._total_weight = sum(group.total for group in weight_groups.values())
        self._rand = rand

        # Without positive total weight there is nothing that can be drawn;
        # report that now rather than failing on the first sample.
        if not self._total_weight > 0:
            raise ValueError('total weight of the cross-table must be positive')

    def __iter__(self) -> Iterator[Instance]:
        """
        Yield an unbounded stream of instances, each drawn independently
        with probability proportional to its cross-table weight.
        """
        while True:
            # This code performs inverse transform sampling
            r: float = self._rand.random() * self._total_weight

            # This does a serial search to find the weight group.
            # This is efficient for small numbers of groups, but this may be
            # improved for large numbers of groups.
            selected = None
            for group in self._weight_groups:
                if r < group.total:
                    selected = group
                    break
                r -= group.total

            if selected is None:
                # Floating point rounding can leave `r` marginally beyond the
                # final group. Resolve to the last instance of the last group
                # rather than letting the search run off the end (which, inside
                # a generator, would surface as a RuntimeError).
                selected = self._weight_groups[-1]
                i = len(selected.instances) - 1
            else:
                # Pick an instance in the group. Clamp the index as `group.total`
                # is a running sum which may marginally exceed weight * count.
                i = min(int(r / selected.weight), len(selected.instances) - 1)

            yield selected.instances[i]
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@dataclass
|
|
86
|
+
class _WeightGroup:
|
|
87
|
+
"""
|
|
88
|
+
Support for CrossTableSampler.
|
|
89
|
+
"""
|
|
90
|
+
weight: float
|
|
91
|
+
total: float
|
|
92
|
+
instances: List[Tuple[int, ...]]
|
|
93
|
+
|
|
94
|
+
def append(self, instance: Tuple[int, ...]) -> None:
|
|
95
|
+
self.total += self.weight
|
|
96
|
+
self.instances.append(instance)
|
ck/example/diamond_square.py
CHANGED
|
@@ -8,7 +8,8 @@ class DiamondSquare(PGM):
|
|
|
8
8
|
This PGM is the 'DiamondSquare' factor graph.
|
|
9
9
|
|
|
10
10
|
The DiamondSquare is a factor graph with seven random variables (a, b, c, ..., h).
|
|
11
|
-
Binary factors are between pairs of random variables creating the pattern
|
|
11
|
+
Binary factors are between pairs of random variables creating the pattern::
|
|
12
|
+
|
|
12
13
|
b
|
|
13
14
|
/ \
|
|
14
15
|
/ \
|
|
@@ -20,6 +21,7 @@ class DiamondSquare(PGM):
|
|
|
20
21
|
\ /
|
|
21
22
|
\ /
|
|
22
23
|
g
|
|
24
|
+
|
|
23
25
|
If include_unaries then, also includes one unary factor per random variable.
|
|
24
26
|
"""
|
|
25
27
|
|
ck/example/triangle_square.py
CHANGED
|
@@ -8,12 +8,14 @@ class TriangleSquare(PGM):
|
|
|
8
8
|
This PGM is the 'TriangleSquare' factor graph.
|
|
9
9
|
|
|
10
10
|
The TriangleSquare is a factor graph with six random variables (a, b, c, ..., f).
|
|
11
|
-
Binary factors are between pairs of random variables crating the pattern
|
|
11
|
+
Binary factors are between pairs of random variables creating the pattern::
|
|
12
|
+
|
|
12
13
|
b -- d
|
|
13
14
|
/ | | \
|
|
14
15
|
a | | f
|
|
15
16
|
\ | | /
|
|
16
17
|
c -- e
|
|
18
|
+
|
|
17
19
|
If include_unaries then, also includes one unary factor per random variable.
|
|
18
20
|
"""
|
|
19
21
|
|
ck/example/truss.py
CHANGED
|
@@ -7,12 +7,14 @@ class Truss(PGM):
|
|
|
7
7
|
This PGM is the 'Truss' factor graph.
|
|
8
8
|
|
|
9
9
|
The Truss is a factor graph with five random variables (a, b, c, d, e).
|
|
10
|
-
Binary factors are between pairs of random variables creating the pattern
|
|
10
|
+
Binary factors are between pairs of random variables creating the pattern::
|
|
11
|
+
|
|
11
12
|
b ---- d
|
|
12
13
|
/ | / |
|
|
13
14
|
a | / |
|
|
14
15
|
\ | / |
|
|
15
16
|
c ---- e
|
|
17
|
+
|
|
16
18
|
If include_unaries then, also includes one unary factor per random variable.
|
|
17
19
|
"""
|
|
18
20
|
|
ck/in_out/parse_net.py
CHANGED
|
@@ -16,36 +16,37 @@ def read_network(input_stream, *, name: Optional[str] = None, network_builder: O
|
|
|
16
16
|
The input can be a string or a stream.
|
|
17
17
|
If the input is empty, then it is treated as an error.
|
|
18
18
|
|
|
19
|
-
This input is expected to conform to the following format
|
|
19
|
+
This input is expected to conform to the following format::
|
|
20
20
|
|
|
21
|
-
|
|
21
|
+
<network> ::= <net_block> <node_block>* <potential_block>*
|
|
22
22
|
|
|
23
|
-
|
|
23
|
+
<net_block> ::= 'net' <block>
|
|
24
24
|
|
|
25
|
-
|
|
25
|
+
<node_block> ::= 'node' <NAME> <block>
|
|
26
26
|
|
|
27
|
-
|
|
27
|
+
<potential_block> ::= 'potential' <link> <block>
|
|
28
28
|
|
|
29
|
-
|
|
30
|
-
|
|
29
|
+
<block> ::= '{' <sentence>* '}'
|
|
30
|
+
<sentence> ::= <NAME> '=' <value> ';'
|
|
31
31
|
|
|
32
|
-
|
|
33
|
-
|
|
32
|
+
<link> ::= '(' <NAME> ')'
|
|
33
|
+
| '(' <NAME> '|' <NAME>+ ')'
|
|
34
34
|
|
|
35
|
-
|
|
36
|
-
|
|
35
|
+
<value> ::= <STRING> | <NUMBER> | <list>
|
|
36
|
+
<list> ::='(' <value>* ')'
|
|
37
37
|
|
|
38
38
|
The sentences of a <net_block> are ignored.
|
|
39
39
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
40
|
+
In a <node_block>,
|
|
41
|
+
<name> of 'states' is mandatory, with value that is a list of <STRING>
|
|
42
|
+
other sentences are ignored.
|
|
43
43
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
44
|
+
In a <potential_block>,
|
|
45
|
+
<name> of 'data' is mandatory, with value that is a list of (list of) <NUMBER> (shape matching the link)
|
|
46
|
+
other sentences are ignored.
|
|
47
|
+
|
|
48
|
+
Here is a simple example input::
|
|
47
49
|
|
|
48
|
-
Here is a simple example input:
|
|
49
50
|
net{}
|
|
50
51
|
node a
|
|
51
52
|
{
|
|
@@ -62,8 +63,9 @@ def read_network(input_stream, *, name: Optional[str] = None, network_builder: O
|
|
|
62
63
|
}
|
|
63
64
|
potential ( b | a )
|
|
64
65
|
{
|
|
65
|
-
data = ((0.4 0.4 0.2)(0.4 0.4 0.2))
|
|
66
|
+
data = ((0.4 0.4 0.2)(0.4 0.4 0.2));
|
|
66
67
|
}
|
|
68
|
+
|
|
67
69
|
"""
|
|
68
70
|
# Decorate the input stream
|
|
69
71
|
input_stream = ParserInput(input_stream)
|
ck/in_out/parser_utils.py
CHANGED
|
@@ -57,8 +57,10 @@ class ParserInput:
|
|
|
57
57
|
|
|
58
58
|
def readline(self) -> str:
|
|
59
59
|
"""
|
|
60
|
+
Read a line of input.
|
|
61
|
+
|
|
60
62
|
Returns:
|
|
61
|
-
the next line (including the trailing
|
|
63
|
+
the next line (including the trailing newline) or empty string if EOF.
|
|
62
64
|
"""
|
|
63
65
|
line = ''
|
|
64
66
|
while True:
|
|
@@ -69,9 +71,11 @@ class ParserInput:
|
|
|
69
71
|
|
|
70
72
|
def read_past_space(self, single_line: bool, comment_char=None) -> str:
|
|
71
73
|
"""
|
|
74
|
+
Read the input up to and including the first non-whitespace character.
|
|
75
|
+
|
|
72
76
|
Returns:
|
|
73
|
-
either empty string,
|
|
74
|
-
If single_line is True, then
|
|
77
|
+
either empty string, if end of input, otherwise a single character string that is not whitespace.
|
|
78
|
+
If single_line is True, then newline is treated as eof.
|
|
75
79
|
"""
|
|
76
80
|
c = self.read_one()
|
|
77
81
|
while True:
|
ck/learning/__init__.py
ADDED
|
File without changes
|