compiled-knowledge 4.1.0a1__cp312-cp312-win_amd64.whl → 4.1.0a2__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of compiled-knowledge might be problematic.
- ck/circuit/_circuit_cy.c +1 -1
- ck/circuit/_circuit_cy.cp312-win_amd64.pyd +0 -0
- ck/circuit_compiler/cython_vm_compiler/_compiler.c +152 -152
- ck/circuit_compiler/cython_vm_compiler/_compiler.cp312-win_amd64.pyd +0 -0
- ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.c +1 -1
- ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.cp312-win_amd64.pyd +0 -0
- ck/dataset/cross_table.py +2 -2
- ck/dataset/dataset.py +50 -33
- ck/dataset/dataset_builder.py +512 -0
- ck/dataset/dataset_from_csv.py +5 -1
- ck/pgm_compiler/support/circuit_table/_circuit_table_cy.c +1 -1
- ck/pgm_compiler/support/circuit_table/_circuit_table_cy.cp312-win_amd64.pyd +0 -0
- ck_demos/dataset/__init__.py +0 -0
- ck_demos/dataset/demo_dataset_builder.py +37 -0
- {compiled_knowledge-4.1.0a1.dist-info → compiled_knowledge-4.1.0a2.dist-info}/METADATA +1 -1
- {compiled_knowledge-4.1.0a1.dist-info → compiled_knowledge-4.1.0a2.dist-info}/RECORD +19 -16
- {compiled_knowledge-4.1.0a1.dist-info → compiled_knowledge-4.1.0a2.dist-info}/WHEEL +0 -0
- {compiled_knowledge-4.1.0a1.dist-info → compiled_knowledge-4.1.0a2.dist-info}/licenses/LICENSE.txt +0 -0
- {compiled_knowledge-4.1.0a1.dist-info → compiled_knowledge-4.1.0a2.dist-info}/top_level.txt +0 -0
ck/dataset/dataset_builder.py
ADDED

@@ -0,0 +1,512 @@
from __future__ import annotations

from itertools import count
from typing import Iterable, List, TypeAlias, Sequence, overload, Set, Tuple, MutableSequence, Dict, Optional, \
    assert_never

import numpy as np

from ck.dataset import HardDataset, SoftDataset
from ck.pgm import RandomVariable, State
from ck.utils.np_extras import NDArrayFloat64, NDArrayStates, dtype_for_number_of_states, DTypeStates, NDArrayNumeric

HardValue: TypeAlias = int
SoftValue: TypeAlias = Sequence[float]
Value: TypeAlias = HardValue | SoftValue | None


class Record(Sequence[Value]):
    """
    A record is a sequence of values, co-indexed with dataset columns.

    A value is either a state index (HardValue), a sequence of state
    weights (SoftValue), or missing (None).
    """

    def __init__(self, dataset: DatasetBuilder, values: Optional[Iterable[Value]] = None):
        self.weight: float = 1
        self._dataset: DatasetBuilder = dataset
        self._values: List[Value] = [] if values is None else list(values)

    def __len__(self) -> int:
        return len(self._dataset.rvs)

    @overload
    def __getitem__(self, index: int | RandomVariable) -> Value:
        ...

    @overload
    def __getitem__(self, index: slice) -> Sequence[Value]:
        ...

    def __getitem__(self, index):
        if isinstance(index, slice):
            return [self._getitem(i) for i in range(*index.indices(len(self)))]
        if isinstance(index, RandomVariable):
            # noinspection PyProtectedMember
            return self._getitem(self._dataset._rvs_index[index])

        size = len(self)
        if index < 0:
            index += size
        if not 0 <= index < size:
            raise IndexError('index out of range')
        return self._getitem(index)

    def _getitem(self, index: int) -> Value:
        """
        Assumes:
            0 <= index < len(self).
        """
        if index >= len(self._values):
            return None
        return self._values[index]

    @overload
    def __setitem__(self, index: int | RandomVariable, value: Value) -> None:
        ...

    @overload
    def __setitem__(self, index: slice, value: Iterable[Value]) -> None:
        ...

    def __setitem__(self, index, value):
        if isinstance(index, slice):
            for i, v in zip(range(*index.indices(len(self))), value):
                self._setitem(i, v)
            return
        if isinstance(index, RandomVariable):
            # noinspection PyProtectedMember
            self._setitem(self._dataset._rvs_index[index], value)
            return

        size = len(self)
        if index < 0:
            index += size
        if not 0 <= index < size:
            raise IndexError('index out of range')
        self._setitem(index, value)

    def _setitem(self, index: int, value: Value) -> None:
        """
        Assumes:
            0 <= index < len(self).
        """
        to_append: int = index + 1 - len(self._values)
        self._values += [None] * to_append

        if value is None:
            self._values[index] = None
            return

        rv: RandomVariable = self._dataset.rvs[index]
        if isinstance(value, int):
            if not (0 <= value < len(rv)):
                raise ValueError(f'state index out of range, expected: 0 <= {value!r} < {len(rv)}')
            self._values[index] = value
            return

        # Expect the value is a sequence of floats
        if len(value) != len(rv):
            raise ValueError(f'state weights incorrect length, expected: {len(rv)}, got: {len(value)}')
        self._values[index] = tuple(value)

    def set(self, *values: Value) -> None:
        """
        Set all the values of this record, using state indexes or state weights.

        If insufficient or additional values are provided, a ValueError will be raised.
        """
        if len(values) != len(self):
            raise ValueError('incorrect number of values provided')
        for i, value in enumerate(values):
            self._setitem(i, value)

    def set_states(self, *values: State) -> None:
        """
        Set all the values of this record from random variable states.

        State indexes are resolved using `RandomVariable.state_idx`.
        If insufficient or additional values are provided, a ValueError will be raised.
        """
        rvs = self._dataset.rvs
        if len(values) != len(rvs):
            raise ValueError('incorrect number of values provided')
        for i, rv, value in zip(count(), rvs, values):
            self._setitem(i, rv.state_idx(value))

    def __str__(self) -> str:
        return self.to_str()

    def to_str(
            self,
            *,
            show_weight: bool = True,
            as_states: bool = False,
            missing: str = 'None',
            sep: str = ', ',
    ) -> str:
        """
        Render the record as a human-readable string.
        If as_states is true, then hard value states are rendered instead of just state indexes.

        Args:
            show_weight: If `True`, the instance weight is included.
            as_states: If `True`, the states are used instead of just state indexes.
            missing: the string to use for missing values.
            sep: the string to use for separating values.
        """

        def _value_str(rv_idx: int, v: Value) -> str:
            if v is None:
                return missing
            if isinstance(v, int):
                if as_states:
                    return repr(self._dataset.rvs[rv_idx].states[v])
                else:
                    return str(v)
            else:
                return str(v)

        instance_str = sep.join(_value_str(i, self._getitem(i)) for i in range(len(self)))
        if show_weight:
            return f'({instance_str}) * {self.weight}'
        else:
            return f'({instance_str})'


class DatasetBuilder(Sequence[Record]):
    """
    A dataset builder can be used for making a hard or soft dataset, incrementally growing
    the dataset as needed. This represents a flexible but inefficient interim representation of data.
    """

    def __init__(self, rvs: Iterable[RandomVariable] = ()):
        """
        Args:
            rvs: Optional random variables to include in the dataset. Default is no random variables.
        """
        self._rvs: Tuple[RandomVariable, ...] = ()
        self._rvs_index: Dict[RandomVariable, int] = {}
        self._records: List[Record] = []
        self.new_column(*rvs)

    @property
    def rvs(self) -> Sequence[RandomVariable]:
        return self._rvs

    def new_column(self, *rv: RandomVariable) -> None:
        """
        Adds one, or more, new random variables to the dataset. For existing rows,
        the value for each new random variable will be `None`.

        Args:
            rv: a new random variable to include in the dataset.

        Raises:
            ValueError: if the given random variable already exists in the dataset.
        """
        # Do all consistency checks first to fail early, before modifying the dataset.
        rvs_to_add: Set[RandomVariable] = set(rv)
        if len(rvs_to_add) != len(rv):
            raise ValueError(f'request to add a column includes duplicates')
        duplicate_rvs: Set[RandomVariable] = rvs_to_add.intersection(self._rvs_index.keys())
        if len(duplicate_rvs) > 0:
            duplicate_rv_names = ', '.join(rv.name for rv in duplicate_rvs)
            raise ValueError(f'column already exists in the dataset: {duplicate_rv_names}')

        for rv in rvs_to_add:
            self._rvs_index[rv] = len(self._rvs)
            self._rvs += (rv,)

    def ensure_column(self, *rv: RandomVariable) -> None:
        """
        Add a column for one, or more, random variables, only
        adding a random variable if it is not already present in the dataset.
        """
        all_rvs = self._rvs_index.keys()
        self.new_column(*(_rv for _rv in rv if _rv not in all_rvs))

    def del_column(self, *rv: RandomVariable) -> None:
        """
        Delete one, or more, random variables from the dataset.

        Args:
            rv: a random variable to remove from the dataset.

        Raises:
            ValueError: if the given random variable does not exist in the dataset.
        """
        # Do all consistency checks first to fail early, before modifying the dataset.
        rvs_to_del: Set[RandomVariable] = set(rv)
        if len(rvs_to_del) != len(rv):
            raise ValueError(f'request to delete a column includes duplicates')
        missing_columns = rvs_to_del.difference(self._rvs_index.keys())
        if len(missing_columns) > 0:
            missing_rv_names = ', '.join(rv.name for rv in missing_columns)
            raise ValueError(f'missing columns: {missing_rv_names}')

        # Get column indices to remove, in descending order
        indices = sorted((self._rvs_index[rv] for rv in rvs_to_del), reverse=True)

        # Remove from the index
        for rv in rvs_to_del:
            self._rvs_index.pop(rv)

        # Remove from column sequence
        rvs_list: List[RandomVariable] = list(self._rvs)
        for i in indices:
            rvs_list.pop(i)
        self._rvs = tuple(rvs_list)

        # Remove from records
        for record in self._records:
            # noinspection PyProtectedMember
            record_values: List[Value] = record._values
            for i in indices:
                if i < len(record_values):
                    record_values.pop(i)

    def total_weight(self) -> float:
        """
        Calculate the total weight of this dataset.
        """
        return sum(record.weight for record in self._records)

    def get_weights(self) -> NDArrayFloat64:
        """
        Allocate and return a 1D numpy array of instance weights.

        Ensures:
            shape of the result == `(len(self), )`.
        """
        result: NDArrayFloat64 = np.fromiter(
            (record.weight for record in self._records),
            count=len(self._records),
            dtype=np.float64,
        )
        return result

    def get_column_hard(self, rv: RandomVariable, *, missing: Optional[int] = None) -> NDArrayStates:
        """
        Allocate and return a 1D numpy array of state indexes.

        Args:
            rv: a random variable in this dataset.
            missing: the value to use in the result to represent missing values. If not provided,
                then the default missing value is len(rv), which is an invalid state index.

        Raises:
            ValueError: if the supplied missing value is negative.

        Ensures:
            shape of the result == `(len(self), )`.
        """
        index: int = self._rvs_index[rv]
        if missing is None:
            missing = len(rv)
        if missing < 0:
            raise ValueError(f'missing value must be >= 0')
        number_of_states = max(len(rv), missing + 1)
        dtype: DTypeStates = dtype_for_number_of_states(number_of_states)
        result: NDArrayStates = np.fromiter(
            (_get_state(record[index], missing) for record in self._records),
            count=len(self._records),
            dtype=dtype,
        )
        return result

    def get_column_soft(self, rv: RandomVariable, *, missing: float | Sequence[float] = np.nan) -> NDArrayFloat64:
        """
        Allocate and return a numpy array of state weights.

        Args:
            rv: a random variable in this dataset.
            missing: the value to use in the result to represent missing values. Default is all NaN.

        Ensures:
            shape of the result == `(len(self), len(rv))`.
        """
        index: int = self._rvs_index[rv]
        size: int = len(rv)

        if isinstance(missing, (float, int)):
            missing_weights: NDArrayFloat64 = np.array([missing] * size, dtype=np.float64)
        else:
            missing_weights: NDArrayFloat64 = np.array(missing, dtype=np.float64)
            if missing_weights.shape != (size,):
                raise ValueError(f'missing weights shape expected {(size,)}, but got {missing_weights.shape}')

        result: NDArrayFloat64 = np.empty(shape=(len(self._records), size), dtype=np.float64)
        for i, record in enumerate(self._records):
            result[i, :] = _get_state_weights(size, record[index], missing_weights)
        return result

    def append(self, *values: Value) -> Record:
        """
        Appends a new record to the dataset.

        Args:
            values: the new record to append. If omitted, a new record will be created
                with all values missing (`None`).

        Returns:
            the new record.
        """
        record = Record(self, values)
        self._records.append(record)
        return record

    def insert(self, index: int, values: Optional[Iterable[Value]] = None) -> Record:
        """
        Inserts a new record to the dataset at the given index.

        Args:
            index: where to insert the record (interpreted as per builtin `list.insert`).
            values: the new record to append. If omitted, a new record will be created
                with all values missing (`None`).

        Returns:
            the new record.
        """
        record = Record(self, values)
        self._records.insert(index, record)
        return record

    def append_dataset(self, dataset: HardDataset | SoftDataset) -> None:
        """
        Append all the records of the given dataset to this dataset builder.

        Args:
            dataset: the dataset of records to append.

        Raises:
            KeyError: if `dataset.rvs` is not a superset of `this.rvs`.
        """
        if isinstance(dataset, HardDataset):
            cols: Tuple = tuple(dataset.state_idxs(rv).tolist() for rv in self.rvs)
        elif isinstance(dataset, SoftDataset):
            cols: Tuple = tuple(dataset.state_weights(rv) for rv in self.rvs)
        else:
            assert_never('not reached')
        weights: NDArrayNumeric = dataset.weights
        for weight, vals in zip(weights, zip(*cols)):
            self.append(*vals).weight = weight

    @overload
    def __getitem__(self, index: int) -> Record:
        ...

    @overload
    def __getitem__(self, index: slice) -> MutableSequence[Record]:
        ...

    def __getitem__(self, index):
        return self._records[index]

    def __delitem__(self, index: int | slice) -> None:
        del self._records[index]

    def __len__(self) -> int:
        return len(self._records)

    def dump(
            self,
            *,
            show_rvs: bool = True,
            show_weights: bool = True,
            as_states: bool = False,
            missing: str = 'None',
            sep: str = ', ',
    ) -> None:
        """
        Dump the dataset in a human-readable format.
        If as_states is true, then hard value states are dumped instead of just state indexes.

        Args:
            show_rvs: If `True`, the random variables are dumped.
            show_weights: If `True`, the instance weights are dumped.
            as_states: If `True`, the states are dumped instead of just state indexes.
            missing: the string to use for missing values.
            sep: the string to use for separating values.
        """
        if show_rvs:
            rvs = ', '.join(str(rv) for rv in self.rvs)
            print(f'rvs: [{rvs}]')
        print(f'instances ({len(self)}, with total weight {self.total_weight()}):')
        for record in self._records:
            print(record.to_str(show_weight=show_weights, as_states=as_states, missing=missing, sep=sep))


def hard_dataset_from_builder(dataset_builder: DatasetBuilder, *, missing: Optional[int] = None) -> HardDataset:
    """
    Create a hard dataset from a dataset builder by repeated application
    of `HardDataset.add_rv_from_state_idxs`.

    The instance weights of the returned dataset will be a copy
    of the instance weights of the dataset builder.

    No adjustments are made to the resulting dataset weights, even if
    a value in the dataset builder is soft evidence that does not sum to
    one.

    Args:
        dataset_builder: The dataset builder providing random variables,
            their states, and instance weights.
        missing: the value to use in the result to represent missing values. If not provided,
            then the default missing value is len(rv) for each rv, which is an invalid state index.

    Returns:
        A `HardDataset` instance.
    """
    dataset = HardDataset(weights=dataset_builder.get_weights())
    for rv in dataset_builder.rvs:
        dataset.add_rv_from_state_idxs(rv, dataset_builder.get_column_hard(rv, missing=missing))
    return dataset


def soft_dataset_from_builder(
        dataset_builder: DatasetBuilder,
        *,
        missing: float | Sequence[float] = np.nan,
) -> SoftDataset:
    """
    Create a soft dataset from a dataset builder by repeated application
    of `SoftDataset.add_rv_from_state_weights`.

    The instance weights of the returned dataset will be a copy
    of the instance weights of the dataset builder.

    Args:
        dataset_builder: The dataset builder providing random variables,
            their state weights, and instance weights.
        missing: the value to use in the result to represent missing values.
            If a single float is provided, all state weights will have that value. Alternatively,
            a sequence of state weights can be provided, but all random variables will need
            to be the same size. Default is all state weights set to NaN.

    Returns:
        A `SoftDataset` instance.
    """
    dataset = SoftDataset(weights=dataset_builder.get_weights())
    for rv in dataset_builder.rvs:
        dataset.add_rv_from_state_weights(rv, dataset_builder.get_column_soft(rv, missing=missing))
    return dataset


def _get_state(value: Value, missing: int) -> int:
    if value is None:
        return missing
    if isinstance(value, int):
        return value
    return np.argmax(value).item()


def _get_state_weights(size: int, value: Value, missing: Sequence[float]) -> Sequence[float]:
    if value is None:
        return missing
    if isinstance(value, int):
        result = np.zeros(size, dtype=np.float64)
        result[value] = 1
        return result
    return value
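For orientation, the sketch below (not part of the package diff) shows how the column-management and conversion functions added above might be combined. The PGM and random-variable names are illustrative only, mirroring the demo that ships later in this release; everything else follows the docstrings in the new module.

    from ck.dataset.dataset_builder import DatasetBuilder, hard_dataset_from_builder
    from ck.pgm import PGM

    # Illustrative model with two random variables (names are arbitrary).
    pgm = PGM()
    a = pgm.new_rv('a', (True, False))
    b = pgm.new_rv('b', ('low', 'mid', 'high'))

    builder = DatasetBuilder([a])
    builder.append(0).weight = 2.0      # hard value: state index 0 of `a`
    builder.new_column(b)               # existing records read as None for `b`
    builder[0][b] = [0.5, 0.25, 0.25]   # soft value: one weight per state of `b`

    # Convert to a HardDataset (soft values collapse via argmax) and append it back.
    hard = hard_dataset_from_builder(builder)
    builder.append_dataset(hard)        # hard.rvs must cover builder.rvs

    builder.del_column(a)               # drop a column; records shrink accordingly
    builder.dump(as_states=True, missing='?')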
ck/dataset/dataset_from_csv.py
CHANGED

@@ -9,7 +9,7 @@ def hard_dataset_from_csv(
         lines: Iterable[str],
         *,
         weights: Optional[int | str] = None,
-        sep: str = ',',
+        sep: Optional[str] = ',',
         comment: str = '#',
 ) -> HardDataset:
     """
@@ -30,6 +30,8 @@ def hard_dataset_from_csv(
     of random variables. There must be a column name in the header to match
     each name of the given random variables. Additional columns will be ignored.

+    Leading and trailing whitespace is ignored for each field, including header column names.
+
     As text file (and StringIO) objects are iterable over lines, here is how to read a csv file:
     ```
     with open(csv_filename, 'r') as file:
@@ -46,6 +48,8 @@ def hard_dataset_from_csv(
         weights: the column in the csv file holding instance weights. Can be either the
             column number (counting from zero) or a column name (requires a header line).
         sep: the string to use to separate values in a line, default is a comma.
+            If set to `None`, lines will be split on any consecutive run of whitespace characters
+            (including \n \r \t \f and spaces).
         comment: text starting with this will be treated as a comment. Set to '' to disallow comments.

     Returns:
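The `sep=None` behaviour documented above describes whitespace-run splitting, presumably equivalent to Python's built-in `str.split(None)`; a quick standalone illustration (not from the package):

    # A separator of None splits on any run of whitespace and drops leading/trailing
    # whitespace, whereas an explicit separator splits on exact matches only.
    line = '  rain\t sprinkler   wet_grass \r\n'
    print(line.split(None))  # -> ['rain', 'sprinkler', 'wet_grass']
    print(line.split(','))   # -> the whole line as a single field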
ck/pgm_compiler/support/circuit_table/_circuit_table_cy.c
CHANGED

@@ -13,7 +13,7 @@
             "/O2"
         ],
         "include_dirs": [
-            "C:\\Users\\runneradmin\\AppData\\Local\\Temp\\build-env-
+            "C:\\Users\\runneradmin\\AppData\\Local\\Temp\\build-env-zvpv36cx\\Lib\\site-packages\\numpy\\_core\\include"
         ],
         "name": "ck.pgm_compiler.support.circuit_table._circuit_table_cy",
         "sources": [
ck_demos/dataset/demo_dataset_builder.py
ADDED

@@ -0,0 +1,37 @@
from ck.dataset import HardDataset, SoftDataset
from ck.dataset.dataset_builder import DatasetBuilder, soft_dataset_from_builder, hard_dataset_from_builder
from ck.pgm import PGM


def main() -> None:
    pgm = PGM()
    x = pgm.new_rv('x', (True, False))
    y = pgm.new_rv('y', ('yes', 'no', 'maybe'))

    builder = DatasetBuilder([x, y])
    builder.append()
    builder.append(1, 2).weight = 3
    builder.append(None, [0.7, 0.1, 0.2])
    builder.append().set_states(True, 'maybe')

    print('DatasetBuilder dump')
    builder.dump()
    print()

    print('DatasetBuilder dump, showing states and custom missing values')
    builder.dump(as_states=True, missing='?')
    print()

    print('HardDataset dump')
    dataset: HardDataset = hard_dataset_from_builder(builder, missing=99)
    dataset.dump()
    print()

    print('SoftDataset dump')
    dataset: SoftDataset = soft_dataset_from_builder(builder)
    dataset.dump()
    print()


if __name__ == '__main__':
    main()
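One way to run the new demo once the wheel is installed (a sketch; it assumes `ck_demos` is importable from the installed site-packages, as the RECORD below indicates):

    # Run the shipped DatasetBuilder demo programmatically.
    from ck_demos.dataset.demo_dataset_builder import main

    main()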
{compiled_knowledge-4.1.0a1.dist-info → compiled_knowledge-4.1.0a2.dist-info}/RECORD
CHANGED

@@ -1,8 +1,8 @@
 ck/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ck/pgm.py,sha256=DXI3RZWBlk6wNj9VlAEDGeEUQ1WIpQwojVAt231K-Co,121095
 ck/circuit/__init__.py,sha256=klUR7OVESf53-8Ho4f32clHFsR2SOz4XgwZzfDlms88,418
-ck/circuit/_circuit_cy.c,sha256=
-ck/circuit/_circuit_cy.cp312-win_amd64.pyd,sha256=
+ck/circuit/_circuit_cy.c,sha256=bEZxHMg02hky24K05HBdsLCHl8n-z6btIx5v-3q6nBE,1741715
+ck/circuit/_circuit_cy.cp312-win_amd64.pyd,sha256=k1pLiEcGqusMBordd72JgCMf2MYSEdrgrSg6TEofuYA,242688
 ck/circuit/_circuit_cy.pxd,sha256=F1WU4KuX_knXQX-hwNKoHsoUK3fJLbLOxEnomWMJFpI,1057
 ck/circuit/_circuit_cy.pyx,sha256=TIjqsdyN_IzOm9XQw26kEURpL6GSL1kJO3K-UUlkbQc,27763
 ck/circuit/_circuit_py.py,sha256=gQZoEphxut2UyBL0ZqmNc8KlNBSMST_VOCqOpDMIRSM,28331
@@ -14,25 +14,26 @@ ck/circuit_compiler/llvm_compiler.py,sha256=6RHUCVWCOgt3ZNyjRTl2Z2npYJMWyAFJVAIc
 ck/circuit_compiler/llvm_vm_compiler.py,sha256=XJhiAZmGMRRz49XUfng9lgETxVw6NgD6XCI0H3fX-1E,22200
 ck/circuit_compiler/named_circuit_compilers.py,sha256=snlD3JnhAZC-atKpf5GD0v4CGdvj2_ZhCZ3O-tsxzxc,2284
 ck/circuit_compiler/cython_vm_compiler/__init__.py,sha256=pEAwTleuZgdYhTAQMea2f9YsFK54eoNbZSbrWkW8aeE,49
-ck/circuit_compiler/cython_vm_compiler/_compiler.c,sha256=
-ck/circuit_compiler/cython_vm_compiler/_compiler.cp312-win_amd64.pyd,sha256=
+ck/circuit_compiler/cython_vm_compiler/_compiler.c,sha256=HRZKcv1K8sCrRDPMgsbQuZYmmzmxtUjX7c8Y0Pi4aYw,871325
+ck/circuit_compiler/cython_vm_compiler/_compiler.cp312-win_amd64.pyd,sha256=Ie7z7m05E59XoBbyBf2GVNKXgbgYcHR27wMcCpCoals,103936
 ck/circuit_compiler/cython_vm_compiler/_compiler.pyx,sha256=550r0AkOh8ZuuTRy3e6Aq-YqBQ82EKcap8e6f3zlEuM,13278
 ck/circuit_compiler/cython_vm_compiler/cython_vm_compiler.py,sha256=3Q-HCyA7VWFoXS9gn-k4LXedzqHPvdFNvWHfDcKYv6s,4473
 ck/circuit_compiler/support/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ck/circuit_compiler/support/input_vars.py,sha256=x6krN6sW9A-vZTteY4M4on_0vS4ChaaCcmnXcnQ4y0s,4812
 ck/circuit_compiler/support/llvm_ir_function.py,sha256=07IUmx4bGReDu-BsUhJEWM_onm8NmsHwQzJan1rnAFE,8572
 ck/circuit_compiler/support/circuit_analyser/__init__.py,sha256=RbyIObAAb-w0Ky4fB198xAfxTh2doquN9ez68SZSZgo,536
-ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.c,sha256=
-ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.cp312-win_amd64.pyd,sha256=
+ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.c,sha256=Z_i7EFoOaeztsOJK2iuUPJe6BVUGylcIXyn3WCxFBR0,448751
+ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.cp312-win_amd64.pyd,sha256=p4b31PxmdYPJEMn6eJlOaOV4nbM1KMlL4q-ItwJem2E,54272
 ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.pyx,sha256=ctDOHOjnUhdsR_XHBrgchFVTImLhsEUjU3cqkP-GdF8,3331
 ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_py.py,sha256=eWjc2B946LeSzcGa9PyI4XQvlx_B1KPEiQuM6OOFjjQ,3086
 ck/dataset/__init__.py,sha256=3vxB-b8LdgtoZ73q28sdPYPUbCbXARH-fNGUZcxIzIo,56
-ck/dataset/cross_table.py,sha256=
+ck/dataset/cross_table.py,sha256=D8Y0WpISouAkMnt0j8bs4Csw_MhxT6h4eUucaQQOkmw,9745
 ck/dataset/cross_table_probabilities.py,sha256=U7LlmWUc_Ow6CVEk_6CLK0p2g8GlRpHEGPZFXrV8tmI,1997
-ck/dataset/dataset.py,sha256=
+ck/dataset/dataset.py,sha256=fGxjEqv9qfdEq8s1ph8B_ptFftpf1wddVgYH8jDNtr0,22746
+ck/dataset/dataset_builder.py,sha256=e9fNbvDl1HhM49DxvvBtXWOHpvPPocBsTTzUsCpkR_A,18912
 ck/dataset/dataset_compute.py,sha256=B1BxTJ-JAsNk8l79iDRmJAQ5QwEvWgsI1wepRg4aQCQ,5845
 ck/dataset/dataset_from_crosstable.py,sha256=0I4jr4PtfBtniTFBHlAOlcl_ZH0NSD37azkcA2CaWSQ,1323
-ck/dataset/dataset_from_csv.py,sha256=
+ck/dataset/dataset_from_csv.py,sha256=Nn2RzZP_fhag2UaXzv27YEnKSo6oogKEVGEEafQuB6U,5953
 ck/dataset/sampled_dataset.py,sha256=8PfiTXqraSQ5rXpFUFFMN13jQL1x--_z9g6FYs2VW1U,3357
 ck/example/__init__.py,sha256=BXVxvTcl3tqgai-xJiGQIaG_ChlpPRdfWg985MQ7dwM,1744
 ck/example/alarm.py,sha256=QkHoUb6MxZzCOCX4nLE8UJazNEqAPqrFWQ01lslvdsk,25274
@@ -101,8 +102,8 @@ ck/pgm_compiler/support/factor_tables.py,sha256=tV9qE2zC8iwEQxTuXE6qiE6lmMpz4-Vc
 ck/pgm_compiler/support/join_tree.py,sha256=Chkyyo--ChgWEsDqTh8RCxPN7Z1NyvL--GjTC4ONvkY,12897
 ck/pgm_compiler/support/named_compiler_maker.py,sha256=g2MLnlkWXkISHL6dh23EY53SptTo7-itfuZogSpMdB8,1420
 ck/pgm_compiler/support/circuit_table/__init__.py,sha256=yJ05NvuNE9j0E_QnjDzHYfLqcHn5TdOleEpG3wSRgXQ,579
-ck/pgm_compiler/support/circuit_table/_circuit_table_cy.c,sha256=
-ck/pgm_compiler/support/circuit_table/_circuit_table_cy.cp312-win_amd64.pyd,sha256=
+ck/pgm_compiler/support/circuit_table/_circuit_table_cy.c,sha256=5TqAN98gLTZ3inQFBc_b6xpJJjjs2RJsbJ3loHHybho,730350
+ck/pgm_compiler/support/circuit_table/_circuit_table_cy.cp312-win_amd64.pyd,sha256=r97wGUlqYT8oLecHbdPLUKnyGMgct6uPaGoypx1xIIA,97280
 ck/pgm_compiler/support/circuit_table/_circuit_table_cy.pyx,sha256=rVO1yxjZmZ6yv5s0zKq4Ji9WYrDuYTZsRG_zeF1_1xE,12015
 ck/pgm_compiler/support/circuit_table/_circuit_table_py.py,sha256=h6xPYGBSy6XHQBFLPD2D1-V7Kiw9utY68nWrcGRMEg4,11287
 ck/probability/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -143,6 +144,8 @@ ck_demos/circuit/demo_derivatives.py,sha256=3JoWVAEKLEoLjq6QzWkq4Z-qVq1l0tHvGDn5
 ck_demos/circuit_compiler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ck_demos/circuit_compiler/compare_circuit_compilers.py,sha256=IEzwvKt6c8wrmAyd6F0sUaNaWYEx1BBFQhRyDt7cibI,756
 ck_demos/circuit_compiler/show_llvm_program.py,sha256=HKUuyLfBjH6ZgD8l4gQWVSBPUh55ZCXjPa7ZEdm5OyU,712
+ck_demos/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ck_demos/dataset/demo_dataset_builder.py,sha256=1OycYIr0C_3NCn0SLNoHftjStnRrGk_f0yNlckD6nh4,1024
 ck_demos/getting_started/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ck_demos/getting_started/simple_demo.py,sha256=AR40OtUVd-CJOxFlsu8_RtGLL2LLnZg506SzrIx7OQA,668
 ck_demos/pgm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -185,8 +188,8 @@ ck_demos/utils/compare.py,sha256=Bwjpflevl4nusfA0zp96rInaVKFGuhC5Xv7HzA1Fobk,508
 ck_demos/utils/convert_network.py,sha256=TSKj8q7L7J5rhrvwjaDkdYZ0Sg8vV5FRL_vCanX1CQw,1363
 ck_demos/utils/sample_model.py,sha256=in-Nlv-iuNIu6y9fDuMyo7nzgimBuTAnCWcpnVqvqDQ,8839
 ck_demos/utils/stop_watch.py,sha256=VzXHRWx0V8vPSD-bLgLlEYkCkR2FA0-KmM_pfKx-Pxo,13205
-compiled_knowledge-4.1.
-compiled_knowledge-4.1.
-compiled_knowledge-4.1.
-compiled_knowledge-4.1.
-compiled_knowledge-4.1.
+compiled_knowledge-4.1.0a2.dist-info/licenses/LICENSE.txt,sha256=uMYx7tmroEKNASizbCOwPveMQsD5UErLDC1_SANmNn8,1089
+compiled_knowledge-4.1.0a2.dist-info/METADATA,sha256=41mdCLKoTBDANacEu1V3Al9pup0b_6HjQ-DvBO4WUuk,1837
+compiled_knowledge-4.1.0a2.dist-info/WHEEL,sha256=8UP9x9puWI0P1V_d7K2oMTBqfeLNm21CTzZ_Ptr0NXU,101
+compiled_knowledge-4.1.0a2.dist-info/top_level.txt,sha256=Cf8DAfd2vcnLiA7HlxoduOzV0Q-8surE3kzX8P9qdks,12
+compiled_knowledge-4.1.0a2.dist-info/RECORD,,