compiled-knowledge 4.0.0a25__cp312-cp312-macosx_11_0_arm64.whl → 4.1.0a2__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of compiled-knowledge might be problematic. Click here for more details.

Files changed (31) hide show
  1. ck/circuit/_circuit_cy.c +1 -1
  2. ck/circuit/_circuit_cy.cpython-312-darwin.so +0 -0
  3. ck/circuit_compiler/cython_vm_compiler/_compiler.c +152 -152
  4. ck/circuit_compiler/cython_vm_compiler/_compiler.cpython-312-darwin.so +0 -0
  5. ck/circuit_compiler/interpret_compiler.py +2 -2
  6. ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.c +1 -1
  7. ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.cpython-312-darwin.so +0 -0
  8. ck/dataset/__init__.py +1 -0
  9. ck/dataset/cross_table.py +270 -0
  10. ck/dataset/cross_table_probabilities.py +53 -0
  11. ck/dataset/dataset.py +594 -0
  12. ck/dataset/dataset_builder.py +512 -0
  13. ck/dataset/dataset_compute.py +140 -0
  14. ck/dataset/dataset_from_crosstable.py +45 -0
  15. ck/dataset/dataset_from_csv.py +151 -0
  16. ck/dataset/sampled_dataset.py +96 -0
  17. ck/learning/__init__.py +0 -0
  18. ck/learning/train_generative.py +149 -0
  19. ck/pgm.py +29 -27
  20. ck/pgm_circuit/program_with_slotmap.py +23 -45
  21. ck/pgm_circuit/support/compile_circuit.py +2 -4
  22. ck/pgm_compiler/support/circuit_table/_circuit_table_cy.c +1 -1
  23. ck/pgm_compiler/support/circuit_table/_circuit_table_cy.cpython-312-darwin.so +0 -0
  24. ck/probability/empirical_probability_space.py +1 -0
  25. ck_demos/dataset/__init__.py +0 -0
  26. ck_demos/dataset/demo_dataset_builder.py +37 -0
  27. {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0a2.dist-info}/METADATA +1 -1
  28. {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0a2.dist-info}/RECORD +31 -18
  29. {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0a2.dist-info}/WHEEL +0 -0
  30. {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0a2.dist-info}/licenses/LICENSE.txt +0 -0
  31. {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0a2.dist-info}/top_level.txt +0 -0
ck/dataset/dataset.py ADDED
@@ -0,0 +1,594 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Sequence, Optional, Dict, Iterable, Tuple
4
+
5
+ import numpy as np
6
+
7
+ from ck.pgm import RandomVariable, State
8
+ from ck.utils.np_extras import DTypeStates, dtype_for_number_of_states, NDArrayNumeric, NDArrayStates
9
+
10
+
11
class Dataset:
    """
    A dataset has instances (rows) for zero or more random variables.
    Each instance has a weight, which is notionally one.
    Weights of instances should be non-negative, and are normally positive.

    This base class only tracks the dataset length, the tuple of random
    variables and the instance weights; the per-variable data is expected
    to be stored by subclasses.
    """

    def __init__(
            self,
            weights: Optional[NDArrayNumeric | Sequence],
            length: Optional[int],
    ):
        """
        Initialise the length, the (empty) random variables and the instance weights.

        When `weights` is a numpy array, it is stored directly (not copied).
        Any other sequence is converted to a new float64 numpy array.

        Args:
            weights: optional instance weights. If `None`, every instance
                gets a weight of one.
            length: optional number of instances. If `None`, the length
                is taken to be `len(weights)`.

        Raises:
            ValueError: if both `weights` and `length` are `None`, or if the
                shape of the weights is not `(length,)`.
        """
        # Infer the length of the dataset.
        if length is None:
            if weights is None:
                # Without this guard, `len(None)` would raise an opaque TypeError.
                raise ValueError('cannot infer the dataset length: both weights and length are None')
            length = len(weights)
        self._length: int = length

        # Start with no random variables.
        self._rvs: Tuple[RandomVariable, ...] = ()

        # Resolve the weights array, then confirm its shape.
        if weights is None:
            weights = np.ones(self._length)
        elif not isinstance(weights, np.ndarray):
            weights = np.array(weights, dtype=np.float64)

        expected_shape = (self._length,)
        if weights.shape != expected_shape:
            raise ValueError(f'weights expected shape {expected_shape}, got {weights.shape}')

        self._weights: NDArrayNumeric = weights

    def __len__(self) -> int:
        """
        How many instances in the dataset.
        """
        return self._length

    @property
    def rvs(self) -> Sequence[RandomVariable]:
        """
        Return the random variables covered by this dataset,
        in the order they were added.
        """
        return self._rvs

    @property
    def weights(self) -> NDArrayNumeric:
        """
        Get the instance weights.
        The notional weight of an instance is 1.
        The index into the returned array is the instance index.

        The returned array is the dataset's own array, not a copy.

        Returns:
            A 1D array of instance weights, with shape = `(len(self), )`.
        """
        return self._weights

    def total_weight(self) -> float:
        """
        Calculate the total weight of this dataset, i.e., the sum
        of all instance weights, returned as a Python scalar.
        """
        return self._weights.sum().item()

    def _add_rv(self, rv: RandomVariable) -> None:
        """
        Append a random variable to self.rvs.
        """
        self._rvs += (rv,)

    def _remove_rv(self, rv: RandomVariable) -> None:
        """
        Remove (the first occurrence of) a random variable from self.rvs.

        Raises:
            ValueError: if the random variable is not in self.rvs
                (raised by `tuple.index`).
        """
        rvs = self._rvs
        i: int = rvs.index(rv)
        self._rvs = rvs[:i] + rvs[i + 1:]
90
+
91
+
92
class HardDataset(Dataset):
    """
    A hard dataset is a dataset where for each instance (row) and each random variable,
    there is a state for that random variable (a state is represented as a state index).
    Each instance has a weight, which is notionally one.
    """

    @staticmethod
    def from_soft_dataset(
            soft_dataset: SoftDataset,
            *,
            adjust_instance_weights: bool = True,
    ) -> HardDataset:
        """
        Create a hard dataset from a soft dataset by repeated application
        of `HardDataset.add_rv_from_state_weights`.

        The instance weights of the returned dataset start as a copy of the
        instance weights of the soft dataset (the soft dataset is not modified),
        and are then optionally adjusted, see `adjust_instance_weights`.

        Args:
            soft_dataset: The soft dataset providing random variables,
                their states, and instance weights.
            adjust_instance_weights: If `True` (default), then the instance weights will be
                adjusted according to sum of state weights for each instance. That is, if
                the sum is not one for some instance, then the weight of that instance will
                be adjusted.

        Returns:
            A `HardDataset` instance.
        """
        dataset = HardDataset(weights=soft_dataset.weights.copy())
        for rv in soft_dataset.rvs:
            dataset.add_rv_from_state_weights(rv, soft_dataset.state_weights(rv), adjust_instance_weights)
        return dataset

    def __init__(
            self,
            data: Iterable[Tuple[RandomVariable, NDArrayStates | Sequence[int]]] = (),
            *,
            weights: Optional[NDArrayNumeric | Sequence[float | int]] = None,
            length: Optional[int] = None,
    ):
        """
        Create a hard dataset.

        When `weights` is a numpy array, then the dataset will directly reference the given array.
        When `data` contains a numpy array, then the dataset will directly reference the given array.

        Args:
            data: optional iterable of (random variable, state idxs), passed
                to `self.add_rv_from_state_idxs`.
            weights: optional array of instance weights.
            length: optional length of the dataset, if omitted, the length is inferred
                from `weights`, else from the first item of `data`, else it is zero.
        """
        self._data: Dict[RandomVariable, NDArrayStates] = {}

        # Initialise super by either weights, length or first data item.
        super_initialised: bool = weights is not None or length is not None
        if super_initialised:
            super().__init__(weights, length)

        for rv, states in data:
            if not super_initialised:
                # Neither weights nor length given: the first data item fixes the length.
                super().__init__(weights, len(states))
                super_initialised = True
            self.add_rv_from_state_idxs(rv, states)

        if not super_initialised:
            # No weights, no length and no data: an empty dataset.
            super().__init__(weights, 0)

    def state_idxs(self, rv: RandomVariable) -> NDArrayStates:
        """
        Get the state indexes for one random variable.
        The index into the returned array is the instance index.

        Returns:
            A 1D array of random variable states, with shape = `(len(self), )`.

        Raises:
            KeyError: If the random variable is not in the dataset.
        """
        return self._data[rv]

    def add_rv(self, rv: RandomVariable) -> NDArrayStates:
        """
        Add a random variable to the dataset, allocating and returning
        the state indices for the random variable.

        Args:
            rv: The random variable to add.

        Returns:
            A 1D array of random variable states, with shape = `(len(self), )`, initialised to zero.

        Raises:
            ValueError: If the random variable is already in the dataset.
        """
        dtype: DTypeStates = dtype_for_number_of_states(len(rv))
        rv_data = np.zeros(len(self), dtype=dtype)
        return self.add_rv_from_state_idxs(rv, rv_data)

    def remove_rv(self, rv: RandomVariable) -> None:
        """
        Remove a random variable from the dataset.

        Args:
            rv: The random variable to remove.

        Raises:
            KeyError: If the random variable is not in the dataset.
        """
        del self._data[rv]
        self._remove_rv(rv)

    def add_rv_from_state_idxs(self, rv: RandomVariable, state_idxs: NDArrayStates | Sequence[int]) -> NDArrayStates:
        """
        Add a random variable to the dataset.

        When `state_idxs` is a numpy array, then the dataset will directly reference the given array.
        Otherwise, a new array is allocated using the dtype given by
        `dtype_for_number_of_states(len(rv))`.

        Args:
            rv: The random variable to add.
            state_idxs: An 1D array of state indexes to add, with shape = `(len(self),)`.
                Each element `state` should be `0 <= state < len(rv)`
                (the range of values is not checked here).

        Returns:
            A 1D array of random variable states, with shape = `(len(self), )`.

        Raises:
            ValueError: If the random variable is already in the dataset,
                or `state_idxs` does not have the expected shape (length).
        """
        if rv in self._data:
            raise ValueError(f'data for {rv} already exists in the dataset')

        if isinstance(state_idxs, np.ndarray):
            expected_shape = (self._length,)
            if state_idxs.shape != expected_shape:
                raise ValueError(f'data for {rv} expected shape {expected_shape}, got {state_idxs.shape}')
            rv_data = state_idxs
        else:
            if len(state_idxs) != self._length:
                raise ValueError(f'data for {rv} expected length {self._length}, got {len(state_idxs)}')
            dtype: DTypeStates = dtype_for_number_of_states(len(rv))
            rv_data = np.array(state_idxs, dtype=dtype)

        self._data[rv] = rv_data
        self._add_rv(rv)
        return rv_data

    def add_rv_from_states(self, rv: RandomVariable, states: Sequence[State]) -> NDArrayStates:
        """
        Add a random variable to the dataset.

        The dataset will allocate and populate a states array containing state indexes.
        This will call `rv.state_idx(state)` for each state in `states`.

        Args:
            rv: The random variable to add.
            states: An 1D array of state to add, with `len(states)` = `len(self)`.
                Each element `state` should be in `rv.states`.

        Returns:
            A 1D array of random variable states, with shape = `(len(self), )`.

        Raises:
            ValueError: If the random variable is already in the dataset.
        """
        dtype: DTypeStates = dtype_for_number_of_states(len(rv))
        rv_data = np.fromiter(
            (rv.state_idx(state) for state in states),
            dtype=dtype,
            count=len(states),
        )
        return self.add_rv_from_state_idxs(rv, rv_data)

    def add_rv_from_state_weights(
            self,
            rv: RandomVariable,
            state_weights: NDArrayNumeric,
            adjust_instance_weights: bool = True,
    ) -> NDArrayStates:
        """
        Add a random variable to the dataset.

        The dataset will allocate and populate a states array containing state indexes.
        For each instance, the state with the highest weight will be taken to be the
        state of the random variable (as selected by `numpy.argmax`).

        Args:
            rv: The random variable to add.
            state_weights: A 2D array of state weights, with shape = `(len(self), len(rv))`.
                The first index is the instance index, the second is the state index.
            adjust_instance_weights: If `True` (default), then the instance weights will be
                adjusted according to sum of state weights for each instance. That is, if
                the sum is not one for some instance, then the weight of that instance will
                be adjusted (multiplied by that sum).

        Returns:
            A 1D array of random variable states, with shape = `(len(self), )`.

        Raises:
            ValueError: If the random variable is already in the dataset,
                or `state_weights` does not have the expected shape.
                In either case the dataset, including its instance weights,
                is left unmodified.
        """
        expected_shape = (self._length, len(rv))
        if state_weights.shape != expected_shape:
            raise ValueError(f'data for {rv} expected shape {expected_shape}, got {state_weights.shape}')

        dtype: DTypeStates = dtype_for_number_of_states(len(rv))
        rv_data = np.argmax(state_weights, axis=1).astype(dtype)

        # Register the random variable first: this raises if the random variable
        # already exists, and must happen before the instance weights are touched.
        result = self.add_rv_from_state_idxs(rv, rv_data)

        if adjust_instance_weights:
            # Slice assignment writes into the existing weights array
            # rather than rebinding it to a new array.
            self._weights[:] = self._weights * state_weights.sum(axis=1)

        return result

    def dump(self, *, show_rvs: bool = True, show_weights: bool = True, as_states: bool = False) -> None:
        """
        Dump the dataset in a human-readable format.
        If as_states is true, then instance states are dumped instead of just state indexes.

        Note that instance rows are produced by zipping the columns of the
        random variables, so no rows are printed when the dataset has
        no random variables.

        Args:
            show_rvs: If `True`, the random variables are dumped.
            show_weights: If `True`, the instance weights are dumped.
            as_states: If `True`, the states are dumped instead of just state indexes.
        """
        rvs = self.rvs
        if show_rvs:
            rvs_str = ', '.join(str(rv) for rv in rvs)
            print(f'rvs: [{rvs_str}]')
        print(f'instances ({len(self)}, with total weight {self.total_weight()}):')

        cols = [self.state_idxs(rv) for rv in rvs]
        for instance, weight in zip(zip(*cols), self.weights):
            if as_states:
                instance_str = ', '.join(repr(rv.states[idx]) for idx, rv in zip(instance, rvs))
            else:
                instance_str = ', '.join(str(idx) for idx in instance)

            if show_weights:
                print(f'({instance_str}) * {weight}')
            else:
                print(f'({instance_str})')
346
+
347
+
348
class SoftDataset(Dataset):
    """
    A soft dataset is a dataset where for each instance (row) and each random variable,
    there is a distribution over the states of that random variable. That is,
    for each instance, for each indicator, there is a weight. Additionally,
    each instance has a weight.

    Weights of random variable states are expected to be non-negative.
    Notionally, the sum of weights for an instance and random variable is one.
    """

    @staticmethod
    def from_hard_dataset(hard_dataset: HardDataset) -> SoftDataset:
        """
        Create a soft dataset from a hard dataset by repeated application
        of `SoftDataset.add_rv_from_state_idxs`.

        The instance weights of the returned dataset will be a copy
        of the instance weights of the hard dataset.

        Args:
            hard_dataset: The hard dataset providing random variables,
                their states, and instance weights.

        Returns:
            A `SoftDataset` instance.
        """
        dataset = SoftDataset(weights=hard_dataset.weights.copy())
        for rv in hard_dataset.rvs:
            dataset.add_rv_from_state_idxs(rv, hard_dataset.state_idxs(rv))
        return dataset

    def __init__(
            self,
            data: Iterable[Tuple[RandomVariable, NDArrayNumeric | Sequence[Sequence[float]]]] = (),
            *,
            weights: Optional[NDArrayNumeric | Sequence[float | int]] = None,
            length: Optional[int] = None,
    ):
        """
        Create a soft dataset.

        When `weights` is a numpy array, then the dataset will directly reference the given array.
        When `data` contains a numpy array, then the dataset will directly reference the given array.

        Args:
            data: optional iterable of (random variable, state weights), passed
                to `self.add_rv_from_state_weights`.
            weights: optional array of instance weights.
            length: optional length of the dataset, if omitted, the length is inferred
                from `weights`, else from the first item of `data`, else it is zero.
        """
        self._data: Dict[RandomVariable, NDArrayNumeric] = {}

        # Initialise super by either weights, length or first data item.
        super_initialised: bool = weights is not None or length is not None
        if super_initialised:
            super().__init__(weights, length)

        for rv, states_weights in data:
            if not super_initialised:
                # Neither weights nor length given: the first data item fixes the length.
                super().__init__(weights, len(states_weights))
                super_initialised = True
            self.add_rv_from_state_weights(rv, states_weights)

        if not super_initialised:
            # No weights, no length and no data: an empty dataset.
            super().__init__(weights, 0)

    def normalise(self, check_negative_instance: bool = True) -> None:
        """
        Adjust weights (for states and instances) so that the sum of state weights
        for any random variable is 1 (or zero).

        This performs an in-place modification.

        If an instance weight is zero then all state weights for that instance will be zero.
        If the state weights of an instance for any random variable sum to zero, then
        that instance weight will be zero.

        All other state weights of an instance for each random variable will sum to one.

        Args:
            check_negative_instance: if true (the default), then a RuntimeError is
                raised if a negative instance weight is encountered.

        Raises:
            RuntimeError: if `check_negative_instance` is true and a negative
                instance weight is encountered. Instances processed before
                the offending instance have already been modified.
        """
        state_weights: NDArrayNumeric
        i: int

        weights: NDArrayNumeric = self.weights
        for i in range(self._length):
            for state_weights in self._data.values():
                weight_sum = state_weights[i].sum()
                if weight_sum == 0:
                    # No state has any weight, so the instance carries no weight.
                    weights[i] = 0
                elif weight_sum != 1:
                    # Move the scale of the state weights into the instance weight.
                    state_weights[i] /= weight_sum
                    weights[i] *= weight_sum

            instance_weight = weights[i]
            if instance_weight == 0:
                # A zero-weight instance has all of its state weights cleared.
                for state_weights in self._data.values():
                    state_weights[i, :] = 0
            elif check_negative_instance and instance_weight < 0:
                raise RuntimeError(f'negative instance weight: {i}')

    def state_weights(self, rv: RandomVariable) -> NDArrayNumeric:
        """
        Get the state weights for one random variable.
        The first index into the returned array is the instance index.
        The second index into the returned array is the state index.

        Returns:
            A 2D array of state weights, with shape = `(len(self), len(rv))`.

        Raises:
            KeyError: If the random variable is not in the dataset.
        """
        return self._data[rv]

    def add_rv(self, rv: RandomVariable) -> NDArrayNumeric:
        """
        Add a random variable to the dataset, allocating and returning
        the state weights for the random variable.

        Args:
            rv: The random variable to add.

        Returns:
            A 2D array of state weights, with shape = `(len(self), len(rv))`,
            initialised to zero.

        Raises:
            ValueError: If the random variable is already in the dataset.
        """
        rv_data = np.zeros((len(self), len(rv)), dtype=np.float64)
        return self.add_rv_from_state_weights(rv, rv_data)

    def remove_rv(self, rv: RandomVariable) -> None:
        """
        Remove a random variable from the dataset.

        Args:
            rv: The random variable to remove.

        Raises:
            KeyError: If the random variable is not in the dataset.
        """
        del self._data[rv]
        self._remove_rv(rv)

    def add_rv_from_state_weights(
            self,
            rv: RandomVariable,
            state_weights: NDArrayNumeric | Sequence[Sequence[float]],
    ) -> NDArrayNumeric:
        """
        Add a random variable to the dataset.

        When `state_weights` is a numpy array, then the dataset will directly reference the given array.
        Otherwise, a new float64 array is created from the given sequences.

        Args:
            rv: The random variable to add.
            state_weights: A 2D array of state weights, with shape = `(len(self), len(rv))`.

        Returns:
            A 2D array of state weights, with shape = `(len(self), len(rv))`.

        Raises:
            ValueError: If the random variable is already in the dataset,
                or `state_weights` does not have the expected shape.
        """
        if rv in self._data:
            raise ValueError(f'data for {rv} already exists in the dataset')

        if not isinstance(state_weights, np.ndarray):
            state_weights = np.array(state_weights, dtype=np.float64)

        expected_shape = (self._length, len(rv))
        if state_weights.shape != expected_shape:
            raise ValueError(f'data for {rv} expected shape {expected_shape}, got {state_weights.shape}')

        self._data[rv] = state_weights
        self._add_rv(rv)
        return state_weights

    def add_rv_from_state_idxs(self, rv: RandomVariable, state_idxs: NDArrayStates | Sequence[int]) -> NDArrayNumeric:
        """
        Add a random variable to the dataset.

        The dataset allocates a new float64 array of state weights (the given
        `state_idxs` is not referenced). For each instance, the indicated
        state gets a weight of one, and all other states get a weight of zero.

        Args:
            rv: The random variable to add.
            state_idxs: An 1D array of state indexes to add, with shape = `(len(self),)`.
                Each element `state` should be `0 <= state < len(rv)`.

        Returns:
            A 2D array of state weights, with shape = `(len(self), len(rv))`.

        Raises:
            ValueError: If the random variable is already in the dataset,
                or the number of state indexes is not `len(self)`.
        """
        # An explicit integer dtype keeps an empty sequence usable as an index array.
        idxs = np.asarray(state_idxs, dtype=np.intp)
        number_of_rows = len(idxs)

        # One-hot encode: row i gets a one in column idxs[i].
        rv_data = np.zeros((number_of_rows, len(rv)), dtype=np.float64)
        rv_data[np.arange(number_of_rows), idxs] = 1

        return self.add_rv_from_state_weights(rv, rv_data)

    def add_rv_from_states(self, rv: RandomVariable, states: Sequence[State]) -> NDArrayNumeric:
        """
        Add a random variable to the dataset.

        The dataset will allocate and populate a state weights array, where for
        each instance the given state has a weight of one, and all other
        states have a weight of zero.
        This will call `rv.state_idx(state)` for each state in `states`.

        Args:
            rv: The random variable to add.
            states: An 1D array of state to add, with `len(states)` = `len(self)`.
                Each element `state` should be in `rv.states`.

        Returns:
            A 2D array of state weights, with shape = `(len(self), len(rv))`.

        Raises:
            ValueError: If the random variable is already in the dataset,
                or the number of states is not `len(self)`.
        """
        state_idxs = [rv.state_idx(state) for state in states]
        return self.add_rv_from_state_idxs(rv, state_idxs)

    def dump(self, *, show_rvs: bool = True, show_weights: bool = True) -> None:
        """
        Dump the dataset in a human-readable format.

        Note that instance rows are produced by zipping the columns of the
        random variables, so no rows are printed when the dataset has
        no random variables.

        Args:
            show_rvs: If `True`, the random variables are dumped.
            show_weights: If `True`, the instance weights are dumped.
        """
        rvs = self.rvs
        if show_rvs:
            rvs_str = ', '.join(str(rv) for rv in rvs)
            print(f'rvs: [{rvs_str}]')
        print(f'instances ({len(self)}, with total weight {self.total_weight()}):')

        cols = [self.state_weights(rv) for rv in rvs]
        for instance, weight in zip(zip(*cols), self.weights):
            instance_str = ', '.join(str(state_weights) for state_weights in instance)
            if show_weights:
                print(f'({instance_str}) * {weight}')
            else:
                print(f'({instance_str})')