PyPI - compiled-knowledge - Versions diffs - 4.1.0a2__cp313-cp313-macosx_11_0_arm64.whl → 4.1.0a3__cp313-cp313-macosx_11_0_arm64.whl - Mend

compiled-knowledge 4.1.0a2__cp313-cp313-macosx_11_0_arm64.whl → 4.1.0a3__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of compiled-knowledge might be problematic. Click here for more details.

Files changed (33)

ck/circuit/_circuit_cy.c +1 -1
ck/circuit/_circuit_cy.cpython-313-darwin.so +0 -0
ck/circuit_compiler/cython_vm_compiler/_compiler.c +152 -152
ck/circuit_compiler/cython_vm_compiler/_compiler.cpython-313-darwin.so +0 -0
ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.c +1 -1
ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.cpython-313-darwin.so +0 -0
ck/dataset/cross_table.py +143 -79
ck/dataset/dataset.py +95 -7
ck/dataset/dataset_builder.py +11 -4
ck/dataset/dataset_from_crosstable.py +21 -2
ck/learning/coalesce_cross_tables.py +395 -0
ck/learning/model_from_cross_tables.py +242 -0
ck/learning/parameters.py +117 -0
ck/learning/train_generative_bn.py +198 -0
ck/pgm.py +10 -8
ck/pgm_circuit/marginals_program.py +5 -0
ck/pgm_circuit/wmc_program.py +5 -0
ck/pgm_compiler/support/circuit_table/_circuit_table_cy.c +1 -1
ck/pgm_compiler/support/circuit_table/_circuit_table_cy.cpython-313-darwin.so +0 -0
ck/probability/divergence.py +226 -0
ck/probability/probability_space.py +43 -19
ck_demos/dataset/demo_dataset_from_sampler.py +18 -0
ck_demos/learning/__init__.py +0 -0
ck_demos/learning/demo_bayesian_network_from_cross_tables.py +71 -0
ck_demos/learning/demo_simple_learning.py +55 -0
ck_demos/sampling/demo_wmc_direct_sampler.py +2 -2
{compiled_knowledge-4.1.0a2.dist-info → compiled_knowledge-4.1.0a3.dist-info}/METADATA +2 -1
{compiled_knowledge-4.1.0a2.dist-info → compiled_knowledge-4.1.0a3.dist-info}/RECORD +32 -24
ck/learning/train_generative.py +0 -149
/ck/{dataset/cross_table_probabilities.py → probability/cross_table_probability_space.py} +0 -0
{compiled_knowledge-4.1.0a2.dist-info → compiled_knowledge-4.1.0a3.dist-info}/WHEEL +0 -0
{compiled_knowledge-4.1.0a2.dist-info → compiled_knowledge-4.1.0a3.dist-info}/licenses/LICENSE.txt +0 -0
{compiled_knowledge-4.1.0a2.dist-info → compiled_knowledge-4.1.0a3.dist-info}/top_level.txt +0 -0

ck/circuit_compiler/cython_vm_compiler/_compiler.cpython-313-darwin.so CHANGED Viewed

Binary file

ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.c CHANGED Viewed

@@ -15,7 +15,7 @@
             "-O3"
         ],
         "include_dirs": [
-            "/private/var/folders/y6/nj790rtn62lfktb1sh__79hc0000gn/T/build-env-xq76d94c/lib/python3.12/site-packages/numpy/_core/include"
+            "/private/var/folders/y6/nj790rtn62lfktb1sh__79hc0000gn/T/build-env-ocuq5f7z/lib/python3.12/site-packages/numpy/_core/include"
         ],
         "name": "ck.circuit_compiler.support.circuit_analyser._circuit_analyser_cy",
         "sources": [

ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.cpython-313-darwin.so CHANGED Viewed

Binary file

ck/dataset/cross_table.py CHANGED Viewed

@@ -1,8 +1,9 @@
-from typing import List, Tuple, Sequence, Iterator, Iterable, Optional, MutableMapping, Dict
+from __future__ import annotations
+from typing import List, Tuple, Sequence, Iterator, Iterable, Optional, MutableMapping, Dict, assert_never
 from ck.dataset import SoftDataset, HardDataset
 from ck.pgm import RandomVariable, rv_instances, Instance
-from ck.utils.np_extras import NDArray
 class CrossTable(MutableMapping[Instance, float]):
@@ -19,12 +20,14 @@ class CrossTable(MutableMapping[Instance, float]):
         and `0 < ct[instance]`.
     Zero weighted instances are not explicitly represented in a cross-table.
+    Given a cross-table `ct` then the following is always true.
+    `x in ct.keys()` is true if and only if `ct[x] != 0`.
     """
     def __init__(
             self,
             rvs: Sequence[RandomVariable],
-            dirichlet_prior: float = 0,
+            dirichlet_prior: float | CrossTable = 0,
             update: Iterable[Tuple[Instance, float]] = (),
     ):
         """
@@ -38,24 +41,46 @@ class CrossTable(MutableMapping[Instance, float]):
         Args:
             rvs: the random variables that this cross-table records weights for. Instances
                 in this cross-table are tuples of state indexes, co-indexed with `rvs`.
-            dirichlet_prior: a real number >= 0, representing a Dirichlet prior.
+            dirichlet_prior: provides a prior for `rvs`. This can be represented either:
+                (a) as a uniform prior, represented as a float value,
+                (b) as an arbitrary prior, represented as a cross-table.
+                If a cross-table is provided as a prior, then it must have the same random variables as `rvs`.
+                The default value for `dirichlet_prior` is 0.
             update: an optional iterable of (instance, weight) tuples to add to
                 the cross-table at construction time.
         """
         self._rvs: Tuple[RandomVariable, ...] = tuple(rvs)
         self._dict: Dict[Instance, float]
-        if dirichlet_prior != 0:
-            instance: Tuple[int, ...]
+        if isinstance(dirichlet_prior, CrossTable):
+            # rv_map[i] is where rvs[i] appears in the dirichlet_prior cross-table
+            # It will be used to map instances of the prior to instances of self.
+            rv_map: List[int] = [
+                dirichlet_prior.rvs.index(rv)
+                for rv in rvs
+            ]
+            # Copy items from the prior to self, mapping the instances as needed
             self._dict = {
-                instance: dirichlet_prior
-                for instance in rv_instances(*self._rvs)
+                tuple(prior_instance[select] for select in rv_map): weight
+                for prior_instance, weight in dirichlet_prior.items()
             }
+        elif isinstance(dirichlet_prior, (float, int)):
+            if dirichlet_prior != 0:
+                # Initialise self with every possible combination of rvs states.
+                instance: Instance
+                self._dict = {
+                    instance: dirichlet_prior
+                    for instance in rv_instances(*self._rvs)
+                }
+            else:
+                self._dict = {}
         else:
-            self._dict = {}
+            assert_never('not reached')
-        for instance, weight in update:
-            self.add(instance, weight)
+        # Apply any provided updates
+        self.add_all(update)
     def __eq__(self, other) -> bool:
         """
@@ -66,7 +91,7 @@ class CrossTable(MutableMapping[Instance, float]):
     def __setitem__(self, key: Instance, value) -> None:
         if value == 0:
-            self._dict.pop(key)
+            self._dict.pop(key, None)
         else:
             self._dict[key] = value
@@ -120,18 +145,87 @@ class CrossTable(MutableMapping[Instance, float]):
         """
         self[instance] = self._dict.get(instance, 0) + weight
+    def add_all(self, to_add: Iterable[Tuple[Instance, float]]) -> None:
+        """
+        Add the given weighted instances to the cross-table.
+        Args:
+            to_add: an iterable of (instance, weight) tuples to add to the cross-table.
+        """
+        for instance, weight in to_add:
+            self.add(instance, weight)
+    def mul(self, multiplier: float) -> None:
+        """
+        Multiply all weights by the given multiplier.
+        """
+        if multiplier == 0:
+            self._dict.clear()
+        elif multiplier == 1:
+            pass
+        else:
+            for instance in self._dict.keys():
+                self._dict[instance] *= multiplier
     def total_weight(self) -> float:
         """
         Calculate the total weight of this cross-table.
         """
         return sum(self.values())
+    def project(self, rvs: Sequence[RandomVariable]) -> CrossTable:
+        """
+        Project this cross-table onto the given set of random variables.
+        If successful, this method will always return a new CrossTable object.
+        Returns:
+            a CrossTable with the given sequence of random variables.
+        Assumes:
+            `rvs` is a subset of the cross-table's random variables.
+        """
+        # Mapping rv_map[i] is the index into `self.rvs` for `rvs[i]`.
+        rv_map: List[int] = [self.rvs.index(rv) for rv in rvs]
+        return CrossTable(
+            rvs=rvs,
+            update=(
+                (tuple(instance[i] for i in rv_map), weight)
+                for instance, weight in self._dict.items()
+            ),
+        )
+    def dump(self, *, show_rvs: bool = True, show_weights: bool = True, as_states: bool = False) -> None:
+        """
+        Dump the cross-table in a human-readable format.
+        If as_states is true, then instance states are dumped instead of just state indexes.
+        Args:
+            show_rvs: If `True`, the random variables are dumped.
+            show_weights: If `True`, the instance weights are dumped.
+            as_states: If `True`, the states are dumped instead of just state indexes.
+        """
+        if show_rvs:
+            rvs = ', '.join(str(rv) for rv in self.rvs)
+            print(f'rvs: [{rvs}]')
+        print(f'instances ({len(self)}, with total weight {self.total_weight()}):')
+        for instance, weight in self.items():
+            if as_states:
+                instance_str = ', '.join(repr(rv.states[idx]) for idx, rv in zip(instance, self.rvs))
+            else:
+                instance_str = ', '.join(str(idx) for idx in instance)
+            if show_weights:
+                print(f'({instance_str}) * {weight}')
+            else:
+                print(f'({instance_str})')
 def cross_table_from_dataset(
         dataset: HardDataset | SoftDataset,
         rvs: Optional[Sequence[RandomVariable]] = None,
         *,
-        dirichlet_prior: float = 0,
+        dirichlet_prior: float | CrossTable = 0,
 ) -> CrossTable:
     """
     Generate a cross-table for the given random variables, using the given dataset, represented
@@ -141,7 +235,12 @@ def cross_table_from_dataset(
         dataset: The dataset to use to compute the cross-table.
         rvs: The random variables to compute the cross-table for. If omitted
             then `dataset.rvs` will be used.
-        dirichlet_prior: a real number >= 0. See `CrossTable` for an explanation.
+        dirichlet_prior: provides a Dirichlet prior for `rvs`. This can be represented either:
+            (a) as a uniform prior, represented as a float value,
+            (b) as an arbitrary Dirichlet prior, represented as a cross-table.
+            If a cross-table is provided as a prior, then it must have the same random variables as `rvs`.
+            The default value for `dirichlet_prior` is 0.
+            See `CrossTable` for more explanation.
     Returns:
         The cross-table for the given random variables, using the given dataset,
@@ -151,18 +250,18 @@ def cross_table_from_dataset(
     Raises:
         KeyError: If any random variable in `rvs` does not appear in the dataset.
     """
-    if isinstance(dataset, SoftDataset):
-        return cross_table_from_soft_dataset(dataset, rvs, dirichlet_prior=dirichlet_prior)
     if isinstance(dataset, HardDataset):
         return cross_table_from_hard_dataset(dataset, rvs, dirichlet_prior=dirichlet_prior)
+    if isinstance(dataset, SoftDataset):
+        return cross_table_from_soft_dataset(dataset, rvs, dirichlet_prior=dirichlet_prior)
     raise TypeError('dataset must be either a SoftDataset or HardDataset')
-def cross_table_from_soft_dataset(
-        dataset: SoftDataset,
+def cross_table_from_hard_dataset(
+        dataset: HardDataset,
         rvs: Optional[Sequence[RandomVariable]] = None,
         *,
-        dirichlet_prior: float = 0
+        dirichlet_prior: float | CrossTable = 0
 ) -> CrossTable:
     """
     Generate a cross-table for the given random variables, using the given dataset, represented
@@ -172,7 +271,12 @@ def cross_table_from_soft_dataset(
         dataset: The dataset to use to compute the cross-table.
         rvs: The random variables to compute the cross-table for. If omitted
             then `dataset.rvs` will be used.
-        dirichlet_prior: a real number >= 0. See `CrossTable` for an explanation.
+        dirichlet_prior: provides a Dirichlet prior for `rvs`. This can be represented either:
+            (a) as a uniform prior, represented as a float value,
+            (b) as an arbitrary Dirichlet prior, represented as a cross-table.
+            If a cross-table is provided as a prior, then it must have the same random variables as `rvs`.
+            The default value for `dirichlet_prior` is 0.
+            See `CrossTable` for more explanation.
     Returns:
         The cross-table for the given random variables, using the given dataset,
@@ -184,31 +288,18 @@ def cross_table_from_soft_dataset(
     """
     if rvs is None:
         rvs = dataset.rvs
+    return CrossTable(
+        rvs=rvs,
+        dirichlet_prior=dirichlet_prior,
+        update=dataset.instances(rvs)
+    )
-    # Special case
-    if len(rvs) == 0:
-        return CrossTable((), 0, [((), dataset.total_weight() + dirichlet_prior)])
-    weights: CrossTable = CrossTable(rvs, dirichlet_prior)
-    columns: List[NDArray] = [
-        dataset.state_weights(rv)
-        for rv in rvs
-    ]
-    for instance_weights, weight in zip(zip(*columns), dataset.weights):
-        if weight != 0:
-            for instance, instance_weight in _product_instance_weights(instance_weights):
-                weights.add(instance, instance_weight * weight)
-    return weights
-def cross_table_from_hard_dataset(
-        dataset: HardDataset,
+def cross_table_from_soft_dataset(
+        dataset: SoftDataset,
         rvs: Optional[Sequence[RandomVariable]] = None,
         *,
-        dirichlet_prior: float = 0
+        dirichlet_prior: float | CrossTable = 0
 ) -> CrossTable:
     """
     Generate a cross-table for the given random variables, using the given dataset, represented
@@ -218,7 +309,12 @@ def cross_table_from_hard_dataset(
         dataset: The dataset to use to compute the cross-table.
         rvs: The random variables to compute the cross-table for. If omitted
             then `dataset.rvs` will be used.
-        dirichlet_prior: a real number >= 0. See `CrossTable` for an explanation.
+        dirichlet_prior: provides a Dirichlet prior for `rvs`. This can be represented either:
+            (a) as a uniform prior, represented as a float value,
+            (b) as an arbitrary Dirichlet prior, represented as a cross-table.
+            If a cross-table is provided as a prior, then it must have the same random variables as `rvs`.
+            The default value for `dirichlet_prior` is 0.
+            See `CrossTable` for more explanation.
     Returns:
         The cross-table for the given random variables, using the given dataset,
@@ -231,40 +327,8 @@ def cross_table_from_hard_dataset(
     if rvs is None:
         rvs = dataset.rvs
-    # Special case
-    if len(rvs) == 0:
-        return CrossTable((), 0, [((), dataset.total_weight() + dirichlet_prior)])
-    weights: CrossTable = CrossTable(rvs, dirichlet_prior)
-    columns: List[NDArray] = [
-        dataset.state_idxs(rv)
-        for rv in rvs
-    ]
-    for instance, weight in zip(zip(*columns), dataset.weights):
-        if weight != 0:
-            instance: Tuple[int, ...] = tuple(int(i) for i in instance)
-            weights.add(instance, weight)
-    return weights
-def _product_instance_weights(instance_weights: Sequence[NDArray]) -> Iterator[Tuple[Tuple[int, ...], float]]:
-    """
-    Iterate over all possible instance for the given instance weights,
-    where the weight is not zero.
-    """
-    # Base case
-    if len(instance_weights) == 0:
-        yield (), 1
-    # Recursive case
-    else:
-        next_weights: NDArray = instance_weights[-1]
-        pre_weights: Sequence[NDArray] = instance_weights[:-1]
-        for pre_instance, pre_weight in _product_instance_weights(pre_weights):
-            for i, weight in enumerate(next_weights):
-                if weight != 0:
-                    yield pre_instance + (int(i),), pre_weight * weight
+    return CrossTable(
+        rvs=rvs,
+        dirichlet_prior=dirichlet_prior,
+        update=dataset.hard_instances(rvs)
+    )

ck/dataset/dataset.py CHANGED Viewed

@@ -1,10 +1,11 @@
 from __future__ import annotations
-from typing import Sequence, Optional, Dict, Iterable, Tuple
+from itertools import repeat
+from typing import Sequence, Optional, Dict, Iterable, Tuple, List, Iterator
 import numpy as np
-from ck.pgm import RandomVariable, State
+from ck.pgm import RandomVariable, State, Instance
 from ck.utils.np_extras import DTypeStates, dtype_for_number_of_states, NDArrayNumeric, NDArrayStates
@@ -39,7 +40,7 @@ class Dataset:
         if weights.shape != expected_shape:
             raise ValueError(f'weights expected shape {expected_shape}, got {weights.shape}')
         # if not isinstance(weights.dtype, NDArrayNumeric):
-        #     raise ValueError(f'weights expected numeric dtype, got {weights.dtype}')
+        #     raise ValueError('weights expected numeric dtype')
         self._weights = weights
@@ -319,6 +320,26 @@ class HardDataset(Dataset):
         return self.add_rv_from_state_idxs(rv, rv_data)
+    def instances(self, rvs: Optional[Sequence[RandomVariable]] = None) -> Iterator[Tuple[Instance, float]]:
+        """
+        Iterate over weighted instances.
+        Args:
+            rvs: The random variables to include in iteration. Default is all dataset random variables.
+        Returns:
+            an iterator over (instance, weight) pairs, in the same order and number of instances in this dataset.
+            An instance is a sequence of state indexes, co-indexed with `self.rvs`.
+        """
+        if rvs is None:
+            rvs = self._rvs
+        # Special case - no random variables
+        if len(rvs) == 0:
+            return zip(repeat(()), self.weights)
+        else:
+            cols = [self.state_idxs(rv) for rv in rvs]
+            return zip(zip(*cols), self.weights)
     def dump(self, *, show_rvs: bool = True, show_weights: bool = True, as_states: bool = False) -> None:
         """
         Dump the dataset in a human-readable format.
@@ -333,8 +354,7 @@ class HardDataset(Dataset):
             rvs = ', '.join(str(rv) for rv in self.rvs)
             print(f'rvs: [{rvs}]')
         print(f'instances ({len(self)}, with total weight {self.total_weight()}):')
-        cols = [self.state_idxs(rv) for rv in self.rvs]
-        for instance, weight in zip(zip(*cols), self.weights):
+        for instance, weight in self.instances():
             if as_states:
                 instance_str = ', '.join(repr(rv.states[idx]) for idx, rv in zip(instance, self.rvs))
             else:
@@ -573,6 +593,52 @@ class SoftDataset(Dataset):
         return self.add_rv_from_state_weights(rv, rv_data)
+    def soft_instances(
+            self,
+            rvs: Optional[Sequence[RandomVariable]] = None,
+    ) -> Iterator[Tuple[Tuple[NDArrayNumeric], float]]:
+        """
+        Iterate over weighted instances  of soft evidence.
+        Args:
+            rvs: The random variables to include in iteration. Default is all dataset random variables.
+        Returns:
+            an iterator over (instance, weight) pairs, in the same order and number of instances in this dataset.
+            An instance is a sequence of soft weights, co-indexed with `self.rvs`.
+        """
+        if rvs is None:
+            rvs = self.rvs
+        # Special case - no random variables
+        if len(rvs) == 0:
+            return zip(repeat(()), self.weights)
+        else:
+            cols: List[NDArrayNumeric] = [self.state_weights(rv) for rv in rvs]
+            return zip(zip(*cols), self.weights)
+    def hard_instances(self, rvs: Optional[Sequence[RandomVariable]] = None) -> Iterator[Tuple[Instance, float]]:
+        """
+        Iterate over equivalent weighted hard instances.
+        Args:
+            rvs: The random variables to include in iteration. Default is all dataset random variables.
+        Returns:
+            an iterator over (instance, weight) pairs where the order and number of instances
+            is not guaranteed.
+            An instance is a sequence of state indexes, co-indexed with `self.rvs`.
+        """
+        if rvs is None:
+            rvs = self.rvs
+        # Special case - no random variables
+        if len(rvs) == 0:
+            yield (), self.total_weight()
+        else:
+            for instance_weights, weight in self.soft_instances(rvs):
+                if weight != 0:
+                    for instance, instance_weight in _product_instance_weights(instance_weights):
+                        yield instance, instance_weight * weight
     def dump(self, *, show_rvs: bool = True, show_weights: bool = True) -> None:
         """
         Dump the dataset in a human-readable format.
@@ -585,10 +651,32 @@ class SoftDataset(Dataset):
             rvs = ', '.join(str(rv) for rv in self.rvs)
             print(f'rvs: [{rvs}]')
         print(f'instances ({len(self)}, with total weight {self.total_weight()}):')
-        cols = [self.state_weights(rv) for rv in self.rvs]
-        for instance, weight in zip(zip(*cols), self.weights):
+        for instance, weight in self.soft_instances():
             instance_str = ', '.join(str(state_weights) for state_weights in instance)
             if show_weights:
                 print(f'({instance_str}) * {weight}')
             else:
                 print(f'({instance_str})')
+def _product_instance_weights(instance_weights: Sequence[NDArrayNumeric]) -> Iterator[Tuple[Tuple[int, ...], float]]:
+    """
+    Iterate over all possible hard instances for the given
+    instance weights, where the weight is not zero.
+    This is a support function for `SoftDataset.hard_instances`.
+    """
+    # Base case
+    if len(instance_weights) == 0:
+        yield (), 1
+    # Recursive case
+    else:
+        next_weights: NDArrayNumeric = instance_weights[-1]
+        pre_weights: Sequence[NDArrayNumeric] = instance_weights[:-1]
+        weight: float
+        for pre_instance, pre_weight in _product_instance_weights(pre_weights):
+            for i, weight in enumerate(next_weights):
+                if weight != 0:
+                    yield pre_instance + (int(i),), pre_weight * weight

ck/dataset/dataset_builder.py CHANGED Viewed

@@ -291,6 +291,9 @@ class DatasetBuilder(Sequence[Record]):
         """
         Allocate and return a 1D numpy array of state indexes.
+        The state of a random variable (for an instance) where the value is soft evidence,
+        is the state with the maximum weight. Ties are broken arbitrarily.
         Args:
             rv: a random variable in this dataset.
             missing: the value to use in the result to represent missing values. If not provided,
@@ -381,7 +384,8 @@ class DatasetBuilder(Sequence[Record]):
             dataset: the dataset of records to append.
         Raises:
-            KeyError: if `dataset.rvs` is not a superset of `this.rvs`.
+            KeyError: if `dataset.rvs` is not a superset of `this.rvs` and ensure_cols is false.
+                If you want to avoid this error, first call `self.ensure_column(*dataset.rvs)`.
         """
         if isinstance(dataset, HardDataset):
             cols: Tuple = tuple(dataset.state_idxs(rv).tolist() for rv in self.rvs)
@@ -441,10 +445,13 @@ class DatasetBuilder(Sequence[Record]):
 def hard_dataset_from_builder(dataset_builder: DatasetBuilder, *, missing: Optional[int] = None) -> HardDataset:
     """
     Create a hard dataset from a soft dataset by repeated application
-    of `HardDataset.add_rv_from_state_idxs`.
+    of `HardDataset.add_rv_from_state_idxs` using values from `self.get_column_hard`.
-    The instance weights of the returned dataset will be a copy
-    of the instance weights of the soft dataset.
+    The state of a random variable (for an instance) where the value is soft evidence,
+    is the state with the maximum weight. Ties are broken arbitrarily.
+    The instance weights of the returned dataset will simply
+    be the weights from the builder.
     No adjustments are made to the resulting dataset weights, even if
     a value in the dataset builder is soft evidence that does not sum to

ck/dataset/dataset_from_crosstable.py CHANGED Viewed

@@ -2,8 +2,8 @@ from typing import Sequence
 import numpy as np
-from ck.dataset import HardDataset
-from ck.dataset.cross_table import CrossTable
+from ck.dataset import HardDataset, SoftDataset
+from ck.dataset.cross_table import CrossTable, cross_table_from_soft_dataset
 from ck.pgm import RandomVariable
 from ck.utils.np_extras import dtype_for_number_of_states
@@ -43,3 +43,22 @@ def dataset_from_cross_table(cross_table: CrossTable) -> HardDataset:
     )
+def expand_soft_dataset(soft_dataset: SoftDataset) -> HardDataset:
+    """
+    Construct a hard dataset with the same data semantics as the given soft dataset
+    by expanding soft evidence.
+    Any state weights in `soft_dataset` that represents uncertainty over states
+    of a random variable will be converted to an equivalent set of weighted hard
+    instances. This means that the returned dataset may have a number of instances
+    different to that of the given soft dataset.
+    The ordering of instances in the returned dataset is not guaranteed.
+    This method works by constructing a cross-table from the given soft dataset,
+    then converting the crosstable to a hard dataset using `dataset_from_cross_table`.
+    This implies that the result will have no duplicated instances and no
+    instances with weight zero.
+    """
+    crosstab: CrossTable = cross_table_from_soft_dataset(soft_dataset)
+    return dataset_from_cross_table(crosstab)