PyPI - compiled-knowledge - Versions diffs - 4.0.0a24__cp313-cp313-win32.whl → 4.1.0a1__cp313-cp313-win32.whl - Mend

compiled-knowledge 4.0.0a24__cp313-cp313-win32.whl → 4.1.0a1__cp313-cp313-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of compiled-knowledge might be problematic. Click here for more details.

Files changed (42) hide show

ck/circuit/_circuit_cy.c +1 -1
ck/circuit/_circuit_cy.cp313-win32.pyd +0 -0
ck/circuit/tmp_const.py +5 -4
ck/circuit_compiler/cython_vm_compiler/_compiler.c +152 -152
ck/circuit_compiler/cython_vm_compiler/_compiler.cp313-win32.pyd +0 -0
ck/circuit_compiler/interpret_compiler.py +2 -2
ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.c +1 -1
ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.cp313-win32.pyd +0 -0
ck/circuit_compiler/support/llvm_ir_function.py +4 -4
ck/dataset/__init__.py +1 -0
ck/dataset/cross_table.py +270 -0
ck/dataset/cross_table_probabilities.py +53 -0
ck/dataset/dataset.py +577 -0
ck/dataset/dataset_compute.py +140 -0
ck/dataset/dataset_from_crosstable.py +45 -0
ck/dataset/dataset_from_csv.py +147 -0
ck/dataset/sampled_dataset.py +96 -0
ck/example/diamond_square.py +3 -1
ck/example/triangle_square.py +3 -1
ck/example/truss.py +3 -1
ck/in_out/parse_net.py +21 -19
ck/in_out/parser_utils.py +7 -3
ck/learning/__init__.py +0 -0
ck/learning/train_generative.py +149 -0
ck/pgm.py +95 -84
ck/pgm_circuit/mpe_program.py +3 -4
ck/pgm_circuit/pgm_circuit.py +27 -18
ck/pgm_circuit/program_with_slotmap.py +27 -46
ck/pgm_circuit/support/compile_circuit.py +2 -4
ck/pgm_compiler/support/circuit_table/_circuit_table_cy.c +1 -1
ck/pgm_compiler/support/circuit_table/_circuit_table_cy.cp313-win32.pyd +0 -0
ck/probability/empirical_probability_space.py +1 -0
ck/probability/probability_space.py +10 -11
ck/program/raw_program.py +23 -16
ck/sampling/sampler_support.py +5 -6
ck/utils/iter_extras.py +3 -2
ck/utils/local_config.py +16 -8
{compiled_knowledge-4.0.0a24.dist-info → compiled_knowledge-4.1.0a1.dist-info}/METADATA +1 -1
{compiled_knowledge-4.0.0a24.dist-info → compiled_knowledge-4.1.0a1.dist-info}/RECORD +42 -32
{compiled_knowledge-4.0.0a24.dist-info → compiled_knowledge-4.1.0a1.dist-info}/WHEEL +0 -0
{compiled_knowledge-4.0.0a24.dist-info → compiled_knowledge-4.1.0a1.dist-info}/licenses/LICENSE.txt +0 -0
{compiled_knowledge-4.0.0a24.dist-info → compiled_knowledge-4.1.0a1.dist-info}/top_level.txt +0 -0

ck/learning/train_generative.py ADDED Viewed

@@ -0,0 +1,149 @@
+from dataclasses import dataclass
+from typing import Dict, Tuple, List
+import numpy as np
+from ck.dataset import SoftDataset, HardDataset
+from ck.dataset.cross_table import CrossTable, cross_table_from_dataset
+from ck.pgm import PGM, Instance, DensePotentialFunction, Shape, natural_key_idx, SparsePotentialFunction
+from ck.utils.iter_extras import multiply
+from ck.utils.np_extras import NDArrayFloat64
+@dataclass
+class ParameterValues:
+    """
+    A ParameterValues object represents learned parameter values of a PGM.
+    """
+    pgm: PGM
+    """
+    The PGM that the parameter values pertain to.
+    """
+    cpts: List[Dict[Instance, NDArrayFloat64]]
+    """
+    A list of CPTs co-indexed with `pgm.factors`. Each CPT is a dict
+    mapping from instances of the parent random variables (of the factors)
+    to the child conditional probability distribution (CPD).
+    """
+    def set_zero(self) -> None:
+        """
+        Set the potential function of each PGM factor to zero.
+        """
+        for factor in self.pgm.factors:
+            factor.set_zero()
+    def set_cpt(self) -> None:
+        """
+        Set the potential function of each PGM factor to a CPTPotentialFunction,
+        using our parameter values.
+        """
+        for factor, cpt in zip(self.pgm.factors, self.cpts):
+            factor.set_cpt().set(*cpt.items())
+    def set_dense(self) -> None:
+        """
+        Set the potential function of each PGM factor to a DensePotentialFunction,
+        using our parameter values.
+        """
+        for factor, cpt in zip(self.pgm.factors, self.cpts):
+            pot_function: DensePotentialFunction = factor.set_dense()
+            parent_shape: Shape = factor.shape[1:]
+            child_state: int
+            value: float
+            if len(parent_shape) == 0:
+                cpd: NDArrayFloat64 = cpt[()]
+                for child_state, value in enumerate(cpd):
+                    pot_function[child_state] = value
+            else:
+                parent_space: int = multiply(parent_shape)
+                parent_states: Instance
+                cpd: NDArrayFloat64
+                for parent_states, cpd in cpt.items():
+                    idx: int = natural_key_idx(parent_shape, parent_states)
+                    for value in cpd:
+                        pot_function[idx] = value
+                        idx += parent_space
+    def set_sparse(self) -> None:
+        """
+        Set the potential function of each PGM factor to a SparsePotentialFunction,
+        using our parameter values.
+        """
+        for factor, cpt in zip(self.pgm.factors, self.cpts):
+            pot_function: SparsePotentialFunction = factor.set_sparse()
+            parent_states: Instance
+            child_state: int
+            cpd: NDArrayFloat64
+            value: float
+            for parent_states, cpd in cpt.items():
+                for child_state, value in enumerate(cpd):
+                    key = (child_state,) + parent_states
+                    pot_function[key] = value
+def train_generative_bn(
+        pgm: PGM,
+        dataset: HardDataset | SoftDataset,
+        *,
+        dirichlet_prior: float = 0,
+        check_bayesian_network: bool = True,
+) -> ParameterValues:
+    """
+    Maximum-likelihood, generative training for a Bayesian network.
+    Args:
+        pgm: the probabilistic graphical model defining the model structure.
+            Potential function values are ignored and need not be set.
+        dataset: a dataset of random variable states.
+        dirichlet_prior: a real number >= 0. See `CrossTable` for an explanation.
+        check_bayesian_network: if true and not pgm.is_structure_bayesian an exception will be raised.
+    Returns:
+        a ParameterValues object that can be used to update the parameters of the given PGM.
+    Raises:
+        ValueError: if the given PGM does not have a Bayesian network structure, and check_bayesian_network is True.
+    """
+    if check_bayesian_network and not pgm.is_structure_bayesian:
+        raise ValueError('the given PGM is not a Bayesian network')
+    cpts: List[Dict[Instance, NDArrayFloat64]] = [
+        cpt_from_crosstab(cross_table_from_dataset(dataset, factor.rvs, dirichlet_prior=dirichlet_prior))
+        for factor in pgm.factors
+    ]
+    return ParameterValues(pgm, cpts)
+def cpt_from_crosstab(crosstab: CrossTable) -> Dict[Instance, NDArrayFloat64]:
+    """
+    Make a conditional probability table (CPT) from a cross-table.
+    Args:
+        crosstab: a CrossTable representing the weight of unique instances.
+    Returns:
+        a mapping from instances of the parent random variables to the child
+        conditional probability distribution (CPD).
+    Assumes:
+        the first random variable in `crosstab.rvs` is the child random variable.
+    """
+    # Number of states for the child random variable.
+    child_size: int = len(crosstab.rvs[0])
+    # Get distribution over child states for seen parent states
+    parents_weights: Dict[Instance, NDArrayFloat64] = {}
+    for state, weight in crosstab.items():
+        parent_state: Tuple[int, ...] = state[1:]
+        child_state: int = state[0]
+        parent_weights = parents_weights.get(parent_state)
+        if parent_weights is None:
+            parents_weights[parent_state] = parent_weights = np.zeros(child_size, dtype=np.float64)
+        parent_weights[child_state] += weight
+    # Normalise
+    for parent_state, parent_weights in parents_weights.items():
+        parent_weights /= parent_weights.sum()
+    return parents_weights

ck/pgm.py CHANGED Viewed

@@ -15,33 +15,34 @@ from ck.utils.iter_extras import (
 from ck.utils.np_extras import NDArrayFloat64, NDArrayUInt8
 State: TypeAlias = Union[int, str, bool, float, None]
-State.__doc__ = \
-    """
-    The type for a possible state of a random variable.
-    """
-Instance: TypeAlias = Sequence[int]
-Instance.__doc__ = \
-    """
-    An instance (of a sequence of random variables) is a sequence of integers
-    that are state indexes, co-indexed with a known sequence of random variables.
-    """
-Key: TypeAlias = Union[Instance, int]
-Key.__doc__ = \
-    """
-    A key identifies an instance, either as an instance itself or a
-    single integer, representing an instance with one dimension.
-    """
+"""
+The type for a possible state of a random variable.
+"""
+Instance: TypeAlias = Tuple[int, ...]
+"""
+An instance (of a sequence of random variables) is a tuple of integers
+that are state indexes, co-indexed with a known sequence of random variables.
+"""
+Key: TypeAlias = Union[Sequence[int], int]
+"""
+A key identifies an instance, either as a sequence of integers or a
+single integer. The integers are state indexes, co-indexed with a known
+sequence of random variables. A single integer represents an instance with
+one dimension.
+"""
 Shape: TypeAlias = Sequence[int]
-Key.__doc__ = \
-    """
-    The type for the "shape" of a sequence of random variables.
-    That is, the shape of (rv1, rv2, rv3) is (len(rv1), len(rv2), len(rv3)).
-    """
+"""
+The type for the "shape" of a sequence of random variables.
+That is, the shape of (rv1, rv2, rv3) is (len(rv1), len(rv2), len(rv3)).
+"""
-DEFAULT_CPT_TOLERANCE: float = 0.000001  # A tolerance when checking CPT distributions sum to one (or zero).
+DEFAULT_CPT_TOLERANCE: float = 0.000001
+"""
+A tolerance when checking CPT distributions sum to one (or zero).
+"""
 class PGM:
@@ -214,14 +215,17 @@ class PGM:
         The returned random variable will have an `idx` equal to the value of
         `self.number_of_rvs` just prior to adding the new random variable.
+        The states of the random variable can be specified either as an integer
+        representing the number of states, or as a sequence of state values. If a
+        single integer, `n`, is provided then the states will be: 0, 1, ..., n-1.
+        If a sequence of states is provided then the states must be unique.
         Assumes:
             Provided states contain no duplicates.
         Args:
             name: a name for the random variable.
-            states: either an integer number of states or a sequence of state values. If a
-                single integer, `n`, is provided then the states will be 0, 1, ..., n-1.
-                If a sequence of states are provided then the states must be unique.
+            states: either the number of states or a sequence of state values.
         Returns:
             a RandomVariable object belonging to this PGM.
@@ -241,10 +245,11 @@ class PGM:
         Assumes:
             The given random variables all belong to this PGM.
             The random variables contain no duplicates.
         Args:
-            *rvs: the random variables.
+            rvs: the random variables.
         Returns:
             a Factor object belonging to this PGM.
@@ -336,17 +341,18 @@ class PGM:
             *input_rvs: RandomVariable
     ) -> Factor:
         """
-        Add a sparse 0/1 factor to this PGM representing:
-            result_rv ==  function(*rvs).
-        That is:
+        Add a sparse 0/1 factor to this PGM representing `result_rv == function(*rvs)`.
+        That is::
             factor[result_s, *input_s] = 1, if result_s == function(*input_s);
                                        = 0, otherwise.
         Args:
             function: a function from state indexes of the input random variables to a state index
                 of the result random variable. The function should take the same number of arguments
                 as `input_rvs` and return a state index for `result_rv`.
             result_rv: the random variable defining result values.
-            *input_rvs: the random variables defining input values.
+            input_rvs: the random variables defining input values.
         Returns:
             a Factor object belonging to this PGM, with a configured sparse potential function.
@@ -378,16 +384,17 @@ class PGM:
         """
         Render indicators as a string.
-        For example:
+        For example::
             pgm = PGM()
             a = pgm.new_rv('A', ('x', 'y', 'z'))
             b = pgm.new_rv('B', (3, 5))
             print(pgm.indicator_str(a[0], b[1], a[2]))
-        will print:
+        will print::
             A=x, B=5, A=z
         Args:
-            *indicators: the indicators to render.
+            indicators: the indicators to render.
             sep: the separator to use between the random variable and its state.
             delim: the delimiter to use when rendering multiple indicators.
@@ -406,16 +413,17 @@ class PGM:
         """
         Render indicators as a string, grouping indicators by random variable.
-        For example:
+        For example::
             pgm = PGM()
             a = pgm.new_rv('A', ('x', 'y', 'z'))
             b = pgm.new_rv('B', (3, 5))
             print(pgm.condition_str(a[0], b[1], a[2]))
-        will print:
+        will print::
             A in {x, z}, B=5
         Args:
-            *indicators: the indicators to render.
+            indicators: the indicators to render.
         Return:
             a string representation of the given indicators, as a condition.
         """
@@ -930,9 +938,9 @@ class RandomVariable(Sequence[Indicator]):
     in the random variable's PGM list of random variables.
     A random variable behaves like a sequence of Indicators, where each indicator represents a random
-    variable being in a particular state. Specifically for a random variable rv, len(rv) is the
+    variable being in a particular state. Specifically for a random variable rv, `len(rv)` is the
     number of states of the random variable and rv[i] is the Indicator representing that
-    rv is in the ith state. When sliced, the result is a tuple, i.e. rv[1:3] = (rv[1], rv[2]).
+    rv is in the ith state. When sliced, the result is a tuple, i.e. `rv[1:3] = (rv[1], rv[2])`.
     A RandomVariable has a name. This is for human convenience and has no functional purpose
     within a PGM.
@@ -942,15 +950,18 @@ class RandomVariable(Sequence[Indicator]):
         """
         Create a new random variable, in the given PGM.
+        The states of the random variable can be specified either as an integer
+        representing the number of states, or as a sequence of state values. If a
+        single integer, `n`, is provided then the states will be: 0, 1, ..., n-1.
+        If a sequence of states is provided then the states must be unique.
         Assumes:
             Provided states contain no duplicates.
         Args:
             pgm: the PGM that the random variable will belong to.
             name: a name for the random variable.
-            states: either an integer number of states or a sequence of state values. If a
-                single integer, `n`, is provided then the states will be 0, 1, ..., n-1.
-                If a sequence of states are provided then the states must be unique.
+            states: either the number of states or a sequence of state values.
         """
         self._pgm: PGM = pgm
         self._name: str = name
@@ -1212,15 +1223,14 @@ class RVMap(Sequence[RandomVariable]):
     In addition to accessing a random variable by its index, an RVMap enables
     access to the PGM random variable via the name of each random variable.
-    For example, if `pgm.rvs[1]` is a random variable named `xray`, then:
-    ```
-    rvs = RVMap(pgm)
+    For example, if `pgm.rvs[1]` is a random variable named `xray`, then::
+        rvs = RVMap(pgm)
-    # These all retrieve the same random variable object.
-    xray = rvs[1]
-    xray = rvs('xray')
-    xray = rvs.xray
-    ```
+        # These all retrieve the same random variable object.
+        xray = rvs[1]
+        xray = rvs('xray')
+        xray = rvs.xray
     To use an RVMap on a PGM, the random variable names must be unique across the PGM.
     """
@@ -1527,7 +1537,7 @@ class Factor:
         Set the potential function to a new `ClausePotentialFunction` object.
         Args:
-            *key: defines the random variable states of the clause. The key is a sequence of
+            key: defines the random variable states of the clause. The key is a sequence of
                 random variable state indexes, co-indexed with `Factor.rvs`.
         Returns:
@@ -1556,7 +1566,7 @@ class Factor:
         return self._potential_function
-@dataclass(frozen=True, eq=True)
+@dataclass(frozen=True, eq=True, slots=True)
 class ParamId:
     """
     A ParamId identifies a parameter of a potential function.
@@ -1863,7 +1873,7 @@ class PotentialFunction(ABC):
             a hypothetical parameter index assuming that every valid key has a unique parameter
             as per DensePotentialFunction.
         """
-        return _natural_key_idx(self._shape, key)
+        return natural_key_idx(self._shape, key)
     def param_id(self, param_idx: int) -> ParamId:
         """
@@ -2021,7 +2031,7 @@ class ZeroPotentialFunction(PotentialFunction):
         return 0
     def param_idx(self, key: Key) -> int:
-        return _natural_key_idx(self._shape, key)
+        return natural_key_idx(self._shape, key)
     def is_cpt(self, tolerance=DEFAULT_CPT_TOLERANCE) -> bool:
         return True
@@ -2164,7 +2174,7 @@ class DensePotentialFunction(PotentialFunction):
         """
         Set the values of the potential function using the given iterator.
-        Mapping instances to *values is as follows:
+        Mapping instances to values is as follows:
             Given Factor(rv1, rv2) where rv1 has 2 states, and rv2 has 3 states:
             values[0] represents instance (0,0)
             values[1] represents instance (0,1)
@@ -2209,7 +2219,7 @@ class DensePotentialFunction(PotentialFunction):
         The order of values is the same as set_iter.
         Args:
-            *value: the values to use.
+            value: the values to use.
         Returns:
             self
@@ -2414,7 +2424,7 @@ class SparsePotentialFunction(PotentialFunction):
         """
         Set the values of the potential function using the given iterator.
-        Mapping instances to *values is as follows:
+        Mapping instances to values is as follows:
             Given Factor(rv1, rv2) where rv1 has 2 states, and rv2 has 3 states:
             values[0] represents instance (0,0)
             values[1] represents instance (0,1)
@@ -2636,7 +2646,7 @@ class CompactPotentialFunction(PotentialFunction):
         """
         Set the values of the potential function using the given iterator.
-        Mapping instances to *values is as follows:
+        Mapping instances to `values` is as follows:
             Given Factor(rv1, rv2) where rv1 has 2 states, and rv2 has 3 states:
             values[0] represents instance (0,0)
             values[1] represents instance (0,1)
@@ -2679,7 +2689,7 @@ class CompactPotentialFunction(PotentialFunction):
         The order of values is the same as set_iter.
         Args:
-            *value: the values to use.
+            value: the values to use.
         Returns:
             self
@@ -3071,7 +3081,8 @@ class CPTPotentialFunction(PotentialFunction):
         Calls self.set_cpd(parent_states, cpd) for each row (parent_states, cpd)
         in rows. Any unmentioned parent states will have zero probabilities.
-        Example usage, assuming three Boolean random variables:
+        Example usage, assuming three Boolean random variables::
             pgm.Factor(x, y, z).set_cpt().set(
                 # y  z    x[0] x[1]
                 ((0, 0), (0.1, 0.9)),
@@ -3079,9 +3090,9 @@ class CPTPotentialFunction(PotentialFunction):
                 ((1, 0), (0.1, 0.9)),
                 ((1, 1), (0.1, 0.9))
             )
         Args:
-            *rows: are tuples (key, cpd) used to set the potential function values.
+            rows: are tuples (key, cpd) used to set the potential function values.
         Raises:
             ValueError: if a CPD is not valid.
@@ -3105,7 +3116,7 @@ class CPTPotentialFunction(PotentialFunction):
         Any list entry may be None, indicating 'guaranteed zero' for the associated parent states.
         Args:
-            *cpds: are the CPDs used to set the potential function values.
+            cpds: are the CPDs used to set the potential function values.
         Raises:
             ValueError: if a CPD is not valid.
@@ -3355,26 +3366,7 @@ def rv_instances_as_indicators(*rvs: RandomVariable, flip: bool = False) -> Iter
     return _combos(rvs, flip=not flip)
-def _key_to_instance(key: Key) -> Instance:
-    """
-    Convert a key to an instance.
-    Args:
-        key: a key into a state space.
-    Returns:
-        A instance from the state space, as a tuple of state indexes, co-indexed with the given shape.
-    Assumes:
-        The key is valid for the implied state space.
-    """
-    if isinstance(key, int):
-        return (key,)
-    else:
-        return tuple(key)
-def _natural_key_idx(shape: Shape, key: Key) -> int:
+def natural_key_idx(shape: Shape, key: Key) -> int:
     """
     What is the natural index of the given key, assuming the given shape.
@@ -3400,6 +3392,25 @@ def _natural_key_idx(shape: Shape, key: Key) -> int:
     return result
+def _key_to_instance(key: Key) -> Instance:
+    """
+    Convert a key to an instance.
+    Args:
+        key: a key into a state space.
+    Returns:
+        An instance from the state space, as a tuple of state indexes, co-indexed with the given shape.
+    Assumes:
+        The key is valid for the implied state space.
+    """
+    if isinstance(key, int):
+        return (key,)
+    else:
+        return tuple(key)
 def _zero_space(shape: Shape) -> int:
     """
     Return the size of the zero space of the given shape. This is the number

ck/pgm_circuit/mpe_program.py CHANGED Viewed

@@ -228,10 +228,9 @@ class MPEProgram(ProgramWithSlotmap):
 class MPEResult:
     """
     An MPE result is the result of MPE inference.
-    Fields:
-        wmc: the weighted model count value of the MPE solution.
-        mpe: The MPE solution instance. If there are ties then this will just be once instance.
     """
     wmc: float
+    """the weighted model count value of the MPE solution."""
     mpe: Instance
+    """the MPE solution instance. If there are ties then this will just be one instance."""

ck/pgm_circuit/pgm_circuit.py CHANGED Viewed

@@ -16,33 +16,42 @@ class PGMCircuit:
     holds the values of the parameters. Specifically, given parameter id `param_id`, then
     `parameter_values[slot_map[param_id] - number_of_indicators]` is the value of the
     identified parameter as it was in the PGM.
-    Fields:
-        rvs: holds the random variables from the PGM as it was compiled, in order.
-        conditions: any conditions on `rvs` that were compiled into the circuit.
-        number_of_indicators: is the number of indicators in `rvs` which is
-            `sum(len(rv) for rv in rvs`. Specifically, `circuit.vars[i]` is the circuit variable
-            corresponding to the ith indicator, where `circuit` is `circuit_top.circuit` and
-            indicators are ordered as per `rvs`.
-        number_of_parameters: is the number of parameters from the PGM that are
-            represented as circuit variables. This may be zero if parameters from the PGM
-            were compiled as constants.
-        slot_map[x]: gives the index of the circuit variable corresponding to x,
-            where x is either a random variable indicator (Indicator) or a parameter id (ParamId).
     """
     rvs: Sequence[RandomVariable]
+    """holds the random variables from the PGM as it was compiled, in order."""
     conditions: Sequence[Indicator]
+    """any conditions on `rvs` that were compiled into the circuit."""
     circuit_top: CircuitNode
+    """the top circuit node defining the network function."""
     number_of_indicators: int
+    """
+    the number of indicators in `rvs` which is
+    `sum(len(rv) for rv in rvs)`. Specifically, `circuit.vars[i]` is the circuit variable
+    corresponding to the ith indicator, where `circuit` is `circuit_top.circuit` and
+    indicators are ordered as per `rvs`.
+    """
     number_of_parameters: int
+    """
+    the number of parameters from the PGM that are
+    represented as circuit variables. This may be zero if parameters from the PGM
+    were compiled as constants.
+    """
     slot_map: SlotMap
+    """
+    gives the index of the circuit variable corresponding to x,
+    where x is either a random variable indicator (Indicator) or a parameter id (ParamId).
+    """
     parameter_values: NDArray
+    """
+    parameter values, co-indexed with the circuit variables, counting beyond `number_of_indicators`.
+    """
     def dump(self, *, prefix: str = '', indent: str = '    ') -> None:
         """

compiled-knowledge 4.0.0a24__cp313-cp313-win32.whl → 4.1.0a1__cp313-cp313-win32.whl

Potentially problematic release.

compiled-knowledge 4.0.0a24__cp313-cp313-win32.whl → 4.1.0a1__cp313-cp313-win32.whl