compiled-knowledge 4.0.0a25__cp312-cp312-win_amd64.whl → 4.1.0a2__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of compiled-knowledge might be problematic.
- ck/circuit/_circuit_cy.c +1 -1
- ck/circuit/_circuit_cy.cp312-win_amd64.pyd +0 -0
- ck/circuit_compiler/cython_vm_compiler/_compiler.c +152 -152
- ck/circuit_compiler/cython_vm_compiler/_compiler.cp312-win_amd64.pyd +0 -0
- ck/circuit_compiler/interpret_compiler.py +2 -2
- ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.c +1 -1
- ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.cp312-win_amd64.pyd +0 -0
- ck/dataset/__init__.py +1 -0
- ck/dataset/cross_table.py +270 -0
- ck/dataset/cross_table_probabilities.py +53 -0
- ck/dataset/dataset.py +594 -0
- ck/dataset/dataset_builder.py +512 -0
- ck/dataset/dataset_compute.py +140 -0
- ck/dataset/dataset_from_crosstable.py +45 -0
- ck/dataset/dataset_from_csv.py +151 -0
- ck/dataset/sampled_dataset.py +96 -0
- ck/learning/__init__.py +0 -0
- ck/learning/train_generative.py +149 -0
- ck/pgm.py +29 -27
- ck/pgm_circuit/program_with_slotmap.py +23 -45
- ck/pgm_circuit/support/compile_circuit.py +2 -4
- ck/pgm_compiler/support/circuit_table/_circuit_table_cy.c +1 -1
- ck/pgm_compiler/support/circuit_table/_circuit_table_cy.cp312-win_amd64.pyd +0 -0
- ck/probability/empirical_probability_space.py +1 -0
- ck_demos/dataset/__init__.py +0 -0
- ck_demos/dataset/demo_dataset_builder.py +37 -0
- {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0a2.dist-info}/METADATA +1 -1
- {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0a2.dist-info}/RECORD +31 -18
- {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0a2.dist-info}/WHEEL +0 -0
- {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0a2.dist-info}/licenses/LICENSE.txt +0 -0
- {compiled_knowledge-4.0.0a25.dist-info → compiled_knowledge-4.1.0a2.dist-info}/top_level.txt +0 -0
ck/dataset/dataset_from_csv.py ADDED
@@ -0,0 +1,151 @@
+from typing import Iterable, List, Sequence, Optional
+
+from ck.dataset import HardDataset
+from ck.pgm import RandomVariable
+
+
+def hard_dataset_from_csv(
+        rvs: Iterable[RandomVariable],
+        lines: Iterable[str],
+        *,
+        weights: Optional[int | str] = None,
+        sep: Optional[str] = ',',
+        comment: str = '#',
+) -> HardDataset:
+    """
+    Interpret the given sequence of lines as CSV for a HardDataset.
+
+    Each line is a list of state indexes (ints) separated by `sep`.
+
+    Every line should have the same number of values.
+
+    If the first line contains a non-integer value, then the first
+    line will be interpreted as a header line.
+
+    If there is no header line, then the values will be interpreted in the
+    same order as `rvs` and the number of values on each line should be
+    the same as the number of random variables in `rvs`.
+
+    If there is a header line, then it will be interpreted as the order
+    of random variables. There must be a column name in the header to match
+    each name of the given random variables. Additional columns will be ignored.
+
+    Leading and trailing whitespace is ignored for each field, including header column names.
+
+    As text file (and StringIO) objects are iterable over lines, here is how to read a csv file:
+    ```
+    with open(csv_filename, 'r') as file:
+        hard_dataset_from_csv(rvs, file)
+    ```
+    Here is an example to read from a csv string:
+    ```
+    hard_dataset_from_csv(rvs, csv_string.splitlines())
+    ```
+
+    Args:
+        rvs: the random variables for the returned dataset.
+        lines: the sequence of lines to interpret, each line is an instance in the dataset.
+        weights: the column in the csv file holding instance weights. Can be either the
+            column number (counting from zero) or a column name (requires a header line).
+        sep: the string to use to separate values in a line, default is a comma.
+            If set to `None`, lines will be split on any consecutive run of whitespace characters
+            (including \n \r \t \f and spaces).
+        comment: text starting with this will be treated as a comment. Set to '' to disallow comments.
+
+    Returns:
+        a HardDataset.
+
+    Raises:
+        ValueError: if the lines do not conform to a CSV format.
+    """
+    rvs: Sequence[RandomVariable] = tuple(rvs)
+
+    # Define `clean_line` being sensitive to comments.
+    if len(comment) > 0:
+        def clean_line(l: str) -> str:
+            i = l.find(comment)
+            if i >= 0:
+                l = l[:i]
+            return l.strip()
+    else:
+        def clean_line(l: str) -> str:
+            return l.strip()
+
+    # Get the first line which may be a header line or data line
+    it = iter(lines)
+    try:
+        while True:
+            line = clean_line(next(it))
+            if len(line) > 0:
+                break
+    except StopIteration:
+        # Empty dataset with the given random variables
+        return HardDataset((rv, []) for rv in rvs)
+
+    values: List[str] = [value.strip() for value in line.split(sep)]
+    number_of_columns: int = len(values)
+    series: List[List[int]]  # series[dataset-column] = list of values
+    weight_series: Optional[List[float]] = None
+    column_map: List[int]  # column_map[dataset-column] = input-column
+    if all(_is_number(value) for value in values):
+        # First line is not a header line
+        if weights is None:
+            if number_of_columns != len(rvs):
+                raise ValueError('number of columns does not match number of random variables')
+            column_map = list(range(len(rvs)))
+        else:
+            if number_of_columns != len(rvs) + 1:
+                raise ValueError('number of columns does not match number of random variables and weight column')
+            if not isinstance(weights, int):
+                raise ValueError('no header detected - `weights` must be a column number')
+            if not (-number_of_columns <= weights < number_of_columns):
+                raise ValueError('`weights` column number out of range')
+            column_map = list(range(len(rvs) + 1))
+            column_map.pop(weights)
+
+        # Initialise series with the first line of data
+        series = [[int(values[i])] for i in column_map]
+        if weights is not None:
+            weight_series = [float(values[weights])]
+
+    else:
+        # First line is a header line
+        # Lookup each random variable to find its column
+        column_map = [
+            values.index(rv.name)  # will raise ValueError if not found
+            for rv in rvs
+        ]
+        if isinstance(weights, str):
+            # Convert weights column name to column number
+            weights: int = values.index(weights)  # will raise ValueError if not found
+        elif isinstance(weights, int) and not (number_of_columns <= weights < number_of_columns):
+            raise ValueError('`weights` column number out of range')
+
+        # Initialise each series as empty
+        series = [[] for _ in rvs]
+        if weights is not None:
+            weight_series = []
+
+    # Read remaining data lines
+    for line in it:
+        line = clean_line(line)
+        if len(line) == 0:
+            continue
+        if len(values) != number_of_columns:
+            raise ValueError('number of values does not match number of columns')
+        values = line.split(sep)
+        for series_i, i in zip(series, column_map):
+            series_i.append(int(values[i]))
+        if weights is not None:
+            weight_series.append(float(values[weights]))
+
+    # Construct the dataset
+    return HardDataset(zip(rvs, series), weights=weight_series)
+
+
+def _is_number(s: str) -> bool:
+    try:
+        float(s)
+        return True
+    except ValueError:
+        return False
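For orientation, here is a minimal usage sketch of the new `hard_dataset_from_csv` function, exercising the header-matching and weight-column behaviour described in its docstring. The PGM, variable names and CSV text are invented for illustration; only `PGM.new_rv` and `hard_dataset_from_csv` are taken from this release.

```
from ck.pgm import PGM
from ck.dataset.dataset_from_csv import hard_dataset_from_csv

pgm = PGM()
rain = pgm.new_rv('rain', ('no', 'yes'))
grass = pgm.new_rv('grass', ('dry', 'wet'))

# Header line names the columns; the 'n' column carries instance weights.
csv_text = """
# comments and blank lines are skipped
rain, grass, n
0, 0, 10
1, 1, 7
1, 0, 3
"""

dataset = hard_dataset_from_csv([rain, grass], csv_text.splitlines(), weights='n')
```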
ck/dataset/sampled_dataset.py ADDED
@@ -0,0 +1,96 @@
+import random
+from dataclasses import dataclass
+from typing import Sequence, List, Iterator, Tuple, Dict
+
+import numpy as np
+
+from ck.dataset import HardDataset
+from ck.dataset.cross_table import CrossTable
+from ck.pgm import RandomVariable, Instance
+from ck.sampling.sampler import Sampler
+from ck.utils.np_extras import dtype_for_number_of_states, NDArray
+from ck.utils.random_extras import Random
+
+
+def dataset_from_sampler(sampler: Sampler, length: int) -> HardDataset:
+    """
+    Create a hard dataset using samples from a sampler.
+
+    Args:
+        sampler: A sampler which defined the random variables and provides samples.
+        length: The length of the dataset to create.
+
+    Returns:
+        A HardDataset of the given length.
+    """
+    rvs: Sequence[RandomVariable] = sampler.rvs
+    columns: List[NDArray] = [
+        np.zeros(length, dtype=dtype_for_number_of_states(len(rv)))
+        for rv in rvs
+    ]
+    for i, instance in enumerate(sampler.take(length)):
+        for column, state in zip(columns, instance):
+            column[i] = state
+    return HardDataset(zip(rvs, columns))
+
+
+class CrossTableSampler(Sampler):
+    def __init__(self, crosstab: CrossTable, rand: Random = random):
+        """
+        Adapt a cross table to a sampler.
+
+        Instances will be drawn from the sampler according to their
+        weight in the given cross-table. If the given cross-table is
+        modified after constructing the sampler, the sampler will not
+        be affected.
+        """
+        if len(crosstab) == 0:
+            raise ValueError('no instances to sample')
+
+        super().__init__(rvs=crosstab.rvs, condition=())
+
+        # Group instances by weight.
+        # We do this in anticipation that it makes sampling more efficient.
+        weight_groups: Dict[float, _WeightGroup] = {}
+        for instance, weight in crosstab.items():
+            weight_group = weight_groups.get(weight)
+            if weight_group is None:
+                weight_groups[weight] = _WeightGroup(weight, weight, [instance])
+            else:
+                weight_group.append(instance)
+
+        self._weight_groups: List[_WeightGroup] = list(weight_groups.values())
+        self._total_weight = sum(group.total for group in weight_groups.values())
+        self._rand = rand
+
+    def __iter__(self) -> Iterator[Instance]:
+        while True:
+            # This code performs inverse transform sampling
+            r: float = self._rand.random() * self._total_weight
+
+            # This does a serial search to find the weight group.
+            # This is efficient for small numbers of groups, but this may be
+            # improved for large numbers of groups.
+            it = iter(self._weight_groups)
+            group = next(it)
+            while r >= group.total:
+                r -= group.total
+                group = next(it)
+
+            # Pick an instance in the group
+            i = int(r / group.weight)
+            yield group.instances[i]
+
+
+@dataclass
+class _WeightGroup:
+    """
+    Support for CrossTableSampler.
+    """
+    weight: float
+    total: float
+    instances: List[Tuple[int, ...]]
+
+    def append(self, instance: Tuple[int, ...]) -> None:
+        self.total += self.weight
+        self.instances.append(instance)
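The new sampler types compose with `cross_table_from_dataset` (defined in the new `ck/dataset/cross_table.py`, whose full diff is not reproduced here) to resample a dataset in proportion to its observed instance weights. A minimal sketch, assuming a `HardDataset` named `dataset` over the variables `rain` and `grass` from the CSV example above, and assuming `Sampler.take` draws from the iterator that `CrossTableSampler.__iter__` provides:

```
from ck.dataset.cross_table import cross_table_from_dataset
from ck.dataset.sampled_dataset import CrossTableSampler, dataset_from_sampler

# Count the weighted occurrences of each (rain, grass) instance.
crosstab = cross_table_from_dataset(dataset, (rain, grass))

# Draw 1000 new instances, distributed according to the cross-table weights.
sampler = CrossTableSampler(crosstab)
resampled = dataset_from_sampler(sampler, 1000)
```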
ck/learning/__init__.py ADDED
File without changes
ck/learning/train_generative.py ADDED
@@ -0,0 +1,149 @@
+from dataclasses import dataclass
+from typing import Dict, Tuple, List
+
+import numpy as np
+
+from ck.dataset import SoftDataset, HardDataset
+from ck.dataset.cross_table import CrossTable, cross_table_from_dataset
+from ck.pgm import PGM, Instance, DensePotentialFunction, Shape, natural_key_idx, SparsePotentialFunction
+from ck.utils.iter_extras import multiply
+from ck.utils.np_extras import NDArrayFloat64
+
+
+@dataclass
+class ParameterValues:
+    """
+    A ParameterValues object represents learned parameter values of a PGM.
+    """
+    pgm: PGM
+    """
+    The PGM that the parameter values pertains to.
+    """
+
+    cpts: List[Dict[Instance, NDArrayFloat64]]
+    """
+    A list of CPTs co-indexed with `pgm.factors`. Each CPT is a dict
+    mapping from instances of the parent random variables (of the factors)
+    to the child conditional probability distribution (CPD).
+    """
+
+    def set_zero(self) -> None:
+        """
+        Set the potential function of each PGM factor to zero.
+        """
+        for factor in self.pgm.factors:
+            factor.set_zero()
+
+    def set_cpt(self) -> None:
+        """
+        Set the potential function of each PGM factor to a CPTPotentialFunction,
+        using our parameter values.
+        """
+        for factor, cpt in zip(self.pgm.factors, self.cpts):
+            factor.set_cpt().set(*cpt.items())
+
+    def set_dense(self) -> None:
+        """
+        Set the potential function of each PGM factor to a DensePotentialFunction,
+        using our parameter values.
+        """
+        for factor, cpt in zip(self.pgm.factors, self.cpts):
+            pot_function: DensePotentialFunction = factor.set_dense()
+            parent_shape: Shape = factor.shape[1:]
+            child_state: int
+            value: float
+            if len(parent_shape) == 0:
+                cpd: NDArrayFloat64 = cpt[()]
+                for child_state, value in enumerate(cpd):
+                    pot_function[child_state] = value
+            else:
+                parent_space: int = multiply(parent_shape)
+                parent_states: Instance
+                cpd: NDArrayFloat64
+                for parent_states, cpd in cpt.items():
+                    idx: int = natural_key_idx(parent_shape, parent_states)
+                    for value in cpd:
+                        pot_function[idx] = value
+                        idx += parent_space
+
+    def set_sparse(self) -> None:
+        """
+        Set the potential function of each PGM factor to a SparsePotentialFunction,
+        using our parameter values.
+        """
+        for factor, cpt in zip(self.pgm.factors, self.cpts):
+            pot_function: SparsePotentialFunction = factor.set_sparse()
+            parent_states: Instance
+            child_state: int
+            cpd: NDArrayFloat64
+            value: float
+            for parent_states, cpd in cpt.items():
+                for child_state, value in enumerate(cpd):
+                    key = (child_state,) + parent_states
+                    pot_function[key] = value
+
+
+def train_generative_bn(
+        pgm: PGM,
+        dataset: HardDataset | SoftDataset,
+        *,
+        dirichlet_prior: float = 0,
+        check_bayesian_network: bool = True,
+) -> ParameterValues:
+    """
+    Maximum-likelihood, generative training for a Bayesian network.
+
+    Args:
+        pgm: the probabilistic graphical model defining the model structure.
+            Potential function values are ignored and need not be set.
+        dataset: a dataset of random variable states.
+        dirichlet_prior: a real number >= 0. See `CrossTable` for an explanation.
+        check_bayesian_network: if true and not pgm.is_structure_bayesian an exception will be raised.
+
+    Returns:
+        a ParameterValues object that can be used to update the parameters of the given PGM.
+
+    Raises:
+        ValueError: if the given PGM does not have a Bayesian network structure, and check_bayesian_network is True.
+    """
+    if check_bayesian_network and not pgm.is_structure_bayesian:
+        raise ValueError('the given PGM is not a Bayesian network')
+    cpts: List[Dict[Instance, NDArrayFloat64]] = [
+        cpt_from_crosstab(cross_table_from_dataset(dataset, factor.rvs, dirichlet_prior=dirichlet_prior))
+        for factor in pgm.factors
+    ]
+    return ParameterValues(pgm, cpts)
+
+
+def cpt_from_crosstab(crosstab: CrossTable) -> Dict[Instance, NDArrayFloat64]:
+    """
+    Make a conditional probability table (CPT) from a cross-table.
+
+    Args:
+        crosstab: a CrossTable representing the weight of unique instances.
+
+    Returns:
+        a mapping from instances of the parent random variables to the child
+        conditional probability distribution (CPD).
+
+    Assumes:
+        the first random variable in `crosstab.rvs` is the child random variable.
+    """
+    # Number of states for the child random variable.
+    child_size: int = len(crosstab.rvs[0])
+
+    # Get distribution over child states for seen parent states
+    parents_weights: Dict[Instance, NDArrayFloat64] = {}
+    for state, weight in crosstab.items():
+        parent_state: Tuple[int, ...] = state[1:]
+        child_state: int = state[0]
+        parent_weights = parents_weights.get(parent_state)
+        if parent_weights is None:
+            parents_weights[parent_state] = parent_weights = np.zeros(child_size, dtype=np.float64)
+        parent_weights[child_state] += weight
+
+    # Normalise
+    for parent_state, parent_weights in parents_weights.items():
+        parent_weights /= parent_weights.sum()
+
+    return parents_weights
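Putting the new learning module together: `train_generative_bn` builds one cross-table per factor, and `cpt_from_crosstab` normalises each parent instance's child-weight vector into a conditional distribution (for example, child weights (3, 1) for some parent instance become the CPD (0.75, 0.25)). A minimal training sketch, assuming `pgm` is a Bayesian-network-structured PGM and `dataset` is a `HardDataset` or `SoftDataset` over its variables:

```
from ck.learning.train_generative import train_generative_bn

params = train_generative_bn(pgm, dataset, dirichlet_prior=1.0)

# Write the learned CPTs back into the PGM, using whichever
# potential-function representation suits the model.
params.set_cpt()       # CPT potential functions
# params.set_dense()   # or dense potential functions
# params.set_sparse()  # or sparse potential functions
```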
ck/pgm.py CHANGED
@@ -19,16 +19,18 @@ State: TypeAlias = Union[int, str, bool, float, None]
 The type for a possible state of a random variable.
 """
 
-Instance: TypeAlias =
+Instance: TypeAlias = Tuple[int, ...]
 """
-An instance (of a sequence of random variables) is a
+An instance (of a sequence of random variables) is a tuple of integers
 that are state indexes, co-indexed with a known sequence of random variables.
 """
 
-Key: TypeAlias = Union[
+Key: TypeAlias = Union[Sequence[int], int]
 """
-A key identifies an instance, either as
-single integer
+A key identifies an instance, either as a sequence of integers or a
+single integer. The integers are state indexes, co-indexed with a known
+sequence of random variables. A single integer represents an instance with
+one dimension.
 """
 
 Shape: TypeAlias = Sequence[int]
@@ -1871,7 +1873,7 @@ class PotentialFunction(ABC):
         a hypothetical parameter index assuming that every valid key has a unique parameter
         as per DensePotentialFunction.
         """
-        return
+        return natural_key_idx(self._shape, key)
 
     def param_id(self, param_idx: int) -> ParamId:
         """
@@ -2029,7 +2031,7 @@ class ZeroPotentialFunction(PotentialFunction):
         return 0
 
     def param_idx(self, key: Key) -> int:
-        return
+        return natural_key_idx(self._shape, key)
 
     def is_cpt(self, tolerance=DEFAULT_CPT_TOLERANCE) -> bool:
         return True
@@ -3364,26 +3366,7 @@ def rv_instances_as_indicators(*rvs: RandomVariable, flip: bool = False) -> Iter
     return _combos(rvs, flip=not flip)
 
 
-def
-    """
-    Convert a key to an instance.
-
-    Args:
-        key: a key into a state space.
-
-    Returns:
-        A instance from the state space, as a tuple of state indexes, co-indexed with the given shape.
-
-    Assumes:
-        The key is valid for the implied state space.
-    """
-    if isinstance(key, int):
-        return (key,)
-    else:
-        return tuple(key)
-
-
-def _natural_key_idx(shape: Shape, key: Key) -> int:
+def natural_key_idx(shape: Shape, key: Key) -> int:
     """
     What is the natural index of the given key, assuming the given shape.
 
@@ -3409,6 +3392,25 @@ def _natural_key_idx(shape: Shape, key: Key) -> int:
     return result
 
 
+def _key_to_instance(key: Key) -> Instance:
+    """
+    Convert a key to an instance.
+
+    Args:
+        key: a key into a state space.
+
+    Returns:
+        A instance from the state space, as a tuple of state indexes, co-indexed with the given shape.
+
+    Assumes:
+        The key is valid for the implied state space.
+    """
+    if isinstance(key, int):
+        return (key,)
+    else:
+        return tuple(key)
+
+
 def _zero_space(shape: Shape) -> int:
     """
     Return the size of the zero space of the given shape. This is the number
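The `natural_key_idx` helper promoted to the public API above follows the natural index order referred to elsewhere in `pgm.py` (the last random variable changes most quickly). A small sanity sketch of that convention, assuming the row-major ordering it implies:

```
from ck.pgm import natural_key_idx

# For shape (2, 3) the natural order enumerates keys as
# (0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2), so:
assert natural_key_idx((2, 3), (1, 2)) == 1 * 3 + 2  # == 5

# A single-int key denotes an instance of a one-dimensional shape.
assert natural_key_idx((4,), 2) == 2
```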
ck/pgm_circuit/program_with_slotmap.py CHANGED
@@ -1,6 +1,8 @@
-from typing import Tuple, Sequence, Dict
+from typing import Tuple, Sequence, Dict
 
-
+import numpy as np
+
+from ck.pgm import RandomVariable, Indicator, ParamId
 from ck.pgm_circuit.slot_map import SlotMap, SlotKey
 from ck.probability.probability_space import Condition, check_condition
 from ck.program.program_buffer import ProgramBuffer
@@ -69,40 +71,6 @@ class ProgramWithSlotmap:
     def slot_map(self) -> SlotMap:
         return self._slot_map
 
-    def instances(self, flip: bool = False) -> Iterable[Instance]:
-        """
-        Enumerate instances of the random variables.
-
-        Each instance is a tuples of state indexes, co-indexed with the given random variables.
-
-        The order is the natural index order (i.e., last random variable changing most quickly).
-
-        Args:
-            flip: if true, then first random variable changes most quickly.
-
-        Returns:
-            an iteration over tuples, each tuple holds state indexes
-            co-indexed with the given random variables.
-        """
-        return rv_instances(*self._rvs, flip=flip)
-
-    def instances_as_indicators(self, flip: bool = False) -> Iterable[Sequence[Indicator]]:
-        """
-        Enumerate instances of the random variables.
-
-        Each instance is a tuples of indicators, co-indexed with the given random variables.
-
-        The order is the natural index order (i.e., last random variable changing most quickly).
-
-        Args:
-            flip: if true, then first random variable changes most quickly.
-
-        Returns:
-            an iteration over tuples, each tuples holds random variable indicators
-            co-indexed with the given random variables.
-        """
-        return rv_instances_as_indicators(*self._rvs, flip=flip)
-
     def compute(self) -> NDArrayNumeric:
         """
         Execute the program to compute and return the result. As per `ProgramBuffer.compute`.
@@ -146,29 +114,36 @@ class ProgramWithSlotmap:
         """
         return self._program_buffer.vars
 
-    def __setitem__(self, item: int | slice | SlotKey |
+    def __setitem__(self, item: int | slice | SlotKey | RandomVariable, value: float) -> None:
         """
-        Set
+        Set input slot value/s.
         """
         if isinstance(item, (int, slice)):
             self._program_buffer[item] = value
         elif isinstance(item, (Indicator, ParamId)):
             self._program_buffer[self._slot_map[item]] = value
+        elif isinstance(item, RandomVariable):
+            for ind in item:
+                self._program_buffer[self._slot_map[ind]] = value
         else:
-
-            for i in item:
-                self[i] = value
+            raise IndexError(f'unknown index type: {type(item)}')
 
-    def __getitem__(self, item: int | slice | SlotKey) -> NDArrayNumeric:
+    def __getitem__(self, item: int | slice | SlotKey | RandomVariable) -> NDArrayNumeric:
         """
-        Get
+        Get input slot value/s.
        """
         if isinstance(item, (int, slice)):
             return self._program_buffer[item]
         elif isinstance(item, (Indicator, ParamId)):
             return self._program_buffer[self._slot_map[item]]
+        elif isinstance(item, RandomVariable):
+            return np.fromiter(
+                (self._program_buffer[self._slot_map[ind]] for ind in item),
+                dtype=self._program_buffer.dtype,
+                count=len(item)
+            )
         else:
-            raise IndexError('unknown index type')
+            raise IndexError(f'unknown index type: {type(item)}')
 
     def set_condition(self, *condition: Condition) -> None:
         """
@@ -211,7 +186,10 @@ class ProgramWithSlotmap:
 
         Args:
             rv: a random variable whose indicators are in the slot map.
-            values: list of values
+            values: list of values
+
+        Assumes:
+            len(values) == len(rv).
         """
         for i in range(len(rv)):
             self[rv[i]] = values[i]
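The new `RandomVariable` overloads above make it possible to address all of a variable's indicator slots at once. A small usage sketch with hypothetical objects, where `pws` is a `ProgramWithSlotmap` and `rain` is a `RandomVariable` of the underlying PGM (indexing a random variable, as in `rain[1]`, yields one of its indicators):

```
pws[rain] = 1.0      # set every indicator slot of `rain` to 1.0
pws[rain[1]] = 0.0   # existing behaviour: set a single indicator slot
values = pws[rain]   # numpy array of indicator slot values, one per state
```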
ck/pgm_circuit/support/compile_circuit.py CHANGED
@@ -30,11 +30,9 @@ def compile_results(
         a compiled RawProgram.
     """
    circuit: Circuit = pgm_circuit.circuit_top.circuit
-    if const_parameters:
-        parameter_values = pgm_circuit.parameter_values
-        number_of_indicators = pgm_circuit.number_of_indicators
+    if const_parameters and len(pgm_circuit.parameter_values) > 0:
         with TmpConst(circuit) as tmp:
-            for slot, value in enumerate(parameter_values, start=number_of_indicators):
+            for slot, value in enumerate(pgm_circuit.parameter_values, start=pgm_circuit.number_of_indicators):
                 tmp.set_const(slot, value)
             raw_program: RawProgram = compiler(*results, circuit=circuit)
     else:
ck/pgm_compiler/support/circuit_table/_circuit_table_cy.c CHANGED
@@ -13,7 +13,7 @@
             "/O2"
         ],
         "include_dirs": [
-            "C:\\Users\\runneradmin\\AppData\\Local\\Temp\\build-env-
+            "C:\\Users\\runneradmin\\AppData\\Local\\Temp\\build-env-zvpv36cx\\Lib\\site-packages\\numpy\\_core\\include"
         ],
         "name": "ck.pgm_compiler.support.circuit_table._circuit_table_cy",
         "sources": [

ck/pgm_compiler/support/circuit_table/_circuit_table_cy.cp312-win_amd64.pyd CHANGED
Binary file
ck/probability/empirical_probability_space.py CHANGED
@@ -11,6 +11,7 @@ class EmpiricalProbabilitySpace(ProbabilitySpace):
     Note that this is not necessarily an efficient approach to calculating probabilities and statistics.
 
     This probability space treats each of the samples as equally weighted.
+    For a probability space over unequally weighted samples, consider using `CrossTableProbabilitySpace`.
 
     Assumes:
         len(sample) == len(rvs), for each sample in samples.
ck_demos/dataset/__init__.py ADDED
File without changes
ck_demos/dataset/demo_dataset_builder.py ADDED
@@ -0,0 +1,37 @@
+from ck.dataset import HardDataset, SoftDataset
+from ck.dataset.dataset_builder import DatasetBuilder, soft_dataset_from_builder, hard_dataset_from_builder
+from ck.pgm import PGM
+
+
+def main() -> None:
+    pgm = PGM()
+    x = pgm.new_rv('x', (True, False))
+    y = pgm.new_rv('y', ('yes', 'no', 'maybe'))
+
+    builder = DatasetBuilder([x, y])
+    builder.append()
+    builder.append(1, 2).weight = 3
+    builder.append(None, [0.7, 0.1, 0.2])
+    builder.append().set_states(True, 'maybe')
+
+    print('DatasetBuilder dump')
+    builder.dump()
+    print()
+
+    print('DatasetBuilder dump, showing states and custom missing values')
+    builder.dump(as_states=True, missing='?')
+    print()
+
+    print('HardDataset dump')
+    dataset: HardDataset = hard_dataset_from_builder(builder, missing=99)
+    dataset.dump()
+    print()
+
+    print('SoftDataset dump')
+    dataset: SoftDataset = soft_dataset_from_builder(builder)
+    dataset.dump()
+    print()
+
+
+if __name__ == '__main__':
+    main()