PyPI - compiled-knowledge - Versions diffs - 4.0.0a24__cp313-cp313-macosx_11_0_arm64.whl → 4.1.0__cp313-cp313-macosx_11_0_arm64.whl - Mend

compiled-knowledge 4.0.0a24__cp313-cp313-macosx_11_0_arm64.whl → 4.1.0__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of compiled-knowledge might be problematic. Click here for more details.

Files changed (58) hide show

ck/circuit/_circuit_cy.c +1 -1
ck/circuit/_circuit_cy.cpython-313-darwin.so +0 -0
ck/circuit/tmp_const.py +5 -4
ck/circuit_compiler/cython_vm_compiler/_compiler.c +152 -152
ck/circuit_compiler/cython_vm_compiler/_compiler.cpython-313-darwin.so +0 -0
ck/circuit_compiler/interpret_compiler.py +2 -2
ck/circuit_compiler/llvm_compiler.py +4 -4
ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.c +1 -1
ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.cpython-313-darwin.so +0 -0
ck/circuit_compiler/support/input_vars.py +4 -4
ck/circuit_compiler/support/llvm_ir_function.py +4 -4
ck/dataset/__init__.py +1 -0
ck/dataset/cross_table.py +334 -0
ck/dataset/dataset.py +682 -0
ck/dataset/dataset_builder.py +519 -0
ck/dataset/dataset_compute.py +140 -0
ck/dataset/dataset_from_crosstable.py +64 -0
ck/dataset/dataset_from_csv.py +151 -0
ck/dataset/sampled_dataset.py +96 -0
ck/example/diamond_square.py +3 -1
ck/example/triangle_square.py +3 -1
ck/example/truss.py +3 -1
ck/in_out/parse_net.py +21 -19
ck/in_out/parser_utils.py +7 -3
ck/learning/__init__.py +0 -0
ck/learning/coalesce_cross_tables.py +403 -0
ck/learning/model_from_cross_tables.py +296 -0
ck/learning/parameters.py +117 -0
ck/learning/train_generative_bn.py +198 -0
ck/pgm.py +105 -92
ck/pgm_circuit/marginals_program.py +5 -0
ck/pgm_circuit/mpe_program.py +3 -4
ck/pgm_circuit/pgm_circuit.py +27 -18
ck/pgm_circuit/program_with_slotmap.py +27 -46
ck/pgm_circuit/support/compile_circuit.py +2 -4
ck/pgm_circuit/wmc_program.py +5 -0
ck/pgm_compiler/support/circuit_table/_circuit_table_cy.c +1 -1
ck/pgm_compiler/support/circuit_table/_circuit_table_cy.cpython-313-darwin.so +0 -0
ck/probability/cross_table_probability_space.py +53 -0
ck/probability/divergence.py +226 -0
ck/probability/empirical_probability_space.py +1 -0
ck/probability/probability_space.py +53 -30
ck/program/raw_program.py +23 -16
ck/sampling/sampler_support.py +5 -6
ck/utils/iter_extras.py +3 -2
ck/utils/local_config.py +16 -8
ck_demos/dataset/__init__.py +0 -0
ck_demos/dataset/demo_dataset_builder.py +37 -0
ck_demos/dataset/demo_dataset_from_sampler.py +18 -0
ck_demos/learning/__init__.py +0 -0
ck_demos/learning/demo_bayesian_network_from_cross_tables.py +70 -0
ck_demos/learning/demo_simple_learning.py +55 -0
ck_demos/sampling/demo_wmc_direct_sampler.py +2 -2
{compiled_knowledge-4.0.0a24.dist-info → compiled_knowledge-4.1.0.dist-info}/METADATA +2 -1
{compiled_knowledge-4.0.0a24.dist-info → compiled_knowledge-4.1.0.dist-info}/RECORD +58 -37
{compiled_knowledge-4.0.0a24.dist-info → compiled_knowledge-4.1.0.dist-info}/WHEEL +0 -0
{compiled_knowledge-4.0.0a24.dist-info → compiled_knowledge-4.1.0.dist-info}/licenses/LICENSE.txt +0 -0
{compiled_knowledge-4.0.0a24.dist-info → compiled_knowledge-4.1.0.dist-info}/top_level.txt +0 -0

ck/probability/cross_table_probability_space.py ADDED Viewed

@@ -0,0 +1,53 @@
+from typing import Sequence, Tuple, Dict
+from ck.dataset.cross_table import CrossTable, Instance
+from ck.pgm import RandomVariable, Indicator
+from ck.probability.probability_space import ProbabilitySpace, Condition, check_condition
+class CrossTableProbabilitySpace(ProbabilitySpace):
+    def __init__(self, cross_table: CrossTable):
+        """
+        Enable probabilistic queries over a sample from a sample space.
+        Note that this is not necessarily an efficient approach to calculating probabilities and statistics.
+        Args:
+            cross_table: a CrossTable to adapt to a ProbabilitySpace.
+        """
+        self._cross_table: CrossTable = cross_table
+        self._rv_idx_to_sample_idx: Dict[int, int] = {
+            rv.idx: i
+            for i, rv in enumerate(cross_table.rvs)
+        }
+    @property
+    def rvs(self) -> Sequence[RandomVariable]:
+        return self._cross_table.rvs
+    def wmc(self, *condition: Condition) -> float:
+        condition: Tuple[Indicator, ...] = check_condition(condition)
+        rvs: Sequence[RandomVariable] = self._cross_table.rvs
+        checks = [set() for _ in rvs]
+        for ind in condition:
+            checks[self._rv_idx_to_sample_idx[ind.rv_idx]].add(ind.state_idx)
+        for i in range(len(checks)):
+            if len(checks[i]) > 0:
+                checks[i] = set(range(len(rvs[i]))).difference(checks[i])
+        def satisfied(item: Tuple[Instance, float]) -> float:
+            """
+            Return the weight of the instance, if the instance satisfies
+            the condition, else return 0.
+            """
+            instance, weight = item
+            if any((state in check) for state, check in zip(instance, checks)):
+                return 0
+            else:
+                return weight
+        return sum(map(satisfied, self._cross_table.items()))
+    @property
+    def z(self) -> float:
+        return self._cross_table.total_weight()

ck/probability/divergence.py ADDED Viewed

@@ -0,0 +1,226 @@
+"""
+This module implements several divergences which measure the difference
+between two distributions.
+"""
+import math
+from typing import Sequence
+import numpy as np
+from ck.pgm import RandomVariable, rv_instances_as_indicators, PGM
+from ck.probability.probability_space import ProbabilitySpace
+_NAN: float = np.nan  # Not-a-number (i.e., the result of an invalid calculation).
+def kl(p: ProbabilitySpace, q: ProbabilitySpace) -> float:
+    """
+    Compute the Kullback-Leibler divergence between p & q,
+    where p is the true distribution.
+    This implementation uses logarithms, base 2.
+    Args:
+        p: a probability space to compare to.
+        q: the other probability space.
+    Returns:
+        the Kullback–Leibler (KL) divergence of p & q, where p is
+        the true distribution.
+    Raises:
+        ValueError: if `p` and `q` do not have compatible random variables. Specifically, the following must hold:
+            * `len(p.rvs) == len(q.rvs)`
+            * `len(q.rvs[i]) == len(p.rvs[i])` for all `i`
+            * `q.rvs[i].idx == p.rvs[i].idx` for all `i`.
+    Warning:
+        this method will enumerate the whole probability space.
+    """
+    if not _compatible_rvs(p.rvs, q.rvs):
+        raise ValueError('incompatible random variables')
+    total = 0.0
+    for x in rv_instances_as_indicators(*p.rvs):
+        p_x = p.probability(*x)
+        q_x = q.probability(*x)
+        if p_x <= 0 or q_x <= 0:
+            return _NAN
+        total += p_x * math.log2(p_x / q_x)
+    return total
+def pseudo_kl(p: ProbabilitySpace, q: ProbabilitySpace) -> float:
+    """
+    A kind of KL divergence, factored by the structure of `p`.
+    This is an experimental measure.
+    This implementation uses logarithms, base 2.
+    Args:
+        p: a probability space to compare to.
+        q: the other probability space.
+    Returns:
+        the pseudo KL divergence of `p` and `q`, or NaN if `p` has no random variables or any compared probability is not positive.
+    Raises:
+        ValueError: if `p` and `q` do not have compatible random variables. Specifically, the following must hold:
+            * `len(p.rvs) == len(q.rvs)`
+            * `len(q.rvs[i]) == len(p.rvs[i])` for all `i`
+            * `q.rvs[i].idx == p.rvs[i].idx` for all `i`.
+        ValueError: if not all random variables of `p` are from a single PGM, which must
+            have a Bayesian network structure.
+    """
+    p_rvs: Sequence[RandomVariable] = p.rvs
+    q_rvs: Sequence[RandomVariable] = q.rvs
+    if not _compatible_rvs(p_rvs, q_rvs):
+        raise ValueError('incompatible random variables')
+    if len(p_rvs) == 0:
+        return _NAN
+    pgm: PGM = p_rvs[0].pgm
+    if any(rv.pgm is not pgm for rv in p_rvs):
+        raise ValueError('p random variables are not from a single PGM.')
+    if not pgm.is_structure_bayesian:
+        raise ValueError('p does not have Bayesian network structure.')
+    # Across the two spaces, corresponding random variables are equivalent;
+    # i.e., same number of states and same `idx` values. Therefore,
+    # indicators from either one space can be used in both spaces.
+    total: float = 0
+    for factor in pgm.factors:
+        for x in rv_instances_as_indicators(*factor.rvs):  # every possible state of factor rvs
+            p_x = p.probability(*x)
+            q_x = q.probability(*x)
+            if p_x <= 0 or q_x <= 0:
+                return _NAN
+            total += p_x * math.log2(p_x / q_x)
+    return total
+def hi(p: ProbabilitySpace, q: ProbabilitySpace) -> float:
+    """
+    Compute the histogram intersection between the two given probability spaces.
+    The histogram intersection between two probability spaces P and Q,
+    with state spaces X, is defined as:
+        HI(P, Q) = sum(min(P(x), Q(x)) for x in X)
+    Args:
+        p: a probability space to compare to.
+        q: the other probability space.
+    Returns:
+        the histogram intersection between the two probability spaces.
+    Raises:
+        ValueError: if `p` and `q` do not have compatible random variables. Specifically, the following must hold:
+            * `len(p.rvs) == len(q.rvs)`
+            * `len(q.rvs[i]) == len(p.rvs[i])` for all `i`
+            * `q.rvs[i].idx == p.rvs[i].idx` for all `i`.
+    Warning:
+        this method will enumerate the whole probability space.
+    """
+    p_rvs: Sequence[RandomVariable] = p.rvs
+    q_rvs: Sequence[RandomVariable] = q.rvs
+    if not _compatible_rvs(p_rvs, q_rvs):
+        raise ValueError('incompatible random variables')
+    # Across the two spaces, corresponding random variables are equivalent;
+    # i.e., same number of states and same `idx` values. Therefore,
+    # indicators from either one space can be used in both spaces.
+    return sum(
+        min(p.probability(*x), q.probability(*x))
+        for x in rv_instances_as_indicators(*p_rvs)
+    )
+def fhi(p: ProbabilitySpace, q: ProbabilitySpace) -> float:
+    """
+    Compute the factored histogram intersection between the two given probability spaces.
+    The factored histogram intersection between two probability spaces P and Q,
+    with state spaces X and factorisation F, is defined as:
+        FHI(P, Q) = 1/n sum(P(Y=y) CHI(P, Q, X | Y=y) for each CPT row Y=y)
+        where:
+            CHI(P, Q, X | Y=y) = HI(P(X | Y=y), Q(X | Y=y))
+            HI(P, Q) = sum(min(P(X=x), Q(X=x)) for x in f)
+    The value of _n_ is the sum of P(Y=y) over all CPT rows. However,
+    this always equals the number of CPTs, i.e., the number of random
+    variables.
+    The factorisation F is taken from `p`.
+    For more information about factored histogram intersection, see the publication:
+    Suresh, S., Drake, B. (2025). Sampling of Large Probabilistic Graphical Models
+    Using Arithmetic Circuits. AI 2024: Advances in Artificial Intelligence. AI 2024.
+    Lecture Notes in Computer Science, vol 15443. https://doi.org/10.1007/978-981-96-0351-0_13.
+    Args:
+        p: a probability space to compare to.
+        q: the other probability space.
+    Returns:
+        the factored histogram intersection between the two probability spaces.
+    Raises:
+        ValueError: if `p` and `q` do not have compatible random variables. Specifically, the following must hold:
+            * `len(p.rvs) == len(q.rvs)`
+            * `len(q.rvs[i]) == len(p.rvs[i])` for all `i`
+            * `q.rvs[i].idx == p.rvs[i].idx` for all `i`.
+        ValueError: if not all random variables of `p` are from a single PGM, which must
+            have a Bayesian network structure.
+    """
+    p_rvs: Sequence[RandomVariable] = p.rvs
+    q_rvs: Sequence[RandomVariable] = q.rvs
+    if not _compatible_rvs(p_rvs, q_rvs):
+        raise ValueError('incompatible random variables')
+    if len(p_rvs) == 0:
+        return 0
+    pgm: PGM = p_rvs[0].pgm
+    if any(rv.pgm is not pgm for rv in p_rvs):
+        raise ValueError('p random variables are not from a single PGM.')
+    if not pgm.is_structure_bayesian:
+        raise ValueError('p does not have Bayesian network structure.')
+    # Across the two spaces, corresponding random variables are equivalent;
+    # i.e., same number of states and same `idx` values. Therefore,
+    # indicators from either one space can be used in both spaces.
+    # Loop over all CPTs, accumulating the total
+    total: float = 0
+    for factor in pgm.factors:
+        child: RandomVariable = factor.rvs[0]
+        parents: Sequence[RandomVariable] = factor.rvs[1:]
+        # Loop over all rows of the CPT
+        for parent_indicators in rv_instances_as_indicators(*parents):
+            p_marginal = p.marginal_distribution(child, condition=parent_indicators)
+            q_marginal = q.marginal_distribution(child, condition=parent_indicators)
+            row_hi = np.minimum(p_marginal, q_marginal).sum().item()
+            pr_row = p.probability(*parent_indicators)
+            total += pr_row * row_hi
+    return total / len(p_rvs)
+def _compatible_rvs(rvs1: Sequence[RandomVariable], rvs2: Sequence[RandomVariable]) -> bool:
+    """
+    The rvs are compatible if they have the same number of random variables
+    and the corresponding indicators are equal.
+    """
+    return (
+            len(rvs1) == len(rvs2)
+            and all(len(rv1) == len(rv2) and rv1.idx == rv2.idx for rv1, rv2 in zip(rvs1, rvs2))
+    )

ck/probability/empirical_probability_space.py CHANGED Viewed

@@ -11,6 +11,7 @@ class EmpiricalProbabilitySpace(ProbabilitySpace):
         Note that this is not necessarily an efficient approach to calculating probabilities and statistics.
         This probability space treats each of the samples as equally weighted.
+        For a probability space over unequally weighted samples, consider using `CrossTableProbabilitySpace`.
         Assumes:
             len(sample) == len(rvs), for each sample in samples.

ck/probability/probability_space.py CHANGED Viewed

@@ -1,3 +1,5 @@
+from __future__ import annotations
 import math
 from abc import ABC, abstractmethod
 from itertools import chain
@@ -11,17 +13,16 @@ from ck.utils.map_set import MapSet
 from ck.utils.np_extras import dtype_for_number_of_states, NDArrayFloat64, DTypeStates, NDArrayNumeric
 Condition: TypeAlias = None | Indicator | Iterable[Indicator]
-Condition.__doc__ = \
-    """
-    Type defining a condition. A condition is logically a set of
-    indicators, each indicator representing a random variable being in some state.
-    If multiple indicators of the same random variable appear in
-    a condition, then they are interpreted as
-    a disjunction, otherwise indicators are interpreted as
-    a conjunction. E.g.,  the condition (X=0, Y=1, Y=3) means
-    X=0 and (Y=1 or Y=3).
-    """
+"""
+Type defining a condition. A condition is logically a set of
+indicators, each indicator representing a random variable being in some state.
+If multiple indicators of the same random variable appear in
+a condition, then they are interpreted as
+a disjunction, otherwise indicators are interpreted as
+a conjunction. E.g.,  the condition (X=0, Y=1, Y=3) means
+X=0 and (Y=1 or Y=3).
+"""
 _NAN: float = np.nan  # Not-a-number (i.e., the result of an invalid calculation).
@@ -204,16 +205,19 @@ class ProbabilitySpace(ABC):
                 loop_rvs.append([rv[i] for i in sorted(states)])
                 reduced_space = True
+        best_probability = float('-inf')
+        best_states = None
         # If the random variables we are looping over does not have any conditions
         # then it is expected to be faster by using computed marginal probabilities.
         if not reduced_space:
             prs = self.marginal_distribution(*rvs, condition=condition)
-            best_probability = float('-inf')
-            best_states = None
             for probability, inst in zip(prs, rv_instances(*rvs)):
                 if probability > best_probability:
                     best_probability = probability
                     best_states = inst
+            if best_states is None:
+                return _NAN, ()
             return best_probability, best_states
         else:
@@ -221,8 +225,6 @@ class ProbabilitySpace(ABC):
             new_conditions = tuple(ind for ind in condition if ind.rv_idx not in rv_indexes)
             # Loop over the state space of the 'loop' rvs
-            best_probability = float('-inf')
-            best_states = None
             indicators: Tuple[Indicator, ...]
             for indicators in _combos(loop_rvs):
                 probability = self.wmc(*(indicators + new_conditions))
@@ -230,6 +232,8 @@ class ProbabilitySpace(ABC):
                     best_probability = probability
                     best_states = tuple(ind.state_idx for ind in indicators)
             condition_probability = self.wmc(*condition)
+            if best_states is None:
+                return _NAN, ()
             return best_probability / condition_probability, best_states
     def correlation(self, indicator1: Indicator, indicator2: Indicator, condition: Condition = ()) -> float:
@@ -246,6 +250,20 @@ class ProbabilitySpace(ABC):
         """
         condition = check_condition(condition)
+        if indicator1.rv_idx == indicator2.rv_idx:
+            # Special case - same random variable
+            condition_groups: MapSet[int, Indicator] = _group_indicators(condition)
+            rv_idx: int = indicator1.rv_idx
+            if indicator1 not in condition_groups.get(rv_idx, (indicator1,)):
+                return _NAN
+            if indicator1 == indicator2:
+                return 1
+            else:
+                if indicator2 not in condition_groups.get(rv_idx, (indicator2,)):
+                    return _NAN
+                else:
+                    return 0
         p1 = self.probability(indicator1, condition=condition)
         p2 = self.probability(indicator2, condition=condition)
         p12 = self._joint_probability(indicator1, indicator2, condition=condition)
@@ -268,12 +286,7 @@ class ProbabilitySpace(ABC):
             entropy of the given random variable.
         """
         condition = check_condition(condition)
-        e = 0.0
-        for ind in rv:
-            p = self.probability(ind, condition=condition)
-            if p > 0.0:
-                e -= p * math.log2(p)
-        return e
+        return -sum(plogp(self.probability(ind, condition=condition)) for ind in rv)
     def conditional_entropy(self, rv1: RandomVariable, rv2: RandomVariable, condition: Condition = ()) -> float:
         """
@@ -310,13 +323,11 @@ class ProbabilitySpace(ABC):
             joint entropy of the given random variables.
         """
         condition = check_condition(condition)
-        e = 0.0
-        for ind1 in rv1:
-            for ind2 in rv2:
-                p = self._joint_probability(ind1, ind2, condition=condition)
-                if p > 0.0:
-                    e -= p * math.log2(p)
-        return e
+        return -sum(
+            plogp(self._joint_probability(ind1, ind2, condition=condition))
+            for ind1 in rv1
+            for ind2 in rv2
+        )
     def mutual_information(self, rv1: RandomVariable, rv2: RandomVariable, condition: Condition = ()) -> float:
         """
@@ -420,8 +431,12 @@ class ProbabilitySpace(ABC):
         denominator = self.joint_entropy(rv1, rv2, condition=condition)
         return self._normalised_mutual_information(rv1, rv2, denominator, condition=condition)
-    def covariant_normalised_mutual_information(self, rv1: RandomVariable, rv2: RandomVariable,
-                                                condition: Condition = ()) -> float:
+    def covariant_normalised_mutual_information(
+            self,
+            rv1: RandomVariable,
+            rv2: RandomVariable,
+            condition: Condition = (),
+    ) -> float:
         """
         Calculate the covariant normalised mutual information
         = I(rv1; rv2) / sqrt(H(rv1) * H(rv2)).
@@ -550,6 +565,14 @@ class ProbabilitySpace(ABC):
         return wmc
+def plogp(p: float) -> float:
+    """
+    Returns:
+        p * log2(p), or 0 if p <= 0.
+    """
+    return p * math.log2(p) if p > 0 else 0
 def check_condition(condition: Condition) -> Tuple[Indicator, ...]:
     """
     Make the best effort to interpret the given condition.

ck/program/raw_program.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import Callable, Sequence
+from typing import Callable, Sequence, TypeAlias
 import numpy as np
 import ctypes as ct
@@ -7,12 +7,14 @@ import ctypes as ct
 from ck.utils.np_extras import NDArrayNumeric, DTypeNumeric
-# RawProgramFunction is a function of three ctypes arrays, returning nothing.
-# Args:
-#     [0]: input  values,
-#     [1]: temporary working memory,
-#     [2]: output values.
-RawProgramFunction = Callable[[ct.POINTER, ct.POINTER, ct.POINTER], None]
+RawProgramFunction: TypeAlias = Callable[[ct.POINTER, ct.POINTER, ct.POINTER], None]
+"""
+RawProgramFunction is a function of three ctypes arrays, returning nothing.
+Args:
+    [0]: input  values,
+    [1]: temporary working memory,
+    [2]: output values.
+"""
 @dataclass
@@ -26,23 +28,28 @@ class RawProgram:
     an efficient method for executing a program as buffers are reallocated for
     each call. Alternatively, a `RawProgram` can be wrapped in a `ProgramBuffer`
     for computationally efficient memory buffer reuse.
-    Fields:
-        function: is a function of three ctypes arrays, returning nothing.
-        dtype: the numpy data type of  the array values.
-        number_of_vars: the number of input values (first function argument).
-        number_of_tmps: the number of working memory values (second function argument).
-        number_of_results: the number of result values (third function argument).
-        var_indices: maps the index of inputs (from 0 to self.number_of_vars - 1) to the index
-            of the corresponding circuit var.
     """
     function: RawProgramFunction
+    """a function of three ctypes arrays, returning nothing."""
     dtype: DTypeNumeric
+    """the numpy data type of  the array values."""
     number_of_vars: int
+    """the number of input values (first function argument)."""
     number_of_tmps: int
+    """the number of working memory values (second function argument)."""
     number_of_results: int
+    """the number of result values (third function argument)."""
     var_indices: Sequence[int]
+    """
+    a map from the index of inputs (from 0 to self.number_of_vars - 1) to the index
+    of the corresponding circuit var.
+    """
     def __call__(self, var_values: NDArrayNumeric | Sequence[int | float] | int | float) -> NDArrayNumeric:
         """

ck/sampling/sampler_support.py CHANGED Viewed

@@ -11,12 +11,11 @@ from ck.utils.np_extras import NDArrayStates, NDArrayNumeric
 from ck.utils.random_extras import Random
 YieldF: TypeAlias = Callable[[NDArrayStates], int] | Callable[[NDArrayStates], Instance]
-YieldF.__doc__ = \
-    """
-    Type of a yield function. Support for a sampler.
-    A yield function may be used to implement a sampler's iterator, thus
-    it provides an Instance or single state index.
-    """
+"""
+Type of a yield function. Support for a sampler.
+A yield function may be used to implement a sampler's iterator, thus
+it provides an Instance or single state index.
+"""
 @dataclass

ck/utils/iter_extras.py CHANGED Viewed

@@ -33,11 +33,12 @@ def combos(list_of_lists: Sequence[Sequence[_T]], flip=False) -> Iterable[Tuple[
     Iterate over all combinations of taking one element from each of the lists.
     The order of results has the first element changing most rapidly.
-    For example, given [[1,2,3],[4,5],[6,7]], combos yields the following:
+    For example, given [[1,2,3],[4,5],[6,7]], combos yields the following::
         (1,4,6), (2,4,6), (3,4,6), (1,5,6), (2,5,6), (3,5,6),
         (1,4,7), (2,4,7), (3,4,7), (1,5,7), (2,5,7), (3,5,7).
-    If flip, then the last changes most rapidly.
+    If `flip` is true, then the last changes most rapidly.
     """
     num = len(list_of_lists)
     if num == 0:

ck/utils/local_config.py CHANGED Viewed

@@ -12,10 +12,13 @@ other getter methods wrap `get`.
 The `get` method will search for a value for a requested variable
 using the following steps.
 1) Check the `programmatic config` which is a dictionary that
-   can be directly updated.
+can be directly updated.
 2) Check the PYTHONPATH for a module called `config` (i.e., a
-   `config.py` file) for global variables defined in that module.
+`config.py` file) for global variables defined in that module.
 3) Check the system environment variables (`os.environ`).
 Variable names must be a valid Python identifier. Only valid
@@ -171,8 +174,9 @@ def get_params(
     are returned as a single string with `delim` as the delimiter. If
     `delim` is not None then the default value for `sep` is '='.
-    For example, assume config.py contains: ABC = 123 and DEF = 456,
-    then:
+    For example, assume `config.py` contains: `ABC = 123` and `DEF = 456`,
+    then::
         get_params('ABC') -> ('ABC', 123)
         get_params('ABC', 'DEF') -> ('ABC', 123), ('DEF', 456)
         get_params('ABC', sep='=') = 'ABC=123'
@@ -180,10 +184,14 @@ def get_params(
         get_params('ABC;DEF', delim=';') = 'ABC=123;DEF=456'
         get_params('ABC;DEF', sep='==', delim=';') = 'ABC==123;DEF==456'
-    :param keys: the names of variables to access.
-    :param sep: the separator character between {variable} and {value}.
-    :param delim: the delimiter character between key-value pairs.
-    :param config: a Config instance to update. Default is the global config.
+    Args:
+        keys: the names of variables to access.
+        sep: the separator character between {variable} and {value}.
+        delim: the delimiter character between key-value pairs.
+        config: a Config instance to update. Default is the global config.
+    Returns:
+        the requested parameter values.
     """
     if delim is not None:
         keys = flatten(key.split(delim) for key in keys)

ck_demos/dataset/__init__.py ADDED Viewed

File without changes

ck_demos/dataset/demo_dataset_builder.py ADDED Viewed

@@ -0,0 +1,37 @@
+from ck.dataset import HardDataset, SoftDataset
+from ck.dataset.dataset_builder import DatasetBuilder, soft_dataset_from_builder, hard_dataset_from_builder
+from ck.pgm import PGM
+def main() -> None:
+    pgm = PGM()
+    x = pgm.new_rv('x', (True, False))
+    y = pgm.new_rv('y', ('yes', 'no', 'maybe'))
+    builder = DatasetBuilder([x, y])
+    builder.append()
+    builder.append(1, 2).weight = 3
+    builder.append(None, [0.7, 0.1, 0.2])
+    builder.append().set_states(True, 'maybe')
+    print('DatasetBuilder dump')
+    builder.dump()
+    print()
+    print('DatasetBuilder dump, showing states and custom missing values')
+    builder.dump(as_states=True, missing='?')
+    print()
+    print('HardDataset dump')
+    dataset: HardDataset = hard_dataset_from_builder(builder, missing=99)
+    dataset.dump()
+    print()
+    print('SoftDataset dump')
+    dataset: SoftDataset = soft_dataset_from_builder(builder)
+    dataset.dump()
+    print()
+if __name__ == '__main__':
+    main()

ck_demos/dataset/demo_dataset_from_sampler.py ADDED Viewed

@@ -0,0 +1,18 @@
+from ck import example
+from ck.dataset.sampled_dataset import dataset_from_sampler
+from ck.pgm import PGM
+from ck.pgm_circuit.wmc_program import WMCProgram
+from ck.pgm_compiler import DEFAULT_PGM_COMPILER
+from ck.sampling.sampler import Sampler
+def main() -> None:
+    pgm: PGM = example.Student()
+    sampler: Sampler = WMCProgram(DEFAULT_PGM_COMPILER(pgm)).sample_direct()
+    dataset = dataset_from_sampler(sampler, 10)
+    dataset.dump()
+if __name__ == '__main__':
+    main()

ck_demos/learning/__init__.py ADDED Viewed

File without changes