PyPI - compiled-knowledge - Versions diffs - 4.0.0a5__cp313-cp313-macosx_10_13_universal2.whl - Mend

compiled-knowledge 4.0.0a5__cp313-cp313-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of compiled-knowledge might be problematic. Click here for more details.

Files changed (167) hide show

ck/__init__.py +0 -0
ck/circuit/__init__.py +13 -0
ck/circuit/circuit.c +38749 -0
ck/circuit/circuit.cpython-313-darwin.so +0 -0
ck/circuit/circuit_py.py +807 -0
ck/circuit/tmp_const.py +74 -0
ck/circuit_compiler/__init__.py +2 -0
ck/circuit_compiler/circuit_compiler.py +26 -0
ck/circuit_compiler/cython_vm_compiler/__init__.py +1 -0
ck/circuit_compiler/cython_vm_compiler/_compiler.c +17373 -0
ck/circuit_compiler/cython_vm_compiler/_compiler.cpython-313-darwin.so +0 -0
ck/circuit_compiler/cython_vm_compiler/cython_vm_compiler.py +96 -0
ck/circuit_compiler/interpret_compiler.py +223 -0
ck/circuit_compiler/llvm_compiler.py +388 -0
ck/circuit_compiler/llvm_vm_compiler.py +546 -0
ck/circuit_compiler/named_circuit_compilers.py +57 -0
ck/circuit_compiler/support/__init__.py +0 -0
ck/circuit_compiler/support/circuit_analyser.py +81 -0
ck/circuit_compiler/support/input_vars.py +148 -0
ck/circuit_compiler/support/llvm_ir_function.py +234 -0
ck/example/__init__.py +53 -0
ck/example/alarm.py +366 -0
ck/example/asia.py +28 -0
ck/example/binary_clique.py +32 -0
ck/example/bow_tie.py +33 -0
ck/example/cancer.py +37 -0
ck/example/chain.py +38 -0
ck/example/child.py +199 -0
ck/example/clique.py +33 -0
ck/example/cnf_pgm.py +39 -0
ck/example/diamond_square.py +68 -0
ck/example/earthquake.py +36 -0
ck/example/empty.py +10 -0
ck/example/hailfinder.py +539 -0
ck/example/hepar2.py +628 -0
ck/example/insurance.py +504 -0
ck/example/loop.py +40 -0
ck/example/mildew.py +38161 -0
ck/example/munin.py +22982 -0
ck/example/pathfinder.py +53674 -0
ck/example/rain.py +39 -0
ck/example/rectangle.py +161 -0
ck/example/run.py +30 -0
ck/example/sachs.py +129 -0
ck/example/sprinkler.py +30 -0
ck/example/star.py +44 -0
ck/example/stress.py +64 -0
ck/example/student.py +43 -0
ck/example/survey.py +46 -0
ck/example/triangle_square.py +54 -0
ck/example/truss.py +49 -0
ck/in_out/__init__.py +3 -0
ck/in_out/parse_ace_lmap.py +216 -0
ck/in_out/parse_ace_nnf.py +288 -0
ck/in_out/parse_net.py +480 -0
ck/in_out/parser_utils.py +185 -0
ck/in_out/pgm_pickle.py +42 -0
ck/in_out/pgm_python.py +268 -0
ck/in_out/render_bugs.py +111 -0
ck/in_out/render_net.py +177 -0
ck/in_out/render_pomegranate.py +184 -0
ck/pgm.py +3494 -0
ck/pgm_circuit/__init__.py +1 -0
ck/pgm_circuit/marginals_program.py +352 -0
ck/pgm_circuit/mpe_program.py +237 -0
ck/pgm_circuit/pgm_circuit.py +75 -0
ck/pgm_circuit/program_with_slotmap.py +234 -0
ck/pgm_circuit/slot_map.py +35 -0
ck/pgm_circuit/support/__init__.py +0 -0
ck/pgm_circuit/support/compile_circuit.py +83 -0
ck/pgm_circuit/target_marginals_program.py +103 -0
ck/pgm_circuit/wmc_program.py +323 -0
ck/pgm_compiler/__init__.py +2 -0
ck/pgm_compiler/ace/__init__.py +1 -0
ck/pgm_compiler/ace/ace.py +252 -0
ck/pgm_compiler/factor_elimination.py +383 -0
ck/pgm_compiler/named_pgm_compilers.py +63 -0
ck/pgm_compiler/pgm_compiler.py +19 -0
ck/pgm_compiler/recursive_conditioning.py +226 -0
ck/pgm_compiler/support/__init__.py +0 -0
ck/pgm_compiler/support/circuit_table/__init__.py +9 -0
ck/pgm_compiler/support/circuit_table/circuit_table.c +16042 -0
ck/pgm_compiler/support/circuit_table/circuit_table.cpython-313-darwin.so +0 -0
ck/pgm_compiler/support/circuit_table/circuit_table_py.py +269 -0
ck/pgm_compiler/support/clusters.py +556 -0
ck/pgm_compiler/support/factor_tables.py +398 -0
ck/pgm_compiler/support/join_tree.py +275 -0
ck/pgm_compiler/support/named_compiler_maker.py +33 -0
ck/pgm_compiler/variable_elimination.py +89 -0
ck/probability/__init__.py +0 -0
ck/probability/empirical_probability_space.py +47 -0
ck/probability/probability_space.py +568 -0
ck/program/__init__.py +3 -0
ck/program/program.py +129 -0
ck/program/program_buffer.py +180 -0
ck/program/raw_program.py +61 -0
ck/sampling/__init__.py +0 -0
ck/sampling/forward_sampler.py +211 -0
ck/sampling/marginals_direct_sampler.py +113 -0
ck/sampling/sampler.py +62 -0
ck/sampling/sampler_support.py +232 -0
ck/sampling/uniform_sampler.py +66 -0
ck/sampling/wmc_direct_sampler.py +169 -0
ck/sampling/wmc_gibbs_sampler.py +147 -0
ck/sampling/wmc_metropolis_sampler.py +159 -0
ck/sampling/wmc_rejection_sampler.py +113 -0
ck/utils/__init__.py +0 -0
ck/utils/iter_extras.py +153 -0
ck/utils/map_list.py +128 -0
ck/utils/map_set.py +128 -0
ck/utils/np_extras.py +51 -0
ck/utils/random_extras.py +64 -0
ck/utils/tmp_dir.py +94 -0
ck_demos/__init__.py +0 -0
ck_demos/ace/__init__.py +0 -0
ck_demos/ace/copy_ace_to_ck.py +15 -0
ck_demos/ace/demo_ace.py +44 -0
ck_demos/all_demos.py +88 -0
ck_demos/circuit/__init__.py +0 -0
ck_demos/circuit/demo_circuit_dump.py +22 -0
ck_demos/circuit/demo_derivatives.py +43 -0
ck_demos/circuit_compiler/__init__.py +0 -0
ck_demos/circuit_compiler/compare_circuit_compilers.py +32 -0
ck_demos/circuit_compiler/show_llvm_program.py +26 -0
ck_demos/pgm/__init__.py +0 -0
ck_demos/pgm/demo_pgm_dump.py +18 -0
ck_demos/pgm/demo_pgm_dump_stress.py +18 -0
ck_demos/pgm/demo_pgm_string_rendering.py +15 -0
ck_demos/pgm/show_examples.py +25 -0
ck_demos/pgm_compiler/__init__.py +0 -0
ck_demos/pgm_compiler/compare_pgm_compilers.py +50 -0
ck_demos/pgm_compiler/demo_compiler_dump.py +50 -0
ck_demos/pgm_compiler/demo_factor_elimination.py +47 -0
ck_demos/pgm_compiler/demo_join_tree.py +25 -0
ck_demos/pgm_compiler/demo_marginals_program.py +53 -0
ck_demos/pgm_compiler/demo_mpe_program.py +55 -0
ck_demos/pgm_compiler/demo_pgm_compiler.py +38 -0
ck_demos/pgm_compiler/demo_recursive_conditioning.py +33 -0
ck_demos/pgm_compiler/demo_variable_elimination.py +33 -0
ck_demos/pgm_compiler/demo_wmc_program.py +29 -0
ck_demos/pgm_inference/__init__.py +0 -0
ck_demos/pgm_inference/demo_inferencing_basic.py +188 -0
ck_demos/pgm_inference/demo_inferencing_mpe_cancer.py +45 -0
ck_demos/pgm_inference/demo_inferencing_wmc_and_mpe_sprinkler.py +154 -0
ck_demos/pgm_inference/demo_inferencing_wmc_student.py +110 -0
ck_demos/programs/__init__.py +0 -0
ck_demos/programs/demo_program_buffer.py +24 -0
ck_demos/programs/demo_program_multi.py +24 -0
ck_demos/programs/demo_program_none.py +19 -0
ck_demos/programs/demo_program_single.py +23 -0
ck_demos/programs/demo_raw_program_interpreted.py +21 -0
ck_demos/programs/demo_raw_program_llvm.py +21 -0
ck_demos/sampling/__init__.py +0 -0
ck_demos/sampling/check_sampler.py +71 -0
ck_demos/sampling/demo_marginal_direct_sampler.py +40 -0
ck_demos/sampling/demo_uniform_sampler.py +38 -0
ck_demos/sampling/demo_wmc_direct_sampler.py +40 -0
ck_demos/utils/__init__.py +0 -0
ck_demos/utils/compare.py +88 -0
ck_demos/utils/convert_network.py +45 -0
ck_demos/utils/sample_model.py +216 -0
ck_demos/utils/stop_watch.py +384 -0
compiled_knowledge-4.0.0a5.dist-info/METADATA +50 -0
compiled_knowledge-4.0.0a5.dist-info/RECORD +167 -0
compiled_knowledge-4.0.0a5.dist-info/WHEEL +5 -0
compiled_knowledge-4.0.0a5.dist-info/licenses/LICENSE.txt +21 -0
compiled_knowledge-4.0.0a5.dist-info/top_level.txt +2 -0

ck/pgm_compiler/support/factor_tables.py ADDED Viewed

@@ -0,0 +1,398 @@
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Dict, Sequence, Tuple, List, Iterator, Set, Iterable, Optional, Callable
+import numpy as np
+from ck.circuit import Circuit, VarNode, CircuitNode
+from ck.pgm import PGM, ParamId, Factor, PotentialFunction, RandomVariable, ZeroPotentialFunction
+from ck.pgm_circuit.slot_map import SlotMap, SlotKey
+from ck.pgm_compiler.support.circuit_table import CircuitTable, TableInstance
+from ck.utils.iter_extras import pairs
+from ck.utils.map_list import MapList
+from ck.utils.np_extras import NDArray, NDArrayFloat64
@dataclass
class FactorTables:
    """
    The circuit-level artefacts built for the factors of a PGM.

    This is the value returned by `make_factor_tables`.
    """

    # The host circuit.
    circuit: Circuit

    # Number of indicator variables.
    number_of_indicators: int

    # Number of parameter variables (i.e., non-const, in-use parameters).
    number_of_parameters: int

    # Map from Indicator or ParamId object to a circuit var index.
    slot_map: SlotMap

    # One CircuitTable for each PGM factor, addressed by `factor.idx`.
    tables: Sequence[CircuitTable]

    # For a non-const, in-use parameter with id `param_id`, the PGM value of that
    # parameter was `self.parameter_values[self.slot_map[param_id] - self.number_of_indicators]`.
    parameter_values: NDArray

    def get_table(self, factor: Factor) -> CircuitTable:
        """
        Look up the circuit table of a factor.

        Args:
            factor: a factor of the PGM; its `idx` selects the table.

        Returns:
            the entry of `self.tables` at position `factor.idx`.
        """
        table: CircuitTable = self.tables[factor.idx]
        return table
def make_factor_tables(
        pgm: PGM,
        const_parameters: bool,
        multiply_indicators: bool,
        pre_prune_factor_tables: bool,
) -> FactorTables:
    """
    Build a circuit and a consistent set of circuit tables for the factors of a PGM.

    The following are created:
    * a new circuit,
    * one circuit variable per indicator of the PGM, in the order of `pgm.indicators`,
    * one circuit variable per potential function parameter (only when
      `const_parameters` is false), added after the indicator variables,
    * one circuit table per factor of the PGM.

    Each potential function parameter becomes either a circuit constant
    (`const_parameters` true) or a circuit variable (`const_parameters` false).

    The slot map maps PGM indicators, and parameter ids when parameters are
    circuit variables, to circuit var indices.

    Args:
        pgm: The PGM with the random variables, factors, and potential functions.
        const_parameters: if true, then potential function parameters will be circuit constants,
            otherwise they will be circuit variables, with entries in the returned slot map.
        multiply_indicators: if true then the indicator variables of each random variable
            are multiplied into the table of one factor that mentions that random variable.
        pre_prune_factor_tables: if true, then heuristics will be used to remove any provably zero row.

    Returns:
        FactorTables, holding a slot_map and a circuit table for each PGM factor.
    """
    circuit = Circuit()

    # One circuit var per indicator; creation order fixes the var indices.
    slot_map: Dict[SlotKey, int] = {}
    for indicator in pgm.indicators:
        slot_map[indicator] = circuit.new_var().idx

    # Rows of each potential function, keyed by the identity of the function object.
    functions_rows: Dict[int, _FunctionRows] = {}
    for function in pgm.functions:
        if const_parameters:
            function_rows = _rows_for_function_const(function, circuit)
        else:
            function_rows = _rows_for_function_var(function, circuit, slot_map)
        functions_rows[id(function)] = function_rows

    # Rows of each factor, keyed by the identity of the factor object.
    # `use_count` records how many factors share one function's rows.
    factor_rows: Dict[int, _FactorRows] = {}
    for factor in pgm.factors:
        shared_rows: _FunctionRows = functions_rows[id(factor.function)]
        shared_rows.use_count += 1
        factor_rows[id(factor)] = _FactorRows(factor, shared_rows)

    # Optionally drop rows that cannot survive a product with another factor.
    if pre_prune_factor_tables:
        _pre_prune_factor_tables(list(factor_rows.values()))

    # Optionally allocate each random variable to a factor for indicator multiplication.
    factors_mul_rvs: MapList[int, RandomVariable]
    if multiply_indicators:
        factors_mul_rvs = _assign_rvs_to_factors(
            pgm,
            lambda _factor: len(factor_rows[id(_factor)]),
        )
    else:
        factors_mul_rvs = MapList()  # no rvs assigned to any factor

    # One circuit table per factor, in the order of `pgm.factors`.
    tables: List[CircuitTable] = []
    for factor in pgm.factors:
        tables.append(
            _make_factor_table(factor, circuit, slot_map, factor_rows[id(factor)], factors_mul_rvs)
        )

    # Every slot beyond the indicators belongs to a parameter circuit var.
    number_of_indicators: int = pgm.number_of_indicators
    number_of_parameters: int = len(slot_map) - number_of_indicators
    parameter_values: NDArrayFloat64 = np.zeros(number_of_parameters, dtype=np.float64)
    if not const_parameters:
        for function in pgm.functions:
            for param_index, value in function.params:
                slot: Optional[int] = slot_map.get(function.param_id(param_index))
                if slot is None:
                    # This parameter has no circuit var.
                    continue
                parameter_values[slot - number_of_indicators] = value

    return FactorTables(
        circuit=circuit,
        number_of_indicators=number_of_indicators,
        number_of_parameters=number_of_parameters,
        slot_map=slot_map,
        tables=tables,
        parameter_values=parameter_values,
    )
def _assign_rvs_to_factors(
        pgm: PGM,
        factor_size: Callable[[Factor], int],
) -> MapList[int, RandomVariable]:
    """
    Allocate every random variable to the smallest factor that mentions it.

    Random variables that appear in no factor are not allocated.

    Args:
        pgm: the PGM supplying the factors and random variables.
        factor_size: a function giving the size of a factor; the factor with
            the minimum size wins (the first such factor on ties).

    Returns:
        a map from factor id (`id(factor)`) to the list of random variables
        allocated to that factor.
    """
    # Group the factors by the index of each random variable they mention.
    factors_of_rv: MapList[int, Factor] = MapList()
    for factor in pgm.factors:
        for rv in factor.rvs:
            factors_of_rv.append(rv.idx, factor)

    # Choose one factor per random variable.
    allocation: MapList[int, RandomVariable] = MapList()
    for rv_index, rv in enumerate(pgm.rvs):
        candidates: Sequence[Factor] = factors_of_rv.get(rv_index, ())
        if len(candidates) == 0:
            continue
        smallest = min(candidates, key=factor_size)
        allocation.append(id(smallest), rv)
    return allocation
+class _FunctionRows:
+    def __init__(self, rows: Dict[TableInstance, CircuitNode], use_count: int = 0):
+        self.rows: Dict[TableInstance, CircuitNode] = rows
+        self.use_count: int = use_count
+class _FactorRows:
+    def __init__(self, factor: Factor, rows: _FunctionRows):
+        self.rows: _FunctionRows = rows
+        self.rv_indexes: Tuple[int, ...] = tuple(rv.idx for rv in factor.rvs)
+    def __len__(self) -> int:
+        return len(self.rows.rows)
+    def items(self) -> Iterable[Tuple[TableInstance, CircuitNode]]:
+        return self.rows.rows.items()
+    def prune(self, extra_keys: Set[TableInstance]) -> None:
+        """
+        Remove the given keys from the factor's function rows.
+        """
+        if len(extra_keys) > 0:
+            new_rows: Dict[TableInstance, CircuitNode] = {
+                instance: node
+                for instance, node in self.rows.rows.items()
+                if instance not in extra_keys
+            }
+            if self.rows.use_count > 1:
+                self.rows.use_count -= 1
+                self.rows = _FunctionRows(new_rows, 1)
+            else:
+                self.rows.rows = new_rows
+class _FactorPair:
+    def __init__(self, x: _FactorRows, y: _FactorRows):
+        self.x: _FactorRows = x
+        self.y: _FactorRows = y
+        x_set = set(self.x.rv_indexes)
+        # Identify all random variables used by x and y
+        self.all_rv_indexes: Set[int] = x_set.union(self.y.rv_indexes)
+        # Identify common random variables between x and y
+        # Keep them in a stable order
+        self.co_rv_indexes: Tuple[int, ...] = tuple(x_set.intersection(self.y.rv_indexes))
+        # Cache mappings from result Instance to index into source Instance (x or y).
+        # This will be used in indexing and product loops to pull our needed values
+        # from the source instances.
+        self.co_from_x_map = tuple(x.rv_indexes.index(rv_index) for rv_index in self.co_rv_indexes)
+        self.co_from_y_map = tuple(y.rv_indexes.index(rv_index) for rv_index in self.co_rv_indexes)
+    def prune(self) -> None:
+        """
+        Prune any rows from x and y that cannot join to each other.
+        """
+        co_from_x_map = self.co_from_x_map
+        co_from_y_map = self.co_from_y_map
+        x_rows = self.x.rows.rows
+        y_rows = self.y.rows.rows
+        x_co_set: Set[TableInstance] = {
+            tuple(instance[i] for i in co_from_x_map)
+            for instance in x_rows.keys()
+        }
+        y_co_set: Set[TableInstance] = {
+            tuple(instance[i] for i in co_from_y_map)
+            for instance in y_rows.keys()
+        }
+        # Keys in x that will not join to y
+        x_extra_keys: Set[TableInstance] = {
+            instance
+            for instance in x_rows.keys()
+            if tuple(instance[i] for i in co_from_x_map) not in y_co_set
+        }
+        # Keys in y that will not join to x
+        y_extra_keys: Set[TableInstance] = {
+            instance
+            for instance in y_rows.keys()
+            if tuple(instance[i] for i in co_from_y_map) not in x_co_set
+        }
+        self.x.prune(x_extra_keys)
+        self.y.prune(y_extra_keys)
def _pre_prune_factor_tables(factor_rows: Sequence[_FactorRows]) -> None:
    """
    Remove rows from factor tables that cannot survive a product with another factor.

    If two factors contain a common random variable then at some point their product
    will be formed, which may eliminate rows. This function identifies and removes
    such rows, modifying the given `_FactorRows` objects in place.

    Pruning one factor can expose further prunable rows in other factors, so
    pairs that were already processed are re-queued whenever a factor sharing
    one of their random variables shrinks. Each re-queue follows a strict
    reduction in some table, so the process terminates.

    Args:
        factor_rows: the rows of every factor under consideration.
    """
    # Only pairs of factors that share at least one random variable are of interest.
    pairs_to_check: List[_FactorPair] = [
        _FactorPair(f1, f2)
        for f1, f2 in pairs(factor_rows)
        if not set(f1.rv_indexes).isdisjoint(f2.rv_indexes)
    ]
    pairs_done: List[_FactorPair] = []

    while pairs_to_check:
        current = pairs_to_check.pop()
        x = current.x
        y = current.y
        x_size = len(x)
        y_size = len(y)
        current.prune()

        # Collect the random variables of any factor that lost rows.
        rvs_affected: Set[int] = set()
        if x_size != len(x):
            rvs_affected.update(x.rv_indexes)
        if y_size != len(y):
            rvs_affected.update(y.rv_indexes)

        if rvs_affected:
            # Re-queue completed pairs that touch an affected random variable.
            # A separate loop variable is used so `current` is not overwritten.
            still_done: List[_FactorPair] = []
            for done_pair in pairs_done:
                if rvs_affected.isdisjoint(done_pair.all_rv_indexes):
                    still_done.append(done_pair)
                else:
                    pairs_to_check.append(done_pair)
            pairs_done = still_done

        # Mark the current pair as done.
        pairs_done.append(current)
def _make_factor_table(
        factor: Factor,
        circuit: Circuit,
        slot_map: Dict[SlotKey, int],
        rows: _FactorRows,
        factors_mul_rvs: MapList[int, RandomVariable],
) -> CircuitTable:
    """
    Create the circuit table for one factor.

    Args:
        factor: the factor to make a table for.
        circuit: the host circuit.
        slot_map: map from indicator (or parameter id) to circuit var index.
        rows: the (instance, node) rows of the factor.
        factors_mul_rvs: map from factor id (`id(factor)`) to the random variables
            whose indicators are to be multiplied into that factor's table.

    Returns:
        a CircuitTable over the random variables of the factor. When random
        variables are allocated to the factor, each row's node is the product of
        the row's original node (unless it is one) and the indicator variable
        selected by the row's instance for each allocated random variable.
    """
    factor_rv_indexes: Sequence[int] = tuple(rv.idx for rv in factor.rvs)

    # Random variables to multiply into the table
    factor_mul_rvs: Sequence[RandomVariable] = factors_mul_rvs.get(id(factor), ())
    if len(factor_mul_rvs) == 0:
        # Trivial case - the rows are used exactly as they are.
        return CircuitTable(circuit, factor_rv_indexes, rows.items())

    # For each random variable to multiply in, record:
    #   * the position within a factor instance that holds its state index,
    #   * its indicator circuit variables, indexed by state index.
    selectors: List[Tuple[int, Sequence[CircuitNode]]] = []
    for rv in factor_mul_rvs:
        position: int = factor_rv_indexes.index(rv.idx)
        indicator_vars: Sequence[CircuitNode] = tuple(
            circuit.vars[slot_map[ind]] for ind in rv.indicators
        )
        selectors.append((position, indicator_vars))

    def _result_rows() -> Iterator[Tuple[TableInstance, CircuitNode]]:
        for instance, node in rows.items():
            terms: List[CircuitNode] = [
                indicator_vars_[instance[position_]]
                for position_, indicator_vars_ in selectors
            ]
            if not node.is_one():
                terms.append(node)

            if len(terms) == 1:
                yield instance, terms[0]
            elif len(terms) == 0:
                yield instance, circuit.one
            else:
                yield instance, circuit.optimised_mul(tuple(terms))

    return CircuitTable(circuit, factor_rv_indexes, _result_rows())
def _rows_for_function_const(
        function: PotentialFunction,
        circuit: Circuit,
) -> _FunctionRows:
    """
    Build the rows (instance, node) of a potential function using circuit constants.

    Each node is the circuit constant for the function's value at that instance.
    Instances with a zero value are left out.

    Args:
        function: the potential function to convert.
        circuit: the circuit that supplies the constant nodes.
    """
    rows: Dict[TableInstance, CircuitNode] = {}
    if not isinstance(function, ZeroPotentialFunction):
        # (A ZeroPotentialFunction takes the shortcut of having no rows at all.)
        for instance, _, value in function.keys_with_param:
            if value != 0:
                key = tuple(instance)
                rows[key] = circuit.const(value)
    return _FunctionRows(rows)
def _rows_for_function_var(
        function: PotentialFunction,
        circuit: Circuit,
        slot_map: Dict[SlotKey, int],
) -> _FunctionRows:
    """
    Build the rows (instance, node) of a potential function using circuit variables.

    A new circuit variable is created for the parameter of every entry of
    `function.keys_with_param`, and `slot_map` is updated to map the parameter
    id to the index of that new variable.

    Args:
        function: the potential function to convert.
        circuit: the circuit that supplies the new variables.
        slot_map: the slot map to extend (modified in place).
    """
    rows: Dict[TableInstance, CircuitNode] = {}
    for instance, param_index, _ in function.keys_with_param:
        key = tuple(instance)
        param_id: ParamId = function.param_id(param_index)

        # A parameter id is expected to be seen only once.
        assert param_id not in slot_map, 'parameter should not already have a circuit var'

        node: VarNode = circuit.new_var()
        slot_map[param_id] = node.idx
        rows[key] = node
    return _FunctionRows(rows)

ck/pgm_compiler/support/join_tree.py ADDED Viewed

@@ -0,0 +1,275 @@
+from __future__ import annotations
+from dataclasses import dataclass
+from itertools import chain
+from typing import List, Set, Callable, Sequence, Tuple
+import numpy as np
+from ck.pgm import PGM, Factor
+from ck.pgm_compiler.support.clusters import Clusters, min_degree, min_fill, \
+    min_degree_then_fill, min_fill_then_degree, min_weighted_degree, min_weighted_fill, min_traditional_weighted_fill, \
+    ClusterAlgorithm
+from ck.utils.np_extras import NDArrayFloat64
@dataclass
class JoinTree:
    """
    A node of a join tree, together with the subtree below it.
    """

    # The PGM that this join tree is for.
    pgm: PGM

    # Indexes of random variables in this join tree node
    cluster: Set[int]

    # Child nodes in the join tree
    children: List[JoinTree]

    # Factors of the PGM allocated to this join tree node.
    factors: List[Factor]

    # Indexes of random variables that are in both this cluster and the parent's cluster.
    # (Empty if this is the root of the spanning tree).
    separator: Set[int]

    def max_cluster_size(self) -> int:
        """
        Returns:
            the maximum `len(cluster)` over this node and all its descendants.
        """
        largest: int = len(self.cluster)
        for child in self.children:
            largest = max(largest, child.max_cluster_size())
        return largest

    def max_cluster_weighted_size(self, rv_log_sizes: Sequence[float]) -> float:
        """
        Args:
            rv_log_sizes: a weight for each random variable, addressed by
                random variable index.

        Returns:
            the maximum, over this node and all its descendants, of the sum
            of `rv_log_sizes` over the random variables of the cluster.
        """
        heaviest: float = sum(rv_log_sizes[rv_idx] for rv_idx in self.cluster)
        for child in self.children:
            heaviest = max(heaviest, child.max_cluster_weighted_size(rv_log_sizes))
        return heaviest

    def dump(self, *, prefix: str = '', indent: str = '    ', show_factors: bool = True) -> None:
        """
        Print a dump of the Join Tree.
        This is intended for debugging and demonstration purposes.

        Each cluster is printed as: {separator rvs} | {non-separator rvs}.

        Args:
            prefix: optional prefix for indenting all lines.
            indent: additional prefix to use for extra indentation.
            show_factors: if true, the factors of each cluster are shown.
        """
        rvs = self.pgm.rvs
        separator_names = [repr(str(rvs[i])) for i in sorted(self.separator)]
        other_names = [
            repr(str(rvs[i]))
            for i in sorted(self.cluster)
            if i not in self.separator
        ]

        sep_str = ' '.join(separator_names)
        if sep_str:
            sep_str += ' '
        rest_str = ' '.join(other_names)

        print(f'{prefix}{sep_str}| {rest_str} (factors: {len(self.factors)})')
        if show_factors:
            for factor in self.factors:
                print(f'{prefix}factor{factor}')

        # Children are printed one indentation level deeper.
        for child in self.children:
            child.dump(prefix=prefix + indent, indent=indent, show_factors=show_factors)
# Type for a join tree algorithm: PGM -> JoinTree.
# Any callable that takes a PGM and returns a JoinTree matches this alias.
JoinTreeAlgorithm = Callable[[PGM], JoinTree]
def _join_tree_algorithm(pgm_to_clusters: ClusterAlgorithm) -> JoinTreeAlgorithm:
    """
    Wrap a ClusterAlgorithm so that it becomes a JoinTreeAlgorithm.

    The returned algorithm applies `pgm_to_clusters` to a PGM and converts
    the resulting clusters into a join tree with `clusters_to_join_tree`.

    Args:
        pgm_to_clusters: The clusters method to use.

    Returns:
        a JoinTreeAlgorithm.
    """

    def __join_tree_algorithm(pgm: PGM) -> JoinTree:
        return clusters_to_join_tree(pgm_to_clusters(pgm))

    return __join_tree_algorithm
# standard JoinTreeAlgorithms
# Each one wraps the cluster algorithm of the corresponding name
# (imported from `ck.pgm_compiler.support.clusters`) using `_join_tree_algorithm`.
MIN_DEGREE: JoinTreeAlgorithm = _join_tree_algorithm(min_degree)
MIN_FILL: JoinTreeAlgorithm = _join_tree_algorithm(min_fill)
MIN_DEGREE_THEN_FILL: JoinTreeAlgorithm = _join_tree_algorithm(min_degree_then_fill)
MIN_FILL_THEN_DEGREE: JoinTreeAlgorithm = _join_tree_algorithm(min_fill_then_degree)
MIN_WEIGHTED_DEGREE: JoinTreeAlgorithm = _join_tree_algorithm(min_weighted_degree)
MIN_WEIGHTED_FILL: JoinTreeAlgorithm = _join_tree_algorithm(min_weighted_fill)
MIN_TRADITIONAL_WEIGHTED_FILL: JoinTreeAlgorithm = _join_tree_algorithm(min_traditional_weighted_fill)
def clusters_to_join_tree(clusters: Clusters) -> JoinTree:
    """
    Construct a join tree for the given random variable clusters of a PGM.

    A join tree is formed by finding a minimum spanning tree over the clusters.
    The cost between a pair of clusters is the negated number of random
    variables in their separator (so larger separators are preferred), plus
    a tie-breaking term, smaller than one in total, that grows with the
    `pgm.rv_log_sizes` of the separator's random variables.

    Each factor of the PGM is allocated to the smallest cluster that contains
    all of the factor's random variables.

    Args:
        clusters: the clusters that resulted from graph clustering of a PGM;
            `clusters.pgm` is that PGM.

    Returns:
        the root of the join tree.
    """
    pgm: PGM = clusters.pgm
    cluster_sets: List[Set[int]] = clusters.clusters
    number_of_clusters = len(cluster_sets)

    # Dealing with these cases directly simplifies
    # the spanning tree algorithm implementation.
    if number_of_clusters == 0:
        return JoinTree(pgm, set(), [], [], set())
    if number_of_clusters == 1:
        return JoinTree(pgm, cluster_sets[0], [], list(pgm.factors), set())

    # Calculate inter-cluster costs for determining the minimum spanning tree
    cost: NDArrayFloat64 = np.zeros((number_of_clusters, number_of_clusters), dtype=np.float64)

    # We will use separator state space size to break ties.
    rv_log_sizes = pgm.rv_log_sizes
    max_raw_break_cost = sum(rv_log_sizes) * 1.1  # sum of break costs must be < 1
    break_cost: List[float]
    if max_raw_break_cost > 0:
        break_cost = [log_size / max_raw_break_cost for log_size in rv_log_sizes]
    else:
        # Every log size is zero, so there is nothing to break ties with
        # (and dividing by the zero total must be avoided).
        break_cost = [0.0 for _ in rv_log_sizes]

    for i in range(number_of_clusters):
        cluster_i = cluster_sets[i]
        for j in range(i + 1, number_of_clusters):
            separator = cluster_i.intersection(cluster_sets[j])
            cost[i, j] = cost[j, i] = -len(separator) + sum(break_cost[rv_idx] for rv_idx in separator)

    # Make the spanning tree over the clusters
    root_custer_index: int
    children: List[List[int]]
    children, root_custer_index = _make_spanning_tree_small_root(cost, cluster_sets)

    # Allocate each PGM factor to a cluster
    cluster_factors: List[List[Factor]] = [[] for _ in range(number_of_clusters)]
    ordered_indexed_clusters = list(enumerate(cluster_sets))
    ordered_indexed_clusters.sort(key=lambda idx_c: len(idx_c[1]))  # sort from smallest to largest cluster
    for factor in pgm.factors:
        rv_indexes = frozenset(rv.idx for rv in factor.rvs)
        for cluster_index, cluster in ordered_indexed_clusters:
            if rv_indexes.issubset(cluster):
                cluster_factors[cluster_index].append(factor)
                break

    return _form_join_tree_r(pgm, root_custer_index, set(), children, cluster_sets, cluster_factors)
# Positive infinity; used as the initial "best cost so far" when searching for a minimum cost.
_INF = float('inf')
def _make_spanning_tree_small_root(cost: NDArrayFloat64, clusters: List[Set[int]]) -> Tuple[List[List[int]], int]:
    """
    Construct a minimum spanning tree over the clusters, where the root is the cluster with
    the smallest number of random variables (the first such cluster on ties).

    Args:
        cost: an nxn matrix where n is the number of clusters and cost[i, j]
            gives the cost between clusters i and j.
        clusters: the clusters, as sets of random variable indexes.

    Returns:
        (children, root_custer_index) where `children[i]` lists the indexes of
        the child clusters of cluster i in the spanning tree.
    """
    root_custer_index: int = 0
    root_size: int = len(clusters[root_custer_index])
    for i, cluster in enumerate(clusters):
        # Compare the candidate cluster (not the current root) with the best size so far.
        cluster_size: int = len(cluster)
        if cluster_size < root_size:
            root_custer_index = i
            root_size = cluster_size

    children: List[List[int]] = _make_spanning_tree_at_root(cost, root_custer_index)
    return children, root_custer_index
def _make_spanning_tree_arbitrary_root(cost: NDArrayFloat64) -> Tuple[List[List[int]], int]:
    """
    Construct a minimum spanning tree over the clusters, rooted at the first cluster.

    Returns:
        (children, root index) where the root index is always 0.
    """
    root: int = 0
    return _make_spanning_tree_at_root(cost, root), root
+def _make_spanning_tree_at_root(
+        cost: NDArrayFloat64,
+        root_custer_index: int,
+) -> List[List[int]]:
+    """
+    Construct a minimum spanning tree over the clusters, starting at the given root.
+    Args:
+        cost: and nxn matrix where n is the number of clusters and cost[i, j]
+            gives the cost between clusters i and j.
+        root_custer_index: a nominated root cluster to be the root of the tree.
+    """
+    number_of_clusters: int = cost.shape[0]
+    # clusters left to process.
+    remaining: List[int] = list(range(number_of_clusters))
+    # clusters that have been processed.
+    included: List[int] = []
+    def remove_remaining(_remaining_index: int) -> None:
+        # Remove the `remaining` element at the given index location.
+        remaining[_remaining_index] = remaining[-1]
+        remaining.pop()
+    # Move root from `remaining` to `included`
+    included.append(root_custer_index)
+    remove_remaining(root_custer_index)  # assumes remaining[root_custer_index] = root_custer_index
+    # Data structure to collect the results.
+    children: List[List[int]] = [[] for _ in range(number_of_clusters)]
+    while True:
+        min_i: int = 0
+        min_j: int = 0
+        min_j_pos: int = 0
+        min_c: float = _INF
+        for i in included:
+            for j_pos, j in enumerate(remaining):
+                c: float = cost.item(i, j)
+                if c < min_c:
+                    min_c = c
+                    min_i = i
+                    min_j = j
+                    min_j_pos = j_pos
+        # Record the child and move remaining_idx from 'remaining' to 'included'.
+        children[min_i].append(min_j)
+        if len(remaining) == 1:
+            # That was the last one.
+            return children
+        # Update `remaining` and `included`
+        remove_remaining(min_j_pos)
+        included.append(min_j)
def _form_join_tree_r(
        pgm: PGM,
        cluster_index: int,
        parent_cluster: Set[int],
        children: Sequence[List[int]],
        clusters: Sequence[Set[int]],
        cluster_factors: List[List[Factor]],
) -> JoinTree:
    """
    Recursively build the JoinTree for a cluster and everything below it.

    Args:
        pgm: the PGM the join tree is for.
        cluster_index: index of the cluster to build a node for.
        parent_cluster: the cluster of the parent node (an empty set for the root).
        children: `children[i]` lists the indexes of the child clusters of cluster i.
        clusters: the clusters, addressed by cluster index.
        cluster_factors: the factors allocated to each cluster, addressed by cluster index.

    Returns:
        the JoinTree node for `cluster_index`; its separator is the intersection
        of `parent_cluster` and its own cluster.
    """
    this_cluster: Set[int] = clusters[cluster_index]

    # Build the subtrees first; this cluster is the parent cluster of each child.
    child_trees: List[JoinTree] = []
    for child_index in children[cluster_index]:
        child_trees.append(
            _form_join_tree_r(pgm, child_index, this_cluster, children, clusters, cluster_factors)
        )

    return JoinTree(
        pgm,
        this_cluster,
        child_trees,
        cluster_factors[cluster_index],
        parent_cluster.intersection(this_cluster),
    )

ck/pgm_compiler/support/named_compiler_maker.py ADDED Viewed

@@ -0,0 +1,33 @@
+from types import ModuleType
+from typing import Tuple
+from ck.pgm import PGM
+from ck.pgm_circuit import PGMCircuit
+from ck.pgm_compiler import PGMCompiler
def get_compiler(module: ModuleType, **kwargs) -> Tuple[PGMCompiler]:
    """
    Wrap the `compile_pgm` function of a module as a named PGM compiler.

    Args:
        module: module containing `compile_pgm` function.
        **kwargs: extra keyword arguments passed to `module.compile_pgm`
            on every call of the compiler.

    Returns:
        a singleton tuple containing PGMCompiler function.
    """

    def compiler(pgm: PGM, const_parameters: bool = True) -> PGMCircuit:
        """Conforms to the `PGMCompiler` protocol."""
        pgm_circuit = module.compile_pgm(pgm, const_parameters=const_parameters, **kwargs)
        return pgm_circuit

    return (compiler,)
def get_compiler_algorithm(module, algorithm: str, **kwargs) -> Tuple[PGMCompiler]:
    """
    Helper function to create a named PGM compiler, with a named algorithm argument.

    Args:
        module: module containing a `compile_pgm` function and an attribute
            named by `algorithm`.
        algorithm: name of the attribute of `module` to pass to `compile_pgm`
            as its `algorithm` keyword argument.
        **kwargs: extra keyword arguments passed on to `module.compile_pgm`.

    Returns:
        a singleton tuple containing PGMCompiler function.

    Raises:
        AttributeError: if `module` has no attribute named `algorithm`.
    """
    # `getattr` accepts no keyword arguments; the extra keyword arguments
    # belong to `get_compiler`, which forwards them to `compile_pgm`.
    return get_compiler(module, algorithm=getattr(module, algorithm), **kwargs)