PyPI - compiled-knowledge - Versions diffs - 4.0.0a6__cp313-cp313-macosx_10_13_universal2.whl → 4.0.0a8__cp313-cp313-macosx_10_13_universal2.whl - Mend

compiled-knowledge 4.0.0a6__cp313-cp313-macosx_10_13_universal2.whl → 4.0.0a8__cp313-cp313-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of compiled-knowledge might be problematic. Click here for more details.

Files changed (12) hide show

ck/circuit/circuit_node.pyx ADDED Viewed

@@ -0,0 +1,138 @@
+from __future__ import annotations
+from typing import Optional, Tuple
+# Python Type for values of ConstNode objects
+ConstValue = float | int | bool
+cdef class CircuitNode:
+    """
+    Base class for the nodes of an arithmetic circuit.
+    A node is one of three kinds: an op node, a var node, or a const node.
+    An op node represents an arithmetic operation and holds zero or more
+    arguments, each of which is another node.
+    A var node carries an index, `idx`, an integer counting from zero that
+    denotes its creation order. A var node may temporarily be set to a
+    constant, which may be useful for optimising a compiled circuit.
+    """
+    # The circuit this node was created for; the operators below delegate to it.
+    cdef public object circuit
+    def __init__(self, circuit):
+        self.circuit = circuit
+    cpdef int is_zero(self) except*:
+        # The base implementation always answers "no"; subclasses override it.
+        return 0
+    cpdef int is_one(self) except*:
+        # The base implementation always answers "no"; subclasses override it.
+        return 0
+    def __add__(self, other: CircuitNode | ConstValue):
+        # `node + other` is forwarded to the `add` method of the owning circuit.
+        owner = self.circuit
+        return owner.add(self, other)
+    def __mul__(self, other: CircuitNode | ConstValue):
+        # `node * other` is forwarded to the `mul` method of the owning circuit.
+        owner = self.circuit
+        return owner.mul(self, other)
+cdef class ConstNode(CircuitNode):
+    """
+    A node in a circuit representing a constant value.
+    """
+    # The constant held by this node (see ConstValue: float, int or bool).
+    cdef public object value
+    def __init__(self, circuit, value: ConstValue):
+        super().__init__(circuit)
+        self.value = value
+    cpdef int is_zero(self) except*:
+        # Identity test: true only for the very object the circuit exposes as `zero`.
+        # noinspection PyProtectedMember
+        return self is self.circuit.zero
+    cpdef int is_one(self) except*:
+        # Identity test: true only for the very object the circuit exposes as `one`.
+        # noinspection PyProtectedMember
+        return self is self.circuit.one
+    def __str__(self) -> str:
+        return 'const(' + str(self.value) + ')'
+    def __lt__(self, other) -> bool:
+        # Const nodes are ordered by their values; compared with any other
+        # kind of object the answer is simply False.
+        if isinstance(other, ConstNode):
+            return self.value < other.value
+        else:
+            return False
+cdef class VarNode(CircuitNode):
+    """
+    A circuit node standing for an input variable.
+    The node can be pinned to a constant through the `const` property;
+    while pinned, `is_const()` is true and `is_zero()` / `is_one()` report
+    what the pinned constant node reports.
+    """
+    # Index of this variable, as given to the constructor.
+    cdef public int idx
+    # The constant node this variable is pinned to, or None when not pinned.
+    cdef object _const
+    def __init__(self, circuit, idx: int):
+        super().__init__(circuit)
+        self._const = None
+        self.idx = idx
+    cpdef int is_zero(self) except*:
+        if self._const is None:
+            return False
+        return self._const.is_zero()
+    cpdef int is_one(self) except*:
+        if self._const is None:
+            return False
+        return self._const.is_one()
+    cpdef int is_const(self) except*:
+        return self._const is not None
+    @property
+    def const(self) -> Optional[ConstNode]:
+        return self._const
+    @const.setter
+    def const(self, value: ConstValue | ConstNode | None) -> None:
+        # None clears the pin; any other value is passed through the
+        # circuit's `const` method and the result is stored.
+        self._const = None if value is None else self.circuit.const(value)
+    def __lt__(self, other) -> bool:
+        # Var nodes are ordered by index; compared with any other kind of
+        # object the answer is simply False.
+        if not isinstance(other, VarNode):
+            return False
+        return self.idx < other.idx
+    def __str__(self) -> str:
+        label = 'var[' + str(self.idx) + ']'
+        if self._const is None:
+            return label
+        return label + ' = ' + str(self._const.value)
+cdef class OpNode(CircuitNode):
+    """
+    A node in a circuit representing an arithmetic operation.
+    """
+    # The argument nodes of the operation, stored as a tuple.
+    cdef public tuple[object, ...] args
+    # Name of the operation, stored as a str.
+    cdef public str symbol
+    def __init__(self, object circuit, symbol: str, tuple[object, ...] args: Tuple[CircuitNode, ...]):
+        super().__init__(circuit)
+        # Coerce the inputs to the declared attribute types.
+        self.args = tuple(args)
+        self.symbol = str(symbol)
+    def __str__(self) -> str:
+        # Rendered as the symbol, a backslash, then the number of arguments.
+        return self.symbol + '\\' + str(len(self.args))
+cdef class MulNode(OpNode):
+    """
+    An op node whose operation is multiplication (symbol 'mul').
+    """
+    def __init__(self, object circuit, tuple[object, ...] args: Tuple[CircuitNode, ...]):
+        # Delegate to OpNode with the fixed symbol for this operation.
+        OpNode.__init__(self, circuit, 'mul', args)
+cdef class AddNode(OpNode):
+    """
+    An op node whose operation is addition (symbol 'add').
+    """
+    def __init__(self, circuit, tuple[object, ...] args: Tuple[CircuitNode, ...]):
+        # Delegate to OpNode with the fixed symbol for this operation.
+        OpNode.__init__(self, circuit, 'add', args)

ck/circuit_compiler/cython_vm_compiler/_compiler.cpython-313-darwin.so CHANGED Viewed

Binary file

ck/circuit_compiler/cython_vm_compiler/_compiler.pyx ADDED Viewed

@@ -0,0 +1,239 @@
+from __future__ import annotations
+from pickletools import long1
+from typing import Sequence, Dict, List, Tuple, Set, Optional, Iterator
+import numpy as np
+import ctypes as ct
+from ck import circuit
+from ck.circuit import CircuitNode, ConstNode, VarNode, OpNode, ADD, Circuit
+from ck.circuit_compiler.support.circuit_analyser import CircuitAnalysis, analyze_circuit
+from ck.circuit_compiler.support.input_vars import infer_input_vars, InputVars
+from ck.program.raw_program import RawProgram, RawProgramFunction
+from ck.utils.np_extras import DType, NDArrayNumeric, NDArray, DTypeNumeric
+from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
+cimport numpy as cnp
+cimport cython
+cnp.import_array()
+DTYPE_FLOAT64 = np.float64
+ctypedef cnp.float64_t DTYPE_FLOAT64_t
+def make_function(
+        var_nodes: Sequence[VarNode],
+        result_nodes: Sequence[CircuitNode],
+        dtype: DTypeNumeric,
+) -> Tuple[RawProgramFunction, int]:
+    """
+    Build a RawProgram function that evaluates the circuit by interpretation.
+    The circuit is analysed, turned into VM instructions plus an array of
+    constants, and wrapped in a closure that runs `cvm_float64` over them.
+    Args:
+        var_nodes: the input variable nodes of the circuit.
+        result_nodes: the nodes whose values are the results.
+        dtype: numeric dtype of the arrays passed to the function.
+    Returns:
+        (function, number_of_tmps)
+    """
+    cdef Instructions instructions
+    np_consts: NDArrayNumeric
+    analysis: CircuitAnalysis = analyze_circuit(var_nodes, result_nodes)
+    instructions, np_consts = _make_instructions_from_analysis(analysis, dtype)
+    # View the constants array through a ctypes pointer of the matching element type.
+    element_ptr_type = ct.POINTER(np.ctypeslib.as_ctypes_type(dtype))
+    c_np_consts = np_consts.ctypes.data_as(element_ptr_type)
+    # RawProgramFunction = Callable[[ct.POINTER, ct.POINTER, ct.POINTER], None]
+    def function(vars_in: ct.POINTER, tmps: ct.POINTER, result: ct.POINTER) -> None:
+        # Turn each ctypes pointer into a plain address so it can be cast
+        # to a C `double*` for the virtual machine.
+        cdef size_t addr_vars = ct.cast(vars_in, ct.c_void_p).value
+        cdef size_t addr_tmps = ct.cast(tmps, ct.c_void_p).value
+        cdef size_t addr_consts = ct.cast(c_np_consts, ct.c_void_p).value
+        cdef size_t addr_result = ct.cast(result, ct.c_void_p).value
+        cvm_float64(
+            <double*> addr_vars,
+            <double*> addr_tmps,
+            <double*> addr_consts,
+            <double*> addr_result,
+            instructions,
+        )
+    number_of_tmps = len(analysis.op_to_tmp)
+    return function, number_of_tmps
+# VM instructions
+# ADD and MUL reuse the codes defined by the circuit module; COPY is given
+# the next value above both of them.
+ADD = circuit.ADD
+MUL = circuit.MUL
+COPY: int = max(ADD, MUL) + 1
+# VM arrays
+# These values are stored in ElementID.array; cvm_float64 uses them to index
+# its table of four array pointers.
+VARS: int = 0
+TMPS: int = 1
+CONSTS: int = 2
+RESULT: int = 3
+def _make_instructions_from_analysis(
+        analysis: CircuitAnalysis,
+        dtype: DTypeNumeric,
+) -> Tuple[Instructions, NDArrayNumeric]:
+    """
+    Translate a circuit analysis into VM instructions and an array of constants.
+    Args:
+        analysis: the analysed circuit (const, var, op and result nodes).
+        dtype: numeric dtype for the constants array; only float64 is accepted.
+    Raises:
+        RuntimeError: if `dtype` is not numpy float64.
+    Returns:
+        (instructions, np_consts)
+    """
+    if dtype != np.float64:
+        raise RuntimeError(f'only DType {np.float64} currently supported')
+    # Position of every const node in the constants array, keyed by id(node).
+    const_slot: Dict[int, int] = {}
+    for i, node in enumerate(analysis.const_nodes):
+        const_slot[id(node)] = i
+    # Store const values in a numpy array
+    np_consts: NDArrayNumeric = np.zeros(len(const_slot), dtype=dtype)
+    for i, node in enumerate(analysis.const_nodes):
+        np_consts[i] = node.value
+    # Where the value of each node is read from, keyed by id(node).
+    # Later writes deliberately replace earlier ones, so the order of the
+    # sections below matters.
+    locations: Dict[int, ElementID] = {}
+    # const nodes
+    for node_id, const_idx in const_slot.items():
+        locations[node_id] = ElementID(CONSTS, const_idx)
+    # var nodes: one pinned to a constant shares that constant's location
+    for i, var_node in enumerate(analysis.var_nodes):
+        if not var_node.is_const():
+            locations[id(var_node)] = ElementID(VARS, i)
+        else:
+            locations[id(var_node)] = locations[id(var_node.const)]
+    # op nodes: first the temporaries, then the results
+    for node_id, tmp_idx in analysis.op_to_tmp.items():
+        locations[node_id] = ElementID(TMPS, tmp_idx)
+    for node_id, result_idx in analysis.op_to_result.items():
+        locations[node_id] = ElementID(RESULT, result_idx)
+    # Build instructions, one per op node
+    program: Instructions = Instructions()
+    op_node: OpNode
+    for op_node in analysis.op_nodes:
+        dest: ElementID = locations[id(op_node)]
+        args: list[ElementID] = [locations[id(arg)] for arg in op_node.args]
+        program.append(op_node.symbol, args, dest)
+    # Add any copy operations, i.e., result nodes that are not op nodes
+    for i, node in enumerate(analysis.result_nodes):
+        if isinstance(node, OpNode):
+            continue
+        dest: ElementID = ElementID(RESULT, i)
+        args: list[ElementID] = [locations[id(node)]]
+        program.append(COPY, args, dest)
+    return program, np_consts
+# Location of one value: which of the VM arrays it lives in, and the
+# position inside that array.
+cdef struct ElementID:
+    int array  # VARS, TMPS, CONSTS, RESULT
+    int index  # index into the array
+# One VM instruction: the operation `symbol` applied to `num_args` operands,
+# with the outcome written to `dest`.
+cdef struct Instruction:
+    int             symbol  # ADD, MUL, COPY
+    int             num_args  # number of entries in `args`
+    ElementID*      args  # operand locations; allocated in Instructions.append, freed in Instructions.__dealloc__
+    ElementID       dest  # location that receives the result
+cdef class Instructions:
+    """
+    A growable C array of VM instructions.
+    Every appended instruction owns a separately allocated array holding the
+    locations of its arguments. All of that memory is released when the
+    object is deallocated.
+    """
+    cdef Instruction* instructions
+    cdef int num_instructions
+    def __cinit__(self):
+        # Start empty. No placeholder block is needed: PyMem_Realloc on a NULL
+        # pointer behaves like PyMem_Malloc. Doing this in __cinit__ means the
+        # C fields are set exactly once, before any Python code can run.
+        self.instructions = NULL
+        self.num_instructions = 0
+    def append(self, int symbol, list[ElementID] args, ElementID dest) -> None:
+        """
+        Add one instruction to the end of the program.
+        Args:
+            symbol: the operation code of the instruction.
+            args: locations of the arguments; may be empty.
+            dest: location that receives the result.
+        Raises:
+            MemoryError: if memory cannot be allocated. The instructions
+                stored so far are left intact and nothing is leaked.
+        """
+        cdef int num_args = len(args)
+        cdef int num_instructions = self.num_instructions
+        cdef int i
+        cdef ElementID* c_args
+        cdef Instruction* grown
+        c_args = <ElementID*> PyMem_Malloc(
+            num_args * sizeof(ElementID))
+        if not c_args:
+            raise MemoryError()
+        try:
+            for i in range(num_args):
+                c_args[i] = args[i]
+            # Keep the old pointer until the reallocation is known to have
+            # succeeded: on failure PyMem_Realloc returns NULL and the
+            # original block is still valid and still owned by this object.
+            grown = <Instruction*> PyMem_Realloc(
+                self.instructions,
+                sizeof(Instruction) * (num_instructions + 1)
+            )
+            if not grown:
+                raise MemoryError()
+        except BaseException:
+            # No instruction owns c_args yet, so release it before propagating.
+            PyMem_Free(c_args)
+            raise
+        self.instructions = grown
+        self.instructions[num_instructions] = Instruction(
+            symbol,
+            num_args,
+            c_args,
+            dest
+        )
+        self.num_instructions = num_instructions + 1
+    def __dealloc__(self):
+        cdef Instruction* instructions = self.instructions
+        cdef int i
+        if instructions:
+            # Free each per-instruction argument array, then the array itself.
+            for i in range(self.num_instructions):
+                PyMem_Free(instructions[i].args)
+            PyMem_Free(instructions)
+@cython.boundscheck(False) # turn off bounds-checking for entire function
+@cython.wraparound(False)  # turn off negative index wrapping for entire function
+cdef void cvm_float64(
+    double* vars_in,
+    double* tmps,
+    double* consts,
+    double* result,
+    Instructions instructions,
+):
+    # Core virtual machine.
+    #
+    # Runs every instruction in order. For each one, the arguments are read
+    # from the array and index named by their ElementID, combined according
+    # to the instruction symbol, and the outcome is written to the
+    # destination ElementID.
+    #
+    # Raises:
+    #     RuntimeError if an instruction symbol is not ADD, MUL or COPY,
+    #     or if a COPY instruction has no argument to copy from.
+    cdef int i, num_args, symbol
+    cdef double accumulator
+    cdef ElementID* args
+    cdef ElementID elem
+    # Read the operation codes from the module globals once, as C ints, so
+    # the per-instruction dispatch below is a plain C comparison.
+    cdef int add_code = ADD
+    cdef int mul_code = MUL
+    cdef int copy_code = COPY
+    # index the four arrays by constants VARS, TMPS, CONSTS, and RESULT
+    cdef (double*) arrays[4]
+    arrays[VARS] = vars_in
+    arrays[TMPS] = tmps
+    arrays[CONSTS] = consts
+    arrays[RESULT] = result
+    cdef Instruction* instruction_ptr = instructions.instructions
+    for _ in range(instructions.num_instructions):
+        symbol = instruction_ptr[0].symbol
+        args = instruction_ptr[0].args
+        num_args = instruction_ptr[0].num_args
+        if num_args > 0:
+            # Seed the accumulator with the first argument.
+            elem = args[0]
+            accumulator = arrays[elem.array][elem.index]
+        elif symbol == mul_code:
+            # Instructions.append accepts an empty argument list, in which
+            # case args[0] must not be read. An empty product is one.
+            accumulator = 1.0
+        else:
+            # An empty sum is zero. COPY and unknown symbols are rejected below.
+            accumulator = 0.0
+        if symbol == add_code:
+            for i in range(1, num_args):
+                elem = args[i]
+                accumulator += arrays[elem.array][elem.index]
+        elif symbol == mul_code:
+            for i in range(1, num_args):
+                elem = args[i]
+                accumulator *= arrays[elem.array][elem.index]
+        elif symbol == copy_code:
+            if num_args == 0:
+                raise RuntimeError('copy instruction has no source')
+        else:
+            raise RuntimeError('symbol not understood: ' + str(symbol))
+        elem = instruction_ptr[0].dest
+        arrays[elem.array][elem.index] = accumulator
+        # Advance the instruction pointer
+        instruction_ptr += 1

ck/pgm_compiler/support/circuit_table/circuit_table.cpython-313-darwin.so CHANGED Viewed

Binary file

ck/pgm_compiler/support/circuit_table/circuit_table.pyx ADDED Viewed

@@ -0,0 +1,325 @@
+from __future__ import annotations
+from typing import Sequence, Tuple, Iterable, Iterator
+from ck.circuit import CircuitNode, Circuit, OpNode, MUL
+TableInstance = Tuple[int, ...]
+cdef class CircuitTable:
+    """
+    A circuit table manages a set of CircuitNodes, where each node corresponds
+    to an instance for a set of (zero or more) random variables.
+    Operations on circuit tables typically add circuit nodes to the circuit. It will
+    heuristically avoid adding unnecessary nodes (e.g. addition of zero, multiplication
+    by zero or one.) However, it may be that interim circuit nodes are created that
+    end up not being used. Consider calling `Circuit.remove_unreachable_op_nodes` after
+    completing all circuit table operations.
+    It is generally expected that no CircuitTable row will be created with a constant
+    zero node. These are assumed to be optimised out already.
+    """
+    # The circuit whose nodes are managed by this table.
+    cdef public object circuit
+    # Indexes of the random variables, in the column order used by row keys.
+    cdef public tuple[int, ...] rv_idxs
+    # Maps an instance (one value per random variable) to its circuit node.
+    cdef public dict[tuple[int, ...], CircuitNode] rows
+    def __init__(
+            self,
+            circuit: Circuit,
+            rv_idxs: Sequence[int],
+            rows: Iterable[Tuple[TableInstance, CircuitNode]] = (),
+    ):
+        """
+        Args:
+            circuit: the circuit whose nodes are being managed by this table.
+            rv_idxs: indexes of random variables.
+            rows: optional rows to add to the table.
+        Assumes:
+            * rv_idxs contains no duplicates.
+            * all row instances conform to the indexed random variables.
+            * all row circuit nodes belong to the given circuit.
+        """
+        self.circuit = circuit
+        # Copy the inputs so the table owns its own tuple and dict.
+        self.rv_idxs = tuple(rv_idxs)
+        self.rows = dict(rows)
+    def __len__(self) -> int:
+        # The size of a table is its number of rows.
+        return len(self.rows)
+    def get(self, key, default=None):
+        # Node for the given instance, or `default` when there is no such row.
+        return self.rows.get(key, default)
+    def __getitem__(self, key):
+        return self.rows[key]
+    def __setitem__(self, key, value):
+        self.rows[key] = value
+    cpdef object top(self):  # -> CircuitNode:
+        # Get the circuit top value.
+        #
+        # An empty table yields the circuit's zero node; a table with one
+        # row yields the node of that row.
+        #
+        # Raises:
+        #     RuntimeError if there is more than one row in the table.
+        #
+        # Returns:
+        #     A single circuit node.
+        cdef int number_of_rows = len(self.rows)
+        if number_of_rows == 0:
+            return self.circuit.zero
+        elif number_of_rows == 1:
+            return next(iter(self.rows.values()))
+        else:
+            raise RuntimeError('cannot get top node from a table with more than 1 row')
+# ==================================================================================
+#  Circuit Table Operations
+# ==================================================================================
+cpdef object sum_out(object table: CircuitTable, object rv_idxs: Iterable[int]):  # -> CircuitTable:
+    # Sum the given random variables out of a circuit table.
+    #
+    # Rows that agree on all remaining random variables are grouped, each
+    # group is added up with `circuit.optimised_add`, and groups whose sum is
+    # zero are left out of the result.
+    #
+    # Normally this will return a new table. However, if rv_idxs is empty,
+    # then the given table is returned unmodified.
+    #
+    # Raises:
+    #     ValueError if rv_idxs is not a subset of table.rv_idxs.
+    #     ValueError if rv_idxs contains duplicates.
+    cdef tuple[int, ...] rv_idxs_seq = tuple(rv_idxs)
+    if not rv_idxs_seq:
+        # nothing to do
+        return table
+    cdef set[int] to_remove = set(rv_idxs_seq)
+    if len(to_remove) != len(rv_idxs_seq):
+        raise ValueError('rv_idxs contains duplicates')
+    if not to_remove.issubset(table.rv_idxs):
+        raise ValueError('rv_idxs is not a subset of table.rv_idxs')
+    # The random variables that survive, in the table's own order.
+    cdef int rv_index
+    cdef list[int] kept_rv_idxs = []
+    for rv_index in table.rv_idxs:
+        if rv_index not in to_remove:
+            kept_rv_idxs.append(rv_index)
+    if len(kept_rv_idxs) == 0:
+        # Special case: summing out all random variables
+        return sum_out_all(table)
+    # positions[i] is the location in table.rv_idxs for kept_rv_idxs[i]
+    cdef list[int] positions = []
+    for rv_index in kept_rv_idxs:
+        positions.append(_find(table.rv_idxs, rv_index))
+    # Collect the nodes of all rows that share the same reduced instance.
+    cdef dict[tuple[int, ...], list[object]] groups = {}
+    cdef tuple[int, ...] key
+    cdef tuple[int, ...] instance
+    cdef object node
+    for instance, node in table.rows.items():
+        key = tuple([instance[pos] for pos in positions])
+        groups.setdefault(key, []).append(node)
+    # One result row per group, unless the group sums to zero.
+    cdef object circuit = table.circuit
+    cdef object result = CircuitTable(circuit, kept_rv_idxs)
+    cdef dict[tuple[int, ...], object] result_rows = result.rows
+    for key, members in groups.items():
+        node = circuit.optimised_add(members)
+        if node.is_zero():
+            continue
+        result_rows[key] = node
+    return result
+cpdef object sum_out_all(object table: CircuitTable):  # -> CircuitTable:
+    # Sum every random variable out of a circuit table.
+    #
+    # The result has no random variables. It holds at most one row, keyed
+    # by the empty instance `()`:
+    #   * an empty input gives an empty result;
+    #   * a single-row input keeps the node of that row as it is;
+    #   * otherwise the row nodes are added with `circuit.optimised_add`,
+    #     and the result is empty when that sum is zero.
+    circuit: Circuit = table.circuit
+    if len(table) == 0:
+        return CircuitTable(circuit, ())
+    nodes = table.rows.values()
+    if len(table) == 1:
+        node = next(iter(nodes))
+    else:
+        node = circuit.optimised_add(nodes)
+        if node.is_zero():
+            return CircuitTable(circuit, ())
+    return CircuitTable(circuit, (), [((), node)])
+cpdef object project(object table: CircuitTable, object rv_idxs: Iterable[int]):  # -> CircuitTable:
+    # Keep only the given random variables of a circuit table.
+    #
+    # Every random variable of the table that is not listed in `rv_idxs`
+    # is summed out, i.e. this calls `sum_out(table, dropped)` where
+    # `dropped = table.rv_idxs - rv_idxs`.
+    cdef set[int] dropped = set(table.rv_idxs)
+    dropped.difference_update(rv_idxs)
+    return sum_out(table, dropped)
+cpdef object product(x: CircuitTable, y: CircuitTable):  # -> CircuitTable:
+    # Multiply two circuit tables.
+    #
+    # If one table has no random variables and holds only the constant one,
+    # the other table is returned as is. Otherwise a new circuit table is
+    # constructed and returned.
+    #
+    # Raises:
+    #     ValueError if the two tables do not refer to the same circuit.
+    cdef object circuit = x.circuit
+    if y.circuit is not circuit:
+        raise ValueError('circuit tables must refer to the same circuit')
+    # Make the smaller table 'y', and the other 'x'.
+    # This is to minimise the index size on 'y'.
+    if len(x) < len(y):
+        x, y = y, x
+    # Special case: y == 0 or 1, and has no random variables.
+    if len(y.rv_idxs) == 0:
+        if len(y) == 1 and y.top().is_one():
+            return x
+        elif len(y) == 0:
+            return CircuitTable(circuit, x.rv_idxs)
+    # Set operations on rv indexes. After these operations:
+    # * co_rv_idxs is the set of rv indexes common (co) to x and y,
+    # * yo_rv_idxs is the set of rv indexes in y only (yo), and not in x.
+    # The sets are updated in place; their iteration order decides the
+    # column order of the result.
+    cdef set[int] yo_rv_idxs_set = set(y.rv_idxs)
+    cdef set[int] co_rv_idxs_set = set(x.rv_idxs)
+    co_rv_idxs_set.intersection_update(yo_rv_idxs_set)
+    yo_rv_idxs_set.difference_update(co_rv_idxs_set)
+    if len(co_rv_idxs_set) == 0:
+        # Special case: no common random variables.
+        return _product_no_common_rvs(x, y)
+    # Convert random variable index sets to sequences
+    cdef tuple[int, ...] yo_rv_idxs = tuple(yo_rv_idxs_set)  # y only random variables
+    cdef tuple[int, ...] co_rv_idxs = tuple(co_rv_idxs_set)  # common random variables
+    # Cache mappings from result Instance to index into source Instance (x or y).
+    # This will be used in indexing and product loops to pull out the needed
+    # values from the source instances.
+    cdef list[int] co_from_x_map = [_find(x.rv_idxs, rv_index) for rv_index in co_rv_idxs]
+    cdef list[int] co_from_y_map = [_find(y.rv_idxs, rv_index) for rv_index in co_rv_idxs]
+    cdef list[int] yo_from_y_map = [_find(y.rv_idxs, rv_index) for rv_index in yo_rv_idxs]
+    cdef object matches
+    cdef tuple[int, ...] co_tuple
+    cdef tuple[int, ...] yo_tuple
+    cdef object table = CircuitTable(circuit, x.rv_idxs + yo_rv_idxs)
+    cdef dict[tuple[int, ...], object] rows = table.rows
+    # Index the y rows by common-only key (y is the smaller of the two tables).
+    # Each entry lists the (y-only values, node) pairs sharing that key.
+    cdef dict[tuple[int, ...], list[tuple[tuple[int, ...], object]]] y_index = {}
+    for y_instance, y_node in y.rows.items():
+        co_tuple = tuple([y_instance[pos] for pos in co_from_y_map])
+        yo_tuple = tuple([y_instance[pos] for pos in yo_from_y_map])
+        y_index.setdefault(co_tuple, []).append((yo_tuple, y_node))
+    # Iterate over x rows, inserting (instance, value).
+    # Rows with constant node values of one are optimised out.
+    for x_instance, x_node in x.rows.items():
+        co_tuple = tuple([x_instance[pos] for pos in co_from_x_map])
+        matches = y_index.get(co_tuple)
+        if matches is None:
+            # No y row agrees with this x row on the common variables.
+            continue
+        if x_node.is_one():
+            # Multiplying by one: the y node is the product.
+            for yo_tuple, y_node in matches:
+                rows[x_instance + yo_tuple] = y_node
+        else:
+            for yo_tuple, y_node in matches:
+                rows[x_instance + yo_tuple] = _optimised_mul(circuit, x_node, y_node)
+    return table
+cdef int _find(tuple[int, ...] xs, int x):
+    # Return the position of the first element of xs that equals x.
+    #
+    # Raises:
+    #     RuntimeError if no element of xs equals x.
+    cdef int position = 0
+    for candidate in xs:
+        if candidate == x:
+            return position
+        position += 1
+    # Very unexpected
+    raise RuntimeError('not found')
+cdef object _product_no_common_rvs(x: CircuitTable, y: CircuitTable):  # -> CircuitTable:
+    # Return the product of x and y, where x and y have no common random variables.
+    #
+    # This is an optimisation of the more general product algorithm, as no index
+    # needs to be constructed based on the common random variables: every row
+    # of x is simply paired with every row of y.
+    #
+    # Rows with constant node values of one are optimised out.
+    #
+    # Assumes:
+    #     * There are no common random variables between x and y.
+    #     * x and y are for the same circuit.
+    cdef object circuit = x.circuit
+    cdef object table = CircuitTable(circuit, x.rv_idxs + y.rv_idxs)
+    cdef tuple[int, ...] instance
+    cdef bint x_is_one
+    for x_instance, x_node in x.rows.items():
+        # Decide once per x row whether the multiplication can be skipped.
+        x_is_one = x_node.is_one()
+        for y_instance, y_node in y.rows.items():
+            instance = x_instance + y_instance
+            if x_is_one:
+                table.rows[instance] = y_node
+            else:
+                table.rows[instance] = _optimised_mul(circuit, x_node, y_node)
+    return table
+cdef object _optimised_mul(object circuit: Circuit, object x: CircuitNode, object y: CircuitNode):  # -> CircuitNode
+    # Multiply two nodes, avoiding a new circuit node where possible.
+    #
+    # A zero operand is returned as the product, and multiplying by one
+    # returns the other operand. Only when neither shortcut applies is
+    # `circuit.mul(x, y)` called.
+    if x.is_zero():
+        return x
+    if y.is_zero() or x.is_one():
+        # Either y is zero, or x is one: y is the product in both cases.
+        return y
+    if y.is_one():
+        return x
+    return circuit.mul(x, y)