PyPI - da4ml - Versions diffs - 0.4.1__py3-none-any.whl → 0.5.0b0__py3-none-any.whl - Mend

da4ml 0.4.1__py3-none-any.whl → 0.5.0b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of da4ml might be problematic. Click here for more details.

Files changed (40) hide show

da4ml/__init__.py +2 -16
da4ml/_version.py +2 -2
da4ml/cmvm/__init__.py +2 -2
da4ml/cmvm/api.py +15 -4
da4ml/cmvm/core/__init__.py +2 -2
da4ml/cmvm/types.py +32 -18
da4ml/cmvm/util/bit_decompose.py +2 -2
da4ml/codegen/hls/hls_codegen.py +10 -5
da4ml/codegen/hls/hls_model.py +7 -4
da4ml/codegen/rtl/common_source/build_binder.mk +6 -5
da4ml/codegen/rtl/common_source/build_quartus_prj.tcl +104 -0
da4ml/codegen/rtl/common_source/{build_prj.tcl → build_vivado_prj.tcl} +39 -18
da4ml/codegen/rtl/common_source/template.sdc +27 -0
da4ml/codegen/rtl/common_source/template.xdc +11 -13
da4ml/codegen/rtl/rtl_model.py +105 -54
da4ml/codegen/rtl/verilog/__init__.py +2 -1
da4ml/codegen/rtl/verilog/comb.py +47 -7
da4ml/codegen/rtl/verilog/io_wrapper.py +4 -4
da4ml/codegen/rtl/verilog/pipeline.py +12 -12
da4ml/codegen/rtl/verilog/source/lookup_table.v +27 -0
da4ml/codegen/rtl/vhdl/comb.py +27 -21
da4ml/codegen/rtl/vhdl/io_wrapper.py +11 -11
da4ml/codegen/rtl/vhdl/pipeline.py +12 -12
da4ml/codegen/rtl/vhdl/source/lookup_table.vhd +52 -0
da4ml/converter/__init__.py +57 -1
da4ml/converter/hgq2/parser.py +4 -25
da4ml/converter/hgq2/replica.py +208 -22
da4ml/trace/fixed_variable.py +239 -29
da4ml/trace/fixed_variable_array.py +276 -48
da4ml/trace/ops/__init__.py +31 -15
da4ml/trace/ops/reduce_utils.py +3 -3
da4ml/trace/pipeline.py +40 -18
da4ml/trace/tracer.py +33 -8
da4ml/typing/__init__.py +3 -0
{da4ml-0.4.1.dist-info → da4ml-0.5.0b0.dist-info}/METADATA +2 -1
{da4ml-0.4.1.dist-info → da4ml-0.5.0b0.dist-info}/RECORD +39 -35
da4ml/codegen/rtl/vhdl/source/template.xdc +0 -32
{da4ml-0.4.1.dist-info → da4ml-0.5.0b0.dist-info}/WHEEL +0 -0
{da4ml-0.4.1.dist-info → da4ml-0.5.0b0.dist-info}/licenses/LICENSE +0 -0
{da4ml-0.4.1.dist-info → da4ml-0.5.0b0.dist-info}/top_level.txt +0 -0

da4ml/trace/fixed_variable_array.py CHANGED Viewed

@@ -1,13 +1,15 @@
+from collections.abc import Callable
+from decimal import Decimal
 from inspect import signature
-from typing import Any, TypeVar
+from typing import TypeVar
 import numpy as np
 from numba.typed import List as NumbaList
 from numpy.typing import NDArray
-from ..cmvm import solve
-from .fixed_variable import FixedVariable, FixedVariableInput, HWConfig, QInterval
-from .ops import einsum, reduce
+from ..cmvm.api import solve, solver_options_t
+from .fixed_variable import FixedVariable, FixedVariableInput, HWConfig, LookupTable, QInterval
+from .ops import _quantize, einsum, reduce
 T = TypeVar('T')
@@ -42,7 +44,79 @@ def _min_of(a, b):
         return min(a, b)
+def mmm(mat0: np.ndarray, mat1: np.ndarray):
+    """Matrix product of two object arrays, built one output element at a time.
+    All leading axes of ``mat0`` and all trailing axes of ``mat1`` are flattened so the
+    product is evaluated in 2-D; every output element is ``reduce`` (with ``+``) applied to
+    the elementwise product of one row of ``mat0`` and one column of ``mat1``.
+    The result is reshaped to ``mat0.shape[:-1] + mat1.shape[1:]``.
+    """
+    out_shape = mat0.shape[:-1] + mat1.shape[1:]
+    lhs = mat0.reshape((-1, mat0.shape[-1]))
+    rhs = mat1.reshape((mat1.shape[0], -1))
+    n_rows, n_cols = lhs.shape[0], rhs.shape[1]
+    out = np.empty((n_rows, n_cols), dtype=object)
+    # np.ndindex walks (row, col) in the same row-major order as two nested range loops.
+    for row, col in np.ndindex(n_rows, n_cols):
+        out[row, col] = reduce(lambda x, y: x + y, lhs[row] * rhs[:, col])
+    return out.reshape(out_shape)
+def cmvm(cm: np.ndarray, v: 'FixedVariableArray', solver_options: solver_options_t) -> np.ndarray:
+    """Multiply the 1-D variable array ``v`` by the constant matrix ``cm`` using ``solve``.
+    Entries flagged by ``offload_mask`` are zeroed in the matrix handed to the solver and are
+    instead evaluated through ``mmm``; that contribution is added to the solver result.
+    ``solver_options`` is modified in place: ``adder_size`` and ``carry_size`` are filled from
+    the first element's ``hwconf`` when those keys are not already present.
+    """
+    mask = offload_mask(cm, v)
+    offload_cm = None
+    if np.any(mask):
+        offload_cm = cm * mask.astype(cm.dtype)
+        cm = cm * (~mask).astype(cm.dtype)
+    elements = v._vars
+    qintervals = NumbaList([QInterval(float(e.low), float(e.high), float(e.step)) for e in elements])  # type: ignore
+    latencies = NumbaList([float(e.latency) for e in elements])  # type: ignore
+    hwconf = elements.ravel()[0].hwconf
+    # setdefault keeps any value the caller already supplied for these keys.
+    solver_options.setdefault('adder_size', hwconf.adder_size)
+    solver_options.setdefault('carry_size', hwconf.carry_size)
+    sol = solve(
+        np.ascontiguousarray(cm.astype(np.float32)),
+        qintervals=qintervals,
+        latencies=latencies,
+        **solver_options,
+    )
+    result: np.ndarray = sol(elements)
+    if offload_cm is None:
+        return result
+    return result + mmm(elements, offload_cm)
+def offload_mask(cm: NDArray, v: 'FixedVariableArray') -> NDArray[np.bool_]:
+    """Flag the non-zero entries of ``cm`` whose input element has ``k + i + f == 0``.
+    ``v`` must be 1-D and ``cm`` 2-D with ``cm.shape[0] == v.shape[0]``; the returned boolean
+    mask has the shape of ``cm``.
+    """
+    assert v.ndim == 1
+    assert cm.ndim == 2
+    assert cm.shape[0] == v.shape[0]
+    total_bits = np.sum(v.kif, axis=0)
+    # Turn the per-input flag into a column so it broadcasts across each row of cm.
+    zero_width_rows = (total_bits == 0)[:, None]
+    return zero_width_rows & (cm != 0)
+# NumPy ufuncs that FixedVariableArray.__array_ufunc__ accepts with a single input and
+# forwards to ``self.apply(ufunc)``.
+_unary_functions = (
+    np.sin,
+    np.cos,
+    np.tan,
+    np.exp,
+    np.log,
+    np.invert,
+    np.sqrt,
+    np.tanh,
+    np.sinh,
+    np.cosh,
+    np.arccos,
+    np.arcsin,
+    np.arctan,
+    np.arcsinh,
+    np.arccosh,
+    np.arctanh,
+    np.exp2,
+    np.expm1,
+    np.log2,
+    np.log10,
+    np.log1p,
+    np.cbrt,
+    np.reciprocal,
+)
 class FixedVariableArray:
+    """Symbolic array of FixedVariable for tracing operations. Supports numpy ufuncs and array functions."""
     __array_priority__ = 100
     def __array_function__(self, func, types, args, kwargs):
@@ -52,17 +126,19 @@ class FixedVariableArray:
             elif len(args) == 2 and isinstance(args[0], np.ndarray) and isinstance(args[1], np.ndarray):
                 return self.__rmatmul__(args[1])
-        if func in (np.mean, np.sum, np.amax, np.amin, np.max, np.min):
+        if func in (np.mean, np.sum, np.amax, np.amin, np.prod, np.max, np.min):
             match func:
                 case np.mean:
-                    _x = reduce(lambda x, y: x + y, self, *args[1:], **kwargs)
+                    _x = reduce(lambda x, y: x + y, *args, **kwargs)
                     return _x * (_x.size / self._vars.size)
                 case np.sum:
-                    return reduce(lambda x, y: x + y, self, *args[1:], **kwargs)
+                    return reduce(lambda x, y: x + y, *args, **kwargs)
                 case np.max | np.amax:
-                    return reduce(_max_of, self, *args[1:], **kwargs)
+                    return reduce(_max_of, *args, **kwargs)
                 case np.min | np.amin:
-                    return reduce(_min_of, self, *args[1:], **kwargs)
+                    return reduce(_min_of, *args, **kwargs)
+                case np.prod:
+                    return reduce(lambda x, y: x * y, *args, **kwargs)
                 case _:
                     raise NotImplementedError(f'Unsupported function: {func}')
@@ -86,7 +162,7 @@ class FixedVariableArray:
             assert bind.arguments.get('out', None) is None, 'Output argument is not supported'
             return einsum(eq, *operands)
-        if func in (np.dot, np.matmul):
+        if func is np.dot:
             assert len(args) in (2, 3), 'Dot function requires exactly two or three arguments'
             assert len(args) == 2
@@ -107,19 +183,85 @@ class FixedVariableArray:
             self.solver_options,
         )
+    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
+        """Dispatch NumPy ufunc calls that involve this array.
+        Only plain calls (``method == '__call__'``) are accepted. Arithmetic ufuncs are applied
+        to the raw object arrays; ``maximum``/``minimum``/``abs`` are built element by element;
+        ``matmul``, ``power`` and ``square`` defer to the matching methods/operators; every
+        ufunc listed in ``_unary_functions`` is routed through ``self.apply``.
+        Raises ``NotImplementedError`` for any other ufunc.
+        """
+        assert method == '__call__', f'Only __call__ method is supported for ufuncs, got {method}'
+        match ufunc:
+            # np.negative is deliberately not listed here: it has its own unary branch below,
+            # which was unreachable while this alternation also matched it.
+            case np.add | np.subtract | np.multiply | np.true_divide:
+                inputs = [to_raw_arr(x) for x in inputs]
+                return FixedVariableArray(ufunc(*inputs, **kwargs), self.solver_options)
+            case np.negative:
+                assert len(inputs) == 1
+                return FixedVariableArray(ufunc(to_raw_arr(inputs[0]), **kwargs), self.solver_options)
+            case np.maximum | np.minimum:
+                op = _max_of if ufunc is np.maximum else _min_of
+                a, b = np.broadcast_arrays(inputs[0], inputs[1])
+                shape = a.shape
+                a, b = a.ravel(), b.ravel()
+                r = np.empty(a.size, dtype=object)
+                for i in range(a.size):
+                    r[i] = op(a[i], b[i])
+                return FixedVariableArray(r.reshape(shape), self.solver_options)
+            case np.matmul:
+                assert len(inputs) == 2
+                assert isinstance(inputs[0], FixedVariableArray) or isinstance(inputs[1], FixedVariableArray)
+                if isinstance(inputs[0], FixedVariableArray):
+                    return inputs[0].matmul(inputs[1])
+                else:
+                    return inputs[1].rmatmul(inputs[0])
+            case np.power:
+                assert len(inputs) == 2
+                base, exp = inputs
+                return base**exp
+            case np.abs | np.absolute:
+                assert len(inputs) == 1
+                assert inputs[0] is self
+                # Elements whose k entry is 0 are passed through unchanged.
+                mask: np.ndarray = (self.kif[0] == 0).ravel()
+                arr = self._vars.ravel()
+                r = np.empty(arr.size, dtype=object)
+                for i in range(arr.size):
+                    if mask[i]:
+                        r[i] = arr[i]
+                        continue
+                    v = arr[i]
+                    # NOTE(review): presumably msb_mux picks -v when the sign bit is set - confirm.
+                    v = v.msb_mux(-v, v)
+                    # The result's lower bound is forced to zero.
+                    v.low = Decimal(0)
+                    r[i] = v
+                return FixedVariableArray(r.reshape(self.shape), self.solver_options)
+            case np.square:
+                assert len(inputs) == 1
+                assert inputs[0] is self
+                return self**2
+        if ufunc in _unary_functions:
+            assert len(inputs) == 1
+            assert inputs[0] is self
+            return self.apply(ufunc)
+        raise NotImplementedError(f'Unsupported ufunc: {ufunc}')
     def __init__(
         self,
         vars: NDArray,
-        solver_options: dict[str, Any] | None = None,
+        solver_options: solver_options_t | None = None,
     ):
-        self._vars = np.array(vars)
+        _vars = np.array(vars)
+        _vars_f = _vars.ravel()
+        hwconf = next(iter(v for v in _vars_f if isinstance(v, FixedVariable))).hwconf
+        for i, v in enumerate(_vars_f):
+            if not isinstance(v, FixedVariable):
+                _vars_f[i] = FixedVariable(float(v), float(v), 1.0, hwconf=hwconf)
+        self._vars = _vars
         _solver_options = signature(solve).parameters
         _solver_options = {k: v.default for k, v in _solver_options.items() if v.default is not v.empty}
         if solver_options is not None:
             _solver_options.update(solver_options)
         _solver_options.pop('qintervals', None)
         _solver_options.pop('latencies', None)
-        self.solver_options = _solver_options
+        self.solver_options: solver_options_t = _solver_options  # type: ignore
     @classmethod
     def from_lhs(
@@ -129,7 +271,7 @@ class FixedVariableArray:
         step: NDArray[np.floating],
         hwconf: HWConfig,
         latency: np.ndarray | float = 0.0,
-        solver_options: dict[str, Any] | None = None,
+        solver_options: solver_options_t | None = None,
     ):
         shape = low.shape
         assert shape == high.shape == step.shape
@@ -162,7 +304,7 @@ class FixedVariableArray:
         f: NDArray[np.integer],
         hwconf: HWConfig,
         latency: NDArray[np.floating] | float = 0.0,
-        solver_options: dict[str, Any] | None = None,
+        solver_options: solver_options_t | None = None,
     ):
         mask = k + i + f <= 0
         k = np.where(mask, 0, k)
@@ -173,47 +315,34 @@ class FixedVariableArray:
         high, low = _high - step, -_high * k
         return cls.from_lhs(low, high, step, hwconf, latency, solver_options)
-    def __matmul__(self, other):
+    def matmul(self, other):
         if isinstance(other, FixedVariableArray):
             other = other._vars
         if not isinstance(other, np.ndarray):
             other = np.array(other)
         if any(isinstance(x, FixedVariable) for x in other.ravel()):
             mat0, mat1 = self._vars, other
-            shape = mat0.shape[:-1] + mat1.shape[1:]
-            mat0, mat1 = mat0.reshape((-1, mat0.shape[-1])), mat1.reshape((mat1.shape[0], -1))
-            _shape = (mat0.shape[0], mat1.shape[1])
-            _vars = np.empty(_shape, dtype=object)
-            for i in range(mat0.shape[0]):
-                for j in range(mat1.shape[1]):
-                    vec0 = mat0[i]
-                    vec1 = mat1[:, j]
-                    _vars[i, j] = reduce(lambda x, y: x + y, vec0 * vec1)
-            return FixedVariableArray(_vars.reshape(shape), self.solver_options)
-        kwargs = (self.solver_options or {}).copy()
+            _vars = mmm(mat0, mat1)
+            return FixedVariableArray(_vars, self.solver_options)
+        solver_options = (self.solver_options or {}).copy()
         shape0, shape1 = self.shape, other.shape
         assert shape0[-1] == shape1[0], f'Matrix shapes do not match: {shape0} @ {shape1}'
-        c = shape1[0]
+        contract_len = shape1[0]
         out_shape = shape0[:-1] + shape1[1:]
-        mat0, mat1 = self.reshape((-1, c)), other.reshape((c, -1))
+        mat0, mat1 = self.reshape((-1, contract_len)), other.reshape((contract_len, -1))
         r = []
         for i in range(mat0.shape[0]):
             vec = mat0[i]
-            _qintervals = [QInterval(float(v.low), float(v.high), float(v.step)) for v in vec._vars]
-            _latencies = [float(v.latency) for v in vec._vars]
-            qintervals = NumbaList(_qintervals)  # type: ignore
-            latencies = NumbaList(_latencies)  # type: ignore
-            hwconf = self._vars.ravel()[0].hwconf
-            kwargs.update(adder_size=hwconf.adder_size, carry_size=hwconf.carry_size)
-            _mat = np.ascontiguousarray(mat1.astype(np.float32))
-            sol = solve(_mat, qintervals=qintervals, latencies=latencies, **kwargs)
-            _r = sol(vec._vars)
+            _r = cmvm(mat1, vec, solver_options)
             r.append(_r)
         r = np.array(r).reshape(out_shape)
         return FixedVariableArray(r, self.solver_options)
-    def __rmatmul__(self, other):
+    def __matmul__(self, other):
+        """Implement ``self @ other`` by delegating to ``self.matmul``."""
+        return self.matmul(other)
+    def rmatmul(self, other):
         mat1 = np.moveaxis(other, -1, 0)
         mat0 = np.moveaxis(self, 0, -1)  # type: ignore
         ndim0, ndim1 = mat0.ndim, mat1.ndim
@@ -223,6 +352,9 @@ class FixedVariableArray:
         axes = _axes[ndim0 - 1 :] + _axes[: ndim0 - 1]
         return r.transpose(axes)
+    def __rmatmul__(self, other):
+        """Implement ``other @ self`` by delegating to ``self.rmatmul``."""
+        return self.rmatmul(other)
     def __getitem__(self, item):
         vars = self._vars[item]
         if isinstance(vars, np.ndarray):
@@ -269,10 +401,17 @@ class FixedVariableArray:
     def __pow__(self, power: int | float):
         _power = int(power)
-        assert _power == power, 'Power must be an integer'
-        return FixedVariableArray(self._vars**_power, self.solver_options)
+        if _power == power and _power >= 0:
+            return FixedVariableArray(self._vars**_power, self.solver_options)
+        else:
+            return self.apply(lambda x: x**power)
-    def relu(self, i: NDArray[np.integer] | None = None, f: NDArray[np.integer] | None = None, round_mode: str = 'TRN'):
+    def relu(
+        self,
+        i: NDArray[np.integer] | None = None,
+        f: NDArray[np.integer] | None = None,
+        round_mode: str = 'TRN',
+    ):
         shape = self._vars.shape
         i = np.broadcast_to(i, shape) if i is not None else np.full(shape, None)
         f = np.broadcast_to(f, shape) if f is not None else np.full(shape, None)
@@ -290,9 +429,11 @@ class FixedVariableArray:
         round_mode: str = 'TRN',
     ):
         shape = self._vars.shape
-        k = np.broadcast_to(k, shape) if k is not None else np.full(shape, None)
-        i = np.broadcast_to(i, shape) if i is not None else np.full(shape, None)
-        f = np.broadcast_to(f, shape) if f is not None else np.full(shape, None)
+        if any(x is None for x in (k, i, f)):
+            kif = self.kif
+        k = np.broadcast_to(k, shape) if k is not None else kif[0]  # type: ignore
+        i = np.broadcast_to(i, shape) if i is not None else kif[1]  # type: ignore
+        f = np.broadcast_to(f, shape) if f is not None else kif[2]  # type: ignore
         ret = []
         for v, k, i, f in zip(self._vars.ravel(), k.ravel(), i.ravel(), f.ravel()):  # type: ignore
             ret.append(v.quantize(k=k, i=i, f=f, overflow_mode=overflow_mode, round_mode=round_mode))
@@ -324,17 +465,28 @@ class FixedVariableArray:
     @property
     def kif(self):
+        """[k, i, f] array"""
         shape = self._vars.shape
         kif = np.array([v.kif for v in self._vars.ravel()]).reshape(*shape, 3)
         return np.moveaxis(kif, -1, 0)
+    def apply(self, fn: Callable[[NDArray], NDArray]) -> 'RetardedFixedVariableArray':
+        """Attach the unary operator ``fn`` to this array's variables.
+        Returns a RetardedFixedVariableArray built from ``self._vars`` and
+        ``self.solver_options`` with ``fn`` stored as its operator.
+        """
+        deferred = RetardedFixedVariableArray(self._vars, self.solver_options, operator=fn)
+        return deferred
 class FixedVariableArrayInput(FixedVariableArray):
+    """Similar to FixedVariableArray, but initializes all elements as FixedVariableInput - the precisions are unspecified when initialized, and the highest precision requested (i.e., quantized to) will be recorded for generation of the logic."""
     def __init__(
         self,
         shape: tuple[int, ...] | int,
-        hwconf: HWConfig = HWConfig(1, -1, -1),
-        solver_options: dict[str, Any] | None = None,
+        hwconf: HWConfig | tuple[int, int, int] = HWConfig(1, -1, -1),
+        solver_options: solver_options_t | None = None,
         latency=0.0,
     ):
         _vars = np.empty(shape, dtype=object)
@@ -342,3 +494,79 @@ class FixedVariableArrayInput(FixedVariableArray):
         for i in range(_vars.size):
             _vars_f[i] = FixedVariableInput(latency, hwconf)
         super().__init__(_vars, solver_options)
+def make_table(fn: Callable[[NDArray], NDArray], qint: QInterval) -> LookupTable:
+    """Evaluate ``fn`` on the grid ``low, low + step, ..., high`` of ``qint`` and wrap it in a LookupTable."""
+    lo, hi, step = qint
+    # Both endpoints are included, hence the +1.
+    n_points = round((hi - lo) / step) + 1
+    grid = np.linspace(lo, hi, n_points)
+    return LookupTable(fn(grid))
+class RetardedFixedVariableArray(FixedVariableArray):
+    """Ephemeral FixedVariableArray generated from operations of unspecified output precision.
+    This object translates to normal FixedVariableArray upon quantization.
+    Does not inherit the maximum precision like FixedVariableArrayInput.
+    This object can be used in two ways:
+    1. Quantization with specified precision, which converts to FixedVariableArray.
+    2. Apply a further unary operation, which returns another RetardedFixedVariableArray. (e.g., composite functions)
+    """
+    def __init__(self, vars: NDArray, solver_options: solver_options_t | None, operator: Callable[[NDArray], NDArray]):
+        """Store the pending unary ``operator`` and initialise the underlying array."""
+        self._operator = operator
+        super().__init__(vars, solver_options)
+    def __array_function__(self, ufunc, method, *inputs, **kwargs):
+        """Reject every NumPy array-function call on an unquantized array."""
+        # NumPy invokes this hook positionally as (func, types, args, kwargs); the permissive
+        # signature accepts that call shape, and every call is refused regardless.
+        raise RuntimeError('RetardedFixedVariableArray only supports quantization or further unary operations.')
+    def apply(self, fn: Callable[[NDArray], NDArray]) -> 'RetardedFixedVariableArray':
+        """Compose ``fn`` on top of the pending operator and return a new pending array."""
+        return RetardedFixedVariableArray(
+            self._vars,
+            self.solver_options,
+            operator=lambda x: fn(self._operator(x)),
+        )
+    def quantize(
+        self,
+        k: NDArray[np.integer] | np.integer | int | None = None,
+        i: NDArray[np.integer] | np.integer | int | None = None,
+        f: NDArray[np.integer] | np.integer | int | None = None,
+        overflow_mode: str = 'WRAP',
+        round_mode: str = 'TRN',
+    ):
+        """Materialise the pending operator as lookup tables and return a FixedVariableArray.
+        ``k``, ``i`` and ``f`` must be given together or omitted together. When given, each is
+        broadcast to ``self.shape`` and the operator output is passed through ``_quantize``
+        before tabulation; when omitted, the raw operator output is tabulated.
+        Tables are shared between elements that have the same input ``qint`` and the same
+        (k, i, f) triple.
+        Raises ``AssertionError`` if only some of ``k``, ``i``, ``f`` are provided.
+        """
+        given = [x is not None for x in (k, i, f)]
+        if not all(given):
+            # The original asserted the opposite condition here, so omitting k/i/f always failed.
+            assert not any(given), 'Either all or none of k, i, f must be specified'
+            _k = _i = _f = [None] * self.size
+        else:
+            _k = np.broadcast_to(k, self.shape).ravel()  # type: ignore
+            _i = np.broadcast_to(i, self.shape).ravel()  # type: ignore
+            _f = np.broadcast_to(f, self.shape).ravel()  # type: ignore
+        local_tables: dict[tuple[QInterval, tuple[int, int, int] | None], LookupTable] = {}
+        variables = []
+        for v, _kk, _ii, _ff in zip(self._vars.ravel(), _k, _i, _f):
+            if (_kk is None) or (_ii is None) or (_ff is None):
+                op = self._operator
+                # Same tuple layout as the quantized key so the cache has one key type.
+                _key = (v.qint, None)
+            else:
+                # Bind the per-element precision as defaults so the closure cannot pick up
+                # values from a later loop iteration.
+                op = lambda x, kk=_kk, ii=_ii, ff=_ff: _quantize(  # type: ignore
+                    self._operator(x), kk, ii, ff, overflow_mode, round_mode
+                )
+                _key = (v.qint, (int(_kk), int(_ii), int(_ff)))
+            if _key in local_tables:
+                table = local_tables[_key]
+            else:
+                table = make_table(op, v.qint)
+                local_tables[_key] = table
+            variables.append(v.lookup(table))
+        variables = np.array(variables).reshape(self._vars.shape)
+        return FixedVariableArray(variables, self.solver_options)
+    def __repr__(self):
+        return 'Retarded' + super().__repr__()
+    @property
+    def kif(self):
+        """Always raises: the precision is undefined until ``quantize`` is called."""
+        raise RuntimeError('RetardedFixedVariableArray does not have defined kif until quantized.')

da4ml/trace/ops/__init__.py CHANGED Viewed

@@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, TypeVar
 import numpy as np
 from numpy.typing import NDArray
+from quantizers.fixed_point.fixed_point_ops_np import get_fixed_quantizer_np
 from ..fixed_variable_array import FixedVariable
 from .conv_utils import conv, pool
@@ -22,9 +23,11 @@ def relu(x: T, i: NDArray[np.integer] | None = None, f: NDArray[np.integer] | No
     elif isinstance(x, list):
         return [xx.relu(i=ii, f=ff, round_mode=round_mode) for xx, ii, ff in zip(x, i, f)]  # type: ignore
     else:
+        round_mode = round_mode.upper()
+        assert round_mode in ('TRN', 'RND')
         x = np.maximum(x, 0)
         if f is not None:
-            if round_mode.upper() == 'RND':
+            if round_mode == 'RND':
                 x += 2.0 ** (-f - 1)
             sf = 2.0**f
             x = np.floor(x * sf) / sf
@@ -33,6 +36,18 @@ def relu(x: T, i: NDArray[np.integer] | None = None, f: NDArray[np.integer] | No
         return x
+def _quantize(
+    x: NDArray[np.floating],
+    k: NDArray[np.integer] | np.integer | int,
+    i: NDArray[np.integer] | np.integer | int,
+    f: NDArray[np.integer] | np.integer | int,
+    overflow_mode: str = 'WRAP',
+    round_mode: str = 'TRN',
+) -> NDArray[np.floating]:
+    """Quantize the numeric array ``x`` with the quantizer from ``get_fixed_quantizer_np``.
+    The quantizer is built for the given ``round_mode``/``overflow_mode`` and called with
+    ``k``, ``i`` and ``f`` as keyword arguments.
+    """
+    quantizer = get_fixed_quantizer_np(overflow_mode=overflow_mode, round_mode=round_mode)
+    return quantizer(x, k=k, i=i, f=f)  # type: ignore
 def quantize(
     x: T,
     k: NDArray[np.integer] | np.integer | int,
@@ -43,22 +58,23 @@ def quantize(
 ) -> T:
     from ..fixed_variable_array import FixedVariableArray
-    if isinstance(x, FixedVariableArray):
+    if isinstance(x, (FixedVariableArray, FixedVariable)):
         return x.quantize(k=k, i=i, f=f, overflow_mode=overflow_mode, round_mode=round_mode)
+    elif isinstance(x, list):
+        ret: list[FixedVariable] = []
+        for i in range(len(x)):
+            ret.append(
+                x[i].quantize(
+                    k=int(k[i] if isinstance(k, (list, np.ndarray)) else k),
+                    i=int(i[i] if isinstance(i, (list, np.ndarray)) else i),
+                    f=int(f[i] if isinstance(f, (list, np.ndarray)) else f),
+                    overflow_mode=overflow_mode,
+                    round_mode=round_mode,
+                )
+            )
+        return ret  # type: ignore
     else:
-        x = x.copy()
-        if overflow_mode in ('SAT', 'SAT_SYM'):
-            step = 2.0**-f
-            _high = 2.0**i
-            high = _high - step
-            low = -_high * k if overflow_mode == 'SAT' else -high * k
-            x = np.clip(x, low, high)  # type: ignore
-        if round_mode.upper() == 'RND':
-            x += 2.0 ** (-f - 1)  # type: ignore
-        b = k + i + f
-        bias = 2.0 ** (b - 1) * k
-        eps = 2.0**-f
-        return eps * ((np.floor(x / eps) + bias) % 2.0**b - bias)  # type: ignore
+        return _quantize(x, k, i, f, overflow_mode, round_mode)
 __all__ = [

da4ml/trace/ops/reduce_utils.py CHANGED Viewed

@@ -100,6 +100,6 @@ def reduce(operator: Callable[[T, T], T], x: TA, axis: int | Sequence[int] | Non
     if isinstance(x, FixedVariableArray):
         r = FixedVariableArray(r, solver_config)
-        if r.size == 1 and not keepdims:
-            return r.ravel()[0]  # type: ignore
-    return r if r.size > 1 or keepdims else r.ravel()[0]  # type: ignore
+        if r.shape == ():
+            return r._vars.item()  # type: ignore
+    return r if r.shape != () or keepdims else r.item()  # type: ignore

da4ml/trace/pipeline.py CHANGED Viewed

@@ -1,11 +1,11 @@
 from math import ceil, floor
-from ..cmvm.types import CascadedSolution, Op, Solution
+from ..cmvm.types import CombLogic, Op, Pipeline
 from .fixed_variable import FixedVariable, HWConfig
 from .tracer import comb_trace
-def retime_pipeline(csol: CascadedSolution, verbose=True):
+def retime_pipeline(csol: Pipeline, verbose=True):
     n_stages = len(csol[0])
     cutoff_high = ceil(max(max(sol.out_latency) / (i + 1) for i, sol in enumerate(csol[0])))
     cutoff_low = 0
@@ -60,14 +60,14 @@ def _get_new_idx(
     return p0_idx
-def to_pipeline(sol: Solution, latency_cutoff: float, retiming=True, verbose=True) -> CascadedSolution:
+def to_pipeline(comb: CombLogic, latency_cutoff: float, retiming=True, verbose=True) -> Pipeline:
     """Split the record into multiple stages based on the latency of the operations.
     Only useful for HDL generation.
     Parameters
     ----------
-    sol : Solution
-        The solution to be split into multiple stages.
+    sol : CombLogic
+        The combinational logic to be pipelined into multiple stages.
     latency_cutoff : float
         The latency cutoff for splitting the operations.
     retiming : bool
@@ -83,8 +83,8 @@ def to_pipeline(sol: Solution, latency_cutoff: float, retiming=True, verbose=Tru
     CascadedSolution
         The cascaded solution with multiple stages.
     """
-    assert len(sol.ops) > 0, 'No operations in the record'
-    for i, op in enumerate(sol.ops):
+    assert len(comb.ops) > 0, 'No operations in the record'
+    for i, op in enumerate(comb.ops):
         if op.id1 != -1:
             break
@@ -96,9 +96,9 @@ def to_pipeline(sol: Solution, latency_cutoff: float, retiming=True, verbose=Tru
     locator: list[dict[int, int]] = []
-    ops = sol.ops.copy()
-    lat = max(ops[i].latency for i in sol.out_idxs)
-    for i in sol.out_idxs:
+    ops = comb.ops.copy()
+    lat = max(ops[i].latency for i in comb.out_idxs)
+    for i in comb.out_idxs:
         op_out = ops[i]
         ops.append(Op(i, -1001, -1001, 0, op_out.qint, lat, 0.0))
@@ -113,7 +113,10 @@ def to_pipeline(sol: Solution, latency_cutoff: float, retiming=True, verbose=Tru
         p0_idx = _get_new_idx(op.id0, locator, opd, out_idxd, ops, stage, latency_cutoff)
         p1_idx = _get_new_idx(op.id1, locator, opd, out_idxd, ops, stage, latency_cutoff)
         if op.opcode in (6, -6):
-            data = _get_new_idx(op.data, locator, opd, out_idxd, ops, stage, latency_cutoff)
+            k = op.data & 0xFFFFFFFF
+            _shift = (op.data >> 32) & 0xFFFFFFFF
+            k = _get_new_idx(k, locator, opd, out_idxd, ops, stage, latency_cutoff)
+            data = _shift << 32 | k
         else:
             data = op.data
@@ -126,34 +129,53 @@ def to_pipeline(sol: Solution, latency_cutoff: float, retiming=True, verbose=Tru
             locator.append({stage: len(opd[stage]) - 1})
     sols = []
     max_stage = max(opd.keys())
-    n_in = sol.shape[0]
+    n_in = comb.shape[0]
     for i, stage in enumerate(opd.keys()):
         _ops = opd[stage]
         _out_idx = out_idxd[stage]
         n_out = len(_out_idx)
         if i == max_stage:
-            out_shifts = sol.out_shifts
-            out_negs = sol.out_negs
+            out_shifts = comb.out_shifts
+            out_negs = comb.out_negs
         else:
             out_shifts = [0] * len(_out_idx)
             out_negs = [False] * len(_out_idx)
-        _sol = Solution(
+        if comb.lookup_tables is not None:
+            _ops, lookup_tables = remap_table_idxs(comb, _ops)
+        else:
+            lookup_tables = None
+        _sol = CombLogic(
             shape=(n_in, n_out),
             inp_shift=[0] * n_in,
             out_idxs=_out_idx,
             out_shifts=out_shifts,
             out_negs=out_negs,
             ops=_ops,
-            carry_size=sol.carry_size,
-            adder_size=sol.adder_size,
+            carry_size=comb.carry_size,
+            adder_size=comb.adder_size,
+            lookup_tables=lookup_tables,
         )
         sols.append(_sol)
         n_in = n_out
-    csol = CascadedSolution(tuple(sols))
+    csol = Pipeline(tuple(sols))
     if retiming:
         csol = retime_pipeline(csol, verbose=verbose)
     return csol
+def remap_table_idxs(comb: CombLogic, _ops):
+    """Renumber the lookup-table references of ``_ops`` so they index a compact table tuple.
+    Ops with ``opcode == 8`` carry a table index in ``data``. The distinct indices used by
+    ``_ops`` are sorted and renumbered from 0; those ops are rebuilt with the new index and
+    all other ops are passed through untouched.
+    Returns ``(ops, lookup_tables)`` where ``lookup_tables`` holds only the referenced
+    entries of ``comb.lookup_tables`` in the new order.
+    """
+    assert comb.lookup_tables is not None
+    used = sorted({op.data for op in _ops if op.opcode == 8})
+    new_idx = {old: new for new, old in enumerate(used)}
+    remapped = [
+        Op(op.id0, op.id1, op.opcode, new_idx[op.data], op.qint, op.latency, op.cost) if op.opcode == 8 else op
+        for op in _ops
+    ]
+    lookup_tables = tuple(comb.lookup_tables[old] for old in used)
+    return remapped, lookup_tables

da4ml 0.4.1__py3-none-any.whl → 0.5.0b0__py3-none-any.whl

Potentially problematic release.

da4ml 0.4.1__py3-none-any.whl → 0.5.0b0__py3-none-any.whl