da4ml 0.2.1__py3-none-any.whl → 0.3.0.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of da4ml might be problematic.
- da4ml/_version.py +2 -2
- da4ml/cmvm/types.py +95 -15
- da4ml/codegen/__init__.py +5 -4
- da4ml/codegen/cpp/__init__.py +2 -1
- da4ml/codegen/cpp/cpp_codegen.py +56 -23
- da4ml/codegen/cpp/hls_model.py +252 -0
- da4ml/codegen/cpp/source/ap_types/ap_binary.h +78 -0
- da4ml/codegen/cpp/source/ap_types/ap_common.h +376 -0
- da4ml/codegen/cpp/source/ap_types/ap_decl.h +212 -0
- da4ml/codegen/cpp/source/ap_types/ap_fixed.h +360 -0
- da4ml/codegen/cpp/source/ap_types/ap_fixed_base.h +2354 -0
- da4ml/codegen/cpp/source/ap_types/ap_fixed_ref.h +718 -0
- da4ml/codegen/cpp/source/ap_types/ap_fixed_special.h +230 -0
- da4ml/codegen/cpp/source/ap_types/ap_int.h +330 -0
- da4ml/codegen/cpp/source/ap_types/ap_int_base.h +1885 -0
- da4ml/codegen/cpp/source/ap_types/ap_int_ref.h +1346 -0
- da4ml/codegen/cpp/source/ap_types/ap_int_special.h +223 -0
- da4ml/codegen/cpp/source/ap_types/ap_shift_reg.h +138 -0
- da4ml/codegen/cpp/source/ap_types/etc/ap_private.h +7199 -0
- da4ml/codegen/cpp/source/ap_types/hls_math.h +27 -0
- da4ml/codegen/cpp/source/ap_types/hls_stream.h +263 -0
- da4ml/codegen/cpp/source/ap_types/utils/x_hls_utils.h +80 -0
- da4ml/codegen/cpp/source/binder_util.hh +56 -0
- da4ml/codegen/cpp/source/build_binder.mk +24 -0
- da4ml/codegen/cpp/source/{vitis.h → vitis_bitshift.hh} +1 -1
- da4ml/codegen/verilog/__init__.py +2 -3
- da4ml/codegen/verilog/comb.py +65 -24
- da4ml/codegen/verilog/io_wrapper.py +36 -141
- da4ml/codegen/verilog/source/binder_util.hh +72 -0
- da4ml/codegen/verilog/source/mux.v +58 -0
- da4ml/codegen/verilog/source/negative.v +28 -0
- da4ml/codegen/verilog/source/shift_adder.v +4 -1
- da4ml/codegen/verilog/source/template.xdc +3 -0
- da4ml/codegen/verilog/verilog_model.py +36 -12
- da4ml/converter/__init__.py +0 -0
- da4ml/converter/hgq2/parser.py +105 -0
- da4ml/converter/hgq2/replica.py +383 -0
- da4ml/trace/__init__.py +2 -2
- da4ml/trace/fixed_variable.py +175 -16
- da4ml/trace/fixed_variable_array.py +109 -4
- da4ml/trace/ops/__init__.py +22 -6
- da4ml/trace/ops/conv_utils.py +147 -15
- da4ml/trace/ops/einsum_utils.py +9 -6
- da4ml/trace/ops/reduce_utils.py +103 -0
- da4ml/trace/pipeline.py +36 -34
- da4ml/trace/tracer.py +37 -7
- da4ml-0.3.0.post1.dist-info/METADATA +107 -0
- da4ml-0.3.0.post1.dist-info/RECORD +64 -0
- da4ml/codegen/cpp/source/vitis_bridge.h +0 -17
- da4ml-0.2.1.dist-info/METADATA +0 -65
- da4ml-0.2.1.dist-info/RECORD +0 -39
- /da4ml/codegen/verilog/source/{ioutils.hh → ioutil.hh} +0 -0
- {da4ml-0.2.1.dist-info → da4ml-0.3.0.post1.dist-info}/WHEEL +0 -0
- {da4ml-0.2.1.dist-info → da4ml-0.3.0.post1.dist-info}/licenses/LICENSE +0 -0
- {da4ml-0.2.1.dist-info → da4ml-0.3.0.post1.dist-info}/top_level.txt +0 -0
da4ml/trace/fixed_variable_array.py
CHANGED

@@ -1,15 +1,97 @@
 from inspect import signature
-from typing import Any
+from typing import Any, TypeVar
 
 import numpy as np
 from numba.typed import List as NumbaList
 from numpy.typing import NDArray
 
 from ..cmvm import solve
-from .fixed_variable import FixedVariable, HWConfig, QInterval
+from .fixed_variable import FixedVariable, FixedVariableInput, HWConfig, QInterval
+from .ops import einsum, reduce
+
+T = TypeVar('T')
+
+
+def to_raw_arr(obj: T) -> T:
+    if isinstance(obj, tuple):
+        return tuple(to_raw_arr(x) for x in obj)  # type: ignore
+    elif isinstance(obj, list):
+        return [to_raw_arr(x) for x in obj]  # type: ignore
+    elif isinstance(obj, dict):
+        return {k: to_raw_arr(v) for k, v in obj.items()}  # type: ignore
+    if isinstance(obj, FixedVariableArray):
+        return obj._vars  # type: ignore
+    return obj
+
+
+def _max_of(a, b):
+    if isinstance(a, FixedVariable):
+        return a.max_of(b)
+    elif isinstance(b, FixedVariable):
+        return b.max_of(a)
+    else:
+        return max(a, b)
+
+
+def _min_of(a, b):
+    if isinstance(a, FixedVariable):
+        return a.min_of(b)
+    elif isinstance(b, FixedVariable):
+        return b.min_of(a)
+    else:
+        return min(a, b)
 
 
 class FixedVariableArray:
+    __array_priority__ = 100
+
+    def __array_function__(self, func, types, args, kwargs):
+        if func is np.matmul:
+            if len(args) == 1 and isinstance(args[0], np.ndarray):
+                return self.__matmul__(args[0])
+            elif len(args) == 2 and isinstance(args[0], np.ndarray) and isinstance(args[1], np.ndarray):
+                return self.__rmatmul__(args[1])
+
+        if func in (np.mean, np.sum, np.amax, np.amin, np.max, np.min):
+            match func:
+                case np.mean:
+                    _x = reduce(lambda x, y: x + y, self, *args[1:], **kwargs)
+                    return _x * (_x.size / self._vars.size)
+                case np.sum:
+                    return reduce(lambda x, y: x + y, self, *args[1:], **kwargs)
+                case np.max | np.amax:
+                    return reduce(_max_of, self, *args[1:], **kwargs)
+                case np.min | np.amin:
+                    return reduce(_min_of, self, *args[1:], **kwargs)
+                case _:
+                    raise NotImplementedError(f'Unsupported function: {func}')
+
+        if func is np.clip:
+            assert len(args) == 3, 'Clip function requires exactly three arguments'
+            x, low, high = args
+            _x, low, high = np.broadcast_arrays(x, low, high)
+            x = FixedVariableArray(_x, self.solver_options)
+            x = np.amax(np.stack((x, low), axis=-1), axis=-1)  # type: ignore
+            return np.amin(np.stack((x, high), axis=-1), axis=-1)
+
+        if func is np.einsum:
+            # assert len(args) == 2
+            sig = signature(np.einsum)
+            bind = sig.bind(*args, **kwargs)
+            eq = args[0]
+            operands = bind.arguments['operands']
+            if isinstance(operands[0], str):
+                operands = operands[1:]
+            assert len(operands) == 2, 'Einsum on FixedVariableArray requires exactly two operands'
+            assert bind.arguments.get('out', None) is None, 'Output argument is not supported'
+            return einsum(eq, *operands)
+
+        args, kwargs = to_raw_arr(args), to_raw_arr(kwargs)
+        return FixedVariableArray(
+            func(*args, **kwargs),
+            self.solver_options,
+        )
+
     def __init__(
         self,
         vars: NDArray,
@@ -121,9 +203,13 @@ class FixedVariableArray:
         return self._vars.shape
 
     def __add__(self, other):
+        if isinstance(other, FixedVariableArray):
+            return FixedVariableArray(self._vars + other._vars, self.solver_options)
         return FixedVariableArray(self._vars + other, self.solver_options)
 
     def __sub__(self, other):
+        if isinstance(other, FixedVariableArray):
+            return FixedVariableArray(self._vars - other._vars, self.solver_options)
         return FixedVariableArray(self._vars - other, self.solver_options)
 
     def __mul__(self, other):
@@ -149,7 +235,7 @@ class FixedVariableArray:
         i = np.broadcast_to(i, shape) if i is not None else np.full(shape, None)
         f = np.broadcast_to(f, shape) if f is not None else np.full(shape, None)
         ret = []
-        for v, i, f in zip(self._vars.ravel(), i.ravel(), f.ravel()):
+        for v, i, f in zip(self._vars.ravel(), i.ravel(), f.ravel()):  # type: ignore
             ret.append(v.relu(i=i, f=f, round_mode=round_mode))
         return FixedVariableArray(np.array(ret).reshape(shape), self.solver_options)
 
@@ -166,7 +252,7 @@ class FixedVariableArray:
         i = np.broadcast_to(i, shape) if i is not None else np.full(shape, None)
         f = np.broadcast_to(f, shape) if f is not None else np.full(shape, None)
         ret = []
-        for v, k, i, f in zip(self._vars.ravel(), k.ravel(), i.ravel(), f.ravel()):
+        for v, k, i, f in zip(self._vars.ravel(), k.ravel(), i.ravel(), f.ravel()):  # type: ignore
             ret.append(v.quantize(k=k, i=i, f=f, overflow_mode=overflow_mode, round_mode=round_mode))
         return FixedVariableArray(np.array(ret).reshape(shape), self.solver_options)
 
@@ -185,3 +271,22 @@ class FixedVariableArray:
     @property
     def dtype(self):
         return self._vars.dtype
+
+    @property
+    def size(self):
+        return self._vars.size
+
+    @property
+    def kif(self):
+        shape = self._vars.shape
+        kif = np.array([v.kif for v in self._vars.ravel()]).reshape(*shape, 3)
+        return np.moveaxis(kif, -1, 0)
+
+
+class FixedVariableArrayInput(FixedVariableArray):
+    def __init__(self, shape: tuple[int, ...] | int, hwconf: HWConfig, solver_options: dict[str, Any] | None = None, latency=0.0):
+        _vars = np.empty(shape, dtype=object)
+        _vars_f = _vars.ravel()
+        for i in range(_vars.size):
+            _vars_f[i] = FixedVariableInput(latency, hwconf)
+        super().__init__(_vars, solver_options)
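The __array_function__ hook above lets ordinary NumPy calls on a traced array be lowered into the new reduce helper. A minimal usage sketch (not part of the release; written against the signatures shown in this diff, with the traced input x assumed to be produced elsewhere, e.g. by FixedVariableArrayInput):

import numpy as np

from da4ml.trace.fixed_variable_array import FixedVariableArray


def head(x: FixedVariableArray) -> FixedVariableArray:
    # np.sum and np.amax are intercepted by __array_function__ and routed to
    # reduce(), which builds balanced adder / comparison trees
    s = np.sum(x, axis=-1)
    m = np.amax(x, axis=-1)
    # symbolic + symbolic uses the FixedVariableArray.__add__ path added above
    return s + m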
da4ml/trace/ops/__init__.py
CHANGED
@@ -1,16 +1,22 @@
-from typing import TypeVar
+from typing import TYPE_CHECKING, TypeVar
 
 import numpy as np
 from numpy.typing import NDArray
 
-from ..fixed_variable_array import FixedVariable
-from .conv_utils import conv
+from ..fixed_variable_array import FixedVariable
+from .conv_utils import conv, pool
 from .einsum_utils import einsum
+from .reduce_utils import reduce
 
-
+if TYPE_CHECKING:
+    from ..fixed_variable_array import FixedVariableArray
+
+T = TypeVar('T', 'FixedVariableArray', NDArray[np.floating], list[FixedVariable])
 
 
 def relu(x: T, i: NDArray[np.integer] | None = None, f: NDArray[np.integer] | None = None, round_mode: str = 'TRN') -> T:
+    from ..fixed_variable_array import FixedVariableArray
+
     if isinstance(x, FixedVariableArray):
         return x.relu(i=i, f=f, round_mode=round_mode)
     elif isinstance(x, list):
@@ -35,12 +41,20 @@ def quantize(
     overflow_mode: str = 'WRAP',
     round_mode: str = 'TRN',
 ) -> T:
-
+    from ..fixed_variable_array import FixedVariableArray
+
     if isinstance(x, FixedVariableArray):
         return x.quantize(k=k, i=i, f=f, overflow_mode=overflow_mode, round_mode=round_mode)
     else:
+        x = x.copy()
+        if overflow_mode in ('SAT', 'SAT_SM'):
+            step = 2.0**-f
+            _high = 2.0**i
+            high = _high - step
+            low = -_high * k if overflow_mode == 'SAT' else -high * k
+            x = np.clip(x, low, high)  # type: ignore
         if round_mode.upper() == 'RND':
-            x += 2.0 ** (-f - 1)
+            x += 2.0 ** (-f - 1)  # type: ignore
         b = k + i + f
         bias = 2.0 ** (b - 1) * k
         eps = 2.0**-f
@@ -52,4 +66,6 @@ __all__ = [
    'einsum',
    'relu',
    'quantize',
+    'pool',
+    'reduce',
 ]
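For plain ndarrays, quantize now saturates before rounding when overflow_mode is 'SAT' or 'SAT_SM'. A hypothetical numeric example (not from the diff; k, i and f are passed as keywords since the full signature is not visible here):

import numpy as np

from da4ml.trace.ops import quantize

x = np.array([3.9, -4.2, 0.3])
# 1 sign bit, 2 integer bits, 1 fractional bit: values are clipped to
# [-4.0, 3.5] before rounding instead of wrapping around
q = quantize(x, k=1, i=2, f=1, overflow_mode='SAT', round_mode='RND')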
da4ml/trace/ops/conv_utils.py
CHANGED
@@ -1,10 +1,15 @@
+import typing
 from collections.abc import Sequence
+from math import ceil, prod
 from typing import TypeVar
 
 import numpy as np
 from numpy.typing import NDArray
 
-from
+from .reduce_utils import reduce
+
+if typing.TYPE_CHECKING:
+    from ..fixed_variable_array import FixedVariableArray
 
 
 def r_im2col(kernel_size: Sequence[int], arr: np.ndarray, buffer: np.ndarray, axis: int):
@@ -33,23 +38,23 @@ def stride_arr(stride: int | tuple[int, ...], arr: np.ndarray):
     ndim = arr.ndim
     if isinstance(stride, int):
         stride = (stride,) * (ndim - 1)
-    assert len(stride) == ndim - 1, f'Invalid stride {stride} for array with {ndim} dimensions'
 
     _idx = tuple(slice(None, None, st) for st in stride)
-    return arr[
+    return arr[_idx]
 
 
-
+TA = TypeVar('TA', 'FixedVariableArray', NDArray[np.integer | np.floating])
 
 
-def
-    x:
+def _conv(
+    x: TA,
     kernel: NDArray[np.integer | np.floating],
     bias: NDArray[np.integer | np.floating] | None = None,
     strides: int | tuple[int, ...] = 1,
     padding: tuple[tuple[int, int], ...] | str = 'VALID',
-
-
+) -> TA:
+    from ..fixed_variable_array import FixedVariableArray
+
     if isinstance(x, FixedVariableArray):
         solver_options = x.solver_options
         data = x._vars
@@ -63,10 +68,10 @@ def conv(
     ch_in, ch_out = kernel.shape[-2:]
     _ch_in = data.shape[-1]
     assert ch_in == _ch_in, f'Invalid input shape {data.shape} for kernel {kernel.shape}'
-
-
-
-
+    if kernel.ndim != ndim + 1:
+        if kernel.ndim == ndim:
+            raise ValueError('Inputs should not contain batch dimension')
+        raise ValueError(f'Invalid kernel shape {kernel.shape} for input with {ndim} dimensions')
     if isinstance(strides, int):
         strides = (strides,) * (ndim - 1)
     assert len(strides) == ndim - 1, f'Invalid stride {strides} for array with {ndim} dimensions'
@@ -89,16 +94,143 @@ def conv(
 
     data = np.pad(data, padding + ((0, 0),), mode='constant', constant_values=0.0)
     data = _im2col(kernel.shape, data)
+    data = stride_arr(strides, data)
     if is_symbolic:
         _data = FixedVariableArray(data, solver_options) @ kernel.reshape(-1, ch_out)
         data = _data._vars
     else:
         data = data @ kernel.reshape(-1, ch_out)
-    data = stride_arr(strides, data)
     if bias is not None:
         data = data + bias
+    if isinstance(x, FixedVariableArray):
+        return FixedVariableArray(data, solver_options)
+    return data
+
+
+def conv(
+    x: TA,
+    kernel: NDArray[np.integer | np.floating],
+    bias: NDArray[np.integer | np.floating] | None = None,
+    strides: int | tuple[int, ...] = 1,
+    padding: tuple[tuple[int, int], ...] | str = 'VALID',
+    format: str = 'channels_last',
+    groups: int | None = None,
+) -> TA:
+    from ..fixed_variable_array import FixedVariableArray
+
+    assert format in ('channels_last', 'channels_first'), f'Invalid format {format}'
+    if format == 'channels_first':
+        x = np.moveaxis(x, 0, -1)  # type: ignore
+
+    *_, _ch_in, ch_out = kernel.shape
+    ch_in = x.shape[-1]
+    assert ch_in % _ch_in == 0, f'groups is not integer (total_ch_in={ch_in}, kernel_ch_in={_ch_in})'
+    if groups is None:
+        groups = ch_in // _ch_in
+    else:
+        assert (
+            groups == ch_in // _ch_in
+        ), f'groups {groups} does not match input channels {ch_in} and kernel input channels {_ch_in}'
+    assert ch_out % groups == 0, f'groups is not integer (total_ch_out={ch_out}, groups={groups})'
+    _ch_out = ch_out // groups
+
+    buf: list[TA] = []
+    for gp in range(groups):
+        _kernel = kernel[..., gp * _ch_out : (gp + 1) * _ch_out]
+        _x = x[..., gp * _ch_in : (gp + 1) * _ch_in]
+        _buf = _conv(
+            _x,
+            _kernel,
+            strides=strides,
+            padding=padding,
+        )
+        buf.append(_buf)  # type: ignore
+
+    if isinstance(x, FixedVariableArray):
+        data = np.concatenate([b._vars for b in buf], axis=-1)  # type: ignore
+    else:
+        data = np.concatenate(buf, axis=-1)  # type: ignore
+
+    data = data + bias if bias is not None else data
+
     if format == 'channels_first':
-
-
+        return np.moveaxis(data, -1, 0)  # type: ignore
+
+    if isinstance(x, FixedVariableArray):
+        return FixedVariableArray(data, x.solver_options)
+    return data
+
+
+def pool(
+    x: TA,
+    pool_size: Sequence[int],
+    strides: int | Sequence[int] | None = None,
+    padding: tuple[tuple[int, int], ...] | str = 'VALID',
+    pool_type: str = 'avg',
+    format: str = 'channels_last',
+) -> TA:
+    from ..fixed_variable import FixedVariable
+    from ..fixed_variable_array import FixedVariableArray
+
+    if isinstance(x, FixedVariableArray):
+        solver_options = x.solver_options
+        data = x._vars
+    else:
+        solver_options = None
+        data = x
+
+    if format == 'channels_first':
+        data = np.moveaxis(data, 0, -1)
+
+    strides = strides or pool_size
+
+    assert pool_type in ('avg', 'max'), f'Invalid pool type {pool_type}'
+    ndim = data.ndim
+    if isinstance(strides, int):
+        strides = (strides,) * (ndim - 1)
+    assert len(strides) == ndim - 1, f'Invalid stride {strides} for array with {ndim} dimensions'
+
+    if isinstance(padding, str):
+        padding = padding.upper()
+        if padding == 'VALID':
+            padding = ((0, 0),) * (ndim - 1)
+        elif padding == 'SAME':
+            _padding = []
+            for i in range(ndim - 1):
+                n_pad = ceil(data.shape[i] / strides[i]) * strides[i] + (pool_size[i] - strides[i]) - data.shape[i]
+                pad0 = n_pad // 2
+                pad1 = n_pad - pad0
+                _padding.append((pad0, pad1))
+            padding = tuple(_padding)
+        else:
+            raise ValueError(f'Invalid padding {padding}')
+    assert len(padding) == ndim - 1, f'Invalid padding {padding} for array with {ndim} dimensions'
+    assert all(len(p) == 2 for p in padding), f'Invalid padding {padding} for array with {ndim} dimensions'
+
+    data = np.pad(data, padding + ((0, 0),), mode='constant', constant_values=-np.inf)
+    ch_in = data.shape[-1]
+    fake_kernel_shape = tuple(pool_size) + (ch_in, ch_in)
+    data = _im2col(fake_kernel_shape, data)
+    data = data.reshape(*data.shape[:-1], prod(pool_size), ch_in)
+    data = stride_arr(tuple(strides), data)
+    if pool_type == 'avg':
+        div = np.sum(data != -np.inf, axis=-2)
+        data = np.where(data == -np.inf, 0, data)
+        data = reduce(lambda x, y: x + y, data, axis=-2) * (1 / div)
+    else:
+
+        def max_of(a, b):
+            if isinstance(a, FixedVariable):
+                return a.max_of(b)
+            if isinstance(b, FixedVariable):
+                return b.max_of(a)
+            return max(a, b)
+
+        data = reduce(lambda x, y: max_of(x, y), data, axis=-2)
+
+    if format == 'channels_first':
+        data = np.moveaxis(data, -1, 0)
+
+    if isinstance(x, FixedVariableArray):
         return FixedVariableArray(data, solver_options)
     return data
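A hypothetical ndarray-only sketch of the reworked conv (now supporting grouped convolutions) and the new pool helper; shapes are made up and follow the channels_last layout used above:

import numpy as np

from da4ml.trace.ops import conv, pool

x = np.random.rand(8, 8, 4)          # H x W x C_in, no batch dimension
kernel = np.random.rand(3, 3, 2, 6)  # kH x kW x (C_in / groups) x C_out
y = conv(x, kernel, groups=2)        # two groups of two input channels each
p = pool(y, pool_size=(2, 2), pool_type='max')  # strides default to pool_size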
da4ml/trace/ops/einsum_utils.py
CHANGED
@@ -1,10 +1,11 @@
 from math import prod
-from typing import TypedDict, overload
+from typing import TYPE_CHECKING, TypedDict, overload
 
 import numpy as np
 from numpy.typing import NDArray
 
-
+if TYPE_CHECKING:
+    from ..fixed_variable_array import FixedVariableArray
 
 
 class EinsumRecipe(TypedDict):
@@ -105,7 +106,7 @@ def _validate_einsum_expr(fn: str, shape0: tuple[int, ...], shape1: tuple[int, .
     # Axes expansion in input0 or input1 only
     if '0' in sax_in0:
         if len(sax_in0) - 1 > len(shape0):
-            raise ValueError(f'Input0 requires at least {len(sax_in0)-1} dimensions, but only {len(shape0)} given')
+            raise ValueError(f'Input0 requires at least {len(sax_in0) - 1} dimensions, but only {len(shape0)} given')
         # Replace auto expansion indices with free indices
         n_broadcast = len(shape0) - len(sax_in0) + 1
         in0 = in0.replace('0', free_indices[:n_broadcast])
@@ -118,7 +119,7 @@ def _validate_einsum_expr(fn: str, shape0: tuple[int, ...], shape1: tuple[int, .
 
     if '0' in sax_in1:
         if len(sax_in1) - 1 > len(shape1):
-            raise ValueError(f'Input1 requires at least {len(sax_in1)-1} dimensions, but only {len(shape1)} given')
+            raise ValueError(f'Input1 requires at least {len(sax_in1) - 1} dimensions, but only {len(shape1)} given')
         # Replace expansion indices with free indices
         n_broadcast = len(shape1) - len(sax_in1) + 1
         in1 = in1.replace('0', free_indices[:n_broadcast])
@@ -271,11 +272,11 @@ def _einsum(fn: str, input0, input1) -> np.ndarray:
 
 
 @overload
-def einsum(fn: str, input0: FixedVariableArray, input1: NDArray[np.integer | np.floating]) -> FixedVariableArray: ...
+def einsum(fn: str, input0: 'FixedVariableArray', input1: NDArray[np.integer | np.floating]) -> 'FixedVariableArray': ...
 
 
 @overload
-def einsum(fn: str, input0: NDArray[np.integer | np.floating], input1: FixedVariableArray) -> FixedVariableArray: ...
+def einsum(fn: str, input0: NDArray[np.integer | np.floating], input1: 'FixedVariableArray') -> 'FixedVariableArray': ...
 
 
 @overload
@@ -285,6 +286,8 @@ def einsum(
 
 
 def einsum(fn: str, input0, input1):
+    from ..fixed_variable_array import FixedVariableArray
+
     fg0 = isinstance(input0, FixedVariableArray)
     fg1 = isinstance(input1, FixedVariableArray)
     if fg0 and fg1:
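The overloads above only switch the annotations to forward references; the call shape is unchanged: exactly two operands, one of which may be symbolic. A hypothetical sketch assuming a 2-D traced array x produced elsewhere:

import numpy as np

from da4ml.trace.fixed_variable_array import FixedVariableArray
from da4ml.trace.ops import einsum


def project(x: FixedVariableArray, w: np.ndarray) -> FixedVariableArray:
    # contract the last axis of the symbolic array with a constant weight matrix
    return einsum('ij,jk->ik', x, w)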
da4ml/trace/ops/reduce_utils.py
ADDED

@@ -0,0 +1,103 @@
+import heapq
+import typing
+from collections.abc import Callable, Sequence
+from math import prod
+from typing import TypeVar
+
+import numpy as np
+from numpy.typing import NDArray
+
+if typing.TYPE_CHECKING:
+    from ..fixed_variable import FixedVariable
+    from ..fixed_variable_array import FixedVariableArray
+
+
+T = typing.TypeVar('T', 'FixedVariable', float, np.floating)
+TA = TypeVar('TA', 'FixedVariableArray', NDArray[np.integer | np.floating])
+
+
+class Packet:
+    def __init__(self, v):
+        self.value = v
+
+    def __gt__(self, other: 'Packet') -> bool:  # type: ignore
+        from ..fixed_variable_array import FixedVariable
+
+        a, b = self.value, other.value
+
+        if isinstance(a, FixedVariable):
+            if isinstance(b, FixedVariable):
+                if b.latency > a.latency:
+                    return False
+                if b.latency < a.latency:
+                    return True
+                if b._factor > 0 and a._factor < 0:
+                    return False
+                if b._factor < 0 and a._factor > 0:
+                    return True
+                return sum(a.kif[:2]) > sum(b.kif[:2])
+            return True
+
+        return False
+
+    def __lt__(self, other: 'Packet') -> bool:  # type: ignore
+        return not self.__gt__(other)
+
+
+def _reduce(operator: Callable[[T, T], T], arr: Sequence[T]) -> T:
+    from ..fixed_variable_array import FixedVariable
+
+    if isinstance(arr, np.ndarray):
+        arr = list(arr.ravel())
+    assert len(arr) > 0, 'Array must not be empty'
+    if len(arr) == 1:
+        return arr[0]
+    dtype = arr[0].__class__
+    if not issubclass(dtype, FixedVariable):
+        r = operator(arr[0], arr[1])
+        for i in range(2, len(arr)):
+            r = operator(r, arr[i])
+        return r
+
+    heap = [Packet(v) for v in arr]  # type: ignore
+    heapq.heapify(heap)
+    while len(heap) > 1:
+        v1 = heapq.heappop(heap).value
+        v2 = heapq.heappop(heap).value
+        v = operator(v1, v2)
+        heapq.heappush(heap, Packet(v))  # type: ignore
+    return heap[0].value
+
+
+def reduce(operator: Callable[[T, T], T], x: TA, axis: int | Sequence[int] | None = None, keepdims: bool = False) -> TA:
+    """
+    Reduce the array by summing over the specified axis.
+    """
+    from ..fixed_variable_array import FixedVariableArray
+
+    if isinstance(x, FixedVariableArray):
+        solver_config = x.solver_options
+        arr = x._vars
+    else:
+        solver_config = None
+        arr = x
+    all_axis = tuple(range(arr.ndim))
+    axis = axis if axis is not None else all_axis
+    axis = (axis,) if isinstance(axis, int) else tuple(axis)
+    axis = tuple(a if a >= 0 else a + arr.ndim for a in axis)
+
+    xpose_axis = sorted(all_axis, key=lambda a: (a in axis) * 1000 + a)
+    if keepdims:
+        target_shape = tuple(d if ax not in axis else 1 for ax, d in enumerate(arr.shape))
+    else:
+        target_shape = tuple(d for ax, d in enumerate(arr.shape) if ax not in axis)
+
+    dim_contract = prod(arr.shape[a] for a in axis)
+    arr = np.transpose(arr, xpose_axis)  # type: ignore
+    _arr = arr.reshape(-1, dim_contract)
+    _arr = np.array([_reduce(operator, _arr[i]) for i in range(_arr.shape[0])])
+    r = _arr.reshape(target_shape)  # type: ignore
+
+    if isinstance(x, FixedVariableArray):
+        return FixedVariableArray(r, solver_config)
+    return r
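A hypothetical use of the new reduce helper on a plain ndarray; with FixedVariableArray inputs the same call draws operands from the Packet heap above to keep the reduction tree latency-balanced:

import numpy as np

from da4ml.trace.ops import reduce

a = np.arange(24, dtype=np.float64).reshape(2, 3, 4)
s = reduce(lambda u, v: u + v, a, axis=(1, 2), keepdims=True)  # shape (2, 1, 1)
m = reduce(max, a, axis=-1)                                    # shape (2, 3)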
da4ml/trace/pipeline.py
CHANGED
@@ -31,6 +31,35 @@ def retime_pipeline(csol: CascadedSolution, verbose=True):
     return best
 
 
+def _get_new_idx(
+    idx: int,
+    locator: list[dict[int, int]],
+    opd: dict[int, list[Op]],
+    out_idxd: dict[int, list[int]],
+    ops: list[Op],
+    stage: int,
+    latency_cutoff: int,
+):
+    if idx < 0:
+        return idx
+    p0_stages = locator[idx].keys()
+    if stage not in p0_stages:
+        # Need to copy parent to later states
+        p0_stage = max(p0_stages)
+        p0_idx = locator[idx][p0_stage]
+        for j in range(p0_stage, stage):
+            op0 = ops[idx]
+            latency = float(latency_cutoff * (j + 1))
+            out_idxd.setdefault(j, []).append(locator[idx][j])
+            _copy_op = Op(len(out_idxd[j]) - 1, -1, -1, 0, op0.qint, latency, 0.0)
+            opd.setdefault(j + 1, []).append(_copy_op)
+            p0_idx = len(opd[j + 1]) - 1
+            locator[idx][j + 1] = p0_idx
+    else:
+        p0_idx = locator[idx][stage]
+    return p0_idx
+
+
 def to_pipeline(sol: Solution, latency_cutoff: int, retiming=True, verbose=True) -> CascadedSolution:
     """Split the record into multiple stages based on the latency of the operations.
     Only useful for HDL generation.
@@ -80,46 +109,19 @@ def to_pipeline(sol: Solution, latency_cutoff: int, retiming=True, verbose=True)
             opd.setdefault(stage, []).append(op)
             locator.append({stage: len(opd[stage]) - 1})
             continue
-
-
-
-
-
-            for j in range(p0_stage, stage):
-                op0 = ops[op.id0]
-                latency = float(latency_cutoff * (j + 1))
-                out_idxd.setdefault(j, []).append(locator[op.id0][j])
-                _copy_op = Op(len(out_idxd[j]) - 1, -1, -1, 0, op0.qint, latency, 0.0)
-                opd.setdefault(j + 1, []).append(_copy_op)
-                p0_idx = len(opd[j + 1]) - 1
-                locator[op.id0][j + 1] = p0_idx
-        else:
-            p0_idx = locator[op.id0][stage]
-
-        if op.opcode in (0, 1):
-            p1_stages = locator[op.id1].keys()
-            if stage not in p1_stages:
-                # Need to copy parent to later states
-                p1_stage = max(p1_stages)
-                p1_idx = locator[op.id1][p1_stage]
-                for j in range(p1_stage, stage):
-                    op1 = ops[op.id1]
-                    latency = float(latency_cutoff * (j + 1))
-                    out_idxd.setdefault(j, []).append(locator[op.id1][j])
-                    _copy_op = Op(len(out_idxd[j]) - 1, -1, -1, 0, op1.qint, latency, 0.0)
-                    opd.setdefault(j + 1, []).append(_copy_op)
-                    p1_idx = len(opd[j + 1]) - 1
-                    locator[op.id1][j + 1] = p1_idx
-                else:
-                    p1_idx = locator[op.id1][stage]
+
+        p0_idx = _get_new_idx(op.id0, locator, opd, out_idxd, ops, stage, latency_cutoff)
+        p1_idx = _get_new_idx(op.id1, locator, opd, out_idxd, ops, stage, latency_cutoff)
+        if op.opcode in (6, -6):
+            data = _get_new_idx(op.data, locator, opd, out_idxd, ops, stage, latency_cutoff)
         else:
-
+            data = op.data
 
         if p1_idx == -1001:
             # Output to external buffer
             out_idxd.setdefault(stage, []).append(p0_idx)
         else:
-            _Op = Op(p0_idx, p1_idx, op.opcode,
+            _Op = Op(p0_idx, p1_idx, op.opcode, data, op.qint, op.latency, op.cost)
             opd.setdefault(stage, []).append(_Op)
             locator.append({stage: len(opd[stage]) - 1})
     sols = []