da4ml 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59)
  1. da4ml/_version.py +2 -2
  2. da4ml/cmvm/api.py +2 -6
  3. da4ml/cmvm/core/__init__.py +0 -1
  4. da4ml/cmvm/types.py +99 -19
  5. da4ml/codegen/__init__.py +5 -4
  6. da4ml/codegen/cpp/__init__.py +2 -1
  7. da4ml/codegen/cpp/cpp_codegen.py +58 -25
  8. da4ml/codegen/cpp/hls_model.py +252 -0
  9. da4ml/codegen/cpp/source/ap_types/ap_binary.h +78 -0
  10. da4ml/codegen/cpp/source/ap_types/ap_common.h +376 -0
  11. da4ml/codegen/cpp/source/ap_types/ap_decl.h +212 -0
  12. da4ml/codegen/cpp/source/ap_types/ap_fixed.h +360 -0
  13. da4ml/codegen/cpp/source/ap_types/ap_fixed_base.h +2354 -0
  14. da4ml/codegen/cpp/source/ap_types/ap_fixed_ref.h +718 -0
  15. da4ml/codegen/cpp/source/ap_types/ap_fixed_special.h +230 -0
  16. da4ml/codegen/cpp/source/ap_types/ap_int.h +330 -0
  17. da4ml/codegen/cpp/source/ap_types/ap_int_base.h +1885 -0
  18. da4ml/codegen/cpp/source/ap_types/ap_int_ref.h +1346 -0
  19. da4ml/codegen/cpp/source/ap_types/ap_int_special.h +223 -0
  20. da4ml/codegen/cpp/source/ap_types/ap_shift_reg.h +138 -0
  21. da4ml/codegen/cpp/source/ap_types/etc/ap_private.h +7199 -0
  22. da4ml/codegen/cpp/source/ap_types/hls_math.h +27 -0
  23. da4ml/codegen/cpp/source/ap_types/hls_stream.h +263 -0
  24. da4ml/codegen/cpp/source/ap_types/utils/x_hls_utils.h +80 -0
  25. da4ml/codegen/cpp/source/binder_util.hh +56 -0
  26. da4ml/codegen/cpp/source/build_binder.mk +24 -0
  27. da4ml/codegen/cpp/source/{vitis.h → vitis_bitshift.hh} +1 -1
  28. da4ml/codegen/verilog/__init__.py +2 -3
  29. da4ml/codegen/verilog/comb.py +65 -24
  30. da4ml/codegen/verilog/io_wrapper.py +36 -141
  31. da4ml/codegen/verilog/pipeline.py +21 -3
  32. da4ml/codegen/verilog/source/binder_util.hh +72 -0
  33. da4ml/codegen/verilog/source/build_prj.tcl +0 -1
  34. da4ml/codegen/verilog/source/mux.v +58 -0
  35. da4ml/codegen/verilog/source/negative.v +28 -0
  36. da4ml/codegen/verilog/source/shift_adder.v +4 -1
  37. da4ml/codegen/verilog/source/template.xdc +3 -0
  38. da4ml/codegen/verilog/verilog_model.py +42 -15
  39. da4ml/converter/__init__.py +0 -0
  40. da4ml/converter/hgq2/parser.py +105 -0
  41. da4ml/converter/hgq2/replica.py +383 -0
  42. da4ml/trace/__init__.py +2 -2
  43. da4ml/trace/fixed_variable.py +177 -18
  44. da4ml/trace/fixed_variable_array.py +124 -9
  45. da4ml/trace/ops/__init__.py +22 -6
  46. da4ml/trace/ops/conv_utils.py +146 -14
  47. da4ml/trace/ops/einsum_utils.py +9 -6
  48. da4ml/trace/ops/reduce_utils.py +103 -0
  49. da4ml/trace/pipeline.py +36 -34
  50. da4ml/trace/tracer.py +37 -5
  51. da4ml-0.3.0.dist-info/METADATA +107 -0
  52. da4ml-0.3.0.dist-info/RECORD +64 -0
  53. da4ml/codegen/cpp/source/vitis_bridge.h +0 -17
  54. da4ml-0.2.0.dist-info/METADATA +0 -65
  55. da4ml-0.2.0.dist-info/RECORD +0 -39
  56. /da4ml/codegen/verilog/source/{ioutils.hh → ioutil.hh} +0 -0
  57. {da4ml-0.2.0.dist-info → da4ml-0.3.0.dist-info}/WHEEL +0 -0
  58. {da4ml-0.2.0.dist-info → da4ml-0.3.0.dist-info}/licenses/LICENSE +0 -0
  59. {da4ml-0.2.0.dist-info → da4ml-0.3.0.dist-info}/top_level.txt +0 -0

da4ml/trace/fixed_variable.py
@@ -43,9 +43,9 @@ class FixedVariable:
     ) -> None:
         assert low <= high, f'low {low} must be less than high {high}'

-        if low == high:
+        if low == high and opr != 'new':
             opr = 'const'
-            _factor = 1.0
+            _factor = _factor
             _from = ()

         low, high, step = Decimal(low), Decimal(high), Decimal(step)
@@ -72,15 +72,21 @@ class FixedVariable:
         self.latency = _latency
         self.cost = _cost

+        # Update latency for constant variables to match the current variable for piplining
+
+        for v in self._from:
+            if v.opr == 'const':
+                v.latency = self.latency
+
     def get_cost_and_latency(self):
         if self.opr == 'const':
             return 0.0, 0.0
-        if self.opr in ('vadd', 'cadd'):
+        if self.opr in ('vadd', 'cadd', 'min', 'max'):
             adder_size = self.hwconf.adder_size
             carry_size = self.hwconf.carry_size
             latency_cutoff = self.hwconf.latency_cutoff

-            if self.opr == 'vadd':
+            if self.opr in ('min', 'max', 'vadd'):
                 assert len(self._from) == 2
                 v0, v1 = self._from
                 int0, int1 = v0.qint, v1.qint
@@ -89,8 +95,6 @@ class FixedVariable:
             else:
                 assert len(self._from) == 1
                 assert self._data is not None, 'cadd must have data'
-                # int0 = self._from[0].qint
-                # int1 = QInterval(float(self._data), float(self._data), float(self.step))
                 _f = _const_f(self._data)
                 _cost = float(ceil(log2(abs(self._data) + Decimal(2) ** -_f))) + _f
                 base_latency = self._from[0].latency
@@ -138,6 +142,12 @@ class FixedVariable:
         k = self.low < 0
         return k, i, f

+    @classmethod
+    def from_const(cls, const: float | Decimal, hwconf: HWConfig, latency: float, _factor: float | Decimal):
+        f = _const_f(const)
+        step = Decimal(2) ** -f
+        return cls(const, const, step, hwconf=hwconf, opr='const', _factor=_factor, latency=latency)
+
     def __repr__(self) -> str:
         if self._factor == 1:
             return f'FixedVariable({self.low}, {self.high}, {self.step})'
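
The new `from_const` classmethod wraps a plain constant as a degenerate variable whose interval collapses to a single point, with `step = 2**-f` taken from `_const_f`. Below is a hedged plain-Python sketch of that arithmetic, assuming `_const_f(c)` returns the number of fractional bits needed to represent `c` exactly (the helper itself is not part of this hunk):

```python
# Sketch only: mirrors the arithmetic of FixedVariable.from_const shown above,
# under the assumption that _const_f(c) counts the fractional bits of c.
from decimal import Decimal

def const_fractional_bits(c: Decimal) -> int:
    f = 0
    while c % 1 != 0:  # keep doubling until the value is an integer
        c *= 2
        f += 1
    return f

c = Decimal('0.75')
f = const_fractional_bits(c)   # 2 fractional bits
step = Decimal(2) ** -f        # 0.25
# the resulting variable would have low == high == 0.75 and opr == 'const'
```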
@@ -185,7 +195,9 @@ class FixedVariable:
             hwconf=self.hwconf,
         )

-    def _const_add(self, other: float | Decimal):
+    def _const_add(self, other: float | Decimal | None):
+        if other is None:
+            return self
         if not isinstance(other, (int, float, Decimal)):
             other = float(other)  # direct numpy to decimal raises error
         other = Decimal(other)
@@ -222,7 +234,7 @@ class FixedVariable:
         other: 'float|Decimal',
     ):
         if other == 0:
-            return FixedVariable(0, 0, 1, hwconf=self.hwconf)
+            return FixedVariable(0, 0, 1, hwconf=self.hwconf, opr='const')

         assert log2(abs(other)) % 1 == 0, 'Only support pow2 multiplication'

@@ -266,8 +278,8 @@ class FixedVariable:
             step = Decimal(2) ** -f
             i = ceil(log2(val + step)) if not i else i
             eps = step / 2 if round_mode == 'RND' else 0
-            val = floor(val / step + eps) % Decimal(2) ** i * step
-            return FixedVariable(val, val, step, hwconf=self.hwconf)
+            val = (floor(val / step + eps) * step) % (Decimal(2) ** i)
+            return FixedVariable(val, val, step, hwconf=self.hwconf, opr='const')

         step = max(Decimal(2) ** -f, self.step) if f is not None else self.step
         if step > self.step and round_mode == 'RND':
@@ -281,6 +293,10 @@ class FixedVariable:
         low = Decimal(0)
         high = _high
         _factor = self._factor
+
+        if self.low == low and self.high == high and self.step == step:
+            return self
+
         return FixedVariable(
             low,
             high,
@@ -301,7 +317,7 @@ class FixedVariable:
         round_mode: str = 'TRN',
     ):
         overflow_mode, round_mode = overflow_mode.upper(), round_mode.upper()
-        assert overflow_mode in ('WRAP', 'SAT')
+        assert overflow_mode in ('WRAP', 'SAT', 'SAT_SM')
         assert round_mode in ('TRN', 'RND')

         _k, _i, _f = self.kif
@@ -312,32 +328,42 @@ class FixedVariable:
         if f < _f and round_mode == 'RND':
             return (self + 2.0 ** (-f - 1)).quantize(k, i, f, overflow_mode, 'TRN')

+        if overflow_mode in ('SAT', 'SAT_SM'):
+            step = Decimal(2) ** -f
+            _high = Decimal(2) ** i
+            high = _high - step
+            low = -_high * k if overflow_mode == 'SAT' else -high * k
+            return self.max_of(low).min_of(high).quantize(k, i, f, 'WRAP', round_mode)
+
         if self.low == self.high:
             val = self.low
             step = Decimal(2) ** -f
             _high = Decimal(2) ** i
             high, low = _high - step, -_high * k
             val = (floor(val / step) * step - low) % (2 * _high) + low
-            return FixedVariable(val, val, step, hwconf=self.hwconf)
+            return FixedVariable(val, val, step, hwconf=self.hwconf, opr='const')

         # TODO: corner cases exists (e.g., overflow to negative, or negative overflow to high value)
         # bit-exactness will be lost in these cases, but they should never happen (quantizers are used in a weird way)
         # Keeping this for now; change if absolutely necessary
         f = min(f, _f)
-        k = min(k, _k)
+        k = min(k, _k) if i >= _i else k
         i = min(i, _i)

-        step = max(Decimal(2) ** -f, self.step)
+        if i + k + f <= 0:
+            return FixedVariable(0, 0, 1, hwconf=self.hwconf, opr='const')
+
+        step = Decimal(2) ** -f

         low = -k * Decimal(2) ** i
+
         high = Decimal(2) ** i - step
         _low, _high = self.low, self.high

         if _low >= low and _high <= high:
             low, high = _low, _high
-
-        if low > high:
-            return FixedVariable(0, 0, 1, hwconf=self.hwconf)
+        low = floor(low / step) * step
+        high = ceil(high / step) * step

         return FixedVariable(
             low,
@@ -345,7 +371,7 @@ class FixedVariable:
             step,
             _from=(self,),
             _factor=abs(self._factor),
-            opr='wrap' if overflow_mode == 'WRAP' else 'sat',
+            opr='wrap',
             latency=self.latency,
             hwconf=self.hwconf,
         )
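
With this change, SAT and SAT_SM are no longer emitted as separate `sat` nodes: `quantize` first clamps the value with the new `max_of`/`min_of` ops and then applies an ordinary WRAP quantization. As a hedged illustration of the bounds involved (plain Python, not the da4ml API), for a signed format with `k=1`, `i=3`, `f=2`:

```python
# Illustration only: the saturation bounds used by the new SAT / SAT_SM branch.
from decimal import Decimal

k, i, f = 1, 3, 2
step = Decimal(2) ** -f        # 0.25
_high = Decimal(2) ** i        # 8
high = _high - step            # 7.75

low_sat = -_high * k           # SAT:    clamp to [-8.00, 7.75]
low_sat_sm = -high * k         # SAT_SM: clamp to [-7.75, 7.75] (symmetric)
print(low_sat, low_sat_sm, high)
```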
@@ -356,3 +382,136 @@ class FixedVariable:
         _high = Decimal(2) ** i
         low, high = k * _high, _high - step
         return cls(low, high, step, **kwargs)
+
+    def msb_mux(self, a: 'FixedVariable', b: 'FixedVariable', qint: tuple[Decimal, Decimal, Decimal] | None = None):
+        assert isinstance(a, FixedVariable) and isinstance(b, FixedVariable), 'msb_mux requires two FixedVariables'
+        if self._factor < 0:
+            return (-self).msb_mux(b, a, qint)
+
+        if a._factor < 0:
+            qint = (-qint[1], -qint[0], qint[2]) if qint else None
+            return -(self.msb_mux(-a, -b, qint=qint))
+
+        _factor = a._factor
+
+        if qint is None:
+            qint = (min(a.low, b.low), max(a.high, b.high), min(a.step, b.step))
+
+        dlat, dcost = cost_add(a.qint, b.qint, 0, False, self.hwconf.adder_size, self.hwconf.carry_size)
+        return FixedVariable(
+            *qint,
+            _from=(self, a, b),
+            _factor=_factor,
+            opr='msb_mux',
+            latency=max(a.latency, b.latency, self.latency) + dlat,
+            hwconf=self.hwconf,
+            cost=dcost,
+        )
+
+    def max_of(self, other):
+        if other == 0:
+            return self.relu()
+        if other == -float('inf'):
+            return self
+        if other == float('inf'):
+            raise ValueError('Cannot apply max_of with inf')
+        if not isinstance(other, FixedVariable):
+            other = FixedVariable.from_const(other, hwconf=self.hwconf, latency=self.latency, _factor=abs(self._factor))
+
+        if self.low >= other.high:
+            return self
+        if self.high <= other.low:
+            return other
+
+        qint = (max(self.low, other.low), max(self.high, other.high), min(self.step, other.step))
+        return (self - other).msb_mux(other, self, qint=qint)
+
+    def min_of(self, other):
+        if other == 0:
+            return (-self).relu()
+        if other == float('inf'):
+            return self
+        if other == -float('inf'):
+            raise ValueError('Cannot apply min_of with -inf')
+        if not isinstance(other, FixedVariable):
+            other = FixedVariable.from_const(other, hwconf=self.hwconf, latency=self.latency, _factor=(self._factor))
+
+        if self.high <= other.low:
+            return self
+        if self.low >= other.high:
+            return other
+
+        qint = (min(self.low, other.low), min(self.high, other.high), min(self.step, other.step))
+        return (self - other).msb_mux(self, other, qint=qint)
+
+
+class FixedVariableInput(FixedVariable):
+    def __init__(
+        self,
+        latency: float | None = None,
+        hwconf=HWConfig(-1, -1, -1),
+    ) -> None:
+        self.low = Decimal(1e10)
+        self.high = Decimal(-1e10)
+        self.step = Decimal(1e10)
+        self._factor = Decimal(1)
+        self._from: tuple[FixedVariable, ...] = ()
+        self.opr = 'new'
+        self._data = None
+        self.id = uuid4()
+        self.hwconf = hwconf
+
+        self.latency = latency if latency is not None else 0.0
+        self.cost = 0.0
+
+    def __add__(self, other):
+        raise ValueError('Cannot operate on unquantized input variable')
+
+    def __sub__(self, other):
+        raise ValueError('Cannot operate on unquantized input variable')
+
+    def __neg__(self):
+        raise ValueError('Cannot negate unquantized input variable')
+
+    def relu(self, *args, **kwargs):
+        raise ValueError('Cannot apply relu on unquantized input variable')
+
+    def max_of(self, other):
+        raise ValueError('Cannot apply max_of on unquantized input variable')
+
+    def min_of(self, other):
+        raise ValueError('Cannot apply min_of on unquantized input variable')
+
+    def quantize(
+        self,
+        k: int | bool,
+        i: int,
+        f: int,
+        overflow_mode: str = 'WRAP',
+        round_mode: str = 'TRN',
+    ):
+        assert overflow_mode == 'WRAP'
+
+        if k + i + f <= 0:
+            return FixedVariable(0, 0, 1, hwconf=self.hwconf, opr='const')
+
+        if round_mode == 'RND':
+            return (self.quantize(k, i, f + 1) + 2.0 ** (-f - 1)).quantize(k, i, f, overflow_mode, 'TRN')
+
+        step = Decimal(2) ** -f
+        _high = Decimal(2) ** i
+        low, high = -_high * k, _high - step
+        self.high = max(self.high, high)
+        self.low = min(self.low, low)
+        self.step = min(self.step, step)
+
+        return FixedVariable(
+            low,
+            high,
+            step,
+            _from=(self,),
+            _factor=self._factor,
+            opr='wrap',
+            latency=self.latency,
+            hwconf=self.hwconf,
+        )
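
The selection behind `max_of`/`min_of` is a mux driven by the sign (MSB) of a difference: `a.max_of(b)` lowers to `(a - b).msb_mux(b, a)` and `a.min_of(b)` to `(a - b).msb_mux(a, b)`, with the cost charged as one comparison-sized add (`cost_add` over the two operand intervals). The exact select polarity lives in the code generators rather than in this file, but the intent can be read off `max_of`/`min_of`; a hedged behavioural sketch in plain Python:

```python
# Behavioural sketch only: what (a - b).msb_mux(b, a) computes for max_of and
# (a - b).msb_mux(a, b) for min_of, ignoring quantization intervals and cost.
def msb_mux(sel: float, on_neg: float, on_nonneg: float) -> float:
    # pick on_neg when the sign bit (MSB) of sel is set, i.e. sel < 0
    return on_neg if sel < 0 else on_nonneg

def max_of(a: float, b: float) -> float:
    return msb_mux(a - b, b, a)

def min_of(a: float, b: float) -> float:
    return msb_mux(a - b, a, b)

assert max_of(1.5, -0.25) == 1.5 and min_of(1.5, -0.25) == -0.25
```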

da4ml/trace/fixed_variable_array.py
@@ -1,20 +1,110 @@
-from typing import Any
+from inspect import signature
+from typing import Any, TypeVar

 import numpy as np
+from numba.typed import List as NumbaList
 from numpy.typing import NDArray

 from ..cmvm import solve
-from .fixed_variable import FixedVariable, HWConfig, QInterval
+from .fixed_variable import FixedVariable, FixedVariableInput, HWConfig, QInterval
+from .ops import einsum, reduce
+
+T = TypeVar('T')
+
+
+def to_raw_arr(obj: T) -> T:
+    if isinstance(obj, tuple):
+        return tuple(to_raw_arr(x) for x in obj)  # type: ignore
+    elif isinstance(obj, list):
+        return [to_raw_arr(x) for x in obj]  # type: ignore
+    elif isinstance(obj, dict):
+        return {k: to_raw_arr(v) for k, v in obj.items()}  # type: ignore
+    if isinstance(obj, FixedVariableArray):
+        return obj._vars  # type: ignore
+    return obj
+
+
+def _max_of(a, b):
+    if isinstance(a, FixedVariable):
+        return a.max_of(b)
+    elif isinstance(b, FixedVariable):
+        return b.max_of(a)
+    else:
+        return max(a, b)
+
+
+def _min_of(a, b):
+    if isinstance(a, FixedVariable):
+        return a.min_of(b)
+    elif isinstance(b, FixedVariable):
+        return b.min_of(a)
+    else:
+        return min(a, b)


 class FixedVariableArray:
+    __array_priority__ = 100
+
+    def __array_function__(self, func, types, args, kwargs):
+        if func is np.matmul:
+            if len(args) == 1 and isinstance(args[0], np.ndarray):
+                return self.__matmul__(args[0])
+            elif len(args) == 2 and isinstance(args[0], np.ndarray) and isinstance(args[1], np.ndarray):
+                return self.__rmatmul__(args[1])
+
+        if func in (np.mean, np.sum, np.amax, np.amin, np.max, np.min):
+            match func:
+                case np.mean:
+                    _x = reduce(lambda x, y: x + y, self, *args[1:], **kwargs)
+                    return _x * (_x.size / self._vars.size)
+                case np.sum:
+                    return reduce(lambda x, y: x + y, self, *args[1:], **kwargs)
+                case np.max | np.amax:
+                    return reduce(_max_of, self, *args[1:], **kwargs)
+                case np.min | np.amin:
+                    return reduce(_min_of, self, *args[1:], **kwargs)
+                case _:
+                    raise NotImplementedError(f'Unsupported function: {func}')
+
+        if func is np.clip:
+            assert len(args) == 3, 'Clip function requires exactly three arguments'
+            x, low, high = args
+            _x, low, high = np.broadcast_arrays(x, low, high)
+            x = FixedVariableArray(_x, self.solver_options)
+            x = np.amax(np.stack((x, low), axis=-1), axis=-1)  # type: ignore
+            return np.amin(np.stack((x, high), axis=-1), axis=-1)
+
+        if func is np.einsum:
+            # assert len(args) == 2
+            sig = signature(np.einsum)
+            bind = sig.bind(*args, **kwargs)
+            eq = args[0]
+            operands = bind.arguments['operands']
+            if isinstance(operands[0], str):
+                operands = operands[1:]
+            assert len(operands) == 2, 'Einsum on FixedVariableArray requires exactly two operands'
+            assert bind.arguments.get('out', None) is None, 'Output argument is not supported'
+            return einsum(eq, *operands)
+
+        args, kwargs = to_raw_arr(args), to_raw_arr(kwargs)
+        return FixedVariableArray(
+            func(*args, **kwargs),
+            self.solver_options,
+        )
+
     def __init__(
         self,
         vars: NDArray,
         solver_options: dict[str, Any] | None = None,
     ):
         self._vars = np.array(vars)
-        self.solver_options = solver_options
+        _solver_options = signature(solve).parameters
+        _solver_options = {k: v.default for k, v in _solver_options.items() if v.default is not v.empty}
+        if solver_options is not None:
+            _solver_options.update(solver_options)
+        _solver_options.pop('qintervals', None)
+        _solver_options.pop('latencies', None)
+        self.solver_options = _solver_options

     @classmethod
     def from_lhs(
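
With `__array_priority__` and `__array_function__` in place, ordinary NumPy calls on a `FixedVariableArray` are intercepted and routed to da4ml's symbolic ops (`reduce`, `einsum`, the clip branch) instead of falling back to object-dtype arithmetic. A hedged usage sketch; the import locations of `FixedVariableArrayInput` and `HWConfig` are assumptions, not confirmed by this diff:

```python
# Usage sketch only; import paths and defaults are assumptions, not shown in this diff.
import numpy as np
from da4ml.trace import FixedVariableArrayInput, HWConfig

x = FixedVariableArrayInput((8,), HWConfig(-1, -1, -1))
x = x.quantize(k=1, i=3, f=4)    # give the symbolic inputs a concrete fixed-point type

s = np.sum(x)                    # dispatched to reduce(lambda a, b: a + b, x, ...)
m = np.amax(x)                   # dispatched to reduce(_max_of, x, ...)
y = np.clip(x, 0.0, 1.0)         # dispatched to the np.clip branch (stacked max/min)
z = x @ np.random.rand(8, 4)     # constant matmul, routed through the CMVM solver
```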
@@ -75,8 +165,10 @@ class FixedVariableArray:
         r = []
         for i in range(mat0.shape[0]):
             vec = mat0[i]
-            qintervals = tuple([QInterval(float(v.low), float(v.high), float(v.step)) for v in vec._vars])
-            latencies = tuple([float(v.latency) for v in vec._vars])
+            _qintervals = [QInterval(float(v.low), float(v.high), float(v.step)) for v in vec._vars]
+            _latencies = [float(v.latency) for v in vec._vars]
+            qintervals = NumbaList(_qintervals)  # type: ignore
+            latencies = NumbaList(_latencies)  # type: ignore
             hwconf = self._vars.ravel()[0].hwconf
             kwargs.update(adder_size=hwconf.adder_size, carry_size=hwconf.carry_size)
             _mat = np.ascontiguousarray(mat1.astype(np.float32))
@@ -96,8 +188,8 @@ class FixedVariableArray:
         axes = _axes[ndim0 - 1 :] + _axes[: ndim0 - 1]
         return r.transpose(axes)

-    def __getitem__(self, *item):
-        vars = self._vars[*item]
+    def __getitem__(self, item):
+        vars = self._vars[item]
         if isinstance(vars, np.ndarray):
             return FixedVariableArray(vars, self.solver_options)
         else:
@@ -111,9 +203,13 @@ class FixedVariableArray:
         return self._vars.shape

     def __add__(self, other):
+        if isinstance(other, FixedVariableArray):
+            return FixedVariableArray(self._vars + other._vars, self.solver_options)
         return FixedVariableArray(self._vars + other, self.solver_options)

     def __sub__(self, other):
+        if isinstance(other, FixedVariableArray):
+            return FixedVariableArray(self._vars - other._vars, self.solver_options)
         return FixedVariableArray(self._vars - other, self.solver_options)

     def __mul__(self, other):
@@ -139,7 +235,7 @@ class FixedVariableArray:
         i = np.broadcast_to(i, shape) if i is not None else np.full(shape, None)
         f = np.broadcast_to(f, shape) if f is not None else np.full(shape, None)
         ret = []
-        for v, i, f in zip(self._vars.ravel(), i.ravel(), f.ravel()):
+        for v, i, f in zip(self._vars.ravel(), i.ravel(), f.ravel()):  # type: ignore
             ret.append(v.relu(i=i, f=f, round_mode=round_mode))
         return FixedVariableArray(np.array(ret).reshape(shape), self.solver_options)

@@ -156,7 +252,7 @@ class FixedVariableArray:
         i = np.broadcast_to(i, shape) if i is not None else np.full(shape, None)
         f = np.broadcast_to(f, shape) if f is not None else np.full(shape, None)
         ret = []
-        for v, k, i, f in zip(self._vars.ravel(), k.ravel(), i.ravel(), f.ravel()):
+        for v, k, i, f in zip(self._vars.ravel(), k.ravel(), i.ravel(), f.ravel()):  # type: ignore
             ret.append(v.quantize(k=k, i=i, f=f, overflow_mode=overflow_mode, round_mode=round_mode))
         return FixedVariableArray(np.array(ret).reshape(shape), self.solver_options)

@@ -175,3 +271,22 @@ class FixedVariableArray:
     @property
     def dtype(self):
         return self._vars.dtype
+
+    @property
+    def size(self):
+        return self._vars.size
+
+    @property
+    def kif(self):
+        shape = self._vars.shape
+        kif = np.array([v.kif for v in self._vars.ravel()]).reshape(*shape, 3)
+        return np.moveaxis(kif, -1, 0)
+
+
+class FixedVariableArrayInput(FixedVariableArray):
+    def __init__(self, shape: tuple[int, ...] | int, hwconf: HWConfig, solver_options: dict[str, Any] | None = None, latency=0.0):
+        _vars = np.empty(shape, dtype=object)
+        _vars_f = _vars.ravel()
+        for i in range(_vars.size):
+            _vars_f[i] = FixedVariableInput(latency, hwconf)
+        super().__init__(_vars, solver_options)
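
The new `kif` property stacks each element's `(keep_negative, integer_bits, fractional_bits)` triplet along a leading axis, so `arr.kif` has shape `(3, *arr.shape)`. A hedged sketch of consuming that layout (the helper below is illustrative, not part of the package):

```python
# Sketch only: unpacking the (3, *shape) layout produced by FixedVariableArray.kif.
import numpy as np

def total_bits(kif: np.ndarray) -> np.ndarray:
    """kif has shape (3, *shape); return the per-element bit width k + i + f."""
    k, i, f = kif               # unpack along the leading axis created by np.moveaxis
    return k + i + f

# e.g. a (2, 2) array quantized everywhere to a signed format with k=1, i=3, f=4:
kif = np.stack([np.ones((2, 2)), np.full((2, 2), 3), np.full((2, 2), 4)])
assert total_bits(kif).shape == (2, 2) and total_bits(kif)[0, 0] == 8
```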

da4ml/trace/ops/__init__.py
@@ -1,16 +1,22 @@
-from typing import TypeVar
+from typing import TYPE_CHECKING, TypeVar

 import numpy as np
 from numpy.typing import NDArray

-from ..fixed_variable_array import FixedVariable, FixedVariableArray
-from .conv_utils import conv
+from ..fixed_variable_array import FixedVariable
+from .conv_utils import conv, pool
 from .einsum_utils import einsum
+from .reduce_utils import reduce

-T = TypeVar('T', FixedVariableArray, NDArray[np.floating], list[FixedVariable])
+if TYPE_CHECKING:
+    from ..fixed_variable_array import FixedVariableArray
+
+T = TypeVar('T', 'FixedVariableArray', NDArray[np.floating], list[FixedVariable])


 def relu(x: T, i: NDArray[np.integer] | None = None, f: NDArray[np.integer] | None = None, round_mode: str = 'TRN') -> T:
+    from ..fixed_variable_array import FixedVariableArray
+
     if isinstance(x, FixedVariableArray):
         return x.relu(i=i, f=f, round_mode=round_mode)
     elif isinstance(x, list):
@@ -35,12 +41,20 @@ def quantize(
     overflow_mode: str = 'WRAP',
     round_mode: str = 'TRN',
 ) -> T:
-    assert overflow_mode.upper() == 'WRAP', 'Only WRAP overflow mode is supported'
+    from ..fixed_variable_array import FixedVariableArray
+
     if isinstance(x, FixedVariableArray):
         return x.quantize(k=k, i=i, f=f, overflow_mode=overflow_mode, round_mode=round_mode)
     else:
+        x = x.copy()
+        if overflow_mode in ('SAT', 'SAT_SM'):
+            step = 2.0**-f
+            _high = 2.0**i
+            high = _high - step
+            low = -_high * k if overflow_mode == 'SAT' else -high * k
+            x = np.clip(x, low, high)  # type: ignore
         if round_mode.upper() == 'RND':
-            x += 2.0 ** (-f - 1)
+            x += 2.0 ** (-f - 1)  # type: ignore
         b = k + i + f
         bias = 2.0 ** (b - 1) * k
         eps = 2.0**-f
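
For the plain `ndarray` fallback, the change only prepends a saturation clip before the existing wrap/round arithmetic (the tail of the function is unchanged and not shown in this hunk). A hedged numeric illustration of just that pre-clip step:

```python
# Illustration of the SAT pre-clip added above, for k=1, i=2, f=1.
import numpy as np

k, i, f = 1, 2, 1
step = 2.0**-f                # 0.5
_high = 2.0**i                # 4.0
high = _high - step           # 3.5
low = -_high * k              # -4.0 for 'SAT' (use -high * k for 'SAT_SM')

x = np.array([5.25, -6.0, 1.3])
print(np.clip(x, low, high))  # [ 3.5 -4.   1.3]
```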
@@ -52,4 +66,6 @@ __all__ = [
     'einsum',
     'relu',
     'quantize',
+    'pool',
+    'reduce',
 ]