da4ml 0.4.0__py3-none-any.whl → 0.5.0b0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
Potentially problematic release.
This version of da4ml might be problematic.
- da4ml/__init__.py +2 -16
- da4ml/_version.py +2 -2
- da4ml/cmvm/__init__.py +2 -2
- da4ml/cmvm/api.py +15 -4
- da4ml/cmvm/core/__init__.py +2 -2
- da4ml/cmvm/types.py +32 -18
- da4ml/cmvm/util/bit_decompose.py +2 -2
- da4ml/codegen/hls/hls_codegen.py +10 -5
- da4ml/codegen/hls/hls_model.py +7 -4
- da4ml/codegen/rtl/common_source/build_binder.mk +6 -5
- da4ml/codegen/rtl/common_source/build_quartus_prj.tcl +104 -0
- da4ml/codegen/rtl/common_source/{build_prj.tcl → build_vivado_prj.tcl} +39 -18
- da4ml/codegen/rtl/common_source/template.sdc +27 -0
- da4ml/codegen/rtl/common_source/template.xdc +11 -13
- da4ml/codegen/rtl/rtl_model.py +105 -53
- da4ml/codegen/rtl/verilog/__init__.py +2 -1
- da4ml/codegen/rtl/verilog/comb.py +47 -7
- da4ml/codegen/rtl/verilog/io_wrapper.py +4 -4
- da4ml/codegen/rtl/verilog/pipeline.py +12 -12
- da4ml/codegen/rtl/verilog/source/lookup_table.v +27 -0
- da4ml/codegen/rtl/vhdl/comb.py +27 -21
- da4ml/codegen/rtl/vhdl/io_wrapper.py +11 -11
- da4ml/codegen/rtl/vhdl/pipeline.py +12 -12
- da4ml/codegen/rtl/vhdl/source/lookup_table.vhd +52 -0
- da4ml/converter/__init__.py +57 -1
- da4ml/converter/hgq2/parser.py +4 -25
- da4ml/converter/hgq2/replica.py +210 -25
- da4ml/trace/fixed_variable.py +239 -29
- da4ml/trace/fixed_variable_array.py +276 -48
- da4ml/trace/ops/__init__.py +31 -15
- da4ml/trace/ops/reduce_utils.py +3 -3
- da4ml/trace/pipeline.py +40 -18
- da4ml/trace/tracer.py +33 -8
- da4ml/typing/__init__.py +3 -0
- {da4ml-0.4.0.dist-info → da4ml-0.5.0b0.dist-info}/METADATA +2 -1
- {da4ml-0.4.0.dist-info → da4ml-0.5.0b0.dist-info}/RECORD +39 -35
- da4ml/codegen/rtl/vhdl/source/template.xdc +0 -32
- {da4ml-0.4.0.dist-info → da4ml-0.5.0b0.dist-info}/WHEEL +0 -0
- {da4ml-0.4.0.dist-info → da4ml-0.5.0b0.dist-info}/licenses/LICENSE +0 -0
- {da4ml-0.4.0.dist-info → da4ml-0.5.0b0.dist-info}/top_level.txt +0 -0
da4ml/converter/hgq2/replica.py
CHANGED
@@ -18,11 +18,16 @@ from hgq.layers import (
     QEinsum,
     QEinsumDense,
     QEinsumDenseBatchnorm,
+    QLinformerAttention,
     QMaximum,
     QMeanPow2,
     QMinimum,
+    QMultiHeadAttention,
+    QMultiply,
+    QSoftmax,
     QSubtract,
     QSum,
+    QUnaryFunctionLUT,
 )
 from hgq.layers.core.base import MultipleQuantizers, Quantizer
 from hgq.quantizer.internal import FixedPointQuantizerBase
@@ -68,7 +73,9 @@ def mirror_quantizer(q: Quantizer, v: FixedVariableArray) -> FixedVariableArray:
 _registry: dict[type, 'type[ReplayOperationBase]'] = {}


-class ReplayOperationMeta(type):
+class HandlerRegMeta(type):
+    """Metaclass for automatic registration of handler classes."""
+
     def __new__(mcs, name: str, bases: tuple[type, ...], namespace: dict[str, typing.Any]):
         cls = super().__new__(mcs, name, bases, namespace)
         if name == 'ReplayOperationBase':
@@ -83,8 +90,11 @@ class ReplayOperationMeta(type):
         return cls


-class ReplayOperationBase(metaclass=ReplayOperationMeta):
+class ReplayOperationBase(metaclass=HandlerRegMeta):
     handles: tuple[type, ...] = ()
+    __activation_handled__ = False
+    __input_quantizer_handled__ = False
+    __output_quantizer_handled__ = False

     def __init__(self, layer: 'keras.Operation'):
         assert isinstance(layer, self.handles)
@@ -94,8 +104,6 @@ class ReplayOperationBase(metaclass=ReplayOperationMeta):

     def __call__(self, *args, **kwargs) -> tuple[FixedVariableArray, ...]:
         assert all(not isinstance(a, FixedVariableArray) for a in kwargs.values())
-        assert all(isinstance(a, FixedVariableArray) or isinstance(a, Sequence) for a in args)
-        inputs = args[0] if len(args) == 1 else args

         if not isinstance(self.op, hgq.layers.QLayerBase):
             r = self.call(*args, **kwargs)
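The hunks above rename `ReplayOperationMeta` to `HandlerRegMeta`, but the diff only shows the `__new__` signature and the early return for the base class; the registration body itself is not visible here. Below is a minimal runnable sketch of the assumed pattern. The loop over `handles` and the `ReplayDemo` handler are illustrative assumptions, not da4ml's verbatim code:

import typing

_registry: dict[type, 'type[ReplayOperationBase]'] = {}


class HandlerRegMeta(type):
    """Metaclass for automatic registration of handler classes."""

    def __new__(mcs, name: str, bases: tuple[type, ...], namespace: dict[str, typing.Any]):
        cls = super().__new__(mcs, name, bases, namespace)
        if name == 'ReplayOperationBase':
            return cls  # the abstract base itself is not registered
        # Assumed registration body: map every layer type a subclass
        # declares in `handles` to that subclass, so replay code can
        # dispatch with _registry[type(layer)].
        for handled in namespace.get('handles', ()):
            _registry[handled] = cls
        return cls


class ReplayOperationBase(metaclass=HandlerRegMeta):
    handles: tuple[type, ...] = ()


class ReplayDemo(ReplayOperationBase):
    handles = (int,)  # hypothetical stand-in for an hgq layer type


assert _registry[int] is ReplayDemo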
@@ -105,28 +113,35 @@ class ReplayOperationBase(metaclass=ReplayOperationMeta):
         assert kwargs.pop('training', False) is False, 'Training mode is not supported in mirror operation'
         assert kwargs.pop('mask', None) is None, 'Masking is not supported in mirror operation'

-        if layer.enable_iq:
-            if isinstance(inputs, Sequence):
-                assert isinstance(layer.iq, MultipleQuantizers)
-                inputs = tuple(mirror_quantizer(q, v) for q, v in zip(layer.iq.quantizers, inputs))
-            else:
-                assert isinstance(layer.iq, Quantizer), f'Expected iq to be a Quantizer, got {type(layer.iq)}'
-                inputs = mirror_quantizer(layer.iq, inputs)
+        if not self.__input_quantizer_handled__:
+            assert len(args) == 1
+            inputs = args[0]

-        outputs = self.call(inputs, **kwargs)
+            if layer.enable_iq:
+                if isinstance(inputs, Sequence):
+                    assert isinstance(layer.iq, MultipleQuantizers)
+                    inputs = tuple(mirror_quantizer(q, v) for q, v in zip(layer.iq.quantizers, inputs))
+                else:
+                    assert isinstance(layer.iq, Quantizer), f'Expected iq to be a Quantizer, got {type(layer.iq)}'
+                    inputs = mirror_quantizer(layer.iq, inputs)

-        activation = getattr(layer, 'activation', keras.activations.linear)
-        if activation is not keras.activations.linear:
-            if activation is keras.activations.relu:
-                if isinstance(outputs, tuple):
-                    assert len(outputs) == 1, 'ReLU activation is expected to have a single output'
-                    outputs = (relu(outputs[0]),)
+            outputs = self.call(inputs, **kwargs)
+        else:
+            outputs = self.call(*args, **kwargs)
+
+        if not self.__activation_handled__:
+            activation = getattr(layer, 'activation', keras.activations.linear)
+            if activation is not keras.activations.linear:
+                if activation is keras.activations.relu:
+                    if isinstance(outputs, tuple):
+                        assert len(outputs) == 1, 'ReLU activation is expected to have a single output'
+                        outputs = (relu(outputs[0]),)
+                    else:
+                        outputs = relu(outputs)
                 else:
-                    outputs = relu(outputs)
-            else:
-                raise NotImplementedError(f'Activation {activation} is not supported in mirror operation')
+                    raise NotImplementedError(f'Activation {activation} is not supported in mirror operation')

-        if layer.enable_oq:
+        if layer.enable_oq and not self.__output_quantizer_handled__:
             if isinstance(outputs, tuple):
                 assert isinstance(layer.oq, MultipleQuantizers)
                 outputs = tuple(mirror_quantizer(q, v) for q, v in zip(layer.oq.quantizers, outputs))
@@ -134,7 +149,7 @@ class ReplayOperationBase(metaclass=ReplayOperationMeta):
             assert isinstance(layer.oq, Quantizer)
             outputs = mirror_quantizer(layer.oq, outputs)

-        if isinstance(outputs, FixedVariableArray):
+        if isinstance(outputs, (FixedVariableArray, np.ndarray)):
             outputs = (outputs,)

         return outputs
@@ -193,7 +208,7 @@ class ReplayQBatchNormalization(ReplayOperationBase):
     def call(self, inputs: FixedVariableArray) -> FixedVariableArray:
         layer: QBatchNormalization = self.op
         scale, bias = map(np.array, layer.qscaler_and_qoffset)
-        shape = layer._shape
+        shape = layer._shape[1:]
         return inputs * scale.reshape(shape) + bias.reshape(shape)


@@ -367,7 +382,7 @@ class ReplayQReduction(ReplayOperationBase):


 class ReplayArithmetic(ReplayOperationBase):
-    handles = (Add, Subtract, Multiply, TrueDivide, Divide, QSubtract, QMaximum, QMinimum, Maximum, Minimum)
+    handles = (Add, Subtract, Multiply, QMultiply, TrueDivide, Divide, QSubtract, QMaximum, QMinimum, Maximum, Minimum)

     def call(self, x1: FixedVariableArray, x2: FixedVariableArray):
         name = self.op.__class__.__name__
@@ -471,3 +486,173 @@ class ReplayAbs(ReplayOperationBase):

     def call(self, x: FixedVariableArray) -> FixedVariableArray:
         return np.abs(x)  # type: ignore
+
+
+class ReplayQFunctionLUT(ReplayOperationBase):
+    __activation_handled__ = True
+    handles = (QUnaryFunctionLUT,)
+
+    def call(self, x: FixedVariableArray) -> FixedVariableArray:
+        op: QUnaryFunctionLUT = self.op
+
+        def activation(x) -> np.ndarray:
+            kx = keras.ops.convert_to_tensor(x[None])
+            kx = op.activation(kx)
+            return keras.ops.convert_to_numpy(kx[0])  # type: ignore
+
+        return x.apply(activation)
+
+
+class ReplayQSoftmax(ReplayOperationBase):
+    handles = (QSoftmax,)
+
+    def call(self, inputs: FixedVariableArray, mask: None | FixedVariableArray = None) -> FixedVariableArray:
+        op: QSoftmax = self.op
+        inputs = inputs[None]
+
+        if op.stable:
+            inputs = np.amax(inputs, axis=op.axes, keepdims=True) - inputs  # type: ignore
+
+        exp_inp = ReplayQFunctionLUT(op.exp_table)(inputs[0])[0]
+
+        if mask is not None:
+            exp_inp = mask[0] * exp_inp
+
+        sums = np.sum(exp_inp[None], axis=op.axes, keepdims=True)[0]  # type: ignore
+        divisor = ReplayQFunctionLUT(op.inv_table)(sums)[0]
+
+        return exp_inp * divisor
+
+
+def _compute_attention_mask(
+    query,
+    value,
+    query_mask=None,
+    value_mask=None,
+    key_mask=None,
+    attention_mask=None,
+    use_causal_mask=False,
+):
+    masks = []
+    if query_mask is not None:
+        masks.append(np.expand_dims(query_mask, -1))  # [Q, 1]
+    if value_mask is not None:
+        masks.append(np.expand_dims(value_mask, -2))  # [1, V]
+    if key_mask is not None:
+        masks.append(np.expand_dims(key_mask, -2))  # [1, V]
+    if use_causal_mask:
+        q = query.shape[0]
+        v = q if value is None else value.shape[0]
+        masks.append(np.tril(np.ones((q, v), dtype='uint8')))  # [Q, V]
+    masks.append(attention_mask)
+    if not masks:
+        return None
+
+    if any(isinstance(m, FixedVariableArray) for m in masks):
+        return np.prod(np.stack(masks, axis=0), axis=0)
+    else:
+        return None
+
+
+def _masked_softmax(op, attention_scores, attention_mask=None):
+    # Normalize the attention scores to probabilities.
+    # attention_scores = [B, N, T, S]
+    if attention_mask is not None:
+        # The expand dim happens starting from the `num_heads` dimension,
+        # (<batch_dims>, num_heads, <query_attention_dims,
+        # key_attention_dims>)
+        mask_expansion_axis = -len(op._attention_axes) * 2 - 1
+        for _ in range(len(attention_scores.shape) - len(attention_mask.shape)):
+            attention_mask = np.expand_dims(attention_mask, axis=mask_expansion_axis)
+    return ReplayQSoftmax(op._softmax)(attention_scores[0], mask=attention_mask)[0][None]
+
+
+def _compute_attention(op: QMultiHeadAttention, query, key, value, attention_mask=None, training=None):
+    # Take the dot product between "query" and "key" to get the raw
+    # attention scores.
+    attention_scores = einsum(op._dot_product_equation, key, query)
+
+    attention_scores = _masked_softmax(op, attention_scores, attention_mask)
+
+    # `context_layer` = [B, T, N, H]
+    attention_output = einsum(op._combine_equation, attention_scores, value)
+    return attention_output, attention_scores
+
+
+class ReplayMHA(ReplayOperationBase):
+    handles = (QMultiHeadAttention,)
+    __input_quantizer_handled__ = True
+    __output_quantizer_handled__ = True
+
+    def call(
+        self,
+        query: FixedVariableArray,
+        value: FixedVariableArray,
+        key=None,
+        query_mask=None,
+        value_mask=None,
+        key_mask=None,
+        attention_mask=None,
+        return_attention_scores=False,
+        use_causal_mask=False,
+    ):
+        op: QMultiHeadAttention = self.op
+
+        if key is None:
+            key = value
+
+        _attention_mask = _compute_attention_mask(
+            query,
+            value,
+            query_mask=query_mask,
+            value_mask=value_mask,
+            key_mask=key_mask,
+            attention_mask=attention_mask,
+            use_causal_mask=use_causal_mask,
+        )
+
+        query = ReplayQDense(op._query_dense)(query)[0][None]
+        key = ReplayQDense(op._key_dense)(key)[0][None]
+        value = ReplayQDense(op._value_dense)(value)[0][None]
+
+        attention_output, attention_scores = _compute_attention(op, query, key, value, _attention_mask)
+        attention_output = ReplayQDense(op._output_dense)(attention_output[0])[0]
+
+        if op.enable_oq:
+            attention_output = mirror_quantizer(op.oq, attention_output)
+
+        if return_attention_scores:
+            return attention_output, attention_scores[0]
+        return attention_output
+
+
+class ReplayQLinformerAttention(ReplayMHA):
+    handles = (QLinformerAttention,)
+
+    def call(
+        self,
+        query,
+        value,
+        key=None,
+        query_mask=None,
+        value_mask=None,
+        key_mask=None,
+        attention_mask=None,
+        return_attention_scores=False,
+        use_causal_mask=False,
+    ):
+        assert use_causal_mask is False, 'Causal mask is not supported in QLinformerAttention.'
+        key = key if key is not None else value
+        op: QLinformerAttention = self.op
+        key = ReplayQDense(op._lin_k_proj)(key)[0]
+        value = ReplayQDense(op._lin_v_proj)(value)[0]
+        return super().call(
+            query,
+            value,
+            key,
+            query_mask=query_mask,
+            value_mask=value_mask,
+            key_mask=key_mask,
+            attention_mask=attention_mask,
+            return_attention_scores=return_attention_scores,
+        )
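The new `ReplayQSoftmax` above computes softmax without a divider: an exp lookup table is applied to max-shifted inputs when `op.stable` is set, and an inverse table applied to the sum replaces the division with a multiply. A plain-numpy sketch of that structure, with rounded toy tables standing in for `op.exp_table` and `op.inv_table` (which da4ml looks up through `ReplayQFunctionLUT` instead):

import numpy as np


def lut_softmax(x: np.ndarray, frac_bits: int = 8) -> np.ndarray:
    step = 2.0**-frac_bits
    d = np.max(x) - x                                  # stable form: exp inputs are <= 0
    exp_d = np.round(np.exp(-d) / step) * step         # quantized exp-table output
    inv_s = np.round(1.0 / exp_d.sum() / step) * step  # quantized 1/sum-table output
    return exp_d * inv_s                               # multiply instead of divide


p = lut_softmax(np.array([0.5, 1.0, -0.25]))
assert abs(p.sum() - 1.0) < 0.05  # sums close to, but not exactly, 1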
da4ml/trace/fixed_variable.py
CHANGED
@@ -1,14 +1,24 @@
 import random
-
+import typing
+from collections.abc import Callable, Generator
+from dataclasses import dataclass
 from decimal import Decimal
+from hashlib import sha256
 from math import ceil, floor, log2
-from typing import NamedTuple
+from typing import NamedTuple, overload
 from uuid import UUID

+import numpy as np
+from numpy.typing import NDArray
+
 from ..cmvm.core import cost_add
-from ..cmvm.types import QInterval
+from ..cmvm.types import QInterval, _minimal_kif
+from ..cmvm.util.bit_decompose import _shift_centering
+
+rd = random.Random()

-
+if typing.TYPE_CHECKING:
+    pass


 class HWConfig(NamedTuple):
@@ -17,7 +27,154 @@ class HWConfig(NamedTuple):
     latency_cutoff: float


+ufunc_t = Callable[[NDArray[np.floating]], NDArray[np.floating]]
+
+
+class TraceContext:
+    _tables: 'dict[str, tuple[LookupTable, int]]' = {}
+    hwconf: HWConfig = HWConfig(1, -1, -1)
+    _table_counter = 0
+
+    def register_table(self, table: 'LookupTable|np.ndarray'):
+        if isinstance(table, np.ndarray):
+            table = LookupTable(table)
+        if table.spec.hash in self._tables:
+            return self._tables[table.spec.hash]
+        self._tables[table.spec.hash] = (table, self._table_counter)
+
+        self._table_counter += 1
+        return self._tables[table.spec.hash]
+
+    def index_table(self, hash: str) -> int:
+        return self._tables[hash][1]
+
+    def get_table_from_index(self, index: int) -> 'LookupTable':
+        for table, idx in self._tables.values():
+            if idx == index:
+                return table
+        raise KeyError(f'No table found with index {index}')
+
+
+table_context = TraceContext()
+
+
+@dataclass
+class TableSpec:
+    hash: str
+    out_qint: QInterval
+    inp_width: int
+
+    @property
+    def out_kif(self) -> tuple[bool, int, int]:
+        return _minimal_kif(self.out_qint)
+
+
+def to_spec(table: NDArray[np.floating]) -> tuple[TableSpec, NDArray[np.int32]]:
+    f_out = -_shift_centering(np.array(table))
+    int_table = (table * 2**f_out).astype(np.int32)
+    h = sha256(int_table.data)
+    h.update(f'{f_out}'.encode())
+    inp_width = ceil(log2(table.size))
+    out_qint = QInterval(float(np.min(table)), float(np.max(table)), float(2**-f_out))
+    return TableSpec(hash=h.hexdigest(), inp_width=inp_width, out_qint=out_qint), int_table
+
+
+def interpret_as(
+    x: int | NDArray[np.integer],
+    k: int,
+    i: int,
+    f: int,
+) -> float | NDArray[np.floating]:
+    b = k + i + f
+    bias = 2.0 ** (b - 1) * k
+    eps = 2.0**-f
+    floor_fn = np.floor if isinstance(x, np.ndarray) else floor
+    return eps * (floor_fn(x + bias) % 2.0**b - bias)
+
+
+class LookupTable:
+    def __init__(self, values: NDArray, spec: TableSpec | None = None):
+        assert values.ndim == 1, 'Lookup table values must be 1-dimensional'
+        if spec is not None:
+            assert values.dtype is np.int32
+            self.spec = spec
+            self.table = values
+        else:
+            self.spec, self.table = to_spec(values)
+
+    @overload
+    def lookup(self, var: 'FixedVariable', qint_in: QInterval) -> 'FixedVariable': ...
+
+    @overload
+    def lookup(self, var: np.floating | float, qint_in: QInterval | tuple[float, float, float]) -> float: ...
+
+    def lookup(self, var, qint_in: QInterval | tuple[float, float, float]):
+        if isinstance(var, FixedVariable):
+            return var.lookup(self)
+        else:
+            _min, _max, _step = qint_in
+            assert _min <= var <= _max, f'Value {var} out of range [{_min}, {_max}]'
+            index = round((var - _min) / _step)
+            return interpret_as(int(self.table[index]), *self.spec.out_kif)
+
+    @property
+    def float_table(self) -> NDArray[np.floating]:
+        k, i, f = self.spec.out_kif
+        return interpret_as(self.table, k, i, f)  # type: ignore
+
+    def to_dict(self) -> dict:
+        return {
+            'spec': {
+                'hash': self.spec.hash,
+                'out_qint': {
+                    'min': self.spec.out_qint.min,
+                    'max': self.spec.out_qint.max,
+                    'step': self.spec.out_qint.step,
+                },
+                'inp_width': self.spec.inp_width,
+            },
+            'table': self.table.tolist(),
+        }
+
+    @classmethod
+    def from_dict(cls, data: dict) -> 'LookupTable':
+        spec_data = data['spec']
+        out_qint_data = spec_data['out_qint']
+        spec = TableSpec(
+            hash=spec_data['hash'],
+            out_qint=QInterval(out_qint_data['min'], out_qint_data['max'], out_qint_data['step']),
+            inp_width=spec_data['inp_width'],
+        )
+        table = np.array(data['table'], dtype=np.int32)
+        return cls(table, spec=spec)
+
+    def _get_pads(self, qint: QInterval) -> tuple[int, int]:
+        k, i, f = _minimal_kif(qint)
+        if k:
+            pad_left = round((qint.min + 2**i) / qint.step)
+        else:
+            pad_left = round(qint.min / qint.step)
+        size = 2 ** (k + i + f)
+        pad_right = size - len(self.table) - pad_left
+        return pad_left, pad_right
+
+    def padded_table(self, qint: QInterval) -> NDArray[np.int32]:
+        pad_left, pad_right = self._get_pads(qint)
+        data = np.pad(self.table, (pad_left, pad_right), mode='constant', constant_values=0)
+        if qint.min < 0:
+            size = len(data)
+            # data = np.concatenate((data[size // 2 :], data[: size // 2]))
+            data = np.roll(data, size // 2)
+        return data
+
+    def get_uuid(self, qint: QInterval) -> UUID:
+        pad_left, _ = self._get_pads(qint)
+        _int = int(self.spec.hash[:32], 16) ^ pad_left
+        return UUID(int=_int, version=4)
+
+
 def _const_f(const: float | Decimal):
+    """Get the minimum f such that const * 2^f is an integer."""
     const = float(const)
     _low, _high = -32, 32
     while _high - _low > 1:
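The `interpret_as` helper added above reinterprets a raw integer code as a fixed-point value with sign bit `k`, `i` integer bits and `f` fraction bits; the modulo implements two's-complement wrap-around. A worked check of the scalar path, with the arithmetic copied from the hunk:

from math import floor


def interpret_as(x, k, i, f):
    # scalar path of the function added in the hunk above
    b = k + i + f
    bias = 2.0 ** (b - 1) * k
    eps = 2.0**-f
    return eps * (floor(x + bias) % 2.0**b - bias)


# the 4-bit code 0b1101 (13) read as signed with 3 fraction bits is -3 * 2**-3
assert interpret_as(13, k=1, i=0, f=3) == -0.375
# the same code read as unsigned with one integer bit is 13 * 2**-3
assert interpret_as(13, k=0, i=1, f=3) == 1.625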
@@ -31,6 +188,7 @@ def _const_f(const: float | Decimal):


 def to_csd_powers(x: float) -> Generator[float, None, None]:
+    """Convert a float to a list of +/- powers of two in CSD representation."""
    if x == 0:
         return
     f = _const_f(abs(x))
@@ -48,6 +206,8 @@ def to_csd_powers(x: float) -> Generator[float, None, None]:


 class FixedVariable:
+    __normal__variable__ = True
+
     def __init__(
         self,
         low: float | Decimal,
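`to_csd_powers` is what lets `__mul__` (in the hunks below) lower a constant multiply to a short chain of shift-and-adds; its body is only partially visible in this diff. The sketch below is a simplified stand-in, not da4ml's exact algorithm: greedily emitting the nearest signed power of two reproduces canonical-signed-digit (CSD) recoding for terminating binary constants:

from math import log2


def csd_powers(x: float):
    # simplified stand-in for to_csd_powers; not da4ml's exact algorithm
    while x != 0.0:
        p = 2.0 ** round(log2(abs(x)))
        p = p if x > 0 else -p
        yield p
        x -= p


assert list(csd_powers(7.0)) == [8.0, -1.0]      # x * 7 == (x << 3) - x
assert list(csd_powers(0.375)) == [0.5, -0.125]  # fractional constants work too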
@@ -62,7 +222,8 @@ class FixedVariable:
         _data: Decimal | None = None,
         _id: UUID | None = None,
     ) -> None:
-        assert low <= high, f'low {low} must be less than high {high}'
+        if self.__normal__variable__:
+            assert low <= high, f'low {low} must be less than high {high}'

         if low != high and opr == 'const':
             raise ValueError('Constant variable must have low == high')
@@ -100,9 +261,19 @@ class FixedVariable:
         if v.opr == 'const':
             v.latency = self.latency

-    def get_cost_and_latency(self):
+    def get_cost_and_latency(self) -> tuple[float, float]:
         if self.opr == 'const':
             return 0.0, 0.0
+
+        if self.opr == 'lookup':
+            assert len(self._from) == 1
+            b_in = sum(self._from[0].kif)
+            b_out = sum(self.kif)
+            _latency = max(b_in - 6, 1) + self._from[0].latency
+            _cost = 2 ** max(b_in - 5, 0) * ceil(b_out / 2)
+            # Assume LUT6 with extra o5 output
+            return _cost, _latency
+
         if self.opr in ('vadd', 'cadd', 'min', 'max', 'vmul'):
             adder_size = self.hwconf.adder_size
             carry_size = self.hwconf.carry_size
@@ -212,7 +383,7 @@ class FixedVariable:
         if self.high == self.low:
             return other._const_add(self.low)

-        assert self.hwconf == other.hwconf, 'FixedVariable must have the same hwconf'
+        assert self.hwconf == other.hwconf, f'FixedVariable must have the same hwconf, got {self.hwconf} and {other.hwconf}'

         f0, f1 = self._factor, other._factor
         if f0 < 0:
@@ -270,20 +441,32 @@ class FixedVariable:
         return self * (1 / other)

     def __mul__(self, other: 'FixedVariable|int|float|Decimal') -> 'FixedVariable':
+        if isinstance(other, FixedVariable):
+            if self.high == self.low:
+                return other * self.low
+            if other.high > other.low:
+                return self._var_mul(other)
+            assert other.high == other.low
+            other = float(other.low)
+
         if other == 0:
             return FixedVariable(0, 0, 1, hwconf=self.hwconf, opr='const')

-        if isinstance(other, FixedVariable):
-            return self._var_mul(other)
-
         if log2(abs(other)) % 1 == 0:
             return self._pow2_mul(other)

-        variables = [self._pow2_mul(v) for v in to_csd_powers(float(other))]
+        variables = [(self._pow2_mul(v), Decimal(v)) for v in to_csd_powers(float(other))]
         while len(variables) > 1:
-
-            variables.
-
+            v1, p1 = variables.pop()
+            v2, p2 = variables.pop()
+            v, p = v1 + v2, p1 + p2
+            if p > 0:
+                high, low = self.high * p, self.low * p
+            else:
+                high, low = self.low * p, self.high * p
+            v.high, v.low = high, low
+            variables.append((v, p))
+        return variables[0][0]

     def _var_mul(self, other: 'FixedVariable') -> 'FixedVariable':
         if other is not self:
@@ -307,6 +490,7 @@ class FixedVariable:
             high,
             step,
             _from=(self, other),
+            hwconf=self.hwconf,
             _factor=_factor,
             opr=opr,
         )
@@ -407,7 +591,7 @@ class FixedVariable:
         f: int,
         overflow_mode: str = 'WRAP',
         round_mode: str = 'TRN',
-    ):
+    ) -> 'FixedVariable':
         overflow_mode, round_mode = overflow_mode.upper(), round_mode.upper()
         assert overflow_mode in ('WRAP', 'SAT', 'SAT_SYM')
         assert round_mode in ('TRN', 'RND')
@@ -428,7 +612,9 @@ class FixedVariable:
             _high = Decimal(2) ** i
             high = _high - step
             low = -_high * k if overflow_mode == 'SAT' else -high * k
-
+            ff = f + 1 if round_mode == 'RND' else f
+            v = self.quantize(_k, _i, ff, 'WRAP', 'TRN')
+            return v.max_of(low).min_of(high).quantize(k, i, f, 'WRAP', round_mode)

         if self.low == self.high:
             val = self.low
@@ -539,25 +725,47 @@ class FixedVariable:
         qint = (min(self.low, other.low), min(self.high, other.high), min(self.step, other.step))
         return (self - other).msb_mux(self, other, qint=qint)

+    def lookup(self, table: LookupTable | np.ndarray) -> 'FixedVariable':
+        _table, table_id = table_context.register_table(table)
+        size = len(table.table) if isinstance(table, LookupTable) else len(table)
+        assert (
+            round((self.high - self.low) / self.step) + 1 == size
+        ), f'Input variable size does not match lookup table size ({round((self.high - self.low) / self.step) + 1} != {size})'
+
+        return FixedVariable(
+            _table.spec.out_qint.min,
+            _table.spec.out_qint.max,
+            _table.spec.out_qint.step,
+            _from=(self,),
+            _factor=Decimal(1),
+            opr='lookup',
+            hwconf=self.hwconf,
+            _data=Decimal(table_id),
+        )
+

 class FixedVariableInput(FixedVariable):
+    __normal__variable__ = False
+
     def __init__(
         self,
         latency: float | None = None,
-        hwconf=HWConfig(-1, -1, -1),
+        hwconf: HWConfig | tuple[int, int, int] = HWConfig(-1, -1, -1),
+        opr: str = 'new',
     ) -> None:
-
-
-
-
-
-
-
-
-
-
-
-
+        super().__init__(
+            low=Decimal(1e10),
+            high=Decimal(-1e10),
+            step=Decimal(1e10),
+            latency=latency if latency is not None else 0.0,
+            hwconf=HWConfig(*hwconf),
+            opr=opr,
+            cost=0.0,
+            _factor=Decimal(1),
+            _from=(),
+            _data=None,
+            _id=None,
+        )

     def __add__(self, other):
         if other == 0:
@@ -614,6 +822,8 @@ class FixedVariableInput(FixedVariable):

         if round_mode == 'RND':
             return (self.quantize(k, i, f + 1) + 2.0 ** (-f - 1)).quantize(k, i, f, overflow_mode, 'TRN')
+        else:
+            round_mode = 'TRN'

         step = Decimal(2) ** -f
         _high = Decimal(2) ** i