da4ml-0.5.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. da4ml/__init__.py +4 -0
  2. da4ml/_binary/__init__.py +15 -0
  3. da4ml/_binary/dais_bin.cpython-312-x86_64-linux-gnu.so +0 -0
  4. da4ml/_binary/dais_bin.pyi +5 -0
  5. da4ml/_cli/__init__.py +30 -0
  6. da4ml/_cli/convert.py +194 -0
  7. da4ml/_cli/report.py +295 -0
  8. da4ml/_version.py +32 -0
  9. da4ml/cmvm/__init__.py +4 -0
  10. da4ml/cmvm/api.py +264 -0
  11. da4ml/cmvm/core/__init__.py +221 -0
  12. da4ml/cmvm/core/indexers.py +83 -0
  13. da4ml/cmvm/core/state_opr.py +284 -0
  14. da4ml/cmvm/types.py +739 -0
  15. da4ml/cmvm/util/__init__.py +7 -0
  16. da4ml/cmvm/util/bit_decompose.py +86 -0
  17. da4ml/cmvm/util/mat_decompose.py +121 -0
  18. da4ml/codegen/__init__.py +9 -0
  19. da4ml/codegen/hls/__init__.py +4 -0
  20. da4ml/codegen/hls/hls_codegen.py +196 -0
  21. da4ml/codegen/hls/hls_model.py +255 -0
  22. da4ml/codegen/hls/source/ap_types/ap_binary.h +78 -0
  23. da4ml/codegen/hls/source/ap_types/ap_common.h +376 -0
  24. da4ml/codegen/hls/source/ap_types/ap_decl.h +212 -0
  25. da4ml/codegen/hls/source/ap_types/ap_fixed.h +360 -0
  26. da4ml/codegen/hls/source/ap_types/ap_fixed_base.h +2354 -0
  27. da4ml/codegen/hls/source/ap_types/ap_fixed_ref.h +718 -0
  28. da4ml/codegen/hls/source/ap_types/ap_fixed_special.h +230 -0
  29. da4ml/codegen/hls/source/ap_types/ap_int.h +330 -0
  30. da4ml/codegen/hls/source/ap_types/ap_int_base.h +1885 -0
  31. da4ml/codegen/hls/source/ap_types/ap_int_ref.h +1346 -0
  32. da4ml/codegen/hls/source/ap_types/ap_int_special.h +223 -0
  33. da4ml/codegen/hls/source/ap_types/ap_shift_reg.h +138 -0
  34. da4ml/codegen/hls/source/ap_types/etc/ap_private.h +7199 -0
  35. da4ml/codegen/hls/source/ap_types/hls_math.h +27 -0
  36. da4ml/codegen/hls/source/ap_types/hls_stream.h +263 -0
  37. da4ml/codegen/hls/source/ap_types/utils/x_hls_utils.h +80 -0
  38. da4ml/codegen/hls/source/binder_util.hh +71 -0
  39. da4ml/codegen/hls/source/build_binder.mk +22 -0
  40. da4ml/codegen/hls/source/vitis_bitshift.hh +32 -0
  41. da4ml/codegen/rtl/__init__.py +15 -0
  42. da4ml/codegen/rtl/common_source/binder_util.hh +99 -0
  43. da4ml/codegen/rtl/common_source/build_binder.mk +34 -0
  44. da4ml/codegen/rtl/common_source/build_quartus_prj.tcl +104 -0
  45. da4ml/codegen/rtl/common_source/build_vivado_prj.tcl +111 -0
  46. da4ml/codegen/rtl/common_source/ioutil.hh +124 -0
  47. da4ml/codegen/rtl/common_source/template.sdc +27 -0
  48. da4ml/codegen/rtl/common_source/template.xdc +30 -0
  49. da4ml/codegen/rtl/rtl_model.py +486 -0
  50. da4ml/codegen/rtl/verilog/__init__.py +10 -0
  51. da4ml/codegen/rtl/verilog/comb.py +239 -0
  52. da4ml/codegen/rtl/verilog/io_wrapper.py +113 -0
  53. da4ml/codegen/rtl/verilog/pipeline.py +67 -0
  54. da4ml/codegen/rtl/verilog/source/lookup_table.v +27 -0
  55. da4ml/codegen/rtl/verilog/source/multiplier.v +37 -0
  56. da4ml/codegen/rtl/verilog/source/mux.v +58 -0
  57. da4ml/codegen/rtl/verilog/source/negative.v +31 -0
  58. da4ml/codegen/rtl/verilog/source/shift_adder.v +59 -0
  59. da4ml/codegen/rtl/vhdl/__init__.py +9 -0
  60. da4ml/codegen/rtl/vhdl/comb.py +206 -0
  61. da4ml/codegen/rtl/vhdl/io_wrapper.py +120 -0
  62. da4ml/codegen/rtl/vhdl/pipeline.py +71 -0
  63. da4ml/codegen/rtl/vhdl/source/lookup_table.vhd +52 -0
  64. da4ml/codegen/rtl/vhdl/source/multiplier.vhd +40 -0
  65. da4ml/codegen/rtl/vhdl/source/mux.vhd +102 -0
  66. da4ml/codegen/rtl/vhdl/source/negative.vhd +35 -0
  67. da4ml/codegen/rtl/vhdl/source/shift_adder.vhd +101 -0
  68. da4ml/converter/__init__.py +63 -0
  69. da4ml/converter/hgq2/__init__.py +3 -0
  70. da4ml/converter/hgq2/layers/__init__.py +11 -0
  71. da4ml/converter/hgq2/layers/_base.py +132 -0
  72. da4ml/converter/hgq2/layers/activation.py +81 -0
  73. da4ml/converter/hgq2/layers/attn.py +148 -0
  74. da4ml/converter/hgq2/layers/batchnorm.py +15 -0
  75. da4ml/converter/hgq2/layers/conv.py +149 -0
  76. da4ml/converter/hgq2/layers/dense.py +39 -0
  77. da4ml/converter/hgq2/layers/ops.py +240 -0
  78. da4ml/converter/hgq2/layers/pool.py +107 -0
  79. da4ml/converter/hgq2/layers/table.py +176 -0
  80. da4ml/converter/hgq2/parser.py +161 -0
  81. da4ml/trace/__init__.py +6 -0
  82. da4ml/trace/fixed_variable.py +965 -0
  83. da4ml/trace/fixed_variable_array.py +600 -0
  84. da4ml/trace/ops/__init__.py +13 -0
  85. da4ml/trace/ops/einsum_utils.py +305 -0
  86. da4ml/trace/ops/quantization.py +74 -0
  87. da4ml/trace/ops/reduce_utils.py +105 -0
  88. da4ml/trace/pipeline.py +181 -0
  89. da4ml/trace/tracer.py +186 -0
  90. da4ml/typing/__init__.py +3 -0
  91. da4ml-0.5.0.dist-info/METADATA +85 -0
  92. da4ml-0.5.0.dist-info/RECORD +96 -0
  93. da4ml-0.5.0.dist-info/WHEEL +6 -0
  94. da4ml-0.5.0.dist-info/entry_points.txt +3 -0
  95. da4ml-0.5.0.dist-info/sboms/auditwheel.cdx.json +1 -0
  96. da4ml.libs/libgomp-e985bcbb.so.1.0.0 +0 -0
da4ml/codegen/rtl/vhdl/source/shift_adder.vhd
@@ -0,0 +1,101 @@
+ library ieee;
+ use ieee.std_logic_1164.all;
+ use ieee.numeric_std.all;
+
+ entity shift_adder is
+     generic (
+         BW_INPUT0 : integer := 32;
+         BW_INPUT1 : integer := 32;
+         SIGNED0 : integer := 0;
+         SIGNED1 : integer := 0;
+         BW_OUT : integer := 32;
+         SHIFT1 : integer := 0;
+         IS_SUB : integer := 0
+     );
+     port (
+         in0 : in std_logic_vector(BW_INPUT0-1 downto 0);
+         in1 : in std_logic_vector(BW_INPUT1-1 downto 0);
+         result : out std_logic_vector(BW_OUT-1 downto 0)
+     );
+ end entity shift_adder;
+
+ architecture rtl of shift_adder is
+     function max(L, R: integer) return integer is
+     begin
+         if L > R then
+             return L;
+         else
+             return R;
+         end if;
+     end function;
+
+     function if_then_else(cond: boolean; val_true: integer; val_false: integer) return integer is
+     begin
+         if cond then
+             return val_true;
+         else
+             return val_false;
+         end if;
+     end function;
+
+     constant IN0_NEED_BITS : integer := if_then_else(SHIFT1 < 0, BW_INPUT0 - SHIFT1, BW_INPUT0);
+     constant IN1_NEED_BITS : integer := if_then_else(SHIFT1 > 0, BW_INPUT1 + SHIFT1, BW_INPUT1);
+     constant EXTRA_PAD : integer := if_then_else(SIGNED0 /= SIGNED1, IS_SUB + 1, IS_SUB);
+     constant BW_ADD : integer := max(IN0_NEED_BITS, IN1_NEED_BITS) + EXTRA_PAD + 1;
+
+     signal in0_ext : std_logic_vector(BW_ADD-1 downto 0);
+     signal in1_ext : std_logic_vector(BW_ADD-1 downto 0);
+     signal accum : std_logic_vector(BW_ADD-1 downto 0);
+
+ begin
+
+     -- Extension and shifting for input 0
+     gen_in0_shift_neg: if SHIFT1 < 0 generate
+         gen_in0_signed: if SIGNED0 = 1 generate
+             in0_ext <= std_logic_vector(resize(signed(in0), BW_ADD)) sll (-SHIFT1);
+         end generate;
+         gen_in0_unsigned: if SIGNED0 = 0 generate
+             in0_ext <= std_logic_vector(resize(unsigned(in0), BW_ADD)) sll (-SHIFT1);
+         end generate;
+     end generate;
+
+     gen_in0_shift_pos: if SHIFT1 >= 0 generate
+         gen_in0_signed: if SIGNED0 = 1 generate
+             in0_ext <= std_logic_vector(resize(signed(in0), BW_ADD));
+         end generate;
+         gen_in0_unsigned: if SIGNED0 = 0 generate
+             in0_ext <= std_logic_vector(resize(unsigned(in0), BW_ADD));
+         end generate;
+     end generate;
+
+     -- Extension and shifting for input 1
+     gen_in1_shift_pos: if SHIFT1 > 0 generate
+         gen_in1_signed: if SIGNED1 = 1 generate
+             in1_ext <= std_logic_vector(resize(signed(in1), BW_ADD)) sll SHIFT1;
+         end generate;
+         gen_in1_unsigned: if SIGNED1 = 0 generate
+             in1_ext <= std_logic_vector(resize(unsigned(in1), BW_ADD)) sll SHIFT1;
+         end generate;
+     end generate;
+
+     gen_in1_shift_neg: if SHIFT1 <= 0 generate
+         gen_in1_signed: if SIGNED1 = 1 generate
+             in1_ext <= std_logic_vector(resize(signed(in1), BW_ADD));
+         end generate;
+         gen_in1_unsigned: if SIGNED1 = 0 generate
+             in1_ext <= std_logic_vector(resize(unsigned(in1), BW_ADD));
+         end generate;
+     end generate;
+
+     -- Addition/subtraction logic
+     gen_sub: if IS_SUB = 1 generate
+         accum <= std_logic_vector(signed(in0_ext) - signed(in1_ext));
+     end generate;
+
+     gen_add: if IS_SUB = 0 generate
+         accum <= std_logic_vector(signed(in0_ext) + signed(in1_ext));
+     end generate;
+
+     result <= accum(BW_OUT-1 downto 0);
+
+ end architecture rtl;
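The shift_adder entity widens both operands to a common internal width before adding: BW_ADD covers the shifted operand, adds one bit for the sum, and adds extra padding when the operands differ in signedness or a subtraction is requested; the result is then truncated to BW_OUT. As an illustrative cross-check only (a sketch mirroring the constants above, not code shipped in the wheel), the same width arithmetic in Python:

    def adder_width(bw_in0: int, bw_in1: int, signed0: int, signed1: int, shift1: int, is_sub: int) -> int:
        """Mirror of the BW_ADD constant computed in shift_adder.vhd (sketch)."""
        in0_need = bw_in0 - shift1 if shift1 < 0 else bw_in0
        in1_need = bw_in1 + shift1 if shift1 > 0 else bw_in1
        extra_pad = is_sub + 1 if signed0 != signed1 else is_sub
        return max(in0_need, in1_need) + extra_pad + 1

    # e.g. an unsigned 8-bit operand plus a signed 6-bit operand pre-shifted left by 2
    print(adder_width(8, 6, 0, 1, 2, 0))  # -> 10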
da4ml/converter/__init__.py
@@ -0,0 +1,63 @@
+ from collections.abc import Callable
+ from typing import Literal, overload
+
+ from ..cmvm.api import solver_options_t
+ from ..trace import FixedVariableArray, HWConfig
+
+ __all__ = ['trace_model']
+
+
+ @overload
+ def trace_model(  # type: ignore
+     model: Callable,
+     hwconf: HWConfig | tuple[int, int, int] = HWConfig(1, -1, -1),
+     solver_options: solver_options_t | None = None,
+     verbose: bool = False,
+     inputs: tuple[FixedVariableArray, ...] | FixedVariableArray | None = None,
+     inputs_kif: tuple[int, int, int] | None = None,
+     dump: Literal[False] = False,
+ ) -> tuple[FixedVariableArray, FixedVariableArray]: ...
+
+
+ @overload
+ def trace_model(  # type: ignore
+     model: Callable,
+     hwconf: HWConfig | tuple[int, int, int] = HWConfig(1, -1, -1),
+     solver_options: solver_options_t | None = None,
+     verbose: bool = False,
+     inputs: tuple[FixedVariableArray, ...] | FixedVariableArray | None = None,
+     inputs_kif: tuple[int, int, int] | None = None,
+     dump: Literal[True] = False,  # type: ignore
+ ) -> dict[str, FixedVariableArray]: ...
+
+
+ def trace_model(  # type: ignore
+     model: Callable,
+     hwconf: HWConfig | tuple[int, int, int] = HWConfig(1, -1, -1),
+     solver_options: solver_options_t | None = None,
+     verbose: bool = False,
+     inputs: tuple[FixedVariableArray, ...] | None = None,
+     inputs_kif: tuple[int, int, int] | None = None,
+     dump=False,
+ ):
+     hwconf = HWConfig(*hwconf) if isinstance(hwconf, tuple) else hwconf
+
+     module = type(model).__module__
+     if module.startswith('keras.'):
+         import keras
+
+         from .hgq2 import trace_model as keras_trace_model
+
+         assert isinstance(model, keras.Model)
+
+         return keras_trace_model(
+             model,
+             hwconf,
+             solver_options=solver_options,
+             verbose=verbose,
+             inputs=inputs,
+             inputs_kif=inputs_kif,
+             dump=dump,
+         )
+     else:
+         raise ValueError(f'Unsupported model type: {type(model)}')
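trace_model dispatches on the model's module: Keras/HGQ2 models are forwarded to the hgq2 tracer, anything else raises. A hypothetical usage sketch based only on the signature above (the quantized Keras model itself is assumed to exist and is not provided by this package):

    from da4ml.converter import trace_model
    from da4ml.trace import HWConfig

    # `model` is assumed to be an HGQ2-quantized keras.Model built elsewhere.
    # With dump=False (the default) the first overload applies and the symbolic
    # input/output FixedVariableArray pair is returned.
    inp, out = trace_model(model, hwconf=HWConfig(1, -1, -1), verbose=True)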
da4ml/converter/hgq2/__init__.py
@@ -0,0 +1,3 @@
+ from .parser import trace_model
+
+ __all__ = ['trace_model']
da4ml/converter/hgq2/layers/__init__.py
@@ -0,0 +1,11 @@
+ from ._base import _registry
+ from .activation import *
+ from .attn import *
+ from .batchnorm import *
+ from .conv import *
+ from .dense import *
+ from .ops import *
+ from .pool import *
+ from .table import *
+
+ __all__ = ['_registry']
da4ml/converter/hgq2/layers/_base.py
@@ -0,0 +1,132 @@
+ import typing
+ from collections.abc import Sequence
+ from typing import Any
+
+ import hgq
+ import keras
+ import numpy as np
+ from hgq.layers.core.base import MultipleQuantizers, Quantizer
+ from hgq.quantizer.internal import FixedPointQuantizerBase
+ from keras.ops import convert_to_numpy
+
+ from ....trace import FixedVariable, FixedVariableArray
+ from ....trace.ops import quantize, relu
+
+
+ def to_np_arr(x: Any) -> np.ndarray:
+     return np.asarray(convert_to_numpy(x))
+
+
+ def mirror_quantizer(q: Quantizer, v: FixedVariableArray) -> FixedVariableArray:
+     if q.scaler is not None:
+         v = v * (1.0 / q.scaler)
+     q_internal: FixedPointQuantizerBase = q.quantizer
+     kk, ki, kf = q_internal.kif
+     shape = (1,) + v.shape
+     kk = q_internal.bw_mapper.bw_to_x(kk, shape)
+     ki = q_internal.bw_mapper.bw_to_x(ki, shape)
+     kf = q_internal.bw_mapper.bw_to_x(kf, shape)
+     k, i, f = (to_np_arr(x).astype(np.int8)[0] for x in (kk, ki, kf))
+     round_mode, overflow_mode = q_internal.round_mode, q_internal.overflow_mode
+     rq = quantize(v, k, i, f, overflow_mode=overflow_mode, round_mode=round_mode)
+     if q.affine:
+         rq = rq * q.affine[0] + q.affine[1]
+     return rq
+
+
+ _registry: dict[type, 'type[ReplayOperationBase]'] = {}
+
+
+ class HandlerRegMeta(type):
+     """Metaclass for automatic registration of handler classes."""
+
+     def __new__(mcs, name: str, bases: tuple[type, ...], namespace: dict[str, typing.Any]):
+         cls = super().__new__(mcs, name, bases, namespace)
+         if name == 'ReplayOperationBase':
+             return cls
+
+         handles: type | tuple[type, ...] = namespace['handles']
+         if not isinstance(handles, tuple):
+             handles = (handles,)
+
+         for handle in handles:
+             _registry[handle] = cls  # type: ignore
+         return cls
+
+
+ class ReplayOperationBase(metaclass=HandlerRegMeta):
+     handles: tuple[type, ...] = ()
+     __activation_handled__ = False
+     __input_quantizer_handled__ = False
+     __output_quantizer_handled__ = False
+
+     def __init__(self, layer: 'keras.Operation'):
+         assert isinstance(layer, self.handles)
+         self.op: Any = layer
+
+     def call(self, *args, **kwargs) -> tuple[FixedVariableArray, ...] | FixedVariableArray: ...
+
+     def __call__(self, *args, **kwargs) -> tuple[FixedVariableArray, ...]:
+         assert all(not isinstance(a, FixedVariableArray) for a in kwargs.values())
+
+         if not isinstance(self.op, hgq.layers.QLayerBase):
+             r = self.call(*args, **kwargs)
+             return r if isinstance(r, tuple) else (r,)
+
+         layer: hgq.layers.QLayerBase = self.op
+         assert kwargs.pop('training', False) is False, 'Training mode is not supported in mirror operation'
+         assert kwargs.pop('mask', None) is None, 'Masking is not supported in mirror operation'
+
+         if not self.__input_quantizer_handled__:
+             assert len(args) == 1
+             inputs = args[0]
+
+             if layer.enable_iq:
+                 if isinstance(inputs, Sequence):
+                     assert isinstance(layer.iq, MultipleQuantizers)
+                     inputs = tuple(mirror_quantizer(q, v) for q, v in zip(layer.iq.quantizers, inputs))
+                 else:
+                     assert isinstance(layer.iq, Quantizer), f'Expected iq to be a Quantizer, got {type(layer.iq)}'
+                     inputs = mirror_quantizer(layer.iq, inputs)
+
+             outputs = self.call(inputs, **kwargs)
+         else:
+             outputs = self.call(*args, **kwargs)
+         if isinstance(outputs, FixedVariable):
+             outputs = FixedVariableArray(np.array([outputs]))
+
+         if not self.__activation_handled__:
+             activation = getattr(layer, 'activation', keras.activations.linear)
+             if activation is not keras.activations.linear:
+                 if activation is keras.activations.relu:
+                     if isinstance(outputs, tuple):
+                         assert len(outputs) == 1, 'ReLU activation is expected to have a single output'
+                         outputs = (relu(outputs[0]),)
+                     else:
+                         outputs = relu(outputs)
+                 else:
+                     raise NotImplementedError(f'Activation {activation} is not supported in mirror operation')
+
+         if layer.enable_oq and not self.__output_quantizer_handled__:
+             if isinstance(outputs, tuple):
+                 assert isinstance(layer.oq, MultipleQuantizers)
+                 outputs = tuple(mirror_quantizer(q, v) for q, v in zip(layer.oq.quantizers, outputs))
+             else:
+                 assert isinstance(layer.oq, Quantizer)
+                 outputs = mirror_quantizer(layer.oq, outputs)
+
+         if isinstance(outputs, (FixedVariableArray, np.ndarray)):
+             outputs = (outputs,)
+
+         return outputs
+
+
+ class ReplayQuantizer(ReplayOperationBase):
+     handles = (Quantizer,)
+
+     def __init__(self, op: 'Quantizer'):
+         super().__init__(op)
+         assert isinstance(op.quantizer, FixedPointQuantizerBase)
+
+     def call(self, inputs: FixedVariableArray) -> FixedVariableArray:
+         return mirror_quantizer(self.op, inputs)
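HandlerRegMeta gives _base.py a plugin-style dispatch table: every concrete subclass of ReplayOperationBase that declares a handles attribute is recorded in _registry under the layer types it replays, so handlers can later be looked up by layer class. A minimal standalone sketch of that registration pattern (toy names, no hgq or keras dependency, not code from the wheel):

    _registry: dict[type, type] = {}

    class _RegMeta(type):
        # Record each concrete subclass under every type listed in `handles`.
        def __new__(mcs, name, bases, namespace):
            cls = super().__new__(mcs, name, bases, namespace)
            for handle in namespace.get('handles', ()):
                _registry[handle] = cls
            return cls

    class HandlerBase(metaclass=_RegMeta):
        handles: tuple[type, ...] = ()

    class IntHandler(HandlerBase):
        handles = (int,)

    print(_registry[int] is IntHandler)  # -> True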
da4ml/converter/hgq2/layers/activation.py
@@ -0,0 +1,81 @@
+ import keras
+ import numpy as np
+ from hgq.layers import (
+     QSoftmax,
+     QUnaryFunctionLUT,
+ )
+ from keras.layers import LeakyReLU, PReLU, ReLU
+
+ from ....trace import FixedVariableArray
+ from ....trace.ops import relu
+ from ._base import ReplayOperationBase, to_np_arr
+
+
+ class ReplayReLU(ReplayOperationBase):
+     handles = (ReLU, LeakyReLU, PReLU)
+
+     def call(self, inputs: FixedVariableArray) -> FixedVariableArray:
+         op = self.op
+         if isinstance(op, ReLU):
+             th, neg, maxv = op.threshold, op.negative_slope, op.max_value
+         elif isinstance(op, LeakyReLU):
+             th, neg, maxv = 0, op.negative_slope, None
+         elif isinstance(op, PReLU):
+             th, neg, maxv = 0, to_np_arr(op.alpha), None
+         else:
+             raise TypeError(f'Unsupported activation layer: {type(op)}')
+
+         if th == 0 and np.all(neg == 0) and maxv is None:
+             return relu(inputs)
+
+         pos_part = inputs if maxv is None else np.minimum(inputs, maxv)  # type: ignore
+         pos_part = pos_part._vars.ravel()
+
+         if th != 0:
+             z_cond = (inputs - (th + 2.0 ** (-inputs.kif[2] - 1)))._vars.ravel()
+         else:
+             z_cond = inputs._vars.ravel()
+
+         neg_part = ((inputs[None] - th) * neg)._vars.ravel()
+         out = np.array([c.msb_mux(n, p) if c.low < 0 else p for c, n, p in zip(z_cond, neg_part, pos_part)])
+
+         return FixedVariableArray(out.reshape(inputs.shape), inputs.solver_options)
+
+
+ class ReplayQFunctionLUT(ReplayOperationBase):
+     __activation_handled__ = True
+     handles = (QUnaryFunctionLUT,)
+
+     def call(self, x: FixedVariableArray) -> FixedVariableArray:
+         op: QUnaryFunctionLUT = self.op
+
+         def activation(x) -> np.ndarray:
+             kx = keras.ops.convert_to_tensor(x[None])
+             kx = op.activation(kx)
+             return keras.ops.convert_to_numpy(kx[0])  # type: ignore
+
+         return x.apply(activation)
+
+
+ class ReplayQSoftmax(ReplayOperationBase):
+     handles = (QSoftmax,)
+
+     def call(self, inputs: FixedVariableArray, mask: None | FixedVariableArray = None) -> FixedVariableArray:
+         op: QSoftmax = self.op
+         inputs = inputs[None]
+
+         if op.stable:
+             inputs = np.amax(inputs, axis=op.axes, keepdims=True) - inputs  # type: ignore
+
+         exp_inp = ReplayQFunctionLUT(op.exp_table)(inputs[0])[0]
+
+         if mask is not None:
+             exp_inp = mask[0] * exp_inp
+
+         sums = np.sum(exp_inp[None], axis=op.axes, keepdims=True)[0]  # type: ignore
+         divisor = ReplayQFunctionLUT(op.inv_table)(sums)[0]
+
+         return exp_inp * divisor
+
+
+ __all__ = ['ReplayReLU', 'ReplayQFunctionLUT', 'ReplayQSoftmax']
da4ml/converter/hgq2/layers/attn.py
@@ -0,0 +1,148 @@
+ import numpy as np
+ from hgq.layers import (
+     QLinformerAttention,
+     QMultiHeadAttention,
+ )
+
+ from ....trace import FixedVariableArray
+ from ....trace.ops import einsum
+ from ._base import ReplayOperationBase, mirror_quantizer
+ from .activation import ReplayQSoftmax
+ from .dense import ReplayQDense
+
+
+ def _compute_attention_mask(
+     query,
+     value,
+     query_mask=None,
+     value_mask=None,
+     key_mask=None,
+     attention_mask=None,
+     use_causal_mask=False,
+ ):
+     masks = []
+     if query_mask is not None:
+         masks.append(np.expand_dims(query_mask, -1))  # [Q, 1]
+     if value_mask is not None:
+         masks.append(np.expand_dims(value_mask, -2))  # [1, V]
+     if key_mask is not None:
+         masks.append(np.expand_dims(key_mask, -2))  # [1, V]
+     if use_causal_mask:
+         q = query.shape[0]
+         v = q if value is None else value.shape[0]
+         masks.append(np.tril(np.ones((q, v), dtype='uint8')))  # [Q, V]
+     masks.append(attention_mask)
+     if not masks:
+         return None
+
+     if any(isinstance(m, FixedVariableArray) for m in masks):
+         return np.prod(np.stack(masks, axis=0), axis=0)
+     else:
+         return None
+
+
+ def _masked_softmax(op, attention_scores, attention_mask=None):
+     # Normalize the attention scores to probabilities.
+     # attention_scores = [B, N, T, S]
+     if attention_mask is not None:
+         # The expand dim happens starting from the `num_heads` dimension,
+         # (<batch_dims>, num_heads, <query_attention_dims,
+         # key_attention_dims>)
+         mask_expansion_axis = -len(op._attention_axes) * 2 - 1
+         for _ in range(len(attention_scores.shape) - len(attention_mask.shape)):
+             attention_mask = np.expand_dims(attention_mask, axis=mask_expansion_axis)
+     return ReplayQSoftmax(op._softmax)(attention_scores[0], mask=attention_mask)[0][None]
+
+
+ def _compute_attention(op: QMultiHeadAttention, query, key, value, attention_mask=None, training=None):
+     # Take the dot product between "query" and "key" to get the raw
+     # attention scores.
+     attention_scores = einsum(op._dot_product_equation, key, query)
+
+     attention_scores = _masked_softmax(op, attention_scores, attention_mask)
+
+     # `context_layer` = [B, T, N, H]
+     attention_output = einsum(op._combine_equation, attention_scores, value)
+     return attention_output, attention_scores
+
+
+ class ReplayMHA(ReplayOperationBase):
+     handles = (QMultiHeadAttention,)
+     __input_quantizer_handled__ = True
+     __output_quantizer_handled__ = True
+
+     def call(
+         self,
+         query: FixedVariableArray,
+         value: FixedVariableArray,
+         key=None,
+         query_mask=None,
+         value_mask=None,
+         key_mask=None,
+         attention_mask=None,
+         return_attention_scores=False,
+         use_causal_mask=False,
+     ):
+         op: QMultiHeadAttention = self.op
+
+         if key is None:
+             key = value
+
+         _attention_mask = _compute_attention_mask(
+             query,
+             value,
+             query_mask=query_mask,
+             value_mask=value_mask,
+             key_mask=key_mask,
+             attention_mask=attention_mask,
+             use_causal_mask=use_causal_mask,
+         )
+
+         query = ReplayQDense(op._query_dense)(query)[0][None]
+         key = ReplayQDense(op._key_dense)(key)[0][None]
+         value = ReplayQDense(op._value_dense)(value)[0][None]
+
+         attention_output, attention_scores = _compute_attention(op, query, key, value, _attention_mask)
+         attention_output = ReplayQDense(op._output_dense)(attention_output[0])[0]
+
+         if op.enable_oq:
+             attention_output = mirror_quantizer(op.oq, attention_output)
+
+         if return_attention_scores:
+             return attention_output, attention_scores[0]
+         return attention_output
+
+
+ class ReplayQLinformerAttention(ReplayMHA):
+     handles = (QLinformerAttention,)
+
+     def call(
+         self,
+         query,
+         value,
+         key=None,
+         query_mask=None,
+         value_mask=None,
+         key_mask=None,
+         attention_mask=None,
+         return_attention_scores=False,
+         use_causal_mask=False,
+     ):
+         assert use_causal_mask is False, 'Causal mask is not supported in QLinformerAttention.'
+         key = key if key is not None else value
+         op: QLinformerAttention = self.op
+         key = ReplayQDense(op._lin_k_proj)(key)[0]
+         value = ReplayQDense(op._lin_v_proj)(value)[0]
+         return super().call(
+             query,
+             value,
+             key,
+             query_mask=query_mask,
+             value_mask=value_mask,
+             key_mask=key_mask,
+             attention_mask=attention_mask,
+             return_attention_scores=return_attention_scores,
+         )
+
+
+ __all__ = ['ReplayMHA', 'ReplayQLinformerAttention']
da4ml/converter/hgq2/layers/batchnorm.py
@@ -0,0 +1,15 @@
+ import numpy as np
+ from hgq.layers import QBatchNormalization
+
+ from ....trace import FixedVariableArray
+ from ._base import ReplayOperationBase
+
+
+ class ReplayQBatchNormalization(ReplayOperationBase):
+     handles = (QBatchNormalization,)
+
+     def call(self, inputs: FixedVariableArray) -> FixedVariableArray:
+         layer: QBatchNormalization = self.op
+         scale, bias = map(np.array, layer.qscaler_and_qoffset)
+         shape = layer._shape[1:]
+         return inputs * scale.reshape(shape) + bias.reshape(shape)
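ReplayQBatchNormalization works because inference-time batch normalization collapses to a per-channel affine transform; layer.qscaler_and_qoffset supplies a scale and offset that the replay applies as inputs * scale + bias. A standalone numpy illustration of that folding (textbook formula only, independent of hgq and not code from the wheel):

    import numpy as np

    rng = np.random.default_rng(0)
    x = rng.normal(size=(4, 3))
    gamma, beta = rng.normal(size=3), rng.normal(size=3)
    mean, var, eps = rng.normal(size=3), rng.uniform(0.5, 2.0, size=3), 1e-3

    bn = gamma * (x - mean) / np.sqrt(var + eps) + beta  # batchnorm at inference
    scale = gamma / np.sqrt(var + eps)                   # folded per-channel scale
    bias = beta - mean * scale                           # folded per-channel offset
    assert np.allclose(bn, x * scale + bias)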