PyPI - da4ml - Versions diffs - 0.2.1__py3-none-any.whl → 0.3.0.post1__py3-none-any.whl - Mend

da4ml 0.2.1__py3-none-any.whl → 0.3.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of da4ml might be problematic. Click here for more details.

Files changed (55) hide show

da4ml/_version.py +2 -2
da4ml/cmvm/types.py +95 -15
da4ml/codegen/__init__.py +5 -4
da4ml/codegen/cpp/__init__.py +2 -1
da4ml/codegen/cpp/cpp_codegen.py +56 -23
da4ml/codegen/cpp/hls_model.py +252 -0
da4ml/codegen/cpp/source/ap_types/ap_binary.h +78 -0
da4ml/codegen/cpp/source/ap_types/ap_common.h +376 -0
da4ml/codegen/cpp/source/ap_types/ap_decl.h +212 -0
da4ml/codegen/cpp/source/ap_types/ap_fixed.h +360 -0
da4ml/codegen/cpp/source/ap_types/ap_fixed_base.h +2354 -0
da4ml/codegen/cpp/source/ap_types/ap_fixed_ref.h +718 -0
da4ml/codegen/cpp/source/ap_types/ap_fixed_special.h +230 -0
da4ml/codegen/cpp/source/ap_types/ap_int.h +330 -0
da4ml/codegen/cpp/source/ap_types/ap_int_base.h +1885 -0
da4ml/codegen/cpp/source/ap_types/ap_int_ref.h +1346 -0
da4ml/codegen/cpp/source/ap_types/ap_int_special.h +223 -0
da4ml/codegen/cpp/source/ap_types/ap_shift_reg.h +138 -0
da4ml/codegen/cpp/source/ap_types/etc/ap_private.h +7199 -0
da4ml/codegen/cpp/source/ap_types/hls_math.h +27 -0
da4ml/codegen/cpp/source/ap_types/hls_stream.h +263 -0
da4ml/codegen/cpp/source/ap_types/utils/x_hls_utils.h +80 -0
da4ml/codegen/cpp/source/binder_util.hh +56 -0
da4ml/codegen/cpp/source/build_binder.mk +24 -0
da4ml/codegen/cpp/source/{vitis.h → vitis_bitshift.hh} +1 -1
da4ml/codegen/verilog/__init__.py +2 -3
da4ml/codegen/verilog/comb.py +65 -24
da4ml/codegen/verilog/io_wrapper.py +36 -141
da4ml/codegen/verilog/source/binder_util.hh +72 -0
da4ml/codegen/verilog/source/mux.v +58 -0
da4ml/codegen/verilog/source/negative.v +28 -0
da4ml/codegen/verilog/source/shift_adder.v +4 -1
da4ml/codegen/verilog/source/template.xdc +3 -0
da4ml/codegen/verilog/verilog_model.py +36 -12
da4ml/converter/__init__.py +0 -0
da4ml/converter/hgq2/parser.py +105 -0
da4ml/converter/hgq2/replica.py +383 -0
da4ml/trace/__init__.py +2 -2
da4ml/trace/fixed_variable.py +175 -16
da4ml/trace/fixed_variable_array.py +109 -4
da4ml/trace/ops/__init__.py +22 -6
da4ml/trace/ops/conv_utils.py +147 -15
da4ml/trace/ops/einsum_utils.py +9 -6
da4ml/trace/ops/reduce_utils.py +103 -0
da4ml/trace/pipeline.py +36 -34
da4ml/trace/tracer.py +37 -7
da4ml-0.3.0.post1.dist-info/METADATA +107 -0
da4ml-0.3.0.post1.dist-info/RECORD +64 -0
da4ml/codegen/cpp/source/vitis_bridge.h +0 -17
da4ml-0.2.1.dist-info/METADATA +0 -65
da4ml-0.2.1.dist-info/RECORD +0 -39
/da4ml/codegen/verilog/source/{ioutils.hh → ioutil.hh} +0 -0
{da4ml-0.2.1.dist-info → da4ml-0.3.0.post1.dist-info}/WHEEL +0 -0
{da4ml-0.2.1.dist-info → da4ml-0.3.0.post1.dist-info}/licenses/LICENSE +0 -0
{da4ml-0.2.1.dist-info → da4ml-0.3.0.post1.dist-info}/top_level.txt +0 -0

da4ml/_version.py CHANGED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.2.1'
-__version_tuple__ = version_tuple = (0, 2, 1)
+__version__ = version = '0.3.0.post1'
+__version_tuple__ = version_tuple = (0, 3, 0, 'post1')

da4ml/cmvm/types.py CHANGED Viewed

@@ -291,6 +291,9 @@ class Solution(NamedTuple):
             The output data after applying the operations defined in the solution.
         """
+        from ..trace.fixed_variable import FixedVariable
         buf = np.empty(len(self.ops), dtype=object)
         inp = np.asarray(inp)
@@ -320,39 +323,61 @@ class Solution(NamedTuple):
                     buf[i] = buf[op.id0] + bias
                 case 5:
                     buf[i] = op.data * op.qint.step  # const definition
+                case 6 | -6:  # MSB Mux
+                    id_c = op.data & 0xFFFFFFFF
+                    k, v0, v1 = buf[id_c], buf[op.id0], buf[op.id1]
+                    shift = (op.data >> 32) & 0xFFFFFFFF
+                    shift = shift if shift < 0x80000000 else shift - 0x100000000
+                    if op.opcode == -6:
+                        v1 = -v1
+                    if isinstance(k, FixedVariable):
+                        buf[i] = k.msb_mux(v0, v1 * 2**shift)
+                    else:
+                        qint_k = self.ops[id_c].qint
+                        if qint_k.min < 0:
+                            buf[i] = v0 if k < 0 else v1 * 2.0**shift
+                        else:
+                            _k, _i, _f = _minimal_kif(qint_k)
+                            buf[i] = v0 if k >= 2.0 ** (_i - 1) else v1 * 2.0**shift
                 case _:
                     raise ValueError(f'Unknown opcode {op.opcode} in {op}')
-        sf = 2.0 ** np.array(self.out_shifts)
+        sf = 2.0 ** np.array(self.out_shifts, dtype=np.float64)
         sign = np.where(self.out_negs, -1, 1)
-        out_idx = np.array(self.out_idxs)
+        out_idx = np.array(self.out_idxs, dtype=np.int32)
         mask = np.where(out_idx < 0, 0, 1)
         if debug:
+            operands = []
             for i, v in enumerate(buf):
                 op = self.ops[i]
                 match op.opcode:
                     case -1:
                         op_str = 'inp'
-                    case 0:
-                        op_str = f'buf[{op.id0}] + buf[{op.id1}]<<{op.data}'
-                    case 1:
-                        op_str = f'buf[{op.id0}] - buf[{op.id1}]<<{op.data}'
-                    case 2:
-                        op_str = f'relu(buf[{op.id0}])'
-                    case -2:
-                        op_str = f'relu(-buf[{op.id0}])'
-                    case 3:
-                        op_str = f'quantize(buf[{op.id0}])'
-                    case -3:
-                        op_str = f'quantize(-buf[{op.id0}])'
+                    case 0 | 1:
+                        _sign = '-' if op.opcode == 1 else '+'
+                        op_str = f'buf[{op.id0}] {_sign} buf[{op.id1}]<<{op.data}'
+                    case 2 | -2:
+                        _sign = '' if op.opcode == 2 else '-'
+                        op_str = f'relu({_sign}buf[{op.id0}])'
+                    case 3 | -3:
+                        _sign = '' if op.opcode == 3 else '-'
+                        op_str = f'quantize({_sign}buf[{op.id0}])'
                     case 4:
                         op_str = f'buf[{op.id0}] + {op.data * op.qint.step}'
                     case 5:
                         op_str = f'const {op.data * op.qint.step}'
+                    case 6 | -6:
+                        _sign = '-' if op.opcode == -6 else ''
+                        op_str = f'msb(buf[{op.data}]) ? buf[{op.id0}] : {_sign}buf[{op.id1}]'
                     case _:
                         raise ValueError(f'Unknown opcode {op.opcode} in {op}')
-                print(f'{op_str:24} |-> buf[{i}] = {v}')
+                result = f'|-> buf[{i}] = {v}'
+                operands.append((op_str, result))
+            max_len = max(len(op[0]) for op in operands)
+            for op_str, result in operands:
+                print(f'{op_str:<{max_len}} {result}')
         if dump:
             return buf
@@ -443,6 +468,61 @@ class Solution(NamedTuple):
             data = json.load(f)
         return cls.deserialize(data)
+    @property
+    def ref_count(self) -> np.ndarray:
+        """The number of references to the output elements in the solution."""
+        ref_count = np.zeros(len(self.ops), dtype=np.uint64)
+        for op in self.ops:
+            if op.opcode == -1:
+                continue
+            id0, id1 = op.id0, op.id1
+            if id0 != -1:
+                ref_count[id0] += 1
+            if id1 != -1:
+                ref_count[id1] += 1
+            if op.opcode in (6, -6):
+                # msb_mux operation
+                ref_count[op.data & 0xFFFFFFFF] += 1
+        for i in self.out_idxs:
+            if i < 0:
+                continue
+            ref_count[i] += 1
+        return ref_count
+    def to_binary(self):
+        n_in, n_out = self.shape
+        header_size_i32 = 2 + n_in + n_out * 3 + 1
+        header = np.concatenate(
+            [
+                [n_in, n_out, len(self.ops)],
+                self.inp_shift,
+                self.out_idxs,
+                self.out_shifts,
+                self.out_negs,
+            ],
+            axis=0,
+            dtype=np.int32,
+        )
+        assert len(header) == header_size_i32, f'Header size mismatch: {len(header)} != {header_size_i32}'
+        code = np.empty((len(self.ops), 8), dtype=np.int32)
+        for i, op in enumerate(self.ops):
+            buf = code[i]
+            buf[0] = op.opcode
+            buf[1] = op.id0
+            buf[2] = op.id1
+            buf[5:] = _minimal_kif(op.qint)
+            buf_i64 = buf[3:5].view(np.int64)
+            buf_i64[0] = op.data
+        data = np.concatenate([header, code.flatten()])
+        return data
+    def save_binary(self, path: str | Path):
+        """Dump the solution to a binary file."""
+        data = self.to_binary()
+        with open(path, 'wb') as f:
+            data.tofile(f)
 class CascadedSolution(NamedTuple):
     """A solution that implements cascaded matrix-vector multiplications through multiple CMVM stages.

da4ml/codegen/__init__.py CHANGED Viewed

@@ -1,11 +1,12 @@
-from .cpp import cpp_logic_and_bridge_gen
-from .verilog import comb_binder_gen, comb_logic_gen, generate_io_wrapper, pipeline_binder_gen, pipeline_logic_gen
+from .cpp import HLSModel, cpp_logic_and_bridge_gen
+from .verilog import VerilogModel, binder_gen, comb_logic_gen, generate_io_wrapper, pipeline_logic_gen
 __all__ = [
     'cpp_logic_and_bridge_gen',
     'comb_logic_gen',
     'generate_io_wrapper',
-    'comb_binder_gen',
     'pipeline_logic_gen',
-    'pipeline_binder_gen',
+    'binder_gen',
+    'HLSModel',
+    'VerilogModel',
 ]

da4ml/codegen/cpp/__init__.py CHANGED Viewed

@@ -1,3 +1,4 @@
 from .cpp_codegen import cpp_logic_and_bridge_gen
+from .hls_model import HLSModel
-__all__ = ['cpp_logic_and_bridge_gen']
+__all__ = ['cpp_logic_and_bridge_gen', 'HLSModel']

da4ml/codegen/cpp/cpp_codegen.py CHANGED Viewed

@@ -1,19 +1,19 @@
 from collections.abc import Callable
-from ...cmvm.types import Op, QInterval, Solution, _minimal_kif
+from ...cmvm.types import QInterval, Solution, _minimal_kif
 from ...trace.fixed_variable import _const_f
 def kif_to_vitis_type(k: bool | int = 1, i: int = 0, f: int = 0):
     if k == i == f == 0:
         f = 1
-    return f'ap_{"" if k else "u"}fixed<{k+i+f},{k+i}>'
+    return f'ap_{"" if k else "u"}fixed<{k + i + f},{k + i}>'
 def kif_to_hlslib_type(k: bool | int = 1, i: int = 0, f: int = 0):
     if k == i == f == 0:
         f = 1
-    return f'ac_fixed<{int(k)},{k+i+f},{k+i}>'
+    return f'ac_fixed<{int(k)},{k + i + f},{k + i}>'
 def get_typestr_fn(flavor: str):
@@ -27,13 +27,18 @@ def get_typestr_fn(flavor: str):
     return typestr_fn
-def ssa_gen(ops: list[Op], print_latency: bool, typestr_fn: Callable[[bool | int, int, int], str]):
-    all_kifs = map(_minimal_kif, (op.qint for op in ops))
+def ssa_gen(sol: Solution, print_latency: bool, typestr_fn: Callable[[bool | int, int, int], str]):
+    ops = sol.ops
+    all_kifs = list(map(_minimal_kif, (op.qint for op in ops)))
     all_types = list(map(lambda x: typestr_fn(*x), all_kifs))
     lines = []
+    ref_count = sol.ref_count
     for i, op in enumerate(ops):
+        if ref_count[i] == 0:
+            # Skip unused ops
+            continue
         _type = all_types[i]
         ref0 = f'v{op.id0}'
@@ -42,12 +47,10 @@ def ssa_gen(ops: list[Op], print_latency: bool, typestr_fn: Callable[[bool | int
             case -1:
                 # Input marker
                 val = f'inp[{ops[op.id0].id0}]'
             case 0 | 1:
                 # Common a+/-b<<shift op
                 ref1 = f'bit_shift<{op.data}>(v{op.id1})' if op.data != 0 else f'v{op.id1}'
                 val = f'{ref0} {"-" if op.opcode == 1 else "+"} {ref1}'
             case 2 | -2:
                 if op.opcode == 2:  # relu(inp)
                     if ops[op.id0].qint.min < 0:
@@ -59,11 +62,9 @@ def ssa_gen(ops: list[Op], print_latency: bool, typestr_fn: Callable[[bool | int
                         val = f'{ref0} > 0 ? {_type}(0) : {_type}(-{ref0})'
                     else:
                         val = f'-{ref0}'
             case 3 | -3:
                 # Explicit quantization op, done implicitly via assignment
                 val = ref0 if op.opcode == 3 else f'-{ref0}'
             case 4:
                 # Constant addition
                 _number = op.data * op.qint.step
@@ -71,10 +72,20 @@ def ssa_gen(ops: list[Op], print_latency: bool, typestr_fn: Callable[[bool | int
                 f = _const_f(mag)
                 const_type_str = typestr_fn(*_minimal_kif(QInterval(mag, mag, 2.0**-f)))
                 val = f'{ref0} {sign} {const_type_str}({mag})'
             case 5:
+                # Define constant
                 _number = op.data * op.qint.step
                 val = f'{_number}'
+            case 6 | -6:
+                # MSB Mux
+                id_c = op.data & 0xFFFFFFFF
+                bw_k = sum(all_kifs[id_c])
+                shift = (op.data >> 32) & 0xFFFFFFFF
+                shift = shift if shift < 0x80000000 else shift - 0x100000000
+                ref_k = f'v{id_c}[{bw_k - 1}]'
+                sign = '-' if op.opcode == -6 else ''
+                ref1 = f'v{op.id1}' if shift == 0 else f'bit_shift<{shift}>(v{op.id1})'
+                val = f'{ref_k} ? {_type}({ref0}) : {_type}({sign}{ref1})'
             case _:
                 raise ValueError(f'Unsupported opcode: {op.opcode}')
@@ -103,6 +114,15 @@ def output_gen(sol: Solution, typestr_fn: Callable[[bool | int, int, int], str])
     return lines
+def get_io_types(sol: Solution, flavor: str):
+    typestr_fn = get_typestr_fn(flavor)
+    in_kif = map(max, zip(*map(_minimal_kif, sol.inp_qint)))
+    inp_type = typestr_fn(*in_kif)
+    out_kif = map(max, zip(*map(_minimal_kif, sol.out_qint)))
+    out_type = typestr_fn(*out_kif)
+    return inp_type, out_type
 def cpp_logic_and_bridge_gen(
     sol: Solution,
     fn_name: str,
@@ -113,36 +133,49 @@ def cpp_logic_and_bridge_gen(
     print_latency: bool = False,
 ):
     typestr_fn = get_typestr_fn(flavor)
-    in_kif = map(max, zip(*map(_minimal_kif, sol.inp_qint)))
-    inp_type = typestr_fn(*in_kif)
-    out_kif = map(max, zip(*map(_minimal_kif, sol.out_qint)))
-    out_type = typestr_fn(*out_kif)
+    inp_t, out_t = get_io_types(sol, flavor)
     n_in, n_out = sol.shape
     template_def = 'template <typename inp_t, typename out_t>'
     fn_signature = f'void {fn_name}(inp_t inp[{n_in}], out_t out[{n_out}])'
     pragmas = pragmas or []
-    ssa_lines = ssa_gen(sol.ops, print_latency=print_latency, typestr_fn=typestr_fn)
+    ssa_lines = ssa_gen(sol, print_latency=print_latency, typestr_fn=typestr_fn)
     output_lines = output_gen(sol, typestr_fn=typestr_fn)
     indent = ' ' * n_indent
     base_indent = indent * n_base_indent
     body_indent = '\n' + base_indent + indent
     code = f"""{base_indent}{template_def}
-{base_indent}{fn_signature} {{ // {inp_type} -> {out_type}
-{body_indent}{body_indent.join(pragmas)}
+{base_indent}{fn_signature} {{ // {inp_t} -> {out_t}
+{base_indent + indent}{body_indent.join(pragmas)}
 {body_indent}{body_indent.join(ssa_lines)}
 {body_indent}{body_indent.join(output_lines)}
 {base_indent}}}
 """
-    bridge = f"""#include "bridge.h"
-#include "fn.h"
+    bridge = f"""#include "binder_util.hh"
+#include "{fn_name}.hh"
+struct {fn_name}_config {{
+    static const size_t N_inp = {n_in};
+    static const size_t N_out = {n_out};
+    typedef {inp_t} inp_t;
+    typedef {out_t} out_t;
+    constexpr static auto f = {fn_name}<inp_t, out_t>;
+}};
 extern "C" {{
-void bridge(double *inp, double *out, int size) {{
-    auto fn = {fn_name}<{inp_type}, {out_type}>;
-    vitis_bridge<{inp_type}, {out_type}, {n_in}, {n_out}>(fn, inp, out, size);
+bool openmp_enabled() {{
+    return _openmp;
+}}
+void inference_f64(double *inp, double *out, size_t size) {{
+    batch_inference<{fn_name}_config, double>(inp, out, size);
+}}
+void inference_f32(float *inp, float *out, size_t size) {{
+    batch_inference<{fn_name}_config, float>(inp, out, size);
 }}
 }}"""
     return code, bridge

da4ml/codegen/cpp/hls_model.py ADDED Viewed

@@ -0,0 +1,252 @@
+import ctypes
+import os
+import re
+import shutil
+import subprocess
+import sys
+from collections.abc import Sequence
+from pathlib import Path
+from typing import TypeVar
+from uuid import uuid4
+import numpy as np
+from numpy.typing import NDArray
+from da4ml.cmvm.types import Solution
+from da4ml.codegen.cpp.cpp_codegen import cpp_logic_and_bridge_gen, get_io_types
+from ... import codegen
+from ...cmvm.types import _minimal_kif
+T = TypeVar('T', bound=np.floating)
+class HLSModel:
+    def __init__(
+        self,
+        solution: Solution,
+        prj_name: str,
+        path: str | Path,
+        flavor: str = 'vitis',
+        print_latency: bool = True,
+        part_name: str = 'xcvu13p-flga2577-2-e',
+        pragma: Sequence[str] | None = None,
+        clock_period: int = 5,
+        clock_uncertainty: float = 0.1,
+        io_delay_minmax: tuple[float, float] = (0.2, 0.4),
+    ):
+        self._solution = solution
+        self._prj_name = prj_name
+        self._path = Path(path)
+        self._flavor = flavor.lower()
+        assert self._flavor in ('vitis', 'hlslib'), f'Unsupported HLS flavor: {self._flavor}'
+        self._print_latency = print_latency
+        self._part_name = part_name
+        self._clock_period = clock_period
+        self._clock_uncertainty = clock_uncertainty
+        self._io_delay_minmax = io_delay_minmax
+        self.__src_root = Path(codegen.__file__).parent
+        self._lib = None
+        self._uuid = None
+        if pragma is None:
+            if self._flavor == 'vitis':
+                self._pragma = (
+                    '#pragma HLS ARRAY_PARTITION variable=inp complete',
+                    '#pragma HLS ARRAY_PARTITION variable=out complete',
+                    '#pragma HLS PIPELINE II=1',
+                )
+            else:
+                self._pragma = ()
+        else:
+            self._pragma = tuple(pragma)
+    def write(self):
+        if not self._path.exists():
+            self._path.mkdir(parents=True, exist_ok=True)
+        template_def, bridge = cpp_logic_and_bridge_gen(
+            self._solution,
+            self._prj_name,
+            self._flavor,
+            ['#pragma HLS INLINE'],
+            4,
+            0,
+            self._print_latency,
+        )
+        headers = ['#pragma once', '#include "bitshift.hh"']
+        inp_type, out_type = get_io_types(self._solution, self._flavor)
+        n_in, n_out = len(self._solution.inp_qint), len(self._solution.out_qint)
+        template_signature = (
+            f'template <typename inp_t, typename out_t>\nvoid {self._prj_name}(inp_t inp[{n_in}], out_t out[{n_out}]);'
+        )
+        fn_signature = f'void {self._prj_name}_fn({inp_type} inp[{n_in}], {out_type} out[{n_out}])'
+        with open(self._path / f'{self._prj_name}.hh', 'w') as f:
+            f.write('\n'.join(headers) + '\n\n')
+            f.write(f'{template_signature}\n\n{fn_signature};\n')
+        pragma_str = '\n'.join(self._pragma)
+        cpp_def = f"""
+#include "{self._prj_name}.hh"
+{template_def}
+{fn_signature} {{
+{pragma_str}
+    {self._prj_name}<{inp_type}, {out_type}>(inp, out);
+}}
+"""
+        with open(self._path / f'{self._prj_name}.cc', 'w') as f:
+            f.write(cpp_def)
+        with open(self._path / f'{self._prj_name}_bridge.cc', 'w') as f:
+            f.write(bridge)
+        shutil.copy(self.__src_root / 'cpp/source/binder_util.hh', self._path)
+        shutil.copy(self.__src_root / f'cpp/source/{self._flavor}_bitshift.hh', self._path / 'bitshift.hh')
+        shutil.copy(self.__src_root / 'cpp/source/build_binder.mk', self._path)
+        if self._flavor == 'vitis':
+            shutil.copytree(self.__src_root / 'cpp/source/ap_types', self._path / 'ap_types', dirs_exist_ok=True)
+        else:
+            pass
+        self._solution.save(self._path / 'project.json')
+    def _compile(self, verbose=False, openmp=True, o3: bool = False, clean=True):
+        """Same as compile, but will not write to the library
+        Parameters
+        ----------
+        verbose : bool, optional
+            Verbose output, by default False
+        openmp : bool, optional
+            Enable openmp, by default True
+        o3 : bool | None, optional
+            Turn on -O3 flag, by default False
+        clean : bool, optional
+            Remove obsolete shared object files, by default True
+        Raises
+        ------
+        RuntimeError
+            If compilation fails
+        """
+        self._uuid = str(uuid4())
+        args = ['make', '-f', 'build_binder.mk']
+        env = os.environ.copy()
+        env['PRJ_NAME'] = self._prj_name
+        env['STAMP'] = self._uuid
+        env['EXTRA_CXXFLAGS'] = '-fopenmp' if openmp else ''
+        if o3:
+            args.append('fast')
+        if clean:
+            m = re.compile(r'^lib.*[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\.so$')
+            for p in self._path.iterdir():
+                if not p.is_dir() and m.match(p.name):
+                    p.unlink()
+        try:
+            r = subprocess.run(args, env=env, check=True, cwd=self._path, capture_output=not verbose)
+        except subprocess.CalledProcessError as e:
+            print(e.stderr.decode(), file=sys.stderr)
+            print(e.stdout.decode(), file=sys.stdout)
+            raise RuntimeError('Compilation failed!!') from e
+        if r.returncode != 0:
+            print(r.stderr.decode(), file=sys.stderr)
+            print(r.stdout.decode(), file=sys.stderr)
+            raise RuntimeError('Compilation failed!!')
+        self._load_lib(self._uuid)
+    def _load_lib(self, uuid: str | None = None):
+        uuid = uuid if uuid is not None else self._uuid
+        self._uuid = uuid
+        lib_path = self._path / f'lib{self._prj_name}_{uuid}.so'
+        if not lib_path.exists():
+            raise RuntimeError(f'Library {lib_path} does not exist')
+        self._lib = ctypes.CDLL(str(lib_path))
+    def compile(self, verbose=False, openmp=True, o3: bool = False, clean=True):
+        """Compile the model to a shared object file
+        Parameters
+        ----------
+        verbose : bool, optional
+            Verbose output, by default False
+        openmp : bool, optional
+            Enable openmp, by default True
+        o3 : bool | None, optional
+            Turn on -O3 flag, by default False
+        clean : bool, optional
+            Remove obsolete shared object files, by default True
+        Raises
+        ------
+        RuntimeError
+            If compilation fails
+        """
+        self.write()
+        self._compile(verbose, openmp, o3, clean)
+    def predict(self, data: NDArray[T]) -> NDArray[T]:
+        """Run the model on the input data.
+        Parameters
+        ----------
+        data : NDArray[np.floating]
+            Input data to the model. The shape is ignored, and the number of samples is
+            determined by the size of the data.
+        Returns
+        -------
+        NDArray[np.floating]
+            Output of the model in shape (n_samples, output_size).
+        """
+        assert self._lib is not None, 'Library not loaded, call .compile() first.'
+        inp_size, out_size = self._solution.shape
+        dtype = data.dtype
+        if dtype not in (np.float32, np.float64):
+            raise TypeError(f'Unsupported input data type: {dtype}. Expected float32 or float64.')
+        c_dtype = ctypes.c_float if dtype == np.float32 else ctypes.c_double
+        assert data.size % inp_size == 0, f'Input size {data.size} is not divisible by {inp_size}'
+        n_sample = data.size // inp_size
+        inp_data = np.ascontiguousarray(data)
+        out_data = np.empty(n_sample * out_size, dtype=dtype)
+        inp_buf = inp_data.ctypes.data_as(ctypes.POINTER(c_dtype))
+        out_buf = out_data.ctypes.data_as(ctypes.POINTER(c_dtype))
+        if dtype == np.float32:
+            self._lib.inference_f32(inp_buf, out_buf, n_sample)
+        else:
+            self._lib.inference_f64(inp_buf, out_buf, n_sample)
+        return out_data.reshape(n_sample, out_size)  # type: ignore
+    def __repr__(self):
+        inp_size, out_size = self._solution.shape
+        inp_size, out_size = self._solution.shape
+        cost = round(self._solution.cost)
+        inp_kifs = tuple(zip(*map(_minimal_kif, self._solution.inp_qint)))
+        out_kifs = tuple(zip(*map(_minimal_kif, self._solution.out_qint)))
+        in_bits, out_bits = np.sum(inp_kifs), np.sum(out_kifs)
+        spec = f"""Top Function: {self._prj_name}\n====================
+{inp_size} ({in_bits} bits) -> {out_size} ({out_bits} bits)
+combinational @ delay={self._solution.latency}
+Estimated cost: {cost} LUTs"""
+        is_compiled = self._lib is not None
+        if is_compiled:
+            assert self._uuid is not None
+            openmp = 'with OpenMP' if self._lib.openmp_enabled() else ''  # type: ignore
+            spec += f'\nEmulator is compiled {openmp} ({self._uuid[-12:]})'
+        else:
+            spec += '\nEmulator is **not compiled**'
+        return spec

da4ml/codegen/cpp/source/ap_types/ap_binary.h ADDED Viewed

@@ -0,0 +1,78 @@
+/*
+ * Copyright 2024-2024 Chang Sun
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __AP_BINARY_H__
+#define __AP_BINARY_H__
+#include <ap_fixed.h>
+#include <cassert>
+struct ap_binary {
+    bool is_one;
+    INLINE ap_binary() {}
+    INLINE ap_binary(const bool value) : is_one(value) {}
+    INLINE ap_binary(const ap_binary &value) : is_one(value.is_one) {}
+    INLINE operator int() const { return is_one ? 1 : -1; }
+    INLINE operator float() const { return is_one ? 1.0 : -1.0; }
+    template <typename T> INLINE ap_binary(T value) : is_one(value >= 0) {}
+    template <typename T>
+    INLINE auto operator=(T value) -> decltype(std::enable_if_t<std::is_same<T, ap_binary>::value, int>()) {
+        is_one = value.is_one;
+        return 0;
+    }
+    template <typename T>
+    INLINE auto operator=(T value) -> decltype(std::enable_if_t<!std::is_same<T, ap_binary>::value, int>()) {
+        is_one = value >= 0;
+        return 0;
+    }
+    INLINE ap_fixed<2, 1> value() const { return is_one ? 1 : -1; }
+    template <typename T> INLINE bool operator==(T value) const { return value() == value; }
+    template <typename T> INLINE bool operator!=(T value) const { return value() != value; }
+    template <typename T> INLINE bool operator<(T value) const { return value() < value; }
+    template <typename T> INLINE bool operator<=(T value) const { return value() <= value; }
+    template <typename T> INLINE bool operator>(T value) const { return value() > value; }
+    template <typename T> INLINE bool operator>=(T value) const { return value() >= value; }
+    template <typename T> INLINE ap_binary operator+(T value) const { return ap_binary(is_one || value.is_one); }
+    template <typename T> INLINE ap_binary operator*(T value) const { return ap_binary(is_one && value.is_one); }
+    template <typename T> INLINE ap_binary operator-(T value) const { return ap_binary(is_one && !value.is_one); }
+    template <typename T> INLINE T operator+(T value) { return value + value(); }
+    template <typename T> INLINE T operator*(T value) { return value * value(); }
+    template <typename T> INLINE T operator-(T value) { return value - value(); }
+};
+typedef ap_fixed<2, 1, AP_RND_CONV, AP_SAT_SYM> ap_ternary;
+#endif

da4ml 0.2.1__py3-none-any.whl → 0.3.0.post1__py3-none-any.whl

Potentially problematic release.

da4ml 0.2.1__py3-none-any.whl → 0.3.0.post1__py3-none-any.whl