PyPI - da4ml - Versions diffs - 0.3.2__tar.gz → 0.4.0__tar.gz - Mend

da4ml 0.3.2__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of da4ml might be problematic. Click here for more details.

Files changed (106) hide show

{da4ml-0.3.2/src/da4ml.egg-info → da4ml-0.4.0}/PKG-INFO RENAMED Viewed

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: da4ml
-Version: 0.3.2
-Summary: Digital Arithmetic for Machine Learning
+Version: 0.4.0
+Summary: Distributed Arithmetic for Machine Learning
 Author-email: Chang Sun <chsun@cern.ch>
 License: GNU Lesser General Public License v3 (LGPLv3)
 Project-URL: repository, https://github.com/calad0i/da4ml

{da4ml-0.3.2 → da4ml-0.4.0}/docs/faq.md RENAMED Viewed

@@ -5,6 +5,9 @@ Two things:
 1. Converting constant-matrix-vector multiplications (CMVMs) into optimized adder graphs with distributed arithmetic for FPGA implementation.
 2. Converting (a part of) neural networks to fully parallel HDL or HLS with the CMVM optimization above.
+## Should I use the standalone flow or the hls4ml-integrated flow?
+If the network is supported by da4ml standalone, it is **recommended to use the standalone flow**. In most cases, the standalone flow gives better latency and timing, and is orders of magnitude faster in synthesis time. However, on some occasions, the hls4ml-integrated flow can provide better timing when routing is highly challenging for the standalone flow. If the network is not supported by da4ml standalone (e.g., it contains unsupported layers or operations), then the hls4ml-integrated flow is the only option.
 ## So does da4ml only work with neural networks with II=1?
 No. When integrated with hls4ml, da4ml only requires that **each CMVM operation is unrolled (II=1)**. This is different from unrolling the whole model, e.g., convolution layers can still have II>1 by reusing the same CMVM kernel for different input windows.

{da4ml-0.3.2 → da4ml-0.4.0}/docs/getting_started.md RENAMED Viewed

@@ -1,6 +1,6 @@
 # Getting Started with da4ml
-da4ml can be used in three different ways:
+da4ml can be used in three different ways. For standalone code generation, it is recommended to use the functional API or the HGQ2 integration. See the [FAQ](./faq.html) for more details on when to use which flow.
 ## functional API:

{da4ml-0.3.2 → da4ml-0.4.0}/pyproject.toml RENAMED Viewed

@@ -5,7 +5,7 @@ requires = [ "setuptools>=67.8", "setuptools-scm>=8" ]
 [project]
 name = "da4ml"
-description = "Digital Arithmetic for Machine Learning"
+description = "Distributed Arithmetic for Machine Learning"
 readme = "README.md"
 keywords = [
   "CMVM",

{da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/_version.py RENAMED Viewed

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
-__version__ = version = '0.3.2'
-__version_tuple__ = version_tuple = (0, 3, 2)
+__version__ = version = '0.4.0'
+__version_tuple__ = version_tuple = (0, 4, 0)
-__commit_id__ = commit_id = 'g01e84ad19'
+__commit_id__ = commit_id = 'gb2796d8af'

da4ml-0.4.0/src/da4ml/codegen/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+from .hls import HLSModel
+from .rtl import RTLModel, VerilogModel, VHDLModel
+__all__ = [
+    'HLSModel',
+    'VerilogModel',
+    'VHDLModel',
+    'RTLModel',
+]

da4ml-0.4.0/src/da4ml/codegen/hls/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .hls_codegen import hls_logic_and_bridge_gen
+from .hls_model import HLSModel
+__all__ = ['hls_logic_and_bridge_gen', 'HLSModel']

da4ml-0.3.2/src/da4ml/codegen/cpp/cpp_codegen.py → da4ml-0.4.0/src/da4ml/codegen/hls/hls_codegen.py RENAMED Viewed

@@ -16,12 +16,19 @@ def kif_to_hlslib_type(k: bool | int = 1, i: int = 0, f: int = 0):
     return f'ac_fixed<{int(k)},{k + i + f},{k + i}>'
+def kif_to_oneapi_type(k: bool | int = 1, i: int = 0, f: int = 0):
+    # OneAPI requires at least 2 bits for all ac_fixed as of 2025.1
+    return f'ac_fixed<{int(k)},{max(k + i + f, 2)},{k + i}>'
 def get_typestr_fn(flavor: str):
     match flavor.lower():
         case 'vitis':
             typestr_fn = kif_to_vitis_type
         case 'hlslib':
             typestr_fn = kif_to_hlslib_type
+        case 'oneapi':
+            typestr_fn = kif_to_oneapi_type
         case _:
             raise ValueError(f'Unsupported flavor: {flavor}')
     return typestr_fn
@@ -46,18 +53,18 @@ def ssa_gen(sol: Solution, print_latency: bool, typestr_fn: Callable[[bool | int
         match op.opcode:
             case -1:
                 # Input marker
-                val = f'inp[{ops[op.id0].id0}]'
+                val = f'model_inp[{op.id0}]'
             case 0 | 1:
                 # Common a+/-b<<shift op
                 ref1 = f'bit_shift<{op.data}>(v{op.id1})' if op.data != 0 else f'v{op.id1}'
                 val = f'{ref0} {"-" if op.opcode == 1 else "+"} {ref1}'
             case 2 | -2:
-                if op.opcode == 2:  # relu(inp)
+                if op.opcode == 2:  # relu(model_inp)
                     if ops[op.id0].qint.min < 0:
                         val = f'{ref0} > 0 ? {_type}({ref0}) : {_type}(0)'
                     else:
                         val = ref0
-                else:  # relu(-inp)
+                else:  # relu(-model_inp)
                     if ops[op.id0].qint.max > 0:
                         val = f'{ref0} > 0 ? {_type}(0) : {_type}(-{ref0})'
                     else:
@@ -105,15 +112,15 @@ def output_gen(sol: Solution, typestr_fn: Callable[[bool | int, int, int], str])
     lines = []
     for i, idx in enumerate(sol.out_idxs):
         if idx < 0:
-            lines.append(f'out[{i}] = 0;')
+            lines.append(f'model_out[{i}] = 0;')
             continue
         _type = typestr_fn(*_minimal_kif(sol.out_qint[i]))
         shift = sol.out_shifts[i]
         neg_str = '-' if sol.out_negs[i] else ''
         if shift == 0:
-            lines.append(f'out[{i}] = {_type}({neg_str}v{idx});')
+            lines.append(f'model_out[{i}] = {_type}({neg_str}v{idx});')
         else:
-            lines.append(f'out[{i}] = {_type}({neg_str}bit_shift<{shift}>(v{idx}));')
+            lines.append(f'model_out[{i}] = {_type}({neg_str}bit_shift<{shift}>(v{idx}));')
     return lines
@@ -126,7 +133,7 @@ def get_io_types(sol: Solution, flavor: str):
     return inp_type, out_type
-def cpp_logic_and_bridge_gen(
+def hls_logic_and_bridge_gen(
     sol: Solution,
     fn_name: str,
     flavor: str,
@@ -140,7 +147,7 @@ def cpp_logic_and_bridge_gen(
     n_in, n_out = sol.shape
     template_def = 'template <typename inp_t, typename out_t>'
-    fn_signature = f'void {fn_name}(inp_t inp[{n_in}], out_t out[{n_out}])'
+    fn_signature = f'void {fn_name}(inp_t model_inp[{n_in}], out_t model_out[{n_out}])'
     pragmas = pragmas or []
     ssa_lines = ssa_gen(sol, print_latency=print_latency, typestr_fn=typestr_fn)
@@ -173,12 +180,12 @@ bool openmp_enabled() {{
     return _openmp;
 }}
-void inference_f64(double *inp, double *out, size_t size) {{
-    batch_inference<{fn_name}_config, double>(inp, out, size);
+void inference_f64(double *model_inp, double *model_out, size_t size) {{
+    batch_inference<{fn_name}_config, double>(model_inp, model_out, size);
 }}
-void inference_f32(float *inp, float *out, size_t size) {{
-    batch_inference<{fn_name}_config, float>(inp, out, size);
+void inference_f32(float *model_inp, float *model_out, size_t size) {{
+    batch_inference<{fn_name}_config, float>(model_inp, model_out, size);
 }}
 }}"""
     return code, bridge

{da4ml-0.3.2/src/da4ml/codegen/cpp → da4ml-0.4.0/src/da4ml/codegen/hls}/hls_model.py RENAMED Viewed

@@ -13,7 +13,7 @@ import numpy as np
 from numpy.typing import NDArray
 from da4ml.cmvm.types import Solution
-from da4ml.codegen.cpp.cpp_codegen import cpp_logic_and_bridge_gen, get_io_types
+from da4ml.codegen.hls.hls_codegen import get_io_types, hls_logic_and_bridge_gen
 from ... import codegen
 from ...cmvm.types import _minimal_kif
@@ -39,7 +39,7 @@ class HLSModel:
         self._prj_name = prj_name
         self._path = Path(path)
         self._flavor = flavor.lower()
-        assert self._flavor in ('vitis', 'hlslib'), f'Unsupported HLS flavor: {self._flavor}'
+        assert self._flavor in ('vitis', 'hlslib', 'oneapi'), f'Unsupported HLS flavor: {self._flavor}'
         self._print_latency = print_latency
         self._part_name = part_name
         self._clock_period = clock_period
@@ -64,7 +64,7 @@ class HLSModel:
     def write(self):
         if not self._path.exists():
             self._path.mkdir(parents=True, exist_ok=True)
-        template_def, bridge = cpp_logic_and_bridge_gen(
+        template_def, bridge = hls_logic_and_bridge_gen(
             self._solution,
             self._prj_name,
             self._flavor,
@@ -104,11 +104,11 @@ class HLSModel:
         with open(self._path / f'{self._prj_name}_bridge.cc', 'w') as f:
             f.write(bridge)
-        shutil.copy(self.__src_root / 'cpp/source/binder_util.hh', self._path)
-        shutil.copy(self.__src_root / f'cpp/source/{self._flavor}_bitshift.hh', self._path / 'bitshift.hh')
-        shutil.copy(self.__src_root / 'cpp/source/build_binder.mk', self._path)
+        shutil.copy(self.__src_root / 'hls/source/binder_util.hh', self._path)
+        shutil.copy(self.__src_root / f'hls/source/{self._flavor}_bitshift.hh', self._path / 'bitshift.hh')
+        shutil.copy(self.__src_root / 'hls/source/build_binder.mk', self._path)
         if self._flavor == 'vitis':
-            shutil.copytree(self.__src_root / 'cpp/source/ap_types', self._path / 'ap_types', dirs_exist_ok=True)
+            shutil.copytree(self.__src_root / 'hls/source/ap_types', self._path / 'ap_types', dirs_exist_ok=True)
         else:
             pass

da4ml-0.4.0/src/da4ml/codegen/hls/source/binder_util.hh ADDED Viewed

@@ -0,0 +1,50 @@
+#pragma once
+#include <cstddef>
+#ifdef _OPENMP
+#include <algorithm>
+#include <omp.h>
+constexpr bool _openmp = true;
+#else
+constexpr bool _openmp = false;
+#endif
+template <typename CONFIG_T, typename T> void _inference(T *c_inp, T *c_out, size_t n_samples) {
+    typename CONFIG_T::inp_t in_fixed_buf[CONFIG_T::N_inp];
+    typename CONFIG_T::out_t out_fixed_buf[CONFIG_T::N_out];
+    for (size_t i = 0; i < n_samples; ++i) {
+        size_t offset_in = i * CONFIG_T::N_inp;
+        size_t offset_out = i * CONFIG_T::N_out;
+        for (size_t j = 0; j < CONFIG_T::N_inp; ++j) {
+            in_fixed_buf[j] = c_inp[offset_in + j];
+        }
+        CONFIG_T::f(in_fixed_buf, out_fixed_buf);
+        for (size_t j = 0; j < CONFIG_T::N_out; ++j) {
+            c_out[offset_out + j] = out_fixed_buf[j];
+        }
+    }
+}
+template <typename CONFIG_T, typename T> void batch_inference(T *c_inp, T *c_out, size_t n_samples) {
+#ifdef _OPENMP
+    size_t n_max_threads = omp_get_max_threads();
+    size_t n_samples_per_thread = std::max<size_t>(n_samples / n_max_threads, 32);
+    size_t n_thread = n_samples / n_samples_per_thread;
+    n_thread += (n_samples % n_samples_per_thread) ? 1 : 0;
+#pragma omp parallel for num_threads(n_thread) schedule(static)
+    for (size_t i = 0; i < n_thread; ++i) {
+        size_t start = i * n_samples_per_thread;
+        size_t end = std::min<size_t>(start + n_samples_per_thread, n_samples);
+        size_t n_samples_this_thread = end - start;
+        size_t offset_in = start * CONFIG_T::N_inp;
+        size_t offset_out = start * CONFIG_T::N_out;
+        _inference<CONFIG_T, T>(&c_inp[offset_in], &c_out[offset_out], n_samples_this_thread);
+    }
+#else
+    _inference<CONFIG_T, T>(c_inp, c_out, n_samples);
+#endif
+}

{da4ml-0.3.2/src/da4ml/codegen/cpp → da4ml-0.4.0/src/da4ml/codegen/hls}/source/vitis_bitshift.hh RENAMED Viewed

@@ -1,14 +1,16 @@
 #pragma once
-#include "ap_types/ap_fixed.h"
+#include "ap_fixed.h"
-template <int s, int b, int i, ap_q_mode Q, ap_o_mode O, int N> ap_fixed<b, i + s> bit_shift(ap_fixed<b, i, Q, O, N> x) {
+template <int s, int b, int i, ap_q_mode Q, ap_o_mode O, int N>
+ap_fixed<b, i + s> bit_shift(ap_fixed<b, i, Q, O, N> x) {
 #pragma HLS INLINE
     ap_fixed<b, i + s> r;
     r.range() = x.range();
     return r;
 };
-template <int s, int b, int i, ap_q_mode Q, ap_o_mode O, int N> ap_ufixed<b, i + s> bit_shift(ap_ufixed<b, i, Q, O, N> x) {
+template <int s, int b, int i, ap_q_mode Q, ap_o_mode O, int N>
+ap_ufixed<b, i + s> bit_shift(ap_ufixed<b, i, Q, O, N> x) {
 #pragma HLS INLINE
     ap_ufixed<b, i + s> r;
     r.range() = x.range();

da4ml-0.4.0/src/da4ml/codegen/rtl/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+from .rtl_model import RTLModel, VerilogModel, VHDLModel
+from .verilog import comb_logic_gen as verilog_comb_logic_gen
+from .verilog import generate_io_wrapper as verilog_generate_io_wrapper
+from .vhdl import comb_logic_gen as vhdl_comb_logic_gen
+from .vhdl import generate_io_wrapper as vhdl_generate_io_wrapper
+__all__ = [
+    'RTLModel',
+    'VerilogModel',
+    'VHDLModel',
+    'verilog_comb_logic_gen',
+    'verilog_generate_io_wrapper',
+    'vhdl_comb_logic_gen',
+    'vhdl_generate_io_wrapper',
+]

{da4ml-0.3.2/src/da4ml/codegen/verilog/source → da4ml-0.4.0/src/da4ml/codegen/rtl/common_source}/binder_util.hh RENAMED Viewed

@@ -19,7 +19,7 @@ std::enable_if_t<CONFIG_T::II != 0> _inference(int32_t *c_inp, int32_t *c_out, s
         if (t_inp < n_samples * CONFIG_T::II && t_inp % CONFIG_T::II == 0) {
             write_input<CONFIG_T::N_inp, CONFIG_T::max_inp_bw>(
-                dut->inp, &c_inp[t_inp / CONFIG_T::II * CONFIG_T::N_inp]
+                dut->model_inp, &c_inp[t_inp / CONFIG_T::II * CONFIG_T::N_inp]
             );
         }
@@ -28,7 +28,7 @@ std::enable_if_t<CONFIG_T::II != 0> _inference(int32_t *c_inp, int32_t *c_out, s
         if (t_inp > CONFIG_T::latency && t_out % CONFIG_T::II == 0) {
             read_output<CONFIG_T::N_out, CONFIG_T::max_out_bw>(
-                dut->out, &c_out[t_out / CONFIG_T::II * CONFIG_T::N_out]
+                dut->model_out, &c_out[t_out / CONFIG_T::II * CONFIG_T::N_out]
             );
         }
@@ -44,9 +44,9 @@ std::enable_if_t<CONFIG_T::II == 0> _inference(int32_t *c_inp, int32_t *c_out, s
     auto dut = std::make_unique<typename CONFIG_T::dut_t>();
     for (size_t i = 0; i < n_samples; ++i) {
-        write_input<CONFIG_T::N_inp, CONFIG_T::max_inp_bw>(dut->inp, &c_inp[i * CONFIG_T::N_inp]);
+        write_input<CONFIG_T::N_inp, CONFIG_T::max_inp_bw>(dut->model_inp, &c_inp[i * CONFIG_T::N_inp]);
         dut->eval();
-        read_output<CONFIG_T::N_out, CONFIG_T::max_out_bw>(dut->out, &c_out[i * CONFIG_T::N_out]);
+        read_output<CONFIG_T::N_out, CONFIG_T::max_out_bw>(dut->model_out, &c_out[i * CONFIG_T::N_out]);
     }
     dut->final();

{da4ml-0.3.2/src/da4ml/codegen/verilog/source → da4ml-0.4.0/src/da4ml/codegen/rtl/common_source}/build_binder.mk RENAMED Viewed

@@ -7,10 +7,16 @@ CFLAGS = -std=c++17 -fPIC
 LINKFLAGS = $(INCLUDES) $(WARNINGS)
 LIBNAME = lib$(VM_PREFIX)_$(STAMP).so
 N_JOBS ?= $(shell nproc)
+VERILATOR_FLAGS ?=
+$(VM_PREFIX).v: $(wildcard $(VM_PREFIX).vhd)
+# vhdl specific - convert to verilog first for verilating
+	mkdir -p obj_dir
+	ghdl -a --std=08 --workdir=obj_dir multiplier.vhd mux.vhd negative.vhd shift_adder.vhd $(wildcard $(VM_PREFIX:_wrapper=)_stage*.vhd) $(wildcard $(VM_PREFIX:_wrapper=).vhd) $(VM_PREFIX).vhd
+	ghdl synth --std=08 --workdir=obj_dir --out=verilog $(VM_PREFIX) > $(VM_PREFIX).v
 ./obj_dir/libV$(VM_PREFIX).a ./obj_dir/libverilated.a ./obj_dir/V$(VM_PREFIX)__ALL.a: $(VM_PREFIX).v
-	verilator --cc -j $(N_JOBS) -Wall -build $(VM_PREFIX).v --prefix V$(VM_PREFIX) -CFLAGS "$(CFLAGS)"
+	verilator --cc -j $(N_JOBS) -build $(VM_PREFIX).v --prefix V$(VM_PREFIX) $(VERILATOR_FLAGS) -CFLAGS "$(CFLAGS)"
 $(LIBNAME): ./obj_dir/libV$(VM_PREFIX).a ./obj_dir/libverilated.a ./obj_dir/V$(VM_PREFIX)__ALL.a $(VM_PREFIX)_binder.cc
 	$(CXX) $(CFLAGS) $(LINKFLAGS) $(CXXFLAGS2) -pthread -shared -o $(LIBNAME) $(VM_PREFIX)_binder.cc ./obj_dir/libV$(VM_PREFIX).a ./obj_dir/libverilated.a ./obj_dir/V$(VM_PREFIX)__ALL.a $(EXTRA_CXXFLAGS)

{da4ml-0.3.2/src/da4ml/codegen/verilog/source → da4ml-0.4.0/src/da4ml/codegen/rtl/common_source}/build_prj.tcl RENAMED Viewed

@@ -1,20 +1,41 @@
 set project_name "${PROJECT_NAME}"
 set device "${DEVICE}"
+set source_type "${SOURCE_TYPE}"
 set top_module "${project_name}"
 set output_dir "./output_${project_name}"
 create_project $project_name "${output_dir}/$project_name" -force -part $device
-set_property TARGET_LANGUAGE Verilog [current_project]
 set_property DEFAULT_LIB work [current_project]
-read_verilog "${project_name}.v"
-read_verilog "shift_adder.v"
-read_verilog "negative.v"
-read_verilog "mux.v"
-foreach file [glob -nocomplain "${project_name}_stage*.v"] {
-    read_verilog $file
+if { $source_type != "vhdl" && $source_type != "verilog" } {
+    puts "Error: SOURCE_TYPE must be either 'vhdl' or 'verilog'."
+    exit 1
+}
+if { $source_type == "vhdl" } {
+    set_property TARGET_LANGUAGE VHDL [current_project]
+    read_vhdl -vhdl2008 "${project_name}.vhd"
+    read_vhdl -vhdl2008 "shift_adder.vhd"
+    read_vhdl -vhdl2008 "negative.vhd"
+    read_vhdl -vhdl2008 "mux.vhd"
+    read_vhdl -vhdl2008 "multiplier.vhd"
+    foreach file [glob -nocomplain "${project_name}_stage*.vhd"] {
+        read_vhdl -vhdl2008 $file
+    }
+} else {
+    set_property TARGET_LANGUAGE Verilog [current_project]
+    read_verilog "${project_name}.v"
+    read_verilog "shift_adder.v"
+    read_verilog "negative.v"
+    read_verilog "mux.v"
+    read_verilog "multiplier.v"
+    foreach file [glob -nocomplain "${project_name}_stage*.v"] {
+        read_verilog $file
+    }
 }
 read_xdc "${project_name}.xdc" -mode out_of_context

da4ml-0.3.2/src/da4ml/codegen/verilog/verilog_model.py → da4ml-0.4.0/src/da4ml/codegen/rtl/rtl_model.py RENAMED Viewed

@@ -10,10 +10,9 @@ from uuid import uuid4
 import numpy as np
 from numpy.typing import NDArray
-from ... import codegen
 from ...cmvm.types import CascadedSolution, Solution, _minimal_kif
 from ...trace.pipeline import to_pipeline
-from . import binder_gen, comb_logic_gen, generate_io_wrapper, pipeline_logic_gen
+from .. import rtl
 def get_io_kifs(sol: Solution | CascadedSolution):
@@ -22,12 +21,13 @@ def get_io_kifs(sol: Solution | CascadedSolution):
     return np.array(inp_kifs, np.int8), np.array(out_kifs, np.int8)
-class VerilogModel:
+class RTLModel:
     def __init__(
         self,
         solution: Solution | CascadedSolution,
         prj_name: str,
         path: str | Path,
+        flavor: str = 'verilog',
         latency_cutoff: float = -1,
         print_latency: bool = True,
         part_name: str = 'xcvu13p-flga2577-2-e',
@@ -36,18 +36,21 @@ class VerilogModel:
         io_delay_minmax: tuple[float, float] = (0.2, 0.4),
         register_layers: int = 1,
     ):
+        self._flavor = flavor.lower()
         self._solution = solution
         self._path = Path(path)
         self._prj_name = prj_name
         self._latency_cutoff = latency_cutoff
         self._print_latency = print_latency
-        self.__src_root = Path(codegen.__file__).parent
+        self.__src_root = Path(rtl.__file__).parent
         self._part_name = part_name
         self._clock_period = clock_period
         self._clock_uncertainty = clock_uncertainty
         self._io_delay_minmax = io_delay_minmax
         self._register_layers = register_layers
+        assert self._flavor in ('vhdl', 'verilog'), f'Unsupported flavor {flavor}, only vhdl and verilog are supported.'
         self._pipe = solution if isinstance(solution, CascadedSolution) else None
         if latency_cutoff > 0 and self._pipe is None:
             assert isinstance(solution, Solution)
@@ -62,16 +65,23 @@ class VerilogModel:
         self._uuid = None
     def write(self):
+        flavor = self._flavor
+        suffix = 'v' if flavor == 'verilog' else 'vhd'
+        if flavor == 'vhdl':
+            from .vhdl import binder_gen, comb_logic_gen, generate_io_wrapper, pipeline_logic_gen
+        else:  # verilog
+            from .verilog import binder_gen, comb_logic_gen, generate_io_wrapper, pipeline_logic_gen
         self._path.mkdir(parents=True, exist_ok=True)
         if self._pipe is not None:  # Pipeline
             # Main logic
             codes = pipeline_logic_gen(self._pipe, self._prj_name, self._print_latency, register_layers=self._register_layers)
             for k, v in codes.items():
-                with open(self._path / f'{k}.v', 'w') as f:
+                with open(self._path / f'{k}.{suffix}', 'w') as f:
                     f.write(v)
             # Build script
-            with open(self.__src_root / 'verilog/source/build_prj.tcl') as f:
+            with open(self.__src_root / 'common_source/build_prj.tcl') as f:
                 tcl = f.read()
             tcl = tcl.replace('${DEVICE}', self._part_name)
             tcl = tcl.replace('${PROJECT_NAME}', self._prj_name)
@@ -79,7 +89,7 @@ class VerilogModel:
                 f.write(tcl)
             # XDC
-            with open(self.__src_root / 'verilog/source/template.xdc') as f:
+            with open(self.__src_root / 'common_source/template.xdc') as f:
                 xdc = f.read()
             xdc = xdc.replace('${CLOCK_PERIOD}', str(self._clock_period))
             xdc = xdc.replace('${UNCERTAINITY_SETUP}', str(self._clock_uncertainty))
@@ -89,7 +99,7 @@ class VerilogModel:
             with open(self._path / f'{self._prj_name}.xdc', 'w') as f:
                 f.write(xdc)
-            # C++ binder w/ verilog wrapper for uniform bw
+            # C++ binder w/ HDL wrapper for uniform bw
             binder = binder_gen(self._pipe, f'{self._prj_name}_wrapper', 1, self._register_layers)
             # Verilog IO wrapper (non-uniform bw to uniform one, clk passthrough)
@@ -101,24 +111,25 @@ class VerilogModel:
             # Main logic
             code = comb_logic_gen(self._solution, self._prj_name, self._print_latency, '`timescale 1ns/1ps')
-            with open(self._path / f'{self._prj_name}.v', 'w') as f:
+            with open(self._path / f'{self._prj_name}.{suffix}', 'w') as f:
                 f.write(code)
             # Verilog IO wrapper (non-uniform bw to uniform one, no clk)
             io_wrapper = generate_io_wrapper(self._solution, self._prj_name, False)
             binder = binder_gen(self._solution, f'{self._prj_name}_wrapper')
-        with open(self._path / f'{self._prj_name}_wrapper.v', 'w') as f:
+        with open(self._path / f'{self._prj_name}_wrapper.{suffix}', 'w') as f:
             f.write(io_wrapper)
         with open(self._path / f'{self._prj_name}_wrapper_binder.cc', 'w') as f:
             f.write(binder)
         # Common resource copy
-        for fname in self.__src_root.glob('verilog/source/*.v'):
+        for fname in self.__src_root.glob(f'{flavor}/source/*.{suffix}'):
             shutil.copy(fname, self._path)
-        shutil.copy(self.__src_root / 'verilog/source/build_binder.mk', self._path)
-        shutil.copy(self.__src_root / 'verilog/source/ioutil.hh', self._path)
-        shutil.copy(self.__src_root / 'verilog/source/binder_util.hh', self._path)
+        shutil.copy(self.__src_root / 'common_source/build_binder.mk', self._path)
+        shutil.copy(self.__src_root / 'common_source/ioutil.hh', self._path)
+        shutil.copy(self.__src_root / 'common_source/binder_util.hh', self._path)
         self._solution.save(self._path / 'model.json')
         with open(self._path / 'misc.json', 'w') as f:
             f.write(f'{{"cost": {self._solution.cost}}}')
@@ -152,6 +163,7 @@ class VerilogModel:
         env['VM_PREFIX'] = f'{self._prj_name}_wrapper'
         env['STAMP'] = self._uuid
         env['EXTRA_CXXFLAGS'] = '-fopenmp' if openmp else ''
+        env['VERILATOR_FLAGS'] = '-Wall' if self._flavor == 'verilog' else ''
         if nproc is not None:
             env['N_JOBS'] = str(nproc)
         if o3:
@@ -219,7 +231,7 @@ class VerilogModel:
         self.write()
         self._compile(verbose=verbose, openmp=openmp, nproc=nproc, o3=o3, clean=clean)
-    def predict(self, data: NDArray[np.floating]):
+    def predict(self, data: NDArray[np.floating]) -> NDArray[np.float32]:
         """Run the model on the input data.
         Parameters
@@ -233,6 +245,7 @@ class VerilogModel:
         NDArray[np.float64]
             Output of the model in shape (n_samples, output_size).
         """
         assert self._lib is not None, 'Library not loaded, call .compile() first.'
         inp_size, out_size = self._solution.shape
@@ -258,7 +271,7 @@ class VerilogModel:
         # Unscale the output int32 to recover fp values
         k, i, f = np.max(k_out), np.max(i_out), np.max(f_out)
         a, b, c = 2.0 ** (k + i + f), k * 2.0 ** (i + f), 2.0**-f
-        return ((out_data.reshape(n_sample, out_size) + b) % a - b) * c
+        return ((out_data.reshape(n_sample, out_size) + b) % a - b) * c.astype(np.float32)
     def __repr__(self):
         inp_size, out_size = self._solution.shape
@@ -289,3 +302,61 @@ Estimated cost: {cost} LUTs"""
         else:
             spec += '\nEmulator is **not compiled**'
         return spec
+class VerilogModel(RTLModel):
+    def __init__(
+        self,
+        solution: Solution | CascadedSolution,
+        prj_name: str,
+        path: str | Path,
+        latency_cutoff: float = -1,
+        print_latency: bool = True,
+        part_name: str = 'xcvu13p-flga2577-2-e',
+        clock_period: float = 5,
+        clock_uncertainty: float = 0.1,
+        io_delay_minmax: tuple[float, float] = (0.2, 0.4),
+        register_layers: int = 1,
+    ):
+        self._hdl_model = super().__init__(
+            solution,
+            prj_name,
+            path,
+            'verilog',
+            latency_cutoff,
+            print_latency,
+            part_name,
+            clock_period,
+            clock_uncertainty,
+            io_delay_minmax,
+            register_layers,
+        )
+class VHDLModel(RTLModel):
+    def __init__(
+        self,
+        solution: Solution | CascadedSolution,
+        prj_name: str,
+        path: str | Path,
+        latency_cutoff: float = -1,
+        print_latency: bool = True,
+        part_name: str = 'xcvu13p-flga2577-2-e',
+        clock_period: float = 5,
+        clock_uncertainty: float = 0.1,
+        io_delay_minmax: tuple[float, float] = (0.2, 0.4),
+        register_layers: int = 1,
+    ):
+        self._hdl_model = super().__init__(
+            solution,
+            prj_name,
+            path,
+            'vhdl',
+            latency_cutoff,
+            print_latency,
+            part_name,
+            clock_period,
+            clock_uncertainty,
+            io_delay_minmax,
+            register_layers,
+        )

{da4ml-0.3.2/src/da4ml/codegen → da4ml-0.4.0/src/da4ml/codegen/rtl}/verilog/__init__.py RENAMED Viewed

@@ -1,12 +1,10 @@
 from .comb import comb_logic_gen
 from .io_wrapper import binder_gen, generate_io_wrapper
 from .pipeline import pipeline_logic_gen
-from .verilog_model import VerilogModel
 __all__ = [
     'comb_logic_gen',
     'generate_io_wrapper',
     'pipeline_logic_gen',
     'binder_gen',
-    'VerilogModel',
 ]

da4ml 0.3.2__tar.gz → 0.4.0__tar.gz

Potentially problematic release.

da4ml 0.3.2__tar.gz → 0.4.0__tar.gz