PyPI - da4ml - Versions diffs - 0.2.1__py3-none-any.whl → 0.3.0.post1__py3-none-any.whl - Mend

da4ml 0.2.1__py3-none-any.whl → 0.3.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of da4ml might be problematic. Click here for more details.

Files changed (55) hide show

da4ml/_version.py +2 -2
da4ml/cmvm/types.py +95 -15
da4ml/codegen/__init__.py +5 -4
da4ml/codegen/cpp/__init__.py +2 -1
da4ml/codegen/cpp/cpp_codegen.py +56 -23
da4ml/codegen/cpp/hls_model.py +252 -0
da4ml/codegen/cpp/source/ap_types/ap_binary.h +78 -0
da4ml/codegen/cpp/source/ap_types/ap_common.h +376 -0
da4ml/codegen/cpp/source/ap_types/ap_decl.h +212 -0
da4ml/codegen/cpp/source/ap_types/ap_fixed.h +360 -0
da4ml/codegen/cpp/source/ap_types/ap_fixed_base.h +2354 -0
da4ml/codegen/cpp/source/ap_types/ap_fixed_ref.h +718 -0
da4ml/codegen/cpp/source/ap_types/ap_fixed_special.h +230 -0
da4ml/codegen/cpp/source/ap_types/ap_int.h +330 -0
da4ml/codegen/cpp/source/ap_types/ap_int_base.h +1885 -0
da4ml/codegen/cpp/source/ap_types/ap_int_ref.h +1346 -0
da4ml/codegen/cpp/source/ap_types/ap_int_special.h +223 -0
da4ml/codegen/cpp/source/ap_types/ap_shift_reg.h +138 -0
da4ml/codegen/cpp/source/ap_types/etc/ap_private.h +7199 -0
da4ml/codegen/cpp/source/ap_types/hls_math.h +27 -0
da4ml/codegen/cpp/source/ap_types/hls_stream.h +263 -0
da4ml/codegen/cpp/source/ap_types/utils/x_hls_utils.h +80 -0
da4ml/codegen/cpp/source/binder_util.hh +56 -0
da4ml/codegen/cpp/source/build_binder.mk +24 -0
da4ml/codegen/cpp/source/{vitis.h → vitis_bitshift.hh} +1 -1
da4ml/codegen/verilog/__init__.py +2 -3
da4ml/codegen/verilog/comb.py +65 -24
da4ml/codegen/verilog/io_wrapper.py +36 -141
da4ml/codegen/verilog/source/binder_util.hh +72 -0
da4ml/codegen/verilog/source/mux.v +58 -0
da4ml/codegen/verilog/source/negative.v +28 -0
da4ml/codegen/verilog/source/shift_adder.v +4 -1
da4ml/codegen/verilog/source/template.xdc +3 -0
da4ml/codegen/verilog/verilog_model.py +36 -12
da4ml/converter/__init__.py +0 -0
da4ml/converter/hgq2/parser.py +105 -0
da4ml/converter/hgq2/replica.py +383 -0
da4ml/trace/__init__.py +2 -2
da4ml/trace/fixed_variable.py +175 -16
da4ml/trace/fixed_variable_array.py +109 -4
da4ml/trace/ops/__init__.py +22 -6
da4ml/trace/ops/conv_utils.py +147 -15
da4ml/trace/ops/einsum_utils.py +9 -6
da4ml/trace/ops/reduce_utils.py +103 -0
da4ml/trace/pipeline.py +36 -34
da4ml/trace/tracer.py +37 -7
da4ml-0.3.0.post1.dist-info/METADATA +107 -0
da4ml-0.3.0.post1.dist-info/RECORD +64 -0
da4ml/codegen/cpp/source/vitis_bridge.h +0 -17
da4ml-0.2.1.dist-info/METADATA +0 -65
da4ml-0.2.1.dist-info/RECORD +0 -39
/da4ml/codegen/verilog/source/{ioutils.hh → ioutil.hh} +0 -0
{da4ml-0.2.1.dist-info → da4ml-0.3.0.post1.dist-info}/WHEEL +0 -0
{da4ml-0.2.1.dist-info → da4ml-0.3.0.post1.dist-info}/licenses/LICENSE +0 -0
{da4ml-0.2.1.dist-info → da4ml-0.3.0.post1.dist-info}/top_level.txt +0 -0

da4ml/codegen/cpp/source/ap_types/hls_math.h ADDED Viewed

@@ -0,0 +1,27 @@
+#ifndef X_HLS_MATH_H
+#define X_HLS_MATH_H
+#include <cmath>
+#include "ap_fixed.h"
+// Software stand-ins for hls:: math functions. Each one converts its argument
+// to double with to_double(), calls the <cmath> function of the same name and
+// casts the result back to T, so T must provide to_double() and be
+// constructible from double.
+namespace hls {
+template<class T>
+static T exp(const T x) {
+  return (T) std::exp(x.to_double());
+}
+template <typename T> T sin(T x) { return (T) std::sin(x.to_double()); }
+template <typename T> T cos(T x) { return (T) std::cos(x.to_double()); }
+template <typename T> T asin(T x) { return (T) std::asin(x.to_double()); }
+template <typename T> T acos(T x) { return (T) std::acos(x.to_double()); }
+template <typename T> T atan(T x) { return (T) std::atan(x.to_double()); }
+// Arguments are forwarded positionally: the first argument (named x here) is
+// passed as std::atan2's first argument, i.e. the y-coordinate.
+// This has to call std::atan2. Calling hls::atan2 with two doubles selected
+// this very template with T = double, whose body then tried to call
+// to_double() on a plain double and failed to compile when instantiated.
+template <typename T> T atan2(T x, T y) { return (T) std::atan2(x.to_double(), y.to_double()); }
+} // namespace hls
+#endif

da4ml/codegen/cpp/source/ap_types/hls_stream.h ADDED Viewed

@@ -0,0 +1,263 @@
+/*
+#-  (c) Copyright 2011-2018 Xilinx, Inc. All rights reserved.
+#-
+#-  This file contains confidential and proprietary information
+#-  of Xilinx, Inc. and is protected under U.S. and
+#-  international copyright and other intellectual property
+#-  laws.
+#-
+#-  DISCLAIMER
+#-  This disclaimer is not a license and does not grant any
+#-  rights to the materials distributed herewith. Except as
+#-  otherwise provided in a valid license issued to you by
+#-  Xilinx, and to the maximum extent permitted by applicable
+#-  law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND
+#-  WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES
+#-  AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING
+#-  BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-
+#-  INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and
+#-  (2) Xilinx shall not be liable (whether in contract or tort,
+#-  including negligence, or under any other theory of
+#-  liability) for any loss or damage of any kind or nature
+#-  related to, arising under or in connection with these
+#-  materials, including for any direct, or any indirect,
+#-  special, incidental, or consequential loss or damage
+#-  (including loss of data, profits, goodwill, or any type of
+#-  loss or damage suffered as a result of any action brought
+#-  by a third party) even if such damage or loss was
+#-  reasonably foreseeable or Xilinx had been advised of the
+#-  possibility of the same.
+#-
+#-  CRITICAL APPLICATIONS
+#-  Xilinx products are not designed or intended to be fail-
+#-  safe, or for use in any application requiring fail-safe
+#-  performance, such as life-support or safety devices or
+#-  systems, Class III medical devices, nuclear facilities,
+#-  applications related to the deployment of airbags, or any
+#-  other applications that could lead to death, personal
+#-  injury, or severe property or environmental damage
+#-  (individually and collectively, "Critical
+#-  Applications"). Customer assumes the sole risk and
+#-  liability of any use of Xilinx products in Critical
+#-  Applications, subject only to applicable laws and
+#-  regulations governing limitations on product liability.
+#-
+#-  THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS
+#-  PART OF THIS FILE AT ALL TIMES.
+#- ************************************************************************
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+#ifndef X_HLS_STREAM_SIM_H
+#define X_HLS_STREAM_SIM_H
+/*
+ * This file contains a C++ model of hls::stream.
+ * It defines C simulation model.
+ */
+#ifndef __cplusplus
+#error C++ is required to include this header file
+#else
+//////////////////////////////////////////////
+// C level simulation models for hls::stream
+//////////////////////////////////////////////
+#include <queue>
+#include <iostream>
+#include <typeinfo>
+#include <string>
+#include <sstream>
+#ifdef HLS_STREAM_THREAD_SAFE
+#include <mutex>
+#include <condition_variable>
+#endif
+#ifndef _MSC_VER
+#include <cxxabi.h>
+#include <stdlib.h>
+#endif
+namespace hls {
+// C simulation model of hls::stream: an unbounded FIFO of __STREAM_T__ backed
+// by std::deque. With HLS_STREAM_THREAD_SAFE defined, queue accesses take
+// _mutex and read() blocks until data is available; without it, reading an
+// empty stream prints a warning and returns a default-constructed element.
+template<typename __STREAM_T__>
+class stream
+{
+  protected:
+    std::string _name;
+    std::deque<__STREAM_T__> _data; // container for the elements
+#ifdef HLS_STREAM_THREAD_SAFE
+    std::mutex _mutex;
+    std::condition_variable _condition_var;
+#endif
+  public:
+    /// Constructors
+    // Keep consistent with the synthesis model's constructors
+    // Default constructor: names the stream "<type name>.<N>", where N is a
+    // counter local to this constructor that starts at 1. The type name is
+    // demangled where cxxabi is available, with "hls_stream" as the fallback
+    // when demangling fails.
+    stream() {
+        static unsigned _counter = 1;
+        std::stringstream ss;
+#ifndef _MSC_VER
+        char* _demangle_name = abi::__cxa_demangle(typeid(*this).name(), 0, 0, 0);
+        if (_demangle_name) {
+            _name = _demangle_name;
+            // __cxa_demangle returns a malloc'ed buffer owned by the caller.
+            free(_demangle_name);
+        }
+        else {
+            _name = "hls_stream";
+        }
+#else
+        _name = typeid(*this).name();
+#endif
+        ss << _counter++;
+        _name += "." + ss.str();
+    }
+    // Named constructor: uses the caller-supplied name unchanged. The queue is
+    // unbounded (full() is always false), so there is no capacity to set.
+    stream(const std::string name) {
+        _name = name;
+    }
+  /// Make copy constructor and assignment operator private
+  private:
+    stream(const stream< __STREAM_T__ >& chn):
+        _name(chn._name), _data(chn._data) {
+    }
+    stream& operator = (const stream< __STREAM_T__ >& chn) {
+        _name = chn._name;
+        _data = chn._data;
+        return *this;
+    }
+  public:
+    /// Overload >> and << operators to implement read() and write()
+    void operator >> (__STREAM_T__& rdata) {
+        read(rdata);
+    }
+    void operator << (const __STREAM_T__& wdata) {
+        write(wdata);
+    }
+  public:
+    /// Destructor
+    /// Check status of the queue
+    // Only warns about unread elements; the elements themselves are released
+    // by the deque's own destructor.
+    virtual ~stream() {
+        if (!_data.empty())
+        {
+            std::cout << "WARNING: Hls::stream '"
+                      << _name
+                      << "' contains leftover data,"
+                      << " which may result in RTL simulation hanging."
+                      << std::endl;
+        }
+    }
+    /// Status of the queue
+    bool empty() {
+#ifdef HLS_STREAM_THREAD_SAFE
+        std::lock_guard<std::mutex> lg(_mutex);
+#endif
+        return _data.empty();
+    }
+    // The simulation FIFO never fills up.
+    bool full() const { return false; }
+    /// Blocking read
+    void read(__STREAM_T__& head) {
+        head = read();
+    }
+#ifdef HLS_STREAM_THREAD_SAFE
+    // Thread-safe variant: waits on the condition variable until write()
+    // has pushed an element, then pops and returns the front element.
+    __STREAM_T__ read() {
+        std::unique_lock<std::mutex> ul(_mutex);
+        while (_data.empty()) {
+            _condition_var.wait(ul);
+        }
+        __STREAM_T__ elem;
+        elem = _data.front();
+        _data.pop_front();
+        return elem;
+    }
+#else
+    // Single-threaded variant: never blocks. Reading an empty stream prints a
+    // warning and returns a default-constructed element.
+    __STREAM_T__ read() {
+        __STREAM_T__ elem;
+        if (_data.empty()) {
+            std::cout << "WARNING: Hls::stream '"
+                      << _name
+                      << "' is read while empty,"
+                      << " which may result in RTL simulation hanging."
+                      << std::endl;
+            elem = __STREAM_T__();
+        } else {
+            elem = _data.front();
+            _data.pop_front();
+        }
+        return elem;
+    }
+#endif
+    /// Blocking write
+    // Never actually blocks: appends to the deque and, in thread-safe mode,
+    // wakes one reader waiting in read().
+    void write(const __STREAM_T__& tail) {
+#ifdef HLS_STREAM_THREAD_SAFE
+        std::unique_lock<std::mutex> ul(_mutex);
+#endif
+        _data.push_back(tail);
+#ifdef HLS_STREAM_THREAD_SAFE
+        _condition_var.notify_one();
+#endif
+    }
+    /// Nonblocking read
+    // Returns true and stores the front element in head when data was
+    // available; otherwise stores a default-constructed element and returns
+    // false.
+    bool read_nb(__STREAM_T__& head) {
+#ifdef HLS_STREAM_THREAD_SAFE
+        std::lock_guard<std::mutex> lg(_mutex);
+#endif
+        bool is_empty = _data.empty();
+        if (is_empty) {
+            head = __STREAM_T__();
+        } else {
+            __STREAM_T__ elem(_data.front());
+            _data.pop_front();
+            head = elem;
+        }
+        return !is_empty;
+    }
+    /// Nonblocking write
+    // Always writes and returns true, because full() is always false.
+    bool write_nb(const __STREAM_T__& tail) {
+        bool is_full = full();
+        write(tail);
+        return !is_full;
+    }
+    /// Fifo size
+    // Takes _mutex in thread-safe mode, like empty() and read_nb(), so the
+    // deque is not inspected while a concurrent write() is modifying it.
+    size_t size() {
+#ifdef HLS_STREAM_THREAD_SAFE
+        std::lock_guard<std::mutex> lg(_mutex);
+#endif
+        return _data.size();
+    }
+};
+} // namespace hls
+#endif // __cplusplus
+#endif  // X_HLS_STREAM_SIM_H

da4ml/codegen/cpp/source/ap_types/utils/x_hls_utils.h ADDED Viewed

@@ -0,0 +1,80 @@
+#ifndef X_HLS_UTILS_H
+#define X_HLS_UTILS_H
+#include "ap_fixed.h"
+#include <limits>
+namespace hls {  // numeric_limits look-alike with specializations for the ap_* integer and fixed-point types
+    template<typename T>
+    class numeric_limits {  // primary template: forwards to std::numeric_limits<T>
+    public:
+        static T max()     { return std::numeric_limits<T>::max(); }
+        static T min()     { return std::numeric_limits<T>::min(); }
+        static T epsilon() { return std::numeric_limits<T>::epsilon(); }
+    };
+    template <int W, int I, ap_q_mode Q, ap_o_mode O>
+    class numeric_limits<ap_fixed<W,I,Q,O> > {  // signed fixed point: limits are built from the ap_int<W> limits
+    public:
+        static ap_fixed<W,I,Q,O> max() {
+            ap_int<W> m = ::hls::numeric_limits<ap_int<W> >::max();
+            ap_fixed<W,I,Q,O> x;
+            x(W-1,0) = m(W-1,0);  // presumably a raw bit-range copy of all W bits (no value conversion) -- confirm against ap_fixed
+            return x;
+        }
+        static ap_fixed<W,I,Q,O> min() {
+            ap_int<W> m = ::hls::numeric_limits<ap_int<W> >::min();
+            ap_fixed<W,I,Q,O> x;
+            x(W-1,0) = m(W-1,0);  // same bit-range assignment as max(), here with the ap_int<W> minimum
+            return x;
+        }
+        static ap_fixed<W,I,Q,O> epsilon() {
+          ap_fixed<W,I,Q,O> x = 0;
+          x[0] = 1;  // zero with only index 0 set; presumably the least-significant bit, i.e. the smallest positive step
+          return x;
+        }
+    };
+    template <int W, int I, ap_q_mode Q, ap_o_mode O>
+    class numeric_limits<ap_ufixed<W,I,Q,O> > {  // unsigned fixed point: max is built from the ap_uint<W> max
+    public:
+        static ap_ufixed<W,I,Q,O> max() {
+            ap_uint<W> m = ::hls::numeric_limits<ap_uint<W> >::max();
+            ap_ufixed<W,I,Q,O> x;
+            x(W-1,0) = m(W-1,0);  // presumably a raw bit-range copy of all W bits -- confirm against ap_ufixed
+            return x;
+        }
+        static ap_ufixed<W,I,Q,O> min() { return 0; }
+        static ap_ufixed<W,I,Q,O> epsilon() {
+          ap_ufixed<W,I,Q,O> x = 0;
+          x[0] = 1;  // zero with only index 0 set
+          return x;
+        }
+    };
+    template <int W>
+    class numeric_limits<ap_int<W> > {  // signed integer of width W
+    public:
+        static ap_int<W> max() { ap_int<W> m = min(); return ~m; }  // bitwise complement of min()
+        static ap_int<W> min() { ap_int<W> m = 0; m[W-1] = 1; return m; }  // zero with only index W-1 (presumably the sign bit) set
+        static ap_int<W> epsilon() {
+          ap_int<W> x = 0;
+          x[0] = 1;  // zero with only index 0 set
+          return x;
+        }
+    };
+    template <int W>
+    class numeric_limits<ap_uint<W> > {  // unsigned integer of width W
+    public:
+        static ap_uint<W> max() { ap_uint<W> zero = 0; return ~zero; }  // bitwise complement of zero
+        static ap_uint<W> min() { return 0; }
+        static ap_uint<W> epsilon() {
+          ap_uint<W> x = 0;
+          x[0] = 1;  // zero with only index 0 set
+          return x;
+        }
+    };
+}
+#endif

da4ml/codegen/cpp/source/binder_util.hh ADDED Viewed

@@ -0,0 +1,56 @@
+#pragma once
+#include <cstddef>
+#ifdef _OPENMP
+#include <algorithm>
+#include <omp.h>
+constexpr bool _openmp = true;
+#else
+constexpr bool _openmp = false;
+#endif
+// Runs CONFIG_T::f once per sample over a flat batch.
+// Sample i is read from c_inp[i * N_inp .. i * N_inp + N_inp - 1] and its
+// result is written to c_out[i * N_out .. i * N_out + N_out - 1]. Each value
+// is converted from T to CONFIG_T::inp_t on the way in and from
+// CONFIG_T::out_t back to T on the way out, by plain assignment.
+template <typename CONFIG_T, typename T> void _inference(T *c_inp, T *c_out, size_t n_samples)
+{
+    typename CONFIG_T::inp_t in_fixed_buf[CONFIG_T::N_inp];
+    typename CONFIG_T::out_t out_fixed_buf[CONFIG_T::N_out];
+    // Walk both buffers with per-sample cursors instead of recomputing offsets.
+    T *sample_in = c_inp;
+    T *sample_out = c_out;
+    for(size_t remaining = n_samples; remaining != 0; --remaining)
+        {
+            for(size_t k = 0; k < CONFIG_T::N_inp; ++k)
+                in_fixed_buf[k] = sample_in[k];
+            CONFIG_T::f(in_fixed_buf, out_fixed_buf);
+            for(size_t k = 0; k < CONFIG_T::N_out; ++k)
+                sample_out[k] = out_fixed_buf[k];
+            sample_in += CONFIG_T::N_inp;
+            sample_out += CONFIG_T::N_out;
+        }
+}
+// Runs _inference over a whole batch, split across OpenMP threads when the
+// translation unit is compiled with OpenMP.
+// c_inp holds n_samples * CONFIG_T::N_inp values and c_out receives
+// n_samples * CONFIG_T::N_out values. Chunks cover disjoint sample ranges, so
+// threads write to disjoint parts of c_out.
+template <typename CONFIG_T, typename T> void batch_inference(T *c_inp, T *c_out, size_t n_samples)
+{
+    // An empty batch has nothing to compute. Returning here also keeps the
+    // OpenMP path from computing n_thread == 0 and passing it to
+    // num_threads(), which requires a positive value.
+    if(n_samples == 0)
+        return;
+#ifdef _OPENMP
+    size_t n_max_threads = omp_get_max_threads();
+    // Give each chunk at least 32 samples so small batches do not fan out
+    // into many tiny chunks.
+    size_t n_samples_per_thread = std::max<size_t>(n_samples / n_max_threads, 32);
+    // Number of chunks, rounded up so the remainder gets its own chunk.
+    size_t n_thread = n_samples / n_samples_per_thread;
+    n_thread += (n_samples % n_samples_per_thread) ? 1 : 0;
+#pragma omp parallel for num_threads(n_thread) schedule(static)
+    for(size_t i = 0; i < n_thread; ++i)
+        {
+            size_t start = i * n_samples_per_thread;
+            // The last chunk may be shorter than the others.
+            size_t end = std::min<size_t>(start + n_samples_per_thread, n_samples);
+            size_t n_samples_this_thread = end - start;
+            size_t offset_in = start * CONFIG_T::N_inp;
+            size_t offset_out = start * CONFIG_T::N_out;
+            _inference<CONFIG_T, T>(&c_inp[offset_in], &c_out[offset_out], n_samples_this_thread);
+        }
+#else
+    _inference<CONFIG_T, T>(c_inp, c_out, n_samples);
+#endif
+}

da4ml/codegen/cpp/source/build_binder.mk ADDED Viewed

@@ -0,0 +1,24 @@
+# Builds lib$(PRJ_NAME)_$(STAMP).so from $(PRJ_NAME).cc and
+# $(PRJ_NAME)_bridge.cc. PRJ_NAME and STAMP are not set in this file and must
+# be supplied by the caller; EXTRA_CXXFLAGS, if given, is appended to both the
+# compile and the link command.
+default: slow
+CXX = g++
+CC = gcc
+INCLUDES = -I ap_types -I .
+CXXFLAGS = -fPIC
+# NOTE(review): CFLAGS is not referenced by any rule in this file, so
+# -std=c++17 is never passed to the compiler -- confirm whether that is intended.
+CFLAGS = -std=c++17 -fPIC
+LIBNAME = lib$(PRJ_NAME)_$(STAMP).so
+# fast and slow build the same library; they differ only in the optimization
+# flag appended to CXXFLAGS.
+fast: CXXFLAGS += -O3
+fast: $(LIBNAME)
+slow: CXXFLAGS += -O
+slow: $(LIBNAME)
+$(PRJ_NAME)_$(STAMP).o: $(PRJ_NAME).cc
+	$(CC) -c $(PRJ_NAME).cc -o $(PRJ_NAME)_$(STAMP).o $(INCLUDES) $(CXXFLAGS) $(EXTRA_CXXFLAGS)
+$(LIBNAME): $(PRJ_NAME)_$(STAMP).o $(PRJ_NAME)_bridge.cc
+	$(CXX) $(INCLUDES) $(CXXFLAGS) -shared -o $@ $(PRJ_NAME)_$(STAMP).o $(PRJ_NAME)_bridge.cc $(EXTRA_CXXFLAGS)
+clean:
+	rm -f $(LIBNAME) $(PRJ_NAME)_$(STAMP).o
+# None of these targets names a real file, so declare all of them phony; a
+# stray file called "fast", "slow", "default" or "clean" must not affect them.
+.PHONY: default fast slow clean

da4ml/codegen/cpp/source/{vitis.h → vitis_bitshift.hh} RENAMED Viewed

@@ -1,5 +1,5 @@
 #pragma once
-#include "ap_fixed.h"
+#include "ap_types/ap_fixed.h"
 template <int s, int b, int i, ap_q_mode Q, ap_o_mode O, int N> ap_fixed<b, i + s> bit_shift(ap_fixed<b, i, Q, O, N> x) {
 #pragma HLS INLINE

da4ml/codegen/verilog/__init__.py CHANGED Viewed

@@ -1,13 +1,12 @@
 from .comb import comb_logic_gen
-from .io_wrapper import comb_binder_gen, generate_io_wrapper, pipeline_binder_gen
+from .io_wrapper import binder_gen, generate_io_wrapper
 from .pipeline import pipeline_logic_gen
 from .verilog_model import VerilogModel
 __all__ = [
     'comb_logic_gen',
     'generate_io_wrapper',
-    'comb_binder_gen',
     'pipeline_logic_gen',
-    'pipeline_binder_gen',
+    'binder_gen',
     'VerilogModel',
 ]

da4ml/codegen/verilog/comb.py CHANGED Viewed

@@ -2,10 +2,11 @@ from math import ceil, log2
 import numpy as np
-from da4ml.cmvm.types import Op, QInterval, Solution, _minimal_kif
+from ...cmvm.types import QInterval, Solution, _minimal_kif
-def ssa_gen(ops: list[Op], print_latency: bool = False):
+def ssa_gen(sol: Solution, neg_defined: set[int], print_latency: bool = False):
+    ops = sol.ops
     kifs = list(map(_minimal_kif, (op.qint for op in ops)))
     widths = list(map(sum, kifs))
     inp_kifs = [_minimal_kif(op.qint) for op in ops if op.opcode == -1]
@@ -14,11 +15,17 @@ def ssa_gen(ops: list[Op], print_latency: bool = False):
     inp_idxs = np.stack([_inp_widths[1:] - 1, _inp_widths[:-1]], axis=1)
     lines = []
+    ref_count = sol.ref_count
     for i, op in enumerate(ops):
+        if ref_count[i] == 0:
+            continue
         bw = widths[i]
-        v = f'v{i}[{bw-1}:0]'
-        _def = f'wire [{bw-1}:0] v{i};'
+        v = f'v{i}[{bw - 1}:0]'
+        _def = f'wire [{bw - 1}:0] v{i};'
+        if bw == 0:
+            continue
         match op.opcode:
             case -1:  # Input marker
@@ -34,12 +41,16 @@ def ssa_gen(ops: list[Op], print_latency: bool = False):
                 if op.opcode == -2:
                     _min, _max, step = ops[op.id0].qint
                     bw_neg = max(sum(_minimal_kif(QInterval(-_max, -_min, step))), bw0)
-                    lines.append(
-                        f'wire [{bw_neg-1}:0] v{op.id0}_neg; assign v{op.id0}_neg[{bw_neg-1}:0] = -{v0_name}[{bw0-1}:0];'
-                    )
+                    if op.id0 not in neg_defined:
+                        neg_defined.add(op.id0)
+                        was_signed = int(kifs[op.id0][0])
+                        lines.append(
+                            f'wire [{bw_neg - 1}:0] v{op.id0}_neg; negative #({bw0}, {bw_neg}, {was_signed}) op_neg_{op.id0} ({v0_name}, v{op.id0}_neg);'
+                        )
+                        bw0 = bw_neg
                     v0_name = f'v{op.id0}_neg'
                 if ops[op.id0].qint.min < 0:
-                    line = f'{_def} assign {v} = {v0_name}[{i0}:{i1}] & {{{bw}{{~{v0_name}[{bw0-1}]}}}};'
+                    line = f'{_def} assign {v} = {v0_name}[{i0}:{i1}] & {{{bw}{{~{v0_name}[{bw0 - 1}]}}}};'
                 else:
                     line = f'{_def} assign {v} = {v0_name}[{i0}:{i1}];'
             case 3 | -3:  # Explicit quantization
@@ -50,23 +61,31 @@ def ssa_gen(ops: list[Op], print_latency: bool = False):
                 if op.opcode == -3:
                     _min, _max, step = ops[op.id0].qint
+                    lines.append('/* verilator lint_off WIDTHTRUNC */')
                     bw_neg = max(sum(_minimal_kif(QInterval(-_max, -_min, step))), bw0)
-                    lines.append(
-                        f'wire [{bw_neg-1}:0] v{op.id0}_neg; assign v{op.id0}_neg[{bw_neg-1}:0] = -{v0_name}[{bw0-1}:0];'
-                    )
+                    if op.id0 not in neg_defined:
+                        neg_defined.add(op.id0)
+                        # lines.append('/* verilator lint_off WIDTHTRUNC */')
+                        # lines.append(
+                        #     f'wire [{bw_neg - 1}:0] v{op.id0}_neg; assign v{op.id0}_neg[{bw_neg - 1}:0] = -{v0_name}[{bw0 - 1}:0];'
+                        # )
+                        # lines.append('/* verilator lint_on WIDTHTRUNC */')
+                        was_signed = int(kifs[op.id0][0])
+                        lines.append(
+                            f'wire [{bw_neg - 1}:0] v{op.id0}_neg; negative #({bw0}, {bw_neg}, {was_signed}) op_neg_{op.id0} ({v0_name}, v{op.id0}_neg);'
+                        )
                     v0_name = f'v{op.id0}_neg'
                 line = f'{_def} assign {v} = {v0_name}[{i0}:{i1}];'
             case 4:  # constant addition
                 num = op.data
                 sign, mag = int(num < 0), abs(num)
-                line = f"{_def} assign {v} = '{bin(mag)[1:]};"
                 bw1 = ceil(log2(mag + 1))
                 bw0 = widths[op.id0]
                 s0 = int(kifs[op.id0][0])
-                v0 = f'v{op.id0}[{bw0-1}:0]'
+                v0 = f'v{op.id0}[{bw0 - 1}:0]'
                 v1 = f"'{bin(mag)[1:]}"
-                shift = int(log2(op.qint.step / ops[op.id0].qint.step))
+                shift = kifs[op.id0][2] - kifs[i][2]
                 line = f'{_def} shift_adder #({bw0}, {bw1}, {s0}, 0, {bw}, {shift}, {sign}) op_{i} ({v0}, {v1}, {v});'
             case 5:  # constant
                 num = op.data
@@ -80,9 +99,22 @@ def ssa_gen(ops: list[Op], print_latency: bool = False):
                 bw0, bw1 = widths[op.id0], widths[op.id1]  # width
                 s0, f0, s1, f1 = int(p0[0]), p0[2], int(p1[0]), p1[2]
                 shift = op.data + f0 - f1
-                v0, v1 = f'v{op.id0}[{bw0-1}:0]', f'v{op.id1}[{bw1-1}:0]'
+                v0, v1 = f'v{op.id0}[{bw0 - 1}:0]', f'v{op.id1}[{bw1 - 1}:0]'
                 line = f'{_def} shift_adder #({bw0}, {bw1}, {s0}, {s1}, {bw}, {shift}, {op.opcode}) op_{i} ({v0}, {v1}, {v});'
+            case 6 | -6:  # MSB Muxing
+                k, a, b = op.data & 0xFFFFFFFF, op.id0, op.id1
+                p0, p1 = kifs[a], kifs[b]
+                inv = '1' if op.opcode == -6 else '0'
+                bwk, bw0, bw1 = widths[k], widths[a], widths[b]
+                s0, f0, s1, f1 = int(p0[0]), p0[2], int(p1[0]), p1[2]
+                _shift = (op.data >> 32) & 0xFFFFFFFF
+                _shift = _shift if _shift < 0x80000000 else _shift - 0x100000000
+                shift = f0 - f1 + _shift
+                vk, v0, v1 = f'v{k}[{bwk - 1}]', f'v{a}[{bw0 - 1}:0]', f'v{b}[{bw1 - 1}:0]'
+                line = f'{_def} mux #({bw0}, {bw1}, {s0}, {s1}, {bw}, {shift}, {inv}) op_{i} ({vk}, {v0}, {v1}, {v});'
             case _:
                 raise ValueError(f'Unknown opcode {op.opcode} for operation {i} ({op})')
@@ -92,7 +124,7 @@ def ssa_gen(ops: list[Op], print_latency: bool = False):
     return lines
-def output_gen(sol: Solution):
+def output_gen(sol: Solution, neg_defined: set[int]):
     lines = []
     widths = list(map(sum, map(_minimal_kif, sol.out_qint)))
     _widths = np.cumsum([0] + widths)
@@ -101,13 +133,21 @@ def output_gen(sol: Solution):
         if idx < 0:
             continue
         i0, i1 = out_idxs[i]
+        if i0 == i1 - 1:
+            continue
         bw = widths[i]
-        bw0 = sum(_minimal_kif(sol.ops[idx].qint))
         if sol.out_negs[i]:
-            lines.append(f'wire [{bw-1}:0] out_neg{i}; assign out_neg{i} = -v{idx}[{bw0-1}:0];')
-            lines.append(f'assign out[{i0}:{i1}] = out_neg{i}[{bw-1}:0];')
+            if idx not in neg_defined:
+                neg_defined.add(idx)
+                bw0 = sum(_minimal_kif(sol.ops[idx].qint))
+                was_signed = int(sol.ops[idx].qint[0] < 0)
+                lines.append(
+                    f'wire [{bw - 1}:0] v{idx}_neg; negative #({bw0}, {bw}, {was_signed}) op_neg_{idx} (v{idx}, v{idx}_neg);'
+                )
+            lines.append(f'assign out[{i0}:{i1}] = v{idx}_neg[{bw - 1}:0];')
         else:
-            lines.append(f'assign out[{i0}:{i1}] = v{idx}[{bw-1}:0];')
+            lines.append(f'assign out[{i0}:{i1}] = v{idx}[{bw - 1}:0];')
     return lines
@@ -117,13 +157,14 @@ def comb_logic_gen(sol: Solution, fn_name: str, print_latency: bool = False, tim
     fn_signature = [
         f'module {fn_name} (',
-        f'    input [{inp_bits-1}:0] inp,',
-        f'    output [{out_bits-1}:0] out',
+        f'    input [{inp_bits - 1}:0] inp,',
+        f'    output [{out_bits - 1}:0] out',
         ');',
     ]
-    ssa_lines = ssa_gen(sol.ops, print_latency=print_latency)
-    output_lines = output_gen(sol)
+    neg_defined = set()
+    ssa_lines = ssa_gen(sol, neg_defined=neg_defined, print_latency=print_latency)
+    output_lines = output_gen(sol, neg_defined)
     indent = '    '
     base_indent = '\n'

da4ml 0.2.1__py3-none-any.whl → 0.3.0.post1__py3-none-any.whl

Potentially problematic release.

da4ml 0.2.1__py3-none-any.whl → 0.3.0.post1__py3-none-any.whl