da4ml 0.5.1.post1__cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- da4ml/__init__.py +4 -0
- da4ml/_binary/__init__.py +15 -0
- da4ml/_binary/dais_bin.cpython-311-x86_64-linux-gnu.so +0 -0
- da4ml/_binary/dais_bin.pyi +5 -0
- da4ml/_cli/__init__.py +30 -0
- da4ml/_cli/convert.py +204 -0
- da4ml/_cli/report.py +295 -0
- da4ml/_version.py +32 -0
- da4ml/cmvm/__init__.py +4 -0
- da4ml/cmvm/api.py +264 -0
- da4ml/cmvm/core/__init__.py +221 -0
- da4ml/cmvm/core/indexers.py +83 -0
- da4ml/cmvm/core/state_opr.py +284 -0
- da4ml/cmvm/types.py +739 -0
- da4ml/cmvm/util/__init__.py +7 -0
- da4ml/cmvm/util/bit_decompose.py +86 -0
- da4ml/cmvm/util/mat_decompose.py +121 -0
- da4ml/codegen/__init__.py +9 -0
- da4ml/codegen/hls/__init__.py +4 -0
- da4ml/codegen/hls/hls_codegen.py +196 -0
- da4ml/codegen/hls/hls_model.py +255 -0
- da4ml/codegen/hls/source/ap_types/ap_binary.h +78 -0
- da4ml/codegen/hls/source/ap_types/ap_common.h +376 -0
- da4ml/codegen/hls/source/ap_types/ap_decl.h +212 -0
- da4ml/codegen/hls/source/ap_types/ap_fixed.h +360 -0
- da4ml/codegen/hls/source/ap_types/ap_fixed_base.h +2354 -0
- da4ml/codegen/hls/source/ap_types/ap_fixed_ref.h +718 -0
- da4ml/codegen/hls/source/ap_types/ap_fixed_special.h +230 -0
- da4ml/codegen/hls/source/ap_types/ap_int.h +330 -0
- da4ml/codegen/hls/source/ap_types/ap_int_base.h +1885 -0
- da4ml/codegen/hls/source/ap_types/ap_int_ref.h +1346 -0
- da4ml/codegen/hls/source/ap_types/ap_int_special.h +223 -0
- da4ml/codegen/hls/source/ap_types/ap_shift_reg.h +138 -0
- da4ml/codegen/hls/source/ap_types/etc/ap_private.h +7199 -0
- da4ml/codegen/hls/source/ap_types/hls_math.h +27 -0
- da4ml/codegen/hls/source/ap_types/hls_stream.h +263 -0
- da4ml/codegen/hls/source/ap_types/utils/x_hls_utils.h +80 -0
- da4ml/codegen/hls/source/binder_util.hh +71 -0
- da4ml/codegen/hls/source/build_binder.mk +22 -0
- da4ml/codegen/hls/source/vitis_bitshift.hh +32 -0
- da4ml/codegen/rtl/__init__.py +15 -0
- da4ml/codegen/rtl/common_source/binder_util.hh +99 -0
- da4ml/codegen/rtl/common_source/build_binder.mk +34 -0
- da4ml/codegen/rtl/common_source/build_quartus_prj.tcl +104 -0
- da4ml/codegen/rtl/common_source/build_vivado_prj.tcl +111 -0
- da4ml/codegen/rtl/common_source/ioutil.hh +124 -0
- da4ml/codegen/rtl/common_source/template.sdc +27 -0
- da4ml/codegen/rtl/common_source/template.xdc +30 -0
- da4ml/codegen/rtl/rtl_model.py +486 -0
- da4ml/codegen/rtl/verilog/__init__.py +10 -0
- da4ml/codegen/rtl/verilog/comb.py +239 -0
- da4ml/codegen/rtl/verilog/io_wrapper.py +113 -0
- da4ml/codegen/rtl/verilog/pipeline.py +67 -0
- da4ml/codegen/rtl/verilog/source/lookup_table.v +27 -0
- da4ml/codegen/rtl/verilog/source/multiplier.v +37 -0
- da4ml/codegen/rtl/verilog/source/mux.v +58 -0
- da4ml/codegen/rtl/verilog/source/negative.v +31 -0
- da4ml/codegen/rtl/verilog/source/shift_adder.v +59 -0
- da4ml/codegen/rtl/vhdl/__init__.py +9 -0
- da4ml/codegen/rtl/vhdl/comb.py +206 -0
- da4ml/codegen/rtl/vhdl/io_wrapper.py +120 -0
- da4ml/codegen/rtl/vhdl/pipeline.py +71 -0
- da4ml/codegen/rtl/vhdl/source/lookup_table.vhd +52 -0
- da4ml/codegen/rtl/vhdl/source/multiplier.vhd +40 -0
- da4ml/codegen/rtl/vhdl/source/mux.vhd +102 -0
- da4ml/codegen/rtl/vhdl/source/negative.vhd +35 -0
- da4ml/codegen/rtl/vhdl/source/shift_adder.vhd +101 -0
- da4ml/converter/__init__.py +63 -0
- da4ml/converter/hgq2/__init__.py +3 -0
- da4ml/converter/hgq2/layers/__init__.py +11 -0
- da4ml/converter/hgq2/layers/_base.py +132 -0
- da4ml/converter/hgq2/layers/activation.py +81 -0
- da4ml/converter/hgq2/layers/attn.py +148 -0
- da4ml/converter/hgq2/layers/batchnorm.py +15 -0
- da4ml/converter/hgq2/layers/conv.py +149 -0
- da4ml/converter/hgq2/layers/dense.py +39 -0
- da4ml/converter/hgq2/layers/ops.py +246 -0
- da4ml/converter/hgq2/layers/pool.py +107 -0
- da4ml/converter/hgq2/layers/table.py +176 -0
- da4ml/converter/hgq2/parser.py +161 -0
- da4ml/trace/__init__.py +6 -0
- da4ml/trace/fixed_variable.py +965 -0
- da4ml/trace/fixed_variable_array.py +600 -0
- da4ml/trace/ops/__init__.py +13 -0
- da4ml/trace/ops/einsum_utils.py +305 -0
- da4ml/trace/ops/quantization.py +74 -0
- da4ml/trace/ops/reduce_utils.py +105 -0
- da4ml/trace/pipeline.py +181 -0
- da4ml/trace/tracer.py +186 -0
- da4ml/typing/__init__.py +3 -0
- da4ml-0.5.1.post1.dist-info/METADATA +85 -0
- da4ml-0.5.1.post1.dist-info/RECORD +96 -0
- da4ml-0.5.1.post1.dist-info/WHEEL +6 -0
- da4ml-0.5.1.post1.dist-info/entry_points.txt +3 -0
- da4ml-0.5.1.post1.dist-info/sboms/auditwheel.cdx.json +1 -0
- da4ml.libs/libgomp-e985bcbb.so.1.0.0 +0 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
#ifndef X_HLS_MATH_H
|
|
2
|
+
#define X_HLS_MATH_H
|
|
3
|
+
|
|
4
|
+
#include <cmath>
|
|
5
|
+
#include "ap_fixed.h"
|
|
6
|
+
|
|
7
|
+
namespace hls {

// Software stand-ins for the HLS math functions.
//
// Every wrapper converts its argument(s) to double through T::to_double(),
// evaluates the matching <cmath> function, and converts the double result
// back to T. T therefore has to provide to_double() and be constructible
// from double.

/// Returns e raised to the power x, evaluated in double precision.
template <class T>
static T exp(const T x) {
    return static_cast<T>(std::exp(x.to_double()));
}

/// Sine of x, evaluated in double precision.
template <typename T> T sin(T x) { return static_cast<T>(std::sin(x.to_double())); }

/// Cosine of x, evaluated in double precision.
template <typename T> T cos(T x) { return static_cast<T>(std::cos(x.to_double())); }

/// Arc sine of x, evaluated in double precision.
template <typename T> T asin(T x) { return static_cast<T>(std::asin(x.to_double())); }

/// Arc cosine of x, evaluated in double precision.
template <typename T> T acos(T x) { return static_cast<T>(std::acos(x.to_double())); }

/// Arc tangent of x, evaluated in double precision.
template <typename T> T atan(T x) { return static_cast<T>(std::atan(x.to_double())); }

/// Two-argument arc tangent, evaluated in double precision.
/// The arguments are forwarded in the order given, so the first parameter
/// takes the place of std::atan2's first argument.
template <typename T> T atan2(T x, T y) {
    // This has to call std::atan2. Calling hls::atan2 with two doubles would
    // instantiate this very template with T = double, which cannot compile
    // because double has no to_double() member.
    return static_cast<T>(std::atan2(x.to_double(), y.to_double()));
}

} // namespace hls
|
|
27
|
+
#endif
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
/*
|
|
2
|
+
#- (c) Copyright 2011-2018 Xilinx, Inc. All rights reserved.
|
|
3
|
+
#-
|
|
4
|
+
#- This file contains confidential and proprietary information
|
|
5
|
+
#- of Xilinx, Inc. and is protected under U.S. and
|
|
6
|
+
#- international copyright and other intellectual property
|
|
7
|
+
#- laws.
|
|
8
|
+
#-
|
|
9
|
+
#- DISCLAIMER
|
|
10
|
+
#- This disclaimer is not a license and does not grant any
|
|
11
|
+
#- rights to the materials distributed herewith. Except as
|
|
12
|
+
#- otherwise provided in a valid license issued to you by
|
|
13
|
+
#- Xilinx, and to the maximum extent permitted by applicable
|
|
14
|
+
#- law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND
|
|
15
|
+
#- WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES
|
|
16
|
+
#- AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING
|
|
17
|
+
#- BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-
|
|
18
|
+
#- INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and
|
|
19
|
+
#- (2) Xilinx shall not be liable (whether in contract or tort,
|
|
20
|
+
#- including negligence, or under any other theory of
|
|
21
|
+
#- liability) for any loss or damage of any kind or nature
|
|
22
|
+
#- related to, arising under or in connection with these
|
|
23
|
+
#- materials, including for any direct, or any indirect,
|
|
24
|
+
#- special, incidental, or consequential loss or damage
|
|
25
|
+
#- (including loss of data, profits, goodwill, or any type of
|
|
26
|
+
#- loss or damage suffered as a result of any action brought
|
|
27
|
+
#- by a third party) even if such damage or loss was
|
|
28
|
+
#- reasonably foreseeable or Xilinx had been advised of the
|
|
29
|
+
#- possibility of the same.
|
|
30
|
+
#-
|
|
31
|
+
#- CRITICAL APPLICATIONS
|
|
32
|
+
#- Xilinx products are not designed or intended to be fail-
|
|
33
|
+
#- safe, or for use in any application requiring fail-safe
|
|
34
|
+
#- performance, such as life-support or safety devices or
|
|
35
|
+
#- systems, Class III medical devices, nuclear facilities,
|
|
36
|
+
#- applications related to the deployment of airbags, or any
|
|
37
|
+
#- other applications that could lead to death, personal
|
|
38
|
+
#- injury, or severe property or environmental damage
|
|
39
|
+
#- (individually and collectively, "Critical
|
|
40
|
+
#- Applications"). Customer assumes the sole risk and
|
|
41
|
+
#- liability of any use of Xilinx products in Critical
|
|
42
|
+
#- Applications, subject only to applicable laws and
|
|
43
|
+
#- regulations governing limitations on product liability.
|
|
44
|
+
#-
|
|
45
|
+
#- THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS
|
|
46
|
+
#- PART OF THIS FILE AT ALL TIMES.
|
|
47
|
+
#- ************************************************************************
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
51
|
+
you may not use this file except in compliance with the License.
|
|
52
|
+
You may obtain a copy of the License at
|
|
53
|
+
|
|
54
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
55
|
+
|
|
56
|
+
Unless required by applicable law or agreed to in writing, software
|
|
57
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
58
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
59
|
+
See the License for the specific language governing permissions and
|
|
60
|
+
limitations under the License.
|
|
61
|
+
*/
|
|
62
|
+
|
|
63
|
+
#ifndef X_HLS_STREAM_SIM_H
|
|
64
|
+
#define X_HLS_STREAM_SIM_H
|
|
65
|
+
|
|
66
|
+
/*
|
|
67
|
+
* This file contains a C++ model of hls::stream.
|
|
68
|
+
 * It defines the C simulation model.
|
|
69
|
+
*/
|
|
70
|
+
#ifndef __cplusplus
|
|
71
|
+
|
|
72
|
+
#error C++ is required to include this header file
|
|
73
|
+
|
|
74
|
+
#else
|
|
75
|
+
|
|
76
|
+
//////////////////////////////////////////////
|
|
77
|
+
// C level simulation models for hls::stream
|
|
78
|
+
//////////////////////////////////////////////
|
|
79
|
+
#include <queue>
|
|
80
|
+
#include <iostream>
|
|
81
|
+
#include <typeinfo>
|
|
82
|
+
#include <string>
|
|
83
|
+
#include <sstream>
|
|
84
|
+
|
|
85
|
+
#ifdef HLS_STREAM_THREAD_SAFE
|
|
86
|
+
#include <mutex>
|
|
87
|
+
#include <condition_variable>
|
|
88
|
+
#endif
|
|
89
|
+
|
|
90
|
+
#ifndef _MSC_VER
|
|
91
|
+
#include <cxxabi.h>
|
|
92
|
+
#include <stdlib.h>
|
|
93
|
+
#endif
|
|
94
|
+
|
|
95
|
+
namespace hls {
|
|
96
|
+
|
|
97
|
+
/// C simulation model of hls::stream.
///
/// Elements are kept in an unbounded std::deque and handed out in FIFO order.
/// When HLS_STREAM_THREAD_SAFE is defined, every access to the queue is
/// guarded by a mutex and read() blocks on a condition variable until data
/// is available; otherwise no synchronisation is performed and reading an
/// empty stream prints a warning and yields a value-initialised element.
template<typename __STREAM_T__>
class stream
{
  protected:
    std::string _name;               // name used in diagnostics
    std::deque<__STREAM_T__> _data;  // container for the elements
#ifdef HLS_STREAM_THREAD_SAFE
    std::mutex _mutex;                       // guards _data
    std::condition_variable _condition_var;  // signalled by write()
#endif

  public:
    /// Constructors
    // Keep consistent with the synthesis model's constructors

    /// Creates a stream named "<demangled type name>.<counter>".
    stream() {
        // Shared by all default-constructed streams of this element type.
        static unsigned _counter = 1;
        std::stringstream ss;
#ifndef _MSC_VER
        char* _demangle_name = abi::__cxa_demangle(typeid(*this).name(), 0, 0, 0);
        if (_demangle_name) {
            _name = _demangle_name;
            // __cxa_demangle returns a malloc'ed buffer that the caller owns.
            free(_demangle_name);
        }
        else {
            _name = "hls_stream";
        }
#else
        _name = typeid(*this).name();
#endif

        ss << _counter++;
        _name += "." + ss.str();
    }

    /// Creates a stream with a caller-supplied name.
    stream(const std::string name) {
        _name = name;
    }

  /// Make copy constructor and assignment operator private
  private:
    stream(const stream< __STREAM_T__ >& chn):
        _name(chn._name), _data(chn._data) {
    }

    stream& operator = (const stream< __STREAM_T__ >& chn) {
        _name = chn._name;
        _data = chn._data;
        return *this;
    }

  public:
    /// Overload >> and << operators to implement read() and write()
    void operator >> (__STREAM_T__& rdata) {
        read(rdata);
    }

    void operator << (const __STREAM_T__& wdata) {
        write(wdata);
    }


  public:
    /// Destructor
    /// Check status of the queue: warns on stdout if elements were never read.
    virtual ~stream() {
        if (!_data.empty())
        {
            std::cout << "WARNING: Hls::stream '"
                      << _name
                      << "' contains leftover data,"
                      << " which may result in RTL simulation hanging."
                      << std::endl;
        }
    }

    /// Status of the queue
    bool empty() {
#ifdef HLS_STREAM_THREAD_SAFE
        std::lock_guard<std::mutex> lg(_mutex);
#endif
        return _data.empty();
    }

    /// The simulation model is unbounded, so it never reports full.
    bool full() const { return false; }

    /// Blocking read
    void read(__STREAM_T__& head) {
        head = read();
    }

#ifdef HLS_STREAM_THREAD_SAFE
    /// Waits until an element is available, then removes and returns it.
    __STREAM_T__ read() {
        std::unique_lock<std::mutex> ul(_mutex);
        while (_data.empty()) {
            _condition_var.wait(ul);
        }

        __STREAM_T__ elem;
        elem = _data.front();
        _data.pop_front();
        return elem;
    }
#else
    /// Removes and returns the oldest element. On an empty stream a warning
    /// is printed and a value-initialised element is returned instead.
    __STREAM_T__ read() {
        __STREAM_T__ elem;
        if (_data.empty()) {
            std::cout << "WARNING: Hls::stream '"
                      << _name
                      << "' is read while empty,"
                      << " which may result in RTL simulation hanging."
                      << std::endl;
            elem = __STREAM_T__();
        } else {
            elem = _data.front();
            _data.pop_front();
        }
        return elem;
    }
#endif

    /// Blocking write
    void write(const __STREAM_T__& tail) {
#ifdef HLS_STREAM_THREAD_SAFE
        std::unique_lock<std::mutex> ul(_mutex);
#endif
        _data.push_back(tail);
#ifdef HLS_STREAM_THREAD_SAFE
        _condition_var.notify_one();
#endif
    }

    /// Nonblocking read
    /// Returns true and stores the oldest element in head when one exists;
    /// otherwise stores a value-initialised element and returns false.
    bool read_nb(__STREAM_T__& head) {
#ifdef HLS_STREAM_THREAD_SAFE
        std::lock_guard<std::mutex> lg(_mutex);
#endif
        bool is_empty = _data.empty();
        if (is_empty) {
            head = __STREAM_T__();
        } else {
            __STREAM_T__ elem(_data.front());
            _data.pop_front();
            head = elem;
        }
        return !is_empty;
    }

    /// Nonblocking write
    /// Always enqueues tail; the return value is the negation of full().
    bool write_nb(const __STREAM_T__& tail) {
        bool is_full = full();
        write(tail);
        return !is_full;
    }

    /// Fifo size
    size_t size() {
#ifdef HLS_STREAM_THREAD_SAFE
        // _data may be modified concurrently by read()/write(); take the same
        // lock the other accessors use so the size query is not a data race.
        std::lock_guard<std::mutex> lg(_mutex);
#endif
        return _data.size();
    }
};
|
|
258
|
+
|
|
259
|
+
} // namespace hls
|
|
260
|
+
|
|
261
|
+
#endif // __cplusplus
|
|
262
|
+
#endif // X_HLS_STREAM_SIM_H
|
|
263
|
+
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
#ifndef X_HLS_UTILS_H
|
|
2
|
+
#define X_HLS_UTILS_H
|
|
3
|
+
#include "ap_fixed.h"
|
|
4
|
+
#include <limits>
|
|
5
|
+
|
|
6
|
+
namespace hls {
|
|
7
|
+
|
|
8
|
+
/// Primary template: every query is answered by std::numeric_limits<T>.
template <typename T>
class numeric_limits {
    typedef std::numeric_limits<T> std_limits;

  public:
    /// Same value as std::numeric_limits<T>::max().
    static T max() {
        return std_limits::max();
    }

    /// Same value as std::numeric_limits<T>::min().
    static T min() {
        return std_limits::min();
    }

    /// Same value as std::numeric_limits<T>::epsilon().
    static T epsilon() {
        return std_limits::epsilon();
    }
};
|
|
15
|
+
|
|
16
|
+
template <int W, int I, ap_q_mode Q, ap_o_mode O>
|
|
17
|
+
class numeric_limits<ap_fixed<W,I,Q,O> > {
|
|
18
|
+
public:
|
|
19
|
+
static ap_fixed<W,I,Q,O> max() {
|
|
20
|
+
ap_int<W> m = ::hls::numeric_limits<ap_int<W> >::max();
|
|
21
|
+
ap_fixed<W,I,Q,O> x;
|
|
22
|
+
x(W-1,0) = m(W-1,0);
|
|
23
|
+
return x;
|
|
24
|
+
}
|
|
25
|
+
static ap_fixed<W,I,Q,O> min() {
|
|
26
|
+
ap_int<W> m = ::hls::numeric_limits<ap_int<W> >::min();
|
|
27
|
+
ap_fixed<W,I,Q,O> x;
|
|
28
|
+
x(W-1,0) = m(W-1,0);
|
|
29
|
+
return x;
|
|
30
|
+
}
|
|
31
|
+
static ap_fixed<W,I,Q,O> epsilon() {
|
|
32
|
+
ap_fixed<W,I,Q,O> x = 0;
|
|
33
|
+
x[0] = 1;
|
|
34
|
+
return x;
|
|
35
|
+
}
|
|
36
|
+
};
|
|
37
|
+
|
|
38
|
+
template <int W, int I, ap_q_mode Q, ap_o_mode O>
|
|
39
|
+
class numeric_limits<ap_ufixed<W,I,Q,O> > {
|
|
40
|
+
public:
|
|
41
|
+
static ap_ufixed<W,I,Q,O> max() {
|
|
42
|
+
ap_uint<W> m = ::hls::numeric_limits<ap_uint<W> >::max();
|
|
43
|
+
ap_ufixed<W,I,Q,O> x;
|
|
44
|
+
x(W-1,0) = m(W-1,0);
|
|
45
|
+
return x;
|
|
46
|
+
}
|
|
47
|
+
static ap_ufixed<W,I,Q,O> min() { return 0; }
|
|
48
|
+
static ap_ufixed<W,I,Q,O> epsilon() {
|
|
49
|
+
ap_ufixed<W,I,Q,O> x = 0;
|
|
50
|
+
x[0] = 1;
|
|
51
|
+
return x;
|
|
52
|
+
}
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
template <int W>
|
|
56
|
+
class numeric_limits<ap_int<W> > {
|
|
57
|
+
public:
|
|
58
|
+
static ap_int<W> max() { ap_int<W> m = min(); return ~m; }
|
|
59
|
+
static ap_int<W> min() { ap_int<W> m = 0; m[W-1] = 1; return m; }
|
|
60
|
+
static ap_int<W> epsilon() {
|
|
61
|
+
ap_int<W> x = 0;
|
|
62
|
+
x[0] = 1;
|
|
63
|
+
return x;
|
|
64
|
+
}
|
|
65
|
+
};
|
|
66
|
+
|
|
67
|
+
template <int W>
|
|
68
|
+
class numeric_limits<ap_uint<W> > {
|
|
69
|
+
public:
|
|
70
|
+
static ap_uint<W> max() { ap_uint<W> zero = 0; return ~zero; }
|
|
71
|
+
static ap_uint<W> min() { return 0; }
|
|
72
|
+
static ap_uint<W> epsilon() {
|
|
73
|
+
ap_uint<W> x = 0;
|
|
74
|
+
x[0] = 1;
|
|
75
|
+
return x;
|
|
76
|
+
}
|
|
77
|
+
};
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
#endif
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
#include <cstddef>
|
|
3
|
+
|
|
4
|
+
#ifdef _OPENMP
|
|
5
|
+
#include <algorithm>
|
|
6
|
+
#include <omp.h>
|
|
7
|
+
constexpr bool _openmp = true;
|
|
8
|
+
#else
|
|
9
|
+
constexpr bool _openmp = false;
|
|
10
|
+
#endif
|
|
11
|
+
|
|
12
|
+
/// Runs CONFIG_T::f on n_samples consecutive samples, one after another.
///
/// c_inp holds n_samples * CONFIG_T::N_inp values and c_out receives
/// n_samples * CONFIG_T::N_out values, both laid out sample by sample.
/// Each sample is converted element-wise into CONFIG_T::inp_t, passed to
/// CONFIG_T::f, and the CONFIG_T::out_t results are converted back to T.
template <typename CONFIG_T, typename T>
void _inference(T *c_inp, T *c_out, size_t n_samples) {
    // Scratch buffers reused for every sample.
    typename CONFIG_T::inp_t model_in[CONFIG_T::N_inp];
    typename CONFIG_T::out_t model_out[CONFIG_T::N_out];

    for (size_t sample = 0; sample != n_samples; ++sample) {
        const size_t in_base = sample * CONFIG_T::N_inp;
        const size_t out_base = sample * CONFIG_T::N_out;

        // Stage this sample's inputs in the model's input type.
        for (size_t k = 0; k < CONFIG_T::N_inp; ++k) {
            model_in[k] = c_inp[in_base + k];
        }

        CONFIG_T::f(model_in, model_out);

        // Copy the results back into the caller's output array.
        for (size_t k = 0; k < CONFIG_T::N_out; ++k) {
            c_out[out_base + k] = model_out[k];
        }
    }
}
|
|
31
|
+
|
|
32
|
+
template <typename CONFIG_T, typename T>
|
|
33
|
+
void batch_inference(T *c_inp, T *c_out, size_t n_samples, size_t n_threads) {
|
|
34
|
+
if (n_threads > 1 || n_threads == 0) {
|
|
35
|
+
#ifdef _OPENMP
|
|
36
|
+
size_t min_samples_per_thread;
|
|
37
|
+
size_t n_max_threads;
|
|
38
|
+
if (n_threads == 0) {
|
|
39
|
+
min_samples_per_thread = 1;
|
|
40
|
+
n_max_threads = omp_get_max_threads();
|
|
41
|
+
}
|
|
42
|
+
else {
|
|
43
|
+
min_samples_per_thread = std::max<size_t>(1, n_samples / n_threads);
|
|
44
|
+
n_max_threads = n_threads;
|
|
45
|
+
}
|
|
46
|
+
size_t n_sample_per_thread =
|
|
47
|
+
n_samples / n_max_threads + (n_samples % n_max_threads ? 1 : 0);
|
|
48
|
+
n_sample_per_thread =
|
|
49
|
+
std::max<size_t>(n_sample_per_thread, min_samples_per_thread);
|
|
50
|
+
size_t n_thread = n_samples / n_sample_per_thread;
|
|
51
|
+
n_thread += (n_samples % n_sample_per_thread) ? 1 : 0;
|
|
52
|
+
|
|
53
|
+
#pragma omp parallel for num_threads(n_thread) schedule(static)
|
|
54
|
+
for (size_t i = 0; i < n_thread; ++i) {
|
|
55
|
+
size_t start = i * n_sample_per_thread;
|
|
56
|
+
size_t end = std::min<size_t>(start + n_sample_per_thread, n_samples);
|
|
57
|
+
size_t n_samples_this_thread = end - start;
|
|
58
|
+
size_t offset_in = start * CONFIG_T::N_inp;
|
|
59
|
+
size_t offset_out = start * CONFIG_T::N_out;
|
|
60
|
+
_inference<CONFIG_T, T>(
|
|
61
|
+
&c_inp[offset_in], &c_out[offset_out], n_samples_this_thread
|
|
62
|
+
);
|
|
63
|
+
}
|
|
64
|
+
#else
|
|
65
|
+
_inference<CONFIG_T, T>(c_inp, c_out, n_samples);
|
|
66
|
+
#endif
|
|
67
|
+
}
|
|
68
|
+
else {
|
|
69
|
+
_inference<CONFIG_T, T>(c_inp, c_out, n_samples);
|
|
70
|
+
}
|
|
71
|
+
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Builds lib$(PRJ_NAME)_$(STAMP).so from $(PRJ_NAME).cc and $(PRJ_NAME)_bridge.cc.
# PRJ_NAME and STAMP are not assigned in this file and must come from the
# caller; EXTRA_CXXFLAGS is optional and is appended to both commands.
default: slow
INCLUDES = -I ap_types -I .
CXXFLAGS = -fPIC
# NOTE(review): CFLAGS is not referenced by any rule below, so -std=c++17 is
# never passed to the compiler by this file - confirm whether that is intended.
CFLAGS = -std=c++17 -fPIC
LIBNAME = lib$(PRJ_NAME)_$(STAMP).so

# "fast" and "slow" build the same library and differ only in optimisation level.
fast: CXXFLAGS += -O3
fast: $(LIBNAME)

slow: CXXFLAGS += -O
slow: $(LIBNAME)

# NOTE(review): this object is compiled with $(CC) while the link step uses
# $(CXX) - verify against the caller before changing.
$(PRJ_NAME)_$(STAMP).o: $(PRJ_NAME).cc
	$(CC) -c $(PRJ_NAME).cc -o $(PRJ_NAME)_$(STAMP).o $(INCLUDES) $(CXXFLAGS) $(EXTRA_CXXFLAGS)

$(LIBNAME): $(PRJ_NAME)_$(STAMP).o $(PRJ_NAME)_bridge.cc
	$(CXX) $(INCLUDES) $(CXXFLAGS) -shared -o $@ $(PRJ_NAME)_$(STAMP).o $(PRJ_NAME)_bridge.cc $(EXTRA_CXXFLAGS)

clean:
	rm -f $(LIBNAME) $(PRJ_NAME)_$(STAMP).o

# None of these targets names a file, so a stray file called "fast", "slow"
# or "default" must not make them appear up to date.
.PHONY: default fast slow clean
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
#include "ap_fixed.h"
|
|
3
|
+
|
|
4
|
+
template <int s, int b, int i, ap_q_mode Q, ap_o_mode O, int N>
|
|
5
|
+
ap_fixed<b, i + s> bit_shift(ap_fixed<b, i, Q, O, N> x) {
|
|
6
|
+
#pragma HLS INLINE
|
|
7
|
+
ap_fixed<b, i + s> r;
|
|
8
|
+
r.range() = x.range();
|
|
9
|
+
return r;
|
|
10
|
+
};
|
|
11
|
+
|
|
12
|
+
template <int s, int b, int i, ap_q_mode Q, ap_o_mode O, int N>
|
|
13
|
+
ap_ufixed<b, i + s> bit_shift(ap_ufixed<b, i, Q, O, N> x) {
|
|
14
|
+
#pragma HLS INLINE
|
|
15
|
+
ap_ufixed<b, i + s> r;
|
|
16
|
+
r.range() = x.range();
|
|
17
|
+
return r;
|
|
18
|
+
};
|
|
19
|
+
|
|
20
|
+
/// Copies the raw bits of a signed b-bit integer into an ap_fixed of width b
/// with s integer bits.
template <int s, int b>
ap_fixed<b, s> bit_shift(ap_int<b> x) {
#pragma HLS INLINE
    typedef ap_fixed<b, s> shifted_t;
    shifted_t shifted;
    shifted.range() = x.range();
    return shifted;
}
|
|
26
|
+
|
|
27
|
+
/// Copies the raw bits of an unsigned b-bit integer into an ap_ufixed of
/// width b with s integer bits.
template <int s, int b>
ap_ufixed<b, s> bit_shift(ap_uint<b> x) {
#pragma HLS INLINE
    typedef ap_ufixed<b, s> shifted_t;
    shifted_t shifted;
    shifted.range() = x.range();
    return shifted;
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from .rtl_model import RTLModel, VerilogModel, VHDLModel
|
|
2
|
+
from .verilog import comb_logic_gen as verilog_comb_logic_gen
|
|
3
|
+
from .verilog import generate_io_wrapper as verilog_generate_io_wrapper
|
|
4
|
+
from .vhdl import comb_logic_gen as vhdl_comb_logic_gen
|
|
5
|
+
from .vhdl import generate_io_wrapper as vhdl_generate_io_wrapper
|
|
6
|
+
|
|
7
|
+
# Names exported by `from ... import *`: the three model classes plus the
# Verilog and VHDL generator functions under their language-prefixed aliases.
__all__ = [
    'RTLModel',
    'VerilogModel',
    'VHDLModel',
    'verilog_comb_logic_gen',
    'verilog_generate_io_wrapper',
    'vhdl_comb_logic_gen',
    'vhdl_generate_io_wrapper',
]
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
#include "ioutil.hh"
|
|
2
|
+
#include <verilated.h>
|
|
3
|
+
|
|
4
|
+
#ifdef _OPENMP
|
|
5
|
+
#include <omp.h>
|
|
6
|
+
constexpr bool _openmp = true;
|
|
7
|
+
#else
|
|
8
|
+
constexpr bool _openmp = false;
|
|
9
|
+
#endif
|
|
10
|
+
|
|
11
|
+
template <typename CONFIG_T>
|
|
12
|
+
std::enable_if_t<CONFIG_T::II != 0>
|
|
13
|
+
_inference(int32_t *c_inp, int32_t *c_out, size_t n_samples) {
|
|
14
|
+
auto dut = std::make_unique<typename CONFIG_T::dut_t>();
|
|
15
|
+
|
|
16
|
+
size_t clk_req = n_samples * CONFIG_T::II + (CONFIG_T::latency - CONFIG_T::II) + 1;
|
|
17
|
+
|
|
18
|
+
for (size_t t_inp = 0; t_inp < clk_req; ++t_inp) {
|
|
19
|
+
size_t t_out = t_inp - CONFIG_T::latency;
|
|
20
|
+
|
|
21
|
+
if (t_inp < n_samples * CONFIG_T::II && t_inp % CONFIG_T::II == 0) {
|
|
22
|
+
write_input<CONFIG_T::N_inp, CONFIG_T::max_inp_bw>(
|
|
23
|
+
dut->model_inp, &c_inp[t_inp / CONFIG_T::II * CONFIG_T::N_inp]
|
|
24
|
+
);
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
dut->clk = 0;
|
|
28
|
+
dut->eval();
|
|
29
|
+
dut->clk = 1;
|
|
30
|
+
dut->eval();
|
|
31
|
+
|
|
32
|
+
if (t_inp >= CONFIG_T::latency && t_out % CONFIG_T::II == 0) {
|
|
33
|
+
read_output<CONFIG_T::N_out, CONFIG_T::max_out_bw>(
|
|
34
|
+
dut->model_out, &c_out[t_out / CONFIG_T::II * CONFIG_T::N_out]
|
|
35
|
+
);
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
dut->final();
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
template <typename CONFIG_T>
|
|
43
|
+
std::enable_if_t<CONFIG_T::II == 0>
|
|
44
|
+
_inference(int32_t *c_inp, int32_t *c_out, size_t n_samples) {
|
|
45
|
+
auto dut = std::make_unique<typename CONFIG_T::dut_t>();
|
|
46
|
+
|
|
47
|
+
for (size_t i = 0; i < n_samples; ++i) {
|
|
48
|
+
write_input<CONFIG_T::N_inp, CONFIG_T::max_inp_bw>(
|
|
49
|
+
dut->model_inp, &c_inp[i * CONFIG_T::N_inp]
|
|
50
|
+
);
|
|
51
|
+
dut->eval();
|
|
52
|
+
read_output<CONFIG_T::N_out, CONFIG_T::max_out_bw>(
|
|
53
|
+
dut->model_out, &c_out[i * CONFIG_T::N_out]
|
|
54
|
+
);
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
dut->final();
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
template <typename CONFIG_T>
|
|
61
|
+
void batch_inference(int32_t *c_inp, int32_t *c_out, size_t n_samples, size_t n_threads) {
|
|
62
|
+
if (n_threads > 1 || n_threads == 0) {
|
|
63
|
+
#ifdef _OPENMP
|
|
64
|
+
size_t min_samples_per_thread;
|
|
65
|
+
size_t n_max_threads;
|
|
66
|
+
if (n_threads == 0) {
|
|
67
|
+
min_samples_per_thread = 1;
|
|
68
|
+
n_max_threads = omp_get_max_threads();
|
|
69
|
+
}
|
|
70
|
+
else {
|
|
71
|
+
min_samples_per_thread = std::max<size_t>(1, n_samples / n_threads);
|
|
72
|
+
n_max_threads = n_threads;
|
|
73
|
+
}
|
|
74
|
+
size_t n_sample_per_thread =
|
|
75
|
+
n_samples / n_max_threads + (n_samples % n_max_threads ? 1 : 0);
|
|
76
|
+
n_sample_per_thread =
|
|
77
|
+
std::max<size_t>(n_sample_per_thread, min_samples_per_thread);
|
|
78
|
+
size_t n_thread = n_samples / n_sample_per_thread;
|
|
79
|
+
n_thread += (n_samples % n_sample_per_thread) ? 1 : 0;
|
|
80
|
+
|
|
81
|
+
#pragma omp parallel for num_threads(n_thread) schedule(static)
|
|
82
|
+
for (size_t i = 0; i < n_thread; ++i) {
|
|
83
|
+
size_t start = i * n_sample_per_thread;
|
|
84
|
+
size_t end = std::min<size_t>(start + n_sample_per_thread, n_samples);
|
|
85
|
+
size_t n_samples_this_thread = end - start;
|
|
86
|
+
size_t offset_in = start * CONFIG_T::N_inp;
|
|
87
|
+
size_t offset_out = start * CONFIG_T::N_out;
|
|
88
|
+
_inference<CONFIG_T>(
|
|
89
|
+
&c_inp[offset_in], &c_out[offset_out], n_samples_this_thread
|
|
90
|
+
);
|
|
91
|
+
}
|
|
92
|
+
#else
|
|
93
|
+
_inference<CONFIG_T>(c_inp, c_out, n_samples);
|
|
94
|
+
#endif
|
|
95
|
+
}
|
|
96
|
+
else {
|
|
97
|
+
_inference<CONFIG_T>(c_inp, c_out, n_samples);
|
|
98
|
+
}
|
|
99
|
+
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Verilates $(VM_PREFIX).v and links the result together with
# $(VM_PREFIX)_binder.cc into lib$(VM_PREFIX)_$(STAMP).so.
# VM_PREFIX and STAMP are not assigned in this file and must come from the
# caller; EXTRA_CXXFLAGS and VERILATOR_FLAGS are optional.
default: slow

# Taken from the last VERILATOR_ROOT line printed by `verilator -V`.
VERILATOR_ROOT = $(shell verilator -V | grep -a VERILATOR_ROOT | tail -1 | awk '{{print $$3}}')
INCLUDES = -I./obj_dir -I$(VERILATOR_ROOT)/include -I../src
WARNINGS = -Wl,--no-undefined
CFLAGS = -std=c++17 -fPIC
LINKFLAGS = $(INCLUDES) $(WARNINGS)
LIBNAME = lib$(VM_PREFIX)_$(STAMP).so
N_JOBS ?= $(shell nproc)
VERILATOR_FLAGS ?=

# NOTE(review): the recipe writes $(VM_PREFIX).v into the current directory,
# not to the ../src/ path named as the target - confirm this is intended.
../src/$(VM_PREFIX).v: $(wildcard ../src/$(VM_PREFIX).vhd) $(wildcard ../src/$(VM_PREFIX)_stage*.vhd)
	# vhdl specific - convert to verilog first for verilating
	mkdir -p obj_dir
	cp ../src/memfiles/* ./ 2>/dev/null || true
	ghdl -a --std=08 --workdir=obj_dir ../src/static/multiplier.vhd ../src/static/mux.vhd ../src/static/negative.vhd ../src/static/shift_adder.vhd ../src/static/lookup_table.vhd $(wildcard ../src/$(VM_PREFIX:_wrapper=)_stage*.vhd) $(wildcard ../src/$(VM_PREFIX:_wrapper=).vhd) ../src/$(VM_PREFIX).vhd
	ghdl synth --std=08 --workdir=obj_dir --out=verilog $(VM_PREFIX) > $(VM_PREFIX).v

./obj_dir/libV$(VM_PREFIX).a ./obj_dir/libverilated.a ./obj_dir/V$(VM_PREFIX)__ALL.a: ../src/$(VM_PREFIX).v $(wildcard ../src/$(VM_PREFIX)_stage*.v)
	verilator --cc -j $(N_JOBS) -build $(VM_PREFIX).v --prefix V$(VM_PREFIX) $(VERILATOR_FLAGS) -CFLAGS "$(CFLAGS)" -I../src -I../src/static

# NOTE(review): CXXFLAGS2 is not assigned anywhere in this file; it expands to
# nothing unless the caller provides it.
$(LIBNAME): ./obj_dir/libV$(VM_PREFIX).a ./obj_dir/libverilated.a ./obj_dir/V$(VM_PREFIX)__ALL.a $(VM_PREFIX)_binder.cc
	$(CXX) $(CFLAGS) $(LINKFLAGS) $(CXXFLAGS2) -pthread -shared -o $(LIBNAME) $(VM_PREFIX)_binder.cc ./obj_dir/libV$(VM_PREFIX).a ./obj_dir/libverilated.a ./obj_dir/V$(VM_PREFIX)__ALL.a $(EXTRA_CXXFLAGS)


# "fast" and "slow" build the same library and differ only in optimisation level.
fast: CFLAGS += -O3
fast: $(LIBNAME)

slow: CFLAGS += -O
slow: $(LIBNAME)

clean:
	rm -rf obj_dir
	rm -f $(LIBNAME)

# None of these targets names a file, so a stray file called "fast", "slow",
# "default" or "clean" must not make them appear up to date.
.PHONY: default fast slow clean
|