da4ml 0.3.2__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {da4ml-0.3.2/src/da4ml.egg-info → da4ml-0.4.0}/PKG-INFO +2 -2
- {da4ml-0.3.2 → da4ml-0.4.0}/docs/faq.md +3 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/docs/getting_started.md +1 -1
- {da4ml-0.3.2 → da4ml-0.4.0}/pyproject.toml +1 -1
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/_version.py +3 -3
- da4ml-0.4.0/src/da4ml/codegen/__init__.py +9 -0
- da4ml-0.4.0/src/da4ml/codegen/hls/__init__.py +4 -0
- da4ml-0.3.2/src/da4ml/codegen/cpp/cpp_codegen.py → da4ml-0.4.0/src/da4ml/codegen/hls/hls_codegen.py +19 -12
- {da4ml-0.3.2/src/da4ml/codegen/cpp → da4ml-0.4.0/src/da4ml/codegen/hls}/hls_model.py +7 -7
- da4ml-0.4.0/src/da4ml/codegen/hls/source/binder_util.hh +50 -0
- {da4ml-0.3.2/src/da4ml/codegen/cpp → da4ml-0.4.0/src/da4ml/codegen/hls}/source/vitis_bitshift.hh +5 -3
- da4ml-0.4.0/src/da4ml/codegen/rtl/__init__.py +15 -0
- {da4ml-0.3.2/src/da4ml/codegen/verilog/source → da4ml-0.4.0/src/da4ml/codegen/rtl/common_source}/binder_util.hh +4 -4
- {da4ml-0.3.2/src/da4ml/codegen/verilog/source → da4ml-0.4.0/src/da4ml/codegen/rtl/common_source}/build_binder.mk +7 -1
- {da4ml-0.3.2/src/da4ml/codegen/verilog/source → da4ml-0.4.0/src/da4ml/codegen/rtl/common_source}/build_prj.tcl +28 -7
- da4ml-0.3.2/src/da4ml/codegen/verilog/verilog_model.py → da4ml-0.4.0/src/da4ml/codegen/rtl/rtl_model.py +87 -16
- {da4ml-0.3.2/src/da4ml/codegen → da4ml-0.4.0/src/da4ml/codegen/rtl}/verilog/__init__.py +0 -2
- {da4ml-0.3.2/src/da4ml/codegen → da4ml-0.4.0/src/da4ml/codegen/rtl}/verilog/comb.py +32 -34
- {da4ml-0.3.2/src/da4ml/codegen → da4ml-0.4.0/src/da4ml/codegen/rtl}/verilog/io_wrapper.py +8 -8
- {da4ml-0.3.2/src/da4ml/codegen → da4ml-0.4.0/src/da4ml/codegen/rtl}/verilog/pipeline.py +10 -10
- {da4ml-0.3.2/src/da4ml/codegen → da4ml-0.4.0/src/da4ml/codegen/rtl}/verilog/source/negative.v +2 -1
- da4ml-0.4.0/src/da4ml/codegen/rtl/vhdl/__init__.py +10 -0
- da4ml-0.4.0/src/da4ml/codegen/rtl/vhdl/comb.py +192 -0
- da4ml-0.4.0/src/da4ml/codegen/rtl/vhdl/io_wrapper.py +157 -0
- da4ml-0.4.0/src/da4ml/codegen/rtl/vhdl/pipeline.py +71 -0
- da4ml-0.4.0/src/da4ml/codegen/rtl/vhdl/source/multiplier.vhd +40 -0
- da4ml-0.4.0/src/da4ml/codegen/rtl/vhdl/source/mux.vhd +102 -0
- da4ml-0.4.0/src/da4ml/codegen/rtl/vhdl/source/negative.vhd +35 -0
- da4ml-0.4.0/src/da4ml/codegen/rtl/vhdl/source/shift_adder.vhd +101 -0
- da4ml-0.4.0/src/da4ml/codegen/rtl/vhdl/source/template.xdc +32 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/converter/hgq2/parser.py +4 -2
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/trace/fixed_variable.py +4 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/trace/fixed_variable_array.py +4 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/trace/ops/reduce_utils.py +3 -3
- {da4ml-0.3.2 → da4ml-0.4.0/src/da4ml.egg-info}/PKG-INFO +2 -2
- da4ml-0.4.0/src/da4ml.egg-info/SOURCES.txt +100 -0
- da4ml-0.3.2/src/da4ml/codegen/__init__.py +0 -12
- da4ml-0.3.2/src/da4ml/codegen/cpp/__init__.py +0 -4
- da4ml-0.3.2/src/da4ml/codegen/cpp/source/binder_util.hh +0 -56
- da4ml-0.3.2/src/da4ml.egg-info/SOURCES.txt +0 -90
- {da4ml-0.3.2 → da4ml-0.4.0}/.clang-format +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/.github/workflows/python-publish.yml +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/.github/workflows/sphinx-build.yml +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/.gitignore +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/.pre-commit-config.yaml +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/LICENSE +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/README.md +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/docs/Makefile +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/docs/_static/example.svg +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/docs/_static/icon.svg +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/docs/_static/stage1.svg +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/docs/_static/stage2.svg +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/docs/_static/workflow.svg +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/docs/cmvm.md +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/docs/conf.py +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/docs/dais.md +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/docs/index.rst +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/docs/install.md +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/docs/status.md +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/interperter/DAISInterpreter.cc +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/interperter/DAISInterpreter.hh +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/setup.cfg +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/__init__.py +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/cmvm/__init__.py +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/cmvm/api.py +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/cmvm/core/__init__.py +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/cmvm/core/indexers.py +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/cmvm/core/state_opr.py +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/cmvm/types.py +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/cmvm/util/__init__.py +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/cmvm/util/bit_decompose.py +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/cmvm/util/mat_decompose.py +0 -0
- {da4ml-0.3.2/src/da4ml/codegen/cpp → da4ml-0.4.0/src/da4ml/codegen/hls}/source/ap_types/ap_binary.h +0 -0
- {da4ml-0.3.2/src/da4ml/codegen/cpp → da4ml-0.4.0/src/da4ml/codegen/hls}/source/ap_types/ap_common.h +0 -0
- {da4ml-0.3.2/src/da4ml/codegen/cpp → da4ml-0.4.0/src/da4ml/codegen/hls}/source/ap_types/ap_decl.h +0 -0
- {da4ml-0.3.2/src/da4ml/codegen/cpp → da4ml-0.4.0/src/da4ml/codegen/hls}/source/ap_types/ap_fixed.h +0 -0
- {da4ml-0.3.2/src/da4ml/codegen/cpp → da4ml-0.4.0/src/da4ml/codegen/hls}/source/ap_types/ap_fixed_base.h +0 -0
- {da4ml-0.3.2/src/da4ml/codegen/cpp → da4ml-0.4.0/src/da4ml/codegen/hls}/source/ap_types/ap_fixed_ref.h +0 -0
- {da4ml-0.3.2/src/da4ml/codegen/cpp → da4ml-0.4.0/src/da4ml/codegen/hls}/source/ap_types/ap_fixed_special.h +0 -0
- {da4ml-0.3.2/src/da4ml/codegen/cpp → da4ml-0.4.0/src/da4ml/codegen/hls}/source/ap_types/ap_int.h +0 -0
- {da4ml-0.3.2/src/da4ml/codegen/cpp → da4ml-0.4.0/src/da4ml/codegen/hls}/source/ap_types/ap_int_base.h +0 -0
- {da4ml-0.3.2/src/da4ml/codegen/cpp → da4ml-0.4.0/src/da4ml/codegen/hls}/source/ap_types/ap_int_ref.h +0 -0
- {da4ml-0.3.2/src/da4ml/codegen/cpp → da4ml-0.4.0/src/da4ml/codegen/hls}/source/ap_types/ap_int_special.h +0 -0
- {da4ml-0.3.2/src/da4ml/codegen/cpp → da4ml-0.4.0/src/da4ml/codegen/hls}/source/ap_types/ap_shift_reg.h +0 -0
- {da4ml-0.3.2/src/da4ml/codegen/cpp → da4ml-0.4.0/src/da4ml/codegen/hls}/source/ap_types/etc/ap_private.h +0 -0
- {da4ml-0.3.2/src/da4ml/codegen/cpp → da4ml-0.4.0/src/da4ml/codegen/hls}/source/ap_types/hls_math.h +0 -0
- {da4ml-0.3.2/src/da4ml/codegen/cpp → da4ml-0.4.0/src/da4ml/codegen/hls}/source/ap_types/hls_stream.h +0 -0
- {da4ml-0.3.2/src/da4ml/codegen/cpp → da4ml-0.4.0/src/da4ml/codegen/hls}/source/ap_types/utils/x_hls_utils.h +0 -0
- {da4ml-0.3.2/src/da4ml/codegen/cpp → da4ml-0.4.0/src/da4ml/codegen/hls}/source/build_binder.mk +0 -0
- {da4ml-0.3.2/src/da4ml/codegen/verilog/source → da4ml-0.4.0/src/da4ml/codegen/rtl/common_source}/ioutil.hh +0 -0
- {da4ml-0.3.2/src/da4ml/codegen/verilog/source → da4ml-0.4.0/src/da4ml/codegen/rtl/common_source}/template.xdc +0 -0
- {da4ml-0.3.2/src/da4ml/codegen → da4ml-0.4.0/src/da4ml/codegen/rtl}/verilog/source/multiplier.v +0 -0
- {da4ml-0.3.2/src/da4ml/codegen → da4ml-0.4.0/src/da4ml/codegen/rtl}/verilog/source/mux.v +0 -0
- {da4ml-0.3.2/src/da4ml/codegen → da4ml-0.4.0/src/da4ml/codegen/rtl}/verilog/source/shift_adder.v +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/converter/__init__.py +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/converter/hgq2/__init__.py +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/converter/hgq2/replica.py +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/trace/__init__.py +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/trace/ops/__init__.py +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/trace/ops/conv_utils.py +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/trace/ops/einsum_utils.py +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/trace/pipeline.py +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/trace/tracer.py +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml.egg-info/dependency_links.txt +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml.egg-info/requires.txt +0 -0
- {da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml.egg-info/top_level.txt +0 -0

{da4ml-0.3.2/src/da4ml.egg-info → da4ml-0.4.0}/PKG-INFO

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: da4ml
-Version: 0.3.2
-Summary: …
+Version: 0.4.0
+Summary: Distributed Arithmetic for Machine Learning
 Author-email: Chang Sun <chsun@cern.ch>
 License: GNU Lesser General Public License v3 (LGPLv3)
 Project-URL: repository, https://github.com/calad0i/da4ml

{da4ml-0.3.2 → da4ml-0.4.0}/docs/faq.md

@@ -5,6 +5,9 @@ Two things:
 1. Converting constant-matrix-vector multiplications (CMVMs) into optimized adder graphs with distributed arithmetic for FPGA implementation.
 2. Converting (a part of) neural networks to fully parallel HDL or HLS with the CMVM optimization above.
 
+## Should I use the standalone flow or the hls4ml-integrated flow?
+If the network is supported by da4ml standalone, it is **recommended to use the standalone flow**. In most cases, the standalone flow gives better latency and timing, and is orders of magnitude faster in synthesis time. However, on some occasions, the hls4ml-integrated flow could provide better timing when the routing is highly challenging for the standalone flow. If the network is not supported by da4ml standalone (e.g., contains unsupported layers or operations), then the hls4ml-integrated flow is the only option.
+
 ## So does da4ml only work with neural networks with II=1?
 No. When integrated with hls4ml, da4ml only requires that **each CMVM operation is unrolled (II=1)**. This is different from unrolling the whole model, e.g., convolution layers can still have II>1 by reusing the same CMVM kernel for different input windows.
 

{da4ml-0.3.2 → da4ml-0.4.0}/docs/getting_started.md

@@ -1,6 +1,6 @@
 # Getting Started with da4ml
 
-da4ml can be used in three different ways
+da4ml can be used in three different ways. For standalone code generation, it is recommended to use the functional API or the HGQ2 integration. See [FAQ](./faq.html) for more details on when to use which flow.
 
 ## functional API:
 
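For orientation, a hedged sketch of the standalone flow introduced in this release is shown below. The `solution` placeholder stands for a `Solution`/`CascadedSolution` produced by da4ml's CMVM/tracing step (not shown in this diff), and the `compile()` step is assumed from the `predict()` precondition visible further down in `rtl_model.py`; project name, paths, and data shapes are placeholders.

import numpy as np
from da4ml.codegen.rtl import VerilogModel  # exported by the new rtl subpackage in 0.4.0

solution = ...  # placeholder: Solution | CascadedSolution from the CMVM/tracing step
model = VerilogModel(
    solution,
    prj_name='my_model',      # hypothetical project name
    path='build/my_model',    # output directory for HDL, tcl, xdc, and the binder
    clock_period=5,           # ns, substituted into the generated XDC template
    register_layers=1,
)
model.write()      # emit Verilog, build_prj.tcl, XDC, and the Verilator binder sources
model.compile()    # assumed: builds the Verilator-based emulator required by predict()
y = model.predict(np.random.rand(128, 16).astype(np.float32))  # shapes are placeholders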

{da4ml-0.3.2 → da4ml-0.4.0}/src/da4ml/_version.py

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.3.2'
-__version_tuple__ = version_tuple = (0, 3, 2)
+__version__ = version = '0.4.0'
+__version_tuple__ = version_tuple = (0, 4, 0)
 
-__commit_id__ = commit_id = '…'
+__commit_id__ = commit_id = 'gb2796d8af'

da4ml-0.3.2/src/da4ml/codegen/cpp/cpp_codegen.py → da4ml-0.4.0/src/da4ml/codegen/hls/hls_codegen.py
RENAMED

@@ -16,12 +16,19 @@ def kif_to_hlslib_type(k: bool | int = 1, i: int = 0, f: int = 0):
     return f'ac_fixed<{int(k)},{k + i + f},{k + i}>'
 
 
+def kif_to_oneapi_type(k: bool | int = 1, i: int = 0, f: int = 0):
+    # OneAPI requires at least 2 bits for all ac_fixed as of 2025.1
+    return f'ac_fixed<{int(k)},{max(k + i + f, 2)},{k + i}>'
+
+
 def get_typestr_fn(flavor: str):
     match flavor.lower():
         case 'vitis':
             typestr_fn = kif_to_vitis_type
         case 'hlslib':
             typestr_fn = kif_to_hlslib_type
+        case 'oneapi':
+            typestr_fn = kif_to_oneapi_type
         case _:
             raise ValueError(f'Unsupported flavor: {flavor}')
     return typestr_fn
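For reference, a minimal standalone sketch (not part of the package, reproducing the two helpers above) illustrates the OneAPI width clamp: a 1-bit quantity is widened to a 2-bit ac_fixed under the oneapi flavor, while already-wide types are unchanged.

def kif_to_hlslib_type(k=1, i=0, f=0):
    return f'ac_fixed<{int(k)},{k + i + f},{k + i}>'

def kif_to_oneapi_type(k=1, i=0, f=0):
    # OneAPI requires at least 2 bits for ac_fixed, so clamp the total width
    return f'ac_fixed<{int(k)},{max(k + i + f, 2)},{k + i}>'

print(kif_to_hlslib_type(0, 1, 0))  # ac_fixed<0,1,1>
print(kif_to_oneapi_type(0, 1, 0))  # ac_fixed<0,2,1>  (width clamped to 2)
print(kif_to_oneapi_type(1, 3, 4))  # ac_fixed<1,8,4>  (already wide enough, unchanged)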

@@ -46,18 +53,18 @@ def ssa_gen(sol: Solution, print_latency: bool, typestr_fn: Callable[[bool | int
         match op.opcode:
             case -1:
                 # Input marker
-                val = f'…
+                val = f'model_inp[{op.id0}]'
             case 0 | 1:
                 # Common a+/-b<<shift op
                 ref1 = f'bit_shift<{op.data}>(v{op.id1})' if op.data != 0 else f'v{op.id1}'
                 val = f'{ref0} {"-" if op.opcode == 1 else "+"} {ref1}'
             case 2 | -2:
-                if op.opcode == 2:  # relu(…
+                if op.opcode == 2:  # relu(model_inp)
                     if ops[op.id0].qint.min < 0:
                         val = f'{ref0} > 0 ? {_type}({ref0}) : {_type}(0)'
                     else:
                         val = ref0
-                else:  # relu(-…
+                else:  # relu(-model_inp)
                     if ops[op.id0].qint.max > 0:
                         val = f'{ref0} > 0 ? {_type}(0) : {_type}(-{ref0})'
                     else:

@@ -105,15 +112,15 @@ def output_gen(sol: Solution, typestr_fn: Callable[[bool | int, int, int], str])
     lines = []
     for i, idx in enumerate(sol.out_idxs):
         if idx < 0:
-            lines.append(f'…
+            lines.append(f'model_out[{i}] = 0;')
             continue
         _type = typestr_fn(*_minimal_kif(sol.out_qint[i]))
         shift = sol.out_shifts[i]
         neg_str = '-' if sol.out_negs[i] else ''
         if shift == 0:
-            lines.append(f'…
+            lines.append(f'model_out[{i}] = {_type}({neg_str}v{idx});')
         else:
-            lines.append(f'…
+            lines.append(f'model_out[{i}] = {_type}({neg_str}bit_shift<{shift}>(v{idx}));')
     return lines
 
 

@@ -126,7 +133,7 @@ def get_io_types(sol: Solution, flavor: str):
     return inp_type, out_type
 
 
-def cpp_logic_and_bridge_gen(
+def hls_logic_and_bridge_gen(
     sol: Solution,
     fn_name: str,
     flavor: str,

@@ -140,7 +147,7 @@ cpp_logic_and_bridge_gen(
 
     n_in, n_out = sol.shape
     template_def = 'template <typename inp_t, typename out_t>'
-    fn_signature = f'void {fn_name}(inp_t …
+    fn_signature = f'void {fn_name}(inp_t model_inp[{n_in}], out_t model_out[{n_out}])'
     pragmas = pragmas or []
 
     ssa_lines = ssa_gen(sol, print_latency=print_latency, typestr_fn=typestr_fn)

@@ -173,12 +180,12 @@ bool openmp_enabled() {{
     return _openmp;
 }}
 
-void inference_f64(double *…
-    batch_inference<{fn_name}_config, double>(…
+void inference_f64(double *model_inp, double *model_out, size_t size) {{
+    batch_inference<{fn_name}_config, double>(model_inp, model_out, size);
 }}
 
-void inference_f32(float *…
-    batch_inference<{fn_name}_config, float>(…
+void inference_f32(float *model_inp, float *model_out, size_t size) {{
+    batch_inference<{fn_name}_config, float>(model_inp, model_out, size);
 }}
 }}"""
     return code, bridge
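The generated bridge above exposes flat, C-style batch entry points (inference_f32/inference_f64 over contiguous sample buffers). A hypothetical ctypes driver, assuming the bridge is compiled into a shared library with C linkage (the build step and the library name libmy_model_bridge.so are placeholders not shown in this hunk), could look roughly like:

import ctypes
import numpy as np

lib = ctypes.CDLL('./libmy_model_bridge.so')  # hypothetical library name
lib.inference_f32.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_size_t,
]
lib.inference_f32.restype = None

n_samples, n_in, n_out = 256, 16, 4  # model-specific placeholder shapes
x = np.random.rand(n_samples, n_in).astype(np.float32)
y = np.zeros((n_samples, n_out), dtype=np.float32)
lib.inference_f32(
    x.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
    y.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
    n_samples,
)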

{da4ml-0.3.2/src/da4ml/codegen/cpp → da4ml-0.4.0/src/da4ml/codegen/hls}/hls_model.py
RENAMED

@@ -13,7 +13,7 @@ import numpy as np
 from numpy.typing import NDArray
 
 from da4ml.cmvm.types import Solution
-from da4ml.codegen.…
+from da4ml.codegen.hls.hls_codegen import get_io_types, hls_logic_and_bridge_gen
 
 from ... import codegen
 from ...cmvm.types import _minimal_kif

@@ -39,7 +39,7 @@ class HLSModel:
         self._prj_name = prj_name
         self._path = Path(path)
         self._flavor = flavor.lower()
-        assert self._flavor in ('vitis', 'hlslib'), f'Unsupported HLS flavor: {self._flavor}'
+        assert self._flavor in ('vitis', 'hlslib', 'oneapi'), f'Unsupported HLS flavor: {self._flavor}'
         self._print_latency = print_latency
         self._part_name = part_name
         self._clock_period = clock_period

@@ -64,7 +64,7 @@ class HLSModel:
     def write(self):
         if not self._path.exists():
             self._path.mkdir(parents=True, exist_ok=True)
-        template_def, bridge = cpp_logic_and_bridge_gen(
+        template_def, bridge = hls_logic_and_bridge_gen(
            self._solution,
            self._prj_name,
            self._flavor,

@@ -104,11 +104,11 @@ class HLSModel:
         with open(self._path / f'{self._prj_name}_bridge.cc', 'w') as f:
             f.write(bridge)
 
-        shutil.copy(self.__src_root / '…
-        shutil.copy(self.__src_root / f'…
-        shutil.copy(self.__src_root / '…
+        shutil.copy(self.__src_root / 'hls/source/binder_util.hh', self._path)
+        shutil.copy(self.__src_root / f'hls/source/{self._flavor}_bitshift.hh', self._path / 'bitshift.hh')
+        shutil.copy(self.__src_root / 'hls/source/build_binder.mk', self._path)
         if self._flavor == 'vitis':
-            shutil.copytree(self.__src_root / '…
+            shutil.copytree(self.__src_root / 'hls/source/ap_types', self._path / 'ap_types', dirs_exist_ok=True)
         else:
             pass
 

da4ml-0.4.0/src/da4ml/codegen/hls/source/binder_util.hh

@@ -0,0 +1,50 @@
+#pragma once
+#include <cstddef>
+
+#ifdef _OPENMP
+#include <algorithm>
+#include <omp.h>
+constexpr bool _openmp = true;
+#else
+constexpr bool _openmp = false;
+#endif
+
+template <typename CONFIG_T, typename T> void _inference(T *c_inp, T *c_out, size_t n_samples) {
+    typename CONFIG_T::inp_t in_fixed_buf[CONFIG_T::N_inp];
+    typename CONFIG_T::out_t out_fixed_buf[CONFIG_T::N_out];
+
+    for (size_t i = 0; i < n_samples; ++i) {
+        size_t offset_in = i * CONFIG_T::N_inp;
+        size_t offset_out = i * CONFIG_T::N_out;
+        for (size_t j = 0; j < CONFIG_T::N_inp; ++j) {
+            in_fixed_buf[j] = c_inp[offset_in + j];
+        }
+
+        CONFIG_T::f(in_fixed_buf, out_fixed_buf);
+
+        for (size_t j = 0; j < CONFIG_T::N_out; ++j) {
+            c_out[offset_out + j] = out_fixed_buf[j];
+        }
+    }
+}
+
+template <typename CONFIG_T, typename T> void batch_inference(T *c_inp, T *c_out, size_t n_samples) {
+#ifdef _OPENMP
+    size_t n_max_threads = omp_get_max_threads();
+    size_t n_samples_per_thread = std::max<size_t>(n_samples / n_max_threads, 32);
+    size_t n_thread = n_samples / n_samples_per_thread;
+    n_thread += (n_samples % n_samples_per_thread) ? 1 : 0;
+
+#pragma omp parallel for num_threads(n_thread) schedule(static)
+    for (size_t i = 0; i < n_thread; ++i) {
+        size_t start = i * n_samples_per_thread;
+        size_t end = std::min<size_t>(start + n_samples_per_thread, n_samples);
+        size_t n_samples_this_thread = end - start;
+        size_t offset_in = start * CONFIG_T::N_inp;
+        size_t offset_out = start * CONFIG_T::N_out;
+        _inference<CONFIG_T, T>(&c_inp[offset_in], &c_out[offset_out], n_samples_this_thread);
+    }
+#else
+    _inference<CONFIG_T, T>(c_inp, c_out, n_samples);
+#endif
+}

{da4ml-0.3.2/src/da4ml/codegen/cpp → da4ml-0.4.0/src/da4ml/codegen/hls}/source/vitis_bitshift.hh
RENAMED

@@ -1,14 +1,16 @@
 #pragma once
-#include "…
+#include "ap_fixed.h"
 
-template <int s, int b, int i, ap_q_mode Q, ap_o_mode O, int N> …
+template <int s, int b, int i, ap_q_mode Q, ap_o_mode O, int N>
+ap_fixed<b, i + s> bit_shift(ap_fixed<b, i, Q, O, N> x) {
 #pragma HLS INLINE
     ap_fixed<b, i + s> r;
     r.range() = x.range();
     return r;
 };
 
-template <int s, int b, int i, ap_q_mode Q, ap_o_mode O, int N> …
+template <int s, int b, int i, ap_q_mode Q, ap_o_mode O, int N>
+ap_ufixed<b, i + s> bit_shift(ap_ufixed<b, i, Q, O, N> x) {
 #pragma HLS INLINE
     ap_ufixed<b, i + s> r;
     r.range() = x.range();

da4ml-0.4.0/src/da4ml/codegen/rtl/__init__.py

@@ -0,0 +1,15 @@
+from .rtl_model import RTLModel, VerilogModel, VHDLModel
+from .verilog import comb_logic_gen as verilog_comb_logic_gen
+from .verilog import generate_io_wrapper as verilog_generate_io_wrapper
+from .vhdl import comb_logic_gen as vhdl_comb_logic_gen
+from .vhdl import generate_io_wrapper as vhdl_generate_io_wrapper
+
+__all__ = [
+    'RTLModel',
+    'VerilogModel',
+    'VHDLModel',
+    'verilog_comb_logic_gen',
+    'verilog_generate_io_wrapper',
+    'vhdl_comb_logic_gen',
+    'vhdl_generate_io_wrapper',
+]

{da4ml-0.3.2/src/da4ml/codegen/verilog/source → da4ml-0.4.0/src/da4ml/codegen/rtl/common_source}/binder_util.hh
RENAMED

@@ -19,7 +19,7 @@ std::enable_if_t<CONFIG_T::II != 0> _inference(int32_t *c_inp, int32_t *c_out, s
 
         if (t_inp < n_samples * CONFIG_T::II && t_inp % CONFIG_T::II == 0) {
             write_input<CONFIG_T::N_inp, CONFIG_T::max_inp_bw>(
-                dut->…
+                dut->model_inp, &c_inp[t_inp / CONFIG_T::II * CONFIG_T::N_inp]
             );
         }
 

@@ -28,7 +28,7 @@ std::enable_if_t<CONFIG_T::II != 0> _inference(int32_t *c_inp, int32_t *c_out, s
 
         if (t_inp > CONFIG_T::latency && t_out % CONFIG_T::II == 0) {
             read_output<CONFIG_T::N_out, CONFIG_T::max_out_bw>(
-                dut->…
+                dut->model_out, &c_out[t_out / CONFIG_T::II * CONFIG_T::N_out]
             );
         }
 

@@ -44,9 +44,9 @@ std::enable_if_t<CONFIG_T::II == 0> _inference(int32_t *c_inp, int32_t *c_out, s
     auto dut = std::make_unique<typename CONFIG_T::dut_t>();
 
     for (size_t i = 0; i < n_samples; ++i) {
-        write_input<CONFIG_T::N_inp, CONFIG_T::max_inp_bw>(dut->…
+        write_input<CONFIG_T::N_inp, CONFIG_T::max_inp_bw>(dut->model_inp, &c_inp[i * CONFIG_T::N_inp]);
         dut->eval();
-        read_output<CONFIG_T::N_out, CONFIG_T::max_out_bw>(dut->…
+        read_output<CONFIG_T::N_out, CONFIG_T::max_out_bw>(dut->model_out, &c_out[i * CONFIG_T::N_out]);
     }
 
     dut->final();

{da4ml-0.3.2/src/da4ml/codegen/verilog/source → da4ml-0.4.0/src/da4ml/codegen/rtl/common_source}/build_binder.mk
RENAMED

@@ -7,10 +7,16 @@ CFLAGS = -std=c++17 -fPIC
 LINKFLAGS = $(INCLUDES) $(WARNINGS)
 LIBNAME = lib$(VM_PREFIX)_$(STAMP).so
 N_JOBS ?= $(shell nproc)
+VERILATOR_FLAGS ?=
 
+$(VM_PREFIX).v: $(wildcard $(VM_PREFIX).vhd)
+# vhdl specific - convert to verilog first for verilating
+	mkdir -p obj_dir
+	ghdl -a --std=08 --workdir=obj_dir multiplier.vhd mux.vhd negative.vhd shift_adder.vhd $(wildcard $(VM_PREFIX:_wrapper=)_stage*.vhd) $(wildcard $(VM_PREFIX:_wrapper=).vhd) $(VM_PREFIX).vhd
+	ghdl synth --std=08 --workdir=obj_dir --out=verilog $(VM_PREFIX) > $(VM_PREFIX).v
 
 ./obj_dir/libV$(VM_PREFIX).a ./obj_dir/libverilated.a ./obj_dir/V$(VM_PREFIX)__ALL.a: $(VM_PREFIX).v
-	verilator --cc -j $(N_JOBS) -…
+	verilator --cc -j $(N_JOBS) -build $(VM_PREFIX).v --prefix V$(VM_PREFIX) $(VERILATOR_FLAGS) -CFLAGS "$(CFLAGS)"
 
 $(LIBNAME): ./obj_dir/libV$(VM_PREFIX).a ./obj_dir/libverilated.a ./obj_dir/V$(VM_PREFIX)__ALL.a $(VM_PREFIX)_binder.cc
 	$(CXX) $(CFLAGS) $(LINKFLAGS) $(CXXFLAGS2) -pthread -shared -o $(LIBNAME) $(VM_PREFIX)_binder.cc ./obj_dir/libV$(VM_PREFIX).a ./obj_dir/libverilated.a ./obj_dir/V$(VM_PREFIX)__ALL.a $(EXTRA_CXXFLAGS)

{da4ml-0.3.2/src/da4ml/codegen/verilog/source → da4ml-0.4.0/src/da4ml/codegen/rtl/common_source}/build_prj.tcl
RENAMED

@@ -1,20 +1,41 @@
 set project_name "${PROJECT_NAME}"
 set device "${DEVICE}"
+set source_type "${SOURCE_TYPE}"
 
 set top_module "${project_name}"
 set output_dir "./output_${project_name}"
 
 create_project $project_name "${output_dir}/$project_name" -force -part $device
 
-set_property TARGET_LANGUAGE Verilog [current_project]
 set_property DEFAULT_LIB work [current_project]
 
-…
-…
-…
-…
-…
-…
+if { $source_type != "vhdl" && $source_type != "verilog" } {
+    puts "Error: SOURCE_TYPE must be either 'vhdl' or 'verilog'."
+    exit 1
+}
+
+if { $source_type == "vhdl" } {
+    set_property TARGET_LANGUAGE VHDL [current_project]
+
+    read_vhdl -vhdl2008 "${project_name}.vhd"
+    read_vhdl -vhdl2008 "shift_adder.vhd"
+    read_vhdl -vhdl2008 "negative.vhd"
+    read_vhdl -vhdl2008 "mux.vhd"
+    read_vhdl -vhdl2008 "multiplier.vhd"
+    foreach file [glob -nocomplain "${project_name}_stage*.vhd"] {
+        read_vhdl -vhdl2008 $file
+    }
+} else {
+    set_property TARGET_LANGUAGE Verilog [current_project]
+
+    read_verilog "${project_name}.v"
+    read_verilog "shift_adder.v"
+    read_verilog "negative.v"
+    read_verilog "mux.v"
+    read_verilog "multiplier.v"
+    foreach file [glob -nocomplain "${project_name}_stage*.v"] {
+        read_verilog $file
+    }
 }
 
 read_xdc "${project_name}.xdc" -mode out_of_context

da4ml-0.3.2/src/da4ml/codegen/verilog/verilog_model.py → da4ml-0.4.0/src/da4ml/codegen/rtl/rtl_model.py
RENAMED

@@ -10,10 +10,9 @@ from uuid import uuid4
 import numpy as np
 from numpy.typing import NDArray
 
-from ... import codegen
 from ...cmvm.types import CascadedSolution, Solution, _minimal_kif
 from ...trace.pipeline import to_pipeline
-from …
+from .. import rtl
 
 
 def get_io_kifs(sol: Solution | CascadedSolution):

@@ -22,12 +21,13 @@ def get_io_kifs(sol: Solution | CascadedSolution):
     return np.array(inp_kifs, np.int8), np.array(out_kifs, np.int8)
 
 
-class VerilogModel:
+class RTLModel:
     def __init__(
         self,
         solution: Solution | CascadedSolution,
         prj_name: str,
         path: str | Path,
+        flavor: str = 'verilog',
         latency_cutoff: float = -1,
         print_latency: bool = True,
         part_name: str = 'xcvu13p-flga2577-2-e',

@@ -36,18 +36,21 @@ class VerilogModel:
         io_delay_minmax: tuple[float, float] = (0.2, 0.4),
         register_layers: int = 1,
     ):
+        self._flavor = flavor.lower()
         self._solution = solution
         self._path = Path(path)
         self._prj_name = prj_name
         self._latency_cutoff = latency_cutoff
         self._print_latency = print_latency
-        self.__src_root = Path(…
+        self.__src_root = Path(rtl.__file__).parent
         self._part_name = part_name
         self._clock_period = clock_period
         self._clock_uncertainty = clock_uncertainty
         self._io_delay_minmax = io_delay_minmax
         self._register_layers = register_layers
 
+        assert self._flavor in ('vhdl', 'verilog'), f'Unsupported flavor {flavor}, only vhdl and verilog are supported.'
+
         self._pipe = solution if isinstance(solution, CascadedSolution) else None
         if latency_cutoff > 0 and self._pipe is None:
             assert isinstance(solution, Solution)

@@ -62,16 +65,23 @@ class VerilogModel:
         self._uuid = None
 
     def write(self):
+        flavor = self._flavor
+        suffix = 'v' if flavor == 'verilog' else 'vhd'
+        if flavor == 'vhdl':
+            from .vhdl import binder_gen, comb_logic_gen, generate_io_wrapper, pipeline_logic_gen
+        else:  # verilog
+            from .verilog import binder_gen, comb_logic_gen, generate_io_wrapper, pipeline_logic_gen
+
         self._path.mkdir(parents=True, exist_ok=True)
         if self._pipe is not None:  # Pipeline
             # Main logic
             codes = pipeline_logic_gen(self._pipe, self._prj_name, self._print_latency, register_layers=self._register_layers)
             for k, v in codes.items():
-                with open(self._path / f'{k}.…
+                with open(self._path / f'{k}.{suffix}', 'w') as f:
                     f.write(v)
 
             # Build script
-            with open(self.__src_root / '…
+            with open(self.__src_root / 'common_source/build_prj.tcl') as f:
                 tcl = f.read()
             tcl = tcl.replace('${DEVICE}', self._part_name)
             tcl = tcl.replace('${PROJECT_NAME}', self._prj_name)

@@ -79,7 +89,7 @@ class VerilogModel:
                 f.write(tcl)
 
             # XDC
-            with open(self.__src_root / '…
+            with open(self.__src_root / 'common_source/template.xdc') as f:
                 xdc = f.read()
             xdc = xdc.replace('${CLOCK_PERIOD}', str(self._clock_period))
             xdc = xdc.replace('${UNCERTAINITY_SETUP}', str(self._clock_uncertainty))

@@ -89,7 +99,7 @@ class VerilogModel:
             with open(self._path / f'{self._prj_name}.xdc', 'w') as f:
                 f.write(xdc)
 
-            # C++ binder w/ …
+            # C++ binder w/ HDL wrapper for uniform bw
             binder = binder_gen(self._pipe, f'{self._prj_name}_wrapper', 1, self._register_layers)
 
             # Verilog IO wrapper (non-uniform bw to uniform one, clk passthrough)

@@ -101,24 +111,25 @@ class VerilogModel:
 
             # Main logic
             code = comb_logic_gen(self._solution, self._prj_name, self._print_latency, '`timescale 1ns/1ps')
-            with open(self._path / f'{self._prj_name}.…
+            with open(self._path / f'{self._prj_name}.{suffix}', 'w') as f:
                 f.write(code)
 
             # Verilog IO wrapper (non-uniform bw to uniform one, no clk)
             io_wrapper = generate_io_wrapper(self._solution, self._prj_name, False)
             binder = binder_gen(self._solution, f'{self._prj_name}_wrapper')
 
-            with open(self._path / f'{self._prj_name}_wrapper.…
+            with open(self._path / f'{self._prj_name}_wrapper.{suffix}', 'w') as f:
                 f.write(io_wrapper)
             with open(self._path / f'{self._prj_name}_wrapper_binder.cc', 'w') as f:
                 f.write(binder)
 
         # Common resource copy
-        for fname in self.__src_root.glob('…
+        for fname in self.__src_root.glob(f'{flavor}/source/*.{suffix}'):
             shutil.copy(fname, self._path)
 
-        shutil.copy(self.__src_root / '…
-        shutil.copy(self.__src_root / '…
+        shutil.copy(self.__src_root / 'common_source/build_binder.mk', self._path)
+        shutil.copy(self.__src_root / 'common_source/ioutil.hh', self._path)
+        shutil.copy(self.__src_root / 'common_source/binder_util.hh', self._path)
         self._solution.save(self._path / 'model.json')
         with open(self._path / 'misc.json', 'w') as f:
             f.write(f'{{"cost": {self._solution.cost}}}')

@@ -152,6 +163,7 @@ class VerilogModel:
         env['VM_PREFIX'] = f'{self._prj_name}_wrapper'
         env['STAMP'] = self._uuid
         env['EXTRA_CXXFLAGS'] = '-fopenmp' if openmp else ''
+        env['VERILATOR_FLAGS'] = '-Wall' if self._flavor == 'verilog' else ''
         if nproc is not None:
             env['N_JOBS'] = str(nproc)
         if o3:

@@ -219,7 +231,7 @@ class VerilogModel:
         self.write()
         self._compile(verbose=verbose, openmp=openmp, nproc=nproc, o3=o3, clean=clean)
 
-    def predict(self, data: NDArray[np.floating]):
+    def predict(self, data: NDArray[np.floating]) -> NDArray[np.float32]:
         """Run the model on the input data.
 
         Parameters

@@ -233,6 +245,7 @@ class VerilogModel:
         NDArray[np.float64]
             Output of the model in shape (n_samples, output_size).
         """
+
         assert self._lib is not None, 'Library not loaded, call .compile() first.'
         inp_size, out_size = self._solution.shape
 

@@ -258,7 +271,7 @@ class VerilogModel:
         # Unscale the output int32 to recover fp values
         k, i, f = np.max(k_out), np.max(i_out), np.max(f_out)
         a, b, c = 2.0 ** (k + i + f), k * 2.0 ** (i + f), 2.0**-f
-        return ((out_data.reshape(n_sample, out_size) + b) % a - b) * c
+        return ((out_data.reshape(n_sample, out_size) + b) % a - b) * c.astype(np.float32)
 
     def __repr__(self):
         inp_size, out_size = self._solution.shape

@@ -289,3 +302,61 @@ Estimated cost: {cost} LUTs"""
         else:
             spec += '\nEmulator is **not compiled**'
         return spec
+
+
+class VerilogModel(RTLModel):
+    def __init__(
+        self,
+        solution: Solution | CascadedSolution,
+        prj_name: str,
+        path: str | Path,
+        latency_cutoff: float = -1,
+        print_latency: bool = True,
+        part_name: str = 'xcvu13p-flga2577-2-e',
+        clock_period: float = 5,
+        clock_uncertainty: float = 0.1,
+        io_delay_minmax: tuple[float, float] = (0.2, 0.4),
+        register_layers: int = 1,
+    ):
+        self._hdl_model = super().__init__(
+            solution,
+            prj_name,
+            path,
+            'verilog',
+            latency_cutoff,
+            print_latency,
+            part_name,
+            clock_period,
+            clock_uncertainty,
+            io_delay_minmax,
+            register_layers,
+        )
+
+
+class VHDLModel(RTLModel):
+    def __init__(
+        self,
+        solution: Solution | CascadedSolution,
+        prj_name: str,
+        path: str | Path,
+        latency_cutoff: float = -1,
+        print_latency: bool = True,
+        part_name: str = 'xcvu13p-flga2577-2-e',
+        clock_period: float = 5,
+        clock_uncertainty: float = 0.1,
+        io_delay_minmax: tuple[float, float] = (0.2, 0.4),
+        register_layers: int = 1,
+    ):
+        self._hdl_model = super().__init__(
+            solution,
+            prj_name,
+            path,
+            'vhdl',
+            latency_cutoff,
+            print_latency,
+            part_name,
+            clock_period,
+            clock_uncertainty,
+            io_delay_minmax,
+            register_layers,
+        )
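The two subclasses above only pin the flavor argument of RTLModel, so the following two constructions (a hedged sketch with a placeholder solution) should be equivalent:

from da4ml.codegen.rtl import RTLModel, VHDLModel

solution = ...  # placeholder: Solution | CascadedSolution from the CMVM/tracing step
vhdl_a = VHDLModel(solution, prj_name='my_model', path='build/vhdl')
vhdl_b = RTLModel(solution, prj_name='my_model', path='build/vhdl', flavor='vhdl')  # same effect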

{da4ml-0.3.2/src/da4ml/codegen → da4ml-0.4.0/src/da4ml/codegen/rtl}/verilog/__init__.py
RENAMED

@@ -1,12 +1,10 @@
 from .comb import comb_logic_gen
 from .io_wrapper import binder_gen, generate_io_wrapper
 from .pipeline import pipeline_logic_gen
-from .verilog_model import VerilogModel
 
 __all__ = [
     'comb_logic_gen',
     'generate_io_wrapper',
     'pipeline_logic_gen',
     'binder_gen',
-    'VerilogModel',
 ]