da4ml 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of da4ml might be problematic. Click here for more details.
- da4ml/_version.py +2 -2
- da4ml/codegen/__init__.py +4 -7
- da4ml/codegen/hls/__init__.py +4 -0
- da4ml/codegen/{cpp/cpp_codegen.py → hls/hls_codegen.py} +19 -12
- da4ml/codegen/{cpp → hls}/hls_model.py +7 -7
- da4ml/codegen/rtl/__init__.py +15 -0
- da4ml/codegen/{verilog/source → rtl/common_source}/binder_util.hh +4 -4
- da4ml/codegen/{verilog/source → rtl/common_source}/build_binder.mk +7 -1
- da4ml/codegen/{verilog/source → rtl/common_source}/build_prj.tcl +28 -7
- da4ml/codegen/{verilog/verilog_model.py → rtl/rtl_model.py} +87 -16
- da4ml/codegen/{verilog → rtl/verilog}/__init__.py +0 -2
- da4ml/codegen/{verilog → rtl/verilog}/comb.py +32 -34
- da4ml/codegen/{verilog → rtl/verilog}/io_wrapper.py +8 -8
- da4ml/codegen/{verilog → rtl/verilog}/pipeline.py +10 -10
- da4ml/codegen/{verilog → rtl/verilog}/source/negative.v +2 -1
- da4ml/codegen/rtl/vhdl/__init__.py +10 -0
- da4ml/codegen/rtl/vhdl/comb.py +192 -0
- da4ml/codegen/rtl/vhdl/io_wrapper.py +157 -0
- da4ml/codegen/rtl/vhdl/pipeline.py +71 -0
- da4ml/codegen/rtl/vhdl/source/multiplier.vhd +40 -0
- da4ml/codegen/rtl/vhdl/source/mux.vhd +102 -0
- da4ml/codegen/rtl/vhdl/source/negative.vhd +35 -0
- da4ml/codegen/rtl/vhdl/source/shift_adder.vhd +101 -0
- da4ml/codegen/rtl/vhdl/source/template.xdc +32 -0
- {da4ml-0.3.3.dist-info → da4ml-0.4.0.dist-info}/METADATA +2 -2
- da4ml-0.4.0.dist-info/RECORD +76 -0
- da4ml/codegen/cpp/__init__.py +0 -4
- da4ml-0.3.3.dist-info/RECORD +0 -66
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_binary.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_common.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_decl.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_fixed.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_fixed_base.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_fixed_ref.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_fixed_special.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_int.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_int_base.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_int_ref.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_int_special.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_shift_reg.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/etc/ap_private.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/hls_math.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/hls_stream.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/utils/x_hls_utils.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/binder_util.hh +0 -0
- /da4ml/codegen/{cpp → hls}/source/build_binder.mk +0 -0
- /da4ml/codegen/{cpp → hls}/source/vitis_bitshift.hh +0 -0
- /da4ml/codegen/{verilog/source → rtl/common_source}/ioutil.hh +0 -0
- /da4ml/codegen/{verilog/source → rtl/common_source}/template.xdc +0 -0
- /da4ml/codegen/{verilog → rtl/verilog}/source/multiplier.v +0 -0
- /da4ml/codegen/{verilog → rtl/verilog}/source/mux.v +0 -0
- /da4ml/codegen/{verilog → rtl/verilog}/source/shift_adder.v +0 -0
- {da4ml-0.3.3.dist-info → da4ml-0.4.0.dist-info}/WHEEL +0 -0
- {da4ml-0.3.3.dist-info → da4ml-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {da4ml-0.3.3.dist-info → da4ml-0.4.0.dist-info}/top_level.txt +0 -0
da4ml/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.
|
|
32
|
-
__version_tuple__ = version_tuple = (0,
|
|
31
|
+
__version__ = version = '0.4.0'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 4, 0)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
da4ml/codegen/__init__.py
CHANGED
|
@@ -1,12 +1,9 @@
|
|
|
1
|
-
from .
|
|
2
|
-
from .
|
|
1
|
+
from .hls import HLSModel
|
|
2
|
+
from .rtl import RTLModel, VerilogModel, VHDLModel
|
|
3
3
|
|
|
4
4
|
__all__ = [
|
|
5
|
-
'cpp_logic_and_bridge_gen',
|
|
6
|
-
'comb_logic_gen',
|
|
7
|
-
'generate_io_wrapper',
|
|
8
|
-
'pipeline_logic_gen',
|
|
9
|
-
'binder_gen',
|
|
10
5
|
'HLSModel',
|
|
11
6
|
'VerilogModel',
|
|
7
|
+
'VHDLModel',
|
|
8
|
+
'RTLModel',
|
|
12
9
|
]
|
|
@@ -16,12 +16,19 @@ def kif_to_hlslib_type(k: bool | int = 1, i: int = 0, f: int = 0):
|
|
|
16
16
|
return f'ac_fixed<{int(k)},{k + i + f},{k + i}>'
|
|
17
17
|
|
|
18
18
|
|
|
19
|
+
def kif_to_oneapi_type(k: bool | int = 1, i: int = 0, f: int = 0):
|
|
20
|
+
# OneAPI requires at least 2 bits for all ac_fixed as of 2025.1
|
|
21
|
+
return f'ac_fixed<{int(k)},{max(k + i + f, 2)},{k + i}>'
|
|
22
|
+
|
|
23
|
+
|
|
19
24
|
def get_typestr_fn(flavor: str):
|
|
20
25
|
match flavor.lower():
|
|
21
26
|
case 'vitis':
|
|
22
27
|
typestr_fn = kif_to_vitis_type
|
|
23
28
|
case 'hlslib':
|
|
24
29
|
typestr_fn = kif_to_hlslib_type
|
|
30
|
+
case 'oneapi':
|
|
31
|
+
typestr_fn = kif_to_oneapi_type
|
|
25
32
|
case _:
|
|
26
33
|
raise ValueError(f'Unsupported flavor: {flavor}')
|
|
27
34
|
return typestr_fn
|
|
@@ -46,18 +53,18 @@ def ssa_gen(sol: Solution, print_latency: bool, typestr_fn: Callable[[bool | int
|
|
|
46
53
|
match op.opcode:
|
|
47
54
|
case -1:
|
|
48
55
|
# Input marker
|
|
49
|
-
val = f'
|
|
56
|
+
val = f'model_inp[{op.id0}]'
|
|
50
57
|
case 0 | 1:
|
|
51
58
|
# Common a+/-b<<shift op
|
|
52
59
|
ref1 = f'bit_shift<{op.data}>(v{op.id1})' if op.data != 0 else f'v{op.id1}'
|
|
53
60
|
val = f'{ref0} {"-" if op.opcode == 1 else "+"} {ref1}'
|
|
54
61
|
case 2 | -2:
|
|
55
|
-
if op.opcode == 2: # relu(
|
|
62
|
+
if op.opcode == 2: # relu(model_inp)
|
|
56
63
|
if ops[op.id0].qint.min < 0:
|
|
57
64
|
val = f'{ref0} > 0 ? {_type}({ref0}) : {_type}(0)'
|
|
58
65
|
else:
|
|
59
66
|
val = ref0
|
|
60
|
-
else: # relu(-
|
|
67
|
+
else: # relu(-model_inp)
|
|
61
68
|
if ops[op.id0].qint.max > 0:
|
|
62
69
|
val = f'{ref0} > 0 ? {_type}(0) : {_type}(-{ref0})'
|
|
63
70
|
else:
|
|
@@ -105,15 +112,15 @@ def output_gen(sol: Solution, typestr_fn: Callable[[bool | int, int, int], str])
|
|
|
105
112
|
lines = []
|
|
106
113
|
for i, idx in enumerate(sol.out_idxs):
|
|
107
114
|
if idx < 0:
|
|
108
|
-
lines.append(f'
|
|
115
|
+
lines.append(f'model_out[{i}] = 0;')
|
|
109
116
|
continue
|
|
110
117
|
_type = typestr_fn(*_minimal_kif(sol.out_qint[i]))
|
|
111
118
|
shift = sol.out_shifts[i]
|
|
112
119
|
neg_str = '-' if sol.out_negs[i] else ''
|
|
113
120
|
if shift == 0:
|
|
114
|
-
lines.append(f'
|
|
121
|
+
lines.append(f'model_out[{i}] = {_type}({neg_str}v{idx});')
|
|
115
122
|
else:
|
|
116
|
-
lines.append(f'
|
|
123
|
+
lines.append(f'model_out[{i}] = {_type}({neg_str}bit_shift<{shift}>(v{idx}));')
|
|
117
124
|
return lines
|
|
118
125
|
|
|
119
126
|
|
|
@@ -126,7 +133,7 @@ def get_io_types(sol: Solution, flavor: str):
|
|
|
126
133
|
return inp_type, out_type
|
|
127
134
|
|
|
128
135
|
|
|
129
|
-
def
|
|
136
|
+
def hls_logic_and_bridge_gen(
|
|
130
137
|
sol: Solution,
|
|
131
138
|
fn_name: str,
|
|
132
139
|
flavor: str,
|
|
@@ -140,7 +147,7 @@ def cpp_logic_and_bridge_gen(
|
|
|
140
147
|
|
|
141
148
|
n_in, n_out = sol.shape
|
|
142
149
|
template_def = 'template <typename inp_t, typename out_t>'
|
|
143
|
-
fn_signature = f'void {fn_name}(inp_t
|
|
150
|
+
fn_signature = f'void {fn_name}(inp_t model_inp[{n_in}], out_t model_out[{n_out}])'
|
|
144
151
|
pragmas = pragmas or []
|
|
145
152
|
|
|
146
153
|
ssa_lines = ssa_gen(sol, print_latency=print_latency, typestr_fn=typestr_fn)
|
|
@@ -173,12 +180,12 @@ bool openmp_enabled() {{
|
|
|
173
180
|
return _openmp;
|
|
174
181
|
}}
|
|
175
182
|
|
|
176
|
-
void inference_f64(double *
|
|
177
|
-
batch_inference<{fn_name}_config, double>(
|
|
183
|
+
void inference_f64(double *model_inp, double *model_out, size_t size) {{
|
|
184
|
+
batch_inference<{fn_name}_config, double>(model_inp, model_out, size);
|
|
178
185
|
}}
|
|
179
186
|
|
|
180
|
-
void inference_f32(float *
|
|
181
|
-
batch_inference<{fn_name}_config, float>(
|
|
187
|
+
void inference_f32(float *model_inp, float *model_out, size_t size) {{
|
|
188
|
+
batch_inference<{fn_name}_config, float>(model_inp, model_out, size);
|
|
182
189
|
}}
|
|
183
190
|
}}"""
|
|
184
191
|
return code, bridge
|
|
@@ -13,7 +13,7 @@ import numpy as np
|
|
|
13
13
|
from numpy.typing import NDArray
|
|
14
14
|
|
|
15
15
|
from da4ml.cmvm.types import Solution
|
|
16
|
-
from da4ml.codegen.
|
|
16
|
+
from da4ml.codegen.hls.hls_codegen import get_io_types, hls_logic_and_bridge_gen
|
|
17
17
|
|
|
18
18
|
from ... import codegen
|
|
19
19
|
from ...cmvm.types import _minimal_kif
|
|
@@ -39,7 +39,7 @@ class HLSModel:
|
|
|
39
39
|
self._prj_name = prj_name
|
|
40
40
|
self._path = Path(path)
|
|
41
41
|
self._flavor = flavor.lower()
|
|
42
|
-
assert self._flavor in ('vitis', 'hlslib'), f'Unsupported HLS flavor: {self._flavor}'
|
|
42
|
+
assert self._flavor in ('vitis', 'hlslib', 'oneapi'), f'Unsupported HLS flavor: {self._flavor}'
|
|
43
43
|
self._print_latency = print_latency
|
|
44
44
|
self._part_name = part_name
|
|
45
45
|
self._clock_period = clock_period
|
|
@@ -64,7 +64,7 @@ class HLSModel:
|
|
|
64
64
|
def write(self):
|
|
65
65
|
if not self._path.exists():
|
|
66
66
|
self._path.mkdir(parents=True, exist_ok=True)
|
|
67
|
-
template_def, bridge =
|
|
67
|
+
template_def, bridge = hls_logic_and_bridge_gen(
|
|
68
68
|
self._solution,
|
|
69
69
|
self._prj_name,
|
|
70
70
|
self._flavor,
|
|
@@ -104,11 +104,11 @@ class HLSModel:
|
|
|
104
104
|
with open(self._path / f'{self._prj_name}_bridge.cc', 'w') as f:
|
|
105
105
|
f.write(bridge)
|
|
106
106
|
|
|
107
|
-
shutil.copy(self.__src_root / '
|
|
108
|
-
shutil.copy(self.__src_root / f'
|
|
109
|
-
shutil.copy(self.__src_root / '
|
|
107
|
+
shutil.copy(self.__src_root / 'hls/source/binder_util.hh', self._path)
|
|
108
|
+
shutil.copy(self.__src_root / f'hls/source/{self._flavor}_bitshift.hh', self._path / 'bitshift.hh')
|
|
109
|
+
shutil.copy(self.__src_root / 'hls/source/build_binder.mk', self._path)
|
|
110
110
|
if self._flavor == 'vitis':
|
|
111
|
-
shutil.copytree(self.__src_root / '
|
|
111
|
+
shutil.copytree(self.__src_root / 'hls/source/ap_types', self._path / 'ap_types', dirs_exist_ok=True)
|
|
112
112
|
else:
|
|
113
113
|
pass
|
|
114
114
|
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from .rtl_model import RTLModel, VerilogModel, VHDLModel
|
|
2
|
+
from .verilog import comb_logic_gen as verilog_comb_logic_gen
|
|
3
|
+
from .verilog import generate_io_wrapper as verilog_generate_io_wrapper
|
|
4
|
+
from .vhdl import comb_logic_gen as vhdl_comb_logic_gen
|
|
5
|
+
from .vhdl import generate_io_wrapper as vhdl_generate_io_wrapper
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
'RTLModel',
|
|
9
|
+
'VerilogModel',
|
|
10
|
+
'VHDLModel',
|
|
11
|
+
'verilog_comb_logic_gen',
|
|
12
|
+
'verilog_generate_io_wrapper',
|
|
13
|
+
'vhdl_comb_logic_gen',
|
|
14
|
+
'vhdl_generate_io_wrapper',
|
|
15
|
+
]
|
|
@@ -19,7 +19,7 @@ std::enable_if_t<CONFIG_T::II != 0> _inference(int32_t *c_inp, int32_t *c_out, s
|
|
|
19
19
|
|
|
20
20
|
if (t_inp < n_samples * CONFIG_T::II && t_inp % CONFIG_T::II == 0) {
|
|
21
21
|
write_input<CONFIG_T::N_inp, CONFIG_T::max_inp_bw>(
|
|
22
|
-
dut->
|
|
22
|
+
dut->model_inp, &c_inp[t_inp / CONFIG_T::II * CONFIG_T::N_inp]
|
|
23
23
|
);
|
|
24
24
|
}
|
|
25
25
|
|
|
@@ -28,7 +28,7 @@ std::enable_if_t<CONFIG_T::II != 0> _inference(int32_t *c_inp, int32_t *c_out, s
|
|
|
28
28
|
|
|
29
29
|
if (t_inp > CONFIG_T::latency && t_out % CONFIG_T::II == 0) {
|
|
30
30
|
read_output<CONFIG_T::N_out, CONFIG_T::max_out_bw>(
|
|
31
|
-
dut->
|
|
31
|
+
dut->model_out, &c_out[t_out / CONFIG_T::II * CONFIG_T::N_out]
|
|
32
32
|
);
|
|
33
33
|
}
|
|
34
34
|
|
|
@@ -44,9 +44,9 @@ std::enable_if_t<CONFIG_T::II == 0> _inference(int32_t *c_inp, int32_t *c_out, s
|
|
|
44
44
|
auto dut = std::make_unique<typename CONFIG_T::dut_t>();
|
|
45
45
|
|
|
46
46
|
for (size_t i = 0; i < n_samples; ++i) {
|
|
47
|
-
write_input<CONFIG_T::N_inp, CONFIG_T::max_inp_bw>(dut->
|
|
47
|
+
write_input<CONFIG_T::N_inp, CONFIG_T::max_inp_bw>(dut->model_inp, &c_inp[i * CONFIG_T::N_inp]);
|
|
48
48
|
dut->eval();
|
|
49
|
-
read_output<CONFIG_T::N_out, CONFIG_T::max_out_bw>(dut->
|
|
49
|
+
read_output<CONFIG_T::N_out, CONFIG_T::max_out_bw>(dut->model_out, &c_out[i * CONFIG_T::N_out]);
|
|
50
50
|
}
|
|
51
51
|
|
|
52
52
|
dut->final();
|
|
@@ -7,10 +7,16 @@ CFLAGS = -std=c++17 -fPIC
|
|
|
7
7
|
LINKFLAGS = $(INCLUDES) $(WARNINGS)
|
|
8
8
|
LIBNAME = lib$(VM_PREFIX)_$(STAMP).so
|
|
9
9
|
N_JOBS ?= $(shell nproc)
|
|
10
|
+
VERILATOR_FLAGS ?=
|
|
10
11
|
|
|
12
|
+
$(VM_PREFIX).v: $(wildcard $(VM_PREFIX).vhd)
|
|
13
|
+
# vhdl specific - convert to verilog first for verilating
|
|
14
|
+
mkdir -p obj_dir
|
|
15
|
+
ghdl -a --std=08 --workdir=obj_dir multiplier.vhd mux.vhd negative.vhd shift_adder.vhd $(wildcard $(VM_PREFIX:_wrapper=)_stage*.vhd) $(wildcard $(VM_PREFIX:_wrapper=).vhd) $(VM_PREFIX).vhd
|
|
16
|
+
ghdl synth --std=08 --workdir=obj_dir --out=verilog $(VM_PREFIX) > $(VM_PREFIX).v
|
|
11
17
|
|
|
12
18
|
./obj_dir/libV$(VM_PREFIX).a ./obj_dir/libverilated.a ./obj_dir/V$(VM_PREFIX)__ALL.a: $(VM_PREFIX).v
|
|
13
|
-
verilator --cc -j $(N_JOBS) -
|
|
19
|
+
verilator --cc -j $(N_JOBS) -build $(VM_PREFIX).v --prefix V$(VM_PREFIX) $(VERILATOR_FLAGS) -CFLAGS "$(CFLAGS)"
|
|
14
20
|
|
|
15
21
|
$(LIBNAME): ./obj_dir/libV$(VM_PREFIX).a ./obj_dir/libverilated.a ./obj_dir/V$(VM_PREFIX)__ALL.a $(VM_PREFIX)_binder.cc
|
|
16
22
|
$(CXX) $(CFLAGS) $(LINKFLAGS) $(CXXFLAGS2) -pthread -shared -o $(LIBNAME) $(VM_PREFIX)_binder.cc ./obj_dir/libV$(VM_PREFIX).a ./obj_dir/libverilated.a ./obj_dir/V$(VM_PREFIX)__ALL.a $(EXTRA_CXXFLAGS)
|
|
@@ -1,20 +1,41 @@
|
|
|
1
1
|
set project_name "${PROJECT_NAME}"
|
|
2
2
|
set device "${DEVICE}"
|
|
3
|
+
set source_type "${SOURCE_TYPE}"
|
|
3
4
|
|
|
4
5
|
set top_module "${project_name}"
|
|
5
6
|
set output_dir "./output_${project_name}"
|
|
6
7
|
|
|
7
8
|
create_project $project_name "${output_dir}/$project_name" -force -part $device
|
|
8
9
|
|
|
9
|
-
set_property TARGET_LANGUAGE Verilog [current_project]
|
|
10
10
|
set_property DEFAULT_LIB work [current_project]
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
12
|
+
if { $source_type != "vhdl" && $source_type != "verilog" } {
|
|
13
|
+
puts "Error: SOURCE_TYPE must be either 'vhdl' or 'verilog'."
|
|
14
|
+
exit 1
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
if { $source_type == "vhdl" } {
|
|
18
|
+
set_property TARGET_LANGUAGE VHDL [current_project]
|
|
19
|
+
|
|
20
|
+
read_vhdl -vhdl2008 "${project_name}.vhd"
|
|
21
|
+
read_vhdl -vhdl2008 "shift_adder.vhd"
|
|
22
|
+
read_vhdl -vhdl2008 "negative.vhd"
|
|
23
|
+
read_vhdl -vhdl2008 "mux.vhd"
|
|
24
|
+
read_vhdl -vhdl2008 "multiplier.vhd"
|
|
25
|
+
foreach file [glob -nocomplain "${project_name}_stage*.vhd"] {
|
|
26
|
+
read_vhdl -vhdl2008 $file
|
|
27
|
+
}
|
|
28
|
+
} else {
|
|
29
|
+
set_property TARGET_LANGUAGE Verilog [current_project]
|
|
30
|
+
|
|
31
|
+
read_verilog "${project_name}.v"
|
|
32
|
+
read_verilog "shift_adder.v"
|
|
33
|
+
read_verilog "negative.v"
|
|
34
|
+
read_verilog "mux.v"
|
|
35
|
+
read_verilog "multiplier.v"
|
|
36
|
+
foreach file [glob -nocomplain "${project_name}_stage*.v"] {
|
|
37
|
+
read_verilog $file
|
|
38
|
+
}
|
|
18
39
|
}
|
|
19
40
|
|
|
20
41
|
read_xdc "${project_name}.xdc" -mode out_of_context
|
|
@@ -10,10 +10,9 @@ from uuid import uuid4
|
|
|
10
10
|
import numpy as np
|
|
11
11
|
from numpy.typing import NDArray
|
|
12
12
|
|
|
13
|
-
from ... import codegen
|
|
14
13
|
from ...cmvm.types import CascadedSolution, Solution, _minimal_kif
|
|
15
14
|
from ...trace.pipeline import to_pipeline
|
|
16
|
-
from
|
|
15
|
+
from .. import rtl
|
|
17
16
|
|
|
18
17
|
|
|
19
18
|
def get_io_kifs(sol: Solution | CascadedSolution):
|
|
@@ -22,12 +21,13 @@ def get_io_kifs(sol: Solution | CascadedSolution):
|
|
|
22
21
|
return np.array(inp_kifs, np.int8), np.array(out_kifs, np.int8)
|
|
23
22
|
|
|
24
23
|
|
|
25
|
-
class
|
|
24
|
+
class RTLModel:
|
|
26
25
|
def __init__(
|
|
27
26
|
self,
|
|
28
27
|
solution: Solution | CascadedSolution,
|
|
29
28
|
prj_name: str,
|
|
30
29
|
path: str | Path,
|
|
30
|
+
flavor: str = 'verilog',
|
|
31
31
|
latency_cutoff: float = -1,
|
|
32
32
|
print_latency: bool = True,
|
|
33
33
|
part_name: str = 'xcvu13p-flga2577-2-e',
|
|
@@ -36,18 +36,21 @@ class VerilogModel:
|
|
|
36
36
|
io_delay_minmax: tuple[float, float] = (0.2, 0.4),
|
|
37
37
|
register_layers: int = 1,
|
|
38
38
|
):
|
|
39
|
+
self._flavor = flavor.lower()
|
|
39
40
|
self._solution = solution
|
|
40
41
|
self._path = Path(path)
|
|
41
42
|
self._prj_name = prj_name
|
|
42
43
|
self._latency_cutoff = latency_cutoff
|
|
43
44
|
self._print_latency = print_latency
|
|
44
|
-
self.__src_root = Path(
|
|
45
|
+
self.__src_root = Path(rtl.__file__).parent
|
|
45
46
|
self._part_name = part_name
|
|
46
47
|
self._clock_period = clock_period
|
|
47
48
|
self._clock_uncertainty = clock_uncertainty
|
|
48
49
|
self._io_delay_minmax = io_delay_minmax
|
|
49
50
|
self._register_layers = register_layers
|
|
50
51
|
|
|
52
|
+
assert self._flavor in ('vhdl', 'verilog'), f'Unsupported flavor {flavor}, only vhdl and verilog are supported.'
|
|
53
|
+
|
|
51
54
|
self._pipe = solution if isinstance(solution, CascadedSolution) else None
|
|
52
55
|
if latency_cutoff > 0 and self._pipe is None:
|
|
53
56
|
assert isinstance(solution, Solution)
|
|
@@ -62,16 +65,23 @@ class VerilogModel:
|
|
|
62
65
|
self._uuid = None
|
|
63
66
|
|
|
64
67
|
def write(self):
|
|
68
|
+
flavor = self._flavor
|
|
69
|
+
suffix = 'v' if flavor == 'verilog' else 'vhd'
|
|
70
|
+
if flavor == 'vhdl':
|
|
71
|
+
from .vhdl import binder_gen, comb_logic_gen, generate_io_wrapper, pipeline_logic_gen
|
|
72
|
+
else: # verilog
|
|
73
|
+
from .verilog import binder_gen, comb_logic_gen, generate_io_wrapper, pipeline_logic_gen
|
|
74
|
+
|
|
65
75
|
self._path.mkdir(parents=True, exist_ok=True)
|
|
66
76
|
if self._pipe is not None: # Pipeline
|
|
67
77
|
# Main logic
|
|
68
78
|
codes = pipeline_logic_gen(self._pipe, self._prj_name, self._print_latency, register_layers=self._register_layers)
|
|
69
79
|
for k, v in codes.items():
|
|
70
|
-
with open(self._path / f'{k}.
|
|
80
|
+
with open(self._path / f'{k}.{suffix}', 'w') as f:
|
|
71
81
|
f.write(v)
|
|
72
82
|
|
|
73
83
|
# Build script
|
|
74
|
-
with open(self.__src_root / '
|
|
84
|
+
with open(self.__src_root / 'common_source/build_prj.tcl') as f:
|
|
75
85
|
tcl = f.read()
|
|
76
86
|
tcl = tcl.replace('${DEVICE}', self._part_name)
|
|
77
87
|
tcl = tcl.replace('${PROJECT_NAME}', self._prj_name)
|
|
@@ -79,7 +89,7 @@ class VerilogModel:
|
|
|
79
89
|
f.write(tcl)
|
|
80
90
|
|
|
81
91
|
# XDC
|
|
82
|
-
with open(self.__src_root / '
|
|
92
|
+
with open(self.__src_root / 'common_source/template.xdc') as f:
|
|
83
93
|
xdc = f.read()
|
|
84
94
|
xdc = xdc.replace('${CLOCK_PERIOD}', str(self._clock_period))
|
|
85
95
|
xdc = xdc.replace('${UNCERTAINITY_SETUP}', str(self._clock_uncertainty))
|
|
@@ -89,7 +99,7 @@ class VerilogModel:
|
|
|
89
99
|
with open(self._path / f'{self._prj_name}.xdc', 'w') as f:
|
|
90
100
|
f.write(xdc)
|
|
91
101
|
|
|
92
|
-
# C++ binder w/
|
|
102
|
+
# C++ binder w/ HDL wrapper for uniform bw
|
|
93
103
|
binder = binder_gen(self._pipe, f'{self._prj_name}_wrapper', 1, self._register_layers)
|
|
94
104
|
|
|
95
105
|
# Verilog IO wrapper (non-uniform bw to uniform one, clk passthrough)
|
|
@@ -101,24 +111,25 @@ class VerilogModel:
|
|
|
101
111
|
|
|
102
112
|
# Main logic
|
|
103
113
|
code = comb_logic_gen(self._solution, self._prj_name, self._print_latency, '`timescale 1ns/1ps')
|
|
104
|
-
with open(self._path / f'{self._prj_name}.
|
|
114
|
+
with open(self._path / f'{self._prj_name}.{suffix}', 'w') as f:
|
|
105
115
|
f.write(code)
|
|
106
116
|
|
|
107
117
|
# Verilog IO wrapper (non-uniform bw to uniform one, no clk)
|
|
108
118
|
io_wrapper = generate_io_wrapper(self._solution, self._prj_name, False)
|
|
109
119
|
binder = binder_gen(self._solution, f'{self._prj_name}_wrapper')
|
|
110
120
|
|
|
111
|
-
with open(self._path / f'{self._prj_name}_wrapper.
|
|
121
|
+
with open(self._path / f'{self._prj_name}_wrapper.{suffix}', 'w') as f:
|
|
112
122
|
f.write(io_wrapper)
|
|
113
123
|
with open(self._path / f'{self._prj_name}_wrapper_binder.cc', 'w') as f:
|
|
114
124
|
f.write(binder)
|
|
115
125
|
|
|
116
126
|
# Common resource copy
|
|
117
|
-
for fname in self.__src_root.glob('
|
|
127
|
+
for fname in self.__src_root.glob(f'{flavor}/source/*.{suffix}'):
|
|
118
128
|
shutil.copy(fname, self._path)
|
|
119
|
-
|
|
120
|
-
shutil.copy(self.__src_root / '
|
|
121
|
-
shutil.copy(self.__src_root / '
|
|
129
|
+
|
|
130
|
+
shutil.copy(self.__src_root / 'common_source/build_binder.mk', self._path)
|
|
131
|
+
shutil.copy(self.__src_root / 'common_source/ioutil.hh', self._path)
|
|
132
|
+
shutil.copy(self.__src_root / 'common_source/binder_util.hh', self._path)
|
|
122
133
|
self._solution.save(self._path / 'model.json')
|
|
123
134
|
with open(self._path / 'misc.json', 'w') as f:
|
|
124
135
|
f.write(f'{{"cost": {self._solution.cost}}}')
|
|
@@ -152,6 +163,7 @@ class VerilogModel:
|
|
|
152
163
|
env['VM_PREFIX'] = f'{self._prj_name}_wrapper'
|
|
153
164
|
env['STAMP'] = self._uuid
|
|
154
165
|
env['EXTRA_CXXFLAGS'] = '-fopenmp' if openmp else ''
|
|
166
|
+
env['VERILATOR_FLAGS'] = '-Wall' if self._flavor == 'verilog' else ''
|
|
155
167
|
if nproc is not None:
|
|
156
168
|
env['N_JOBS'] = str(nproc)
|
|
157
169
|
if o3:
|
|
@@ -219,7 +231,7 @@ class VerilogModel:
|
|
|
219
231
|
self.write()
|
|
220
232
|
self._compile(verbose=verbose, openmp=openmp, nproc=nproc, o3=o3, clean=clean)
|
|
221
233
|
|
|
222
|
-
def predict(self, data: NDArray[np.floating]):
|
|
234
|
+
def predict(self, data: NDArray[np.floating]) -> NDArray[np.float32]:
|
|
223
235
|
"""Run the model on the input data.
|
|
224
236
|
|
|
225
237
|
Parameters
|
|
@@ -233,6 +245,7 @@ class VerilogModel:
|
|
|
233
245
|
NDArray[np.float64]
|
|
234
246
|
Output of the model in shape (n_samples, output_size).
|
|
235
247
|
"""
|
|
248
|
+
|
|
236
249
|
assert self._lib is not None, 'Library not loaded, call .compile() first.'
|
|
237
250
|
inp_size, out_size = self._solution.shape
|
|
238
251
|
|
|
@@ -258,7 +271,7 @@ class VerilogModel:
|
|
|
258
271
|
# Unscale the output int32 to recover fp values
|
|
259
272
|
k, i, f = np.max(k_out), np.max(i_out), np.max(f_out)
|
|
260
273
|
a, b, c = 2.0 ** (k + i + f), k * 2.0 ** (i + f), 2.0**-f
|
|
261
|
-
return ((out_data.reshape(n_sample, out_size) + b) % a - b) * c
|
|
274
|
+
return ((out_data.reshape(n_sample, out_size) + b) % a - b) * c.astype(np.float32)
|
|
262
275
|
|
|
263
276
|
def __repr__(self):
|
|
264
277
|
inp_size, out_size = self._solution.shape
|
|
@@ -289,3 +302,61 @@ Estimated cost: {cost} LUTs"""
|
|
|
289
302
|
else:
|
|
290
303
|
spec += '\nEmulator is **not compiled**'
|
|
291
304
|
return spec
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
class VerilogModel(RTLModel):
|
|
308
|
+
def __init__(
|
|
309
|
+
self,
|
|
310
|
+
solution: Solution | CascadedSolution,
|
|
311
|
+
prj_name: str,
|
|
312
|
+
path: str | Path,
|
|
313
|
+
latency_cutoff: float = -1,
|
|
314
|
+
print_latency: bool = True,
|
|
315
|
+
part_name: str = 'xcvu13p-flga2577-2-e',
|
|
316
|
+
clock_period: float = 5,
|
|
317
|
+
clock_uncertainty: float = 0.1,
|
|
318
|
+
io_delay_minmax: tuple[float, float] = (0.2, 0.4),
|
|
319
|
+
register_layers: int = 1,
|
|
320
|
+
):
|
|
321
|
+
self._hdl_model = super().__init__(
|
|
322
|
+
solution,
|
|
323
|
+
prj_name,
|
|
324
|
+
path,
|
|
325
|
+
'verilog',
|
|
326
|
+
latency_cutoff,
|
|
327
|
+
print_latency,
|
|
328
|
+
part_name,
|
|
329
|
+
clock_period,
|
|
330
|
+
clock_uncertainty,
|
|
331
|
+
io_delay_minmax,
|
|
332
|
+
register_layers,
|
|
333
|
+
)
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
class VHDLModel(RTLModel):
|
|
337
|
+
def __init__(
|
|
338
|
+
self,
|
|
339
|
+
solution: Solution | CascadedSolution,
|
|
340
|
+
prj_name: str,
|
|
341
|
+
path: str | Path,
|
|
342
|
+
latency_cutoff: float = -1,
|
|
343
|
+
print_latency: bool = True,
|
|
344
|
+
part_name: str = 'xcvu13p-flga2577-2-e',
|
|
345
|
+
clock_period: float = 5,
|
|
346
|
+
clock_uncertainty: float = 0.1,
|
|
347
|
+
io_delay_minmax: tuple[float, float] = (0.2, 0.4),
|
|
348
|
+
register_layers: int = 1,
|
|
349
|
+
):
|
|
350
|
+
self._hdl_model = super().__init__(
|
|
351
|
+
solution,
|
|
352
|
+
prj_name,
|
|
353
|
+
path,
|
|
354
|
+
'vhdl',
|
|
355
|
+
latency_cutoff,
|
|
356
|
+
print_latency,
|
|
357
|
+
part_name,
|
|
358
|
+
clock_period,
|
|
359
|
+
clock_uncertainty,
|
|
360
|
+
io_delay_minmax,
|
|
361
|
+
register_layers,
|
|
362
|
+
)
|
|
@@ -1,12 +1,10 @@
|
|
|
1
1
|
from .comb import comb_logic_gen
|
|
2
2
|
from .io_wrapper import binder_gen, generate_io_wrapper
|
|
3
3
|
from .pipeline import pipeline_logic_gen
|
|
4
|
-
from .verilog_model import VerilogModel
|
|
5
4
|
|
|
6
5
|
__all__ = [
|
|
7
6
|
'comb_logic_gen',
|
|
8
7
|
'generate_io_wrapper',
|
|
9
8
|
'pipeline_logic_gen',
|
|
10
9
|
'binder_gen',
|
|
11
|
-
'VerilogModel',
|
|
12
10
|
]
|
|
@@ -2,7 +2,25 @@ from math import ceil, log2
|
|
|
2
2
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
|
|
5
|
-
from
|
|
5
|
+
from ....cmvm.types import Op, QInterval, Solution, _minimal_kif
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def make_neg(
|
|
9
|
+
lines: list[str],
|
|
10
|
+
op: Op,
|
|
11
|
+
ops: list[Op],
|
|
12
|
+
bw0: int,
|
|
13
|
+
v0_name: str,
|
|
14
|
+
):
|
|
15
|
+
_min, _max, step = ops[op.id0].qint
|
|
16
|
+
bw_neg = max(sum(_minimal_kif(QInterval(-_max, -_min, step))), bw0)
|
|
17
|
+
was_signed = int(_min < 0)
|
|
18
|
+
lines.append(
|
|
19
|
+
f'wire [{bw_neg - 1}:0] v{op.id0}_neg; negative #({bw0}, {bw_neg}, {was_signed}) op_neg_{op.id0} ({v0_name}, v{op.id0}_neg);'
|
|
20
|
+
)
|
|
21
|
+
bw0 = bw_neg
|
|
22
|
+
v0_name = f'v{op.id0}_neg'
|
|
23
|
+
return bw0, v0_name
|
|
6
24
|
|
|
7
25
|
|
|
8
26
|
def ssa_gen(sol: Solution, neg_defined: set[int], print_latency: bool = False):
|
|
@@ -30,7 +48,7 @@ def ssa_gen(sol: Solution, neg_defined: set[int], print_latency: bool = False):
|
|
|
30
48
|
match op.opcode:
|
|
31
49
|
case -1: # Input marker
|
|
32
50
|
i0, i1 = inp_idxs[op.id0]
|
|
33
|
-
line = f'{_def} assign {v} =
|
|
51
|
+
line = f'{_def} assign {v} = model_inp[{i0}:{i1}];'
|
|
34
52
|
|
|
35
53
|
case 0 | 1: # Common a+/-b<<shift oprs
|
|
36
54
|
p0, p1 = kifs[op.id0], kifs[op.id1] # precision -> keep_neg, integers (no sign), fractional
|
|
@@ -49,45 +67,25 @@ def ssa_gen(sol: Solution, neg_defined: set[int], print_latency: bool = False):
|
|
|
49
67
|
v0_name = f'v{op.id0}'
|
|
50
68
|
bw0 = widths[op.id0]
|
|
51
69
|
|
|
52
|
-
if op.opcode == -2:
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
if op.id0 not in neg_defined:
|
|
56
|
-
neg_defined.add(op.id0)
|
|
57
|
-
was_signed = int(kifs[op.id0][0])
|
|
58
|
-
lines.append(
|
|
59
|
-
f'wire [{bw_neg - 1}:0] v{op.id0}_neg; negative #({bw0}, {bw_neg}, {was_signed}) op_neg_{op.id0} ({v0_name}, v{op.id0}_neg);'
|
|
60
|
-
)
|
|
61
|
-
bw0 = bw_neg
|
|
62
|
-
v0_name = f'v{op.id0}_neg'
|
|
70
|
+
if op.opcode == -2 and op.id0 not in neg_defined:
|
|
71
|
+
neg_defined.add(op.id0)
|
|
72
|
+
bw0, v0_name = make_neg(lines, op, ops, bw0, v0_name)
|
|
63
73
|
if ops[op.id0].qint.min < 0:
|
|
64
74
|
line = f'{_def} assign {v} = {v0_name}[{i0}:{i1}] & {{{bw}{{~{v0_name}[{bw0 - 1}]}}}};'
|
|
65
75
|
else:
|
|
66
76
|
line = f'{_def} assign {v} = {v0_name}[{i0}:{i1}];'
|
|
77
|
+
|
|
67
78
|
case 3 | -3: # Explicit quantization
|
|
68
79
|
lsb_bias = kifs[op.id0][2] - kifs[i][2]
|
|
69
80
|
i0, i1 = bw + lsb_bias - 1, lsb_bias
|
|
70
81
|
v0_name = f'v{op.id0}'
|
|
71
82
|
bw0 = widths[op.id0]
|
|
72
83
|
|
|
73
|
-
if op.opcode == -3:
|
|
74
|
-
|
|
75
|
-
lines
|
|
76
|
-
bw_neg = max(sum(_minimal_kif(QInterval(-_max, -_min, step))), bw0)
|
|
77
|
-
if op.id0 not in neg_defined:
|
|
78
|
-
neg_defined.add(op.id0)
|
|
79
|
-
# lines.append('/* verilator lint_off WIDTHTRUNC */')
|
|
80
|
-
# lines.append(
|
|
81
|
-
# f'wire [{bw_neg - 1}:0] v{op.id0}_neg; assign v{op.id0}_neg[{bw_neg - 1}:0] = -{v0_name}[{bw0 - 1}:0];'
|
|
82
|
-
# )
|
|
83
|
-
# lines.append('/* verilator lint_on WIDTHTRUNC */')
|
|
84
|
-
was_signed = int(kifs[op.id0][0])
|
|
85
|
-
lines.append(
|
|
86
|
-
f'wire [{bw_neg - 1}:0] v{op.id0}_neg; negative #({bw0}, {bw_neg}, {was_signed}) op_neg_{op.id0} ({v0_name}, v{op.id0}_neg);'
|
|
87
|
-
)
|
|
88
|
-
v0_name = f'v{op.id0}_neg'
|
|
89
|
-
|
|
84
|
+
if op.opcode == -3 and op.id0 not in neg_defined:
|
|
85
|
+
neg_defined.add(op.id0)
|
|
86
|
+
bw0, v0_name = make_neg(lines, op, ops, bw0, v0_name)
|
|
90
87
|
line = f'{_def} assign {v} = {v0_name}[{i0}:{i1}];'
|
|
88
|
+
|
|
91
89
|
case 4: # constant addition
|
|
92
90
|
num = op.data
|
|
93
91
|
sign, mag = int(num < 0), abs(num)
|
|
@@ -152,10 +150,10 @@ def output_gen(sol: Solution, neg_defined: set[int]):
|
|
|
152
150
|
lines.append(
|
|
153
151
|
f'wire [{bw - 1}:0] v{idx}_neg; negative #({bw0}, {bw}, {was_signed}) op_neg_{idx} (v{idx}, v{idx}_neg);'
|
|
154
152
|
)
|
|
155
|
-
lines.append(f'assign
|
|
153
|
+
lines.append(f'assign model_out[{i0}:{i1}] = v{idx}_neg[{bw - 1}:0];')
|
|
156
154
|
|
|
157
155
|
else:
|
|
158
|
-
lines.append(f'assign
|
|
156
|
+
lines.append(f'assign model_out[{i0}:{i1}] = v{idx}[{bw - 1}:0];')
|
|
159
157
|
return lines
|
|
160
158
|
|
|
161
159
|
|
|
@@ -165,8 +163,8 @@ def comb_logic_gen(sol: Solution, fn_name: str, print_latency: bool = False, tim
|
|
|
165
163
|
|
|
166
164
|
fn_signature = [
|
|
167
165
|
f'module {fn_name} (',
|
|
168
|
-
f' input [{inp_bits - 1}:0]
|
|
169
|
-
f' output [{out_bits - 1}:0]
|
|
166
|
+
f' input [{inp_bits - 1}:0] model_inp,',
|
|
167
|
+
f' output [{out_bits - 1}:0] model_out',
|
|
170
168
|
');',
|
|
171
169
|
]
|
|
172
170
|
|