da4ml 0.3.3__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of da4ml might be problematic. Click here for more details.
- da4ml/_version.py +2 -2
- da4ml/codegen/__init__.py +4 -7
- da4ml/codegen/hls/__init__.py +4 -0
- da4ml/codegen/{cpp/cpp_codegen.py → hls/hls_codegen.py} +19 -12
- da4ml/codegen/{cpp → hls}/hls_model.py +7 -7
- da4ml/codegen/rtl/__init__.py +15 -0
- da4ml/codegen/{verilog/source → rtl/common_source}/binder_util.hh +4 -4
- da4ml/codegen/{verilog/source → rtl/common_source}/build_binder.mk +7 -1
- da4ml/codegen/{verilog/source → rtl/common_source}/build_prj.tcl +28 -7
- da4ml/codegen/{verilog/verilog_model.py → rtl/rtl_model.py} +90 -18
- da4ml/codegen/{verilog → rtl/verilog}/__init__.py +0 -2
- da4ml/codegen/{verilog → rtl/verilog}/comb.py +32 -34
- da4ml/codegen/{verilog → rtl/verilog}/io_wrapper.py +8 -8
- da4ml/codegen/{verilog → rtl/verilog}/pipeline.py +10 -10
- da4ml/codegen/{verilog → rtl/verilog}/source/negative.v +2 -1
- da4ml/codegen/rtl/vhdl/__init__.py +10 -0
- da4ml/codegen/rtl/vhdl/comb.py +192 -0
- da4ml/codegen/rtl/vhdl/io_wrapper.py +157 -0
- da4ml/codegen/rtl/vhdl/pipeline.py +71 -0
- da4ml/codegen/rtl/vhdl/source/multiplier.vhd +40 -0
- da4ml/codegen/rtl/vhdl/source/mux.vhd +102 -0
- da4ml/codegen/rtl/vhdl/source/negative.vhd +35 -0
- da4ml/codegen/rtl/vhdl/source/shift_adder.vhd +101 -0
- da4ml/codegen/rtl/vhdl/source/template.xdc +32 -0
- da4ml/converter/hgq2/replica.py +2 -3
- {da4ml-0.3.3.dist-info → da4ml-0.4.1.dist-info}/METADATA +2 -2
- da4ml-0.4.1.dist-info/RECORD +76 -0
- da4ml/codegen/cpp/__init__.py +0 -4
- da4ml-0.3.3.dist-info/RECORD +0 -66
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_binary.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_common.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_decl.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_fixed.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_fixed_base.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_fixed_ref.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_fixed_special.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_int.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_int_base.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_int_ref.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_int_special.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/ap_shift_reg.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/etc/ap_private.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/hls_math.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/hls_stream.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/ap_types/utils/x_hls_utils.h +0 -0
- /da4ml/codegen/{cpp → hls}/source/binder_util.hh +0 -0
- /da4ml/codegen/{cpp → hls}/source/build_binder.mk +0 -0
- /da4ml/codegen/{cpp → hls}/source/vitis_bitshift.hh +0 -0
- /da4ml/codegen/{verilog/source → rtl/common_source}/ioutil.hh +0 -0
- /da4ml/codegen/{verilog/source → rtl/common_source}/template.xdc +0 -0
- /da4ml/codegen/{verilog → rtl/verilog}/source/multiplier.v +0 -0
- /da4ml/codegen/{verilog → rtl/verilog}/source/mux.v +0 -0
- /da4ml/codegen/{verilog → rtl/verilog}/source/shift_adder.v +0 -0
- {da4ml-0.3.3.dist-info → da4ml-0.4.1.dist-info}/WHEEL +0 -0
- {da4ml-0.3.3.dist-info → da4ml-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {da4ml-0.3.3.dist-info → da4ml-0.4.1.dist-info}/top_level.txt +0 -0
da4ml/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.3.3'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 3, 3)
|
|
31
|
+
__version__ = version = '0.4.1'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 4, 1)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
da4ml/codegen/__init__.py
CHANGED
|
@@ -1,12 +1,9 @@
|
|
|
1
|
-
from .
|
|
2
|
-
from .
|
|
1
|
+
from .hls import HLSModel
|
|
2
|
+
from .rtl import RTLModel, VerilogModel, VHDLModel
|
|
3
3
|
|
|
4
4
|
__all__ = [
|
|
5
|
-
'cpp_logic_and_bridge_gen',
|
|
6
|
-
'comb_logic_gen',
|
|
7
|
-
'generate_io_wrapper',
|
|
8
|
-
'pipeline_logic_gen',
|
|
9
|
-
'binder_gen',
|
|
10
5
|
'HLSModel',
|
|
11
6
|
'VerilogModel',
|
|
7
|
+
'VHDLModel',
|
|
8
|
+
'RTLModel',
|
|
12
9
|
]
|
|
@@ -16,12 +16,19 @@ def kif_to_hlslib_type(k: bool | int = 1, i: int = 0, f: int = 0):
|
|
|
16
16
|
return f'ac_fixed<{int(k)},{k + i + f},{k + i}>'
|
|
17
17
|
|
|
18
18
|
|
|
19
|
+
def kif_to_oneapi_type(k: bool | int = 1, i: int = 0, f: int = 0):
|
|
20
|
+
# OneAPI requires at least 2 bits for all ac_fixed as of 2025.1
|
|
21
|
+
return f'ac_fixed<{int(k)},{max(k + i + f, 2)},{k + i}>'
|
|
22
|
+
|
|
23
|
+
|
|
19
24
|
def get_typestr_fn(flavor: str):
|
|
20
25
|
match flavor.lower():
|
|
21
26
|
case 'vitis':
|
|
22
27
|
typestr_fn = kif_to_vitis_type
|
|
23
28
|
case 'hlslib':
|
|
24
29
|
typestr_fn = kif_to_hlslib_type
|
|
30
|
+
case 'oneapi':
|
|
31
|
+
typestr_fn = kif_to_oneapi_type
|
|
25
32
|
case _:
|
|
26
33
|
raise ValueError(f'Unsupported flavor: {flavor}')
|
|
27
34
|
return typestr_fn
|
|
@@ -46,18 +53,18 @@ def ssa_gen(sol: Solution, print_latency: bool, typestr_fn: Callable[[bool | int
|
|
|
46
53
|
match op.opcode:
|
|
47
54
|
case -1:
|
|
48
55
|
# Input marker
|
|
49
|
-
val = f'
|
|
56
|
+
val = f'model_inp[{op.id0}]'
|
|
50
57
|
case 0 | 1:
|
|
51
58
|
# Common a+/-b<<shift op
|
|
52
59
|
ref1 = f'bit_shift<{op.data}>(v{op.id1})' if op.data != 0 else f'v{op.id1}'
|
|
53
60
|
val = f'{ref0} {"-" if op.opcode == 1 else "+"} {ref1}'
|
|
54
61
|
case 2 | -2:
|
|
55
|
-
if op.opcode == 2: # relu(
|
|
62
|
+
if op.opcode == 2: # relu(model_inp)
|
|
56
63
|
if ops[op.id0].qint.min < 0:
|
|
57
64
|
val = f'{ref0} > 0 ? {_type}({ref0}) : {_type}(0)'
|
|
58
65
|
else:
|
|
59
66
|
val = ref0
|
|
60
|
-
else: # relu(-
|
|
67
|
+
else: # relu(-model_inp)
|
|
61
68
|
if ops[op.id0].qint.max > 0:
|
|
62
69
|
val = f'{ref0} > 0 ? {_type}(0) : {_type}(-{ref0})'
|
|
63
70
|
else:
|
|
@@ -105,15 +112,15 @@ def output_gen(sol: Solution, typestr_fn: Callable[[bool | int, int, int], str])
|
|
|
105
112
|
lines = []
|
|
106
113
|
for i, idx in enumerate(sol.out_idxs):
|
|
107
114
|
if idx < 0:
|
|
108
|
-
lines.append(f'
|
|
115
|
+
lines.append(f'model_out[{i}] = 0;')
|
|
109
116
|
continue
|
|
110
117
|
_type = typestr_fn(*_minimal_kif(sol.out_qint[i]))
|
|
111
118
|
shift = sol.out_shifts[i]
|
|
112
119
|
neg_str = '-' if sol.out_negs[i] else ''
|
|
113
120
|
if shift == 0:
|
|
114
|
-
lines.append(f'
|
|
121
|
+
lines.append(f'model_out[{i}] = {_type}({neg_str}v{idx});')
|
|
115
122
|
else:
|
|
116
|
-
lines.append(f'
|
|
123
|
+
lines.append(f'model_out[{i}] = {_type}({neg_str}bit_shift<{shift}>(v{idx}));')
|
|
117
124
|
return lines
|
|
118
125
|
|
|
119
126
|
|
|
@@ -126,7 +133,7 @@ def get_io_types(sol: Solution, flavor: str):
|
|
|
126
133
|
return inp_type, out_type
|
|
127
134
|
|
|
128
135
|
|
|
129
|
-
def cpp_logic_and_bridge_gen(
|
|
136
|
+
def hls_logic_and_bridge_gen(
|
|
130
137
|
sol: Solution,
|
|
131
138
|
fn_name: str,
|
|
132
139
|
flavor: str,
|
|
@@ -140,7 +147,7 @@ def cpp_logic_and_bridge_gen(
|
|
|
140
147
|
|
|
141
148
|
n_in, n_out = sol.shape
|
|
142
149
|
template_def = 'template <typename inp_t, typename out_t>'
|
|
143
|
-
fn_signature = f'void {fn_name}(inp_t
|
|
150
|
+
fn_signature = f'void {fn_name}(inp_t model_inp[{n_in}], out_t model_out[{n_out}])'
|
|
144
151
|
pragmas = pragmas or []
|
|
145
152
|
|
|
146
153
|
ssa_lines = ssa_gen(sol, print_latency=print_latency, typestr_fn=typestr_fn)
|
|
@@ -173,12 +180,12 @@ bool openmp_enabled() {{
|
|
|
173
180
|
return _openmp;
|
|
174
181
|
}}
|
|
175
182
|
|
|
176
|
-
void inference_f64(double *
|
|
177
|
-
batch_inference<{fn_name}_config, double>(
|
|
183
|
+
void inference_f64(double *model_inp, double *model_out, size_t size) {{
|
|
184
|
+
batch_inference<{fn_name}_config, double>(model_inp, model_out, size);
|
|
178
185
|
}}
|
|
179
186
|
|
|
180
|
-
void inference_f32(float *
|
|
181
|
-
batch_inference<{fn_name}_config, float>(
|
|
187
|
+
void inference_f32(float *model_inp, float *model_out, size_t size) {{
|
|
188
|
+
batch_inference<{fn_name}_config, float>(model_inp, model_out, size);
|
|
182
189
|
}}
|
|
183
190
|
}}"""
|
|
184
191
|
return code, bridge
|
|
@@ -13,7 +13,7 @@ import numpy as np
|
|
|
13
13
|
from numpy.typing import NDArray
|
|
14
14
|
|
|
15
15
|
from da4ml.cmvm.types import Solution
|
|
16
|
-
from da4ml.codegen.
|
|
16
|
+
from da4ml.codegen.hls.hls_codegen import get_io_types, hls_logic_and_bridge_gen
|
|
17
17
|
|
|
18
18
|
from ... import codegen
|
|
19
19
|
from ...cmvm.types import _minimal_kif
|
|
@@ -39,7 +39,7 @@ class HLSModel:
|
|
|
39
39
|
self._prj_name = prj_name
|
|
40
40
|
self._path = Path(path)
|
|
41
41
|
self._flavor = flavor.lower()
|
|
42
|
-
assert self._flavor in ('vitis', 'hlslib'), f'Unsupported HLS flavor: {self._flavor}'
|
|
42
|
+
assert self._flavor in ('vitis', 'hlslib', 'oneapi'), f'Unsupported HLS flavor: {self._flavor}'
|
|
43
43
|
self._print_latency = print_latency
|
|
44
44
|
self._part_name = part_name
|
|
45
45
|
self._clock_period = clock_period
|
|
@@ -64,7 +64,7 @@ class HLSModel:
|
|
|
64
64
|
def write(self):
|
|
65
65
|
if not self._path.exists():
|
|
66
66
|
self._path.mkdir(parents=True, exist_ok=True)
|
|
67
|
-
template_def, bridge = cpp_logic_and_bridge_gen(
|
|
67
|
+
template_def, bridge = hls_logic_and_bridge_gen(
|
|
68
68
|
self._solution,
|
|
69
69
|
self._prj_name,
|
|
70
70
|
self._flavor,
|
|
@@ -104,11 +104,11 @@ class HLSModel:
|
|
|
104
104
|
with open(self._path / f'{self._prj_name}_bridge.cc', 'w') as f:
|
|
105
105
|
f.write(bridge)
|
|
106
106
|
|
|
107
|
-
shutil.copy(self.__src_root / '
|
|
108
|
-
shutil.copy(self.__src_root / f'
|
|
109
|
-
shutil.copy(self.__src_root / '
|
|
107
|
+
shutil.copy(self.__src_root / 'hls/source/binder_util.hh', self._path)
|
|
108
|
+
shutil.copy(self.__src_root / f'hls/source/{self._flavor}_bitshift.hh', self._path / 'bitshift.hh')
|
|
109
|
+
shutil.copy(self.__src_root / 'hls/source/build_binder.mk', self._path)
|
|
110
110
|
if self._flavor == 'vitis':
|
|
111
|
-
shutil.copytree(self.__src_root / '
|
|
111
|
+
shutil.copytree(self.__src_root / 'hls/source/ap_types', self._path / 'ap_types', dirs_exist_ok=True)
|
|
112
112
|
else:
|
|
113
113
|
pass
|
|
114
114
|
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from .rtl_model import RTLModel, VerilogModel, VHDLModel
|
|
2
|
+
from .verilog import comb_logic_gen as verilog_comb_logic_gen
|
|
3
|
+
from .verilog import generate_io_wrapper as verilog_generate_io_wrapper
|
|
4
|
+
from .vhdl import comb_logic_gen as vhdl_comb_logic_gen
|
|
5
|
+
from .vhdl import generate_io_wrapper as vhdl_generate_io_wrapper
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
'RTLModel',
|
|
9
|
+
'VerilogModel',
|
|
10
|
+
'VHDLModel',
|
|
11
|
+
'verilog_comb_logic_gen',
|
|
12
|
+
'verilog_generate_io_wrapper',
|
|
13
|
+
'vhdl_comb_logic_gen',
|
|
14
|
+
'vhdl_generate_io_wrapper',
|
|
15
|
+
]
|
|
@@ -19,7 +19,7 @@ std::enable_if_t<CONFIG_T::II != 0> _inference(int32_t *c_inp, int32_t *c_out, s
|
|
|
19
19
|
|
|
20
20
|
if (t_inp < n_samples * CONFIG_T::II && t_inp % CONFIG_T::II == 0) {
|
|
21
21
|
write_input<CONFIG_T::N_inp, CONFIG_T::max_inp_bw>(
|
|
22
|
-
dut->
|
|
22
|
+
dut->model_inp, &c_inp[t_inp / CONFIG_T::II * CONFIG_T::N_inp]
|
|
23
23
|
);
|
|
24
24
|
}
|
|
25
25
|
|
|
@@ -28,7 +28,7 @@ std::enable_if_t<CONFIG_T::II != 0> _inference(int32_t *c_inp, int32_t *c_out, s
|
|
|
28
28
|
|
|
29
29
|
if (t_inp > CONFIG_T::latency && t_out % CONFIG_T::II == 0) {
|
|
30
30
|
read_output<CONFIG_T::N_out, CONFIG_T::max_out_bw>(
|
|
31
|
-
dut->
|
|
31
|
+
dut->model_out, &c_out[t_out / CONFIG_T::II * CONFIG_T::N_out]
|
|
32
32
|
);
|
|
33
33
|
}
|
|
34
34
|
|
|
@@ -44,9 +44,9 @@ std::enable_if_t<CONFIG_T::II == 0> _inference(int32_t *c_inp, int32_t *c_out, s
|
|
|
44
44
|
auto dut = std::make_unique<typename CONFIG_T::dut_t>();
|
|
45
45
|
|
|
46
46
|
for (size_t i = 0; i < n_samples; ++i) {
|
|
47
|
-
write_input<CONFIG_T::N_inp, CONFIG_T::max_inp_bw>(dut->
|
|
47
|
+
write_input<CONFIG_T::N_inp, CONFIG_T::max_inp_bw>(dut->model_inp, &c_inp[i * CONFIG_T::N_inp]);
|
|
48
48
|
dut->eval();
|
|
49
|
-
read_output<CONFIG_T::N_out, CONFIG_T::max_out_bw>(dut->
|
|
49
|
+
read_output<CONFIG_T::N_out, CONFIG_T::max_out_bw>(dut->model_out, &c_out[i * CONFIG_T::N_out]);
|
|
50
50
|
}
|
|
51
51
|
|
|
52
52
|
dut->final();
|
|
@@ -7,10 +7,16 @@ CFLAGS = -std=c++17 -fPIC
|
|
|
7
7
|
LINKFLAGS = $(INCLUDES) $(WARNINGS)
|
|
8
8
|
LIBNAME = lib$(VM_PREFIX)_$(STAMP).so
|
|
9
9
|
N_JOBS ?= $(shell nproc)
|
|
10
|
+
VERILATOR_FLAGS ?=
|
|
10
11
|
|
|
12
|
+
$(VM_PREFIX).v: $(wildcard $(VM_PREFIX).vhd)
|
|
13
|
+
# vhdl specific - convert to verilog first for verilating
|
|
14
|
+
mkdir -p obj_dir
|
|
15
|
+
ghdl -a --std=08 --workdir=obj_dir multiplier.vhd mux.vhd negative.vhd shift_adder.vhd $(wildcard $(VM_PREFIX:_wrapper=)_stage*.vhd) $(wildcard $(VM_PREFIX:_wrapper=).vhd) $(VM_PREFIX).vhd
|
|
16
|
+
ghdl synth --std=08 --workdir=obj_dir --out=verilog $(VM_PREFIX) > $(VM_PREFIX).v
|
|
11
17
|
|
|
12
18
|
./obj_dir/libV$(VM_PREFIX).a ./obj_dir/libverilated.a ./obj_dir/V$(VM_PREFIX)__ALL.a: $(VM_PREFIX).v
|
|
13
|
-
verilator --cc -j $(N_JOBS) -
|
|
19
|
+
verilator --cc -j $(N_JOBS) -build $(VM_PREFIX).v --prefix V$(VM_PREFIX) $(VERILATOR_FLAGS) -CFLAGS "$(CFLAGS)"
|
|
14
20
|
|
|
15
21
|
$(LIBNAME): ./obj_dir/libV$(VM_PREFIX).a ./obj_dir/libverilated.a ./obj_dir/V$(VM_PREFIX)__ALL.a $(VM_PREFIX)_binder.cc
|
|
16
22
|
$(CXX) $(CFLAGS) $(LINKFLAGS) $(CXXFLAGS2) -pthread -shared -o $(LIBNAME) $(VM_PREFIX)_binder.cc ./obj_dir/libV$(VM_PREFIX).a ./obj_dir/libverilated.a ./obj_dir/V$(VM_PREFIX)__ALL.a $(EXTRA_CXXFLAGS)
|
|
@@ -1,20 +1,41 @@
|
|
|
1
1
|
set project_name "${PROJECT_NAME}"
|
|
2
2
|
set device "${DEVICE}"
|
|
3
|
+
set source_type "${SOURCE_TYPE}"
|
|
3
4
|
|
|
4
5
|
set top_module "${project_name}"
|
|
5
6
|
set output_dir "./output_${project_name}"
|
|
6
7
|
|
|
7
8
|
create_project $project_name "${output_dir}/$project_name" -force -part $device
|
|
8
9
|
|
|
9
|
-
set_property TARGET_LANGUAGE Verilog [current_project]
|
|
10
10
|
set_property DEFAULT_LIB work [current_project]
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
12
|
+
if { $source_type != "vhdl" && $source_type != "verilog" } {
|
|
13
|
+
puts "Error: SOURCE_TYPE must be either 'vhdl' or 'verilog'."
|
|
14
|
+
exit 1
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
if { $source_type == "vhdl" } {
|
|
18
|
+
set_property TARGET_LANGUAGE VHDL [current_project]
|
|
19
|
+
|
|
20
|
+
read_vhdl -vhdl2008 "${project_name}.vhd"
|
|
21
|
+
read_vhdl -vhdl2008 "shift_adder.vhd"
|
|
22
|
+
read_vhdl -vhdl2008 "negative.vhd"
|
|
23
|
+
read_vhdl -vhdl2008 "mux.vhd"
|
|
24
|
+
read_vhdl -vhdl2008 "multiplier.vhd"
|
|
25
|
+
foreach file [glob -nocomplain "${project_name}_stage*.vhd"] {
|
|
26
|
+
read_vhdl -vhdl2008 $file
|
|
27
|
+
}
|
|
28
|
+
} else {
|
|
29
|
+
set_property TARGET_LANGUAGE Verilog [current_project]
|
|
30
|
+
|
|
31
|
+
read_verilog "${project_name}.v"
|
|
32
|
+
read_verilog "shift_adder.v"
|
|
33
|
+
read_verilog "negative.v"
|
|
34
|
+
read_verilog "mux.v"
|
|
35
|
+
read_verilog "multiplier.v"
|
|
36
|
+
foreach file [glob -nocomplain "${project_name}_stage*.v"] {
|
|
37
|
+
read_verilog $file
|
|
38
|
+
}
|
|
18
39
|
}
|
|
19
40
|
|
|
20
41
|
read_xdc "${project_name}.xdc" -mode out_of_context
|
|
@@ -10,10 +10,9 @@ from uuid import uuid4
|
|
|
10
10
|
import numpy as np
|
|
11
11
|
from numpy.typing import NDArray
|
|
12
12
|
|
|
13
|
-
from ... import codegen
|
|
14
13
|
from ...cmvm.types import CascadedSolution, Solution, _minimal_kif
|
|
15
14
|
from ...trace.pipeline import to_pipeline
|
|
16
|
-
from
|
|
15
|
+
from .. import rtl
|
|
17
16
|
|
|
18
17
|
|
|
19
18
|
def get_io_kifs(sol: Solution | CascadedSolution):
|
|
@@ -22,12 +21,13 @@ def get_io_kifs(sol: Solution | CascadedSolution):
|
|
|
22
21
|
return np.array(inp_kifs, np.int8), np.array(out_kifs, np.int8)
|
|
23
22
|
|
|
24
23
|
|
|
25
|
-
class VerilogModel:
|
|
24
|
+
class RTLModel:
|
|
26
25
|
def __init__(
|
|
27
26
|
self,
|
|
28
27
|
solution: Solution | CascadedSolution,
|
|
29
28
|
prj_name: str,
|
|
30
29
|
path: str | Path,
|
|
30
|
+
flavor: str = 'verilog',
|
|
31
31
|
latency_cutoff: float = -1,
|
|
32
32
|
print_latency: bool = True,
|
|
33
33
|
part_name: str = 'xcvu13p-flga2577-2-e',
|
|
@@ -36,18 +36,21 @@ class VerilogModel:
|
|
|
36
36
|
io_delay_minmax: tuple[float, float] = (0.2, 0.4),
|
|
37
37
|
register_layers: int = 1,
|
|
38
38
|
):
|
|
39
|
+
self._flavor = flavor.lower()
|
|
39
40
|
self._solution = solution
|
|
40
41
|
self._path = Path(path)
|
|
41
42
|
self._prj_name = prj_name
|
|
42
43
|
self._latency_cutoff = latency_cutoff
|
|
43
44
|
self._print_latency = print_latency
|
|
44
|
-
self.__src_root = Path(
|
|
45
|
+
self.__src_root = Path(rtl.__file__).parent
|
|
45
46
|
self._part_name = part_name
|
|
46
47
|
self._clock_period = clock_period
|
|
47
48
|
self._clock_uncertainty = clock_uncertainty
|
|
48
49
|
self._io_delay_minmax = io_delay_minmax
|
|
49
50
|
self._register_layers = register_layers
|
|
50
51
|
|
|
52
|
+
assert self._flavor in ('vhdl', 'verilog'), f'Unsupported flavor {flavor}, only vhdl and verilog are supported.'
|
|
53
|
+
|
|
51
54
|
self._pipe = solution if isinstance(solution, CascadedSolution) else None
|
|
52
55
|
if latency_cutoff > 0 and self._pipe is None:
|
|
53
56
|
assert isinstance(solution, Solution)
|
|
@@ -62,34 +65,42 @@ class VerilogModel:
|
|
|
62
65
|
self._uuid = None
|
|
63
66
|
|
|
64
67
|
def write(self):
|
|
68
|
+
flavor = self._flavor
|
|
69
|
+
suffix = 'v' if flavor == 'verilog' else 'vhd'
|
|
70
|
+
if flavor == 'vhdl':
|
|
71
|
+
from .vhdl import binder_gen, comb_logic_gen, generate_io_wrapper, pipeline_logic_gen
|
|
72
|
+
else: # verilog
|
|
73
|
+
from .verilog import binder_gen, comb_logic_gen, generate_io_wrapper, pipeline_logic_gen
|
|
74
|
+
|
|
65
75
|
self._path.mkdir(parents=True, exist_ok=True)
|
|
66
76
|
if self._pipe is not None: # Pipeline
|
|
67
77
|
# Main logic
|
|
68
78
|
codes = pipeline_logic_gen(self._pipe, self._prj_name, self._print_latency, register_layers=self._register_layers)
|
|
69
79
|
for k, v in codes.items():
|
|
70
|
-
with open(self._path / f'{k}.
|
|
80
|
+
with open(self._path / f'{k}.{suffix}', 'w') as f:
|
|
71
81
|
f.write(v)
|
|
72
82
|
|
|
73
83
|
# Build script
|
|
74
|
-
with open(self.__src_root / '
|
|
84
|
+
with open(self.__src_root / 'common_source/build_prj.tcl') as f:
|
|
75
85
|
tcl = f.read()
|
|
76
86
|
tcl = tcl.replace('${DEVICE}', self._part_name)
|
|
77
87
|
tcl = tcl.replace('${PROJECT_NAME}', self._prj_name)
|
|
88
|
+
tcl = tcl.replace('${SOURCE_TYPE}', flavor)
|
|
78
89
|
with open(self._path / 'build_prj.tcl', 'w') as f:
|
|
79
90
|
f.write(tcl)
|
|
80
91
|
|
|
81
92
|
# XDC
|
|
82
|
-
with open(self.__src_root / '
|
|
93
|
+
with open(self.__src_root / 'common_source/template.xdc') as f:
|
|
83
94
|
xdc = f.read()
|
|
84
95
|
xdc = xdc.replace('${CLOCK_PERIOD}', str(self._clock_period))
|
|
85
96
|
xdc = xdc.replace('${UNCERTAINITY_SETUP}', str(self._clock_uncertainty))
|
|
86
97
|
xdc = xdc.replace('${UNCERTAINITY_HOLD}', str(self._clock_uncertainty))
|
|
87
|
-
xdc = xdc.replace('${DELAY_MAX}', str(self._io_delay_minmax[
|
|
88
|
-
xdc = xdc.replace('${DELAY_MIN}', str(self._io_delay_minmax[
|
|
98
|
+
xdc = xdc.replace('${DELAY_MAX}', str(self._io_delay_minmax[1]))
|
|
99
|
+
xdc = xdc.replace('${DELAY_MIN}', str(self._io_delay_minmax[0]))
|
|
89
100
|
with open(self._path / f'{self._prj_name}.xdc', 'w') as f:
|
|
90
101
|
f.write(xdc)
|
|
91
102
|
|
|
92
|
-
# C++ binder w/
|
|
103
|
+
# C++ binder w/ HDL wrapper for uniform bw
|
|
93
104
|
binder = binder_gen(self._pipe, f'{self._prj_name}_wrapper', 1, self._register_layers)
|
|
94
105
|
|
|
95
106
|
# Verilog IO wrapper (non-uniform bw to uniform one, clk passthrough)
|
|
@@ -101,24 +112,25 @@ class VerilogModel:
|
|
|
101
112
|
|
|
102
113
|
# Main logic
|
|
103
114
|
code = comb_logic_gen(self._solution, self._prj_name, self._print_latency, '`timescale 1ns/1ps')
|
|
104
|
-
with open(self._path / f'{self._prj_name}.
|
|
115
|
+
with open(self._path / f'{self._prj_name}.{suffix}', 'w') as f:
|
|
105
116
|
f.write(code)
|
|
106
117
|
|
|
107
118
|
# Verilog IO wrapper (non-uniform bw to uniform one, no clk)
|
|
108
119
|
io_wrapper = generate_io_wrapper(self._solution, self._prj_name, False)
|
|
109
120
|
binder = binder_gen(self._solution, f'{self._prj_name}_wrapper')
|
|
110
121
|
|
|
111
|
-
with open(self._path / f'{self._prj_name}_wrapper.
|
|
122
|
+
with open(self._path / f'{self._prj_name}_wrapper.{suffix}', 'w') as f:
|
|
112
123
|
f.write(io_wrapper)
|
|
113
124
|
with open(self._path / f'{self._prj_name}_wrapper_binder.cc', 'w') as f:
|
|
114
125
|
f.write(binder)
|
|
115
126
|
|
|
116
127
|
# Common resource copy
|
|
117
|
-
for fname in self.__src_root.glob('
|
|
128
|
+
for fname in self.__src_root.glob(f'{flavor}/source/*.{suffix}'):
|
|
118
129
|
shutil.copy(fname, self._path)
|
|
119
|
-
|
|
120
|
-
shutil.copy(self.__src_root / '
|
|
121
|
-
shutil.copy(self.__src_root / '
|
|
130
|
+
|
|
131
|
+
shutil.copy(self.__src_root / 'common_source/build_binder.mk', self._path)
|
|
132
|
+
shutil.copy(self.__src_root / 'common_source/ioutil.hh', self._path)
|
|
133
|
+
shutil.copy(self.__src_root / 'common_source/binder_util.hh', self._path)
|
|
122
134
|
self._solution.save(self._path / 'model.json')
|
|
123
135
|
with open(self._path / 'misc.json', 'w') as f:
|
|
124
136
|
f.write(f'{{"cost": {self._solution.cost}}}')
|
|
@@ -152,6 +164,7 @@ class VerilogModel:
|
|
|
152
164
|
env['VM_PREFIX'] = f'{self._prj_name}_wrapper'
|
|
153
165
|
env['STAMP'] = self._uuid
|
|
154
166
|
env['EXTRA_CXXFLAGS'] = '-fopenmp' if openmp else ''
|
|
167
|
+
env['VERILATOR_FLAGS'] = '-Wall' if self._flavor == 'verilog' else ''
|
|
155
168
|
if nproc is not None:
|
|
156
169
|
env['N_JOBS'] = str(nproc)
|
|
157
170
|
if o3:
|
|
@@ -219,7 +232,7 @@ class VerilogModel:
|
|
|
219
232
|
self.write()
|
|
220
233
|
self._compile(verbose=verbose, openmp=openmp, nproc=nproc, o3=o3, clean=clean)
|
|
221
234
|
|
|
222
|
-
def predict(self, data: NDArray[np.floating]):
|
|
235
|
+
def predict(self, data: NDArray[np.floating]) -> NDArray[np.float32]:
|
|
223
236
|
"""Run the model on the input data.
|
|
224
237
|
|
|
225
238
|
Parameters
|
|
@@ -233,6 +246,7 @@ class VerilogModel:
|
|
|
233
246
|
NDArray[np.float64]
|
|
234
247
|
Output of the model in shape (n_samples, output_size).
|
|
235
248
|
"""
|
|
249
|
+
|
|
236
250
|
assert self._lib is not None, 'Library not loaded, call .compile() first.'
|
|
237
251
|
inp_size, out_size = self._solution.shape
|
|
238
252
|
|
|
@@ -258,7 +272,7 @@ class VerilogModel:
|
|
|
258
272
|
# Unscale the output int32 to recover fp values
|
|
259
273
|
k, i, f = np.max(k_out), np.max(i_out), np.max(f_out)
|
|
260
274
|
a, b, c = 2.0 ** (k + i + f), k * 2.0 ** (i + f), 2.0**-f
|
|
261
|
-
return ((out_data.reshape(n_sample, out_size) + b) % a - b) * c
|
|
275
|
+
return ((out_data.reshape(n_sample, out_size) + b) % a - b) * c.astype(np.float32)
|
|
262
276
|
|
|
263
277
|
def __repr__(self):
|
|
264
278
|
inp_size, out_size = self._solution.shape
|
|
@@ -289,3 +303,61 @@ Estimated cost: {cost} LUTs"""
|
|
|
289
303
|
else:
|
|
290
304
|
spec += '\nEmulator is **not compiled**'
|
|
291
305
|
return spec
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
class VerilogModel(RTLModel):
|
|
309
|
+
def __init__(
|
|
310
|
+
self,
|
|
311
|
+
solution: Solution | CascadedSolution,
|
|
312
|
+
prj_name: str,
|
|
313
|
+
path: str | Path,
|
|
314
|
+
latency_cutoff: float = -1,
|
|
315
|
+
print_latency: bool = True,
|
|
316
|
+
part_name: str = 'xcvu13p-flga2577-2-e',
|
|
317
|
+
clock_period: float = 5,
|
|
318
|
+
clock_uncertainty: float = 0.1,
|
|
319
|
+
io_delay_minmax: tuple[float, float] = (0.2, 0.4),
|
|
320
|
+
register_layers: int = 1,
|
|
321
|
+
):
|
|
322
|
+
self._hdl_model = super().__init__(
|
|
323
|
+
solution,
|
|
324
|
+
prj_name,
|
|
325
|
+
path,
|
|
326
|
+
'verilog',
|
|
327
|
+
latency_cutoff,
|
|
328
|
+
print_latency,
|
|
329
|
+
part_name,
|
|
330
|
+
clock_period,
|
|
331
|
+
clock_uncertainty,
|
|
332
|
+
io_delay_minmax,
|
|
333
|
+
register_layers,
|
|
334
|
+
)
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
class VHDLModel(RTLModel):
|
|
338
|
+
def __init__(
|
|
339
|
+
self,
|
|
340
|
+
solution: Solution | CascadedSolution,
|
|
341
|
+
prj_name: str,
|
|
342
|
+
path: str | Path,
|
|
343
|
+
latency_cutoff: float = -1,
|
|
344
|
+
print_latency: bool = True,
|
|
345
|
+
part_name: str = 'xcvu13p-flga2577-2-e',
|
|
346
|
+
clock_period: float = 5,
|
|
347
|
+
clock_uncertainty: float = 0.1,
|
|
348
|
+
io_delay_minmax: tuple[float, float] = (0.2, 0.4),
|
|
349
|
+
register_layers: int = 1,
|
|
350
|
+
):
|
|
351
|
+
self._hdl_model = super().__init__(
|
|
352
|
+
solution,
|
|
353
|
+
prj_name,
|
|
354
|
+
path,
|
|
355
|
+
'vhdl',
|
|
356
|
+
latency_cutoff,
|
|
357
|
+
print_latency,
|
|
358
|
+
part_name,
|
|
359
|
+
clock_period,
|
|
360
|
+
clock_uncertainty,
|
|
361
|
+
io_delay_minmax,
|
|
362
|
+
register_layers,
|
|
363
|
+
)
|
|
@@ -1,12 +1,10 @@
|
|
|
1
1
|
from .comb import comb_logic_gen
|
|
2
2
|
from .io_wrapper import binder_gen, generate_io_wrapper
|
|
3
3
|
from .pipeline import pipeline_logic_gen
|
|
4
|
-
from .verilog_model import VerilogModel
|
|
5
4
|
|
|
6
5
|
__all__ = [
|
|
7
6
|
'comb_logic_gen',
|
|
8
7
|
'generate_io_wrapper',
|
|
9
8
|
'pipeline_logic_gen',
|
|
10
9
|
'binder_gen',
|
|
11
|
-
'VerilogModel',
|
|
12
10
|
]
|
|
@@ -2,7 +2,25 @@ from math import ceil, log2
|
|
|
2
2
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
|
|
5
|
-
from
|
|
5
|
+
from ....cmvm.types import Op, QInterval, Solution, _minimal_kif
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def make_neg(
|
|
9
|
+
lines: list[str],
|
|
10
|
+
op: Op,
|
|
11
|
+
ops: list[Op],
|
|
12
|
+
bw0: int,
|
|
13
|
+
v0_name: str,
|
|
14
|
+
):
|
|
15
|
+
_min, _max, step = ops[op.id0].qint
|
|
16
|
+
bw_neg = max(sum(_minimal_kif(QInterval(-_max, -_min, step))), bw0)
|
|
17
|
+
was_signed = int(_min < 0)
|
|
18
|
+
lines.append(
|
|
19
|
+
f'wire [{bw_neg - 1}:0] v{op.id0}_neg; negative #({bw0}, {bw_neg}, {was_signed}) op_neg_{op.id0} ({v0_name}, v{op.id0}_neg);'
|
|
20
|
+
)
|
|
21
|
+
bw0 = bw_neg
|
|
22
|
+
v0_name = f'v{op.id0}_neg'
|
|
23
|
+
return bw0, v0_name
|
|
6
24
|
|
|
7
25
|
|
|
8
26
|
def ssa_gen(sol: Solution, neg_defined: set[int], print_latency: bool = False):
|
|
@@ -30,7 +48,7 @@ def ssa_gen(sol: Solution, neg_defined: set[int], print_latency: bool = False):
|
|
|
30
48
|
match op.opcode:
|
|
31
49
|
case -1: # Input marker
|
|
32
50
|
i0, i1 = inp_idxs[op.id0]
|
|
33
|
-
line = f'{_def} assign {v} =
|
|
51
|
+
line = f'{_def} assign {v} = model_inp[{i0}:{i1}];'
|
|
34
52
|
|
|
35
53
|
case 0 | 1: # Common a+/-b<<shift oprs
|
|
36
54
|
p0, p1 = kifs[op.id0], kifs[op.id1] # precision -> keep_neg, integers (no sign), fractional
|
|
@@ -49,45 +67,25 @@ def ssa_gen(sol: Solution, neg_defined: set[int], print_latency: bool = False):
|
|
|
49
67
|
v0_name = f'v{op.id0}'
|
|
50
68
|
bw0 = widths[op.id0]
|
|
51
69
|
|
|
52
|
-
if op.opcode == -2:
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
if op.id0 not in neg_defined:
|
|
56
|
-
neg_defined.add(op.id0)
|
|
57
|
-
was_signed = int(kifs[op.id0][0])
|
|
58
|
-
lines.append(
|
|
59
|
-
f'wire [{bw_neg - 1}:0] v{op.id0}_neg; negative #({bw0}, {bw_neg}, {was_signed}) op_neg_{op.id0} ({v0_name}, v{op.id0}_neg);'
|
|
60
|
-
)
|
|
61
|
-
bw0 = bw_neg
|
|
62
|
-
v0_name = f'v{op.id0}_neg'
|
|
70
|
+
if op.opcode == -2 and op.id0 not in neg_defined:
|
|
71
|
+
neg_defined.add(op.id0)
|
|
72
|
+
bw0, v0_name = make_neg(lines, op, ops, bw0, v0_name)
|
|
63
73
|
if ops[op.id0].qint.min < 0:
|
|
64
74
|
line = f'{_def} assign {v} = {v0_name}[{i0}:{i1}] & {{{bw}{{~{v0_name}[{bw0 - 1}]}}}};'
|
|
65
75
|
else:
|
|
66
76
|
line = f'{_def} assign {v} = {v0_name}[{i0}:{i1}];'
|
|
77
|
+
|
|
67
78
|
case 3 | -3: # Explicit quantization
|
|
68
79
|
lsb_bias = kifs[op.id0][2] - kifs[i][2]
|
|
69
80
|
i0, i1 = bw + lsb_bias - 1, lsb_bias
|
|
70
81
|
v0_name = f'v{op.id0}'
|
|
71
82
|
bw0 = widths[op.id0]
|
|
72
83
|
|
|
73
|
-
if op.opcode == -3:
|
|
74
|
-
|
|
75
|
-
lines
|
|
76
|
-
bw_neg = max(sum(_minimal_kif(QInterval(-_max, -_min, step))), bw0)
|
|
77
|
-
if op.id0 not in neg_defined:
|
|
78
|
-
neg_defined.add(op.id0)
|
|
79
|
-
# lines.append('/* verilator lint_off WIDTHTRUNC */')
|
|
80
|
-
# lines.append(
|
|
81
|
-
# f'wire [{bw_neg - 1}:0] v{op.id0}_neg; assign v{op.id0}_neg[{bw_neg - 1}:0] = -{v0_name}[{bw0 - 1}:0];'
|
|
82
|
-
# )
|
|
83
|
-
# lines.append('/* verilator lint_on WIDTHTRUNC */')
|
|
84
|
-
was_signed = int(kifs[op.id0][0])
|
|
85
|
-
lines.append(
|
|
86
|
-
f'wire [{bw_neg - 1}:0] v{op.id0}_neg; negative #({bw0}, {bw_neg}, {was_signed}) op_neg_{op.id0} ({v0_name}, v{op.id0}_neg);'
|
|
87
|
-
)
|
|
88
|
-
v0_name = f'v{op.id0}_neg'
|
|
89
|
-
|
|
84
|
+
if op.opcode == -3 and op.id0 not in neg_defined:
|
|
85
|
+
neg_defined.add(op.id0)
|
|
86
|
+
bw0, v0_name = make_neg(lines, op, ops, bw0, v0_name)
|
|
90
87
|
line = f'{_def} assign {v} = {v0_name}[{i0}:{i1}];'
|
|
88
|
+
|
|
91
89
|
case 4: # constant addition
|
|
92
90
|
num = op.data
|
|
93
91
|
sign, mag = int(num < 0), abs(num)
|
|
@@ -152,10 +150,10 @@ def output_gen(sol: Solution, neg_defined: set[int]):
|
|
|
152
150
|
lines.append(
|
|
153
151
|
f'wire [{bw - 1}:0] v{idx}_neg; negative #({bw0}, {bw}, {was_signed}) op_neg_{idx} (v{idx}, v{idx}_neg);'
|
|
154
152
|
)
|
|
155
|
-
lines.append(f'assign
|
|
153
|
+
lines.append(f'assign model_out[{i0}:{i1}] = v{idx}_neg[{bw - 1}:0];')
|
|
156
154
|
|
|
157
155
|
else:
|
|
158
|
-
lines.append(f'assign
|
|
156
|
+
lines.append(f'assign model_out[{i0}:{i1}] = v{idx}[{bw - 1}:0];')
|
|
159
157
|
return lines
|
|
160
158
|
|
|
161
159
|
|
|
@@ -165,8 +163,8 @@ def comb_logic_gen(sol: Solution, fn_name: str, print_latency: bool = False, tim
|
|
|
165
163
|
|
|
166
164
|
fn_signature = [
|
|
167
165
|
f'module {fn_name} (',
|
|
168
|
-
f' input [{inp_bits - 1}:0]
|
|
169
|
-
f' output [{out_bits - 1}:0]
|
|
166
|
+
f' input [{inp_bits - 1}:0] model_inp,',
|
|
167
|
+
f' output [{out_bits - 1}:0] model_out',
|
|
170
168
|
');',
|
|
171
169
|
]
|
|
172
170
|
|