da4ml 0.2.1__py3-none-any.whl → 0.3.0.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of da4ml might be problematic. Click here for more details.
- da4ml/_version.py +2 -2
- da4ml/cmvm/types.py +95 -15
- da4ml/codegen/__init__.py +5 -4
- da4ml/codegen/cpp/__init__.py +2 -1
- da4ml/codegen/cpp/cpp_codegen.py +56 -23
- da4ml/codegen/cpp/hls_model.py +252 -0
- da4ml/codegen/cpp/source/ap_types/ap_binary.h +78 -0
- da4ml/codegen/cpp/source/ap_types/ap_common.h +376 -0
- da4ml/codegen/cpp/source/ap_types/ap_decl.h +212 -0
- da4ml/codegen/cpp/source/ap_types/ap_fixed.h +360 -0
- da4ml/codegen/cpp/source/ap_types/ap_fixed_base.h +2354 -0
- da4ml/codegen/cpp/source/ap_types/ap_fixed_ref.h +718 -0
- da4ml/codegen/cpp/source/ap_types/ap_fixed_special.h +230 -0
- da4ml/codegen/cpp/source/ap_types/ap_int.h +330 -0
- da4ml/codegen/cpp/source/ap_types/ap_int_base.h +1885 -0
- da4ml/codegen/cpp/source/ap_types/ap_int_ref.h +1346 -0
- da4ml/codegen/cpp/source/ap_types/ap_int_special.h +223 -0
- da4ml/codegen/cpp/source/ap_types/ap_shift_reg.h +138 -0
- da4ml/codegen/cpp/source/ap_types/etc/ap_private.h +7199 -0
- da4ml/codegen/cpp/source/ap_types/hls_math.h +27 -0
- da4ml/codegen/cpp/source/ap_types/hls_stream.h +263 -0
- da4ml/codegen/cpp/source/ap_types/utils/x_hls_utils.h +80 -0
- da4ml/codegen/cpp/source/binder_util.hh +56 -0
- da4ml/codegen/cpp/source/build_binder.mk +24 -0
- da4ml/codegen/cpp/source/{vitis.h → vitis_bitshift.hh} +1 -1
- da4ml/codegen/verilog/__init__.py +2 -3
- da4ml/codegen/verilog/comb.py +65 -24
- da4ml/codegen/verilog/io_wrapper.py +36 -141
- da4ml/codegen/verilog/source/binder_util.hh +72 -0
- da4ml/codegen/verilog/source/mux.v +58 -0
- da4ml/codegen/verilog/source/negative.v +28 -0
- da4ml/codegen/verilog/source/shift_adder.v +4 -1
- da4ml/codegen/verilog/source/template.xdc +3 -0
- da4ml/codegen/verilog/verilog_model.py +36 -12
- da4ml/converter/__init__.py +0 -0
- da4ml/converter/hgq2/parser.py +105 -0
- da4ml/converter/hgq2/replica.py +383 -0
- da4ml/trace/__init__.py +2 -2
- da4ml/trace/fixed_variable.py +175 -16
- da4ml/trace/fixed_variable_array.py +109 -4
- da4ml/trace/ops/__init__.py +22 -6
- da4ml/trace/ops/conv_utils.py +147 -15
- da4ml/trace/ops/einsum_utils.py +9 -6
- da4ml/trace/ops/reduce_utils.py +103 -0
- da4ml/trace/pipeline.py +36 -34
- da4ml/trace/tracer.py +37 -7
- da4ml-0.3.0.post1.dist-info/METADATA +107 -0
- da4ml-0.3.0.post1.dist-info/RECORD +64 -0
- da4ml/codegen/cpp/source/vitis_bridge.h +0 -17
- da4ml-0.2.1.dist-info/METADATA +0 -65
- da4ml-0.2.1.dist-info/RECORD +0 -39
- /da4ml/codegen/verilog/source/{ioutils.hh → ioutil.hh} +0 -0
- {da4ml-0.2.1.dist-info → da4ml-0.3.0.post1.dist-info}/WHEEL +0 -0
- {da4ml-0.2.1.dist-info → da4ml-0.3.0.post1.dist-info}/licenses/LICENSE +0 -0
- {da4ml-0.2.1.dist-info → da4ml-0.3.0.post1.dist-info}/top_level.txt +0 -0
da4ml/trace/tracer.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from collections.abc import Sequence
|
|
2
2
|
from decimal import Decimal
|
|
3
|
+
from itertools import chain
|
|
3
4
|
from math import log2
|
|
4
5
|
from typing import overload
|
|
5
6
|
from uuid import UUID
|
|
@@ -11,20 +12,20 @@ from .fixed_variable import FixedVariable, _const_f
|
|
|
11
12
|
from .fixed_variable_array import FixedVariableArray
|
|
12
13
|
|
|
13
14
|
|
|
14
|
-
def
|
|
15
|
-
if v in gathered:
|
|
15
|
+
def _recursive_gather(v: FixedVariable, gathered: dict[UUID, FixedVariable]):
|
|
16
|
+
if v.id in gathered:
|
|
16
17
|
return
|
|
17
18
|
assert v._from is not None
|
|
18
19
|
for _v in v._from:
|
|
19
20
|
if _v.id not in gathered:
|
|
20
|
-
|
|
21
|
+
_recursive_gather(_v, gathered)
|
|
21
22
|
gathered[v.id] = v
|
|
22
23
|
|
|
23
24
|
|
|
24
25
|
def gather_variables(inputs: Sequence[FixedVariable], outputs: Sequence[FixedVariable]):
|
|
25
26
|
gathered = {v.id: v for v in inputs}
|
|
26
27
|
for o in outputs:
|
|
27
|
-
|
|
28
|
+
_recursive_gather(o, gathered)
|
|
28
29
|
|
|
29
30
|
variables = list(gathered.values())
|
|
30
31
|
|
|
@@ -85,6 +86,19 @@ def _comb_trace(inputs: Sequence[FixedVariable], outputs: Sequence[FixedVariable
|
|
|
85
86
|
qint = QInterval(qint.min, qint.min, step)
|
|
86
87
|
data = qint.min / step
|
|
87
88
|
ops.append(Op(-1, -1, 5, int(data), qint, v.latency, v.cost))
|
|
89
|
+
case 'msb_mux':
|
|
90
|
+
qint = v.unscaled.qint
|
|
91
|
+
key, in0, in1 = v._from
|
|
92
|
+
opcode = 6 if in1._factor > 0 else -6
|
|
93
|
+
idk, id0, id1 = index[key.id], index[in0.id], index[in1.id]
|
|
94
|
+
f0, f1 = in0._factor, in1._factor
|
|
95
|
+
shift = int(log2(abs(f1 / f0)))
|
|
96
|
+
data = idk + (shift << 32)
|
|
97
|
+
assert idk < i and id0 < i and id1 < i
|
|
98
|
+
assert key._factor > 0, f'Cannot mux on v{key.id} with negative factor {key._factor}'
|
|
99
|
+
op = Op(id0, id1, opcode, data, qint, v.latency, v.cost)
|
|
100
|
+
ops.append(op)
|
|
101
|
+
|
|
88
102
|
case _:
|
|
89
103
|
raise NotImplementedError(f'Operation "{v.opr}" is not supported in tracing')
|
|
90
104
|
out_index = [index[v.id] for v in outputs]
|
|
@@ -101,8 +115,15 @@ def comb_trace(inputs: FixedVariableArray, outputs: FixedVariableArray) -> Solut
|
|
|
101
115
|
|
|
102
116
|
def comb_trace(inputs, outputs):
|
|
103
117
|
inputs, outputs = list(np.ravel(inputs)), list(np.ravel(outputs))
|
|
104
|
-
|
|
105
|
-
|
|
118
|
+
|
|
119
|
+
if any(not isinstance(v, FixedVariable) for v in outputs):
|
|
120
|
+
hwconf = inputs[0].hwconf
|
|
121
|
+
latency = max(v.latency for v in chain(inputs, outputs) if isinstance(v, FixedVariable))
|
|
122
|
+
outputs = list(outputs)
|
|
123
|
+
for i, v in enumerate(outputs):
|
|
124
|
+
if not isinstance(v, FixedVariable):
|
|
125
|
+
outputs[i] = FixedVariable.from_const(v, hwconf, latency, 1)
|
|
126
|
+
|
|
106
127
|
ops, out_index = _comb_trace(inputs, outputs)
|
|
107
128
|
shape = len(inputs), len(outputs)
|
|
108
129
|
inp_shift = [0] * shape[0]
|
|
@@ -110,7 +131,7 @@ def comb_trace(inputs, outputs):
|
|
|
110
131
|
out_shift = [int(log2(abs(sf))) for sf in out_sf]
|
|
111
132
|
out_neg = [sf < 0 for sf in out_sf]
|
|
112
133
|
|
|
113
|
-
|
|
134
|
+
sol = Solution(
|
|
114
135
|
shape,
|
|
115
136
|
inp_shift,
|
|
116
137
|
out_index,
|
|
@@ -120,3 +141,12 @@ def comb_trace(inputs, outputs):
|
|
|
120
141
|
outputs[0].hwconf.carry_size,
|
|
121
142
|
outputs[0].hwconf.adder_size,
|
|
122
143
|
)
|
|
144
|
+
|
|
145
|
+
ref_count = sol.ref_count
|
|
146
|
+
|
|
147
|
+
for i in range(len(ops)):
|
|
148
|
+
if ref_count[i] == 0:
|
|
149
|
+
op = ops[i]
|
|
150
|
+
sol.ops[i] = Op(-1, -1, op[2], 0, QInterval(0, 0, 1), op[5], op[6])
|
|
151
|
+
|
|
152
|
+
return sol
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: da4ml
|
|
3
|
+
Version: 0.3.0.post1
|
|
4
|
+
Summary: Distributed Arithmetic for Machine Learning
|
|
5
|
+
Author-email: Chang Sun <chsun@cern.ch>
|
|
6
|
+
License: GNU Lesser General Public License v3 (LGPLv3)
|
|
7
|
+
Project-URL: repository, https://github.com/calad0i/da4ml
|
|
8
|
+
Keywords: CMVM,distributed arithmetic,hls4ml,MCM,subexpression elimination
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Requires-Dist: llvmlite>=0.44
|
|
21
|
+
Requires-Dist: numba>=0.61
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
|
|
24
|
+
# da4ml: Distributed Arithmetic for Machine Learning
|
|
25
|
+
|
|
26
|
+
This project performs Constant Matrix-Vector Multiplication (CMVM) with Distributed Arithmetic (DA) for Machine Learning (ML) on Field Programmable Gate Arrays (FPGAs).
|
|
27
|
+
|
|
28
|
+
CMVM optimization is done through greedy CSE of two-term subexpressions, with possible Delay Constraints (DC). The optimization is done in jitted Python (Numba), and a list of optimized operations is generated as traced Python code.
|
|
29
|
+
|
|
30
|
+
The project generates Verilog or Vitis HLS code for the optimized CMVM operations. This project can be used in conjunction with [`hls4ml`](https://github.com/fastmachinelearning/hls4ml/) for optimizing the neural networks deployed on FPGAs. For a subset of neural networks, the full design can be generated standalone in Verilog or Vitis HLS.
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
## Installation
|
|
34
|
+
|
|
35
|
+
The project is available on PyPI and can be installed with pip:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install da4ml
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Note that `numba>=0.61` is required for the project to work. The project does not work with `python<3.10`. If the project fails to compile, try upgrading `numba` and `llvmlite` to the latest versions.
|
|
42
|
+
|
|
43
|
+
## `hls4ml`
|
|
44
|
+
|
|
45
|
+
The major use of this project is through the `distributed_arithmetic` strategy in `hls4ml`:
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
model_hls = hls4ml.converters.convert_from_keras_model(
|
|
49
|
+
model,
|
|
50
|
+
hls_config={
|
|
51
|
+
'Model': {
|
|
52
|
+
...
|
|
53
|
+
'Strategy': 'distributed_arithmetic',
|
|
54
|
+
},
|
|
55
|
+
...
|
|
56
|
+
},
|
|
57
|
+
...
|
|
58
|
+
)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Currently, `Dense/Conv1D/Conv2D` layers are supported for both `io_parallel` and `io_stream` dataflows. However, notice that distributed arithmetic implies `reuse_factor=1`, as the whole kernel is implemented in combinational logic.
|
|
62
|
+
|
|
63
|
+
## Standalone usage
|
|
64
|
+
|
|
65
|
+
### `HGQ2`
|
|
66
|
+
|
|
67
|
+
For some models trained with `HGQ2`, `da4ml` can be used to generate the whole model in Verilog or Vitis HLS:
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from da4ml.codegen import HLSModel, VerilogModel
|
|
71
|
+
from da4ml.converter.hgq2.parser import trace_model
|
|
72
|
+
from da4ml.trace import comb_trace
|
|
73
|
+
|
|
74
|
+
inp, out = trace_model(hgq2_model)
|
|
75
|
+
sol = comb_trace(inp[0], out[0]) # Currently, only models with 1 input and 1 output are supported
|
|
76
|
+
|
|
77
|
+
# Pipelined Verilog model generation
|
|
78
|
+
# `latency_cutoff` is used to control auto pipelining behavior. To disable pipelining, set it to -1.
|
|
79
|
+
verilog_model = VerilogModel(sol, prj_name='barbar', path='/tmp/barbar', latency_cutoff=5)
|
|
80
|
+
verilog_model.compile() # write and verilator binding
|
|
81
|
+
verilog_model.predict(inputs)
|
|
82
|
+
|
|
83
|
+
vitis_hls_model = HLSModel(sol, prj_name='foo', path='/tmp/foo', flavor='vitis') # Only vitis is supported for now
|
|
84
|
+
vitis_hls_model.compile() # write and hls binding
|
|
85
|
+
vitis_hls_model.predict(inputs)
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Functional Definition
|
|
89
|
+
For generic operations, one can define a combinational logic with the functional API:
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
from da4ml.trace import FixedVariableArray, HWConfig, comb_trace
|
|
93
|
+
from da4ml.trace.ops import einsum, relu, quantize, conv, pool
|
|
94
|
+
|
|
95
|
+
# k, i, f are numpy arrays of integers: keep_negative (0/1), integer bits (excl. sign), fractional bits
|
|
96
|
+
inp = FixedVariableArray.from_kif(k, i, f, HWConfig(1, -1, -1), solver_options={'hard_dc':2})
|
|
97
|
+
out = inp @ kernel
|
|
98
|
+
out = relu(out)
|
|
99
|
+
out = einsum(equation, out, weights)
|
|
100
|
+
...
|
|
101
|
+
|
|
102
|
+
comb = comb_trace(inp, out)
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
`+`, `-`, `@` are supported as well as `einsum`, `relu`, `quantize` (WRAP, with TRN or RND), `conv`, `pool` (average only). For multiplications, only power-of-two multipliers are supported, otherwise use `einsum` or `@` operators.
|
|
106
|
+
|
|
107
|
+
`comb_trace` returns a `Solution` object that contains a list of low-level operations that are used to implement the combinational logic, which in turn can be used to generate Verilog or Vitis HLS code.
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
da4ml/__init__.py,sha256=IETRRvzsJvPMLu1kzzi8UN5FYaM5MhNaXH2A_ZKr2_w,469
|
|
2
|
+
da4ml/_version.py,sha256=uYHHQtYrsf_vg1G4qaENpapNqr41eUiWJdo-mm-U-PM,526
|
|
3
|
+
da4ml/cmvm/__init__.py,sha256=4Tbt913k9zP0w8R1p6Oss06v5jrManbUhskyHl6e-U0,154
|
|
4
|
+
da4ml/cmvm/api.py,sha256=JpecMt6g8zutGh_uWT61_0iX8TuXct7-jq7N7HMIsgA,9626
|
|
5
|
+
da4ml/cmvm/types.py,sha256=hdthYdP5muIQ-9qFE0CjObGT7lCxB1-udXU16LxtuBI,20959
|
|
6
|
+
da4ml/cmvm/core/__init__.py,sha256=bp2CXI4EOVOQSho1qwfusNs0RliZRt2dV0hZ33W_Kjo,7703
|
|
7
|
+
da4ml/cmvm/core/indexers.py,sha256=QjXgvExS-B2abHTJPDG4NufMdMEflo1i6cUhFOgJpH4,2945
|
|
8
|
+
da4ml/cmvm/core/state_opr.py,sha256=wLqO8qVuM2-qCE5LDeYJDNkUruIPHy63obsv4-x-aR8,8661
|
|
9
|
+
da4ml/cmvm/util/__init__.py,sha256=DkBlUEKA_Gu7n576ja_xZlAQfToWmNL9VXU-jmj6a-g,145
|
|
10
|
+
da4ml/cmvm/util/bit_decompose.py,sha256=SUco70HRYf4r1JU6BXwcgabDrhm_yAmucae5FC67i4I,2216
|
|
11
|
+
da4ml/cmvm/util/mat_decompose.py,sha256=eSJNlXwx_jxgqt5vLJrSLQaeq2ZXu8j9mC4d-eq883M,4094
|
|
12
|
+
da4ml/codegen/__init__.py,sha256=Chdh3oO_vLR4saLbT9VxBPz_0wlEzxJldFSZaVUJo7U,331
|
|
13
|
+
da4ml/codegen/cpp/__init__.py,sha256=SIePoi_T4iJph50OQUosAnaVuLCckukYjLxp91Y8xQs,134
|
|
14
|
+
da4ml/codegen/cpp/cpp_codegen.py,sha256=6lBF1I-xXdIABEWF60owBmQiISuI6mrITCqLqhsEHrQ,6033
|
|
15
|
+
da4ml/codegen/cpp/hls_model.py,sha256=J5lnB8sAvMy0Bo5MSJOpgyUm1tzEJqBxgPTlOd38Gbg,8978
|
|
16
|
+
da4ml/codegen/cpp/source/binder_util.hh,sha256=pBVmhXIDvdCr8n2wwYehc3Fpp60sWYrrZaDoP3x9JZE,1880
|
|
17
|
+
da4ml/codegen/cpp/source/build_binder.mk,sha256=RLu4TP28aJsveyMOHxuDRGEJVoIPMo9T8WyPtqnmtbQ,584
|
|
18
|
+
da4ml/codegen/cpp/source/vitis_bitshift.hh,sha256=yFpYCVJ8gof-EzPjkIWWZYmdFh_wk133Pxzs7f61IQo,774
|
|
19
|
+
da4ml/codegen/cpp/source/ap_types/ap_binary.h,sha256=yOcafu2IofstDqxn0wDq8vY3JIwZQ9H5z6IY1dEqMr0,2764
|
|
20
|
+
da4ml/codegen/cpp/source/ap_types/ap_common.h,sha256=1hJY9uvKOdwRSSll5uehUISZR4tsSsQ1z4PNRUc44KU,10180
|
|
21
|
+
da4ml/codegen/cpp/source/ap_types/ap_decl.h,sha256=z1HsH-2RSvSoofTZR7RHeqIfAnEYVuHcIu_ute9gjEg,6473
|
|
22
|
+
da4ml/codegen/cpp/source/ap_types/ap_fixed.h,sha256=3ld4qyF475nDto57AHcsLd-PfoJ7dlplDoZPLXIo6d4,12185
|
|
23
|
+
da4ml/codegen/cpp/source/ap_types/ap_fixed_base.h,sha256=Cd1AJQZjHxVKbvo4w9a9ylkEyNjdXHR7VF9iUoGTb0o,85182
|
|
24
|
+
da4ml/codegen/cpp/source/ap_types/ap_fixed_ref.h,sha256=TO9yZqdWf0VksXmG4SN9_n_CDYQVWU4yuja0YfkrQCw,27302
|
|
25
|
+
da4ml/codegen/cpp/source/ap_types/ap_fixed_special.h,sha256=yXfQnjAc8vJv5T6R9a4L_eA0U_a0ypzK_RSn8yqzt_s,6985
|
|
26
|
+
da4ml/codegen/cpp/source/ap_types/ap_int.h,sha256=nTiyrFN8IPCGRs5RYpCkLT9y4IxaqoRUHtIbpUiOLNA,10012
|
|
27
|
+
da4ml/codegen/cpp/source/ap_types/ap_int_base.h,sha256=Kt4QjfUW85r8lxjY4ESqelR_CnpM0ubb4K5d2G03GMQ,71735
|
|
28
|
+
da4ml/codegen/cpp/source/ap_types/ap_int_ref.h,sha256=5rsOdablweC9hKGtQ8Kktr077sEQ91gzSH5G5hM7m5Y,55218
|
|
29
|
+
da4ml/codegen/cpp/source/ap_types/ap_int_special.h,sha256=HIvRRuiKGpAnCpigURX0cOQUX88dbp3lGkUWpbglMCI,6301
|
|
30
|
+
da4ml/codegen/cpp/source/ap_types/ap_shift_reg.h,sha256=wqe8j3ikbdZiXwYsYlAsFbOFeQLhYXIbKoRC6fJGeuc,4894
|
|
31
|
+
da4ml/codegen/cpp/source/ap_types/hls_math.h,sha256=abFBoZzYjm_pfC2wkuclVh1HuvYJ_YobnN-1Q99GRic,674
|
|
32
|
+
da4ml/codegen/cpp/source/ap_types/hls_stream.h,sha256=NTkVfbE48c6XnMIfR9WzJbDwUnfe6y19xJXxBS3G--I,7480
|
|
33
|
+
da4ml/codegen/cpp/source/ap_types/etc/ap_private.h,sha256=TDdxGIX0r3D6Ql8KeXoceRmHhdlwFA3Akr3-vvMVAtk,261465
|
|
34
|
+
da4ml/codegen/cpp/source/ap_types/utils/x_hls_utils.h,sha256=x24cf1HyZKv0J8YQIoUvYE3uw6SNL7vWetRGIiFm2Jw,2227
|
|
35
|
+
da4ml/codegen/verilog/__init__.py,sha256=rXmW2V9sDp2RYMDAWlhj_gfMXH3G5lPNmLrFtsJjn_A,298
|
|
36
|
+
da4ml/codegen/verilog/comb.py,sha256=CmCwiddeiT4TCZV088lF2ENlAXx3vjZKszTz1sYXEao,7614
|
|
37
|
+
da4ml/codegen/verilog/io_wrapper.py,sha256=SSs-ZRhBVLR6tpFso8GNGk-FH6JDe-p7LPvVPjTspxo,5002
|
|
38
|
+
da4ml/codegen/verilog/pipeline.py,sha256=YsPRTLp04Aofg33QMw6_ga3fNX9LeCD7Pq2PnERLWOg,2377
|
|
39
|
+
da4ml/codegen/verilog/verilog_model.py,sha256=_50dggtH24xMdI0beuyvdsv8G8dlB4MWa1m8KWZQdNE,12295
|
|
40
|
+
da4ml/codegen/verilog/source/binder_util.hh,sha256=Dn9ysUdonw0HR8bxom8YfQF7vc1LEvT_B1V_o8Gw1rY,2503
|
|
41
|
+
da4ml/codegen/verilog/source/build_binder.mk,sha256=rQbI98itE_b1wIQ_0uCXfBzNmGK2XT4vWmRyCJNnPKk,960
|
|
42
|
+
da4ml/codegen/verilog/source/build_prj.tcl,sha256=bcFCpcHR26TJGOQZEpUx0eM1SEiJOCoH-9EPpIvqWu0,3124
|
|
43
|
+
da4ml/codegen/verilog/source/ioutil.hh,sha256=1o1-oIyQyYc9CU91bBxuitVzzcrNT8p4MTarFKiJoG4,3967
|
|
44
|
+
da4ml/codegen/verilog/source/mux.v,sha256=1PMSQKGR_Cku1EQnePBVCuX6we_dqYBXW54WBEURvs0,1928
|
|
45
|
+
da4ml/codegen/verilog/source/negative.v,sha256=YphTCLnYslktsnCPq1xjbYgIFavani5NBbqs20uwhBI,688
|
|
46
|
+
da4ml/codegen/verilog/source/shift_adder.v,sha256=qrpXBX9bhHI-o75v5zshOfq0giEATvbeGgTir20_S3Q,1915
|
|
47
|
+
da4ml/codegen/verilog/source/template.xdc,sha256=GlSRy8tw_orohSuUwUSNEYJLLkAAHttGTfLTcQqRQDg,1262
|
|
48
|
+
da4ml/converter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
49
|
+
da4ml/converter/hgq2/parser.py,sha256=bAtnEXQxRKU9a1HFJWTy-e_HDzZY_wXOBVdyYG3ndsM,3826
|
|
50
|
+
da4ml/converter/hgq2/replica.py,sha256=9ICJGfK2Q2C_glwE0KMcvXttuWvJYRblkO7RLmalzss,13829
|
|
51
|
+
da4ml/trace/__init__.py,sha256=dv-rti3t8iE0RqeThfOb40mAg8FZB2WkkGQq3enJft0,282
|
|
52
|
+
da4ml/trace/fixed_variable.py,sha256=6dfMHBN1NfqYIbPZ79GCPCXj2JFQUKTyDZu6xDaG3rg,17082
|
|
53
|
+
da4ml/trace/fixed_variable_array.py,sha256=A0ApTvZxpkr7kHrUQkyhrGJuuPe4kDgLFyD_1CW7lBk,10985
|
|
54
|
+
da4ml/trace/pipeline.py,sha256=_R2uqWgnpuQ4tD7VKz2eu8CF9Air2RtYH2o03Vfg0Mk,5353
|
|
55
|
+
da4ml/trace/tracer.py,sha256=NqPEH9hyVlGQOf9_kJL3A7SujCcxkT-z28bk0Ael5jE,5664
|
|
56
|
+
da4ml/trace/ops/__init__.py,sha256=I4VqB43lVkFlLtkoWxiSDHBFGvxKwutNbAJw5aLVeAI,2108
|
|
57
|
+
da4ml/trace/ops/conv_utils.py,sha256=Yn73t4F6Tcs1hBwK08L1DPOin2HYVcng4PSkU4vuZFo,8245
|
|
58
|
+
da4ml/trace/ops/einsum_utils.py,sha256=MoWvOfvtVjXGwqEhXEzZ3uGrgSmLTHngV8I1eLyANGE,11433
|
|
59
|
+
da4ml/trace/ops/reduce_utils.py,sha256=8gohGQRVr8Bn5rfyrGsnE8EDxUXAObv521qu4mJrX9I,3348
|
|
60
|
+
da4ml-0.3.0.post1.dist-info/licenses/LICENSE,sha256=46mU2C5kSwOnkqkw9XQAJlhBL2JAf1_uCD8lVcXyMRg,7652
|
|
61
|
+
da4ml-0.3.0.post1.dist-info/METADATA,sha256=PTn1XMH7eBRfw3nLUqD1OzktCsH6V9SzxqXw3wK5ShE,4575
|
|
62
|
+
da4ml-0.3.0.post1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
63
|
+
da4ml-0.3.0.post1.dist-info/top_level.txt,sha256=N0tnKVwRqFiffFdeAzCgFq71hUNySh5-ITbNd6-R58Q,6
|
|
64
|
+
da4ml-0.3.0.post1.dist-info/RECORD,,
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
#pragma once
|
|
2
|
-
#include "ap_fixed.h"
|
|
3
|
-
|
|
4
|
-
template <typename inp_t, typename out_t, size_t SIZE_IN, size_t SIZE_OUT, typename F>
|
|
5
|
-
void vitis_bridge(F f, double *inp, double *out, int size) {
|
|
6
|
-
inp_t in_fixed_buf[SIZE_IN];
|
|
7
|
-
out_t out_fixed_buf[SIZE_OUT];
|
|
8
|
-
for (int i = 0; i < size; i++) {
|
|
9
|
-
for (int j = 0; j < SIZE_IN; j++) {
|
|
10
|
-
in_fixed_buf[j] = inp_t(inp[i * SIZE_IN + j]);
|
|
11
|
-
}
|
|
12
|
-
f(in_fixed_buf, out_fixed_buf);
|
|
13
|
-
for (int j = 0; j < SIZE_OUT; j++) {
|
|
14
|
-
out[i * SIZE_OUT + j] = double(out_fixed_buf[j]);
|
|
15
|
-
}
|
|
16
|
-
}
|
|
17
|
-
}
|
da4ml-0.2.1.dist-info/METADATA
DELETED
|
@@ -1,65 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: da4ml
|
|
3
|
-
Version: 0.2.1
|
|
4
|
-
Summary: Digital Arithmetic for Machine Learning
|
|
5
|
-
Author-email: Chang Sun <chsun@cern.ch>
|
|
6
|
-
License: GNU Lesser General Public License v3 (LGPLv3)
|
|
7
|
-
Project-URL: repository, https://github.com/calad0i/da4ml
|
|
8
|
-
Keywords: CMVM,distributed arithmetic,hls4ml,MCM,subexpression elimination
|
|
9
|
-
Classifier: Development Status :: 4 - Beta
|
|
10
|
-
Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)
|
|
11
|
-
Classifier: Operating System :: OS Independent
|
|
12
|
-
Classifier: Programming Language :: Python :: 3 :: Only
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
-
Requires-Python: >=3.10
|
|
18
|
-
Description-Content-Type: text/markdown
|
|
19
|
-
License-File: LICENSE
|
|
20
|
-
Requires-Dist: llvmlite>=0.44
|
|
21
|
-
Requires-Dist: numba>=0.61
|
|
22
|
-
Dynamic: license-file
|
|
23
|
-
|
|
24
|
-
# da4ml: Distributed Arithmetic for Machine Learning
|
|
25
|
-
|
|
26
|
-
This project performs Constant Matrix-Vector Multiplication (CMVM) with Distributed Arithmetic (DA) for Machine Learning (ML) on Field Programmable Gate Arrays (FPGAs).
|
|
27
|
-
|
|
28
|
-
CMVM optimization is done through greedy CSE of two-term subexpressions, with possible Delay Constraints (DC). The optimization is done in jitted Python (Numba), and a list of optimized operations is generated as traced Python code.
|
|
29
|
-
|
|
30
|
-
At the moment, the project only generates Vitis HLS C++ code for the FPGA implementation of the optimized CMVM kernel. HDL code generation is planned for the future. Currently, the major use of this repository is through the `distributed_arithmetic` strategy in the [`hls4ml`](https://github.com/fastmachinelearning/hls4ml/) project.
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
## Installation
|
|
34
|
-
|
|
35
|
-
The project is available on PyPI and can be installed with pip:
|
|
36
|
-
|
|
37
|
-
```bash
|
|
38
|
-
pip install da4ml
|
|
39
|
-
```
|
|
40
|
-
|
|
41
|
-
Note that `numba>=0.61` is required for the project to work. The project does not work with `python<3.10`. If the project fails to compile, try upgrading `numba` and `llvmlite` to the latest versions.
|
|
42
|
-
|
|
43
|
-
## `hls4ml`
|
|
44
|
-
|
|
45
|
-
The major use of this project is through the `distributed_arithmetic` strategy in the `hls4ml`:
|
|
46
|
-
|
|
47
|
-
```python
|
|
48
|
-
model_hls = hls4ml.converters.convert_from_keras_model(
|
|
49
|
-
model,
|
|
50
|
-
hls_config={
|
|
51
|
-
'Model': {
|
|
52
|
-
...
|
|
53
|
-
'Strategy': 'distributed_arithmetic',
|
|
54
|
-
},
|
|
55
|
-
...
|
|
56
|
-
},
|
|
57
|
-
...
|
|
58
|
-
)
|
|
59
|
-
```
|
|
60
|
-
|
|
61
|
-
Currently, `Dense/Conv1D/Conv2D` layers are supported for both `io_parallel` and `io_stream` dataflows. However, notice that distributed arithmetic implies `reuse_factor=1`, as the whole kernel is implemented in combinational logic.
|
|
62
|
-
|
|
63
|
-
### Notice
|
|
64
|
-
|
|
65
|
-
Currently, only the `da4ml-v3` branch of `hls4ml` supports the `distributed_arithmetic` strategy. The `da4ml-v3` branch is not yet merged into the `main` branch of `hls4ml`, so you need to install it from the GitHub repository.
|
da4ml-0.2.1.dist-info/RECORD
DELETED
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
da4ml/__init__.py,sha256=IETRRvzsJvPMLu1kzzi8UN5FYaM5MhNaXH2A_ZKr2_w,469
|
|
2
|
-
da4ml/_version.py,sha256=UoNvMtd4wCG76RwoSpNCUtaFyTwakGcZolfjXzNVSMY,511
|
|
3
|
-
da4ml/cmvm/__init__.py,sha256=4Tbt913k9zP0w8R1p6Oss06v5jrManbUhskyHl6e-U0,154
|
|
4
|
-
da4ml/cmvm/api.py,sha256=JpecMt6g8zutGh_uWT61_0iX8TuXct7-jq7N7HMIsgA,9626
|
|
5
|
-
da4ml/cmvm/types.py,sha256=MckE6hBnRX2bMvT86CjvyxAMSK7grCrCRn2f_f3qgAw,17844
|
|
6
|
-
da4ml/cmvm/core/__init__.py,sha256=bp2CXI4EOVOQSho1qwfusNs0RliZRt2dV0hZ33W_Kjo,7703
|
|
7
|
-
da4ml/cmvm/core/indexers.py,sha256=QjXgvExS-B2abHTJPDG4NufMdMEflo1i6cUhFOgJpH4,2945
|
|
8
|
-
da4ml/cmvm/core/state_opr.py,sha256=wLqO8qVuM2-qCE5LDeYJDNkUruIPHy63obsv4-x-aR8,8661
|
|
9
|
-
da4ml/cmvm/util/__init__.py,sha256=DkBlUEKA_Gu7n576ja_xZlAQfToWmNL9VXU-jmj6a-g,145
|
|
10
|
-
da4ml/cmvm/util/bit_decompose.py,sha256=SUco70HRYf4r1JU6BXwcgabDrhm_yAmucae5FC67i4I,2216
|
|
11
|
-
da4ml/cmvm/util/mat_decompose.py,sha256=eSJNlXwx_jxgqt5vLJrSLQaeq2ZXu8j9mC4d-eq883M,4094
|
|
12
|
-
da4ml/codegen/__init__.py,sha256=g58EgubgPPoiwRTBduSzm6hAc-poPcK6egdoECfPx9o,329
|
|
13
|
-
da4ml/codegen/cpp/__init__.py,sha256=Tw4XeU_oJsyUkTrsfEPuZ-r0rGAo8E2NX5wn_VTA7NM,90
|
|
14
|
-
da4ml/codegen/cpp/cpp_codegen.py,sha256=FnVPgD8McFFdrecdI1u_ybDLQ0RFuVpJ0xO5Ne1D8j0,4811
|
|
15
|
-
da4ml/codegen/cpp/source/vitis.h,sha256=ovEefBOfW5-PXuDdRObPGNokGGFHiixDCpPWeTN6aTo,765
|
|
16
|
-
da4ml/codegen/cpp/source/vitis_bridge.h,sha256=XvvGw3A4eAaXKi5jp50bMKUsNfd5iQ-HhUKtsty1uns,567
|
|
17
|
-
da4ml/codegen/verilog/__init__.py,sha256=obRTdtMWhPHsxFHg2ADoPd3iDBEX8nk_6HuCet5EDz0,356
|
|
18
|
-
da4ml/codegen/verilog/comb.py,sha256=EZONCceEvIKHHF8yLY-i2V_U_8THw_dJEQWujjCJ5iI,5592
|
|
19
|
-
da4ml/codegen/verilog/io_wrapper.py,sha256=TrfJpJxU4uPTGW02_uFb7cjhFlhuVjH3rY0iWuf-lYk,8003
|
|
20
|
-
da4ml/codegen/verilog/pipeline.py,sha256=YsPRTLp04Aofg33QMw6_ga3fNX9LeCD7Pq2PnERLWOg,2377
|
|
21
|
-
da4ml/codegen/verilog/verilog_model.py,sha256=xTH4-B3PG0jJtQ84NiADvHgU5JbkMEoz_UGuspMHh4Y,10869
|
|
22
|
-
da4ml/codegen/verilog/source/build_binder.mk,sha256=rQbI98itE_b1wIQ_0uCXfBzNmGK2XT4vWmRyCJNnPKk,960
|
|
23
|
-
da4ml/codegen/verilog/source/build_prj.tcl,sha256=bcFCpcHR26TJGOQZEpUx0eM1SEiJOCoH-9EPpIvqWu0,3124
|
|
24
|
-
da4ml/codegen/verilog/source/ioutils.hh,sha256=1o1-oIyQyYc9CU91bBxuitVzzcrNT8p4MTarFKiJoG4,3967
|
|
25
|
-
da4ml/codegen/verilog/source/shift_adder.v,sha256=l2ofym56Y-_PeeY9fwkcZeW9MzrTL_WxvSTvoWERJrU,1885
|
|
26
|
-
da4ml/codegen/verilog/source/template.xdc,sha256=ON8i-TK96Yo6FoZ66WzcVKELajTF5TBmbWFbEilna2U,1142
|
|
27
|
-
da4ml/trace/__init__.py,sha256=1br9bWeFb33t69k6h1XQ50iJhLCqrRuEHtqEawELp-c,230
|
|
28
|
-
da4ml/trace/fixed_variable.py,sha256=DthYqQJt2JD2t6X9nuNRPi80SQ7XriXdyXqw5CMR95Y,11669
|
|
29
|
-
da4ml/trace/fixed_variable_array.py,sha256=7Ds92DLnVYMK6_G_l6DWCxgWf7Y8frBiyBj-vaqeWKk,6929
|
|
30
|
-
da4ml/trace/pipeline.py,sha256=dYduPBNUeyW2Ws392hZNGJEo0qI5ynpn-iC2n7UVahk,5687
|
|
31
|
-
da4ml/trace/tracer.py,sha256=xEQQNHkJ8VFt8cDFISzhX6fNVi2JVfgfCg2aca80E2c,4597
|
|
32
|
-
da4ml/trace/ops/__init__.py,sha256=qz0DLPUyxBAu08RCN22kCkJj1EPKanC8ey8NB3_K8co,1640
|
|
33
|
-
da4ml/trace/ops/conv_utils.py,sha256=LtgP3iSZ3fNV6QkEVBzT7ixt-7WTdmBDrFTtQ_9D5aE,3638
|
|
34
|
-
da4ml/trace/ops/einsum_utils.py,sha256=miyMyzJwBLpLTEzXU4vErPE1Xk-ckZG0cjhd13MLAuA,11325
|
|
35
|
-
da4ml-0.2.1.dist-info/licenses/LICENSE,sha256=46mU2C5kSwOnkqkw9XQAJlhBL2JAf1_uCD8lVcXyMRg,7652
|
|
36
|
-
da4ml-0.2.1.dist-info/METADATA,sha256=4Kj_ehj1jTDa21JMDIsujUwUzpv7csj2oB23ib3y998,2849
|
|
37
|
-
da4ml-0.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
38
|
-
da4ml-0.2.1.dist-info/top_level.txt,sha256=N0tnKVwRqFiffFdeAzCgFq71hUNySh5-ITbNd6-R58Q,6
|
|
39
|
-
da4ml-0.2.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|