da4ml 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of da4ml might be problematic. Click here for more details.

Files changed (55) hide show
  1. da4ml/_version.py +2 -2
  2. da4ml/cmvm/types.py +95 -15
  3. da4ml/codegen/__init__.py +5 -4
  4. da4ml/codegen/cpp/__init__.py +2 -1
  5. da4ml/codegen/cpp/cpp_codegen.py +56 -23
  6. da4ml/codegen/cpp/hls_model.py +252 -0
  7. da4ml/codegen/cpp/source/ap_types/ap_binary.h +78 -0
  8. da4ml/codegen/cpp/source/ap_types/ap_common.h +376 -0
  9. da4ml/codegen/cpp/source/ap_types/ap_decl.h +212 -0
  10. da4ml/codegen/cpp/source/ap_types/ap_fixed.h +360 -0
  11. da4ml/codegen/cpp/source/ap_types/ap_fixed_base.h +2354 -0
  12. da4ml/codegen/cpp/source/ap_types/ap_fixed_ref.h +718 -0
  13. da4ml/codegen/cpp/source/ap_types/ap_fixed_special.h +230 -0
  14. da4ml/codegen/cpp/source/ap_types/ap_int.h +330 -0
  15. da4ml/codegen/cpp/source/ap_types/ap_int_base.h +1885 -0
  16. da4ml/codegen/cpp/source/ap_types/ap_int_ref.h +1346 -0
  17. da4ml/codegen/cpp/source/ap_types/ap_int_special.h +223 -0
  18. da4ml/codegen/cpp/source/ap_types/ap_shift_reg.h +138 -0
  19. da4ml/codegen/cpp/source/ap_types/etc/ap_private.h +7199 -0
  20. da4ml/codegen/cpp/source/ap_types/hls_math.h +27 -0
  21. da4ml/codegen/cpp/source/ap_types/hls_stream.h +263 -0
  22. da4ml/codegen/cpp/source/ap_types/utils/x_hls_utils.h +80 -0
  23. da4ml/codegen/cpp/source/binder_util.hh +56 -0
  24. da4ml/codegen/cpp/source/build_binder.mk +24 -0
  25. da4ml/codegen/cpp/source/{vitis.h → vitis_bitshift.hh} +1 -1
  26. da4ml/codegen/verilog/__init__.py +2 -3
  27. da4ml/codegen/verilog/comb.py +65 -24
  28. da4ml/codegen/verilog/io_wrapper.py +36 -141
  29. da4ml/codegen/verilog/source/binder_util.hh +72 -0
  30. da4ml/codegen/verilog/source/mux.v +58 -0
  31. da4ml/codegen/verilog/source/negative.v +28 -0
  32. da4ml/codegen/verilog/source/shift_adder.v +4 -1
  33. da4ml/codegen/verilog/source/template.xdc +3 -0
  34. da4ml/codegen/verilog/verilog_model.py +36 -12
  35. da4ml/converter/__init__.py +0 -0
  36. da4ml/converter/hgq2/parser.py +105 -0
  37. da4ml/converter/hgq2/replica.py +383 -0
  38. da4ml/trace/__init__.py +2 -2
  39. da4ml/trace/fixed_variable.py +175 -16
  40. da4ml/trace/fixed_variable_array.py +109 -4
  41. da4ml/trace/ops/__init__.py +22 -6
  42. da4ml/trace/ops/conv_utils.py +146 -14
  43. da4ml/trace/ops/einsum_utils.py +9 -6
  44. da4ml/trace/ops/reduce_utils.py +103 -0
  45. da4ml/trace/pipeline.py +36 -34
  46. da4ml/trace/tracer.py +37 -7
  47. da4ml-0.3.0.dist-info/METADATA +107 -0
  48. da4ml-0.3.0.dist-info/RECORD +64 -0
  49. da4ml/codegen/cpp/source/vitis_bridge.h +0 -17
  50. da4ml-0.2.1.dist-info/METADATA +0 -65
  51. da4ml-0.2.1.dist-info/RECORD +0 -39
  52. /da4ml/codegen/verilog/source/{ioutils.hh → ioutil.hh} +0 -0
  53. {da4ml-0.2.1.dist-info → da4ml-0.3.0.dist-info}/WHEEL +0 -0
  54. {da4ml-0.2.1.dist-info → da4ml-0.3.0.dist-info}/licenses/LICENSE +0 -0
  55. {da4ml-0.2.1.dist-info → da4ml-0.3.0.dist-info}/top_level.txt +0 -0
da4ml/trace/tracer.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from collections.abc import Sequence
2
2
  from decimal import Decimal
3
+ from itertools import chain
3
4
  from math import log2
4
5
  from typing import overload
5
6
  from uuid import UUID
@@ -11,20 +12,20 @@ from .fixed_variable import FixedVariable, _const_f
11
12
  from .fixed_variable_array import FixedVariableArray
12
13
 
13
14
 
14
- def _recursive_trace(v: FixedVariable, gathered: dict[UUID, FixedVariable]):
15
- if v in gathered:
15
+ def _recursive_gather(v: FixedVariable, gathered: dict[UUID, FixedVariable]):
16
+ if v.id in gathered:
16
17
  return
17
18
  assert v._from is not None
18
19
  for _v in v._from:
19
20
  if _v.id not in gathered:
20
- _recursive_trace(_v, gathered)
21
+ _recursive_gather(_v, gathered)
21
22
  gathered[v.id] = v
22
23
 
23
24
 
24
25
  def gather_variables(inputs: Sequence[FixedVariable], outputs: Sequence[FixedVariable]):
25
26
  gathered = {v.id: v for v in inputs}
26
27
  for o in outputs:
27
- _recursive_trace(o, gathered)
28
+ _recursive_gather(o, gathered)
28
29
 
29
30
  variables = list(gathered.values())
30
31
 
@@ -85,6 +86,19 @@ def _comb_trace(inputs: Sequence[FixedVariable], outputs: Sequence[FixedVariable
85
86
  qint = QInterval(qint.min, qint.min, step)
86
87
  data = qint.min / step
87
88
  ops.append(Op(-1, -1, 5, int(data), qint, v.latency, v.cost))
89
+ case 'msb_mux':
90
+ qint = v.unscaled.qint
91
+ key, in0, in1 = v._from
92
+ opcode = 6 if in1._factor > 0 else -6
93
+ idk, id0, id1 = index[key.id], index[in0.id], index[in1.id]
94
+ f0, f1 = in0._factor, in1._factor
95
+ shift = int(log2(abs(f1 / f0)))
96
+ data = idk + (shift << 32)
97
+ assert idk < i and id0 < i and id1 < i
98
+ assert key._factor > 0, f'Cannot mux on v{key.id} with negative factor {key._factor}'
99
+ op = Op(id0, id1, opcode, data, qint, v.latency, v.cost)
100
+ ops.append(op)
101
+
88
102
  case _:
89
103
  raise NotImplementedError(f'Operation "{v.opr}" is not supported in tracing')
90
104
  out_index = [index[v.id] for v in outputs]
@@ -101,8 +115,15 @@ def comb_trace(inputs: FixedVariableArray, outputs: FixedVariableArray) -> Solut
101
115
 
102
116
  def comb_trace(inputs, outputs):
103
117
  inputs, outputs = list(np.ravel(inputs)), list(np.ravel(outputs))
104
- # latency = max(v.latency if isinstance(v, FixedVariable) else 0 for v in outputs)
105
- # outputs = [v if isinstance(v, FixedVariable) else FixedVariable(v,v,0, latency=latency, opr='const') for v in outputs]
118
+
119
+ if any(not isinstance(v, FixedVariable) for v in outputs):
120
+ hwconf = inputs[0].hwconf
121
+ latency = max(v.latency for v in chain(inputs, outputs) if isinstance(v, FixedVariable))
122
+ outputs = list(outputs)
123
+ for i, v in enumerate(outputs):
124
+ if not isinstance(v, FixedVariable):
125
+ outputs[i] = FixedVariable.from_const(v, hwconf, latency, 1)
126
+
106
127
  ops, out_index = _comb_trace(inputs, outputs)
107
128
  shape = len(inputs), len(outputs)
108
129
  inp_shift = [0] * shape[0]
@@ -110,7 +131,7 @@ def comb_trace(inputs, outputs):
110
131
  out_shift = [int(log2(abs(sf))) for sf in out_sf]
111
132
  out_neg = [sf < 0 for sf in out_sf]
112
133
 
113
- return Solution(
134
+ sol = Solution(
114
135
  shape,
115
136
  inp_shift,
116
137
  out_index,
@@ -120,3 +141,12 @@ def comb_trace(inputs, outputs):
120
141
  outputs[0].hwconf.carry_size,
121
142
  outputs[0].hwconf.adder_size,
122
143
  )
144
+
145
+ ref_count = sol.ref_count
146
+
147
+ for i in range(len(ops)):
148
+ if ref_count[i] == 0:
149
+ op = ops[i]
150
+ sol.ops[i] = Op(-1, -1, op[2], 0, QInterval(0, 0, 1), op[5], op[6])
151
+
152
+ return sol
@@ -0,0 +1,107 @@
1
+ Metadata-Version: 2.4
2
+ Name: da4ml
3
+ Version: 0.3.0
4
+ Summary: Digital Arithmetic for Machine Learning
5
+ Author-email: Chang Sun <chsun@cern.ch>
6
+ License: GNU Lesser General Public License v3 (LGPLv3)
7
+ Project-URL: repository, https://github.com/calad0i/da4ml
8
+ Keywords: CMVM,distributed arithmetic,hls4ml,MCM,subexpression elimination
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python :: 3 :: Only
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Requires-Python: >=3.10
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ Requires-Dist: llvmlite>=0.44
21
+ Requires-Dist: numba>=0.61
22
+ Dynamic: license-file
23
+
24
+ # da4ml: Distributed Arithmetic for Machine Learning
25
+
26
+ This project performs Constant Matrix-Vector Multiplication (CMVM) with Distributed Arithmetic (DA) for Machine Learning (ML) on Field Programmable Gate Arrays (FPGAs).
27
+
28
+ CMVM optimization is done through greedy CSE of two-term subexpressions, with possible Delay Constraints (DC). The optimization is done in jitted Python (Numba), and a list of optimized operations is generated as traced Python code.
29
+
30
+ The project generates Verilog or Vitis HLS code for the optimized CMVM operations. This project can be used in conjunction with [`hls4ml`](https://github.com/fastmachinelearning/hls4ml/) for optimizing the neural networks deployed on FPGAs. For a subset of neural networks, the full design can be generated standalone in Verilog or Vitis HLS.
31
+
32
+
33
+ ## Installation
34
+
35
+ The project is available on PyPI and can be installed with pip:
36
+
37
+ ```bash
38
+ pip install da4ml
39
+ ```
40
+
41
+ Notice that `numba>=0.61` is required for the project to work. The project does not work with `python<3.10`. If the project fails to compile, try upgrading `numba` and `llvmlite` to the latest versions.
42
+
43
+ ## `hls4ml`
44
+
45
+ The major use of this project is through the `distributed_arithmetic` strategy in `hls4ml`:
46
+
47
+ ```python
48
+ model_hls = hls4ml.converters.convert_from_keras_model(
49
+ model,
50
+ hls_config={
51
+ 'Model': {
52
+ ...
53
+ 'Strategy': 'distributed_arithmetic',
54
+ },
55
+ ...
56
+ },
57
+ ...
58
+ )
59
+ ```
60
+
61
+ Currently, `Dense/Conv1D/Conv2D` layers are supported for both `io_parallel` and `io_stream` dataflows. However, notice that distributed arithmetic implies `reuse_factor=1`, as the whole kernel is implemented in combinational logic.
62
+
63
+ ## Standalone usage
64
+
65
+ ### `HGQ2`
66
+
67
+ For some models trained with `HGQ2`, `da4ml` can be used to generate the whole model in Verilog or Vitis HLS:
68
+
69
+ ```python
70
+ from da4ml.codegen import HLSModel, VerilogModel
71
+ from da4ml.converter.hgq2.parser import trace_model
72
+ from da4ml.trace import comb_trace
73
+
74
+ inp, out = trace_model(hgq2_model)
75
+ sol = comb_trace(inp[0], out[0]) # Currently, only models with 1 input and 1 output are supported
76
+
77
+ # Pipelined Verilog model generation
78
+ # `latency_cutoff` is used to control auto pipelining behavior. To disable pipelining, set it to -1.
79
+ verilog_model = VerilogModel(sol, prj_name='barbar', path='/tmp/barbar', latency_cutoff=5)
80
+ verilog_model.compile() # write and verilator binding
81
+ verilog_model.predict(inputs)
82
+
83
+ vitis_hls_model = HLSModel(sol, prj_name='foo', path='/tmp/foo', flavor='vitis') # Only vitis is supported for now
84
+ vitis_hls_model.compile() # write and hls binding
85
+ vitis_hls_model.predict(inputs)
86
+ ```
87
+
88
+ ### Functional Definition
89
+ For generic operations, one can define a combinational logic with the functional API:
90
+
91
+ ```python
92
+ from da4ml.trace import FixedVariableArray, HWConfig, comb_trace
93
+ from da4ml.trace.ops import einsum, relu, quantize, conv, pool
94
+
95
+ # k, i, f are numpy arrays of integers: keep_negative (0/1), integer bits (excl. sign), fractional bits
96
+ inp = FixedVariableArray.from_kif(k, i, f, HWConfig(1, -1, -1), solver_options={'hard_dc':2})
97
+ out = inp @ kernel
98
+ out = relu(out)
99
+ out = einsum(equation, out, weights)
100
+ ...
101
+
102
+ comb = comb_trace(inp, out)
103
+ ```
104
+
105
+ `+`, `-`, `@` are supported as well as `einsum`, `relu`, `quantize` (WRAP, with TRN or RND), `conv`, `pool` (average only). For multiplications, only power-of-two multipliers are supported; otherwise, use the `einsum` or `@` operators.
106
+
107
+ `comb_trace` returns a `Solution` object that contains a list of low-level operations that are used to implement the combinational logic, which in turn can be used to generate Verilog or Vitis HLS code.
@@ -0,0 +1,64 @@
1
+ da4ml/__init__.py,sha256=IETRRvzsJvPMLu1kzzi8UN5FYaM5MhNaXH2A_ZKr2_w,469
2
+ da4ml/_version.py,sha256=AGmG_Lx0-9ztFw_7d9mYbaYuC-2abxE1oXOUNAY29YY,511
3
+ da4ml/cmvm/__init__.py,sha256=4Tbt913k9zP0w8R1p6Oss06v5jrManbUhskyHl6e-U0,154
4
+ da4ml/cmvm/api.py,sha256=JpecMt6g8zutGh_uWT61_0iX8TuXct7-jq7N7HMIsgA,9626
5
+ da4ml/cmvm/types.py,sha256=hdthYdP5muIQ-9qFE0CjObGT7lCxB1-udXU16LxtuBI,20959
6
+ da4ml/cmvm/core/__init__.py,sha256=bp2CXI4EOVOQSho1qwfusNs0RliZRt2dV0hZ33W_Kjo,7703
7
+ da4ml/cmvm/core/indexers.py,sha256=QjXgvExS-B2abHTJPDG4NufMdMEflo1i6cUhFOgJpH4,2945
8
+ da4ml/cmvm/core/state_opr.py,sha256=wLqO8qVuM2-qCE5LDeYJDNkUruIPHy63obsv4-x-aR8,8661
9
+ da4ml/cmvm/util/__init__.py,sha256=DkBlUEKA_Gu7n576ja_xZlAQfToWmNL9VXU-jmj6a-g,145
10
+ da4ml/cmvm/util/bit_decompose.py,sha256=SUco70HRYf4r1JU6BXwcgabDrhm_yAmucae5FC67i4I,2216
11
+ da4ml/cmvm/util/mat_decompose.py,sha256=eSJNlXwx_jxgqt5vLJrSLQaeq2ZXu8j9mC4d-eq883M,4094
12
+ da4ml/codegen/__init__.py,sha256=Chdh3oO_vLR4saLbT9VxBPz_0wlEzxJldFSZaVUJo7U,331
13
+ da4ml/codegen/cpp/__init__.py,sha256=SIePoi_T4iJph50OQUosAnaVuLCckukYjLxp91Y8xQs,134
14
+ da4ml/codegen/cpp/cpp_codegen.py,sha256=6lBF1I-xXdIABEWF60owBmQiISuI6mrITCqLqhsEHrQ,6033
15
+ da4ml/codegen/cpp/hls_model.py,sha256=J5lnB8sAvMy0Bo5MSJOpgyUm1tzEJqBxgPTlOd38Gbg,8978
16
+ da4ml/codegen/cpp/source/binder_util.hh,sha256=pBVmhXIDvdCr8n2wwYehc3Fpp60sWYrrZaDoP3x9JZE,1880
17
+ da4ml/codegen/cpp/source/build_binder.mk,sha256=RLu4TP28aJsveyMOHxuDRGEJVoIPMo9T8WyPtqnmtbQ,584
18
+ da4ml/codegen/cpp/source/vitis_bitshift.hh,sha256=yFpYCVJ8gof-EzPjkIWWZYmdFh_wk133Pxzs7f61IQo,774
19
+ da4ml/codegen/cpp/source/ap_types/ap_binary.h,sha256=yOcafu2IofstDqxn0wDq8vY3JIwZQ9H5z6IY1dEqMr0,2764
20
+ da4ml/codegen/cpp/source/ap_types/ap_common.h,sha256=1hJY9uvKOdwRSSll5uehUISZR4tsSsQ1z4PNRUc44KU,10180
21
+ da4ml/codegen/cpp/source/ap_types/ap_decl.h,sha256=z1HsH-2RSvSoofTZR7RHeqIfAnEYVuHcIu_ute9gjEg,6473
22
+ da4ml/codegen/cpp/source/ap_types/ap_fixed.h,sha256=3ld4qyF475nDto57AHcsLd-PfoJ7dlplDoZPLXIo6d4,12185
23
+ da4ml/codegen/cpp/source/ap_types/ap_fixed_base.h,sha256=Cd1AJQZjHxVKbvo4w9a9ylkEyNjdXHR7VF9iUoGTb0o,85182
24
+ da4ml/codegen/cpp/source/ap_types/ap_fixed_ref.h,sha256=TO9yZqdWf0VksXmG4SN9_n_CDYQVWU4yuja0YfkrQCw,27302
25
+ da4ml/codegen/cpp/source/ap_types/ap_fixed_special.h,sha256=yXfQnjAc8vJv5T6R9a4L_eA0U_a0ypzK_RSn8yqzt_s,6985
26
+ da4ml/codegen/cpp/source/ap_types/ap_int.h,sha256=nTiyrFN8IPCGRs5RYpCkLT9y4IxaqoRUHtIbpUiOLNA,10012
27
+ da4ml/codegen/cpp/source/ap_types/ap_int_base.h,sha256=Kt4QjfUW85r8lxjY4ESqelR_CnpM0ubb4K5d2G03GMQ,71735
28
+ da4ml/codegen/cpp/source/ap_types/ap_int_ref.h,sha256=5rsOdablweC9hKGtQ8Kktr077sEQ91gzSH5G5hM7m5Y,55218
29
+ da4ml/codegen/cpp/source/ap_types/ap_int_special.h,sha256=HIvRRuiKGpAnCpigURX0cOQUX88dbp3lGkUWpbglMCI,6301
30
+ da4ml/codegen/cpp/source/ap_types/ap_shift_reg.h,sha256=wqe8j3ikbdZiXwYsYlAsFbOFeQLhYXIbKoRC6fJGeuc,4894
31
+ da4ml/codegen/cpp/source/ap_types/hls_math.h,sha256=abFBoZzYjm_pfC2wkuclVh1HuvYJ_YobnN-1Q99GRic,674
32
+ da4ml/codegen/cpp/source/ap_types/hls_stream.h,sha256=NTkVfbE48c6XnMIfR9WzJbDwUnfe6y19xJXxBS3G--I,7480
33
+ da4ml/codegen/cpp/source/ap_types/etc/ap_private.h,sha256=TDdxGIX0r3D6Ql8KeXoceRmHhdlwFA3Akr3-vvMVAtk,261465
34
+ da4ml/codegen/cpp/source/ap_types/utils/x_hls_utils.h,sha256=x24cf1HyZKv0J8YQIoUvYE3uw6SNL7vWetRGIiFm2Jw,2227
35
+ da4ml/codegen/verilog/__init__.py,sha256=rXmW2V9sDp2RYMDAWlhj_gfMXH3G5lPNmLrFtsJjn_A,298
36
+ da4ml/codegen/verilog/comb.py,sha256=CmCwiddeiT4TCZV088lF2ENlAXx3vjZKszTz1sYXEao,7614
37
+ da4ml/codegen/verilog/io_wrapper.py,sha256=SSs-ZRhBVLR6tpFso8GNGk-FH6JDe-p7LPvVPjTspxo,5002
38
+ da4ml/codegen/verilog/pipeline.py,sha256=YsPRTLp04Aofg33QMw6_ga3fNX9LeCD7Pq2PnERLWOg,2377
39
+ da4ml/codegen/verilog/verilog_model.py,sha256=_50dggtH24xMdI0beuyvdsv8G8dlB4MWa1m8KWZQdNE,12295
40
+ da4ml/codegen/verilog/source/binder_util.hh,sha256=Dn9ysUdonw0HR8bxom8YfQF7vc1LEvT_B1V_o8Gw1rY,2503
41
+ da4ml/codegen/verilog/source/build_binder.mk,sha256=rQbI98itE_b1wIQ_0uCXfBzNmGK2XT4vWmRyCJNnPKk,960
42
+ da4ml/codegen/verilog/source/build_prj.tcl,sha256=bcFCpcHR26TJGOQZEpUx0eM1SEiJOCoH-9EPpIvqWu0,3124
43
+ da4ml/codegen/verilog/source/ioutil.hh,sha256=1o1-oIyQyYc9CU91bBxuitVzzcrNT8p4MTarFKiJoG4,3967
44
+ da4ml/codegen/verilog/source/mux.v,sha256=1PMSQKGR_Cku1EQnePBVCuX6we_dqYBXW54WBEURvs0,1928
45
+ da4ml/codegen/verilog/source/negative.v,sha256=YphTCLnYslktsnCPq1xjbYgIFavani5NBbqs20uwhBI,688
46
+ da4ml/codegen/verilog/source/shift_adder.v,sha256=qrpXBX9bhHI-o75v5zshOfq0giEATvbeGgTir20_S3Q,1915
47
+ da4ml/codegen/verilog/source/template.xdc,sha256=GlSRy8tw_orohSuUwUSNEYJLLkAAHttGTfLTcQqRQDg,1262
48
+ da4ml/converter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
+ da4ml/converter/hgq2/parser.py,sha256=bAtnEXQxRKU9a1HFJWTy-e_HDzZY_wXOBVdyYG3ndsM,3826
50
+ da4ml/converter/hgq2/replica.py,sha256=9ICJGfK2Q2C_glwE0KMcvXttuWvJYRblkO7RLmalzss,13829
51
+ da4ml/trace/__init__.py,sha256=dv-rti3t8iE0RqeThfOb40mAg8FZB2WkkGQq3enJft0,282
52
+ da4ml/trace/fixed_variable.py,sha256=6dfMHBN1NfqYIbPZ79GCPCXj2JFQUKTyDZu6xDaG3rg,17082
53
+ da4ml/trace/fixed_variable_array.py,sha256=A0ApTvZxpkr7kHrUQkyhrGJuuPe4kDgLFyD_1CW7lBk,10985
54
+ da4ml/trace/pipeline.py,sha256=_R2uqWgnpuQ4tD7VKz2eu8CF9Air2RtYH2o03Vfg0Mk,5353
55
+ da4ml/trace/tracer.py,sha256=NqPEH9hyVlGQOf9_kJL3A7SujCcxkT-z28bk0Ael5jE,5664
56
+ da4ml/trace/ops/__init__.py,sha256=I4VqB43lVkFlLtkoWxiSDHBFGvxKwutNbAJw5aLVeAI,2108
57
+ da4ml/trace/ops/conv_utils.py,sha256=P8ccpeHF5D0jY9LdkIfwui3HNs-WpTnGfwiYehtboCU,8246
58
+ da4ml/trace/ops/einsum_utils.py,sha256=MoWvOfvtVjXGwqEhXEzZ3uGrgSmLTHngV8I1eLyANGE,11433
59
+ da4ml/trace/ops/reduce_utils.py,sha256=8gohGQRVr8Bn5rfyrGsnE8EDxUXAObv521qu4mJrX9I,3348
60
+ da4ml-0.3.0.dist-info/licenses/LICENSE,sha256=46mU2C5kSwOnkqkw9XQAJlhBL2JAf1_uCD8lVcXyMRg,7652
61
+ da4ml-0.3.0.dist-info/METADATA,sha256=JVugmlP6dRkZ8PZTOisE6OZ3JaNNjm18yKAUBDZC04s,4569
62
+ da4ml-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
63
+ da4ml-0.3.0.dist-info/top_level.txt,sha256=N0tnKVwRqFiffFdeAzCgFq71hUNySh5-ITbNd6-R58Q,6
64
+ da4ml-0.3.0.dist-info/RECORD,,
@@ -1,17 +0,0 @@
1
- #pragma once
2
- #include "ap_fixed.h"
3
-
4
- template <typename inp_t, typename out_t, size_t SIZE_IN, size_t SIZE_OUT, typename F>
5
- void vitis_bridge(F f, double *inp, double *out, int size) {
6
- inp_t in_fixed_buf[SIZE_IN];
7
- out_t out_fixed_buf[SIZE_OUT];
8
- for (int i = 0; i < size; i++) {
9
- for (int j = 0; j < SIZE_IN; j++) {
10
- in_fixed_buf[j] = inp_t(inp[i * SIZE_IN + j]);
11
- }
12
- f(in_fixed_buf, out_fixed_buf);
13
- for (int j = 0; j < SIZE_OUT; j++) {
14
- out[i * SIZE_OUT + j] = double(out_fixed_buf[j]);
15
- }
16
- }
17
- }
@@ -1,65 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: da4ml
3
- Version: 0.2.1
4
- Summary: Digital Arithmetic for Machine Learning
5
- Author-email: Chang Sun <chsun@cern.ch>
6
- License: GNU Lesser General Public License v3 (LGPLv3)
7
- Project-URL: repository, https://github.com/calad0i/da4ml
8
- Keywords: CMVM,distributed arithmetic,hls4ml,MCM,subexpression elimination
9
- Classifier: Development Status :: 4 - Beta
10
- Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)
11
- Classifier: Operating System :: OS Independent
12
- Classifier: Programming Language :: Python :: 3 :: Only
13
- Classifier: Programming Language :: Python :: 3.10
14
- Classifier: Programming Language :: Python :: 3.11
15
- Classifier: Programming Language :: Python :: 3.12
16
- Classifier: Programming Language :: Python :: 3.13
17
- Requires-Python: >=3.10
18
- Description-Content-Type: text/markdown
19
- License-File: LICENSE
20
- Requires-Dist: llvmlite>=0.44
21
- Requires-Dist: numba>=0.61
22
- Dynamic: license-file
23
-
24
- # da4ml: Distributed Arithmetic for Machine Learning
25
-
26
- This project performs Constant Matrix-Vector Multiplication (CMVM) with Distributed Arithmetic (DA) for Machine Learning (ML) on a Field Programmable Gate Arrays (FPGAs).
27
-
28
- CMVM optimization is done through greedy CSE of two-term subexpressions, with possible Delay Constraints (DC). The optimization is done in jitted Python (Numba), and a list of optimized operations is generated as traced Python code.
29
-
30
- At the moment, the project only generates Vitis HLS C++ code for the FPGA implementation of the optimized CMVM kernel. HDL code generation is planned for the future. Currently, the major use of this repository is through the `distributed_arithmetic` strategy in the [`hls4ml`](https://github.com/fastmachinelearning/hls4ml/) project.
31
-
32
-
33
- ## Installation
34
-
35
- The project is available on PyPI and can be installed with pip:
36
-
37
- ```bash
38
- pip install da4ml
39
- ```
40
-
41
- Notice that `numba>=6.0.0` is required for the project to work. The project does not work with `python<3.10`. If the project fails to compile, try upgrading `numba` and `llvmlite` to the latest versions.
42
-
43
- ## `hls4ml`
44
-
45
- The major use of this project is through the `distributed_arithmetic` strategy in the `hls4ml`:
46
-
47
- ```python
48
- model_hls = hls4ml.converters.convert_from_keras_model(
49
- model,
50
- hls_config={
51
- 'Model': {
52
- ...
53
- 'Strategy': 'distributed_arithmetic',
54
- },
55
- ...
56
- },
57
- ...
58
- )
59
- ```
60
-
61
- Currently, `Dense/Conv1D/Conv2D` layers are supported for both `io_parallel` and `io_stream` dataflows. However, notice that distributed arithmetic implies `reuse_factor=1`, as the whole kernel is implemented in combinational logic.
62
-
63
- ### Notice
64
-
65
- Currently, only the `da4ml-v3` branch of `hls4ml` supports the `distributed_arithmetic` strategy. The `da4ml-v3` branch is not yet merged into the `main` branch of `hls4ml`, so you need to install it from the GitHub repository.
@@ -1,39 +0,0 @@
1
- da4ml/__init__.py,sha256=IETRRvzsJvPMLu1kzzi8UN5FYaM5MhNaXH2A_ZKr2_w,469
2
- da4ml/_version.py,sha256=UoNvMtd4wCG76RwoSpNCUtaFyTwakGcZolfjXzNVSMY,511
3
- da4ml/cmvm/__init__.py,sha256=4Tbt913k9zP0w8R1p6Oss06v5jrManbUhskyHl6e-U0,154
4
- da4ml/cmvm/api.py,sha256=JpecMt6g8zutGh_uWT61_0iX8TuXct7-jq7N7HMIsgA,9626
5
- da4ml/cmvm/types.py,sha256=MckE6hBnRX2bMvT86CjvyxAMSK7grCrCRn2f_f3qgAw,17844
6
- da4ml/cmvm/core/__init__.py,sha256=bp2CXI4EOVOQSho1qwfusNs0RliZRt2dV0hZ33W_Kjo,7703
7
- da4ml/cmvm/core/indexers.py,sha256=QjXgvExS-B2abHTJPDG4NufMdMEflo1i6cUhFOgJpH4,2945
8
- da4ml/cmvm/core/state_opr.py,sha256=wLqO8qVuM2-qCE5LDeYJDNkUruIPHy63obsv4-x-aR8,8661
9
- da4ml/cmvm/util/__init__.py,sha256=DkBlUEKA_Gu7n576ja_xZlAQfToWmNL9VXU-jmj6a-g,145
10
- da4ml/cmvm/util/bit_decompose.py,sha256=SUco70HRYf4r1JU6BXwcgabDrhm_yAmucae5FC67i4I,2216
11
- da4ml/cmvm/util/mat_decompose.py,sha256=eSJNlXwx_jxgqt5vLJrSLQaeq2ZXu8j9mC4d-eq883M,4094
12
- da4ml/codegen/__init__.py,sha256=g58EgubgPPoiwRTBduSzm6hAc-poPcK6egdoECfPx9o,329
13
- da4ml/codegen/cpp/__init__.py,sha256=Tw4XeU_oJsyUkTrsfEPuZ-r0rGAo8E2NX5wn_VTA7NM,90
14
- da4ml/codegen/cpp/cpp_codegen.py,sha256=FnVPgD8McFFdrecdI1u_ybDLQ0RFuVpJ0xO5Ne1D8j0,4811
15
- da4ml/codegen/cpp/source/vitis.h,sha256=ovEefBOfW5-PXuDdRObPGNokGGFHiixDCpPWeTN6aTo,765
16
- da4ml/codegen/cpp/source/vitis_bridge.h,sha256=XvvGw3A4eAaXKi5jp50bMKUsNfd5iQ-HhUKtsty1uns,567
17
- da4ml/codegen/verilog/__init__.py,sha256=obRTdtMWhPHsxFHg2ADoPd3iDBEX8nk_6HuCet5EDz0,356
18
- da4ml/codegen/verilog/comb.py,sha256=EZONCceEvIKHHF8yLY-i2V_U_8THw_dJEQWujjCJ5iI,5592
19
- da4ml/codegen/verilog/io_wrapper.py,sha256=TrfJpJxU4uPTGW02_uFb7cjhFlhuVjH3rY0iWuf-lYk,8003
20
- da4ml/codegen/verilog/pipeline.py,sha256=YsPRTLp04Aofg33QMw6_ga3fNX9LeCD7Pq2PnERLWOg,2377
21
- da4ml/codegen/verilog/verilog_model.py,sha256=xTH4-B3PG0jJtQ84NiADvHgU5JbkMEoz_UGuspMHh4Y,10869
22
- da4ml/codegen/verilog/source/build_binder.mk,sha256=rQbI98itE_b1wIQ_0uCXfBzNmGK2XT4vWmRyCJNnPKk,960
23
- da4ml/codegen/verilog/source/build_prj.tcl,sha256=bcFCpcHR26TJGOQZEpUx0eM1SEiJOCoH-9EPpIvqWu0,3124
24
- da4ml/codegen/verilog/source/ioutils.hh,sha256=1o1-oIyQyYc9CU91bBxuitVzzcrNT8p4MTarFKiJoG4,3967
25
- da4ml/codegen/verilog/source/shift_adder.v,sha256=l2ofym56Y-_PeeY9fwkcZeW9MzrTL_WxvSTvoWERJrU,1885
26
- da4ml/codegen/verilog/source/template.xdc,sha256=ON8i-TK96Yo6FoZ66WzcVKELajTF5TBmbWFbEilna2U,1142
27
- da4ml/trace/__init__.py,sha256=1br9bWeFb33t69k6h1XQ50iJhLCqrRuEHtqEawELp-c,230
28
- da4ml/trace/fixed_variable.py,sha256=DthYqQJt2JD2t6X9nuNRPi80SQ7XriXdyXqw5CMR95Y,11669
29
- da4ml/trace/fixed_variable_array.py,sha256=7Ds92DLnVYMK6_G_l6DWCxgWf7Y8frBiyBj-vaqeWKk,6929
30
- da4ml/trace/pipeline.py,sha256=dYduPBNUeyW2Ws392hZNGJEo0qI5ynpn-iC2n7UVahk,5687
31
- da4ml/trace/tracer.py,sha256=xEQQNHkJ8VFt8cDFISzhX6fNVi2JVfgfCg2aca80E2c,4597
32
- da4ml/trace/ops/__init__.py,sha256=qz0DLPUyxBAu08RCN22kCkJj1EPKanC8ey8NB3_K8co,1640
33
- da4ml/trace/ops/conv_utils.py,sha256=LtgP3iSZ3fNV6QkEVBzT7ixt-7WTdmBDrFTtQ_9D5aE,3638
34
- da4ml/trace/ops/einsum_utils.py,sha256=miyMyzJwBLpLTEzXU4vErPE1Xk-ckZG0cjhd13MLAuA,11325
35
- da4ml-0.2.1.dist-info/licenses/LICENSE,sha256=46mU2C5kSwOnkqkw9XQAJlhBL2JAf1_uCD8lVcXyMRg,7652
36
- da4ml-0.2.1.dist-info/METADATA,sha256=4Kj_ehj1jTDa21JMDIsujUwUzpv7csj2oB23ib3y998,2849
37
- da4ml-0.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
38
- da4ml-0.2.1.dist-info/top_level.txt,sha256=N0tnKVwRqFiffFdeAzCgFq71hUNySh5-ITbNd6-R58Q,6
39
- da4ml-0.2.1.dist-info/RECORD,,
File without changes
File without changes