da4ml 0.1.2__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of da4ml has been flagged as possibly problematic.
- da4ml/__init__.py +16 -16
- da4ml/_version.py +2 -2
- da4ml/cmvm/__init__.py +3 -34
- da4ml/cmvm/api.py +235 -73
- da4ml/cmvm/core/__init__.py +221 -0
- da4ml/cmvm/core/indexers.py +83 -0
- da4ml/cmvm/core/state_opr.py +284 -0
- da4ml/cmvm/types.py +569 -0
- da4ml/cmvm/util/__init__.py +7 -0
- da4ml/cmvm/util/bit_decompose.py +86 -0
- da4ml/cmvm/util/mat_decompose.py +121 -0
- da4ml/codegen/__init__.py +11 -0
- da4ml/codegen/cpp/__init__.py +3 -0
- da4ml/codegen/cpp/cpp_codegen.py +148 -0
- da4ml/codegen/cpp/source/vitis.h +30 -0
- da4ml/codegen/cpp/source/vitis_bridge.h +17 -0
- da4ml/codegen/verilog/__init__.py +13 -0
- da4ml/codegen/verilog/comb.py +146 -0
- da4ml/codegen/verilog/io_wrapper.py +255 -0
- da4ml/codegen/verilog/pipeline.py +67 -0
- da4ml/codegen/verilog/source/build_binder.mk +27 -0
- da4ml/codegen/verilog/source/build_prj.tcl +74 -0
- da4ml/codegen/verilog/source/ioutils.hh +117 -0
- da4ml/codegen/verilog/source/shift_adder.v +56 -0
- da4ml/codegen/verilog/source/template.xdc +29 -0
- da4ml/codegen/verilog/verilog_model.py +268 -0
- da4ml/trace/__init__.py +6 -0
- da4ml/trace/fixed_variable.py +358 -0
- da4ml/trace/fixed_variable_array.py +187 -0
- da4ml/trace/ops/__init__.py +55 -0
- da4ml/trace/ops/conv_utils.py +104 -0
- da4ml/trace/ops/einsum_utils.py +299 -0
- da4ml/trace/pipeline.py +155 -0
- da4ml/trace/tracer.py +122 -0
- da4ml-0.2.1.dist-info/METADATA +65 -0
- da4ml-0.2.1.dist-info/RECORD +39 -0
- {da4ml-0.1.2.dist-info → da4ml-0.2.1.dist-info}/WHEEL +1 -1
- da4ml/cmvm/balanced_reduction.py +0 -46
- da4ml/cmvm/cmvm.py +0 -328
- da4ml/cmvm/codegen.py +0 -159
- da4ml/cmvm/csd.py +0 -73
- da4ml/cmvm/fixed_variable.py +0 -205
- da4ml/cmvm/graph_compile.py +0 -85
- da4ml/cmvm/nb_fixed_precision.py +0 -98
- da4ml/cmvm/scoring.py +0 -55
- da4ml/cmvm/utils.py +0 -5
- da4ml-0.1.2.dist-info/METADATA +0 -122
- da4ml-0.1.2.dist-info/RECORD +0 -18
- {da4ml-0.1.2.dist-info → da4ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
- {da4ml-0.1.2.dist-info → da4ml-0.2.1.dist-info}/top_level.txt +0 -0
da4ml/cmvm/fixed_variable.py
DELETED
@@ -1,205 +0,0 @@
-from math import ceil, log2
-
-
-class Singleton(type):
-    _instances = {}
-
-    def __call__(cls, *args, **kwargs):
-        if cls not in cls._instances:
-            cls._instances[cls] = super().__call__(*args, **kwargs)
-        return cls._instances[cls]
-
-
-class Namer(metaclass=Singleton):
-    def __init__(self):
-        self._counters = {}
-        self._scope = 'global'
-
-    def set_scope(self, scope: str):
-        self._scope = scope
-
-    def tmp_scope(self):
-        _counters = self._counters
-        outer = self
-
-        class _Ctx:
-            def __enter__(self):
-                outer._counters = {}
-
-            def __exit__(self, *args) -> None:
-                outer._counters = _counters
-
-        return _Ctx()
-
-    def __call__(self, name: str, scope: str | None = None) -> str:
-        scope = self._scope if scope is None else scope
-        counters = self._counters.setdefault(scope, {})
-        if name not in counters:
-            counters[name] = -1
-        counters[name] += 1
-        return f'{name}{counters[name]}'
-
-
-class FixedVariable:
-    def __init__(
-        self,
-        int_min: int,
-        int_max: int,
-        shift: int,
-        symmetric: bool = False,
-        _depth: int = 0,
-        name: str = '',
-        _factor: int = 1,
-        _from: tuple['FixedVariable', 'FixedVariable'] | None = None,
-        namer=Namer(),
-    ):
-        self.int_min = int_min
-        self.int_max = int_max
-        self.shift = shift
-        self.symmetric = symmetric
-        self._depth = _depth
-        self.name = name
-        self._factor = _factor
-        self._from = _from
-        self.namer = namer
-
-        if self.int_min > self.int_max:
-            raise ValueError('int_min must be less than or equal to int_max')
-
-    @property
-    def k(self) -> int:
-        return int(self.min < 0)
-
-    @property
-    def b(self) -> int:
-        return ceil(log2(max(self.int_max + 1, -self.int_min)))
-
-    @property
-    def i(self) -> int:
-        return self.b - self.shift
-
-    @property
-    def min(self) -> float:
-        return self.int_min * 2.0 ** (-self.shift)
-
-    @property
-    def max(self) -> float:
-        return self.int_max * 2.0 ** (-self.shift)
-
-    def __str__(self) -> str:
-        s = '' if self.k else 'u'
-        p = f'{s}fixed({self.b+self.k}, {self.i+self.k})'
-        if self.int_min == self.int_max:
-            return f'{p}({self.min})'
-        return p
-
-    def __add__(self, other: 'FixedVariable|float'):
-        if other == 0:
-            return self
-        if not isinstance(other, FixedVariable):
-            return self + self.from_const(other, self.namer)
-
-        assert self.namer is other.namer, 'Namer must be the same'
-        shift = max(self.shift, other.shift)
-        _shift0, _shift1 = shift - self.shift, shift - other.shift
-        int_min = (self.int_min << _shift0) + (other.int_min << _shift1)
-        int_max = (self.int_max << _shift0) + (other.int_max << _shift1)
-
-        return FixedVariable(
-            int_min,
-            int_max,
-            shift,
-            symmetric=False,
-            _depth=max(self._depth, other._depth) + 1,
-            _from=(self, other),
-            _factor=1,
-            name=self.namer('v'),
-            namer=self.namer,
-        )
-
-    def __radd__(self, other: 'FixedVariable|float'):
-        return self + other
-
-    def __neg__(self):
-        return FixedVariable(
-            -self.int_max,
-            -self.int_min,
-            self.shift,
-            symmetric=False,
-            _depth=self._depth,
-            _from=self._from,
-            _factor=-self._factor,
-            name=self.name,
-            namer=self.namer,
-        )
-
-    def __sub__(self, other: 'FixedVariable'):
-        return self + (-other)
-
-    def __lshift__(self, other: int):
-        return FixedVariable(
-            self.int_min,
-            self.int_max,
-            self.shift - other,
-            False,
-            self._depth,
-            _from=self._from,
-            _factor=self._factor * 2**other,
-            name=self.name,
-            namer=self.namer,
-        )
-
-    def __rshift__(self, other: int):
-        return self << -other
-
-    def __mul__(self, other: float):
-        if other == 1:
-            return self
-        if other == -1:
-            return -self
-        if other == 0:
-            return self.from_const(0, self.namer)
-        assert log2(abs(other)) % 1 == 0
-        sign = -1 if other < 0 else 1
-        shift = int(log2(abs(other)))
-        return self << shift if sign == 1 else -self << shift
-
-    def __rmul__(self, other: float):
-        return self * other
-
-    def __repr__(self) -> str:
-        if self._factor == 1:
-            return self.__str__()
-        return f'({self._factor}) {self.__str__()}'
-
-    @classmethod
-    def from_nb_precision(cls, p, name: str | None = None, namer=Namer()):
-        name = Namer()('inp') if name is None else name
-        return cls(p.int_min, p.int_max, p.shift, p.symmetric, p._depth, name=name, namer=namer)
-
-    @classmethod
-    def from_const(cls, value: float, namer=Namer()):
-        if value == 0:
-            return cls(0, 0, 0, False, 0, '0', namer=namer)
-        _low, _high = -32, 32
-        while _high - _low > 1:
-            _mid = (_high + _low) // 2
-            _value = value * (2.0**_mid)
-            if _value == int(_value):
-                _high = _mid
-            else:
-                _low = _mid
-        _value = value * (2.0**_high)
-        shift = int(_high)
-        int_min = int_max = int(_value)
-        return cls(
-            int_max,
-            int_min,
-            shift,
-            symmetric=False,
-            _depth=0,
-            _from=None,
-            _factor=1,
-            name=str(value),
-            namer=namer,
-        )
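To make the deleted range-tracking concrete: a minimal sketch (not from the package; the import path and values are illustrative) of how `FixedVariable` propagated integer ranges through shift-and-add, so the fixed-point format of every intermediate was known statically:

```python
from fixed_variable import FixedVariable, Namer  # hypothetical import path

namer = Namer()
a = FixedVariable(0, 15, 0, name=namer('inp'), namer=namer)  # ufixed(4, 4)
b = FixedVariable(0, 15, 0, name=namer('inp'), namer=namer)  # ufixed(4, 4)

c = a + (b << 1)  # a + 2*b: integer range [0, 45] -> 6 bits, 'ufixed(6, 6)'
d = a - b         # integer range [-15, 15] -> sign bit appears, 'fixed(5, 5)'
print(c, d)
```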
da4ml/cmvm/graph_compile.py
DELETED
@@ -1,85 +0,0 @@
-import numpy as np
-
-from .balanced_reduction import balanced_reduction
-from .fixed_variable import FixedVariable
-from .nb_fixed_precision import NBFixedPrecision
-from .utils import DAState, OpCode
-
-
-def var_from_nb_var(op_codes: OpCode, nb_variables: list[NBFixedPrecision], bias_idx: int):
-    variables: list[FixedVariable] = []
-
-    i = 0
-    while i < len(op_codes) and op_codes[i].pos1 < 0:
-        op_code = op_codes[i]
-        pos0, pos1 = op_code.pos0, op_code.pos1
-        shift = op_code.shift0
-        if pos1 >= 0:
-            break
-        v = FixedVariable.from_nb_precision(nb_variables[pos0], name=f'inp[{pos0+bias_idx}]')
-        v._factor = 2.0**shift
-        variables.append(v)
-        i += 1
-
-    while i < len(op_codes):
-        op_code = op_codes[i]
-        pos0, pos1 = op_code.pos0, op_code.pos1
-        shift0, shift1 = op_code.shift0, op_code.shift1
-        sign0, sign1 = op_code.sign0, op_code.sign1
-        v0, v1 = variables[pos0], variables[pos1]
-        v = sign0 * (v0 << shift0) + sign1 * (v1 << shift1)
-        variables.append(v)
-        i += 1
-
-    return variables
-
-
-def gather_output_var_cumlist(state: DAState, variables: list[FixedVariable]):
-    n_out = state.kernel.shape[1]
-    cumlist = [[] for _ in range(n_out)]
-    csd = np.array(state.csd)
-    for di, do, shift in zip(*np.where(csd != 0)):
-        sign = csd[di, do, shift]
-        cumlist[do].append(sign * (variables[di] << shift))
-    return cumlist
-
-
-def graph_compile_states(states: list[list[DAState]], signed_balanced_reduction=True):
-    n_split_in = len(states)
-    n_split_out = len(states[0])
-    assert all(len(states[i]) == n_split_out for i in range(n_split_in))
-    kernel_shapes = np.empty((n_split_in, n_split_out, 2), dtype=np.int64)
-    for i, j in np.ndindex(n_split_in, n_split_out):
-        state = states[i][j]
-        kernel_shapes[i, j] = state.kernel.shape
-    assert np.all(np.std(kernel_shapes[:, :, 0], axis=1) == 0), 'Input kernel shapes must be the same'
-    assert np.all(np.std(kernel_shapes[:, :, 1], axis=0) == 0), 'Output kernel shapes must be the same'
-    n_in = kernel_shapes[:, 0, 0]
-    n_out = kernel_shapes[0, :, 1]
-    idx_in_biases = np.cumsum([0] + list(n_in[:-1]))
-    idx_out_biases = np.cumsum([0] + list(n_out[:-1]))
-
-    input_variables = []
-    idx_in_bias = 0
-    for i in range(n_split_in):
-        _state = states[i][0]
-        _n_in = n_in[i]
-        _vars = var_from_nb_var(_state.op_codes[:_n_in], _state.variables[:_n_in], idx_in_bias)
-        input_variables.append(_vars)
-        idx_in_bias += _state.kernel.shape[0]
-
-    output_variables = [[] for _ in range(np.sum(n_out))]
-    for i, j in np.ndindex(n_split_in, n_split_out):
-        state = states[i][j]
-        idx_in_bias = idx_in_biases[i]
-        idx_out_bias = idx_out_biases[j]
-        variables = var_from_nb_var(state.op_codes, state.variables, idx_in_bias)
-        _cumlist = gather_output_var_cumlist(state, variables)
-        for k, buf in enumerate(_cumlist):
-            output_variables[idx_out_bias + k].extend(buf)
-
-    _output_variables: list[FixedVariable] = [
-        balanced_reduction(buf, signed=signed_balanced_reduction) for buf in output_variables
-    ]  # type: ignore
-    input_variables = [v for vs in input_variables for v in vs]
-    return input_variables, _output_variables
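The deleted pass above replays `OpCode` records into shift-and-add expressions; the whole combination rule is the single line `sign0 * (v0 << shift0) + sign1 * (v1 << shift1)`. A toy replay on plain ints (field values are hypothetical; `OpCode` is the namedtuple from the deleted `da4ml/cmvm/utils.py` shown further below):

```python
from collections import namedtuple

OpCode = namedtuple('OPCode', ['pos0', 'pos1', 'shift0', 'shift1', 'sign0', 'sign1'])

variables = [3, 5]  # values already materialized at positions 0 and 1
op = OpCode(pos0=0, pos1=1, shift0=2, shift1=0, sign0=1, sign1=-1)

# same combination rule as in var_from_nb_var's second loop:
v = op.sign0 * (variables[op.pos0] << op.shift0) + op.sign1 * (variables[op.pos1] << op.shift1)
variables.append(v)  # 3*4 - 5 = 7
print(variables)     # [3, 5, 7]
```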
da4ml/cmvm/nb_fixed_precision.py
DELETED
@@ -1,98 +0,0 @@
-from math import ceil, log2
-
-from numba import int32
-from numba import types as nb_types
-from numba.experimental import jitclass
-
-spec = [
-    ('int_min', int32),
-    ('int_max', int32),
-    ('shift', int32),
-    ('symmetric', nb_types.boolean),
-    ('_depth', int32),
-]
-
-
-@jitclass(spec=spec)  # type: ignore
-class NBFixedPrecision:
-    def __init__(
-        self,
-        int_min: int,
-        int_max: int,
-        shift: int,
-        symmetric: bool = False,
-        _depth: int = 0,
-    ):
-        self.int_min = int_min
-        self.int_max = int_max
-        self.shift = shift
-        self.symmetric = symmetric
-        self._depth = _depth
-
-        if self.int_min > self.int_max:
-            raise ValueError('int_min must be less than or equal to int_max')
-
-    @property
-    def k(self) -> int:
-        return int(self.min < 0)
-
-    @property
-    def b(self) -> int:
-        return ceil(log2(max(self.int_max + 1, -self.int_min)))
-
-    @property
-    def i(self) -> int:
-        return self.b - self.shift
-
-    @property
-    def min(self) -> float:
-        return self.int_min * 2.0 ** (-self.shift)
-
-    @property
-    def max(self) -> float:
-        return self.int_max * 2.0 ** (-self.shift)
-
-    def __str__(self) -> str:
-        s = '' if self.k else 'u'
-        p = f'ap_{s}fixed({self.b+self.k}, {self.i+self.k})'
-        if self.int_min == self.int_max:
-            return f'{p}({self.min})'
-        return p
-
-    def __add__(self, other: 'NBFixedPrecision'):
-        shift = max(self.shift, other.shift)
-        _shift0, _shift1 = shift - self.shift, shift - other.shift
-        int_min = (self.int_min << _shift0) + (other.int_min << _shift1)
-        int_max = (self.int_max << _shift0) + (other.int_max << _shift1)
-
-        return NBFixedPrecision(
-            int_min,
-            int_max,
-            shift,
-            False,
-            max(self._depth, other._depth) + 1,
-        )
-
-    def __neg__(self):
-        return NBFixedPrecision(
-            -self.int_max,
-            -self.int_min,
-            self.shift,
-            False,
-            self._depth,
-        )
-
-    def __sub__(self, other: 'NBFixedPrecision'):
-        return self + (-other)
-
-    def __lshift__(self, other: int):
-        return NBFixedPrecision(
-            self.int_min,
-            self.int_max,
-            self.shift - other,
-            False,
-            self._depth,
-        )
-
-    def __rshift__(self, other: int):
-        return self << -other
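A hand-check of the bitwidth bookkeeping above (`b` total magnitude bits, `i` integer bits, `k` sign bit); this sketch assumes `numba` is installed and uses an illustrative import path:

```python
from nb_fixed_precision import NBFixedPrecision  # hypothetical import path

# integer range [-8, 7] at shift=2, i.e. real values -2.0 .. 1.75 in steps of 0.25
p = NBFixedPrecision(-8, 7, 2, False, 0)
print(p.min, p.max)   # -2.0 1.75
print(p.b, p.i, p.k)  # 3 1 1 -> rendered by __str__ as 'ap_fixed(4, 2)'
```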
da4ml/cmvm/scoring.py
DELETED
@@ -1,55 +0,0 @@
-from numba import njit
-
-from .nb_fixed_precision import NBFixedPrecision
-
-
-def py_resource_scorer(p1, p2) -> float:
-    b0, b1 = p1.b, p2.b
-    f1, f2 = p1.shift, p2.shift
-    pl0, pl1 = -f1, -f2
-    ph0, ph1 = pl0 + b0, pl1 + b1
-    pl, pL = (pl0, pl1) if pl0 < pl1 else (pl1, pl0)
-    ph, pH = (ph0, ph1) if ph0 < ph1 else (ph1, ph0)
-
-    return ph - pL
-
-
-def py_latency_scorer(p1, p2) -> float:
-    return -float(abs(p1._depth - p2._depth))
-
-
-def py_scorer(p1, p2, dshift: int = 0, dsign: int = 0) -> float:
-    p2 = p2 << dshift
-    rs = py_resource_scorer(p1, p2)
-    ls = py_latency_scorer(p1, p2)
-    score = rs + ls
-    return score
-
-
-@njit
-def resource_scorer(p1: NBFixedPrecision, p2: NBFixedPrecision) -> float:
-    b0, b1 = p1.b, p2.b
-    f1, f2 = p1.shift, p2.shift
-    pl0, pl1 = -f1, -f2
-    ph0, ph1 = pl0 + b0, pl1 + b1
-    pl, pL = (pl0, pl1) if pl0 < pl1 else (pl1, pl0)
-    ph, pH = (ph0, ph1) if ph0 < ph1 else (ph1, ph0)
-
-    return ph - pL
-    n_full = max(0, ph - pL)
-
-    return n_full**2 / (pH - pl)
-
-
-@njit
-def latency_scorer(p1: NBFixedPrecision, p2: NBFixedPrecision) -> float:
-    return -float(abs(p1._depth - p2._depth))
-
-
-@njit
-def scorer(p1: NBFixedPrecision, p2: NBFixedPrecision, dshift: int, dsign: int) -> float:
-    p2 = p2 << dshift
-    rs = resource_scorer(p1, p2)
-    ls = latency_scorer(p1, p2)
-    score = rs + ls
-    return score
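A hand-check of the deleted pure-Python scorers, using duck-typed stand-ins for `NBFixedPrecision` (`py_resource_scorer` only reads `.b`/`.shift`, `py_latency_scorer` only `._depth`; the import path and values are illustrative):

```python
from types import SimpleNamespace

from scoring import py_latency_scorer, py_resource_scorer  # hypothetical import path

p1 = SimpleNamespace(b=4, shift=0, _depth=1)  # occupies bit positions [0, 4)
p2 = SimpleNamespace(b=6, shift=2, _depth=2)  # occupies bit positions [-2, 4)

rs = py_resource_scorer(p1, p2)  # min(high) - max(low) = 4 - 0 = 4
ls = py_latency_scorer(p1, p2)   # -|1 - 2| = -1.0
print(rs + ls)                   # 3.0, what py_scorer would combine at dshift=0
```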
da4ml/cmvm/utils.py
DELETED
@@ -1,5 +0,0 @@
-from collections import namedtuple
-
-DAState = namedtuple('DAState', ['csd', 'variables', 'op_codes', 'pairs', 'score', 'kernel'])
-Score = namedtuple('Score', ['potential', 'realized', 'lost', 'value'])
-OpCode = namedtuple('OPCode', ['pos0', 'pos1', 'shift0', 'shift1', 'sign0', 'sign1'])
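These deleted containers were plain namedtuples. As a small illustration (hypothetical field values), an `OpCode` whose `pos1` is negative is exactly what `var_from_nb_var` above treats as an input-loading op:

```python
from utils import OpCode  # hypothetical import path for the deleted module

op = OpCode(pos0=3, pos1=-1, shift0=1, shift1=0, sign0=1, sign1=0)
print(op.pos1 < 0)  # True -> interpreted as "load input 3, scaled by 2**1"
```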
da4ml-0.1.2.dist-info/METADATA
DELETED
@@ -1,122 +0,0 @@
-Metadata-Version: 2.4
-Name: da4ml
-Version: 0.1.2
-Summary: Digital Arithmetic for Machine Learning
-Author-email: Chang Sun <chsun@cern.ch>
-License: GNU Lesser General Public License v3 (LGPLv3)
-Project-URL: repository, https://github.com/calad0i/da4ml
-Keywords: CMVM,distributed arithmetic,hls4ml,MCM,subexpression elimination
-Classifier: Development Status :: 4 - Beta
-Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)
-Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python :: 3 :: Only
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Programming Language :: Python :: 3.13
-Requires-Python: >=3.10
-Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: llvmlite>=0.43
-Requires-Dist: numba>=0.60
-Dynamic: license-file
-
-# da4ml: Distributed Arithmetic for Machine Learning
-
-This project performs Constant Matrix-Vector Multiplication (CMVM) with Distributed Arithmetic (DA) for Machine Learning (ML) on Field Programmable Gate Arrays (FPGAs).
-
-CMVM optimization is done through greedy CSE of two-term subexpressions, with possible Delay Constraints (DC). The optimization is done in jitted Python (Numba), and a list of optimized operations is generated as traced Python code.
-
-At the moment, the project only generates Vitis HLS C++ code for the FPGA implementation of the optimized CMVM kernel. HDL code generation is planned for the future. Currently, the major use of this repository is through the `distributed_arithmetic` strategy in the [`hls4ml`](https://github.com/fastmachinelearning/hls4ml/) project.
-
-
-## Installation
-
-The project is available on PyPI and can be installed with pip:
-
-```bash
-pip install da4ml
-```
-
-Notice that `numba>=0.60` is required for the project to work. The project does not work with `python<3.10`. If the project fails to compile, try upgrading `numba` and `llvmlite` to the latest versions.
-
-## `hls4ml`
-
-The major use of this project is through the `distributed_arithmetic` strategy in `hls4ml`:
-
-```python
-model_hls = hls4ml.converters.convert_from_keras_model(
-    model,
-    hls_config={
-        'Model': {
-            ...
-            'Strategy': 'distributed_arithmetic',
-        },
-        ...
-    },
-    ...
-)
-```
-Currently, `Dense/Conv1D/Conv2D` layers are supported for both `io_parallel` and `io_stream` dataflows. However, notice that distributed arithmetic implies `reuse_factor=1`, as the whole kernel is implemented in combinational logic.
-
-### Notice
-
-Currently, only the `da4ml-v2` branch of `hls4ml` supports the `distributed_arithmetic` strategy. The `da4ml-v2` branch is not yet merged into the `main` branch of `hls4ml`, so you need to install it from the GitHub repository.
-
-## Direct Usage
-
-If you want to use it directly, you can use the `da4ml.api.fn_from_kernel` function, which creates a Python function from a 2D kernel `float[n_in, n_out]`, together with its corresponding code. The function signature is:
-
-```python
-def fn_from_kernel(
-    kernel: np.ndarray,
-    signs: list[bool],
-    bits: list[int],
-    int_bits: list[int],
-    symmetrics: list[bool],
-    depths: list[int] | None = None,
-    n_beams: int = 1,
-    dc: int | None = None,
-    n_inp_max: int = -1,
-    n_out_max: int = -1,
-    codegen_backend: PyCodegenBackend = PyCodegenBackend(),
-    signed_balanced_reduction: bool = True,
-) -> tuple[Callable[[list[T]], list[T]], str]:
-    """Compile a CMVM operation, with the constant kernel, into a function with only accumulation/subtraction/shift operations.
-
-    Parameters
-    ----------
-    kernel : np.ndarray
-        The kernel to compile. Must be of shape (n_inp, n_out).
-    signs : list[bool]
-        Whether the input is signed. Must be of length n_inp.
-    bits : list[int]
-        The bitwidth of the inputs. Must be of length n_inp.
-    int_bits : list[int]
-        The number of integer bits in the inputs (incl. sign bit!). Must be of length n_inp.
-    symmetrics : list[bool]
-        Whether the input is symmetrically quantized. Must be of length n_inp.
-    depths : list[int]|None, optional
-        The depth associated with each input. Must be of length n_inp. Defaults to [0]*n_inp.
-    n_beams : int, optional
-        Number of beams to use in beam search. Defaults to 1. (Currently disabled!)
-    dc : int | None, optional
-        Delay constraint. Not implemented yet. Defaults to None.
-    n_inp_max : int, optional
-        Number of inputs to process in one block. Defaults to -1 (no limit). Decrease to improve performance, but the result will be less optimal.
-    n_out_max : int, optional
-        Number of outputs to process in one block. Defaults to -1 (no limit). Decrease to improve performance, but the result will be less optimal.
-    codegen_backend : PyCodegenBackend, optional
-        The codegen backend to be used. Defaults to PyCodegenBackend().
-    signed_balanced_reduction : bool, optional
-        Whether the reduction tree should isolate the plus and minus terms. Set to False to improve latency. Defaults to True.
-
-    Returns
-    -------
-    tuple[Callable[[list[T]], list[T]], str]
-        fn : Callable[[list[T]], list[T]]
-            The compiled Python function. It takes a list of inputs and returns a list of outputs with only accumulation/subtraction/power-of-2 operations.
-        fn_str : str
-            The code of the compiled function, depending on the codegen_backend used.
-    """
-```
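For reference, a minimal sketch of the direct usage documented above, assuming `fn_from_kernel` is importable as the README states (the kernel values and expected outputs are illustrative):

```python
import numpy as np

from da4ml.api import fn_from_kernel  # path as documented in the README above

kernel = np.array([[1.0, -0.5],
                   [2.0,  1.5]])  # shape (n_inp=2, n_out=2)
fn, fn_str = fn_from_kernel(
    kernel,
    signs=[False, False],       # unsigned inputs
    bits=[8, 8],                # 8-bit inputs
    int_bits=[8, 8],            # all bits integer (incl. no sign bit here)
    symmetrics=[False, False],
)
print(fn([1, 2]))  # expected [1*1 + 2*2, 1*(-0.5) + 2*1.5] = [5.0, 2.5]
print(fn_str)      # the generated shift-add code
```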
da4ml-0.1.2.dist-info/RECORD
DELETED
@@ -1,18 +0,0 @@
-da4ml/__init__.py,sha256=kkpf91aU4n9MMsX63Me90mo_0JSQyU3N-vhgTIrjkT0,437
-da4ml/_version.py,sha256=bSmADqydH8nBu-J4lG8UVuR7hnU_zcwhnSav2oQ0W0A,511
-da4ml/cmvm/__init__.py,sha256=UUnJxtZGQpqVWKT_djuPr7CqyiWlUrmKoyAvH7f4-q8,848
-da4ml/cmvm/api.py,sha256=G7YZ1n32EaYzsahSUwB-0vvRZt1Q11idShhn735vDCM,3461
-da4ml/cmvm/balanced_reduction.py,sha256=fDKaRIY4WkRo3s5dGWeWmylDgjMD-hr1-tpdwu7EL6Y,1533
-da4ml/cmvm/cmvm.py,sha256=4RN6sKLNxybHZcBoUkw4MBluLZoEhAArutt8BE5NCNE,10689
-da4ml/cmvm/codegen.py,sha256=w_1xR36Oxwb-1XivFwjQ_n2uRw-abjwzjOhwARg_93k,6088
-da4ml/cmvm/csd.py,sha256=k-9k0CigqnvyrgtXzBvKLZ32FdwWUE_EhAlXkK6Mlxk,1988
-da4ml/cmvm/fixed_variable.py,sha256=APPT2EN7hOwjHIaD5JdOpd7riGUpmtXUJ6QaAnE1cjw,5753
-da4ml/cmvm/graph_compile.py,sha256=bODuY7DAi-9L-ZleAzK0szZDrEH4Ljq2ocINUHXD588,3322
-da4ml/cmvm/nb_fixed_precision.py,sha256=M2bnflNNOGLgUsI0QGlF2noT5ZJD-2bU2dZ3lw8GZvM,2457
-da4ml/cmvm/scoring.py,sha256=EUREpoyQHUe-vpRat_JjRD73uQhD0dOhnG2D3opazwQ,1375
-da4ml/cmvm/utils.py,sha256=Svp82TOtpNOtKfXlCkijq7DBSbMEqyWoY2PAYVhXXQY,288
-da4ml-0.1.2.dist-info/licenses/LICENSE,sha256=46mU2C5kSwOnkqkw9XQAJlhBL2JAf1_uCD8lVcXyMRg,7652
-da4ml-0.1.2.dist-info/METADATA,sha256=YirD1SJdRbYNKdbY68TWtacjfs-bUrcWIbQBdoIIaYw,5507
-da4ml-0.1.2.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
-da4ml-0.1.2.dist-info/top_level.txt,sha256=N0tnKVwRqFiffFdeAzCgFq71hUNySh5-ITbNd6-R58Q,6
-da4ml-0.1.2.dist-info/RECORD,,
{da4ml-0.1.2.dist-info → da4ml-0.2.1.dist-info}/licenses/LICENSE
File without changes
{da4ml-0.1.2.dist-info → da4ml-0.2.1.dist-info}/top_level.txt
File without changes