hwcomponents-library 1.1.0.dev21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hwcomponents_library/__init__.py +12 -0
- hwcomponents_library/_version.py +34 -0
- hwcomponents_library/_version_scheme.py +43 -0
- hwcomponents_library/base.py +12 -0
- hwcomponents_library/library/__init__.py +0 -0
- hwcomponents_library/library/aladdin.py +409 -0
- hwcomponents_library/library/atomlayer.py +204 -0
- hwcomponents_library/library/brahms.py +85 -0
- hwcomponents_library/library/dummy.py +172 -0
- hwcomponents_library/library/forms.py +80 -0
- hwcomponents_library/library/isaac.py +602 -0
- hwcomponents_library/library/jia.py +232 -0
- hwcomponents_library/library/misc.py +199 -0
- hwcomponents_library/library/newton.py +127 -0
- hwcomponents_library/library/raella.py +89 -0
- hwcomponents_library/library/timely.py +437 -0
- hwcomponents_library/library/wan.py +288 -0
- hwcomponents_library-1.1.0.dev21.dist-info/METADATA +57 -0
- hwcomponents_library-1.1.0.dev21.dist-info/RECORD +21 -0
- hwcomponents_library-1.1.0.dev21.dist-info/WHEEL +5 -0
- hwcomponents_library-1.1.0.dev21.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
"""
|
|
2
|
+
@ARTICLE{9082159,
|
|
3
|
+
author={Jia, Hongyang and Valavi, Hossein and Tang, Yinqi and Zhang, Jintao and Verma, Naveen},
|
|
4
|
+
journal={IEEE Journal of Solid-State Circuits},
|
|
5
|
+
title={A Programmable Heterogeneous Microprocessor Based on Bit-Scalable In-Memory Computing},
|
|
6
|
+
year={2020},
|
|
7
|
+
volume={55},
|
|
8
|
+
number={9},
|
|
9
|
+
pages={2609-2621},
|
|
10
|
+
doi={10.1109/JSSC.2020.2987714}}
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from hwcomponents_library.base import LibraryEstimatorClassBase
|
|
14
|
+
from hwcomponents.scaling import *
|
|
15
|
+
from hwcomponents import actionDynamicEnergy
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Original CSV contents:
|
|
19
|
+
# tech_node,global_cycle_period,resolution,voltage,energy,area,action
|
|
20
|
+
# 65nm, 540e-9, 8, 1.2, 2.25, 5000,read
|
|
21
|
+
# 65nm, 540e-9, 8, 1.2, 1.2, 5000,leak
|
|
22
|
+
# 65nm, 540e-9, 8, 1.2, 0, 5000,write|update
|
|
23
|
+
class JiaShiftAdd(LibraryEstimatorClassBase):
    """
    Shift-and-add unit modeled after Jia et al., JSSC 2020. It accumulates incoming
    values into a register and shifts the register contents so that the summed values
    can be weighted by power-of-two factors.

    Parameters
    ----------
    tech_node: float
        Technology node in meters.
    resolution: int
        Resolution of the unit in bits, i.e. how many bits of each input value are
        added to the register.
    voltage: float
        Supply voltage of the unit in volts.
    """

    component_name = "jia_shift_add"
    priority = 0.9

    def __init__(self, tech_node: float, resolution: int = 8, voltage: float = 1.2):
        # Reference design point from the CSV above: 65nm, 8 bits, 1.2V.
        super().__init__(area=5000.0e-12, leak_power=2.22e-6)
        self.tech_node: float = self.scale(
            "tech_node", tech_node, 65e-9,
            tech_node_energy, tech_node_area, tech_node_leak,
        )
        self.resolution: int = self.scale(
            "resolution",
            resolution,
            8,
            pow_base(2),
            pow_base(2),
            pow_base(2),
        )
        self.voltage: float = self.scale(
            "voltage", voltage, 1.2, noscale, quadratic, 1
        )

    @actionDynamicEnergy
    def shift_and_add(self) -> float:
        """
        Energy of one shift+add operation.

        Returns
        -------
        float
            Energy in Joules at the reference design point.
        """
        energy_joules = 2.25e-12
        return energy_joules

    @actionDynamicEnergy
    def write(self) -> float:
        """
        Energy of a write, which is modeled as one shift+add operation.

        Returns
        -------
        float
            Whatever ``shift_and_add`` reports, in Joules.
        """
        energy_joules = self.shift_and_add()
        return energy_joules

    @actionDynamicEnergy
    def read(self) -> float:
        """
        Energy of a read, which is modeled as free.

        Returns
        -------
        float
            Always zero.
        """
        return 0.0
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# Original CSV contents:
|
|
97
|
+
# tech_node,global_cycle_period,rows,resolution,voltage,energy,area,action
|
|
98
|
+
# 65nm, 540e-9, 1, 8, 1.2, 0.5, 174, read
|
|
99
|
+
# 65nm, 540e-9, 1, 8, 1.2, 0.2, 174, leak
|
|
100
|
+
# 65nm, 540e-9, 1, 8, 1.2, 0, 174, write|update
|
|
101
|
+
class JiaZeroGate(LibraryEstimatorClassBase):
    """
    The zero gating unit from Jia et al. JSSC 2020. This unit gates analog voltages for
    zero-valued inputs going into the rows of the crossbar array.

    Parameters
    ----------
    tech_node: float
        Technology node in meters.
    rows: int
        Number of rows in the crossbar array, equal to the number of checks done by the
        zero gate.
    resolution: int
        Resolution of each input in bits.
    voltage: float
        Voltage of the zero gating unit in volts.
    """

    component_name = "jia_zero_gate"
    priority = 0.9

    def __init__(
        self, tech_node: float, rows: int = 1, resolution: int = 8, voltage: float = 1.2
    ):
        # Reference design point from the CSV above: 65nm, 1 row, 8 bits, 1.2V.
        super().__init__(leak_power=3.70e-7, area=174.0e-12)
        self.tech_node: float = self.scale(
            "tech_node",
            tech_node,
            65e-9,
            tech_node_energy,
            tech_node_area,
            tech_node_leak,
        )
        self.rows: int = self.scale("rows", rows, 1, linear, noscale, noscale)
        self.resolution: int = self.scale(
            "resolution", resolution, 8, pow_base(2), pow_base(2), pow_base(2)
        )
        self.voltage: float = self.scale("voltage", voltage, 1.2, noscale, quadratic, 1)

    @actionDynamicEnergy(bits_per_action="resolution")
    def zero_gate(self) -> float:
        """
        Returns the energy consumed to zero gate & read an input in Joules.

        Parameters
        ----------
        bits_per_action: int
            The number of bits to check for zero.

        Returns
        -------
        float
            The energy consumed to zero gate & read an input in Joules.
        """
        return 0.5e-12

    # Decorated the same way as zero_gate so that "read" is exposed as an action and
    # accepts the documented bits_per_action argument, like the other read() actions
    # in this file. Previously this method carried no decorator at all.
    @actionDynamicEnergy(bits_per_action="resolution")
    def read(self) -> float:
        """
        Returns the energy consumed to zero gate & read an input in Joules. A read
        is modeled as one zero-gate operation.

        Parameters
        ----------
        bits_per_action: int
            The number of bits to check for zero.

        Returns
        -------
        float
            The energy consumed to zero gate & read an input in Joules.
        """
        return self.zero_gate()
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
# Original CSV contents:
|
|
175
|
+
# tech_node,global_cycle_period,voltage,energy,area, action
|
|
176
|
+
# 65nm, 540e-9, 1.2, 12, 10535,read
|
|
177
|
+
# 65nm, 540e-9, 1.2, 2.4, 10535,leak
|
|
178
|
+
# 65nm, 540e-9, 1.2, 0, 10535,write|update
|
|
179
|
+
class JiaDatapath(LibraryEstimatorClassBase):
    """
    The datapath in Jia et al. JSSC 2020. This datapath will perform quantization and
    activation functions on accumulated outputs.

    Parameters
    ----------
    tech_node: float
        Technology node in meters.
    voltage: float
        Voltage of the datapath in volts.

    """

    component_name = "jia_datapath"
    priority = 0.9

    def __init__(self, tech_node: float, voltage: float = 1.2):
        # leak_power is the CSV leak entry (2.4 pJ per 540 ns cycle = 4.44 uW).
        super().__init__(leak_power=4.44e-6, area=10535.0e-12)
        self.tech_node: float = self.scale(
            "tech_node",
            tech_node,
            65e-9,
            tech_node_energy,
            tech_node_area,
            tech_node_leak,
        )
        self.voltage: float = self.scale("voltage", voltage, 1.2, noscale, quadratic, 1)

    @actionDynamicEnergy
    def process(self) -> float:
        """
        Returns the energy consumed by the datapath to quantize and apply activation
        functions on a single input.

        Returns
        -------
        float
            The energy consumed by the datapath to process an input in Joules.
        """
        # The CSV read entry is 12 pJ. The 2.4 pJ figure that was returned here is
        # the CSV leak entry, which is already covered by leak_power in __init__.
        return 12e-12

    @actionDynamicEnergy
    def read(self) -> float:
        """
        Returns the energy consumed by the datapath to quantize and apply activation
        functions on a single input. A read is modeled as one process operation.

        Returns
        -------
        float
            The energy consumed by the datapath to process an input in Joules.
        """
        return self.process()
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
"""
|
|
2
|
+
@ARTICLE{9131838,
|
|
3
|
+
author={Giterman, Robert and Shalom, Amir and Burg, Andreas and Fish, Alexander and Teman, Adam},
|
|
4
|
+
journal={IEEE Solid-State Circuits Letters},
|
|
5
|
+
title={A 1-Mbit Fully Logic-Compatible 3T Gain-Cell Embedded DRAM in 16-nm FinFET},
|
|
6
|
+
year={2020},
|
|
7
|
+
volume={3},
|
|
8
|
+
number={},
|
|
9
|
+
pages={110-113},
|
|
10
|
+
keywords={Random access memory;FinFETs;Temperature measurement;Leakage currents;Power demand;Voltage measurement;Embedded DRAM;gain cell (GC);low voltage;retention time;SRAM},
|
|
11
|
+
doi={10.1109/LSSC.2020.3006496}}
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from hwcomponents_library.base import LibraryEstimatorClassBase
|
|
15
|
+
from hwcomponents.scaling import *
|
|
16
|
+
from hwcomponents import actionDynamicEnergy
|
|
17
|
+
from hwcomponents_cacti import SRAM
|
|
18
|
+
from hwcomponents_library.library.aladdin import AladdinRegister, AladdinAdder
|
|
19
|
+
|
|
20
|
+
# Original CSV contents:
|
|
21
|
+
# tech_node,global_cycle_period,width|datawidth,depth,energy,area,action
|
|
22
|
+
# 16nm,1e-9,1024,1024,2641.92,131570,read
|
|
23
|
+
# 16nm,1e-9,1024,1024,2519.04,131570,write|update
|
|
24
|
+
# 16nm,1e-9,1024,1024,0.381,131570,leak
|
|
25
|
+
# # Read: 2.58 uW / MHz
|
|
26
|
+
# # Write: 2.46 uW / MHz
|
|
27
|
+
# # Leak + Refresh: (105uw leak) + (276uW refresh) = 381uW
|
|
28
|
+
# # @ARTICLE{9131838,
|
|
29
|
+
# # author={Giterman, Robert and Shalom, Amir and Burg, Andreas and Fish, Alexander and Teman, Adam},
|
|
30
|
+
# # journal={IEEE Solid-State Circuits Letters},
|
|
31
|
+
# # title={A 1-Mbit Fully Logic-Compatible 3T Gain-Cell Embedded DRAM in 16-nm FinFET},
|
|
32
|
+
# # year={2020},
|
|
33
|
+
# # volume={3},
|
|
34
|
+
# # number={},
|
|
35
|
+
# # pages={110-113},
|
|
36
|
+
# # keywords={Random access memory;FinFETs;Temperature measurement;Leakage currents;Power demand;Voltage measurement;Embedded DRAM;gain cell (GC);low voltage;retention time;SRAM},
|
|
37
|
+
# # doi={10.1109/LSSC.2020.3006496}}
|
|
38
|
+
class RaaamEDRAM(LibraryEstimatorClassBase):
    """
    Embedded DRAM model based on the RAAAM eDRAM of Giterman et al., LSSC 2020
    (a megabit-class gain-cell eDRAM).

    Parameters
    ----------
    tech_node: float
        Technology node in meters.
    width: int
        Number of bits moved by one read or write, i.e. the port width. The total
        capacity is width * depth.
    depth: int
        Number of entries, each `width` bits wide. The total capacity is
        width * depth.
    """

    component_name = "raaam_edram"
    priority = 0.9

    def __init__(self, tech_node: float, width: int = 1024, depth: int = 1024):
        # Reference design point from the CSV above: 16nm, 1024 x 1024 bits.
        super().__init__(area=131570.0e-12, leak_power=3.81e-4)
        self.tech_node: float = self.scale(
            "tech_node", tech_node, 16e-9,
            tech_node_energy, tech_node_area, tech_node_leak,
        )
        self.width: int = self.scale(
            "width",
            width,
            1024,
            linear,
            linear,
            linear,
        )
        self.depth: int = self.scale(
            "depth",
            depth,
            1024,
            linear,
            cacti_depth_energy,
            cacti_depth_energy,
        )

    @actionDynamicEnergy(bits_per_action="width")
    def read(self) -> float:
        """
        Energy of one read access.

        Parameters
        ----------
        bits_per_action: int
            The number of bits to read.

        Returns
        -------
        float
            Read energy in Joules at the reference design point.
        """
        read_energy_joules = 2641.92e-12
        return read_energy_joules

    @actionDynamicEnergy(bits_per_action="width")
    def write(self) -> float:
        """
        Energy of one write access.

        Parameters
        ----------
        bits_per_action: int
            The number of bits to write.

        Returns
        -------
        float
            Write energy in Joules at the reference design point.
        """
        write_energy_joules = 2519.04e-12
        return write_energy_joules
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class SmartBufferSRAM(LibraryEstimatorClassBase):
    """
    An SRAM with an address generator that sequentially reads addresses in the SRAM.

    Parameters
    ----------
        tech_node: The technology node in meters.
        width: The width of the read and write ports in bits. This is the number of bits
            that are accessed by any one read/write. Total size = width * depth.
        depth: The number of entries in the SRAM, each with `width` bits. Total size =
            width * depth.
        n_rw_ports: The number of read/write ports. Bandwidth will increase with more
            ports.
        n_banks: The number of banks. Bandwidth will increase with more banks.

    Attributes
    ----------
        sram: The SRAM buffer.
        address_reg: The register that holds the current address.
        delta_reg: The register that holds the increment value.
        adder: The adder that adds the increment value to the current address.
    """
    component_name = ["smart_buffer_sram", "smartbuffer_sram", "smartbuffersram"]
    priority = 0.5

    def __init__(
        self,
        tech_node: float,
        width: int,
        depth: int,
        n_rw_ports: int=1,
        n_banks: int=1,
    ):
        # Imported locally so this class does not depend on `math` happening to leak
        # into the module namespace through the star-import of hwcomponents.scaling.
        import math

        self.sram: SRAM = SRAM(
            tech_node=tech_node,
            width=width,
            depth=depth,
            n_rw_ports=n_rw_ports,
            n_banks=n_banks,
        )
        # Bits needed to address `depth` entries; at least one bit even when depth == 1.
        self.address_bits = max(math.ceil(math.log2(depth)), 1)
        self.width = width

        # The address generator: two address-wide registers and an address-wide adder.
        self.address_reg = AladdinRegister(width=self.address_bits, tech_node=tech_node)
        self.delta_reg = AladdinRegister(width=self.address_bits, tech_node=tech_node)
        self.adder = AladdinAdder(width=self.address_bits, tech_node=tech_node)

        super().__init__(subcomponents=[
            self.sram,
            self.address_reg,
            self.delta_reg,
            self.adder,
        ])

    @actionDynamicEnergy(bits_per_action="width")
    def read(self) -> float:
        """
        Returns the energy consumed by a read operation in Joules.

        Parameters
        ----------
        bits_per_action: int
            The number of bits to read.

        Returns
        -------
        float
            The energy consumed by a read operation in Joules.
        """
        # NOTE(review): nothing is returned explicitly; presumably the decorator/base
        # class accounts for the subcomponent actions invoked below -- confirm.
        self.sram.read(bits_per_action=self.width)
        self.address_reg.read()
        self.delta_reg.read()
        self.adder.add()

    @actionDynamicEnergy(bits_per_action="width")
    def write(self) -> float:
        """
        Returns the energy consumed by a write operation in Joules.

        Parameters
        ----------
        bits_per_action: int
            The number of bits to write.

        Returns
        -------
        float
            The energy consumed by a write operation in Joules.
        """
        # NOTE(review): nothing is returned explicitly; presumably the decorator/base
        # class accounts for the subcomponent actions invoked below -- confirm.
        self.sram.write(bits_per_action=self.width)
        self.address_reg.write()
        self.delta_reg.read()
        self.adder.add()
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""
|
|
2
|
+
@ARTICLE{8474954,
|
|
3
|
+
author={Nag, Anirban and Balasubramonian, Rajeev and Srikumar, Vivek and Walker, Ross and Shafiee, Ali and Strachan, John Paul and Muralimanohar, Naveen},
|
|
4
|
+
journal={IEEE Micro},
|
|
5
|
+
title={Newton: Gravitating Towards the Physical Limits of Crossbar Acceleration},
|
|
6
|
+
year={2018},
|
|
7
|
+
volume={38},
|
|
8
|
+
number={5},
|
|
9
|
+
pages={41-49},
|
|
10
|
+
doi={10.1109/MM.2018.053631140}}
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from hwcomponents_library.base import LibraryEstimatorClassBase
|
|
14
|
+
from hwcomponents.scaling import *
|
|
15
|
+
from hwcomponents import actionDynamicEnergy
|
|
16
|
+
from .isaac import IsaacADC
|
|
17
|
+
from .isaac import IsaacDAC
|
|
18
|
+
from .isaac import IsaacEDRAM
|
|
19
|
+
from .isaac import IsaacEDRAMBus
|
|
20
|
+
from .isaac import IsaacRouter
|
|
21
|
+
from .isaac import IsaacShiftAdd
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Original CSV contents:
|
|
25
|
+
# tech_node,global_cycle_period,resolution,energy,area,action
|
|
26
|
+
# 32nm,1e-9,9,2.58333333333,1500,convert|read
|
|
27
|
+
# 32nm,1e-9,9,0,1500,write|update|leak
|
|
28
|
+
# # Energy: 3.1*10^-3 W / (1.2*10^9 ADC BW) * 10 ^ 12 J->pJ
|
|
29
|
+
# # Newton's adaptive ADC resolution table:
|
|
30
|
+
# # 9,9,7,5,1,0,9,3
|
|
31
|
+
# # 9,9,8,6,2,0,9,4
|
|
32
|
+
# # 9,9,9,7,3,1,9,5
|
|
33
|
+
# # 9,9,9,8,4,2,9,6
|
|
34
|
+
# # 8,9,9,9,5,3,9,7
|
|
35
|
+
# # 7,9,9,9,6,4,9,8
|
|
36
|
+
# # 6,8,9,9,7,5,9,9
|
|
37
|
+
# # 5,7,9,9,8,6,9,9
|
|
38
|
+
# # 4,6,9,9,9,7,8,9
|
|
39
|
+
# # 3,5,9,9,9,8,7,9
|
|
40
|
+
# # 2,4,8,9,9,9,6,9
|
|
41
|
+
# # 1,3,7,9,9,9,5,9
|
|
42
|
+
# # 0,2,6,8,9,9,4,9
|
|
43
|
+
# # 0,1,5,7,9,9,3,9
|
|
44
|
+
# # 0,0,4,6,9,9,2,8
|
|
45
|
+
# # 0,0,3,5,9,9,1,7
|
|
46
|
+
# # Newton assumes a linear scaling: 9-bit ADC uses X/9 power for X-bit convert.
|
|
47
|
+
# # Matches with the table above:
|
|
48
|
+
# # Sum of this table is 832. Sum of full-resolution (all table entries = 9)
|
|
49
|
+
# # is 1152. This is a 40% reduction, matching with the reported 40%
|
|
50
|
+
# # ADC power reduction in the paper.
|
|
51
|
+
class NewtonADC(LibraryEstimatorClassBase):
    """
    ADC model taken from the Newton paper: a 9-bit converter that may optionally
    stop quantizing once a given number of bits has been resolved.

    Parameters
    ----------
    tech_node: float
        Technology node in meters.
    resolution: int
        Resolution of the ADC in bits.
    """

    component_name = "newton_adc"
    priority = 0.9

    def __init__(self, tech_node: float, resolution: int = 9):
        # Reference design point from the CSV above: 32nm, 9 bits, no leakage entry.
        super().__init__(area=1500.0e-12, leak_power=0.0)
        self.tech_node: float = self.scale(
            "tech_node", tech_node, 32e-9,
            tech_node_energy, tech_node_area, tech_node_leak,
        )
        self.resolution: int = self.scale(
            "resolution",
            resolution,
            9,
            pow_base(2),
            pow_base(2),
            pow_base(2),
        )

    @actionDynamicEnergy
    def convert(self) -> float:
        """
        Energy of one analog-to-digital conversion.

        Returns
        -------
        float
            Conversion energy in Joules at the reference design point.
        """
        convert_energy_joules = 2.58333333333e-12
        return convert_energy_joules

    @actionDynamicEnergy
    def read(self) -> float:
        """
        Energy of a read, which costs the same as one conversion.

        Returns
        -------
        float
            Conversion energy in Joules at the reference design point.
        """
        read_energy_joules = 2.58333333333e-12
        return read_energy_joules
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class NewtonADC(IsaacADC):
    """
    Newton ADC exposed as a plain subclass of the ISAAC ADC model.

    NOTE(review): a class with this same name is defined earlier in this module.
    This later definition rebinds the module-level name ``NewtonADC``, so confirm
    which of the two definitions is the intended one.
    """
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class NewtonDAC(IsaacDAC):
    """Newton DAC; adds nothing on top of the ISAAC DAC model it subclasses."""
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class NewtonRouter(IsaacRouter):
    """Newton router; adds nothing on top of the ISAAC router model it subclasses."""
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class NewtonEDRAM(IsaacEDRAM):
    """Newton eDRAM; adds nothing on top of the ISAAC eDRAM model it subclasses."""
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class NewtonEDRAMBus(IsaacEDRAMBus):
    """Newton eDRAM bus; adds nothing on top of the ISAAC eDRAM bus model it subclasses."""
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class NewtonShiftAdd(IsaacShiftAdd):
    """Newton shift-add; adds nothing on top of the ISAAC shift-add model it subclasses."""
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""
|
|
2
|
+
@inproceedings{10.1145/3579371.3589062,
|
|
3
|
+
author = {Andrulis, Tanner and Emer, Joel S. and Sze, Vivienne},
|
|
4
|
+
title = {RAELLA: Reforming the Arithmetic for Efficient, Low-Resolution, and Low-Loss Analog PIM: No Retraining Required!},
|
|
5
|
+
year = {2023},
|
|
6
|
+
isbn = {9798400700958},
|
|
7
|
+
publisher = {Association for Computing Machinery},
|
|
8
|
+
address = {New York, NY, USA},
|
|
9
|
+
url = {https://doi.org/10.1145/3579371.3589062},
|
|
10
|
+
doi = {10.1145/3579371.3589062},
|
|
11
|
+
abstract = {Processing-In-Memory (PIM) accelerators have the potential to efficiently run Deep Neural Network (DNN) inference by reducing costly data movement and by using resistive RAM (ReRAM) for efficient analog compute. Unfortunately, overall PIM accelerator efficiency is limited by energy-intensive analog-to-digital converters (ADCs). Furthermore, existing accelerators that reduce ADC cost do so by changing DNN weights or by using low-resolution ADCs that reduce output fidelity. These strategies harm DNN accuracy and/or require costly DNN retraining to compensate.To address these issues, we propose the RAELLA architecture. RAELLA adapts the architecture to each DNN; it lowers the resolution of computed analog values by encoding weights to produce near-zero analog values, adaptively slicing weights for each DNN layer, and dynamically slicing inputs through speculation and recovery. Low-resolution analog values allow RAELLA to both use efficient low-resolution ADCs and maintain accuracy without retraining, all while computing with fewer ADC converts.Compared to other low-accuracy-loss PIM accelerators, RAELLA increases energy efficiency by up to 4.9\texttimes{} and throughput by up to 3.3\texttimes{}. Compared to PIM accelerators that cause accuracy loss and retrain DNNs to recover, RAELLA achieves similar efficiency and throughput without expensive DNN retraining.},
|
|
12
|
+
booktitle = {Proceedings of the 50th Annual International Symposium on Computer Architecture},
|
|
13
|
+
articleno = {27},
|
|
14
|
+
numpages = {16},
|
|
15
|
+
keywords = {processing in memory, compute in memory, analog, neural networks, accelerator, architecture, slicing, ADC, ReRAM},
|
|
16
|
+
location = {Orlando, FL, USA},
|
|
17
|
+
series = {ISCA '23}
|
|
18
|
+
}
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from hwcomponents_library.base import LibraryEstimatorClassBase
|
|
22
|
+
from hwcomponents.scaling import *
|
|
23
|
+
from hwcomponents import actionDynamicEnergy
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Original CSV contents:
|
|
27
|
+
# tech_node,global_cycle_period,energy,area,n_instances,action
|
|
28
|
+
# 40nm,1e-9,0.25,0,1,multiply|read,
|
|
29
|
+
# 40nm,1e-9,0,0,1,update|leak|write,
|
|
30
|
+
# # Assuming multiplication energy scales linearly with input, weight, and output energy
|
|
31
|
+
# # Efficient processing of DNNs (Sze, 2020): 8b*8b->16b multiply 0.2pJ
|
|
32
|
+
# # 16b * 8b -> 8b multiply: 0.2 pJ
|
|
33
|
+
# # We do this at the L2 (large) tile level, so area will be negligible
|
|
34
|
+
class RaellaQuantMultiplier(LibraryEstimatorClassBase):
    """
    Quantization multiplier from the RAELLA paper. It multiplies a partial-sum value
    by a quantization scale in order to apply linear quantization.

    Parameters
    ----------
    tech_node: float
        Technology node in meters.
    resolution: int
        Resolution of the multiplier in bits. Scaling is linear relative to the
        16-bit reference.
    """

    component_name = "raella_quant_multiplier"
    priority = 0.9

    def __init__(self, tech_node: float, resolution: int=16):
        # Reference design point from the CSV above: 40nm, zero area, no leakage.
        super().__init__(area=0.0e-12, leak_power=0.0)
        self.tech_node: float = self.scale(
            "tech_node", tech_node, 40e-9,
            tech_node_energy, tech_node_area, tech_node_leak,
        )
        self.resolution: int = self.scale(
            "resolution", resolution, 16, linear, linear, linear
        )

    @actionDynamicEnergy
    def multiply(self) -> float:
        """
        Energy of one multiply operation.

        Returns
        -------
        float
            Multiply energy in Joules at the reference design point.
        """
        multiply_energy_joules = 0.25e-12
        return multiply_energy_joules

    @actionDynamicEnergy
    def read(self) -> float:
        """
        Energy of a read, which costs the same as one multiply.

        Returns
        -------
        float
            Multiply energy in Joules at the reference design point.
        """
        read_energy_joules = 0.25e-12
        return read_energy_joules
|