accelforge-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of accelforge might be problematic.

Files changed (258)
  1. accelforge/__init__.py +21 -0
  2. accelforge/_accelerated_imports.py +16 -0
  3. accelforge/_deprecate/_simanneal/evalmapping.py +271 -0
  4. accelforge/_deprecate/_simanneal/mapspaceglobals.py +298 -0
  5. accelforge/_deprecate/_simanneal/simanneal.py +666 -0
  6. accelforge/_deprecate/_simanneal/tracking.py +105 -0
  7. accelforge/_deprecate/_simanneal/wrappers.py +218 -0
  8. accelforge/_deprecate/_simanneal2/__init__.py +7 -0
  9. accelforge/_deprecate/_simanneal2/simanneal.py +493 -0
  10. accelforge/_deprecate/_simanneal2/tracking.py +116 -0
  11. accelforge/_deprecate/compatibility_util.py +181 -0
  12. accelforge/_deprecate/layerdeduplication/__init__.py +2 -0
  13. accelforge/_deprecate/layerdeduplication/group_similar_einsums.py +160 -0
  14. accelforge/_deprecate/layerdeduplication/grouped_einsums.py +84 -0
  15. accelforge/_deprecate/mapping_filter_tags/__init__.py +2 -0
  16. accelforge/_deprecate/mapping_filter_tags/ffmt.py +212 -0
  17. accelforge/_deprecate/mapping_filter_tags/onesplit.py +24 -0
  18. accelforge/_deprecate/mapping_filter_tags/util.py +24 -0
  19. accelforge/_deprecate/tags.py +69 -0
  20. accelforge/_deprecate/viz/__init__.py +0 -0
  21. accelforge/_deprecate/viz/interactive.py +159 -0
  22. accelforge/_deprecate/viz/reservationtree.py +307 -0
  23. accelforge/_deprecate/viz/ski_slope.py +88 -0
  24. accelforge/_version.py +15 -0
  25. accelforge/examples.py +39 -0
  26. accelforge/frontend/__init__.py +10 -0
  27. accelforge/frontend/_binding.py +129 -0
  28. accelforge/frontend/_workload_isl/__init__.py +2 -0
  29. accelforge/frontend/_workload_isl/_isl.py +149 -0
  30. accelforge/frontend/_workload_isl/_symbolic.py +141 -0
  31. accelforge/frontend/arch copy.py +1544 -0
  32. accelforge/frontend/arch.py +1642 -0
  33. accelforge/frontend/config.py +63 -0
  34. accelforge/frontend/mapper/__init__.py +5 -0
  35. accelforge/frontend/mapper/ffm.py +126 -0
  36. accelforge/frontend/mapper/mapper.py +7 -0
  37. accelforge/frontend/mapper/metrics.py +30 -0
  38. accelforge/frontend/mapping/__init__.py +1 -0
  39. accelforge/frontend/mapping/mapping.py +1736 -0
  40. accelforge/frontend/model.py +14 -0
  41. accelforge/frontend/renames.py +150 -0
  42. accelforge/frontend/spec copy.py +230 -0
  43. accelforge/frontend/spec.py +301 -0
  44. accelforge/frontend/variables.py +12 -0
  45. accelforge/frontend/workload.py +952 -0
  46. accelforge/mapper/FFM/__init__.py +9 -0
  47. accelforge/mapper/FFM/_join_pmappings/__init__.py +0 -0
  48. accelforge/mapper/FFM/_join_pmappings/compatibility.py +653 -0
  49. accelforge/mapper/FFM/_join_pmappings/compress_pmappings.py +140 -0
  50. accelforge/mapper/FFM/_join_pmappings/join_pmappings.py +703 -0
  51. accelforge/mapper/FFM/_join_pmappings/pmapping_dataframe.py +901 -0
  52. accelforge/mapper/FFM/_join_pmappings/pmapping_group.py +337 -0
  53. accelforge/mapper/FFM/_make_pmappings/contraints/__init__.py +0 -0
  54. accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py +360 -0
  55. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/__init__.py +1 -0
  56. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_loops.py +373 -0
  57. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_pmapping_templates.py +463 -0
  58. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_reservations.py +95 -0
  59. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storage_order.py +382 -0
  60. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storages.py +155 -0
  61. accelforge/mapper/FFM/_make_pmappings/make_pmappings.py +411 -0
  62. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/__init__.py +1 -0
  63. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_pmappings_from_templates.py +407 -0
  64. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_tile_shapes.py +1681 -0
  65. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/run_model.py +170 -0
  66. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/symbol_relations.py +174 -0
  67. accelforge/mapper/FFM/_make_pmappings/pmapper_job.py +282 -0
  68. accelforge/mapper/FFM/_pareto_df/df_convention.py +273 -0
  69. accelforge/mapper/FFM/_pareto_df/pareto copy.py +836 -0
  70. accelforge/mapper/FFM/_pareto_df/pareto.py +508 -0
  71. accelforge/mapper/FFM/data.py +61 -0
  72. accelforge/mapper/FFM/main copy.py +236 -0
  73. accelforge/mapper/FFM/main.py +208 -0
  74. accelforge/mapper/FFM/mappings.py +510 -0
  75. accelforge/mapper/FFM/pmappings.py +310 -0
  76. accelforge/mapper/__init__.py +4 -0
  77. accelforge/mapper.py +0 -0
  78. accelforge/model/__init__.py +1 -0
  79. accelforge/model/_looptree/__init__.py +0 -0
  80. accelforge/model/_looptree/accesses.py +335 -0
  81. accelforge/model/_looptree/capacity/__init__.py +1 -0
  82. accelforge/model/_looptree/capacity/aggregators.py +36 -0
  83. accelforge/model/_looptree/capacity/capacity.py +47 -0
  84. accelforge/model/_looptree/energy.py +150 -0
  85. accelforge/model/_looptree/equivalent_ranks.py +29 -0
  86. accelforge/model/_looptree/latency/__init__.py +1 -0
  87. accelforge/model/_looptree/latency/latency.py +98 -0
  88. accelforge/model/_looptree/latency/memory.py +120 -0
  89. accelforge/model/_looptree/latency/processors.py +92 -0
  90. accelforge/model/_looptree/mapping_utilities.py +71 -0
  91. accelforge/model/_looptree/reuse/__init__.py +4 -0
  92. accelforge/model/_looptree/reuse/isl/__init__.py +1 -0
  93. accelforge/model/_looptree/reuse/isl/des.py +59 -0
  94. accelforge/model/_looptree/reuse/isl/isl_functions.py +374 -0
  95. accelforge/model/_looptree/reuse/isl/mapping_to_isl/__init__.py +4 -0
  96. accelforge/model/_looptree/reuse/isl/mapping_to_isl/analyze_mapping.py +297 -0
  97. accelforge/model/_looptree/reuse/isl/mapping_to_isl/skews_from_mapping.py +236 -0
  98. accelforge/model/_looptree/reuse/isl/mapping_to_isl/tiling.py +685 -0
  99. accelforge/model/_looptree/reuse/isl/mapping_to_isl/types.py +188 -0
  100. accelforge/model/_looptree/reuse/isl/spatial.py +260 -0
  101. accelforge/model/_looptree/reuse/isl/temporal.py +182 -0
  102. accelforge/model/_looptree/reuse/symbolic/__init__.py +1 -0
  103. accelforge/model/_looptree/reuse/symbolic/symbolic copy 2.py +1346 -0
  104. accelforge/model/_looptree/reuse/symbolic/symbolic copy.py +1408 -0
  105. accelforge/model/_looptree/reuse/symbolic/symbolic.py +1396 -0
  106. accelforge/model/_looptree/run.py +122 -0
  107. accelforge/model/_looptree/types.py +26 -0
  108. accelforge/model/_looptree/visualization/__init__.py +0 -0
  109. accelforge/model/_looptree/visualization/occupancy.py +11 -0
  110. accelforge/model/main.py +222 -0
  111. accelforge/plotting/__init__.py +2 -0
  112. accelforge/plotting/mappings.py +219 -0
  113. accelforge/plotting/specs.py +57 -0
  114. accelforge/util/__init__.py +4 -0
  115. accelforge/util/_base_analysis_types.py +24 -0
  116. accelforge/util/_basetypes.py +1089 -0
  117. accelforge/util/_frozenset.py +36 -0
  118. accelforge/util/_isl.py +29 -0
  119. accelforge/util/_itertools.py +14 -0
  120. accelforge/util/_mathfuncs.py +57 -0
  121. accelforge/util/_parse_expressions.py +339 -0
  122. accelforge/util/_picklecache.py +32 -0
  123. accelforge/util/_setexpressions.py +268 -0
  124. accelforge/util/_sympy/__init__.py +0 -0
  125. accelforge/util/_sympy/broadcast_max.py +18 -0
  126. accelforge/util/_visualization.py +112 -0
  127. accelforge/util/_yaml.py +579 -0
  128. accelforge/util/parallel.py +193 -0
  129. accelforge-0.0.1.dist-info/METADATA +64 -0
  130. accelforge-0.0.1.dist-info/RECORD +258 -0
  131. accelforge-0.0.1.dist-info/WHEEL +5 -0
  132. accelforge-0.0.1.dist-info/licenses/LICENSE +19 -0
  133. accelforge-0.0.1.dist-info/top_level.txt +5 -0
  134. docs/_build/html/_sources/fastfusion.frontend.mapper.rst.txt +37 -0
  135. docs/_build/html/_sources/fastfusion.frontend.rst.txt +70 -0
  136. docs/_build/html/_sources/fastfusion.frontend.workload.rst.txt +21 -0
  137. docs/_build/html/_sources/fastfusion.mapper.FFM.rst.txt +37 -0
  138. docs/_build/html/_sources/fastfusion.mapper.rst.txt +18 -0
  139. docs/_build/html/_sources/fastfusion.rst.txt +20 -0
  140. docs/_build/html/_sources/fastfusion.util.rst.txt +21 -0
  141. docs/_build/html/_sources/index.rst.txt +87 -0
  142. docs/_build/html/_sources/modules.rst.txt +7 -0
  143. docs/_build/html/_sources/notes/citation.rst.txt +45 -0
  144. docs/_build/html/_sources/notes/definitions.rst.txt +43 -0
  145. docs/_build/html/_sources/notes/faqs.rst.txt +39 -0
  146. docs/_build/html/_sources/notes/modeling/accelerator_energy_latency.rst.txt +72 -0
  147. docs/_build/html/_sources/notes/modeling/component_energy_area.rst.txt +96 -0
  148. docs/_build/html/_sources/notes/modeling/mapping.rst.txt +100 -0
  149. docs/_build/html/_sources/notes/modeling.rst.txt +33 -0
  150. docs/_build/html/_sources/notes/parsing/arithmetic_parsing.rst.txt +136 -0
  151. docs/_build/html/_sources/notes/parsing/setexpressions.rst.txt +63 -0
  152. docs/_build/html/_sources/notes/parsing/yaml_parsing.rst.txt +176 -0
  153. docs/_build/html/_sources/notes/quickstart_and_installation.rst.txt +9 -0
  154. docs/_build/html/_sources/notes/spec/architecture.rst.txt +133 -0
  155. docs/_build/html/_sources/notes/spec/mapping.rst.txt +12 -0
  156. docs/_build/html/_sources/notes/spec/workload.rst.txt +83 -0
  157. docs/_build/html/_sources/notes/spec.rst.txt +36 -0
  158. docs/source/_ext/include_attrs.py +213 -0
  159. docs/source/_ext/include_docstring.py +364 -0
  160. docs/source/_ext/include_functions.py +154 -0
  161. docs/source/_ext/include_notebook.py +131 -0
  162. docs/source/_ext/include_yaml.py +119 -0
  163. docs/source/_ext/inherited_attributes.py +222 -0
  164. docs/source/_ext/paths.py +4 -0
  165. docs/source/conf.py +79 -0
  166. examples/arches/compute_in_memory/_include.yaml +74 -0
  167. examples/arches/compute_in_memory/_include_functions.py +229 -0
  168. examples/arches/compute_in_memory/_load_spec.py +57 -0
  169. examples/arches/compute_in_memory/components/c2c_multiplier.py +181 -0
  170. examples/arches/compute_in_memory/components/dac_c2c_r2r.py +605 -0
  171. examples/arches/compute_in_memory/components/misc.py +195 -0
  172. examples/arches/compute_in_memory/components/util/bit_functions.py +51 -0
  173. examples/arches/compute_in_memory/components/zero_comparator.py +92 -0
  174. examples/arches/compute_in_memory/isaac.yaml +233 -0
  175. examples/arches/compute_in_memory/memory_cells/ecram_demo.yaml +63 -0
  176. examples/arches/compute_in_memory/memory_cells/rram_example.yaml +63 -0
  177. examples/arches/compute_in_memory/memory_cells/rram_isaac_isca_2016.yaml +64 -0
  178. examples/arches/compute_in_memory/memory_cells/rram_neurosim_default.yaml +63 -0
  179. examples/arches/compute_in_memory/memory_cells/rram_raella_isca_2023.yaml +70 -0
  180. examples/arches/compute_in_memory/memory_cells/rram_wan_nature_2022.yaml +63 -0
  181. examples/arches/compute_in_memory/memory_cells/sram_colonnade_jssc_2021.yaml +63 -0
  182. examples/arches/compute_in_memory/memory_cells/sram_example.yaml +63 -0
  183. examples/arches/compute_in_memory/memory_cells/sram_jia_jssc_2020.yaml +63 -0
  184. examples/arches/compute_in_memory/memory_cells/sram_sinangil_jssc_2021.yaml +63 -0
  185. examples/arches/compute_in_memory/memory_cells/sram_wang_vlsi_2022.yaml +63 -0
  186. examples/arches/compute_in_memory/wang_vlsi_2022.yaml +289 -0
  187. examples/arches/eyeriss.yaml +68 -0
  188. examples/arches/fanout_variations/at_glb.yaml +31 -0
  189. examples/arches/fanout_variations/at_glb_with_fanout_node.yaml +34 -0
  190. examples/arches/fanout_variations/at_mac.yaml +31 -0
  191. examples/arches/fanout_variations/at_mac_with_constraints.yaml +38 -0
  192. examples/arches/fanout_variations/at_mac_with_fanout_node.yaml +34 -0
  193. examples/arches/nvdla.yaml +47 -0
  194. examples/arches/simple.yaml +28 -0
  195. examples/arches/tpu_v4i.yaml +67 -0
  196. examples/mappings/unfused_matmuls_to_simple.yaml +33 -0
  197. examples/misc/component_annotated.yaml +33 -0
  198. examples/workloads/gpt3_6.7B.yaml +124 -0
  199. examples/workloads/matmuls.yaml +20 -0
  200. examples/workloads/mobilenet_28.yaml +81 -0
  201. examples/workloads/mobilenet_various_separate.yaml +106 -0
  202. examples/workloads/three_matmuls_annotated.yaml +59 -0
  203. notebooks/.ipynb_checkpoints/fastfusion_arch_study_michael-checkpoint.ipynb +359 -0
  204. notebooks/compute_in_memory/_scripts.py +339 -0
  205. notebooks/compute_in_memory/isaac.guide.ipynb +270 -0
  206. notebooks/compute_in_memory/wang_vlsi_2022.ipynb +602 -0
  207. notebooks/paths.py +4 -0
  208. notebooks/tutorials/.ipynb_checkpoints/1_FFM-checkpoint.ipynb +3110 -0
  209. notebooks/tutorials/FFM.ipynb +3498 -0
  210. notebooks/tutorials/_include.py +48 -0
  211. notebooks/tutorials/component_energy_area.ipynb +363 -0
  212. tests/Q_mapping.yaml +38 -0
  213. tests/__init__.py +0 -0
  214. tests/conv.mapping.yaml +27 -0
  215. tests/conv.workload.yaml +13 -0
  216. tests/conv_sym.mapping.yaml +43 -0
  217. tests/copy.mapping.yaml +35 -0
  218. tests/copy.workload.yaml +15 -0
  219. tests/distribuffers/__init__.py +0 -0
  220. tests/distribuffers/multicast/test_cases.yaml +482 -0
  221. tests/distribuffers/spec/binding/valid_bindings.yaml +97 -0
  222. tests/distribuffers/spec/distributed.yaml +100 -0
  223. tests/distribuffers/spec/logical_arch.yaml +32 -0
  224. tests/distribuffers/spec/physical_arch.yaml +69 -0
  225. tests/distribuffers/test_binding.py +48 -0
  226. tests/frontend/__init__.py +0 -0
  227. tests/frontend/test_mapping_viz.py +52 -0
  228. tests/mapper/__init__.py +0 -0
  229. tests/mapper/configs/conv1d/conv1d.mapping.yaml +31 -0
  230. tests/mapper/configs/conv1d/conv1d.workload.yaml +11 -0
  231. tests/mapper/configs/two_conv1d/two_conv1d.expected.yaml +38 -0
  232. tests/mapper/configs/two_conv1d/two_conv1d.mapping.yaml +54 -0
  233. tests/mapper/configs/two_conv1d/two_conv1d.workload.yaml +19 -0
  234. tests/mapper/test_mapping_to_isl.py +90 -0
  235. tests/mapper/test_spatial_reuse_analysis.py +67 -0
  236. tests/mapper/test_temporal_reuse_analysis.py +56 -0
  237. tests/mapper/util.py +58 -0
  238. tests/matmul.mapping.yaml +29 -0
  239. tests/matmul.workload.yaml +12 -0
  240. tests/matmul_spatial.mapping.yaml +44 -0
  241. tests/mha.renames.yaml +65 -0
  242. tests/mha.workload.yaml +67 -0
  243. tests/mha.yaml +59 -0
  244. tests/mha_full.workload.yaml +67 -0
  245. tests/mobilenet.workload.yaml +35 -0
  246. tests/mobilenet_long.workload.yaml +64 -0
  247. tests/pmappingcache.py +24 -0
  248. tests/processing_stage.arch.yaml +40 -0
  249. tests/snowcat.arch.yaml +36 -0
  250. tests/test_ffm_join_pmappings.py +106 -0
  251. tests/test_ffm_make_pmappings.py +82 -0
  252. tests/test_ffm_make_tile_shapes.py +49 -0
  253. tests/test_mapper.py +100 -0
  254. tests/test_model.py +37 -0
  255. tests/test_plotting.py +72 -0
  256. tests/test_processing_stage.py +46 -0
  257. tests/test_symbolic_model.py +248 -0
  258. tests/test_workload.py +141 -0
examples/arches/compute_in_memory/components/misc.py
@@ -0,0 +1,195 @@
+ from numbers import Number
+ from typing import Optional, List
+ from hwcomponents.scaling import tech_node_area
+ from util.bit_functions import rescale_sum_to_1
+ from hwcomponents import ComponentModel, action
+
+
+ class Capacitor(ComponentModel):
+     """
+     A capacitor.
+
+     Parameters
+     ----------
+     capacitance: float
+         The capacitance of this capacitor in Farads.
+     tech_node: float
+         The technology node in meters.
+     voltage: float
+         The supply voltage in volts.
+     cap_per_m2: float
+         The capacitance per square meter in Farads per square meter.
+     border_area: float
+         The border area around the capacitor in square meters.
+     voltage_raise_threshold: float
+         Latency is calculated as the time it takes to raise voltage to this
+         proportion of the target voltage.
+     supply_resistance: float
+         The supply resistance in ohms. If 0, then voltage is assumed to converge
+         instantly.
+
+     Attributes
+     ----------
+     capacitance: float
+         The capacitance of this capacitor in Farads.
+     tech_node: float
+         The technology node in meters.
+     voltage: float
+         The supply voltage in volts.
+     cap_per_m2: float
+         The capacitance per square meter in Farads per square meter.
+     border_area: float
+         The border area around the capacitor in square meters.
+     voltage_raise_threshold: float
+         Latency is calculated as the time it takes to raise voltage to this
+         proportion of the target voltage.
+     supply_resistance: float
+         The supply resistance in ohms. If 0, then voltage is assumed to converge
+         instantly.
+     """
+
+     priority = 0.5
+     """
+     Priority determines which model is used when multiple models are available for a
+     given component. Higher-priority models are used first. Must be a number between
+     0 and 1.
+     """
+
+     def __init__(
+         self,
+         capacitance: Number,
+         tech_node: float,
+         voltage: Number = 0.7,
+         cap_per_m2: Optional[Number] = "1e-3 scaled by tech node",
+         border_area: Optional[Number] = "1e-12 scaled by tech node",
+     ):
+         self.capacitance = capacitance
+         self.voltage = voltage
+
+         if cap_per_m2 == "1e-3 scaled by tech node":
+             cap_per_m2 = 2.3e-3 * tech_node_area(tech_node, 22e-9)
+         if border_area == "1e-12 scaled by tech node":
+             border_area = 1e-12 * tech_node_area(tech_node, 22e-9)
+
+         self.cap_per_m2 = cap_per_m2
+         self.border_area = border_area
+
+         super().__init__(
+             area=self.capacitance / self.cap_per_m2 + self.border_area, leak_power=0
+         )
+
+     @action
+     def raise_voltage_to(
+         self,
+         target_voltage: float,
+         supply_voltage: float = None,
+     ) -> tuple:
+         """
+         Raise the voltage to the target voltage using the supply voltage as a supply.
+
+         Parameters
+         ----------
+         target_voltage: float
+             The target voltage to raise the voltage to.
+         supply_voltage: float
+             The supply voltage to use as a supply. If None, then the supply voltage
+             is assumed to be the voltage set in the attributes of this capacitor.
+
+         Returns
+         -------
+         energy, latency: tuple
+             The energy required to raise the voltage to the target voltage. Latency
+             is 0.
+         """
+         if supply_voltage is None:
+             supply_voltage = self.voltage
+         assert target_voltage <= supply_voltage, (
+             f"Cannot raise voltage to {target_voltage} when supply voltage "
+             f"is {supply_voltage}."
+         )
+         return self.capacitance * target_voltage * supply_voltage, 0
+
+     @action
+     def switch(
+         self,
+         value_probabilities: List[Number],
+         zero_between_values: bool = True,
+         supply_voltage: float = None,
+     ) -> tuple:
+         """
+         Calculates the expected energy to switch voltage to the values in
+         value_probabilities.
+
+         Parameters
+         ----------
+         value_probabilities: List[Number]
+             The probabilities of the values to switch to. This is a histogram,
+             assumed to be spaced between 0 and supply_voltage, inclusive.
+         zero_between_values: bool
+             Whether to zero the voltage between values.
+         supply_voltage: float
+             The supply voltage to use as a supply. If None, then the supply voltage
+             is assumed to be the voltage set in the attributes of this capacitor.
+
+         Returns
+         -------
+         energy, latency: tuple
+             The energy required to switch the voltage to the values in
+             value_probabilities. Latency is 0.
+         """
+         supply_voltage = self.voltage if supply_voltage is None else supply_voltage
+         expected_energy = 0
+         value_probabilities = rescale_sum_to_1(value_probabilities)
+         for v0, p0 in enumerate(value_probabilities):
+             for v1, p1 in enumerate(value_probabilities):
+                 v0 = 0 if zero_between_values else v0
+                 if v1 < v0:
+                     continue
+                 e0 = self.raise_voltage_to(
+                     v0 / (len(value_probabilities) - 1) * self.voltage, supply_voltage
+                 )[0]
+                 e1 = self.raise_voltage_to(
+                     v1 / (len(value_probabilities) - 1) * self.voltage, supply_voltage
+                 )[0]
+                 expected_energy += (e1 - e0) * p0 * p1
+         return expected_energy, 0
+
+
+ class Wire(Capacitor):
+     """
+     A wire.
+
+     Parameters
+     ----------
+     length: Number
+         The length of the wire in meters.
+     capacitance_per_m: Number
+         The capacitance per meter in Farads per meter.
+     voltage: Number
+         The supply voltage of the wire in volts.
+
+     Attributes
+     ----------
+     length: Number
+         The length of the wire in meters.
+     capacitance_per_m: Number
+         The capacitance per meter in Farads per meter.
+     voltage: Number
+         The supply voltage of the wire in volts.
+     """
+
+     def __init__(
+         self,
+         length: Number,
+         capacitance_per_m: Number = 2e-10,
+         voltage: Number = 0.7,
+         **kwargs,
+     ):
+         # Forward **kwargs (e.g., tech_node) to Capacitor, which requires them.
+         super().__init__(
+             capacitance=length * capacitance_per_m,
+             voltage=voltage,
+             **kwargs,
+         )
+         self.length = length
+         self.capacitance_per_m = capacitance_per_m
+         self.voltage = voltage
+         self.area_scale = 0
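The switch() action above integrates the raise_voltage_to() relation E = C * V_target * V_supply over a histogram of voltage levels. A minimal usage sketch, assuming the hwcomponents API behaves as shown in this diff; values are illustrative, not from the package:

# Hypothetical usage sketch for the Capacitor model above.
cap = Capacitor(capacitance=1e-15, tech_node=22e-9, voltage=0.7)

# Energy to charge 1 fF to 0.5 V from a 0.7 V supply:
# E = C * V_target * V_supply = 1e-15 * 0.5 * 0.7 = 3.5e-16 J
energy, latency = cap.raise_voltage_to(0.5)

# Expected switching energy over a 3-bin voltage histogram
# (bins span 0..supply; the capacitor is zeroed between values).
energy, latency = cap.switch([0.5, 0.25, 0.25], zero_between_values=True)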
examples/arches/compute_in_memory/components/util/bit_functions.py
@@ -0,0 +1,51 @@
+ import math
+ from typing import List
+
+
+ def rescale_sum_to_1(array: List[float], do_not_change_index: int = -1) -> List[float]:
+     """Rescales all list elements such that the sum is 1."""
+     sum_array = sum([a for i, a in enumerate(array) if i != do_not_change_index])
+     target_sum = 1 - array[do_not_change_index] if do_not_change_index >= 0 else 1
+     scaleby = target_sum / sum_array
+     return [a * scaleby if i != do_not_change_index else a for i, a in enumerate(array)]
+
+
+ def set_element_rescale_sum_to_1(array: List[float], index: int, value: float):
+     """
+     Sets an element of a list, then rescales all list elements such that the sum is 1.
+     """
+     array[index] = value
+     return rescale_sum_to_1(array, index)
+
+
+ def value2bits(value: int, resolution: int) -> List[int]:
+     """Converts a value to a list of bits."""
+     return [int(i) for i in bin(value)[2:].zfill(resolution)]
+
+
+ def bit_distribution_2_hist(
+     bit_distribution: List[float], zero_prob: float = None
+ ) -> List[float]:
+     """Converts a bit distribution to a value distribution."""
+     hist = [1] * 2 ** len(bit_distribution)
+     for value in range(2 ** len(bit_distribution)):
+         bits = value2bits(value, len(bit_distribution))
+         for i, prob in enumerate(bit_distribution):
+             hist[value] *= prob if bits[i] else 1 - prob
+
+     if zero_prob is not None:
+         # Keep the rescaled result; discarding it would leave the histogram
+         # unnormalized around the pinned zero bin.
+         hist = set_element_rescale_sum_to_1(hist, 0, zero_prob)
+     return rescale_sum_to_1(hist)
+
+
+ def hist_2_bit_distribution(hist: List[float]) -> List[float]:
+     """Converts a value distribution to a bit distribution."""
+     sum_hist = sum(hist)
+     hist = [i / sum_hist for i in hist]
+
+     bit_distribution = [0] * math.ceil(math.log(len(hist), 2))
+     for value in range(len(hist)):
+         for i, bit in enumerate(value2bits(value, len(bit_distribution))):
+             bit_distribution[i] += hist[value] * bit
+
+     return bit_distribution
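The helpers above convert between per-bit probabilities and value histograms. A small worked example with illustrative numbers:

# Two independent bits, each 1 with probability 0.5:
hist = bit_distribution_2_hist([0.5, 0.5])
# -> [0.25, 0.25, 0.25, 0.25]  (values 0b00, 0b01, 0b10, 0b11)

bits = hist_2_bit_distribution(hist)
# -> [0.5, 0.5]  (per-bit probability of being 1)

# Pinning P(value == 0) to 0.4 rescales the remaining bins to sum to 0.6:
hist = bit_distribution_2_hist([0.5, 0.5], zero_prob=0.4)
# -> [0.4, 0.2, 0.2, 0.2]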
examples/arches/compute_in_memory/components/zero_comparator.py
@@ -0,0 +1,92 @@
+ from math import ceil, log2
+ from hwcomponents import ComponentModel, action
+ from hwcomponents.scaling import linear, quadratic, reciprocal
+ from hwcomponents_library.library.aladdin import AladdinComparator, AladdinCounter
+ from hwcomponents_neurosim import FlipFlop
+
+
+ class ZeroComparator(ComponentModel):
+     """
+     Counts the number of zeros in a list of values. Includes a flag for each zero.
+
+     Based on the zero-gating logic in the paper: A Programmable Heterogeneous
+     Microprocessor Based on Bit-Scalable In-Memory Computing, by Hongyang Jia,
+     Hossein Valavi, Yinqi Tang, Jintao Zhang, and Naveen Verma, JSSC 2020.
+     10.1109/JSSC.2020.2987714
+
+     Parameters
+     ----------
+     n_comparators: int
+         The number of comparators to include.
+     n_bits: int
+         The number of bits of each comparator.
+     tech_node: str
+         The technology node of the comparators.
+     voltage: float
+         The voltage of the comparators.
+     """
+
+     priority = 0.5
+
+     def __init__(
+         self,
+         n_comparators: int,
+         n_bits: int,
+         tech_node: str,
+         voltage: float = 0.85,
+     ):
+         self.n_comparators = n_comparators
+         self.n_bits = n_bits
+
+         # One comparator model is scaled up to represent all n_comparators
+         self.comparator = AladdinComparator(
+             n_bits=n_bits,
+             tech_node=tech_node,
+         )
+         self.comparator.energy_scale *= n_comparators
+         self.comparator.area_scale *= n_comparators
+
+         # Flip flops are used one bit at a time, so we only make one bit and scale
+         # the energy and latency
+         self.flip_flop = FlipFlop(
+             n_bits=1,
+             tech_node=tech_node,
+         )
+         self.flip_flop.energy_scale *= n_bits
+         self.flip_flop.latency_scale *= n_bits
+
+         # Zero counter is shared between all the comparators, so scale the energy
+         # and latency to activate with each one
+         self.zeros_counter = AladdinCounter(
+             n_bits=ceil(log2(n_comparators)),
+             tech_node=tech_node,
+         )
+         self.zeros_counter.energy_scale *= n_comparators
+         self.zeros_counter.latency_scale *= n_comparators
+
+         super().__init__(
+             subcomponents=[
+                 self.comparator,
+                 self.flip_flop,
+                 self.zeros_counter,
+             ],
+         )
+
+         for subcomponent in self.subcomponents:
+             subcomponent.scale(
+                 "voltage",
+                 voltage,
+                 0.85,
+                 area_scale_function=linear,
+                 energy_scale_function=quadratic,
+                 latency_scale_function=reciprocal,
+                 leak_power_scale_function=linear,
+             )
+             subcomponent.leak_power_scale *= 0.02  # Low-leakage technology
+
+     @action
+     def read(self) -> tuple[float, float]:
+         self.comparator.read()
+         self.flip_flop.read()
+         self.zeros_counter.read()
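Each subcomponent above is rescaled from its 0.85 V reference point to the requested voltage. A sketch of what the quadratic energy-scale function plausibly computes; this is an assumption about hwcomponents.scaling semantics, not confirmed by this diff:

# Assumed semantics of the quadratic energy scaling used above:
# dynamic energy scales with (voltage / reference_voltage) ** 2, i.e. E ~ C * V^2.
def quadratic_energy_scale(voltage: float, reference_voltage: float = 0.85) -> float:
    return (voltage / reference_voltage) ** 2

# e.g. dropping from 0.85 V to 0.6 V cuts dynamic energy roughly in half:
print(quadratic_energy_scale(0.6))  # ~0.498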
examples/arches/compute_in_memory/isaac.yaml
@@ -0,0 +1,233 @@
+ # @inproceedings{10.1109/ISCA.2016.12,
+ #   author = {Shafiee, Ali and Nag, Anirban and Muralimanohar, Naveen and Balasubramonian, Rajeev and Strachan, John Paul and Hu, Miao and Williams, R. Stanley and Srikumar, Vivek},
+ #   title = {ISAAC: a convolutional neural network accelerator with in-situ analog arithmetic in crossbars},
+ #   year = {2016},
+ #   isbn = {9781467389471},
+ #   publisher = {IEEE Press},
+ #   url = {https://doi.org/10.1109/ISCA.2016.12},
+ #   doi = {10.1109/ISCA.2016.12},
+ #   abstract = {A number of recent efforts have attempted to design accelerators for popular machine learning algorithms, such as those involving convolutional and deep neural networks (CNNs and DNNs). These algorithms typically involve a large number of multiply-accumulate (dot-product) operations. A recent project, DaDianNao, adopts a near data processing approach, where a specialized neural functional unit performs all the digital arithmetic operations and receives input weights from adjacent eDRAM banks. This work explores an in-situ processing approach, where memristor crossbar arrays not only store input weights, but are also used to perform dot-product operations in an analog manner. While the use of crossbar memory as an analog dot-product engine is well known, no prior work has designed or characterized a full-fledged accelerator based on crossbars. In particular, our work makes the following contributions: (i) We design a pipelined architecture, with some crossbars dedicated for each neural network layer, and eDRAM buffers that aggregate data between pipeline stages. (ii) We define new data encoding techniques that are amenable to analog computations and that can reduce the high overheads of analog-to-digital conversion (ADC). (iii) We define the many supporting digital components required in an analog CNN accelerator and carry out a design space exploration to identify the best balance of memristor storage/compute, ADCs, and eDRAM storage on a chip. On a suite of CNN and DNN workloads, the proposed ISAAC architecture yields improvements of 14.8\texttimes{}, 5.5\texttimes{}, and 7.5\texttimes{} in throughput, energy, and computational density (respectively), relative to the state-of-the-art DaDianNao architecture.},
+ #   booktitle = {Proceedings of the 43rd International Symposium on Computer Architecture},
+ #   pages = {14–26},
+ #   numpages = {13},
+ #   keywords = {CNN, DNN, accelerator, analog, memristor, neural},
+ #   location = {Seoul, Republic of Korea},
+ #   series = {ISCA '16}
+ # }
+
+ {{include_text('_include.yaml')}}
+ {{add_to_path('./memory_cells')}}
+
+ arch:
+   arch_globals_dependent_on_workload:
+     <<: *variables_global
+     # =========================================================================
+     # Encoding-dependent parameters
+     # =========================================================================
+     encoded_input_bits: input_bits
+     encoded_weight_bits: weight_bits
+     encoded_output_bits: output_bits
+
+     input_encoding_func: offset_encode_hist
+     weight_encoding_func: offset_encode_hist
+
+     # For accuracy model. Can in-array accumulation include signed values?
+     # Signed accumulation is not compatible with offset encoding (since offset
+     # encoding makes values non-negative).
+     signed_sum_across_inputs: False
+     signed_sum_across_weights: False
+
+     # =========================================================================
+     # Architecture & CiM Array Structure
+     # =========================================================================
+     # DEFINITIONS:
+     # - Cell: Smallest structure capable of storing memory. Note that a cell
+     #   may store more than one bit. For example, a cell consisting of an RRAM
+     #   device may store >1 bits, while a cell consisting of an SRAM bitcell
+     #   may store only 1 bit.
+     # - CiM Unit: Smallest structure capable of computing an analog MAC.
+     # - CiM Unit Width Cells:
+     #   Number of CiM unit cells that are accessed as one. These cells receive
+     #   one analog input and compute one analog MAC per timestep.
+     # - CiM Unit Depth Cells:
+     #   Number of independent groups of "CiM Unit Width" cells that form a CiM
+     #   unit. Each of these groups is independently addressable and must be
+     #   activated in a different timestep than the others.
+     cim_unit_width_cells: 1
+     cim_unit_depth_cells: 1
+     bits_per_cell: 2
+
+     # =========================================================================
+     # Data Converters
+     # =========================================================================
+     adc_resolution: 8
+     voltage_dac_resolution: 1
+     temporal_dac_resolution: 1
+
+     n_adc_per_bank: 1
+
+     # =========================================================================
+     # Hardware
+     # =========================================================================
+     cycle_period: 1e-9
+     read_pulse_width: 1e-9
+
+   extra_attributes_for_all_component_models:
+     <<: *cim_component_attributes
+     tech_node: tech_node
+     cycle_period: cycle_period
+
+   nodes:
+   - !Memory # Input buffer
+     name: InputBuffer
+     tensors: {keep: input}
+     size: MultiArrayFanout.get_fanout() * array_parallel_inputs * supported_input_bits
+     component_class: SmartBufferSRAM
+
+   - !Memory # Output buffer
+     name: OutputBuffer
+     tensors: {keep: output}
+     size: MultiArrayFanout.get_fanout() * array_parallel_outputs // min_weight_slices * supported_output_bits * 2
+     component_class: SmartBufferSRAM
+
+   - !ProcessingStage # Shift+add sums outputs from multiple slices
+     name: ShiftAdd
+     tensors: {keep: output}
+     direction: up
+     n_parallel_instances: MultiArrayFanout.get_fanout() # Match throughput with arrays
+     bits_per_action: output_bits / n_sliced_psums # n_sliced_psums reads to get an output
+     component_class: ISAACShiftAdd
+     extra_attributes_for_component_model:
+       n_bits: supported_output_bits
+       shift_register_n_bits: supported_output_bits * 2
+
+   - !Fanout # array: Independent array with memory elements and peripherals.
+     name: MultiArrayFanout
+     spatial:
+     - name: array
+       fanout: 8
+
+   - !ProcessingStage # ADC
+     name: ADC
+     tensors: {keep: output}
+     direction: up
+     bits_per_action: output_bits / n_sliced_psums # n_sliced_psums reads to get an output
+     component_class: ADC
+     energy_scale: adc_energy_scale
+     area_scale: adc_area_scale
+     extra_attributes_for_component_model:
+       throughput_scale: 1 / 100 # 100 cycles to process all outputs
+       throughput: 1 / cycle_period * cols_active_at_once * throughput_scale
+       n_bits: adc_resolution
+
+   - !ProcessingStage # Row drivers feed inputs onto the rows of the array
+     name: RowDrivers
+     tensors: {keep: input}
+     direction: down
+     bits_per_action: input_bits / n_input_slices # n_input_slices reads to send an input
+     component_class: ArrayRowDrivers
+
+   - !ProcessingStage # Column drivers precharge the array columns
+     name: ColumnDrivers
+     tensors: {keep: output}
+     direction: up
+     bits_per_action: output_bits / n_sliced_psums # n_sliced_psums reads to get an output
+     component_class: ArrayColumnDrivers
+
+   - !Fanout
+     name: ArrayFanout
+     spatial:
+     - name: array_reuse_input # Special name that determines array size
+       fanout: 128
+       may_reuse: input
+       reuse: input
+       min_usage: 1
+       usage_scale: n_weight_slices
+     - name: array_reuse_output # Special name that determines array size
+       fanout: 128
+       may_reuse: output
+       reuse: output
+       min_usage: 1
+
+   # This is the CiM unit that stores weights and computes MACs. Each CiM unit
+   # stores a different weight slice of up to cim_unit_width_cells bits. It may
+   # also store up to cim_unit_depth_cells independently-addressable weight
+   # slices, but may only compute MACs on one slice at a time. One of these
+   # components represents a collection of CiM units that together hold one
+   # weight.
+   - !Memory
+     name: CimUnit
+     tensors: {keep: weight, no_refetch_from_above: weight, force_memory_hierarchy_order: False}
+     size: cim_unit_width_cells * cim_unit_depth_cells * bits_per_cell * n_weight_slices
+     # Requires (n_weight_slices * n_input_slices) = n_sliced_psums reads to
+     # fully use one weight
+     bits_per_action: weight.bits_per_value / n_sliced_psums
+     # Bind together n_weight_slices instances to hold one weight
+     n_parallel_instances: n_weight_slices
+     component_class: MemoryCell
+     extra_attributes_for_component_model:
+       n_instances: cim_unit_width_cells * cim_unit_depth_cells
+
+   # We account for compute energy in the CimUnit reads
+   - !Compute
+     name: FreeCompute
+     component_class: Dummy
+     enabled: len(All) == 3
+
+ # These variables pertain to the workload, microarch, and circuits. They should
+ # be matched between architectures for a fair comparison. Furthermore, this
+ # file should follow the same format for all architectures such that we can mix
+ # and match architectures with different iso files.
+ variables:
+   # ===========================================================================
+   # Workload, microarch, circuits. Things that should be matched
+   # between architectures when comparing.
+   # ===========================================================================
+   # Set by the CiM processor if these values are available in the workload.
+   # Otherwise, use the defaults here.
+   inputs_hist: [0, 0, 0, 3, 1, 0, 0]
+   weights_hist: [0, 1, 3, 4, 3, 1, 0]
+   outputs_hist: inputs_hist
+
+   ## Microarch ----------------------------------------------------------------
+   supported_input_bits: 8 # Maximum input bits supported by the arch.
+   supported_weight_bits: 8 # Maximum weight bits supported by the arch.
+   supported_output_bits: 8 # Maximum output bits supported by the arch.
+   min_supported_input_bits: 1 # Minimum input bits supported by the arch.
+   min_supported_weight_bits: 2 # Minimum weight bits supported by the arch.
+   min_supported_output_bits: 1 # Minimum output bits supported by the arch.
+
+   # Circuits ------------------------------------------------------------------
+   voltage: 1
+   tech_node: 32e-9 # 32 nm, in meters
+   cell_config: "{{find_path('rram_isaac_isca_2016.yaml')}}"
+   voltage_energy_scale: voltage ** 2
+   voltage_latency_scale: voltage
+
+   # Calibration ---------------------------------------------------------------
+   adc_energy_scale: 1
+   adc_area_scale: 1
+   row_col_drivers_area_scale: 1
+
+
+ # This workload is sized to get peak throughput & energy efficiency.
+ workload:
+   rank_sizes:
+     M: 1
+     N: 16 * 8
+     K: 128
+
+   einsums:
+   - name: Matmul
+     tensor_accesses:
+     - {name: input, projection: [m, k], bits_per_value: 16}
+     - {name: weight, projection: [k, n], bits_per_value: 16}
+     - {name: output, projection: [m, n], output: True, bits_per_value: 16}
+
+   - name: Matmul2
+     tensor_accesses:
+     - {name: input2, projection: [m, k], bits_per_value: 32}
+     - {name: weight2, projection: [k, n], bits_per_value: 32}
+     - {name: output2, projection: [m, n], output: True, bits_per_value: 32}
+     renames: {input: input2, weight: weight2, output: output2}
+
+   renames: {} # Not needed for this workload
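The bits_per_action expressions above divide by n_sliced_psums, the number of sliced partial sums per output. A hedged sketch of the arithmetic the variable names suggest; the actual formulas live in _include.yaml, which this diff does not show:

from math import ceil

# Assumed slicing relationships, using the values from the file above.
supported_weight_bits = 8
bits_per_cell = 2
supported_input_bits = 8
voltage_dac_resolution = 1
temporal_dac_resolution = 1

# A weight is split across cells holding bits_per_cell bits each.
n_weight_slices = ceil(supported_weight_bits / bits_per_cell)  # 4

# An input is split into DAC-resolution-sized slices.
n_input_slices = ceil(
    supported_input_bits / (voltage_dac_resolution * temporal_dac_resolution)
)  # 8

# Stated in the CimUnit comment above: one full output needs
# n_weight_slices * n_input_slices sliced partial sums.
n_sliced_psums = n_weight_slices * n_input_slices  # 32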
@@ -0,0 +1,63 @@
+ # -----------------------------------------------------------------------------
+ # Device type and top-level parameters
+
+ # Device type
+ -MemCellType: SRAM # SRAM or RRAM
+ # -ReadMode: current # Comment this line for voltage
+ -DeviceRoadmap -1LP 1HP 2LSTP: 2
+
+ # Device size
+ -CellArea (F^2): 24
+ -CellAspectRatio: 1.25 # Width/Height
+
+ # Calibration parameters
+ -CellReadLeakEnergyMultiplier: 1
+ -CellWriteEnergyMultiplier: 1
+ -CellCapacitanceMultiplier: 0
+ -CellCapacitanceAdjust (F): 0.174e-15 # Capacitance = (Nominal Capacitance) * Multiplier + Adjust
+
+ # Used for RRAM with access transistors or SRAM
+ -AccessCMOSWidth (F): 0
+
+ # Affects NeuroSim-realized ADCs. NOTE: Most models use ADCs from the ADC
+ # plug-in, so this has little to no effect.
+ -ReadMode: voltage # voltage or current
+
+ # -----------------------------------------------------------------------------
+ # SRAM-only parameters
+ # SRAM configuration. Only used if MemCellType is SRAM
+ -SRAMCellNMOSWidth (F): 2
+ -SRAMCellPMOSWidth (F): 2
+ -MinSenseVoltage (mV): 20
+
+ # -----------------------------------------------------------------------------
+ # RRAM-only parameters
+ # RRAM configuration. Only used if MemCellType is RRAM
+ -ResistanceOn (ohm): 625000
+ -ResistanceOff (ohm): 10000000
+
+ # Set parameters
+ -SetVoltage (V): 2 # From PRIME
+ -SetPulse (ns): 40
+ # -SetCurrent (uA): 400 # OVERWRITES PREVIOUS LINES IF UNCOMMENTED
+ # -SetEnergy (pJ): 8 # OVERWRITES PREVIOUS LINES IF UNCOMMENTED
+ # -SetPower (uW): 800 # OVERWRITES PREVIOUS LINES IF UNCOMMENTED
+
+ # Read parameters
+ -ReadVoltage (V): 0.5 # From PRIME
+ -ReadPulse (ns): 5
+ # -ReadCurrent (uA): 40 # OVERWRITES PREVIOUS LINES IF UNCOMMENTED
+ # -ReadEnergy (pJ): 0.008 # OVERWRITES PREVIOUS LINES IF UNCOMMENTED
+ # -ReadPower (uW): 8 # OVERWRITES PREVIOUS LINES IF UNCOMMENTED
+
+ # Access transistor parameters
+ -AccessType: None # CMOS or None
+ -AccessTransistorResistance (ohm): 1000
+ -AccessVoltage (V): 0.1 # Access transistor voltage
+
+ # -----------------------------------------------------------------------------
+ # Other parameters that can be set via cell component attributes:
+
+ # "voltage" is the global supply voltage
+ # "threshold_voltage" is the global threshold voltage
+ # "read_pulse_width" is the read pulse width